def main():
    """Build the issue dataset for repository ``r`` and plot its Weibull graph."""
    # Set loadFromFile=True when a previously saved dataset file already exists.
    generator = DatasetGenerator(token=GITHUB_TOKEN, repository=r, loadFromFile=True)
    GraphGenerator(generator).weibull()
def async_processing(post_data):
    """RQ worker entry point: build a dataset for the posted GitHub repository
    and render its Weibull graph, reporting progress through the job meta.

    ``post_data`` keys used: 'github_url', 'github_token', 'must_have',
    'blocklist_labels'.
    """
    job = get_current_job()
    # The original assigned `url` twice; cleaning is done in one step here.
    url = clean_url(post_data['github_url'])
    filters_rules = {
        'labels': {
            'must_have': post_data['must_have'],
            'blocklist_labels': post_data['blocklist_labels']
        }
    }
    dg = DatasetGenerator(
        token=post_data['github_token'],
        repository=Repository(url, filters_rules),
        loadFromFile=False
    )
    # Too few issues survive the bug filters to fit a curve:
    # save the error on the database and mark the job as failed.
    if len(dg.filtered_issues) < 200:
        error_message = ("This repository has less than 200 issues "
                         "after applying the bug filters")
        save_error(dg.repository, error_message)
        job.meta['progress'] = 'ERROR'
        job.meta['error'] = error_message
        job.save_meta()
        return
    gg = GraphGenerator(dg)
    gg.weibull()
    save_image(dg.repository)
    job.meta['progress'] = 100
    job.save_meta()
def generate_dataset(dataset_type, dataset_path, new_dataset_path, image_size):
    """Build a training/test image dataset at new_dataset_path.

    dataset_type: "class" or "german" — selects which wrapper/creation path runs.
    dataset_path: source data root.
    new_dataset_path: destination handled by DatasetGenerator.
    image_size: side length passed through as `size`.
    """
    if dataset_type == "class":
        data_proc = DatasetGenerator(new_dataset_path)
        train_dataset, test_dataset = class_dataset_wrapper(dataset_path)
        create_randomly(train_dataset, os.path.join(dataset_path, "Final_Training", "Images"), data_proc, mode="train", size=image_size)
        # Test-set creation is currently disabled:
        #create_randomly(test_dataset,os.path.join(dataset_path, "Final_Test", "Images"), data_proc, mode="test", size=image_size)
    elif dataset_type == "german":
        train_dataset, valid_dataset, test_dataset = german_dataset_wrapper(dataset_path)
        data_proc = DatasetGenerator(new_dataset_path)
        # creating training data (positives creation currently disabled)
        #create_positives(train_dataset[0],dataset_path, data_proc, mode="train", size=image_size, full=1, crop=1, w_background=0)
        create_negatives(train_dataset[1], dataset_path, data_proc, mode="train", size=image_size, goal_amount=60000)
        # creating test data — NOTE(review): uses valid_dataset, not test_dataset; confirm intended.
        #create_positives(test_dataset[0],dataset_path, data_proc, mode="test", size=image_size, full=1, crop=1, w_background=0)
        create_negatives(valid_dataset[1], dataset_path, data_proc, mode="test", size=image_size, goal_amount=10000)
def setup_dataset(self):
    """Prepare the dataset generator plus validation and test data.

    Relies on self.environment_size, self.n_epoch and self.generate_data()
    being initialised by the caller before this runs.
    """
    self.dataset_generator = DatasetGenerator(self.environment_size)
    # NOTE: the original re-assigned self.sequence_length and
    # self.offset_timing to themselves; those no-op statements were removed.
    # Run validation every n_epoch / 40 epochs.
    self.validation_timing = self.n_epoch / 40
    self.validation_dataset_length = 20
    self.validation_dataset = [
        self.generate_data() for _ in range(self.validation_dataset_length)
    ]
    self.test_data = self.generate_data()
def generate_seq_sklearn(iterations):
    """Collect hidden states and coordinate labels for sklearn.

    Runs `iterations` random sequences of length 100 through `evaluate`
    and flattens the hidden states / coordinates into two parallel lists.
    Returns (input_data, label).
    """
    features = []
    targets = []
    for _ in range(iterations):
        sequence = DatasetGenerator(maze_size).generate_seq_random(100)
        _sq_err, hidden_states, _err = evaluate(sequence, test=True)
        targets.extend(sequence['coordinates'])
        features.extend(hidden_states)
    return features, targets
def train(self):
    """Train the slot tagger from scratch, wiping any previous model directory."""
    model_path = self.__get_model_path()
    # Start from a clean directory so stale checkpoints never leak in.
    if os.path.exists(model_path):
        shutil.rmtree(model_path)
    os.makedirs(model_path)
    raw_dataset = DatasetGenerator.generate(
        self.__max_text_length, self.__max_named_entity_size, self.__utterances)
    batch_size = self.__hyper_params['batch_size']
    dataset = raw_dataset.shuffle(1000).repeat(None).batch(batch_size)
    return self.__slot_tagger.train(dataset, self.__hyper_params['steps'])
def generate_seq_sklearn(iterations, test):
    """Collect hidden states and coordinate labels for sklearn.

    When `test` is truthy the results stay grouped per sequence (one entry
    per iteration); otherwise they are flattened into single lists.
    Returns (input_data, label).
    """
    label = []
    input_data = []
    for _ in range(iterations):
        test_data = DatasetGenerator(maze_size).generate_seq(100)
        test_mean_squared_error, test_hh = evaluate(test_data, True)
        # Grouped for evaluation, flat for training (was `if test == True:`).
        if test:
            label.append(test_data['coordinates'])
            input_data.append(test_hh)
        else:
            label.extend(test_data['coordinates'])
            input_data.extend(test_hh)
    return input_data, label
def train(run_name, dataset_path, aligns_path):
    """Train LipNet for one epoch on the video/align data and print elapsed time.

    run_name: tag forwarded to the callbacks.
    dataset_path / aligns_path: forwarded to DatasetGenerator.
    """
    lipnet = LipNet().compile_model()
    datagen = DatasetGenerator(dataset_path, aligns_path)
    callbacks = create_callbacks(run_name)
    start_time = time.time()
    # NOTE(review): fit_generator was removed in TF2-era Keras — presumably
    # this targets an older Keras version; verify before upgrading.
    lipnet.model.fit_generator(generator=datagen.train_generator,
                               validation_data=datagen.val_generator,
                               epochs=1, verbose=1, shuffle=True,
                               max_queue_size=5, workers=2,
                               callbacks=callbacks,
                               use_multiprocessing=True)
    elapsed_time = time.time() - start_time
    print('\nTraining completed in: {}'.format(
        datetime.timedelta(seconds=elapsed_time)))
def predict(self, text: str):
    """Tag slots in `text` with the trained slot tagger.

    Returns a list of {'text': str, 'slot': str} dicts, one per detected slot.
    Raises EnvironmentError when no trained model directory exists.
    """
    model_path = self.__get_model_path()
    if not os.path.exists(model_path):
        raise EnvironmentError('Should be trained.')
    utterance = Utterance.parse(text, self.__vocabs, self.__named_entity)
    dataset = DatasetGenerator.generate(self.__max_text_length, self.__max_named_entity_size, {utterance})
    dataset = dataset.batch(1)
    predictions = self.__slot_tagger.predict(dataset)
    # Trim padding: keep one predicted label per token of the utterance.
    prediction = predictions[0][:len(utterance)]
    labels = list(
        map(lambda num: self.__vocabs['label'].restore(num), prediction))
    # IOB decoding: 'b-' opens a slot, 'i-' extends the last slot of the
    # same type. An 'i-' with no matching open slot is silently dropped.
    slots = []
    for token, label in zip(utterance.tokens, labels):
        if label.startswith('b-'):
            slots.append({
                'text': [token],
                'slot': label.replace('b-', '', 1)
            })
        elif label.startswith('i-'):
            slot = label.replace('i-', '', 1)
            if len(slots) > 0 and slots[-1]['slot'] == slot:
                slots[-1]['text'].append(token)
    # Replace token lists with the original surface text: a single token's
    # own text, or the plain-text span from first to last token.
    for slot in slots:
        if len(slot['text']) == 1:
            slot['text'] = slot['text'][0]['text']
        else:
            start = slot['text'][0]['span'].lower
            end = slot['text'][-1]['span'].upper
            slot['text'] = utterance.plain_text[start:end]
    return slots
def __init__(self, instence_id=0, config_dir='./cfg', ):
    """Initialise the instance: log the workspace paths, run the
    DatasetGenerator/base-class initialisers, and build the train/val
    DataLoaders.

    instence_id / config_dir: accepted but not used in this body —
    NOTE(review): confirm whether they should be stored or forwarded.
    """
    # `root` comes from the enclosing module scope.
    self.root = root
    print('root in :\n', os.path.join(self.root, '..'))
    sys.path.append(os.path.join(sys.path[0], '../'))
    print('workspace in:\n')
    for i in sys.path:
        print(i)
    DatasetGenerator.__init__(self)
    # super(NetworkGenerator,self).__init__()
    super(Instence, self).__init__()
    print('\n\n-----Instence Class Init-----\n\n')
    #####################################################
    # Dataloader: one DatasetGenerator per split, wrapped in DataLoaders.
    # BatchSize / worker_num are presumably set by a base-class __init__ —
    # NOTE(review): confirm.
    self.TrainSet = DatasetGenerator()
    self.TrainSet.DefaultDataset(Mode='train')
    self.Trainloader = DataLoader(
        self.TrainSet,
        self.BatchSize,
        shuffle=True,
        num_workers=self.worker_num,
        collate_fn=self.TrainSet.detection_collate_fn
    )
    self.ValSet = DatasetGenerator()
    self.ValSet.DefaultDataset(Mode='val')
    self.Valloader = DataLoader(
        self.ValSet,
        self.BatchSize,
        shuffle=True,
        num_workers=self.worker_num,
        collate_fn=self.ValSet.detection_collate_fn
    )
# NOTE(review): matplotlib.mlab.PCA was removed in modern matplotlib
# (3.1+) — this script presumably targets an old version; verify.
from matplotlib.mlab import PCA
import numpy as np
import matplotlib.pyplot as plt

# Geometry constants for the synthetic datasets.
center = Point(10, 10)
r = 5          # inner radius
R = 10         # outer radius
slope_asc = 4
slope_desc = -4
x = 0          # column index of the x coordinate
y = 1          # column index of the y coordinate
NUM_EXMPL = 1000

# Generate the four synthetic datasets.
generator = DatasetGenerator()
ring_dataset = generator.ring_dataset(center, r, R)
circle_dataset = generator.circle_dataset(center, r)
asc_dataset = generator.linear_dataset(center, slope_asc)
desc_dataset = generator.linear_dataset(center, slope_desc)

# Plot the raw (no-PCA) datasets.
fig = plt.figure(1)
fig.suptitle('Ordinairy PCA')
sub1 = fig.add_subplot(221)
sub1.set_title('Circlular dataset no PCA')
sub1.plot(circle_dataset[:, x], circle_dataset[:, y], '+r')
sub1.plot(ring_dataset[:, x], ring_dataset[:, y], '+b')
sub2 = fig.add_subplot(222)
sub2.set_title('Linear dataset no PCA')
)
# Load pretrained denoising-autoencoder parameters into the model.
param = np.load('dae.param.npy.1')
model.copy_parameters_from(param)

def encode(x):
    """Run x through the 4 encoder layers (sigmoid activations)."""
    for l in range(0, 4):
        x = F.sigmoid(enc_layer[l](x))
    return x

def decode(h):
    """Run h through the 4 decoder layers (sigmoid activations)."""
    for l in range(0, 4):
        h = F.sigmoid(dec_layer[l](h))
    return h

# Reconstruct each generated sample and print its reconstruction error.
dg = DatasetGenerator((9, 9))
data = np.asarray(dg.generate_dataset_sae(10), dtype='f')
N = len(data)
for n in range(0, N):
    x = chainer.Variable(np.asarray([data[n]], dtype='f'))
    h = encode(x)
    y = decode(h)
    err = F.mean_squared_error(y, x)
    print(err.data)
# Plot input, reconstruction, and absolute difference.
# NOTE(review): original line structure is ambiguous — the plotting may
# belong inside the loop; as written it shows the last sample only.
plt.subplot(4, 1, 1)
plt.imshow(np.flipud(x.data.reshape((90, 12)).T), cmap=plt.cm.gray, interpolation='none', vmin=0, vmax=1)
plt.subplot(4, 1, 2)
plt.imshow(np.flipud(y.data.reshape((90, 12)).T), cmap=plt.cm.gray, interpolation='none', vmin=0, vmax=1)
plt.subplot(4, 1, 3)
plt.imshow(np.absolute(np.flipud((x.data - y.data).reshape((90, 12)).T)), cmap=plt.cm.gray, interpolation='none', vmin=0, vmax=1)
# Select GPUs and load the trained segmentation network.
os.environ["CUDA_VISIBLE_DEVICES"] = ','.join(args.gpu.split(','))
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
logging.info(f'Using device {device}')
net.to(device=device)
net.load_state_dict(torch.load(args.model, map_location=device))
logging.info("Model loaded !")
print("Model loaded !")
# Sort frames numerically by the trailing digit of the prefix and the frame
# number, e.g. "seqX_NN.ext" -> (X, NN).
alphanum_key = lambda key: [(int(re.split('_', key)[0][-1]), int(re.split('_', key)[1].split('.')[0]))]
img_files = sorted(os.listdir(org_img_path), key=alphanum_key)
true_masks = sorted(os.listdir(gt_mask_path), key=alphanum_key)
i = 0
pwcNetwork = PWCNet().cuda().eval()
datasetGenerator = DatasetGenerator(src_dir=org_img_path)
# Optional live visualisation grid.
if not args.no_viz:
    plt.ion()
    fig, ax = plt.subplots(2, 2, figsize=(8, 4))
    plt.show()
tot = 0
total_time = 0
while i < len(img_files):
    start_time = time.time()
    true_mask = Image.open(os.path.join(gt_mask_path, true_masks[i])).convert('L')
    print("\nPredicting image {} ...".format(img_files[i]))
    if 'png' in img_files[i] or 'jpg' in img_files[i] or 'bmp' in img_files[i]:
        org_img = Image.open(os.path.join(org_img_path, img_files[i]))
        if i == 0:
            # for the first frame, since there is no previous frame, we estimate the optical flow using it self
            # (chunk truncated here in the source)
list_n_units = [20, 30, 40, 50, 60] # list_n_units = [60] # GPU parser = argparse.ArgumentParser() parser.add_argument('--gpu', '-g', default=-1, type=int, help='GPU ID (negative value indicates CPU)') args = parser.parse_args() mod = cuda.cupy if args.gpu >= 0 else np # validation dataset valid_data_stack = [] for i in range(valid_iter): valid_data = DatasetGenerator(maze_size).generate_seq(100) valid_data_stack.append(valid_data) # test dataset test_data = DatasetGenerator(maze_size).generate_seq(100) # one-step forward propagation def forward_one_step(x, t, state, train=True): # if args.gpu >= 0: # data = cuda.to_gpu(data) # targets = cuda.to_gpu(targets) x = chainer.Variable(x, volatile=not train) t = chainer.Variable(t, volatile=not train) h_in = model.x_to_h(x) + model.h_to_h(state['h']) c, h = F.lstm(state['c'], h_in)
class Instence(NetworkGenerator, DatasetGenerator, Dataset):
    """Training/evaluation harness combining the network and dataset mixins."""

    def __init__(self, instence_id=0, config_dir='./cfg', ):
        # `root` comes from the enclosing module scope.
        self.root = root
        print('root in :\n', os.path.join(self.root, '..'))
        sys.path.append(os.path.join(sys.path[0], '../'))
        print('workspace in:\n')
        for i in sys.path:
            print(i)
        DatasetGenerator.__init__(self)
        # super(NetworkGenerator,self).__init__()
        super(Instence, self).__init__()
        print('\n\n-----Instence Class Init-----\n\n')
        #####################################################
        # Dataloader: one DatasetGenerator per split.
        self.TrainSet = DatasetGenerator()
        self.TrainSet.DefaultDataset(Mode='train')
        self.Trainloader = DataLoader(
            self.TrainSet,
            self.BatchSize,
            shuffle=True,
            num_workers=self.worker_num,
            collate_fn=self.TrainSet.detection_collate_fn
        )
        self.ValSet = DatasetGenerator()
        self.ValSet.DefaultDataset(Mode='val')
        self.Valloader = DataLoader(
            self.ValSet,
            self.BatchSize,
            shuffle=True,
            num_workers=self.worker_num,
            collate_fn=self.ValSet.detection_collate_fn
        )
    #######################################################

    def ToDecive(self, images, targets):
        """Move a detection batch (image list + target dicts) to self.device."""
        images = list(img.to(self.device) for img in images)
        targets = [{k: v.to(self.device) for k, v in t.items()} for t in targets]
        return images, targets

    def targetmap(self):
        """ """
        pass

    def InstenceInfo(self):
        # Print environment, dataset and network summaries.
        print('\n\n-----Start with Instence ID', self.InstanceID, '-----\n\n')
        self.Enviroment_Info()
        self.DatasetInfo()
        self.NetWorkInfo()

    def train(self):
        print('\n\n----- Start Training -----\n\n')
        #####
        # Epochs
        for epoch in range(self.epochs):
            print('---Epoch : ', epoch)
            for index, (images, targets) in enumerate(self.Trainloader):
                images, targets = self.ToDecive(images, targets)
                self.optimizer.zero_grad()
                # NOTE(review): `train` is not defined in this scope — this
                # presumably should be `self.model(images, targets)`; confirm.
                loss_dict = train.model(images, targets)
                losses = sum(loss for loss in loss_dict.values())
                loss = losses.cpu().detach().numpy()
                print('-----Step', index, '--LOSS--', loss)
                losses.backward()
                # NOTE(review): likewise presumably `self.optimizer.step()`.
                train.Optimzer.step()

    def val(self, valloader):
        print('\n\n----- Val Processing -----\n\n')

    def inference(self):
        print('\n\n----- Inference Processing -----\n\n')

    def Evaluation(self):
        print('\n\n----- Evaluation Processing -----\n\n')

    @torch.no_grad()
    def evaluate(self, data_loader):
        """COCO-style evaluation over data_loader (truncated in this chunk)."""
        model = self.model
        device = self.device
        n_threads = torch.get_num_threads()
        # FIXME remove this and make paste_masks_in_image run on the GPU
        torch.set_num_threads(1)
        cpu_device = torch.device("cpu")
        model.eval()
        metric_logger = utils.MetricLogger(delimiter=" ")
        header = 'Test:'
        coco = get_coco_api_from_dataset(data_loader.dataset)
        iou_types = _get_iou_types(model)
        coco_evaluator = CocoEvaluator(coco, iou_types)
        for image, targets in metric_logger.log_every(data_loader, 100, header):
            # NOTE(review): `images` is read before assignment — the loop
            # variable is `image`; presumably `for img in image` was intended.
            images = list(img.to(device) for img in images)
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]
            torch.cuda.synchronize()
            model_time = time.time()
            outputs = model(image)
            outputs = [{k: v.to(cpu_device) for k, v in t.items()} for t in outputs]
            model_time = time.time() - model_time
            res = {target["image_id"].item(): output for target, output in zip(targets, outputs)}
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time, evaluator_time=evaluator_time)
        # (chunk truncated here in the source)
# Experiment parameters.
train_data_length = [100]
offset_timing = 1
valid_iter = 20

# GPU selection via command line; negative means CPU.
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', '-g', default=-1, type=int,
                    help='GPU ID (negative value indicates CPU)')
args = parser.parse_args()
mod = cuda.cupy if args.gpu >= 0 else np

# validation dataset: valid_iter offset sequences of length 100
valid_data_stack = []
for i in range(valid_iter):
    valid_data = DatasetGenerator(maze_size).generate_seq(100, offset_timing)
    valid_data_stack.append(valid_data)

# test dataset
test_data = DatasetGenerator(maze_size).generate_seq(100, offset_timing)

# model: LSTM with 64-dim input and a maze-sized output layer
model = chainer.FunctionSet(
    x_to_h = F.Linear(64, n_units * 4),
    h_to_h = F.Linear(n_units, n_units * 4),
    h_to_y = F.Linear(n_units, maze_size[0] * maze_size[1]))

if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
    model.to_gpu()
def __init__(
    self,
    instence_id=0,
    configfile='./cfg',
):
    """Initialise workspace paths, the DatasetGenerator base, COCO-style
    transforms, samplers (DDP or plain) and the train/val DataLoaders.

    instence_id: accepted but unused in this body — NOTE(review): confirm.
    configfile: forwarded to DatasetGenerator.
    """
    # ------------------------------ workspace info ------------------------------ #
    self.root = root
    self.configfile = configfile
    print('root in :\n', os.path.join(self.root, '..'))
    sys.path.append(os.path.join(sys.path[0], '../'))
    print('workspace in:\n')
    for i in sys.path:
        print(i)
    DatasetGenerator.__init__(self, configfile=configfile)
    # super(Instence,self).__init__()
    print('\n\n-----Instence Class Init-----\n\n')
    # ------------------------------ dataset object ------------------------------ #
    transforms = []
    transforms.append(ConvertCocoPolysToMask())
    transforms.append(T.ToTensor())
    transforms.append(T.RandomHorizontalFlip(0.5))
    self.transform_compose = T.Compose(transforms)
    # --------------------------------- temp part -------------------------------- #
    if self.DefaultDataset:
        self.datasets = DatasetGenerator(transforms=self.transform_compose,
                                         configfile=configfile)
        self.datasets.DefaultDatasetFunction()
        self.trainset = _coco_remove_images_without_annotations(
            self.datasets.trainset)
        self.valset = self.datasets.valset
        print('-----train&val set already done')
    # ----------------------------- DataLoader object ---------------------------- #
    if self.DistributedDataParallel:
        self.train_sampler = torch.utils.data.distributed.DistributedSampler(
            self.trainset)
        self.test_sampler = torch.utils.data.distributed.DistributedSampler(
            self.valset)
        print("-----DistributedDataParallel Sampler build done")
        self.model = torch.nn.parallel.DistributedDataParallel(
            self.model, device_ids=self.gpu_id)
        self.model_without_ddp = self.model.module
    if not self.DistributedDataParallel:
        self.train_sampler = torch.utils.data.RandomSampler(self.trainset)
        self.test_sampler = torch.utils.data.SequentialSampler(self.valset)
        print("-----DataSampler build done")
    # ---------------------------------- Sampler --------------------------------- #
    # Group training batches by aspect ratio when a grouping factor is set.
    if self.aspect_ratio_factor >= 0:
        self.group_ids = create_aspect_ratio_groups(
            self.trainset, k=self.aspect_ratio_factor)
        self.train_batch_sampler = GroupedBatchSampler(
            self.train_sampler, self.group_ids, self.BatchSize)
    else:
        self.train_batch_sampler = torch.utils.data.BatchSampler(
            self.train_sampler, self.BatchSize, drop_last=True)
    # ---------------------------------- loader ---------------------------------- #
    self.trainloader = torch.utils.data.DataLoader(
        self.trainset,
        batch_sampler=self.train_batch_sampler,
        num_workers=self.worker_num,
        collate_fn=self.collate_fn)
    self.valloader = torch.utils.data.DataLoader(
        self.valset,
        batch_size=self.BatchSize,
        sampler=self.test_sampler,
        num_workers=self.worker_num,
        collate_fn=self.collate_fn)
mod = cuda.cupy if args.gpu >= 0 else np # monkey patching type check def sigmoid_cross_entropy_check_type_forward(self, in_types): type_check.expect(in_types.size() == 2) x_type, t_type = in_types type_check.expect( x_type.dtype == mod.float32, t_type.dtype == mod.float32, x_type.shape == t_type.shape ) F.SigmoidCrossEntropy.check_type_forward = sigmoid_cross_entropy_check_type_forward # generate dataset dg = DatasetGenerator(maze_size) # validation dataset valid_data = dg.generate_seq(100) # test dataset test_data = dg.generate_seq(100) # model model = chainer.FunctionSet( x_to_h = F.Linear(16, n_units * 4), h_to_h = F.Linear(n_units, n_units * 4), h_to_y = F.Linear(n_units, 12)) if args.gpu >= 0: print('using GPU #%s' % args.gpu) cuda.check_cuda_available()
# SVM and clustering parameters ev_iterations = 100 # iterations for generating SVM and clustering dataset # GPU parser = argparse.ArgumentParser() parser.add_argument('--gpu', '-g', default=-1, type=int, help='GPU ID (negative value indicates CPU)') args = parser.parse_args() mod = cuda.cupy if args.gpu >= 0 else np # LSTM validation dataset: random valid_data = DatasetGenerator(maze_size).generate_seq_random(100) # LSTM model model = chainer.FunctionSet(x_to_h=F.Linear(64, n_units * 4), h_to_h=F.Linear(n_units, n_units * 4), h_to_y=F.Linear(n_units, 60)) if args.gpu >= 0: cuda.check_cuda_available() cuda.get_device(args.gpu).use() model.to_gpu() # LSTM optimizer optimizer = optimizers.SGD(lr=1.) optimizer.setup(model.collect_parameters())
def create_dataset_from_mtcnn_output(image_proc, b_boxes, width, height, sign_position, size, dataset_path, mode, neg_delete):
    """Create dataset samples from MTCNN bounding-box output.

    image_proc: Image_processor instance holding the source image.
    b_boxes: dict with 'offsets' (Nx4 offsets) and 'pictures' (crop coords).
    width / height: per-box dimensions, parallel to b_boxes entries.
    sign_position: dict keyed by box index; boxes absent from it are negatives.
    size: side length images are resized to before saving.
    dataset_path: destination handled by DatasetGenerator.
    mode: split tag forwarded to save_img (e.g. "train"/"test").
    neg_delete: fraction in <0,1> of negative boxes to randomly discard.

    Raises ValueError for a non-Image_processor image_proc or an
    out-of-range neg_delete.
    """
    if not isinstance(image_proc, Image_processor):
        raise ValueError(
            "image_proc argument has to be instance of Image_processor class")
    # check interval range of neg_delete, <0,1>
    if neg_delete < 0 or neg_delete > 1:
        raise ValueError("Parameter neg_delete, can have values from interval <0,1>")
    data_proc = DatasetGenerator(dataset_path)
    # for each bounding box
    for index in range(len(b_boxes['pictures'])):
        box_width = int(width[index])
        box_height = int(height[index])
        # Careful: skip degenerate boxes with non-positive width or height.
        if box_width <= 0 or box_height <= 0:
            continue
        img_container = np.zeros((int(height[index]), int(width[index]), 3))
        offsets = b_boxes['offsets'][index]
        picture_coor = b_boxes['pictures'][index]
        x1 = offsets[0]
        y1 = offsets[1]
        x2 = img_container.shape[1] + offsets[2]
        y2 = img_container.shape[0] + offsets[3]
        # Crop from the input image, unnormalize and resize to the target size.
        img_container[int(y1):int(y2), int(x1):int(x2)] = image_proc.crop_picture(*picture_coor[0:4])
        image = unnormalize_image(img_container)
        image = cv2.resize(image, dsize=(size, size))
        # Change channel order before saving: cv2 writes BGR.
        image = change_channel_order(image, current=Image_processor.channel_order, new="BGR")
        # If this box doesn't catch any sign, save it as a negative sample,
        # randomly discarding the neg_delete fraction of them.
        if index not in sign_position:
            ran_val = random.uniform(0.0, 1.0)
            if ran_val < neg_delete:
                continue
            data_proc.save_img(image=image, sample_type="negatives",
                               coordinates=[0, 0, 0, 0], box_width=0,
                               box_height=0, mode=mode)
            continue
        # Otherwise classify the sample by its IoU with the matched sign.
        sign = sign_position[index]
        norm_coor = normalize_coordinates(norm_max=size-1,
                                          width=img_container.shape[1],
                                          height=img_container.shape[0],
                                          coor=sign['offset'])
        new_box_width = norm_coor[2] - norm_coor[0] + 1
        new_box_height = norm_coor[3] - norm_coor[1] + 1
        # positive images
        if sign['iou'] > 0.65:
            type_name = "positives"
        # part images
        elif sign['iou'] >= 0.40:
            type_name = "parts"
        # negative images
        elif sign['iou'] < 0.30:
            type_name = "negatives"
        else:
            # if between 0.30 and 0.40, don't save
            continue
        # BUGFIX: box_width/box_height were swapped in the original call
        # (box_width received the y-extent and vice versa).
        data_proc.save_img(image=image, sample_type=type_name,
                           base_class=sign['class'],
                           super_class=sign['super-class'],
                           coordinates=norm_coor,
                           box_width=new_box_width,
                           box_height=new_box_height, mode=mode)
    # Tail of a silhouette-analysis function whose header is above this chunk.
    silhouette_scores = []
    # NOTE(review): comparing a string with `is` is fragile and relies on
    # interning — this should presumably be `init == 'forgy'`; confirm.
    is_forgy = init is 'forgy'
    for num_iterations in range(1, max_iter+1):
        scores = []
        for run in range(num_runs):
            # Forgy init: re-draw k random points from the data each run.
            if is_forgy:
                init = data[np.random.choice(data.shape[0], k, replace=False)]
            kmeans = KMeans(n_clusters=k, init=init, max_iter=num_iterations, n_init=1).fit(data_set)
            scores.append(silhouette_score(data_set, kmeans.labels_))
        silhouette_scores.append(scores)
    print('...finished.')
    return silhouette_scores

# Build a 3x3 grid of circular clusters as the experiment dataset.
radius = 5
scaling_factor = 10
generator = DatasetGenerator()
k = 9
num_runs = 30
max_iterations = 20
y_min, y_max = 0.4, 0.7
centers = [Point((i - i % 3), (i % 3)) for i in range(3, 12)]  # 3x3 grid
centers = scale_center_points(centers, scaling_factor=scaling_factor)
data_set = []
for center in centers:
    data_set.append(generator.circle_dataset(center, radius, num=200))
data_set = np.concatenate(data_set)
    # Tail of the fastText training command list (builder starts above this chunk).
    command, "supervised",
    "-input", train_file,
    "-output", model_location,
    "-epoch", str(epoch),
    "-wordNgrams", str(wordNgrams),
    "-lr", str(lr),
    "-dim", str(dim),
    "-ws", str(ws),
    "-minn", str(minn),
    "-maxn", str(maxn),
    "-minCount", str(minCount)
]

# K-fold train/test loop over the generated splits.
# NOTE: this is Python 2 code (print statements).
precisions = []
datasetgen = DatasetGenerator(dataset_path=fname, dest_folder=folder, kfolds=10)
i = 0
for train_file, test_file in datasetgen:
    i += 1
    if os.path.isfile(test_file) and os.path.isfile(train_file):
        # Shuffle the training set in place before training.
        print "Shuffling training set"
        subprocess.call(["shuf", train_file, "-o", train_file])
        print "Training set", train_file
        subprocess.call(train_cmd_generator(train_file))
        print "Testing set", test_file
        cmd = subprocess.Popen(
            [command, "test", model_location + ".bin", test_file, "1"],
            stdout=subprocess.PIPE)
# Maze and experiment parameters.
maze_size = (9, 9)
train_data_length = [20, 100]

# GPU selection via command line; negative means CPU.
parser = argparse.ArgumentParser()
parser.add_argument('--gpu', '-g', default=-1, type=int,
                    help='GPU ID (negative value indicates CPU)')
args = parser.parse_args()
mod = cuda.cupy if args.gpu >= 0 else np

# generate dataset
dg = DatasetGenerator(maze_size)

# test dataset
def generate_test_dataset():
    return dg.generate_seq(100)

# Load the pretrained model from disk.
# NOTE(review): unpickling executes arbitrary code — only load trusted files.
test_data = generate_test_dataset()
f = open('pretrained_model_' + str(n_units) + '.pkl', 'rb')
model = pickle.load(f)
f.close()
if args.gpu >= 0:
    cuda.check_cuda_available()
    cuda.get_device(args.gpu).use()
class Instence(DatasetGenerator):
    """Detection training/evaluation harness built on DatasetGenerator:
    wires up COCO-style transforms, (optionally distributed) samplers,
    DataLoaders, a training loop and COCO evaluation."""

    def __init__(
        self,
        instence_id=0,
        configfile='./cfg',
    ):
        # ------------------------------ workspace info ------------------------------ #
        self.root = root
        self.configfile = configfile
        print('root in :\n', os.path.join(self.root, '..'))
        sys.path.append(os.path.join(sys.path[0], '../'))
        print('workspace in:\n')
        for i in sys.path:
            print(i)
        DatasetGenerator.__init__(self, configfile=configfile)
        # super(Instence,self).__init__()
        print('\n\n-----Instence Class Init-----\n\n')
        # ------------------------------ dataset object ------------------------------ #
        transforms = []
        transforms.append(ConvertCocoPolysToMask())
        transforms.append(T.ToTensor())
        transforms.append(T.RandomHorizontalFlip(0.5))
        self.transform_compose = T.Compose(transforms)
        # --------------------------------- temp part -------------------------------- #
        if self.DefaultDataset:
            self.datasets = DatasetGenerator(transforms=self.transform_compose,
                                             configfile=configfile)
            self.datasets.DefaultDatasetFunction()
            self.trainset = _coco_remove_images_without_annotations(
                self.datasets.trainset)
            self.valset = self.datasets.valset
            print('-----train&val set already done')
        # ----------------------------- DataLoader object ---------------------------- #
        if self.DistributedDataParallel:
            self.train_sampler = torch.utils.data.distributed.DistributedSampler(
                self.trainset)
            self.test_sampler = torch.utils.data.distributed.DistributedSampler(
                self.valset)
            print("-----DistributedDataParallel Sampler build done")
            self.model = torch.nn.parallel.DistributedDataParallel(
                self.model, device_ids=self.gpu_id)
            self.model_without_ddp = self.model.module
        if not self.DistributedDataParallel:
            self.train_sampler = torch.utils.data.RandomSampler(self.trainset)
            self.test_sampler = torch.utils.data.SequentialSampler(self.valset)
            print("-----DataSampler build done")
        # ---------------------------------- Sampler --------------------------------- #
        # Group training batches by aspect ratio when a grouping factor is set.
        if self.aspect_ratio_factor >= 0:
            self.group_ids = create_aspect_ratio_groups(
                self.trainset, k=self.aspect_ratio_factor)
            self.train_batch_sampler = GroupedBatchSampler(
                self.train_sampler, self.group_ids, self.BatchSize)
        else:
            self.train_batch_sampler = torch.utils.data.BatchSampler(
                self.train_sampler, self.BatchSize, drop_last=True)
        # ---------------------------------- loader ---------------------------------- #
        self.trainloader = torch.utils.data.DataLoader(
            self.trainset,
            batch_sampler=self.train_batch_sampler,
            num_workers=self.worker_num,
            collate_fn=self.collate_fn)
        self.valloader = torch.utils.data.DataLoader(
            self.valset,
            batch_size=self.BatchSize,
            sampler=self.test_sampler,
            num_workers=self.worker_num,
            collate_fn=self.collate_fn)

    # ---------------------------------------------------------------------------- #
    #                               Instance Function                               #
    # ---------------------------------------------------------------------------- #
    def InstenceInfo(self):
        # Print environment, dataset and network summaries.
        print('\n\n-----Start with Instence ID', self.InstanceID, '-----\n\n')
        self.Enviroment_Info()
        self.DatasetInfo()
        self.NetWorkInfo()

    def init_train(self):
        """
        PROCESS OF TRAIN:
        1.INIT:
            if resume: load pretrain model
            init optimizer
            init lrscheduler
            init tensorboard
        """
        if self.resume:
            assert os.path.exists(self.checkpoint), "Invalid resume model path"
            # NOTE(review): this overwrites the checkpoint *path* with the
            # loaded dict, yet default_train later joins a save path from
            # self.checkpoint — confirm this is intended.
            self.checkpoint = torch.load(self.checkpoint)
            self.model_without_ddp.load_state_dict(self.checkpoint['model'])
            self.optimizer.load_state_dict(self.checkpoint['optimizer'])
            self.lr_scheduler.load_state_dict(self.checkpoint['lr_scheduler'])
        # ---------------------------------- tensorboard ---------------------------------- #
        if self.visualization:
            self.writer = SummaryWriter(log_dir=self.logdir,
                                        comment='experiment' + str(self.InstanceID))
        self.start = False

    def default_train(self):
        print('\n\n----- Start Training -----\n\n')
        start_time = time.time()
        self.init_train()
        baseloss = 0
        for epoch in range(0, self.epochs):
            # ------------------------------- epoch process ------------------------------- #
            sumloss = self.train_one_epoch(epoch)
            self.lr_scheduler.step()
            self.evaluate()
            if epoch == 0:
                baseloss = sumloss
            # NOTE(review): baseloss is never updated after a save, so every
            # epoch that beats epoch 0 triggers a save — confirm intended.
            if sumloss < baseloss:
                print("\n\n\n-----Model Update & Save")
                state = {
                    "model": self.model.state_dict(),
                    "optimizer": self.optimizer.state_dict(),
                    'epoch': epoch
                }
                torch.save(
                    state,
                    os.path.join(self.checkpoint, str(sumloss) + '.pth'))
            # ------------------------------- epoch process ------------------------------- #

    def default_val(self):
        print('\n\n----- Val Processing -----\n\n')

    def inference(self):
        print('\n\n----- Inference Processing -----\n\n')

    def Evaluation(self):
        print('\n\n----- Evaluation Processing -----\n\n')

    def train_one_epoch(self, epoch, print_freq=10):
        """Run one epoch over self.trainloader; returns the last batch's loss."""
        self.model.cuda()
        self.model.train()
        metric_logger = utils.MetricLogger(delimiter=" ")
        metric_logger.add_meter(
            'lr', utils.SmoothedValue(window_size=1, fmt='{value:.6f}'))
        header = 'Epoch: [{}]'.format(epoch)
        # lr_scheduler = None
        if epoch == 0:
            # Warmup parameters (the warmup scheduler itself is disabled below).
            warmup_factor = 1. / 1000
            warmup_iters = min(1000, len(self.trainloader) - 1)
            # lr_scheduler = utils.warmup_lr_scheduler(self.optimizer, warmup_iters, warmup_factor)
        for images, targets in metric_logger.log_every(self.trainloader, print_freq, header):
            print(images)
            print(targets)
            images, targets = self.todevice(images, targets)
            loss_dict = self.model(images, targets)
            """
            {
            'loss_classifier': tensor(0.0925, device='cuda:0', grad_fn=<NllLossBackward>),
            'loss_box_reg': tensor(0.0355, device='cuda:0', grad_fn=<DivBackward0>),
            'loss_objectness': tensor(0.0270, device='cuda:0', grad_fn=<BinaryCrossEntropyWithLogitsBackward>),
            'loss_rpn_box_reg': tensor(0.0112, device='cuda:0', grad_fn=<DivBackward0>)
            }
            """
            losses = sum(loss for loss in loss_dict.values())
            # Reduce losses over all processes for logging purposes.
            loss_dict_reduced = utils.reduce_dict(loss_dict)
            losses_reduced = sum(loss for loss in loss_dict_reduced.values())
            loss_value = losses_reduced.item()
            if not math.isfinite(loss_value):
                print("Loss is {}, stopping training".format(loss_value))
                print(loss_dict_reduced)
                sys.exit(1)
            self.optimizer.zero_grad()
            losses.backward()
            self.optimizer.step()
            # if lr_scheduler is not None:
            # NOTE(review): this steps the epoch-level scheduler once per
            # batch (the guard above is commented out) — confirm intended.
            self.lr_scheduler.step()
            metric_logger.update(loss=losses_reduced, **loss_dict_reduced)
            metric_logger.update(lr=self.optimizer.param_groups[0]["lr"])
        return losses

    def todevice(self, images, targets):
        """ transform the local data to device """
        images = list(image.to(self.device) for image in images)
        targets = [{k: v.to(self.device) for k, v in t.items()}
                   for t in targets]
        return images, targets

    # ---------------------------------------------------------------------------- #
    #                                Writer function                                #
    # ---------------------------------------------------------------------------- #
    def _get_iou_types(self):
        """Return the COCO IoU types applicable to the wrapped model."""
        model_without_ddp = self.model
        if isinstance(self.model, torch.nn.parallel.DistributedDataParallel):
            model_without_ddp = self.model.module
        iou_types = ["bbox"]
        # ------------------------------- for detection ------------------------------ #
        if isinstance(model_without_ddp,
                      torchvision.models.detection.MaskRCNN):
            iou_types.append("segm")
        # ----------------------------- for segmentation ----------------------------- #
        if isinstance(model_without_ddp,
                      torchvision.models.detection.KeypointRCNN):
            iou_types.append("keypoints")
        # ------------------------------- for keypoint ------------------------------- #
        return iou_types

    @torch.no_grad()
    def evaluate(self, rate=0.1):
        """COCO-style evaluation on (a fraction `rate` of) the val loader."""
        n_threads = torch.get_num_threads()
        # FIXME remove this and make paste_masks_in_image run on the GPU
        torch.set_num_threads(1)
        cpu_device = torch.device("cpu")
        self.model.eval()
        metric_logger = utils.MetricLogger(delimiter=" ")
        header = 'Test:'
        coco = get_coco_api_from_dataset(self.valloader.dataset)
        # NOTE(review): `_get_iou_types` is called unqualified here but is
        # defined as a method above — presumably self._get_iou_types(); confirm.
        iou_types = _get_iou_types(self.model)
        coco_evaluator = CocoEvaluator(coco, iou_types)
        # NOTE(review): torch DataLoader objects do not support slicing —
        # this subsetting presumably fails at runtime; confirm.
        for image, targets in metric_logger.log_every(
                self.valloader[:int(len(self.valloader) * rate)], 100, header):
            image = list(img.to(self.device) for img in image)
            targets = [{k: v.to(self.device) for k, v in t.items()}
                       for t in targets]
            torch.cuda.synchronize()
            model_time = time.time()
            outputs = self.model(image)
            outputs = [{k: v.to(cpu_device) for k, v in t.items()}
                       for t in outputs]
            model_time = time.time() - model_time
            res = {
                target["image_id"].item(): output
                for target, output in zip(targets, outputs)
            }
            evaluator_time = time.time()
            coco_evaluator.update(res)
            evaluator_time = time.time() - evaluator_time
            metric_logger.update(model_time=model_time,
                                 evaluator_time=evaluator_time)
        # gather the stats from all processes
        metric_logger.synchronize_between_processes()
        print("Averaged stats:", metric_logger)
        coco_evaluator.synchronize_between_processes()
        # accumulate predictions from all images
        coco_evaluator.accumulate()
        coco_evaluator.summarize()
        torch.set_num_threads(n_threads)
        return coco_evaluator
# Training hyper-parameters.
INT_DATA_BIT_DEPTH = 16
BATCH = 32
EPOCHS = 150

#%%
# preparing data: render gd-grams from the raw audio folder
image_generator = GdGramGenerator(TRAIN_DATA_DIRECTORY, SAMPLE_RATE,
                                  GDGRAM_SHAPE, GDGRAM_DURATION,
                                  INT_DATA_BIT_DEPTH)
image_generator.process_input_folder(number_of_threads=50)

#%%
# loading DataFrame with paths/labels for training and validation data and paths for testing data
dataset_generator = DatasetGenerator(label_set=LABELS,
                                     train_input_path=TRAIN_DATA_DIRECTORY,
                                     test_input_path=TEST_DATA_DIRECTORY,
                                     bit_depth=INT_DATA_BIT_DEPTH)
data_frame = dataset_generator.load_data()
dataset_generator.apply_train_test_split(test_size=0.3, random_state=911)
dataset_generator.apply_train_validation_split(validation_size=0.2, random_state=74)

#%%
# compiling model: ResNet-18 classifier over the label set
model = resnet_model.build_resnet18(input_shape=NN_INPUT_SHAPE,
                                    num_classes=len(LABELS))
model.compile(optimizer='Adam', loss='categorical_crossentropy',
              metrics=['acc'])
def generate_dataset():
    """Generate 1,000,000 samples and persist them as CSV at the configured path."""
    samples = DatasetGenerator.generate_dataset(1000000)
    DatasetIO.write_dataset_to_csv_file(samples, Hyperparameters.DATASET_PATH)