def training_main(args):
    # process input file
    print('step 1: validating file name')
    print(args.train_file)
    input_file = utl.validate_file(args.train_file)

    print('step 2: splitting training and testing data sets')
    user_map, item_map, tr_sparse, test_sparse, item_ID_mapping_dd = utl.split_train_and_test(
        args, input_file)

    # train model
    print('step 3: training the model')
    output_row, output_col = model.train_model(tr_sparse, args)

    # save trained model to job directory
    print('step 4: saving the model')
    utl.save_model(args, user_map, item_map, output_row, output_col,
                   item_ID_mapping_dd)

    # log results
    print('step 5: get results')
    train_rmse = model.get_rmse(output_row, output_col, tr_sparse)
    test_rmse = model.get_rmse(output_row, output_col, test_sparse)

    if args.hyperparam_tune:
        # write test_rmse metric for hyperparam tuning
        utl.write_hptuning_metric(args, test_rmse)

    tf.logging.info('train RMSE = %.2f' % train_rmse)
    tf.logging.info('test RMSE = %.2f' % test_rmse)
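
# A plausible sketch of the `get_rmse` helper used above, assuming the trained
# factors are dense numpy arrays (num_users x k and num_items x k) and the
# ratings are a scipy.sparse.coo_matrix; the real model.get_rmse may differ.
import numpy as np

def get_rmse_sketch(row_factor, col_factor, sparse_ratings):
    """RMSE over the observed entries of a sparse ratings matrix."""
    # predicted rating for each observed (user, item) pair
    preds = np.sum(row_factor[sparse_ratings.row] * col_factor[sparse_ratings.col],
                   axis=1)
    err = sparse_ratings.data - preds
    return np.sqrt(np.mean(err ** 2))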
def train_model(model, criterion, optimizer, scheduler, train_loader,
                train_dataset, test_loader, config):
    # Train the Model
    num_epochs = config['num_epochs']
    batch_size = config['batchsize']
    model_type = config['model_type']
    top_test_acc = 0
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            # Variable wrapping is deprecated since PyTorch 0.4; tensors suffice
            if torch.cuda.is_available():
                images = images.cuda()
                labels = labels.cuda()

            # Forward + Backward + Optimize
            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        test_accuracy = test_model(test_loader)
        scheduler.step()
        print('[{0}] Test Accuracy of the model on the 10000 test images: '
              '{1}, lr:{2}, loss:{3}'.format(epoch, test_accuracy,
                                             get_lr(optimizer), loss.item()))
        if test_accuracy > top_test_acc:
            utility.save_model(config=config, model=model)
            top_test_acc = test_accuracy
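
# `get_lr` is referenced above but not defined in this snippet; a minimal
# sketch, assuming the standard torch.optim interface where each parameter
# group carries its current learning rate:
def get_lr(optimizer):
    # return the learning rate of the first parameter group
    return optimizer.param_groups[0]['lr']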
def train_gan(discriminator: nn.Module, generator: nn.Module, name,
              train_loader, val_loader=None, epochs=50, num_imgs=9,
              Optimizer_fn=optim.Adam, loss_type=0, k=1):
    os.makedirs(os.path.join(CKPT_PATH, name), exist_ok=True)
    # truncate the loss log from any previous run
    with open(os.path.join(CKPT_PATH, name, "loss_log.txt"), "w") as loss_log:
        pass

    latent_sample = torch.randn(size=(num_imgs, generator.latent_size),
                                device=device)
    latent_sample[0] = 0
    losses = dict(
        train_d_real_losses=[],
        train_d_fake_losses=[],
        train_g_losses=[],
        val_d_real_losses=[],
    )
    d_optimizer, d_scheduler = get_optimizer_scheduler(Optimizer_fn,
                                                       discriminator.parameters())
    g_optimizer, g_scheduler = get_optimizer_scheduler(Optimizer_fn,
                                                       generator.parameters())
    for epoch in tqdm.trange(epochs):  # loop over the dataset multiple times
        train_d_real_loss, train_d_fake_loss, train_g_loss = train_epoch(
            discriminator, generator, train_loader, device, d_optimizer,
            d_scheduler, g_optimizer, g_scheduler, loss_type, k)
        val_d_real_loss = val_epoch(discriminator, generator, val_loader,
                                    device, loss_type, 50)
        losses["train_d_real_losses"].append(train_d_real_loss)
        losses["train_d_fake_losses"].append(train_d_fake_loss)
        losses["train_g_losses"].append(train_g_loss)
        losses["val_d_real_losses"].append(val_d_real_loss)
        loss_msg = (f"epoch {epoch}: train_d_real_loss={train_d_real_loss}, "
                    f"train_d_fake_loss={train_d_fake_loss}, "
                    f"train_g_loss={train_g_loss}, "
                    f"val_d_real_loss={val_d_real_loss}")
        logging.info(loss_msg)
        with open(os.path.join(CKPT_PATH, name, "loss_log.txt"), "a") as loss_log:
            loss_log.write(loss_msg + "\n")
        plot_losses(losses, name)
        generator.eval().to(device)
        save_image(generator(latent_sample).detach().to("cpu"),
                   "generated_images", name, epoch)
    save_model(discriminator, generator, name)
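
# `get_optimizer_scheduler` is referenced above but not defined in this
# snippet; a minimal sketch, assuming it builds the optimizer and returns an
# optional LR scheduler (the real helper may configure these differently,
# and the lr/step_size/gamma defaults here are illustrative):
def get_optimizer_scheduler(Optimizer_fn, parameters, lr=2e-4,
                            step_size=30, gamma=0.1):
    optimizer = Optimizer_fn(parameters, lr=lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=step_size,
                                          gamma=gamma)
    return optimizer, scheduler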
def api_call_tfidf():
    document_title_content_save_path = os.path.join(STORAGE, 'apicall.txt')
    test_document_title_content_save_path = os.path.join(
        STORAGE, 'test_apicall.txt')
    tfidf_save_path = os.path.join(STORAGE,
                                   'apicall_tfidf/webpage.tfidf.model')
    rawy, raw_documents = load_documents(document_title_content_save_path)
    labels, docs, filesid = load_test_documents(
        test_document_title_content_save_path)
    documents = raw_documents + docs
    print(len(documents))
    model = TfidfVectorizer(decode_error='ignore', stop_words='english',
                            ngram_range=(2, 3), max_features=5000)  # apicall
    x = model.fit_transform(documents)
    save_model(model, tfidf_save_path)
    display_scores(model, x, 'apicall_tfidf')
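
# `display_scores` is assumed to rank terms by their summed TF-IDF weight; a
# minimal sketch of such a helper, assuming a fitted TfidfVectorizer and its
# sparse output matrix (scikit-learn >= 1.0 for get_feature_names_out):
import numpy as np

def display_scores_sketch(vectorizer, tfidf_matrix, tag, top_n=20):
    # sum each term's weight over all documents and print the top terms
    scores = np.asarray(tfidf_matrix.sum(axis=0)).ravel()
    terms = vectorizer.get_feature_names_out()
    ranked = sorted(zip(terms, scores), key=lambda t: t[1], reverse=True)
    print(tag, ranked[:top_n])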
def main(): print("Reading in the training data") train = utility.load_data("training", "finalinput") truth = np.ravel(np.array(train['votes_useful_log'])) del train['votes_useful_log'] print("Extracting features and training review text model") classifier = get_pipeline() classifier.fit(train.values[:,1:], np.array(truth)) print("Saving the classifier") utility.save_model(classifier, "fullsgd_model_rev{}".format(revision))
def select_best_model():
    # `sub_dataframe_for_modeling`, `features`, `athletes_name`, `activity`,
    # and `best_model_dict` are assumed to come from the enclosing scope
    min_mae, best_model_type, best_regressor = float('inf'), '', None
    for model_class in [ModelLinearRegression, ModelNeuralNetwork,
                        ModelRandomForest, ModelXGBoost, ModelAdaBoost]:
        model_type = model_class.__name__[5:]  # strip the 'Model' prefix
        print('\nBuilding {}...'.format(model_type))
        builder = model_class(sub_dataframe_for_modeling, features)
        mae, regressor = builder.process_modeling()
        if model_type != 'NeuralNetwork':
            utility.save_model(athletes_name, activity, model_type, regressor)
        if mae < min_mae:
            min_mae, best_model_type, best_regressor = mae, model_type, regressor
    print("\n***Best model for activity '{}' is {} with mean absolute error: {}***"
          .format(activity, best_model_type, min_mae))
    if best_regressor is not None:
        best_model_dict[activity] = best_model_type
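
# The Model* classes above are assumed to share a small contract: take a
# dataframe and a feature list, fit a regressor, and return (mae, regressor).
# A minimal sketch of one such builder under that assumption, with a
# hypothetical 'target' column name and scikit-learn:
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import train_test_split

class ModelLinearRegressionSketch:
    def __init__(self, dataframe, features, target='target'):  # target assumed
        self.X, self.y = dataframe[features], dataframe[target]

    def process_modeling(self):
        # hold out 20% for the MAE estimate, fit on the rest
        X_tr, X_te, y_tr, y_te = train_test_split(self.X, self.y, test_size=0.2)
        regressor = LinearRegression().fit(X_tr, y_tr)
        mae = mean_absolute_error(y_te, regressor.predict(X_te))
        return mae, regressor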
def main():
    revision = 4
    print("Reading in the training data")
    train = utility.load_data("training", "rtext")
    inds = random.sample(range(len(train)), 100000)
    mtrain = train.iloc[inds]  # .ix was removed from modern pandas

    print("Extracting features and training review text model")
    classifier = get_pipeline()
    classifier.fit(list(mtrain['rtext_bcat']), list(mtrain['votes_useful_log']))

    print("Saving the classifier")
    utility.save_model(classifier, "train_rtext_rev{}".format(revision))
def train(self, model):
    # prepare dataset
    dataset = PerQuestionDataset(self.args, 'train', self.word2id, self.rela2id)
    if self.args.dataset.lower() == 'wq' or self.args.dataset.lower() == 'wq_train1test2':
        train_dataset, valid_dataset = random_split(dataset, 0.9, 0.1)
    else:
        train_dataset = dataset
        valid_dataset = PerQuestionDataset(self.args, 'valid', self.word2id,
                                           self.rela2id)
    datas = DataLoader(dataset=train_dataset, batch_size=1, shuffle=True,
                       num_workers=18, pin_memory=False,
                       collate_fn=quick_collate)
    self._set_optimizer(model)
    earlystop_counter, min_valid_metric = 0, 100

    # training
    for epoch in range(0, self.args.epoch_num):
        model = model.train().cuda()
        total_loss, total_acc = 0.0, 0.0
        loss_count, acc_count = 0, 0
        total_rc_acc, total_td_acc = 0.0, 0.0
        rc_count, td_count = 0, 0
        for trained_num, data in enumerate(datas):
            if self.args.framework == 'baseline':
                # baseline is equivalent to single-step relation choosing
                index, ques, tuples = data
                self.optimizer.zero_grad()
                model.zero_grad()
                loss, acc, score = self._single_step_rela_choose(model, ques,
                                                                 tuples)
                if loss != 0:
                    loss.backward()
                    self.optimizer.step()
                total_loss += (loss.data if loss != 0 else 0)
                loss_count += 1
                total_acc += acc
                acc_count += 1
                print(f'\r{self.args.framework}_{self.args.model}({self.args.dynamic}) '
                      f'{datetime.now().strftime("%Y-%m-%d %H:%M:%S")} '
                      f'Epoch {epoch} {trained_num}/{len(datas)} '
                      f'Loss:{total_loss/loss_count:.5f} '
                      f'Acc:{total_acc/acc_count:.4f}', end='')
            else:
                model, loss, acc, score, label, rc_acc, td_acc = self._execute_UHop(
                    model, data, 'train')
                total_loss += loss[0].data
                loss_count += loss[1]
                total_acc += acc
                acc_count += 1
                total_rc_acc += rc_acc[0]
                rc_count += rc_acc[1]
                total_td_acc += td_acc[0]
                td_count += td_acc[1]
                print(f'\r{self.args.framework}_{self.args.model}({self.args.dynamic}) '
                      f'{datetime.now().strftime("%Y-%m-%d %H:%M:%S")} '
                      f'Epoch {epoch} {trained_num}/{len(datas)} '
                      f'Loss:{total_loss/loss_count:.5f} '
                      f'Acc:{total_acc/acc_count:.4f} '
                      f'RC_Acc:{total_rc_acc/rc_count:.2f} '
                      f'TD_Acc:{total_td_acc/td_count:.2f}', end='')

        # validation to decide whether to early-stop
        valid_loss, valid_acc, valid_score, _ = self.evaluate(model, 'valid',
                                                              valid_dataset)
        if valid_loss < min_valid_metric:
            min_valid_metric = valid_loss
            earlystop_counter = 0
            save_model(model, self.args.path)
        else:
            earlystop_counter += 1
        if earlystop_counter > self.args.earlystop_tolerance:
            break
    return model
def train(self):
    logging.debug(f"Start training of {self.name}")
    self._decoder = self._decoder_class().to(device).train()
    self._encoder = self._encoder_class().to(device).train()
    train_autoencoder(self.encoder, self.decoder, self.train_loader, device,
                      self.name, self.encoder.latent_size, epochs=self.epochs,
                      Optimizer=self.Optimizer,
                      normal_loss_factor=self.normal_loss,
                      val_loader=self.val_loader)
    save_model(self.encoder, self.decoder, self.name)
    self._trained = True
def model_build_main(storage, datasetpath, featureheaders, targetheaders):
    name, clf, modelinfo = model_build(datasetpath, featureheaders,
                                       targetheaders)
    summarypath = os.path.join(storage, 'model/lightgbm.model.esimate')
    modelsavepath = os.path.join(storage, 'model/lightgbm.model')
    modelinfosavepath = os.path.join(storage, 'model/lightgbm.modelinfo')
    txt = json.dumps(modelinfo, indent=4)
    write_to_file(modelinfosavepath, txt.encode('utf-8'), mode='wb+')
    save_model(clf, modelsavepath)
    print('model summary:')
    print('save model summary ->', summarypath)
    write_to_file(summarypath, txt.encode('utf-8'), mode='wb+')
def main():
    word_vectors = {}
    with open(GOOGLE_ENGLISH_WORD_PATH) as f:
        for line in f.readlines():
            line = line.strip('\n')
            if line:
                word = line
                print(line)
                word_vectors[word] = None
    model = gensim.models.KeyedVectors.load_word2vec_format(
        GOOGLE_WORD2VEC_MODEL, binary=True)
    for word in word_vectors:
        try:
            # load_word2vec_format already returns KeyedVectors, so index
            # the model directly rather than via the removed .wv alias
            word_vectors[word] = model[word]
        except KeyError:
            # word not in the pretrained vocabulary; leave it as None
            pass
    save_model(word_vectors, GOOGLE_WORD_FEATURE)
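
# An equivalent lookup without exception handling; `in` and `[]` work on
# gensim KeyedVectors across 3.x and 4.x. A small sketch, not part of the
# original script:
def lookup_vectors(words, keyed_vectors):
    # map each word to its pretrained vector, or None if out of vocabulary
    return {w: (keyed_vectors[w] if w in keyed_vectors else None)
            for w in words}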
def api_call_tfidf_1():
    document_title_content_save_path = os.path.join(STORAGE, 'apicall.txt')
    test_document_title_content_save_path = os.path.join(
        STORAGE, 'test_apicall.txt')
    tfidf_save_path = os.path.join(STORAGE,
                                   'apicall_tfidf_1/webpage.tfidf.model')
    rawy, raw_documents = load_documents(document_title_content_save_path)
    labels, docs, filesid = load_test_documents(
        test_document_title_content_save_path)
    documents = raw_documents + docs
    model = TfidfVectorizer(decode_error='ignore', stop_words='english',
                            ngram_range=(1, 1))
    x = model.fit_transform(documents)
    save_model(model, tfidf_save_path)
    display_scores(model, x, 'apicall_tfidf_1')
def run_main(config):
    train_loss_total_avg = 0.0
    train_transform = transforms.Compose([
        CenterCrop2D((200, 200)),
        ElasticTransform(alpha_range=(28.0, 30.0),
                         sigma_range=(3.5, 4.0), p=0.3),
        RandomAffine(degrees=4.6, scale=(0.98, 1.02), translate=(0.03, 0.03)),
        RandomTensorChannelShift((-0.10, 0.10)),
        ToTensor(),
        NormalizeInstance(),
    ])
    val_transform = transforms.Compose([
        CenterCrop2D((200, 200)),
        ToTensor(),
        NormalizeInstance(),
    ])

    # Here we assume that the SC GM Challenge data is inside the folder
    # "data" and that it was previously resampled.
    gmdataset_train = SCGMChallenge2DTrain(root_dir="data",
                                           subj_ids=range(1, 9),
                                           transform=train_transform,
                                           slice_filter_fn=SliceFilter())
    gmdataset_val = SCGMChallenge2DTrain(root_dir="data",
                                         subj_ids=range(9, 11),
                                         transform=val_transform)

    train_loader = DataLoader(gmdataset_train, batch_size=16, shuffle=True,
                              pin_memory=True, collate_fn=mt_collate,
                              num_workers=1)
    val_loader = DataLoader(gmdataset_val, batch_size=16, shuffle=True,
                            pin_memory=True, collate_fn=mt_collate,
                            num_workers=1)

    utility.create_log_file(config)
    utility.log_info(
        config, "{0}\nStarting experiment {1}\n{0}\n".format(
            50 * "=", utility.get_experiment_name(config)))

    model = Unet(drop_rate=0.4, bn_momentum=0.1, config=config)

    if config['operation_mode'].lower() in ("retrain", "inference"):
        print("Using a trained model...")
        model.load_state_dict(torch.load(config['trained_model']))
    elif config["operation_mode"].lower() == "visualize":
        print("Visualizing weights...")
        if cuda:
            model.load_state_dict(torch.load(config['trained_model']))
        else:
            model.load_state_dict(
                torch.load(config['trained_model'], map_location='cpu'))
        v.visualize_model(model, config)
        return

    if cuda:
        model.cuda()

    num_epochs = config["num_epochs"]
    initial_lr = config["lr"]

    optimizer = optim.Adam(model.parameters(), lr=initial_lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs)
    betas = torch.linspace(3.0, 8.0, num_epochs)
    best_dice = 0

    writer = SummaryWriter(log_dir=utility.get_experiment_dir(config))

    for epoch in tqdm(range(1, num_epochs + 1)):
        start_time = time.time()

        if config['operation_mode'].lower() != "inference":
            scheduler.step()
        lr = scheduler.get_lr()[0]
        model.beta = betas[epoch - 1]  # for ternary net, set beta
        writer.add_scalar('learning_rate', lr, epoch)

        model.train()
        train_loss_total = 0.0
        num_steps = 0
        for i, batch in enumerate(train_loader):
            input_samples, gt_samples = batch["input"], batch["gt"]
            if cuda:
                var_input = input_samples.cuda()
                var_gt = gt_samples.cuda()
            else:
                var_input = input_samples
                var_gt = gt_samples

            preds = model(var_input)
            loss = dice_loss(preds, var_gt)
            train_loss_total += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            num_steps += 1

            if epoch % 5 == 0:
                grid_img = vutils.make_grid(input_samples, normalize=True,
                                            scale_each=True)
                writer.add_image('Input', grid_img, epoch)
                grid_img = vutils.make_grid(preds.data.cpu(), normalize=True,
                                            scale_each=True)
                writer.add_image('Predictions', grid_img, epoch)
                grid_img = vutils.make_grid(gt_samples, normalize=True,
                                            scale_each=True)
                writer.add_image('Ground Truth', grid_img, epoch)

        if config['operation_mode'].lower() != "inference":
            train_loss_total_avg = train_loss_total / num_steps

        model.eval()
        val_loss_total = 0.0
        num_steps = 0

        metric_fns = [
            dice_score, hausdorff_score, precision_score, recall_score,
            specificity_score, intersection_over_union, accuracy_score
        ]
        metric_mgr = MetricManager(metric_fns)

        for i, batch in enumerate(val_loader):
            input_samples, gt_samples = batch["input"], batch["gt"]
            with torch.no_grad():
                if cuda:
                    var_input = input_samples.cuda()
                    var_gt = gt_samples.cuda()
                else:
                    var_input = input_samples
                    var_gt = gt_samples

                preds = model(var_input)
                loss = dice_loss(preds, var_gt)
                val_loss_total += loss.item()

            # Metrics computation
            gt_npy = gt_samples.numpy().astype(np.uint8)
            gt_npy = gt_npy.squeeze(axis=1)

            preds = preds.data.cpu().numpy()
            preds = threshold_predictions(preds)
            preds = preds.astype(np.uint8)
            preds = preds.squeeze(axis=1)

            metric_mgr(preds, gt_npy)
            num_steps += 1

        metrics_dict = metric_mgr.get_results()
        metric_mgr.reset()
        writer.add_scalars('metrics', metrics_dict, epoch)

        val_loss_total_avg = val_loss_total / num_steps

        if config['operation_mode'].lower() != "inference":
            writer.add_scalars('losses',
                               {'train_loss': train_loss_total_avg}, epoch)
        writer.add_scalars('losses', {
            'val_loss': val_loss_total_avg,
            'train_loss': train_loss_total_avg
        }, epoch)

        end_time = time.time()
        total_time = end_time - start_time
        log_str = "Epoch {} took {:.2f} seconds dice_score={}.".format(
            epoch, total_time, metrics_dict["dice_score"])
        utility.log_info(config, log_str)
        tqdm.write(log_str)

        if metrics_dict["dice_score"] > best_dice:
            best_dice = metrics_dict["dice_score"]
            utility.save_model(model=model, config=config)

    if config['operation_mode'].lower() != "inference":
        utility.save_model(model=model, config=config)
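
# `dice_loss` is used by the segmentation scripts here but not defined in
# this snippet; a minimal soft-Dice sketch in PyTorch, assuming `preds` are
# probabilities in [0, 1] with the same shape as the binary ground truth:
def dice_loss_sketch(preds, target, eps=1e-7):
    # flatten everything but the batch dimension
    preds = preds.reshape(preds.size(0), -1)
    target = target.reshape(target.size(0), -1).float()
    intersection = (preds * target).sum(dim=1)
    union = preds.sum(dim=1) + target.sum(dim=1)
    dice = (2.0 * intersection + eps) / (union + eps)
    return 1.0 - dice.mean()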
# Next, five fully connected layers
model.add(Dense(1164, activation='relu'))
model.add(Dropout(keep_prob))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(10, activation='relu'))
model.add(Dense(1))

model.summary()

model.compile(optimizer=Adam(learning_rate), loss="mse")

# create two generators for training and validation
train_data_gen = utility.generate_train_batch()
validation_data_gen = utility.generate_val_batch()

# Keras 1-style generator API (sample counts rather than step counts)
history = model.fit_generator(train_data_gen,
                              samples_per_epoch=num_train_images,
                              nb_epoch=number_of_epochs,
                              validation_data=validation_data_gen,
                              nb_val_samples=num_val_images,
                              verbose=1)

# finally save our model and weights
utility.save_model(model)
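
# On Keras 2.x / tf.keras the Keras-1 keywords above (samples_per_epoch,
# nb_epoch, nb_val_samples) no longer exist; a hedged equivalent, assuming a
# `batch_size` variable is available to convert sample counts into steps:
import math

steps_per_epoch = math.ceil(num_train_images / batch_size)  # batch_size assumed
validation_steps = math.ceil(num_val_images / batch_size)
history = model.fit(train_data_gen,
                    steps_per_epoch=steps_per_epoch,
                    epochs=number_of_epochs,
                    validation_data=validation_data_gen,
                    validation_steps=validation_steps,
                    verbose=1)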
def run_main(config):
    dataset_base_path = "./data/"
    target_path = natsorted(glob(dataset_base_path + '/mask/*.png'))
    image_paths = natsorted(glob(dataset_base_path + '/img/*.png'))
    target_val_path = natsorted(glob(dataset_base_path + '/val_mask/*.png'))
    image_val_path = natsorted(glob(dataset_base_path + '/val_img/*.png'))

    nih_dataset_train = EMdataset(image_paths=image_paths,
                                  target_paths=target_path)
    nih_dataset_val = EMdataset(image_paths=image_val_path,
                                target_paths=target_val_path)

    train_loader = DataLoader(nih_dataset_train, batch_size=16, shuffle=True,
                              num_workers=1)
    val_loader = DataLoader(nih_dataset_val, batch_size=16, shuffle=True,
                            num_workers=1)

    model = m.Unet(drop_rate=0.4, bn_momentum=0.1, config=config)

    if config['operation_mode'].lower() in ("retrain", "inference"):
        print("Using a trained model...")
        model.load_state_dict(torch.load(config['trained_model']))
    elif config["operation_mode"].lower() == "visualize":
        print("Visualizing a trained model...")
        if cuda:
            model.load_state_dict(torch.load(config['trained_model']))
        else:
            model.load_state_dict(
                torch.load(config['trained_model'], map_location='cpu'))
        v.visualize_model(model, config)
        return

    if cuda:
        model.cuda()
        print('gpu_activate')

    num_epochs = config["num_epochs"]
    initial_lr = config["lr"]
    experiment_path = config["log_output_dir"] + config['experiment_name']
    output_image_dir = experiment_path + "/figs/"
    betas = torch.linspace(3.0, 8.0, num_epochs)

    optimizer = optim.Adam(model.parameters(), lr=initial_lr)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, num_epochs)

    writer = SummaryWriter(log_dir=utility.get_experiment_dir(config))
    best_score = 0

    for epoch in tqdm(range(1, num_epochs + 1)):
        start_time = time.time()

        scheduler.step()
        lr = scheduler.get_lr()[0]
        model.beta = betas[epoch - 1]  # for ternary net, set beta
        writer.add_scalar('learning_rate', lr, epoch)

        model.train()
        train_loss_total = 0.0
        num_steps = 0
        capture = True
        for i, batch in enumerate(train_loader):
            input_samples, gt_samples = batch[0], batch[1]
            if cuda:
                var_input = input_samples.cuda()
                var_gt = gt_samples.cuda()
            else:
                var_input = input_samples
                var_gt = gt_samples

            preds = model(var_input)
            loss = dice_loss(preds, var_gt)
            var_gt = var_gt.float()
            train_loss_total += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            num_steps += 1

            if epoch % 1 == 0 and capture:
                capture = False
                input_samples, gt_samples = get_samples(image_val_path,
                                                        target_val_path, 4)
                if cuda:
                    input_samples = input_samples.cuda()
                preds = model(input_samples)
                input_samples = input_samples.data.cpu().numpy()
                preds = preds.data.cpu().numpy()
                save_image(input_samples[0][0], gt_samples[0][0], preds[0][0],
                           epoch, 0, output_image_dir)

        train_loss_total_avg = train_loss_total / num_steps

        model.eval()
        val_loss_total = 0.0
        num_steps = 0

        metric_fns = [
            dice_score, hausdorff_score, precision_score, recall_score,
            specificity_score, intersection_over_union, accuracy_score
        ]
        metric_mgr = MetricManager(metric_fns)

        for i, batch in enumerate(val_loader):
            input_samples, gt_samples = batch[0], batch[1]
            with torch.no_grad():
                if cuda:
                    var_input = input_samples.cuda()
                    # non_blocking replaces the old `async` kwarg, which is a
                    # reserved word on Python >= 3.7
                    var_gt = gt_samples.cuda(non_blocking=True)
                else:
                    var_input = input_samples
                    var_gt = gt_samples

                preds = model(var_input)
                loss = dice_loss(preds, var_gt)
                val_loss_total += loss.item()

            gt_npy = gt_samples.data.cpu().numpy()
            gt_npy = gt_npy.squeeze(axis=1)

            preds = preds.data.cpu().numpy()
            preds = threshold_predictions(preds)
            preds = preds.squeeze(axis=1)

            metric_mgr(preds, gt_npy)
            num_steps += 1

        metrics_dict = metric_mgr.get_results()
        metric_mgr.reset()
        writer.add_scalars('metrics', metrics_dict, epoch)

        val_loss_total_avg = val_loss_total / num_steps
        writer.add_scalars('losses', {
            'val_loss': val_loss_total_avg,
            'train_loss': train_loss_total_avg
        }, epoch)

        end_time = time.time()
        total_time = end_time - start_time
        msg = ("Epoch {} took {:.2f} seconds dice_score={}. precision={} "
               "iou={} loss_train={} val_loss={}").format(
                   epoch, total_time, metrics_dict["dice_score"],
                   metrics_dict["precision_score"],
                   metrics_dict["intersection_over_union"],
                   train_loss_total_avg, val_loss_total_avg)
        utility.log_info(config, msg)
        tqdm.write(msg)
        writer.add_scalars('losses', {'train_loss': train_loss_total_avg},
                           epoch)

        if metrics_dict["dice_score"] > best_score:
            best_score = metrics_dict["dice_score"]
            utility.save_model(model=model, config=config)

    if config['operation_mode'].lower() != "inference":
        utility.save_model(model=model, config=config)
classification_report_df.to_csv(config.generic_path +
                                "validation_classification_report.csv")

graph["train_epoch_loss_list"].append(train_epoch_loss)
graph['train_epoch_accu_list'].append(train_epoch_accu)
graph['valid_epoch_loss_list'].append(valid_epoch_loss)
graph['valid_epoch_accu_list'].append(valid_epoch_accu)

validation_f1_score_macro = f1_score(y_valid_actual, y_valid_predicted,
                                     average="macro")
print("validation_f1_score_macro: {}".format(validation_f1_score_macro))
graph['validation_f1_score_macro_list'].append(validation_f1_score_macro)

if valid_epoch_accu > best_validation_accuracy:
    # Creating a checkpoint
    utility.save_model(EPOCH=epoch, model=model, optimizer=optimizer,
                       LOSS=train_epoch_loss, ACCURACY=train_epoch_accu,
                       PATH=config.checkpoint_path)
    best_validation_accuracy = valid_epoch_accu
    graph["best_validation_accuracy"] = best_validation_accuracy

print("graph: {}".format(graph))
utility.save_graph(graph_data=graph, path=config.generic_path)
bot.telegram_bot_sendtext(graph)
test_accuracy = test_model(test_loader)
scheduler.step()
print('[{0}] Test Accuracy of the model on the 10000 test images: '
      '{1}, lr:{2}, loss:{3}'.format(epoch, test_accuracy,
                                     get_lr(optimizer), loss.item()))


if __name__ == '__main__':
    args = utility.parse_args()
    model_type = args['model_type']
    config_file = args['configfile']
    config = config.Configuration(model_type, config_file)
    print(config.get_config_str())
    config = config.config_dict

    model, criterion, optimizer, scheduler = build_model(config)
    if torch.cuda.is_available():
        model = model.cuda()

    train_loader, test_loader, train_dataset, test_dataset = utility.load_dataset(
        config)

    if config['operation_mode'] == "inference":
        model_inference(test_loader, config)
    else:
        train_model(model, criterion, optimizer, scheduler, train_loader,
                    train_dataset, test_loader, config)
        # Save the Trained Model
        utility.save_model(config=config, model=model)
train_loss = 0
step = 0
for images, labels in loader['trainloader']:
    step += 1
    model.train()
    images, labels = images.to(device), labels.to(device)

    optimizer.zero_grad()
    yhat = model(images)
    loss = criterion(yhat, labels)
    loss.backward()
    optimizer.step()
    train_loss += loss.item()

    if step % 30 == 0:
        val_loss, val_acc = validation(model, criterion, loader['valloader'],
                                       device)
        print('Step: {}; Validation Loss: {}; Validation Accuracy: {}'.format(
            step, val_loss, val_acc))

print('Epoch: {}/{}; Training Loss: {}'.format(
    epoch + 1, epochs, train_loss / len(loader['trainloader'])))

# Test Accuracy:
test_loss, test_acc = validation(model, criterion, loader['testloader'],
                                 device)
print('Test Loss: {}; Test Accuracy: {}'.format(test_loss, test_acc))
save_model(model, categories_to_names, args.save_dir)
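
# `validation` is called above but not defined in this snippet; a minimal
# sketch, assuming a classifier whose criterion returns a mean batch loss
# and whose outputs are per-class logits:
def validation_sketch(model, criterion, dataloader, device):
    model.eval()
    total_loss, correct, total = 0.0, 0, 0
    with torch.no_grad():
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            yhat = model(images)
            total_loss += criterion(yhat, labels).item()
            correct += (yhat.argmax(dim=1) == labels).sum().item()
            total += labels.size(0)
    return total_loss / len(dataloader), correct / total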
def train_stacked_ae(encoder: nn.Module, decoder: nn.Module, train_loader,
                     device, name, latent_size=4, epochs=50, num_imgs=9,
                     Optimizer=optim.Adam):
    train_losses = []
    bunch = get_bunch(train_loader)

    latent_sample = torch.randn(size=(num_imgs, latent_size), device=device)
    latent_sample[0] = 0

    original_images = next(iter(train_loader))[0].to(device)
    original_images = original_images[:min(num_imgs,
                                           original_images.size()[0])]
    save_img(get_grid(original_images.to("cpu")),
             os.path.join(CKPT_PATH, name, "compressed_images",
                          "original.png"))

    criterion = nn.MSELoss()
    scheduler = optimizer = Optimizer(
        list(encoder.parameters()) + list(decoder.parameters()))
    # unwrap wrapper objects until we reach the raw optimizer; anything left
    # over in `scheduler` is treated as the LR scheduler
    while True:
        try:
            optimizer = optimizer.optimizer
        except AttributeError:
            break
    if scheduler == optimizer:
        scheduler = False

    for epoch in tqdm.trange(epochs):  # loop over the dataset multiple times
        torch.cuda.empty_cache()
        decoder = decoder.train().to(device)
        encoder = encoder.train().to(device)
        running_loss = 0.0
        print(epoch)
        for i, data in tqdm.tqdm(enumerate(train_loader, 0)):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            inputs = inputs.to(device=device)

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = decoder(encoder(inputs, epoch), epoch)
            loss = criterion(outputs, inputs)
            loss.backward()
            optimizer.step()

            # accumulate statistics
            running_loss += loss.item()
        if scheduler:
            scheduler.step()
        train_losses.append(running_loss / (i + 1))
        logging.info(f"epoch {epoch}: loss={running_loss / (i + 1)}")

        decoder = decoder.eval()
        save_image(decoder(encoder(original_images, epoch), epoch)
                   .detach().to("cpu"), "compressed_images", name, epoch)
        save_image(decoder(latent_sample).detach().to("cpu"),
                   "generated_images", name, epoch)
        torch.cuda.empty_cache()
        mean, cov = latent_space_pca(encoder, bunch)
        save_image(decoder(normal_to_pc(latent_sample, mean.to(device),
                                        cov.to(device))).detach().to("cpu"),
                   "pca_gen_images", name, epoch)
        del mean, cov
        torch.cuda.empty_cache()
        save_labeled_pca_gen_images(encoder, decoder, latent_sample, bunch,
                                    name, epoch)

    save_model(encoder, decoder, name)
    return train_losses
# Train the model on the training data and evaluate it on the validation
# data. Save the model and the resulting plots in the output path.
training_losses, training_accuracies, validation_losses, validation_accuracies = [], [], [], []
for epoch in range(1, input_arguments.epochs + 1):
    training_results = utility.run_epoch(fasttext, training_iterator,
                                         loss_function, optimizer)
    training_losses.append(training_results[0])
    training_accuracies.append(training_results[1])
    validation_results = utility.run_epoch(fasttext, validation_iterator,
                                           loss_function)
    validation_losses.append(validation_results[0])
    validation_accuracies.append(validation_results[1])
    print("Epoch: {} | Training Loss: {:.4f} | Training Accuracy: {:.2f} | "
          "Validation Loss: {:.4f} | Validation Accuracy: {:.2f}".format(
              epoch, training_losses[-1], training_accuracies[-1],
              validation_losses[-1], validation_accuracies[-1]))
utility.save_training_plots(input_arguments.output_path, training_losses,
                            training_accuracies, validation_losses,
                            validation_accuracies)
utility.save_model(fasttext, input_arguments.output_path)

# Evaluate the model on the test data and save the results.
test_loss, test_accuracy = utility.run_epoch(fasttext, test_iterator,
                                             loss_function)
utility.save_test_results(input_arguments.output_path, test_loss,
                          test_accuracy)
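
# `utility.run_epoch` is assumed to return a (loss, accuracy) pair and to
# train only when an optimizer is passed; a minimal sketch of that contract,
# assuming the iterator yields (inputs, labels) tensor batches:
def run_epoch_sketch(model, iterator, loss_function, optimizer=None):
    if optimizer is not None:
        model.train()
    else:
        model.eval()
    total_loss, correct, total = 0.0, 0, 0
    for inputs, labels in iterator:
        outputs = model(inputs)
        loss = loss_function(outputs, labels)
        if optimizer is not None:
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        total_loss += loss.item()
        correct += (outputs.argmax(dim=1) == labels).sum().item()
        total += labels.size(0)
    return total_loss / len(iterator), correct / total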
        sys.exit()
    if number_of_epochs <= 0 or number_of_epochs > 10000:
        print("The number of epochs value should be between 1 and 10000")
        sys.exit()


check_arguments()
print("The training will be processed using", device)

# data processing
datasets, loaders = utility.data_processing(data_dir)

# building model, criterion, and optimizer
model = utility.model_build(arch, hidden_size, output)
criterion, optimizer = utility.crit_optim(model, lr)

# training the model
model = utility.train_model(loaders["train"], loaders["valid"], model,
                            criterion, optimizer, device, number_of_epochs,
                            40)

# print accuracy
__, test_accuracy = utility.loss_accuracy(loaders["test"], model, criterion,
                                          device)
print("The model accuracy on the test set is: {:.2f}%".format(
    test_accuracy * 100))

# save the model
model_name = utility.save_model(model, datasets["train"], optimizer, arch,
                                output, hidden_size, model_dir="/")
print("The model was saved in:", model_name)
def run_main(config):
    train_transform = transforms.Compose([
        CenterCrop2D((200, 200)),
        ElasticTransform(alpha_range=(28.0, 30.0),
                         sigma_range=(3.5, 4.0), p=0.3),
        RandomAffine(degrees=4.6, scale=(0.98, 1.02), translate=(0.03, 0.03)),
        RandomTensorChannelShift((-0.10, 0.10)),
        ToTensor(),
        NormalizeInstance(),
    ])
    val_transform = transforms.Compose([
        CenterCrop2D((200, 200)),
        ToTensor(),
        NormalizeInstance(),
    ])

    dataset_base_path = "/export/tmp/hemmat/datasets/em_challenge/"
    target_path = natsort.natsorted(glob.glob(dataset_base_path +
                                              'mask/*.PNG'))
    image_paths = natsort.natsorted(glob.glob(dataset_base_path +
                                              'data/*.PNG'))
    target_val_path = natsort.natsorted(
        glob.glob(dataset_base_path + 'val_mask/*.PNG'))
    image_val_path = natsort.natsorted(
        glob.glob(dataset_base_path + 'val_img/*.PNG'))

    gmdataset_train = EMdataset(image_paths=image_paths,
                                target_paths=target_path)
    gmdataset_val = EMdataset(image_paths=image_val_path,
                              target_paths=target_val_path)

    train_loader = DataLoader(gmdataset_train, batch_size=5, shuffle=True,
                              num_workers=1)
    val_loader = DataLoader(gmdataset_val, batch_size=4, shuffle=True,
                            num_workers=1)

    utility.create_log_file(config)
    utility.log_info(
        config, "{0}\nStarting experiment {1}\n{0}\n".format(
            50 * "=", utility.get_experiment_name(config)))

    model = m.Unet(drop_rate=0.4, bn_momentum=0.1, config=config)

    if config['operation_mode'].lower() in ("retrain", "inference"):
        print("Using a trained model...")
        model.load_state_dict(torch.load(config['trained_model']))
    elif config["operation_mode"].lower() == "visualize":
        print("Visualizing a trained model...")
        if cuda:
            model.load_state_dict(torch.load(config['trained_model']))
        else:
            model.load_state_dict(
                torch.load(config['trained_model'], map_location='cpu'))
        mv.visualize_model(model, config)
        return

    if cuda:
        model.cuda()

    num_epochs = config["num_epochs"]
    initial_lr = config["lr"]
    experiment_path = config["log_output_dir"] + config['experiment_name']
    output_image_dir = experiment_path + "/figs/"
    betas = torch.linspace(3.0, 8.0, num_epochs)

    optimizer = optim.Adam(model.parameters(), lr=initial_lr)
    # drop the learning rate by 10x at five evenly spaced milestones
    lr_milestones = list(range(0, int(num_epochs),
                               int(int(num_epochs) / 5)))[1:]
    scheduler = optim.lr_scheduler.MultiStepLR(optimizer,
                                               milestones=lr_milestones,
                                               gamma=0.1)

    writer = SummaryWriter(log_dir=utility.get_experiment_dir(config))
    best_dice = 0

    for epoch in tqdm(range(1, num_epochs + 1)):
        start_time = time.time()

        scheduler.step()
        lr = scheduler.get_lr()[0]
        model.beta = betas[epoch - 1]  # for ternary net, set beta
        writer.add_scalar('learning_rate', lr, epoch)

        model.train()
        train_loss_total = 0.0
        num_steps = 0
        capture = True
        for i, batch in enumerate(train_loader):
            input_samples, gt_samples, idx = batch[0], batch[1], batch[2]
            if cuda:
                var_input = input_samples.cuda()
                # non_blocking replaces the old `async` kwarg, which is a
                # reserved word on Python >= 3.7
                var_gt = gt_samples.cuda(non_blocking=True).float()
            else:
                var_input = input_samples
                var_gt = gt_samples.float()

            preds = model(var_input)
            loss = calc_loss(preds, var_gt)
            train_loss_total += loss.item()

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            num_steps += 1

            if epoch % 5 == 0 and capture:
                capture = False
                input_samples, gt_samples = get_samples(image_val_path,
                                                        target_val_path, 4)
                if cuda:
                    input_samples = input_samples.cuda()
                preds = model(input_samples)
                input_samples = input_samples.data.cpu().numpy()
                preds = preds.data.cpu().numpy()
                save_image(input_samples[0][0], gt_samples[0][0], preds[0][0],
                           epoch, 0, output_image_dir)

        train_loss_total_avg = train_loss_total / num_steps

        model.eval()
        val_loss_total = 0.0
        num_steps = 0

        metric_fns = [
            dice_score, hausdorff_score, precision_score, recall_score,
            specificity_score, intersection_over_union, accuracy_score,
            rand_index_score
        ]
        metric_mgr = MetricManager(metric_fns)

        for i, batch in enumerate(val_loader):
            input_samples, gt_samples, idx = batch[0], batch[1], batch[2]
            with torch.no_grad():
                if cuda:
                    var_input = input_samples.cuda()
                    var_gt = gt_samples.cuda(non_blocking=True).float()
                else:
                    var_input = input_samples
                    var_gt = gt_samples.float()

                preds = model(var_input)
                loss = dice_loss(preds, var_gt)
                val_loss_total += loss.item()

            # Metrics computation
            gt_npy = gt_samples.numpy().astype(np.uint8)
            gt_npy = gt_npy.squeeze(axis=1)

            preds = preds.data.cpu().numpy()
            preds = threshold_predictions(preds)
            preds = preds.astype(np.uint8)
            preds = preds.squeeze(axis=1)

            metric_mgr(preds, gt_npy)
            num_steps += 1

        metrics_dict = metric_mgr.get_results()
        metric_mgr.reset()
        writer.add_scalars('metrics', metrics_dict, epoch)

        val_loss_total_avg = val_loss_total / num_steps
        writer.add_scalars('losses', {
            'val_loss': val_loss_total_avg,
            'train_loss': train_loss_total_avg
        }, epoch)

        end_time = time.time()
        total_time = end_time - start_time
        log_str = ("Epoch {} took {:.2f} seconds train_loss={} dice_score={} "
                   "rand_index_score={} lr={}.").format(
                       epoch, total_time, train_loss_total_avg,
                       metrics_dict["dice_score"],
                       metrics_dict["rand_index_score"], get_lr(optimizer))
        utility.log_info(config, log_str)
        tqdm.write(log_str)
        writer.add_scalars('losses', {'train_loss': train_loss_total_avg},
                           epoch)

        if metrics_dict["dice_score"] > best_dice:
            best_dice = metrics_dict["dice_score"]
            utility.save_model(model=model, config=config)

    if config['operation_mode'].lower() != "inference":
        utility.save_model(model=model, config=config)
def train_vae(encoder: nn.Module, decoder: nn.Module, train_loader, device,
              name, epochs=50, num_imgs=9, Optimizer=optim.Adam, loss_type=0,
              val_loader=None):
    os.makedirs(os.path.join(CKPT_PATH, name), exist_ok=True)
    # truncate the loss log from any previous run
    with open(os.path.join(CKPT_PATH, name, "loss_log.txt"), "w") as loss_log:
        pass

    latent_sample = torch.randn(size=(num_imgs, encoder.latent_size),
                                device=device)
    latent_sample[0] = 0

    original_images = next(iter(train_loader))[0].to(device)
    original_images = original_images[:min(num_imgs,
                                           original_images.size()[0])]
    save_img(get_grid(original_images.to("cpu")),
             os.path.join(CKPT_PATH, name, "compressed_images",
                          "original.png"))
    save_img(get_grid(original_images.to("cpu")),
             os.path.join(CKPT_PATH, name, "compressed_sampled_images",
                          "original.png"))

    criterion = nn.MSELoss()
    scheduler = optimizer = Optimizer(
        list(encoder.parameters()) + list(decoder.parameters()))
    log_scale = nn.Parameter(torch.Tensor([0.0])).to(device)

    train_losses = []
    train_kls = []
    val_losses = []
    val_kls = []

    # unwrap wrapper objects until we reach the raw optimizer; anything left
    # over in `scheduler` is treated as the LR scheduler
    while True:
        try:
            optimizer = optimizer.optimizer
        except AttributeError:
            break
    if scheduler == optimizer:
        scheduler = False

    for epoch in tqdm.trange(epochs):  # loop over the dataset multiple times
        train_loss, train_kl = train_epoch(decoder, device, encoder,
                                           train_loader, loss_type, criterion,
                                           log_scale, optimizer, scheduler)
        val_loss, val_kl = val_epoch(decoder, device, encoder, val_loader,
                                     loss_type, criterion, log_scale)
        train_losses.append(train_loss)
        train_kls.append(train_kl)
        val_losses.append(val_loss)
        val_kls.append(val_kl)
        loss_msg = (f"epoch {epoch}: train_loss={train_loss}, "
                    f"train_kl={train_kl}, val_loss={val_loss}, "
                    f"val_kl={val_kl}")
        logging.info(loss_msg)
        with open(os.path.join(CKPT_PATH, name, "loss_log.txt"), "a") as loss_log:
            loss_log.write(loss_msg + "\n")
        plot_loss(train_losses, train_kls, val_losses, val_kls, name)
        make_images(decoder, device, encoder, original_images, name, epoch,
                    latent_sample)
    save_model(encoder, decoder, name)