def train(model, optimizer, criterion, trainloader, validloader, epochs):
    """Train `model` for `epochs` epochs, printing stats every 32 steps.

    Relies on module-level globals: `device`, `validation`, `class_to_idx`,
    `args`, `input_size`, `output_size`, helper module `h`, `time`, `torch`.
    """
    # setting compute device
    model.to(device)
    # training
    steps = 0
    running_loss = 0
    print_step = 32
    # validation results
    vloss = 0
    vaccuracy = 0
    # timer
    start = time.time()
    for e in range(epochs):
        for images, labels in iter(trainloader):
            steps += 1
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model.forward(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if steps % print_step == 0:
                model.eval()
                # NOTE(review): validation actually runs only every 320 steps;
                # between runs the stale vloss/vaccuracy values are re-printed.
                if steps % 320 == 0:
                    with torch.no_grad():
                        vloss, vaccuracy = validation(model, criterion,
                                                      validloader, device)
                end = time.time()
                print(
                    'Epoch: {}/{}\t'.format(e + 1, epochs),
                    'TLoss: {:.2f}\t'.format(running_loss / print_step),
                    'VLoss: {:.2f}\t'.format(vloss / len(validloader)),
                    'VAccuracy: {:.2f}\t'.format(vaccuracy / len(validloader) * 100),
                    'Time: {:.2f}'.format(end - start))
                running_loss = 0
                start = time.time()
                model.train()
    # saving model as checkpoint
    model.class_to_idx = class_to_idx
    h.save_model(args.arch, model, optimizer, input_size, output_size, epochs,
                 args.drop_p, args.save_dir, args.learning_rate)
def train_nn(sess, epochs, batch_size, get_batches_fn, train_op,
             cross_entropy_loss, input_image, correct_label, keep_prob,
             learning_rate):
    """
    Train neural network and print out the loss during training.
    :param sess: TF Session
    :param epochs: Number of epochs
    :param batch_size: Batch size
    :param get_batches_fn: Function to get batches of training data.  Call using get_batches_fn(batch_size)
    :param train_op: TF Operation to train the neural network
    :param cross_entropy_loss: TF Tensor for the amount of loss
    :param input_image: TF Placeholder for input images
    :param correct_label: TF Placeholder for label images
    :param keep_prob: TF Placeholder for dropout keep probability
    :param learning_rate: TF Placeholder for learning rate
    :return: the loss of the last batch processed

    Relies on module-level globals: `lr`, `dropout_keep`, `loss_to_save`,
    `target_loss`, `helper`, `timer`.
    """
    sess.run(tf.global_variables_initializer())
    # one timestamped directory per training run
    model_save_dir = os.path.join(
        "models", time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()))
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    losses = []
    for epoch in range(epochs):
        total_loss = 0
        total_batches = 0
        start = timer()
        for images, gt in get_batches_fn(batch_size):
            # flatten ground truth to (pixels, classes); a pixel may belong to
            # more than one class here (no row normalization)
            labels = np.reshape(gt, (-1, gt.shape[-1])).astype(np.float32)
            _, loss = sess.run(
                [train_op, cross_entropy_loss],
                feed_dict={
                    input_image: images,
                    correct_label: labels,
                    learning_rate: lr,
                    keep_prob: dropout_keep
                })
            total_loss += loss
            total_batches += 1
        end = timer()
        elasped = end - start
        mean_loss = total_loss / total_batches
        losses.append([mean_loss, elasped])
        print("Epoch: ", epoch, ", time: ", elasped, ", total loss: ",
              total_loss, ", batches: ", total_batches, ", average loss: ",
              mean_loss)
        if mean_loss < loss_to_save:
            # FIX: suffix must be converted to str before concatenation;
            # "model-" + int raised TypeError and killed the run on first save.
            suffix = str(int(mean_loss * 10000))
            helper.save_model(sess, model_save_dir, "model-" + suffix, epoch)
        if mean_loss < target_loss:
            break
    np.save(os.path.join(model_save_dir, 'training_losses'), losses)
    return loss
def train(args, params, model, optimizer, tracker=None):
    """Train `model` for `params['max_epochs']` epochs, checkpointing and
    computing validation loss after every epoch.

    Relies on module-level globals: `device`, `data_handler`, `loss` (module),
    `helper`, and `track`.
    """
    loader_params = {
        'batch_size': params['batch_size'],
        'shuffle': params['shuffle'],
        'num_workers': params['num_workers']
    }
    train_loader, val_loader, _ = data_handler.init_data_loaders(
        params, loader_params)
    epoch = 0
    max_epochs = params['max_epochs']
    while epoch < max_epochs:
        print(f'{"=" * 40} In epoch: {epoch} {"=" * 40}')
        print(f'Training on {len(train_loader)} batches...')
        # each for loop for one epoch
        for i_batch, batch in enumerate(train_loader):
            # converting the labels batch to from Long tensor to Float tensor (otherwise won't work on GPU)
            img_batch = batch['image'].to(device).float()
            label_batch = batch['label'].to(device).float()
            # making gradients zero in each optimization step
            optimizer.zero_grad()
            # getting the network prediction and computing the loss
            pred = model(img_batch)
            train_loss = loss.compute_wcel(fx=pred, labels=label_batch)
            # if i_batch % 50 == 0:
            print(
                f'Batch: {i_batch} - train loss: {round(train_loss.item(), 3)}'
            )
            # tracking the metrics using comet in each iteration
            track('train_loss', round(train_loss.item(), 3), args, tracker)
            # backward and optimization step
            train_loss.backward()
            optimizer.step()
        # save checkpoint after epoch
        save_path = helper.compute_paths(args, params)['save_path']
        helper.make_dir_if_not_exists(save_path)
        # save the model every epoch
        helper.save_model(save_path, epoch, model, optimizer, train_loss)
        val_loss = loss.compute_val_loss(model, val_loader)
        # track val loss
        print(f'In [train]: epoch={epoch} - val_loss = {val_loss}')
        track('val_loss', val_loss, args, tracker)
        epoch += 1
def main():
    """End-to-end sentiment pipeline: load posts, visualize the class
    balance, clean the text, train/tune a Naive Bayes model, persist it,
    and inspect the most informative features."""
    stop_words = get_stop_words(STOP_WORDS_PATH)
    data = Initialize_Data()
    visualizer = Visualize()

    data.initialize_twitter_posts(TWITTER_POSTS_CSV, TWITTER_DATA_DIR)
    data.initialize_facebook_posts(FACEBOOK_POSTS_CSV, FACEBOOK_DATA_DIR)

    # Visualize how the raw data is distributed across the two classes
    posts_arr = np.array(data.posts)
    labels_arr = np.array(data.labels)
    positives = posts_arr[labels_arr == "positive"]
    negatives = posts_arr[labels_arr == "negative"]
    visualizer.plot_data_distibution([positives.shape[0], negatives.shape[0]],
                                     ["positive", "negative"],
                                     "Training set distribution")

    # Clean up the post text in place
    cleanser = Posts_Cleansing(data)
    cleanser.cleanup(Text_Cleanuper())

    # Train/test a baseline model, then grid-search for better parameters
    clf = train_test_model(create_ngram_model(frozenset(stop_words)),
                           np.array(data.posts),
                           np.array(data.labels) == "positive")
    clf = grid_search_model(create_ngram_model, np.array(data.posts),
                            np.array(data.labels) == "positive",
                            frozenset(stop_words))

    print('Saving model')
    save_model(clf, NAIVE_BAYES_MODEL_PATH)
    print('Loading model')
    trained_model = load_model(NAIVE_BAYES_MODEL_PATH)
    train_test_model(trained_model, np.array(data.posts),
                     np.array(data.labels) == "positive")

    # Inspect the most and least informative vocabulary entries
    importance = get_most_important_features(
        trained_model.named_steps['vect'].vocabulary_.items(),
        trained_model.named_steps['clf'], 10)
    top_scores = [entry[0] for entry in importance[0]['tops']]
    top_words = [entry[1] for entry in importance[0]['tops']]
    bottom_scores = [entry[0] for entry in importance[0]['bottom']]
    bottom_words = [entry[1] for entry in importance[0]['bottom']]
    visualizer.plot_important_words(top_scores, top_words, bottom_scores,
                                    bottom_words,
                                    "Most important words for relevance")

    # Sanity-check a prediction on one sample sentence
    Y_predicted_word2vec = trained_model.predict(["Նա վատ աղջիկ է"])
    print(Y_predicted_word2vec)
def train(model, optimizer, criterion, trainloader, validloader, epochs):
    """Train `model`, validating every `print_step` batches.

    Relies on module-level globals: `device`, `validation`, `class_to_idx`,
    `args`, `input_size`, `output_size` and helper module `h`.
    """
    # set the compute device
    model.to(device)
    # training state
    steps = 0
    running_loss = 0
    print_step = 32
    # validation results
    vloss = 0
    vaccuracy = 0
    for e in range(epochs):
        for images, labels in iter(trainloader):
            steps += 1
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            output = model.forward(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            if steps%print_step == 0:
                model.eval()
                # no gradients needed during validation
                with torch.no_grad():
                    vloss, vaccuracy = validation(model, criterion,
                                                  validloader, device)
                print('Epoch: {}/{}\t'.format(e+1, epochs),
                      'Train Loss: {:.3f}\t'.format(running_loss/print_step),
                      'Valid Loss: {:.3f}\t'.format(vloss/len(validloader)),
                      'Valid Accuracy: {:.3f}'.format(vaccuracy/len(validloader)*100))
                running_loss = 0
                model.train()
    # save the model as a checkpoint
    model.class_to_idx = class_to_idx
    h.save_model(args.arch, model, optimizer, input_size, output_size, epochs,
                 args.drop_p, args.save_dir, args.learning_rate)
def main():
    """CLI entry point: parse arguments, load data, then build, train and
    save an image classifier.

    Relies on module-level globals: `model_choices`, `load_data`,
    `build_model`, `train`, `save_model`.
    """
    # parse command line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("data_dir", help="directory containing training data")
    parser.add_argument("--save_dir", help="directory to save checkpoint",
                        default=os.path.dirname(os.path.abspath(__file__)))
    parser.add_argument("--arch", help="choose architecture",
                        choices=model_choices, default="vgg19")
    parser.add_argument("--learning_rate", help="set learning rate",
                        type=float, default=0.001)
    # FIX: without nargs/type a value supplied on the command line arrived as
    # a single string instead of a list of ints; the default is unchanged.
    parser.add_argument("--hidden_units", help="set hidden units",
                        nargs='+', type=int,
                        default=[25088, 4096, 4096, 102])
    parser.add_argument("--epochs", help="set number of epochs to train for",
                        type=int, default=5)
    parser.add_argument("--gpu", help="use GPU for training",
                        action="store_true")
    args = parser.parse_args()
    data_dir = args.data_dir
    save_dir = args.save_dir
    arch = args.arch
    lr = args.learning_rate
    hidden_units = args.hidden_units
    epochs = args.epochs
    cuda = args.gpu
    # load data
    trainloader, validloader, class_to_idx = load_data(data_dir)
    # build the network on top of a pre-trained backbone
    model = build_model(arch, hidden_units)
    # train it
    model = train(model, epochs, lr, cuda, trainloader, validloader)
    # save the checkpoint
    save_model(model, arch, hidden_units, save_dir, class_to_idx)
def train_rnn(rnn, batch_size, optimizer, criterion, n_epochs,
              show_every_n_batches=100):
    """Train `rnn`, checkpointing whenever the rolling average loss improves.

    Relies on module-level globals: `train_loader`, `helper`,
    `forward_back_prop`, `np`.
    """
    batch_losses = []
    rnn.train()
    # best rolling-average loss seen so far
    minLoss = np.Inf
    print("Training for %d epoch(s)..." % n_epochs)
    for epoch_i in range(1, n_epochs + 1):
        # initialize hidden state
        hidden = rnn.init_hidden(batch_size)
        for batch_i, (inputs, labels) in enumerate(train_loader, 1):
            # make sure you iterate over completely full batches, only
            n_batches = len(train_loader.dataset) // batch_size
            if (batch_i > n_batches):
                break
            # forward, back prop
            loss, hidden = forward_back_prop(rnn, optimizer, criterion,
                                             inputs, labels, hidden)
            # record loss
            batch_losses.append(loss)
            # printing loss stats
            if batch_i % show_every_n_batches == 0:
                average_loss = np.average(batch_losses)
                print('Epoch: {:>4}/{:<4} Loss: {}\n'.format(
                    epoch_i, n_epochs, np.average(batch_losses)))
                # checkpoint whenever the rolling average improves
                if average_loss <= minLoss:
                    minLoss = average_loss
                    helper.save_model('./save/trained_rnn', rnn)
                    print('Model Trained and Saved')
                batch_losses = []
    # returns a trained rnn
    return rnn
if phase == "train": loss.backward() optimizer.step() running_loss += loss.item() * input_img.size(0) jaccard_acc += jaccard(labels, preds) dice_acc += dice(labels, preds) epoch_loss = running_loss / len(dataloaders[phase]) aver_jaccard = jaccard_acc / len(dataloaders[phase]) aver_dice = dice_acc / len(dataloaders[phase]) print("| {} Loss: {:.4f} | Jaccard Average Acc: {:.4f} | Dice Average Acc: {:.4f} |".format(phase, epoch_loss, aver_jaccard, aver_dice)) if phase == "valid" and aver_jaccard > best_acc: best_acc = aver_jaccard best_model_wts = copy.deepcopy(cust_model.state_dict) pass if phase == "valid": val_acc_history.append(aver_jaccard) pass print("="*15) print(" ") time_elapsed = time.time() - start_time print("Training complete in {:.0f}m {:.0f}s".format(time_elapsed//60, time_elapsed % 60)) print("Best validation Accuracy: {:.4f}".format(best_acc)) best_model_wts = copy.deepcopy(cust_model.state_dict()) cust_model.load_state_dict(best_model_wts) return cust_model, val_acc_history segm_model, acc = train_model(segm_model, dataloader_dict, criterion, optimizer, nr_epochs) save_model(segm_model, name = "linknet_batch_15epch.pt")
#compute accuracy probabilities = torch.exp(log_probabilities) top_probs, top_class = probabilities.topk(1, dim=1) equality = top_class == labels.view(*top_class.shape) validation_accuracy += torch.mean( equality.type(torch.FloatTensor)) model.train() #save information about losses over time in case it winds up useful for debugging train_losses.append(running_loss / len(train_loader)) valid_losses.append(valid_loss / len(valid_loader)) #print training stats every time we validate print( "Epoch: {}/{}.. ".format(e + 1, epochs), "Training Loss: {:.3f}.. ".format(running_loss / len(train_loader)), "Validation Loss: {:.3f}.. ".format(valid_loss / len(valid_loader)), "Validation Accuracy: {:.3f}".format(validation_accuracy / len(valid_loader))) #If this validation pass confirmed our accuracy is high, break once to escape the inner for-loop if (validation_accuracy / len(valid_loader)) >= validity_threshold: break #....then break again to escape the next for-loop if (validation_accuracy / len(valid_loader)) >= validity_threshold: break helper.save_model(model, train_set)
def main(seed=25):
    """Train/evaluate a DoiNet graph model.

    :param seed: RNG seed for reproducibility.

    Relies on module-level globals: `seed_everything`, `Args`, `AverageMeter`,
    `DoiDataset`, `DataLoader`, `DoiNet`, `create_model_load_weights`,
    `get_optimizer`, `LR_Scheduler`, `SummaryWriter`, `Trainer`, `Evaluator`,
    `tqdm`, `save_model`, `write_log`, `write_summaryWriter`.
    """
    # FIX: honor the `seed` parameter instead of the hard-coded 25
    seed_everything(seed)
    device = torch.device('cuda:0')
    # arguments
    args = Args().parse()
    n_class = args.n_class
    img_path_train = args.img_path_train
    mask_path_train = args.mask_path_train
    img_path_val = args.img_path_val
    mask_path_val = args.mask_path_val
    model_path = os.path.join(args.model_path, args.task_name)  # save model
    log_path = args.log_path
    output_path = args.output_path
    if not os.path.exists(model_path):
        os.makedirs(model_path)
    if not os.path.exists(log_path):
        os.makedirs(log_path)
    if not os.path.exists(output_path):
        os.makedirs(output_path)
    task_name = args.task_name
    print(task_name)
    ###################################
    evaluation = args.evaluation
    test = evaluation and False
    print("evaluation:", evaluation, "test:", test)
    ###################################
    print("preparing datasets and dataloaders......")
    batch_size = args.batch_size
    num_workers = args.num_workers
    config = args.config
    data_time = AverageMeter("DataTime", ':3.3f')
    batch_time = AverageMeter("BatchTime", ':3.3f')
    dataset_train = DoiDataset(img_path_train, config, train=True,
                               root_mask=mask_path_train)
    dataloader_train = DataLoader(dataset_train, batch_size=batch_size,
                                  shuffle=True, num_workers=num_workers)
    dataset_val = DoiDataset(img_path_val, config, train=True,
                             root_mask=mask_path_val)
    dataloader_val = DataLoader(dataset_val, batch_size=batch_size,
                                shuffle=False, num_workers=num_workers)
    ###################################
    print("creating models......")
    model = DoiNet(n_class, config['min_descriptor'] + 6, 4)
    model = create_model_load_weights(model, evaluation=False,
                                      ckpt_path=args.ckpt_path)
    model.to(device)
    ###################################
    num_epochs = args.epochs
    learning_rate = args.lr
    optimizer = get_optimizer(model, learning_rate=learning_rate)
    scheduler = LR_Scheduler(args.scheduler, learning_rate, num_epochs,
                             len(dataloader_train))
    ##################################
    criterion_node = nn.CrossEntropyLoss()
    criterion_edge = nn.BCELoss()
    alpha = args.alpha
    writer = SummaryWriter(log_dir=log_path + task_name)
    f_log = open(log_path + task_name + ".log", 'w')
    #######################################
    trainer = Trainer(criterion_node, criterion_edge, optimizer, n_class,
                      device, alpha=alpha)
    evaluator = Evaluator(n_class, device)
    best_pred = 0.0
    print("start training......")
    # dump all hyper-parameters to stdout and the log file
    log = task_name + '\n'
    for k, v in args.__dict__.items():
        log += str(k) + ' = ' + str(v) + '\n'
    print(log)
    f_log.write(log)
    f_log.flush()
    for epoch in range(num_epochs):
        optimizer.zero_grad()
        tbar = tqdm(dataloader_train)
        train_loss = 0
        train_loss_edge = 0
        train_loss_node = 0
        start_time = time.time()
        for i_batch, sample in enumerate(tbar):
            data_time.update(time.time() - start_time)
            if evaluation:  # evaluation pattern: no training
                break
            scheduler(optimizer, i_batch, epoch, best_pred)
            loss, loss_node, loss_edge = trainer.train(sample, model)
            train_loss += loss.item()
            train_loss_node += loss_node.item()
            train_loss_edge += loss_edge.item()
            train_scores_node, train_scores_edge = trainer.get_scores()
            batch_time.update(time.time() - start_time)
            start_time = time.time()
            if i_batch % 2 == 0:
                tbar.set_description(
                    'Train loss: %.4f (loss_node=%.4f loss_edge=%.4f); F1 node: %.4f F1 edge: %.4f; data time: %.2f; batch time: %.2f'
                    % (train_loss / (i_batch + 1),
                       train_loss_node / (i_batch + 1),
                       train_loss_edge / (i_batch + 1),
                       train_scores_node["macro_f1"],
                       train_scores_edge["macro_f1"], data_time.avg,
                       batch_time.avg))
        trainer.reset_metrics()
        data_time.reset()
        batch_time.reset()
        if epoch % 1 == 0:
            with torch.no_grad():
                model.eval()
                print("evaluating...")
                tbar = tqdm(dataloader_val)
                start_time = time.time()
                for i_batch, sample in enumerate(tbar):
                    data_time.update(time.time() - start_time)
                    pred_node, pred_edge = evaluator.eval(sample, model)
                    val_scores_node, val_scores_edge = evaluator.get_scores()
                    batch_time.update(time.time() - start_time)
                    tbar.set_description(
                        'F1 node: %.4f F1 edge: %.4f; data time: %.2f; batch time: %.2f'
                        % (val_scores_node["macro_f1"],
                           val_scores_edge["macro_f1"], data_time.avg,
                           batch_time.avg))
                    start_time = time.time()
                data_time.reset()
                batch_time.reset()
                # FIX: second unpack target was mistakenly `val_scores_node`
                # again, clobbering the node scores with the edge scores.
                val_scores_node, val_scores_edge = evaluator.get_scores()
                evaluator.reset_metrics()
                best_pred = save_model(model, model_path, val_scores_node,
                                       val_scores_edge, alpha, task_name,
                                       epoch, best_pred)
                write_log(f_log, train_scores_node, train_scores_edge,
                          val_scores_node, val_scores_edge, epoch, num_epochs)
                write_summaryWriter(writer,
                                    train_loss / len(dataloader_train),
                                    optimizer, train_scores_node,
                                    train_scores_edge, val_scores_node,
                                    val_scores_edge, epoch)
    f_log.close()
aver_jaccard = jaccard_acc / len(dataloaders[phase]) #aver_dice = dice_acc / len(dataloaders[phase]) print("| {} Loss: {:.4f} | Jaccard Average Acc: {:.4f} |".format( phase, epoch_loss, aver_jaccard)) print("_" * 15) if phase == "valid" and aver_jaccard > best_acc: best_acc = aver_jaccard best_model_wts = copy.deepcopy(cust_model.state_dict) if phase == "valid": val_acc_history.append(aver_jaccard) print("^" * 15) print(" ") scheduler.step() time_elapsed = time.time() - start_time print("Training Complete in {:.0f}m {:.0f}s".format( time_elapsed // 60, time_elapsed % 60)) print("Best Validation Accuracy: {:.4f}".format(best_acc)) best_model_wts = copy.deepcopy(cust_model.state_dict()) cust_model.load_state_dict(best_model_wts) return cust_model, val_acc_history segm_model, acc = train_model(segm_model, dict_loaders, criterion, optimizerSGD, nr_epochs, scheduler=scheduler) save_model(segm_model, name="dense_linknet_20.pt")
def exp(exp_name, device='cuda:0'):
    """Run a hyper-parameter sweep, training one model per combination and
    per channel count, and record losses/metrics/plots under `exp_path`.

    Relies on module-level globals: `helper`, `itertools`, `constants`,
    `init_data`, `model`, `train`, `np`, `plt`, `torch`.
    """
    exp_path = helper.expname_to_path(exp_name)
    helper.copyfile(exp_path)
    # the hyper-parameter list plan to evaluate, not, all combination will be
    # tested, so the time needed grows fast with the number of parameters you
    # want to test.
    bsize_list = [64]
    lr_list = [1e-5]
    # recording device, call it rdevice to avoid confusing with the gpu device
    rdevice_list = [1, 2, 3, 4]
    audio_len_list = [1.0]
    filter_num_list = [64]
    sr_list = [44100]
    # using real multichannel or fake multichannel model (for ablation study)
    mch_setting = [True]
    frame_time_list = [0.02]
    param_list = list(
        itertools.product(bsize_list, lr_list, rdevice_list, audio_len_list,
                          filter_num_list, sr_list, mch_setting,
                          frame_time_list))
    # TODO: channel number is a risk
    for param in param_list:
        bsize, lr, rdevice, audio_len, filter_num, sr, mch, frame_time = param
        channel_num_max = len(
            constants.MIC_ARRAY_CHANNEL[constants.MIC[rdevice]])
        point_num = int(audio_len * sr)
        # test model using all channels available
        #for channel_num in range(channel_num_max, channel_num_max + 1):
        # test model using different number of channels
        for channel_num in range(1, channel_num_max + 1):
            # if real multichannel
            train_data, test_data = init_data(rdevice, channel_num, point_num,
                                              mch, sr)
            test_name = '_'.join([str(x) for x in param]) + '_' + str(channel_num)
            os.mkdir(os.path.join(exp_path, test_name))
            f = open(os.path.join(exp_path, 'result.txt'), 'a')
            fc = open(os.path.join(exp_path, 'result.csv'), 'a')
            # initialize the model:
            net = model.model_init(channel_num=channel_num,
                                   p_num=filter_num,
                                   sr=sr,
                                   audio_len=audio_len,
                                   frame_time=frame_time)
            net = net.to(device)
            print(net)
            # train the model
            net, err = train.train(bsize, lr, epoch_num=100,
                                   train_data=train_data,
                                   test_data=test_data,
                                   device=device,
                                   base_model=net,
                                   record_path=os.path.join(
                                       exp_path, test_name))
            err_name = [
                'running_loss', 'acc', 'acc_train', 'f1', 'f1_train', 'eer',
                'eer_train'
            ]
            err = np.array(err)
            # plot each metric's curve over epochs
            for i in range(err.shape[1]):
                plt.plot(err[:, i], label=err_name[i])
            plt.legend()
            plt.ylim([0, 1])
            plt.savefig(os.path.join(exp_path, test_name + ".png"))
            plt.close()
            err = np.array(err)
            np.savetxt(os.path.join(exp_path, test_name + '.csv'), err,
                       delimiter=',')
            # keep the best (minimum) value of each metric
            err = list(np.amin(err, 0))
            helper.save_model(net, exp_path, model_name=test_name)
            print(err)
            result = [
                str(x) for x in (list(param) + [channel_num] + list(err))
            ]
            f.write(','.join(result) + '\n')
            fc.write(','.join(result) + '\n')
            print(
                'batch_size: {}, lr: {}, channel_num: {}, loss: {}, acc: {}, acc_train: {}, f1: {}, f1_train: {}, eer: {}, eer_train: {}'
                .format(bsize, lr, channel_num, *err))
            f.close()
            fc.close()
            with open(constants.SUMMARY_PATH, 'a') as fd:
                fd.write(','.join(result) + '\n')
            # free GPU memory before the next configuration
            del net
            torch.cuda.empty_cache()
# 'bagging_fraction': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], # 'feature_fraction': [0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0], # 'learning_rate': [0.01, 0.03, 0.1, 0.3, 1.0] 'max_depth': [5, 15, 50, 63], 'num_leaves': [10, 31, 50, 75, 100], 'n_estimators': [25, 100, 250, 500, 750, 1000], 'scale_pos_weight': [1, 2, 6, 10, 15], 'min_data_in_leaf': [5, 15, 25, 30, 40, 50], 'bagging_fraction': [0.4, 0.6, 0.8, 1.0], 'feature_fraction': [0.4, 0.6, 0.8, 1.0], 'learning_rate': [0.01, 0.03, 0.1, 0.3, 1.0] } # fit cross validation cv = GridSearchCV(estimator=model, param_grid=params, scoring=scoring, n_jobs=-1, cv=5, refit='f1', verbose=2) print("Starting Grid Search...") start = time.time() cv.fit(X_train.values, y_train.values) print('Grid Search Completed!') print('Time: ', time.time() - start) model_performance = pd.DataFrame(cv.cv_results) final_model_location = save_model(model=model, model_dir=model_location, model_name="LightGBM_Unoptimized") model_performance.to_pickle(final_model_location)
# NOTE(review): fragment — the enclosing training loop, the call whose
# argument list closes on the first line below, and `log`, `iters`, `ep`,
# `par`, `hl`, `loss_list` and `mapNet_model` are all defined outside this
# view; the final triple-quoted block is left unterminated as in the source.
        str(loss.data.item()))
log.flush()
if iters > 0:
    loss_list.append(loss.data.item())
# periodically plot the running loss curve
if iters % par.plot_interval == 0 and iters > 0:
    hl.plot_loss(loss=loss_list, epoch=ep, iteration=iters, step=1,
                 loss_name="NLL", loss_dir=par.model_dir)
# periodically checkpoint the model
if iters % par.save_interval == 0:
    hl.save_model(model=mapNet_model, model_dir=par.model_dir,
                  model_name="MapNet", train_iter=iters)
'''
if iters % par.test_interval == 0:
    mp3d_test = Habitat_MP3D(par, seq_len=par.seq_len, config_file=par.test_config, action_list=par.action_list, with_shortest_path=par.with_shortest_path)
    evaluate_MapNet(par, test_iter=iters, test_data=mp3d_test)
'''
# ** Do a full-blown debugging, check the values of the tensors
'''
# Define the map at time 0
map_0 = np.ones((par.batch_size, par.map_embedding, par.global_map_dim[0], par.global_map_dim[1]), dtype=np.float32)
map_0[0,:,:,:] = map_0[0,:,:,:] / float(par.map_embedding * par.global_map_dim[0] * par.global_map_dim[1])
map_0 = torch.tensor(map_0, dtype=torch.float32).cuda()
def train_rnn(rnn, batch_size, optimizer, criterion, n_epochs, train_loader,
              show_every_n_batches=100, saved_model_name='trained_rnn'):
    """Train `rnn`, checkpointing whenever the rolling average loss reaches a
    new minimum.

    :return: (trained rnn, list of rolling-average losses)

    Relies on module-level globals: `helper`, `forward_back_prop`, `np`, `nn`.
    """
    batch_losses = []
    loss_history = []
    # FIX: don't shadow the builtin `type`; also fail fast on unsupported
    # recurrent cells instead of a later NameError.
    if isinstance(rnn.lstm, nn.RNN) or isinstance(rnn.lstm, nn.GRU):
        cell_type = "vanilla"
    elif isinstance(rnn.lstm, nn.LSTM):
        cell_type = "lstm"
    else:
        raise ValueError("unsupported recurrent cell: %s" % type(rnn.lstm))
    rnn.train()
    previousLoss = np.Inf
    minLoss = np.Inf
    print("Training for %d epoch(s)..." % n_epochs)
    for epoch_i in range(1, n_epochs + 1):
        # initialize hidden state
        hidden = rnn.init_hidden(batch_size)
        print("epoch ", epoch_i)
        for batch_i, (inputs, labels) in enumerate(train_loader, 1):
            batch_last = batch_i
            # make sure we iterate over completely full batches only
            n_batches = len(train_loader.dataset) // batch_size
            if batch_i > n_batches:
                break
            # forward, back prop
            loss, hidden = forward_back_prop(rnn, optimizer, criterion,
                                             inputs, labels, hidden,
                                             cell_type, clip=5)
            batch_losses.append(loss)
            # printing loss stats
            if batch_i % show_every_n_batches == 0:
                average_loss = np.average(batch_losses)
                print(
                    'Epoch: {:>4}/{:<4} Loss: {} Decrease Rate: {} \n'.format(
                        epoch_i, n_epochs, average_loss,
                        (previousLoss - average_loss)))
                loss_history.append(average_loss)
                if average_loss <= previousLoss:
                    previousLoss = average_loss
                # checkpoint on a new global minimum
                if average_loss <= minLoss:
                    minLoss = average_loss
                    helper.save_model('./save/' + saved_model_name, rnn)
                    print('Model Trained and Saved')
                batch_losses = []
    # returns a trained rnn
    return rnn, loss_history
def train_nn(sess, global_step, epochs, batch_size, get_batches_fn,
             batches_n, train_op, cross_entropy_loss, prediction_op, metrics,
             metrics_reset_op, image_input, labels, keep_prob, learning_rate,
             save_model_freq=None, tensorboard_freq=None):
    """
    Train neural network and print out the loss during training.
    :param sess: TF Session
    :param global_step: TF Placeholder containing the global step
    :param epochs: Number of epochs
    :param batch_size: Batch size
    :param get_batches_fn: Function to get batches of training data.  Call using get_batches_fn(batch_size)
    :param batches_n: Number of batches to cover all the samples
    :param train_op: TF Operation to train the neural network
    :param cross_entropy_loss: TF Tensor for the amount of loss
    :param prediction_op: TF Tensor for the prediction class (index)
    :param metrics: Dictionary with the evaluation metrics
    :param metrics_reset_op: TF Tensor used to reset the metrics counters
    :param image_input: TF Placeholder for input images
    :param labels: TF Placeholder for label images
    :param keep_prob: TF Placeholder for dropout keep probability
    :param learning_rate: TF Placeholder for learning rate
    :param save_model_freq: The frequency to save the model to disk, None to disable
    :param tensorboard_freq: The frequency to push the summaries to tensorboard, None to disable
    """
    model_folder = _model_folder()
    # resume from a checkpoint when one exists, otherwise initialize fresh
    if save_model_freq and helper.checkpoint_exists(model_folder):
        print(
            'Checkpoint exists, restoring model from {}'.format(model_folder))
        helper.load_model(sess, model_folder)
    else:
        sess.run(tf.global_variables_initializer())
    # local variables back the streaming metrics counters
    # NOTE(review): collapsed source is ambiguous on whether this line was
    # inside the else-branch — confirm against the original file.
    sess.run(tf.local_variables_initializer())
    if save_model_freq:
        saver = tf.train.Saver(max_to_keep=MODELS_LIMIT)
    iou_mean, iou_op = metrics['iou']
    acc_mean, acc_op = metrics['acc']
    # Evaluate current step
    step = global_step.eval(session=sess)
    start_step = step
    if tensorboard_freq:
        # Creates the tensorboard writer
        train_writer = _summary_writer(sess, model_folder)
        # Gets the batch of images/labels to feed to the image summary op
        summary_images, summary_labels = helper.image_summary_batch(
            os.path.join(FLAGS.data_dir, 'data_road', 'training'),
            IMAGE_SHAPE, TENSORBOARD_MAX_IMG)
        # Setup the summary ops
        summary_op, image_summary_op = _setup_summaries(
            sess, train_writer, image_input, labels, keep_prob,
            cross_entropy_loss, prediction_op, iou_mean, acc_mean,
            summary_images, summary_labels, step, CLASSES_N)
    training_log = []
    print('Model folder: {}'.format(model_folder))
    print(
        'Training (First batch: {}, Epochs: {}, Batch Size: {}, Learning Rate: {}, Dropout: {}, L2 Reg: {}, Scaling: {})'
        .format(step + 1, FLAGS.epochs, FLAGS.batch_size,
                FLAGS.learning_rate, FLAGS.dropout, FLAGS.l2_reg,
                'ON' if FLAGS.scale else 'OFF'))
    best_loss = 9999
    ep_loss_incr = 0
    start = time.time()
    for epoch in range(epochs):
        total_loss = 0
        mean_loss = 9999
        mean_acc = 0
        mean_iou = 0
        images_n = 0
        # Resets the metrics variables at the beginning of the epoch
        sess.run(metrics_reset_op)
        batches = tqdm(
            get_batches_fn(batch_size),
            desc=
            'Epoch {}/{} (Step: {}, Samples: N/A, Loss: N/A, Acc: N/A, IoU: N/A)'
            .format(epoch + 1, epochs, step),
            unit='batches',
            total=batches_n)
        for batch_images, batch_labels in batches:
            feed_dict = {
                image_input: batch_images,
                labels: batch_labels,
                keep_prob: (1.0 - FLAGS.dropout),
                learning_rate: FLAGS.learning_rate
            }
            # Train
            _ = sess.run(train_op, feed_dict=feed_dict)
            images_n += len(batch_images)
            # Evaluate (dropout disabled via keep_prob = 1.0)
            loss, _, mean_iou, _, mean_acc = sess.run(
                [cross_entropy_loss, iou_op, iou_mean, acc_op, acc_mean],
                feed_dict={
                    image_input: batch_images,
                    labels: batch_labels,
                    keep_prob: 1.0
                })
            step = global_step.eval(session=sess)
            total_loss += loss * len(batch_images)
            mean_loss = total_loss / images_n
            # Saves metrics for tensorboard
            if tensorboard_freq:
                # Updates the summary according to frequency
                if step % tensorboard_freq == 0:
                    training_summary = sess.run(summary_op,
                                                feed_dict={
                                                    image_input: batch_images,
                                                    labels: batch_labels,
                                                    keep_prob: 1.0
                                                })
                    train_writer.add_summary(training_summary,
                                             global_step=step)
                # Writes the image every epoch
                if step % batches_n == 0:
                    image_pred_summary = sess.run(image_summary_op,
                                                  feed_dict={
                                                      image_input: summary_images,
                                                      labels: summary_labels,
                                                      keep_prob: 1.0
                                                  })
                    train_writer.add_summary(image_pred_summary,
                                             global_step=step)
                    train_writer.flush()
            batches.set_description(
                'Epoch {}/{} (Step: {}, Samples: {}, Loss: {:.4f}, Acc: {:.4f}, IoU: {:.4f})'
                .format(epoch + 1, epochs, step, images_n, mean_loss,
                        mean_acc, mean_iou))
        training_log.append((mean_loss, mean_acc, mean_iou))
        # early-stopping bookkeeping on the epoch mean loss
        if mean_loss < best_loss:
            ep_loss_incr = 0
            best_loss = mean_loss
        else:
            ep_loss_incr += 1
        if FLAGS.early_stopping is not None and ep_loss_incr >= FLAGS.early_stopping:
            print(
                'Early Stopping Triggered (Loss not decreasing in the last {} epochs)'
                .format(ep_loss_incr))
            break
        if save_model_freq and (epoch + 1) % save_model_freq == 0:
            helper.save_model(sess, saver, MODEL_NAME, model_folder,
                              global_step)
            log_data = _to_log_data(training_log, start_step, step, batches_n)
            helper.save_log(log_data, model_folder)
            helper.plot_log(log_data, model_folder)
    elapsed = time.time() - start
    print(
        'Training Completed ({:.1f} s): Last batch: {}, Loss: {:.4f}, Acc: {:.4f}, IoU: {:.4f}'
        .format(elapsed, step, mean_loss, mean_acc, mean_iou))
    # final checkpoint and training-log artifacts
    if save_model_freq:
        helper.save_model(sess, saver, MODEL_NAME, model_folder, global_step)
        log_data = _to_log_data(training_log, start_step, step, batches_n)
        helper.save_log(log_data, model_folder)
        helper.plot_log(log_data, model_folder)
# Build, train and save the Glove-based translation model (batch size 32).
# Relies on module-level globals: model_final, preproc_* arrays, tokenizers,
# callbacks (keras), X_train/Y_train/X_test, helper.
model = model_final(preproc_source_sentences.shape,
                    preproc_target_sentences.shape[1],
                    len(source_tokenizer.word_index) + 1,
                    len(target_tokenizer.word_index) + 1)
model.summary()
#CallBacks
mfile = 'models/Glove_training_bach32.model.h5'
model_checkpoint = callbacks.ModelCheckpoint(mfile, monitor='accuracy',
                                             save_best_only=True,
                                             save_weights_only=True)
logger = callbacks.CSVLogger('results/training_bach_32.log')
tensorboard = callbacks.TensorBoard(log_dir='results/training_bach_32')
# NOTE(review): this rebinding shadows the `callbacks` module used above, and
# model_checkpoint is not included; the list is also never passed to fit()
# below, so none of these callbacks actually run — confirm intent.
callbacks = [logger, tensorboard]
#Training model and save callbacks:
#model.fit(X_train, Y_train, batch_size=1024, epochs=25, validation_split=0.1, callbacks=callbacks)
#Training model and save callbacks:
model.fit(X_train, Y_train, batch_size=32, epochs=10, validation_split=0.01)
Predicted_by_Glove = model.predict(X_test, len(X_test))
#Save Model
helper.save_model(model, 'models/Glove_training_bach_32')
aver_jaccard = jaccard_acc / len(dataloaders[phase]) #aver_dice = dice_acc / len(dataloaders[phase]) print("| {} Loss: {:.4f} | Jaccard Average Acc: {:.4f} |".format( phase, epoch_loss, aver_jaccard)) print("_" * 15) if phase == "valid" and aver_jaccard > best_acc: best_acc = aver_jaccard best_model_wts = copy.deepcopy(cust_model.state_dict) if phase == "valid": val_acc_history.append(aver_jaccard) print("^" * 15) print(" ") scheduler.step() time_elapsed = time.time() - start_time print("Training Complete in {:.0f}m {:.0f}s".format( time_elapsed // 60, time_elapsed % 60)) print("Best Validation Accuracy: {:.4f}".format(best_acc)) best_model_wts = copy.deepcopy(cust_model.state_dict()) cust_model.load_state_dict(best_model_wts) return cust_model, val_acc_history segm_model, acc = train_model(segm_model, dict_loaders, criterion, optimizerSGD, nr_epochs, scheduler=scheduler) save_model(segm_model, name="big_dense_linknet_384_green_adgrad_bce.pt")
# NOTE(review): fragment — the first line closes an RNN constructor call that
# begins outside this view; `train_on_gpu`, `learning_rate`, `batch_size`,
# `num_epochs`, `show_every_n_batches`, `train_rnn` and `helper` are defined
# elsewhere. Chinese notebook comments translated to English below.
           hidden_dim, n_layers, dropout=0.5)
if train_on_gpu:
    rnn.cuda()
# defining loss and optimization functions for training
optimizer = torch.optim.Adam(rnn.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()
# training the model
trained_rnn = train_rnn(rnn, batch_size, optimizer, criterion, num_epochs,
                        show_every_n_batches)
# saving the trained model
helper.save_model('./save/trained_rnn', trained_rnn)
print('Model Trained and Saved')
# ### Question: How did you decide on your model hyperparameters?
# For example, did you try different sequence_lengths and observe how they
# changed convergence? What about the number of hidden units and layers?
# How did you settle on these network parameters?
# **Answer:** (write here)
# #### The learning rate must not be too large; 0.1 was unsuitable, so 0.01 was used
# #### Number of epochs: 5 epochs already brought the loss below 3.5
# #### The sequence length should be neither too small nor too large: too small lowers learning efficiency, too large increases training time without helping
# #### Batch size, embedding size and hidden size all use moderately sized values
# ---
# # Checkpoint
#
# By running the training cell above, your model has been saved under the name
# `trained_rnn`; if you saved your notebook, **you can come back to this code
# and its results at any later time**. The code below helps you reload your
# results!
# NOTE(review): fragment — the enclosing iteration loop (over `it`) and the
# definitions of trainfn/trainh, the exp_* experience buffers, `splits`,
# `splits2`, `n`, `games`, `workers`, `ta` and the four models lie outside
# this view; indentation is a best-effort reconstruction.
pe = 0
trans = 0
mse = 0
print("Training...")
for epoch in tqdm(range(epochs)):
    # policy/value updates over the experience splits
    for k in range(splits):
        loss = trainfn([exp_mi[k], exp_targ[k], exp_expert[k], exp_adv[k], exp_act[k]])
        pe += loss[0]
        mse += loss[1]
    # transition-model updates
    for k in range(splits2):
        lossT = trainh([exp_ol[k], exp_at[k], exp_nl[k]])
        trans += lossT[0]
print("Iteration ", it, ": action_policy loss: ", pe/(splits*epochs),
      " value loss: ", mse/(splits*epochs), " transition: ",
      trans/(splits2*epochs), " Game Length: ", n/(games*workers*2),
      " Data: ", len(ta))
# periodically persist all four networks
if it % 10 == 1:
    save_model(model, "PPO_CSGO.json", "PPO_weights.h5")
    save_model(fmodel, "PPO_CSGOf.json", "PPO_weightsf.h5")
    save_model(gmodel, "PPO_CSGOg.json", "PPO_weightsg.h5")
    save_model(hmodel, "PPO_CSGOh.json", "PPO_weightsh.h5")
# NOTE(review): fragment — the `return model` below is the tail of a function
# whose definition begins outside this view.
return model
# Build, train and save the Glove-based translation model (batch size 1024).
model = model_final(preproc_source_sentences.shape,
                    preproc_target_sentences.shape[1],
                    len(source_tokenizer.word_index) + 1,
                    len(target_tokenizer.word_index) + 1)
model.summary()
#CallBacks
mfile = 'models/model-Glove.model.h5'
model_checkpoint = callbacks.ModelCheckpoint(mfile, monitor='accuracy',
                                             save_best_only=True,
                                             save_weights_only=True)
logger = callbacks.CSVLogger('results/Glove_training.log')
tensorboard = callbacks.TensorBoard(log_dir='results/Glove_tensprboard')
# NOTE(review): this rebinding shadows the `callbacks` module used above, and
# the list is never passed to fit() below, so these callbacks never run —
# confirm intent.
callbacks = [logger, tensorboard]
#Training model and save callbacks:
#model.fit(X_train, Y_train, batch_size=1024, epochs=25, validation_split=0.1, callbacks=callbacks)
#Training model and save callbacks:
model.fit(X_train, Y_train, batch_size=1024, epochs=1, validation_split=0.1)
Predicted_by_Glove = model.predict(X_test, len(X_test))
"""
#Save Model
helper.save_model(model, 'models/Glove')
helper.Pickle_in_Data(Predicted_by_Glove)
"""
def train(model,
          optimizer,
          model_params,
          train_params,
          args,
          early_stopping=True,
          tracker=None,
          scheduler=None,
          att2=None):
    """Train `model` with optional early stopping, LR scheduling and comet tracking.

    :param model: the network to train (moved to `device` by the caller).
    :param optimizer: optimizer whose step() is called once per batch.
    :param model_params: dict; only 'transition_params' is read here (for naming
        the checkpoint folder).
    :param train_params: dict with 'max_epochs', 'batch_size',
        'save_model_interval', 'train_loader', 'val_loader', 'device'.
    :param args: parsed CLI args; uses args.lr, args.net_type, args.use_comet,
        args.save_checkpoints.
    :param early_stopping: if True, stop after `patience` epochs without
        validation-loss improvement of at least `min_delta`.
    :param tracker: comet tracker, used only when args.use_comet is set.
    :param scheduler: optional LR scheduler, stepped once per epoch.
    :param att2: extra argument forwarded to the WCEL loss and validation loss.
    """
    max_epochs = train_params['max_epochs']
    batch_size = train_params['batch_size']
    save_model_interval = train_params['save_model_interval']
    train_loader, val_loader = train_params['train_loader'], train_params['val_loader']
    device = train_params['device']
    transition_params = model_params['transition_params']

    # count trainable params
    num_learnable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print(f'In [train]: number of learnable params of the model: {num_learnable_params}, max_epochs = {max_epochs} \n')

    # setting up the name of the folder in which the model is going to be saved
    pool_mode = transition_params['pool_mode']  # extracted for saving the models
    # also use the value of r for saving the model in case the pool mode is 'lse'
    if pool_mode == 'lse':
        r = transition_params['r']
        pool_mode += f'_r={r}'

    # determining this part of the models folder based on whether we are using early stopping
    mins_since_epoch = int(time.time() / 60)  # used in naming the model folder to be unique from other runs
    models_folder = f'models/max_epochs={max_epochs}_' \
                    f'batch_size={batch_size}_' \
                    f'pool_mode={pool_mode}_' \
                    f'lr={args.lr}_' \
                    f'no_crop={True}_' \
                    f'type={args.net_type}_' \
                    f'es={early_stopping}_{mins_since_epoch}'

    # set up early stopping
    if early_stopping:
        best_val_loss = math.inf  # set to inf so that the first validation loss is less than this
        no_improvements = 0  # the number consecutive epochs through which validation loss has not improved
        patience = 3
        min_delta = .001

    # training
    epoch = 0
    while epoch < max_epochs:
        print(f'{"=" * 40} In epoch: {epoch} {"=" * 40}')
        print(f'Training on {len(train_loader)} batches...')

        for i_batch, batch in enumerate(train_loader):
            # converting the labels batch to from Long tensor to Float tensor (otherwise won't work on GPU)
            img_batch = batch['image'].to(device).float()
            label_batch = batch['label'].to(device).float()

            # making gradients zero in each optimization step
            optimizer.zero_grad()

            # getting the network prediction and computing the loss
            pred = model(img_batch, verbose=False)
            train_loss = networks.WCEL(pred, label_batch, att2)

            if i_batch % 50 == 0:
                print(f'Batch: {i_batch}, train loss: {round(train_loss.item(), 3)}')

            # tracking the metrics using comet in each iteration
            if args.use_comet:
                tracker.track_metric('train_loss', round(train_loss.item(), 3))

            # backward and optimization step
            train_loss.backward()
            optimizer.step()

        # save the model every several steps if wanted by the user
        # NOTE(review): placement reconstructed from the epoch-level condition
        # (epoch % save_model_interval) — confirm against the original layout.
        if epoch % save_model_interval == 0 and args.save_checkpoints:
            helper.save_model(model, optimizer, models_folder, epoch)

        # compute the validation loss at the end of each epoch
        val_loss = helper.compute_val_loss(model, val_loader, device, att2)

        # track the validation loss using comet, if wanted by the user
        if args.use_comet:
            tracker.track_metric('val_loss', val_loss)

        # check validation loss for early stopping
        if early_stopping:
            print(f'\nIn [train]: prev_val_loss: {best_val_loss}, current_val_loss: {val_loss}')

            # check if the validation loss is improved compared to the previous epochs
            if val_loss > best_val_loss or best_val_loss - val_loss < min_delta:
                no_improvements += 1
                print(f'In [train]: no_improvements incremented to {no_improvements} \n\n')
            else:  # it is improved, reset no_improvements to 0
                no_improvements = 0
                # update the validation loss for the next epoch
                best_val_loss = val_loss
                print(f'In [train]: no_improvements set to 0 \n')

            # terminate training after several epochs without validation improvement
            if no_improvements >= patience:
                print(f'In [train]: no_improvements = {no_improvements}, training terminated...')
                break

        # learning rate decay, if wanted
        if scheduler is not None:
            scheduler.step()
            print('In [train]: learning rate scheduling step() done \n\n')

        epoch += 1
best_epoch_loss = epoch_loss #best_model_wts = copy.deepcopy(cust_model.state_dict) best_model_wts = copy.deepcopy(cust_model) best_optimizer_wts = optim.Adam(best_model_wts.parameters(), lr=0.0001) best_optimizer_wts.load_state_dict(optimizer.state_dict()) if phase == "valid": val_acc_history.append(aver_jaccard) print("^" * 15) save_checkpoint(best_model_wts, best_optimizer_wts, epoch + 1, best_epoch_loss, best_acc, best_acc_inter) print(" ") scheduler.step() time_elapsed = time.time() - start_time print("Training Complete in {:.0f}m {:.0f}s".format( time_elapsed // 60, time_elapsed % 60)) #print("Best Validation Accuracy: {:.4f}".format(best_acc)) #este no#best_model_wts = copy.deepcopy(cust_model.state_dict()) cust_model.load_state_dict(best_model_wts.state_dict()) return cust_model, val_acc_history segm_model, acc = train_model(segm_model, dict_loaders, criterion, optimizer, nr_epochs, scheduler=scheduler) save_model(segm_model, name="ResNet101inter_linknet_i384_e20_w1_bckg_3ch_cloud.pt")
def main():
    """Entry point: construct the network, train it, and persist it to disk."""
    network = build_model()
    train_model(network)
    helper.save_model(network)
# Finding Labels in Dataset hp.saveLogMsg("\nFinding labels...") labels = [each_y for sample_y in train_y for each_y in sample_y] labels = list(set(labels)) labels.remove('O') hp.saveLogMsg("#Labels={}\n".format(len(labels) + 1)) # Run Model handler = CRFHandler(labels) model = None if cf.MODE == "test" and os.path.exists(cf.MODEL_PATH): model = hp.load_model() hp.saveLogMsg("\nLoading best model from {}".format(cf.MODEL_PATH)) else: model = handler.train(train_x, train_y) hp.save_model(model) hp.saveLogMsg("\nSaving best model at {}".format(cf.MODEL_PATH)) assert model is not None # Eval Model if cf.TEST_LABELED: acc_score, clf_report = handler.evaluate(model, dev_x, dev_y) hp.saveLogMsg('\n[DEV] Accuracy Score: {}'.format(acc_score)) hp.saveLogMsg('\n[DEV] Classification Report: \n{}'.format(clf_report)) else: handler.predict(model, test_x) hp.saveLogMsg('\nSaving prediction at {}'.format(cf.PREDICT_PATH)) # Top-k likely/unlikely transitions def top_transition_features(transition_features):
def main():
    """Parse CLI arguments, build and train an image classifier, save a checkpoint.

    Flags, defaults and the checkpoint call are unchanged; only two argument
    declarations are corrected (see inline FIX comments).
    """
    ap = argparse.ArgumentParser(description='train.py')
    # FIX: nargs='?' (was '*') — with '*' a user-supplied path made
    # args.data_dir a *list*, crashing the string concatenations below.
    # '?' keeps a single string and still falls back to the default.
    ap.add_argument('data_dir', nargs='?', action="store", default="./flowers")
    ap.add_argument('--save_dir', dest="save_dir", action="store",
                    default="./checkpoint.pth")
    ap.add_argument('--number_epochs', dest="number_epochs", action="store",
                    type=int, default=1)
    ap.add_argument('--model_type', dest="model_type", action="store",
                    default="vgg16", type=str)
    ap.add_argument('--hidden_units', type=int, dest="hidden_units",
                    default=760, action="store")
    ap.add_argument('--learning_rate', dest="learning_rate", action="store",
                    default=0.001, type=float)
    # FIX: type=float added — without it a CLI-passed --dropout arrived as a
    # string and broke the network construction downstream.
    ap.add_argument('--dropout', dest="dropout", action="store",
                    type=float, default=0.5)
    ap.add_argument('--gpu', dest="gpu", action='store_true', default=False)
    args = ap.parse_args()

    # expected dataset layout: <data_dir>/{train,valid,test}
    train_dir = args.data_dir + '/train'
    valid_dir = args.data_dir + '/valid'
    test_dir = args.data_dir + '/test'

    # ImageNet normalization statistics
    mean = [0.485, 0.456, 0.406]
    std = [0.229, 0.224, 0.225]
    batch_size = 64
    output_class = 102  # number of flower categories

    trainloader, traindata = helper.train_data_loader(train_dir, mean, std,
                                                      batch_size)
    validloader = helper.test_data_loader(valid_dir, mean, std, batch_size)
    testloader = helper.test_data_loader(test_dir, mean, std, batch_size)

    model, criterion, optimizer = helper.build_network(
        args.model_type,
        hidden_units=args.hidden_units,
        output_class=output_class,
        dropout=args.dropout,
        lr=args.learning_rate)

    model = helper.train_model(model, criterion, optimizer, trainloader,
                               validloader, number_epochs=args.number_epochs,
                               print_every=10, gpu=args.gpu)

    helper.save_model(args.save_dir, model, traindata, args.model_type,
                      args.hidden_units, output_class, args.dropout,
                      args.learning_rate)
def main():
    """Train, tune, persist and re-evaluate a Random Forest post classifier.

    Pipeline: load posts -> clean text -> split -> bag-of-words vectorize ->
    baseline forest -> RandomizedSearchCV tuning -> save/load -> re-score.
    """
    stop_words = get_stop_words(STOP_WORDS_PATH)
    data = Initialize_Data()
    visualizer = Visualize()

    # load raw posts from both platforms
    data.initialize_twitter_posts(TWITTER_POSTS_CSV, TWITTER_DATA_DIR)
    data.initialize_facebook_posts(FACEBOOK_POSTS_CSV, FACEBOOK_DATA_DIR)

    # Cleanup posts
    text_Cleanuper = Posts_Cleansing(data)
    text_Cleanuper.cleanup(Text_Cleanuper())

    # Divide data into test and train set
    X_train, X_test, Y_train, Y_test = train_test_split(data.posts,
                                                        data.labels,
                                                        test_size=0.2,
                                                        random_state=40)

    # Bag of Words model vectorization
    bag_of_words_model = Bag_Of_Words(X_train)
    bag_of_words_model.build_vectorizer(stop_words)
    X_train_counts = bag_of_words_model.data_counts
    X_test_counts = bag_of_words_model.vectorizer.transform(X_test)

    # baseline forest before hyper-parameter search
    forest = RandomForestClassifier(n_estimators=100)
    forest = forest.fit(X_train_counts, Y_train)

    y_predicted_counts_train = forest.predict(X_train_counts)
    accuracy, precision, recall, f1 = get_metrics(Y_train,
                                                  y_predicted_counts_train)
    print("Train accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" %
          (accuracy, precision, recall, f1))

    y_predicted_counts = forest.predict(X_test_counts)
    accuracy, precision, recall, f1 = get_metrics(Y_test, y_predicted_counts)
    print("Test accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" %
          (accuracy, precision, recall, f1))

    # Find best hyperparams
    # Number of trees in random forest
    n_estimators = [int(x) for x in np.linspace(start=200, stop=2000, num=10)]
    # Number of features to consider at every split
    max_features = ['auto', 'sqrt']
    # Maximum number of levels in tree
    max_depth = [int(x) for x in np.linspace(10, 110, num=11)]
    max_depth.append(None)
    # Minimum number of samples required to split a node
    min_samples_split = [2, 5, 10]
    # Minimum number of samples required at each leaf node
    min_samples_leaf = [1, 2, 4]
    # Method of selecting samples for training each tree
    bootstrap = [True, False]
    # Create the random grid
    random_grid = {
        'n_estimators': n_estimators,
        'max_features': max_features,
        'max_depth': max_depth,
        'min_samples_split': min_samples_split,
        'min_samples_leaf': min_samples_leaf,
        'bootstrap': bootstrap
    }

    # First create the model to tune
    rf = RandomForestClassifier()
    rf_random = RandomizedSearchCV(estimator=rf,
                                   param_distributions=random_grid,
                                   n_iter=100,
                                   cv=3,
                                   verbose=2,
                                   random_state=42,
                                   n_jobs=-1)
    # Fit the random search model
    rf_random.fit(X_train_counts, Y_train)
    print('Get Best Params')
    print(rf_random.best_params_)

    print('Saving model')
    save_model(rf_random, RANDOM_FOREST_MODEL_PATH)

    print('Load model')
    trained_model = load_model(RANDOM_FOREST_MODEL_PATH)

    y_predicted_counts_train = trained_model.predict(X_train_counts)
    accuracy, precision, recall, f1 = get_metrics(Y_train,
                                                  y_predicted_counts_train)
    # FIX: the 'precision' label contained a stray mis-encoded character
    # ("precisionս"); restored to plain ASCII.
    print("Train accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" %
          (accuracy, precision, recall, f1))

    y_predicted_counts = trained_model.predict(X_test_counts)
    accuracy, precision, recall, f1 = get_metrics(Y_test, y_predicted_counts)
    print("Test accuracy = %.3f, precision = %.3f, recall = %.3f, f1 = %.3f" %
          (accuracy, precision, recall, f1))
# NOTE(review): mid-function fragment of a train_model(...) plus the script
# tail that invokes it; the enclosing def and epoch/phase loops are outside
# this view, so the indentation below is reconstructed.
best_acc_inter = aver_jaccard_inter
best_epoch_loss = epoch_loss
#best_model_wts = copy.deepcopy(cust_model.state_dict)
# snapshot the whole model object (not just its state_dict)
best_model_wts = copy.deepcopy(cust_model)
# fresh optimizer bound to the snapshot, seeded with the live optimizer state
best_optimizer_wts = optim.Adam(best_model_wts.parameters(), lr=0.0001)
best_optimizer_wts.load_state_dict(optimizer.state_dict())
if phase == "valid":
    val_acc_history.append(aver_jaccard)
    print("^" * 15)
save_checkpoint(best_model_wts, best_optimizer_wts, epoch + 1,
                best_epoch_loss, best_acc, best_acc_inter)
print(" ")
scheduler.step()
time_elapsed = time.time() - start_time
print("Training Complete in {:.0f}m {:.0f}s".format(
    time_elapsed // 60, time_elapsed % 60))
#print("Best Validation Accuracy: {:.4f}".format(best_acc))
#este no#best_model_wts = copy.deepcopy(cust_model.state_dict())
# load the snapshot's weights back into the working model before returning
cust_model.load_state_dict(best_model_wts.state_dict())
return cust_model, val_acc_history


segm_model, acc = train_model(segm_model, dict_loaders, criterion, optimizer,
                              nr_epochs, scheduler=scheduler)
save_model(segm_model, name="ResNet101inter_linknet_384_250_2ch_cloud.pt")
# Fully-connected head of the steering model; the 1164/100/50/10 widths
# presumably follow the NVIDIA end-to-end driving architecture — TODO confirm.
model.add(Dense(1164, activation='relu'))
model.add(Dense(100, activation='relu'))
model.add(Dense(50, activation='relu'))
model.add(Dense(10, activation='relu'))
# single linear output unit (regression target, no activation)
model.add(Dense(1))
model.summary()

# mean-squared-error regression loss, Adam at the configured learning rate
model.compile(
    optimizer=Adam(learning_rate),
    loss="mse",
)

# create two generators for training and validation
train_gen = helper.generate_next_batch()
validation_gen = helper.generate_next_batch()

history = model.fit(train_gen,
                    steps_per_epoch=number_of_samples_per_epoch,
                    epochs=number_of_epochs,
                    validation_data=validation_gen,
                    validation_steps=number_of_validation_samples,
                    verbose=1)

# finally save our model and weights
helper.save_model(model)
epoch_loss = running_loss / len(dataloaders[phase]) aver_jaccard = jaccard_acc / len(dataloaders[phase]) aver_dice = dice_acc / len(dataloaders[phase]) print( "| {} Loss: {:.4f} | Jaccard Average Acc: {:.4f} | Dice Average Acc: {:.4f} |" .format(phase, epoch_loss, aver_jaccard, aver_dice)) if phase == "valid" and aver_jaccard > best_acc: best_acc = aver_jaccard best_model_wts = copy.deepcopy(cust_model.state_dict) pass if phase == "valid": val_acc_history.append(aver_jaccard) pass print("=" * 15) print(" ") time_elapsed = time.time() - start_time print("Training complete in {:.0f}m {:.0f}s".format( time_elapsed // 60, time_elapsed % 60)) print("Best validation Accuracy: {:.4f}".format(best_acc)) best_model_wts = copy.deepcopy(cust_model.state_dict( )) # Need to change this in the future when I fix the jaccard index cust_model.load_state_dict(best_model_wts) return cust_model, val_acc_history segm_model, acc = train_model(segm_model, dataloader_dict, criterion, optimizer, nr_epochs) save_model(segm_model, name="fcn_30epch_interpol.pt")