def train(model, dataloader, optimizer, loss_fn, metric, params):
    model.train()
    loss_avg = utils.RunningAverage()
    output = []
    y = []
    with tqdm(total=len(dataloader)) as t:
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to(params.device)
            y_batch = y_batch.to(params.device)
            output_batch = model(X_batch)
            loss = loss_fn(output_batch, y_batch)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_avg.update(loss.item())
            # Collect labels and predictions for the epoch-level metric
            y.append(y_batch.detach().cpu().numpy())
            output.append(output_batch.detach().cpu().numpy())
            t.set_postfix(loss='{:05.3f}'.format(loss_avg()))
            t.update()
    output = np.concatenate(output, axis=0)
    y = np.concatenate(y, axis=0)
    metric_score = metric(output, y)
    avg_loss = loss_avg()
    return avg_loss, metric_score
def train_step(inp, ctxt, lbl, mask):
    with tf.GradientTape() as tape:
        pred = model((inp, ctxt))
        loss = loss_fn(lbl, pred, mask)
    grads = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(grads, model.trainable_variables))
    train_loss(loss)
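# Usage sketch (not from the original): a minimal epoch loop driving the
# train_step above. `dataset` (a tf.data.Dataset yielding (inp, ctxt, lbl,
# mask) tuples), `num_epochs`, and `train_loss` being a tf.keras.metrics.Mean
# are all assumptions for illustration.
for epoch in range(num_epochs):
    train_loss.reset_state()  # `reset_states()` on older TF releases
    for inp, ctxt, lbl, mask in dataset:
        train_step(inp, ctxt, lbl, mask)
    print('Epoch {}: loss {:.4f}'.format(epoch + 1, float(train_loss.result())))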
def evaluate(model, loss_fn, dataloader, data_types, metrics, params):
    model.eval()
    overall_summary = []
    with torch.no_grad():  # no gradients needed during evaluation
        for i, batch in enumerate(dataloader):
            if data_types[0] == "glyph" or data_types[0] == "image":
                # Get batch
                X_batch = batch[data_types[0]]
                Y_batch = batch[data_types[1]]
                if params.use_gpu:
                    X_batch = X_batch.cuda(non_blocking=True)
                    Y_batch = Y_batch.cuda(non_blocking=True)
                # Predict
                Y_pred_batch = model(X_batch)
            elif data_types[0] == "svg":
                # Get batch
                X_batch = batch[data_types[0]]
                X_len = batch["len"]
                Y_batch = batch[data_types[1]]
                if params.use_gpu:
                    X_batch = X_batch.cuda(non_blocking=True)
                    X_len = X_len.cuda(non_blocking=True)
                    Y_batch = Y_batch.cuda(non_blocking=True)
                # Predict
                Y_pred_batch = model(X_batch, X_len)
            loss = loss_fn(Y_pred_batch.float(), Y_batch.float())
            Y_pred_batch = Y_pred_batch.cpu()
            Y_batch = Y_batch.cpu()
            summary = {metric: metrics[metric](Y_pred_batch, Y_batch)
                       for metric in metrics}
            summary['loss'] = loss.item()
            overall_summary.append(summary)
    metrics_mean = {metric: np.mean([s[metric] for s in overall_summary])
                    for metric in overall_summary[0]}
    metrics_string = " ; ".join("{}: {:05.5f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Eval metrics : " + metrics_string)
    return metrics_mean
def train_step(M, sample, opt, queue, max_queue_size, report=False):
    pov, white, black, score = sample
    pred = M(pov, white, black)
    loss = model.loss_fn(score, pred)
    if report:
        print(loss.item())
    loss.backward()
    # Keep a bounded window of recent losses for reporting
    if len(queue) >= max_queue_size:
        queue.pop(0)
    queue.append(loss.item())
    opt.step()
    M.zero_grad()
def train_epoch(epoch_idx):
    """Train one epoch."""
    loss_history = []
    model.train()
    # Shuffle training data for SGD
    np.random.shuffle(train_data)
    for i, (feat, adj_mtx, match_idx) in enumerate(train_data):
        if IS_CUDA:
            feat = feat.cuda()
            adj_mtx = adj_mtx.cuda()
        optimizer.zero_grad()
        out = model(feat, adj_mtx)
        loss = loss_fn(out, match_idx)
        loss_history.append((loss / len(match_idx)).item())
        loss.backward()
        optimizer.step()
    print("Epoch {}\t avg. loss: {:.4f}".format(epoch_idx + 1, np.mean(loss_history)))
def evaluate(model, dataloader, loss_fn, metric, params, test_mode=False):
    model.eval()
    loss_avg = utils.RunningAverage()
    output = []
    y = []
    with torch.no_grad():
        for X_batch, y_batch in dataloader:
            X_batch = X_batch.to(params.device)
            y_batch = y_batch.to(params.device)
            output_batch = model(X_batch)
            loss = loss_fn(output_batch, y_batch)
            loss_avg.update(loss.item())
            y.append(y_batch.cpu().numpy())
            output.append(output_batch.cpu().numpy())
    avg_loss = loss_avg()
    output = np.concatenate(output, axis=0)
    y = np.concatenate(y, axis=0)
    metric_score = metric(output, y, test_mode)
    return avg_loss, metric_score
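# Driver sketch (an assumption, not from the original) showing how the
# train/evaluate pair above could be wired together; `train_loader`,
# `val_loader`, and the checkpoint path are hypothetical names.
best_score = float('-inf')
for epoch in range(params.num_epochs):
    train_loss, train_score = train(model, train_loader, optimizer, loss_fn, metric, params)
    val_loss, val_score = evaluate(model, val_loader, loss_fn, metric, params)
    logging.info("epoch %d: train loss %.4f, val loss %.4f", epoch + 1, train_loss, val_loss)
    if val_score > best_score:  # keep the best model seen so far
        best_score = val_score
        torch.save(model.state_dict(), 'best_model.pth')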
def train(model, optimizer, loss_fn, dataloader, data_types, metrics, params, pause=False):
    """Train for a single epoch."""
    model.train()
    epoch_summary = []
    loss_running_avg = train_util.RunningAverage()
    with tqdm(total=len(dataloader)) as t:
        for i, batch in enumerate(dataloader):
            if data_types[0] == "glyph" or data_types[0] == "image":
                # Get batch
                X_batch = batch[data_types[0]]
                Y_batch = batch[data_types[1]]
                if params.use_gpu:
                    X_batch = X_batch.cuda(non_blocking=True)
                    Y_batch = Y_batch.cuda(non_blocking=True)
                # Forward prop
                Y_pred_batch = model(X_batch, pause)
            elif data_types[0] == "svg":
                # Get batch
                X_batch = batch[data_types[0]]
                X_len = batch["len"]
                Y_batch = batch[data_types[1]]
                if params.use_gpu:
                    X_batch = X_batch.cuda(non_blocking=True)
                    X_len = X_len.cuda(non_blocking=True)
                    Y_batch = Y_batch.cuda(non_blocking=True)
                # Forward prop
                Y_pred_batch = model(X_batch, X_len)
            loss = loss_fn(Y_pred_batch.float(), Y_batch.float())
            optimizer.zero_grad()
            # Backward prop
            loss.backward()
            # Gradient step
            optimizer.step()
            # Evaluate summaries periodically
            if i % params.summary_steps == 0:
                Y_pred_batch = Y_pred_batch.detach().cpu()
                Y_batch = Y_batch.detach().cpu()
                summary = {metric: metrics[metric](Y_pred_batch, Y_batch)
                           for metric in metrics}
                summary['loss'] = loss.item()
                epoch_summary.append(summary)
            loss_running_avg.update(loss.item())
            t.set_postfix(loss='{:05.5f}'.format(loss_running_avg()))
            t.update()
    metrics_mean = {metric: np.mean([s[metric] for s in epoch_summary])
                    for metric in epoch_summary[0]}
    metrics_string = " ; ".join("{}: {:05.5f}".format(k, v)
                                for k, v in metrics_mean.items())
    logging.info("- Train metrics: " + metrics_string)
import torch
from torch import optim
from sklearn.datasets import load_digits

from model import NeuralNet, loss_fn, device

digits = load_digits()
X = torch.tensor(digits['data'], dtype=torch.float32).to(device)
Y = torch.tensor(digits['target'], dtype=torch.int64).to(device)

model = NeuralNet().to(device)  # move the model to the same device as the data
optimizer = optim.Adam(model.parameters())

n_epochs = 100
for epoch in range(n_epochs):
    optimizer.zero_grad()
    y_predict = model(X)
    loss = loss_fn(y_predict, Y)
    loss.backward()
    optimizer.step()
    if epoch % 10 == 0:
        print('Epoch {:4d}/{} Cost: {:.6f}'.format(epoch, n_epochs, loss.item()))
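# Optional sanity check (an assumption, not in the original script):
# accuracy on the same data the model was trained on.
with torch.no_grad():
    predictions = model(X).argmax(dim=1)
    accuracy = (predictions == Y).float().mean().item()
    print('Train accuracy: {:.4f}'.format(accuracy))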
def train(args):
    """Train UNet from datasets."""
    # dataset
    print('Reading dataset from {}...'.format(args.dataset_path))
    train_dataset = SSDataset(dataset_path=args.dataset_path, is_train=True)
    val_dataset = SSDataset(dataset_path=args.dataset_path, is_train=False)
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=args.batch_size, shuffle=True)
    val_dataloader = DataLoader(dataset=val_dataset, batch_size=args.batch_size, shuffle=False)

    # mask
    with open(args.mask_json_path, 'w', encoding='utf-8') as mask:
        colors = SSDataset.all_colors
        mask.write(json.dumps(colors))
    print('Mask colors list has been saved in {}'.format(args.mask_json_path))

    # model
    net = UNet(in_channels=3, out_channels=5)
    if args.cuda:
        net = net.cuda()

    # settings
    lr = args.lr  # e.g. 1e-3
    optimizer = optim.Adam(net.parameters(), lr=lr)
    criterion = loss_fn

    # run
    train_losses = []
    val_losses = []
    print('Start training...')
    for epoch_idx in range(args.epochs):
        # train
        net.train()
        train_loss = 0.0
        for batch_idx, batch_data in enumerate(train_dataloader):
            xs, ys = batch_data
            if args.cuda:
                xs = xs.cuda()
                ys = ys.cuda()
            ys_pred = net(xs)
            loss = criterion(ys_pred, ys)
            train_loss += loss.item()  # accumulate a float, not the graph-attached tensor
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # val
        net.eval()
        val_loss = 0.0
        with torch.no_grad():
            for batch_idx, batch_data in enumerate(val_dataloader):
                xs, ys = batch_data
                if args.cuda:
                    xs = xs.cuda()
                    ys = ys.cuda()
                ys_pred = net(xs)
                loss = criterion(ys_pred, ys)
                val_loss += loss.item()

        train_losses.append(train_loss)
        val_losses.append(val_loss)
        print('Epoch: {}, Train total loss: {}, Val total loss: {}'.format(
            epoch_idx + 1, train_loss, val_loss))

        # save
        if (epoch_idx + 1) % args.save_epoch == 0:
            checkpoint_path = os.path.join(
                args.checkpoint_path, 'checkpoint_{}.pth'.format(epoch_idx + 1))
            torch.save(net.state_dict(), checkpoint_path)
            print('Saved Checkpoint at Epoch {} to {}'.format(epoch_idx + 1, checkpoint_path))

    # summary
    if args.do_save_summary:
        epoch_range = list(range(1, args.epochs + 1))
        plt.plot(epoch_range, train_losses, 'r', label='Train loss')
        plt.plot(epoch_range, val_losses, 'g', label='Val loss')
        plt.legend()
        plt.savefig(args.summary_image)
        print('Summary image has been saved in {}'.format(args.summary_image))

    # save
    net.eval()
    torch.save(net.state_dict(), args.model_state_dict)
    print('Saved state_dict in {}'.format(args.model_state_dict))
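# A hedged sketch of the argparse wiring train() above appears to expect;
# every flag mirrors an attribute the function reads, but the defaults are
# assumptions.
import argparse

parser = argparse.ArgumentParser(description='Train UNet')
parser.add_argument('--dataset_path', required=True)
parser.add_argument('--mask_json_path', default='mask.json')
parser.add_argument('--batch_size', type=int, default=4)
parser.add_argument('--lr', type=float, default=1e-3)
parser.add_argument('--epochs', type=int, default=50)
parser.add_argument('--save_epoch', type=int, default=10)
parser.add_argument('--checkpoint_path', default='checkpoints')
parser.add_argument('--do_save_summary', action='store_true')
parser.add_argument('--summary_image', default='summary.png')
parser.add_argument('--model_state_dict', default='unet.pth')
parser.add_argument('--cuda', action='store_true')
train(parser.parse_args())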
sim = nn.CosineSimilarity()
print(net)
print("Running training loop")
cost_book = []
val_acc_book = []
for j in range(N_EPOCH):
    cost = 0.0
    pbar = tqdm(dl)
    for i, b in enumerate(pbar):
        opt.zero_grad()
        o1 = net(b['q1'].cuda())
        o2 = net(b['q2'].cuda())
        loss = loss_fn(o1, o2, device='cuda')
        l = loss.item()
        cost += l
        loss.backward()
        opt.step()
        pbar.set_postfix({'Epoch': j + 1, 'Train_loss': l})
    pbar.close()
    print(f"\nEpoch Loss : {cost / (i + 1):.3f}\n")
    cost_book.append(cost / (i + 1))
    print("\nRunning on validation set\n")
    with torch.no_grad():
        acc = accuracy_score(vdl, net, sim, device='cuda')
    val_acc_book.append(acc)
    print(f"\nAccuracy of val set {acc:.3f}%\n")
params = util.Params()
if args.model_dir:
    params.update(args.model_dir)
    model_dir = args.model_dir
else:
    model_dir_path = os.path.join(".", "model")
    if not os.path.isdir(model_dir_path):
        os.mkdir(model_dir_path)
    model_dir = model_dir_path

params.cuda = torch.cuda.is_available()

alov = ALOVDataset('/large_storage/imagedata++',
                   '/large_storage/alov300++_rectangleAnnotation_full',
                   transform)
dataloader = DataLoader(alov, batch_size=params.batch_size)

use_gpu = torch.cuda.is_available()
model = model.Re3Net().cuda() if use_gpu else model.Re3Net()
optimizer = optim.Adam(model.parameters(), lr=params.learning_rate)
loss_fn = model.loss_fn(params.cuda)

# Train the model
logging.info("Starting training for {} epoch(s)".format(params.num_epochs))
train_and_evaluate(model, dataloader, dataloader, optimizer, loss_fn, 0,
                   params, model_dir, args.restore_file)
def get_validation_loss(M, sample):
    with torch.no_grad():
        pov, white, black, score = sample
        pred = M(pov, white, black)
        loss = model.loss_fn(score, pred)
    return loss
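# Sketch (not from the original) of how the queue-based train_step and
# get_validation_loss above might be combined; `train_batches`,
# `val_batches`, and the window size of 100 are assumptions.
loss_window = []
for step, sample in enumerate(train_batches):
    train_step(M, sample, opt, loss_window, max_queue_size=100,
               report=(step % 100 == 0))
val_loss = sum(get_validation_loss(M, s).item() for s in val_batches) / len(val_batches)
print('running train loss {:.4f}, val loss {:.4f}'.format(
    sum(loss_window) / len(loss_window), val_loss))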
        act_fn=model_settings['fc_fn_1'],
        dir_npy=weight_dir)

    # second fully connected layer
    fc_y_2, fc_w_2, fc_b_2 = model.fc_multiplication(
        y_l_minus_1=fc_y_1,
        w_l=fc_w_2,
        w_l_name='fc_w_2',
        b_l=fc_b_2,
        b_l_name='fc_b_2',
        neurons=len(y_true),  # the number of output neurons equals the number of classes
        act_fn=model_settings['fc_fn_2'],
        dir_npy=weight_dir)

    # model loss
    fc_error = model.loss_fn(y_true, fc_y_2, feed=True)

    # store the loss and accuracy values
    loss_change.append(fc_error.sum())
    accuracy_change.append(y_true.argmax() == fc_y_2.argmax())

    # backward pass through the network
    if train_model:
        # backprop through the loss function
        dEdfc_y_2 = model.loss_fn(y_true, fc_y_2, feed=False)

        # backprop through the second fc layer
        dEdfc_y_1, fc_w_2, fc_b_2 = model.fc_backpropagation(
            y_l_minus_1=fc_y_1,
            dEdy_l=dEdfc_y_2,
            y_l=fc_y_2,
            w_l=fc_w_2,
            b_l=fc_b_2,
            act_fn=model_settings['fc_fn_2'],