def train(train: Examples, model: RNN, optimizer, criterion):
    epoch_loss = 0
    epoch_acc = 0
    count = 0
    model.train()
    for x, y, z in batch(train.shuffled(), config.batch_size):
        x, y, z = get_long_tensor(x), get_long_tensor(y).float(), get_long_tensor(z)
        optimizer.zero_grad()
        if config.setting == 'RNN':
            predictions = model(x).squeeze(1)
        else:
            predictions = model(x, z).squeeze(1)
        loss = criterion(predictions, y)
        acc = binary_accuracy(predictions, y)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
        epoch_acc += acc.item()
        count += 1
    return epoch_loss / count, epoch_acc / count
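# `binary_accuracy` above is not defined in this fragment. A minimal sketch of
# the assumed behaviour (fraction of correct 0.5-thresholded sigmoid outputs),
# matching the `.item()` call on its return value:
import torch

def binary_accuracy(preds, y):
    """Mean accuracy of binary logits `preds` against 0/1 float targets `y`."""
    rounded = torch.round(torch.sigmoid(preds))
    return (rounded == y).float().mean()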
def main(_):
    # load data
    data, ix2word, word2ix = load_data()
    num_train = data.shape[0]
    vocab_size = len(ix2word)

    # placeholders for training
    X = tf.placeholder(tf.int32, [BATCH_SIZE, None])
    y = tf.placeholder(tf.int32, [BATCH_SIZE, None])
    rnn_model = RNN(model=model, batch_size=BATCH_SIZE, vocab_size=vocab_size,
                    embedding_dim=embedding_dim, n_neurons=n_neurons,
                    n_layers=3, lr=lr, keep_prob=keep_prob)
    loss, optimizer = rnn_model.train(X, y)

    # start training
    start_time = time.time()
    with tf.Session() as sess:
        # visualize graph and write loss into logs
        merged = tf.summary.merge_all()
        writer = tf.summary.FileWriter('./logs/', sess.graph)
        tf.global_variables_initializer().run()
        print("="*15 + "start training" + "="*15)
        for epc in range(NUM_EPOCH):
            print("="*15, "epoch: %d" % epc, "="*15)
            for step in range(num_train // BATCH_SIZE):
                # get batch data
                idx_start = step * BATCH_SIZE
                idx_end = idx_start + BATCH_SIZE
                batch_data = data[idx_start:idx_end, ...]
                x_data = batch_data[:, :-1]
                y_data = batch_data[:, 1:]
                feed_dict = {X: x_data, y: y_data}
                sess.run(optimizer, feed_dict=feed_dict)
                # print evaluation results every `eval_frequence` steps
                if step % eval_frequence == 0:
                    l = sess.run(loss, feed_dict=feed_dict)
                    result = sess.run(merged, feed_dict=feed_dict)
                    writer.add_summary(result, (epc * num_train // BATCH_SIZE) + step)
                    input_seq = "湖光秋月两相和"
                    result = generate_poem(rnn_model=rnn_model, sess=sess,
                                           input_seqs=input_seq, ix2word=ix2word,
                                           word2ix=word2ix, max_len=125,
                                           prefix_words=None)
                    result_poem = ''.join(result)
                    run_time = time.time() - start_time
                    start_time = time.time()
                    print("step: %d, run time: %.1f ms" % (step, run_time * 1000 / eval_frequence))
                    print("minibatch loss: %f" % l)
                    print("generated poem length: %d, poem is: %s" % (len(result_poem), result_poem))
                    sys.stdout.flush()
        # save model
        if SAVE:
            saver = tf.train.Saver()
            saver.save(sess, CKPT_PATH + 'rnn_model.ckpt')
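# A minimal TF1-style sketch of how the checkpoint written above could be
# restored later for sampling. It assumes the same graph-building code has
# already run in the session; `CKPT_PATH` is the constant used in the script.
import tensorflow as tf

def restore_model(sess, ckpt_path=CKPT_PATH):
    saver = tf.train.Saver()
    saver.restore(sess, ckpt_path + 'rnn_model.ckpt')
    return saver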
    labels_acc = np.mean(np.logical_and(gold_arcs == pred_arcs, gold_labels == pred_labels))
    return arcs_acc, labels_acc


highestScore = 0
tsid = 0
name_model = 'parser_model2.pt'
path_save_model = os.path.join('gen', name_model)

for epoch in range(1, args.epochs + 1):
    for i, (word_tensor, ext_word_ids, char_ids, pos_tensor, xpos_tensor,
            head_targets, rel_targets, seq_lengths, perm_idx) in enumerate(train_loader):
        start = time.time()
        # switch to train mode
        model.train()

        ts = ((epoch - 1) * train_loader.n_batches) + (i + 1)
        if ts % 5000 == 0:
            adjust_learning_rate(args.lr, optimizer, optimizer_sparse)

        if args.cuda:
            word_tensor = word_tensor.cuda()
            pos_tensor = pos_tensor.cuda()
            xpos_tensor = xpos_tensor.cuda()
            head_targets = head_targets.cuda()
            rel_targets = rel_targets.cuda()

        # compute output; drop the root position (index 0) from the logits
        arc_logits, label_logits = model(word_tensor, ext_word_ids, char_ids,
                                         pos_tensor, xpos_tensor, seq_lengths)
        arc_logits = arc_logits[:, 1:, :]
        label_logits = label_logits[:, 1:, :, :]
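# `adjust_learning_rate` above is called every 5000 update steps but is not
# defined in this fragment. A plausible sketch, assuming a fixed multiplicative
# decay applied to both the dense and the sparse optimizer (the 0.75 factor is
# an assumption, not taken from the source):
def adjust_learning_rate(base_lr, optimizer, optimizer_sparse, decay=0.75):
    for opt in (optimizer, optimizer_sparse):
        if opt is None:
            continue
        for group in opt.param_groups:
            group['lr'] = group['lr'] * decay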
            opt_fname = os.path.join(expt_dir, f"opt_epoch_{ep}")
            torch.save(model.state_dict(), model_fname)
            torch.save(optimizer.state_dict(), opt_fname)

        model_fname = os.path.join(expt_dir, f"model_final_{ep}")
        opt_fname = os.path.join(expt_dir, f"opt_final_{ep}")
        torch.save(model.state_dict(), model_fname)
        torch.save(optimizer.state_dict(), opt_fname)
    finally:
        log()


if __name__ == "__main__":
    fname = "_bios.json"
    bc = ByteCode("byte_values.txt")
    ds = ByteDataset(fname, bc, device=torch.device('cpu'))
    print(f"Loaded {len(ds)} samples")
    dl = ByteDataLoader(ds, batch_size=1)

    rnn = RNN(bc.num_codes)
    rnn.train()

    epochs = 1
    lr = 1e-3
    losses = []
    lossfn = nn.CrossEntropyLoss(reduction='none')
    optimizer = Adam(rnn.parameters(), lr=lr)

    train(dl, rnn, optimizer,
          dict(epochs=epochs, expt_dir="tst", sample_step=1),
          torch.device('cpu'), bc)
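# A hedged sketch of resuming from the final checkpoints written above. The
# file names mirror the model_final_{ep} / opt_final_{ep} pattern; `ep` is
# assumed to be the last completed epoch.
import os
import torch

def resume(model, optimizer, expt_dir, ep):
    model.load_state_dict(torch.load(os.path.join(expt_dir, f"model_final_{ep}")))
    optimizer.load_state_dict(torch.load(os.path.join(expt_dir, f"opt_final_{ep}")))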
def main():
    logging.basicConfig(filename='logs/train.log', level=logging.DEBUG)

    # saved model path
    save_path = 'history/trained_model'

    # input file
    #filename = 'data/train_and_test.csv'
    filename = 'data/golden_400.csv'

    embedding_size = 300  # 128 for torch embeddings, 300 for pre-trained
    hidden_size = 24
    batch_size = 64
    nb_epochs = 200
    lr = 1e-4
    max_norm = 5
    folds = 3

    # Dataset
    ds = ClaimsDataset(filename)
    vocab_size = len(ds.vocab)
    pad_id = ds.vocab.token2id.get('<pad>')

    test_len = val_len = math.ceil(len(ds) * .10)
    train_len = len(ds) - (val_len + test_len)
    print("\nTrain size: {}\tValidate size: {}\tTest Size: {}".format(
        train_len, val_len, test_len))

    # randomly split dataset into train, validation & test subsets
    d_tr, d_val, d_te = torch.utils.data.dataset.random_split(
        ds, [train_len, val_len, test_len])

    # data loaders
    dl_tr = torch.utils.data.DataLoader(d_tr, batch_size=batch_size)
    dl_val = torch.utils.data.DataLoader(d_val, batch_size=batch_size)
    dl_test = torch.utils.data.DataLoader(d_te, batch_size=batch_size)

    model = RNN(vocab_size, embedding_size, hidden_size, pad_id, ds)
    model = utils.cuda(model)
    model.zero_grad()

    parameters = [p for p in model.parameters() if p.requires_grad]
    #parameters = list(model.parameters())  # comment out when using pre-trained embeddings
    optim = torch.optim.Adam(parameters, lr=lr, weight_decay=35e-3,
                             amsgrad=True)  # optimizer
    criterion = nn.NLLLoss(weight=torch.Tensor([1.0, 2.2]).cuda())

    losses = defaultdict(list)
    print("\nTraining started: {}\n".format(utils.get_time()))

    phases, loaders = ['train', 'val'], [dl_tr, dl_val]
    tr_acc, v_acc = [], []

    for epoch in range(nb_epochs):
        for phase, loader in zip(phases, loaders):
            if phase == 'train':
                model.train()
            else:
                model.eval()

            ep_loss, out_list, label_list = [], [], []
            for i, inputs in enumerate(loader):
                optim.zero_grad()

                claim, labels = inputs
                labels = utils.variable(labels)

                out = model(claim)
                out_list.append(utils.normalize_out(out))  # collect output from every batch
                label_list.append(labels)

                out = torch.log(out)
                # criterion.weight = get_weights(labels)
                loss = criterion(out, labels)

                # back-propagate, for training only
                if phase == 'train':
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(
                        parameters, max_norm=max_norm)  # exploding gradients? say no more!
                    optim.step()

                ep_loss.append(loss.item())

            losses[phase].append(np.mean(ep_loss))  # record average loss of each phase at every epoch
            acc = utils.get_accuracy(label_list, out_list)

            if phase == 'train':
                tr_acc.append(acc)
            else:
                v_acc.append(acc)

            print("Epoch: {} \t Phase: {} \t Loss: {:.4f} \t Accuracy: {:.3f}".format(
                epoch, phase, loss, acc))

    print("\nTime finished: {}\n".format(utils.get_time()))

    utils.plot_loss(losses['train'], losses['val'], tr_acc, v_acc, filename, -1)

    logging.info("\nTrain file=> " + filename + "\nParameters=> \nBatch size: " +
                 str(batch_size) + "\nHidden size: " + str(hidden_size) +
                 "\nMax_norm: " + str(max_norm) + "\nL2 Reg/weight decay: " +
                 str(optim.param_groups[0]['weight_decay']) +
                 "\nLoss function: \n" + str(criterion))
    logging.info('Final train accuracy: ' + str(tr_acc[-1]))
    logging.info('Final validation accuracy: ' + str(v_acc[-1]))

    # Save the model
    torch.save(model.state_dict(), save_path)

    #test(model, batch_size)

    # predict
    f1_test, acc_test = [], []
    for i, inputs in enumerate(dl_test):
        claim, label = inputs
        label = utils.variable(label.float())

        out = model(claim)
        y_pred = utils.normalize_out(out)

        #print("\n\t\tF1 score: {}\n\n".format(get_f1(label, y_pred)))  # f1 score
        f1_test.append(utils.get_f1(label, y_pred))
        acc_test.append(metrics.accuracy_score(label, y_pred))

    print("\t\tF1: {:.3f}\tAccuracy: {:.3f}".format(np.mean(f1_test),
                                                    np.mean(acc_test)))
    logging.info('\nTest f1: ' + str(np.mean(f1_test)) + '\nTest Accuracy: ' +
                 str(np.mean(acc_test)))
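# `utils.normalize_out` is used above (and in the k-fold script below) to turn
# the model's class probabilities into hard predictions before computing
# accuracy/F1. A minimal sketch of the assumed behaviour:
import torch

def normalize_out(out):
    """Argmax over the class dimension, moved to CPU for sklearn metrics."""
    return torch.argmax(out, dim=1).cpu()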
def main():
    # saved model path
    save_path = 'history/model_fold_'
    test_file = 'data/test120.csv'

    # create dataset
    #filename = 'data/golden_400.csv'
    #filename = 'data/golden_train_and_val.csv'
    filename = 'data/train_val120.csv'
    ds = ClaimsDataset(filename)
    vocab_size = len(ds.vocab)
    pad_id = ds.vocab.token2id.get('<pad>')

    embedding_size = 128  # 128 for torch embeddings, 300 for pre-trained
    hidden_size = 24
    batch_size = 64
    nb_epochs = 150
    lr = 1e-4
    max_norm = 5
    folds = 10

    criterion = nn.NLLLoss(weight=torch.Tensor([1.0, 2.2]).cuda())

    # For the testing phase
    fold_scores = {}
    test_set = ClaimsDataset(test_file)
    dl_test = torch_data.DataLoader(test_set, batch_size=batch_size, shuffle=True)

    mean = []  # holds the mean validation accuracy of every fold
    print("\nTraining\n")
    logger.info(utils.get_time())

    for i in range(folds):
        print("\nFold: {}\n".format(i))
        losses = defaultdict(list)

        train, val = utils.split_dataset(ds, i)
        print("Train size: {} \t Validate size: {}".format(len(train), len(val)))

        dl_train = torch_data.DataLoader(train, batch_size=batch_size, shuffle=True)
        dl_val = torch_data.DataLoader(val, batch_size=batch_size, shuffle=True)

        model = RNN(vocab_size, embedding_size, hidden_size, pad_id, ds)
        model = utils.cuda(model)
        model.zero_grad()

        # When using pre-trained embeddings, uncomment below; otherwise use the second statement
        #parameters = list([parameter for parameter in model.parameters()
        #                   if parameter.requires_grad])
        parameters = list(model.parameters())
        optim = torch.optim.Adam(parameters, lr=lr, weight_decay=35e-3, amsgrad=True)

        phases, loaders = ['train', 'val'], [dl_train, dl_val]
        tr_acc, v_acc = [], []

        for epoch in range(nb_epochs):
            for p, loader in zip(phases, loaders):
                if p == 'train':
                    model.train()
                else:
                    model.eval()

                ep_loss, out_list, label_list = [], [], []
                for _, inputs in enumerate(loader):
                    optim.zero_grad()

                    claim, labels = inputs
                    labels = utils.variable(labels)

                    out = model(claim)
                    out_list.append(utils.normalize_out(out))
                    label_list.append(labels)

                    out = torch.log(out)
                    loss = criterion(out, labels)

                    if p == 'train':
                        loss.backward()
                        torch.nn.utils.clip_grad_norm_(parameters, max_norm=max_norm)
                        optim.step()

                    ep_loss.append(loss.item())

                losses[p].append(np.mean(ep_loss))
                acc = utils.get_accuracy(label_list, out_list)

                if p == 'train':
                    tr_acc.append(acc)
                else:
                    v_acc.append(acc)

                print("Epoch: {} \t Phase: {} \t Loss: {:.4f} \t Accuracy: {:.3f}"
                      .format(epoch, p, loss, acc))

        utils.plot_loss(losses['train'], losses['val'], tr_acc, v_acc, filename, i)
        mean.append(np.mean(v_acc))

        logger.info("\n Fold: " + str(i))
        logger.info("Train file=> " + filename + "\nParameters=> \nBatch size: " +
                    str(batch_size) + "\nHidden size: " + str(hidden_size) +
                    "\nMax_norm: " + str(max_norm) + "\nL2 Reg/weight decay: " +
                    str(optim.param_groups[0]['weight_decay']) +
                    "\nLoss function: " + str(criterion))
        logger.info('Final train accuracy: ' + str(tr_acc[-1]))
        logger.info('Final validation accuracy: ' + str(v_acc[-1]))

        # Save model for current fold
        torch.save(model.state_dict(), save_path + str(i))

        test_f1, test_acc = [], []
        for _, inp in enumerate(dl_test):
            claim, label = inp
            label = utils.variable(label)

            model.eval()
            out = model(claim)
            y_pred = utils.normalize_out(out)

            test_f1.append(utils.get_f1(label, y_pred))
            test_acc.append(metrics.accuracy_score(label, y_pred))

        t_f1, t_acc = np.mean(test_f1), np.mean(test_acc)
        fold_scores[i] = dict([('F1', t_f1), ('Accuracy', t_acc)])
        print("\tf1: {:.3f} \t accuracy: {:.3f}".format(t_f1, t_acc))
        #logger.info('\nTest f1: '+str(t_f1)+'\nTest Accuracy: '+str(t_acc))

    logger.info('Mean accuracy over {} folds: \t'.format(folds) + str(np.mean(mean)))
    logger.info(fold_scores)
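# `utils.split_dataset(ds, i)` above is assumed to carve out the i-th
# validation slice of a k-fold split. A minimal sketch using
# torch.utils.data.Subset; the contiguous-fold geometry is an assumption:
from torch.utils.data import Subset

def split_dataset(ds, fold, folds=10):
    n = len(ds)
    fold_size = n // folds
    val_idx = set(range(fold * fold_size, (fold + 1) * fold_size))
    train_idx = [j for j in range(n) if j not in val_idx]
    return Subset(ds, train_idx), Subset(ds, sorted(val_idx))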
def train(cfg, datasets, dataloaders, device, save_model_path):
    model = RNN(cfg.model_type, cfg.input_dim, cfg.hidden_dim, cfg.n_layers,
                cfg.drop_p, cfg.output_dim, cfg.bi_dir)
    model = model.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
    criterion = torch.nn.CrossEntropyLoss()
    # scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=7, gamma=0.1)

    best_metric = 0.0
    best_epoch = 0
    best_model_wts = copy.deepcopy(model.state_dict())

    for epoch in range(cfg.num_epochs):
        for phase in ['train', 'valid']:
            if phase == 'train':
                model.train()  # Set model to training mode
            else:
                model.eval()  # Set model to evaluate mode

            running_loss = 0.0
            # running_corrects = 0
            y_pred = []
            y_true = []

            # Iterate over data.
            for batch in dataloaders[phase]:
                inputs = batch['inputs'].to(device)
                targets = batch['targets'][cfg.task].to(device)

                # zero the parameter gradients
                optimizer.zero_grad()

                # forward; track history only in train
                with torch.set_grad_enabled(phase == 'train'):
                    outputs, hiddens = model(inputs)
                    _, preds = torch.max(outputs, 1)
                    loss = criterion(outputs, targets)

                    # backward + optimize only if in training phase
                    if phase == 'train':
                        loss.backward()
                        optimizer.step()

                # statistics
                running_loss += loss.item() * inputs.size(0)
                # running_corrects += torch.sum(preds == targets.data)
                y_pred.extend(preds.tolist())
                y_true.extend(targets.tolist())

            # if phase == 'train':
            #     scheduler.step()

            # epoch_acc = running_corrects.double() / len(datasets[phase])
            epoch_loss = running_loss / len(datasets[phase])
            f1_ep = f1_score(y_true, y_pred, average='weighted')
            precision_ep = precision_score(y_true, y_pred, average='weighted')
            recall_ep = recall_score(y_true, y_pred, average='weighted')
            accuracy_ep = accuracy_score(y_true, y_pred)

            # print('{} Loss: {:.4f} Acc: {:.4f}'.format(phase, epoch_loss, epoch_acc))
            print(f'({phase} @ {epoch+1}): L: {epoch_loss:.3f}; A: {accuracy_ep:.3f}; R: {recall_ep:.3f}; ' +
                  f'P: {precision_ep:.3f}; F1: {f1_ep:.3f}')

            # deep copy the model
            if phase == 'valid' and f1_ep > best_metric:
                best_metric = f1_ep
                best_epoch = epoch
                best_model_wts = copy.deepcopy(model.state_dict())

    print(f'Best val Metric {best_metric:.3f} @ {best_epoch+1}\n')

    # load the best model weights and save them
    model.load_state_dict(best_model_wts)
    torch.save(model.state_dict(), save_model_path)
    print(f'model is saved @ {save_model_path}')

    return best_metric
# init data
init_data()

# create model
rnn = RNN(len(all_categories), len(all_letters), n_hidden, len(all_letters))

# setup data
train_data = load_data()
train_dataset = NameDataset(train_data)
train_dataloader = DataLoader(train_dataset, batch_size=1, shuffle=True)

# setup optimizer and criterion
optimizer = optim.Adam(rnn.parameters(), lr=0.0005)
criterion = nn.NLLLoss()

# train
all_loss = []
for epoch in range(epochs):
    rnn = rnn.train()
    current_loss = 0
    for idx, (category, name) in enumerate(train_dataloader):
        category, name = category[0], name[0]
        category, name = Variable(category), Variable(name)
        hidden = rnn.initHidden()
        for i in range(name.size()[0] - 1):
            optimizer.zero_grad()
            output, hidden = rnn(category, name[i], hidden)
            loss = criterion(output, torch.argmax(name[i + 1], dim=1).long())
            # retain the graph: `hidden` is reused by the next time step
            loss.backward(retain_graph=True)
            optimizer.step()
            current_loss += loss.item()  # detach from the graph before accumulating
        if idx >= plot_every and idx % plot_every == 0:
            all_loss.append(float(current_loss) / plot_every)
            current_loss = 0
def main(args):
    print(sys.argv)
    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr

    data_loader = get_data_loader(args.gt_path, args.tensors_path, args.bs,
                                  args.json_labels_path, num_workers=8)

    model = RNN()
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    #optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    if args.rms:
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.mm)
    else:
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model_loss = torch.nn.BCEWithLogitsLoss()

    losses = []
    p = 1  # annealed below and passed to the model along with gt
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            if epoch < 3:
                p = 1.0
            elif 3 <= epoch < 6:
                p = 0.5
            elif 6 <= epoch < 9:
                p = 0.25
            else:
                p = 0.0

            loss_epoch = []
            for step, (feat_maps, gt) in enumerate(data_loader):
                if torch.cuda.is_available():
                    feat_maps = feat_maps.cuda()
                    gt = gt.cuda()

                model.zero_grad()
                out = model(feat_maps, gt, p)
                loss = model_loss(out, gt)
                loss.backward()
                optimizer.step()

                loss_step = loss.cpu().detach().numpy()
                loss_epoch.append(loss_step)
                print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) +
                      ' - Step ' + str(step + 1) + '/' + str(len(data_loader)) +
                      ' - Loss: ' + str(loss_step))

            loss_epoch_mean = np.mean(np.array(loss_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean))

            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)

    plt.plot(losses)
    plt.show()
def train(args):
    if args.create_dataset:
        df = pd.read_csv("../data/endpoints_calculated_std.csv")
        smiles = df["smiles"].to_list()
        data = df[df.columns[3:]].to_numpy()
        print("Building LegoModel")
        legoModel = LegoGram(smiles=smiles, nworkers=8)
        torch.save(legoModel, "legoModel.pk")
        print("Building sampler")
        sampler = LegoGramRNNSampler(legoModel)
        torch.save(sampler, "sampler.pk")
        print("Constructing dataset")
        dataset = MolecularNotationDataset(smiles, sampler, data)
        torch.save(dataset, 'lg.bin')
    else:
        dataset = torch.load('lg.bin')

    train_loader = DataLoader(dataset, batch_size=args.batch_size, collate_fn=collect)

    device = torch.device('cpu')
    if args.cuda:
        device = torch.device('cuda')

    model = RNN(voc_size=dataset.vocsize, device=device)
    model.train()
    model.to(device)
    print(f"Model has been created on device {device}")

    smiles_dataset = dataset.smiles
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    loss_f = nn.CrossEntropyLoss(reduction='mean', ignore_index=0)
    writer = SummaryWriter(comment=args.name_task)

    losses = []
    out_counter = 0
    cnt = 0
    for epoch in range(args.num_epochs):
        loss_list = []
        for iteration, (batch, lengths) in enumerate(tqdm(train_loader)):
            batch = batch.to(device)
            logits, endp_model = model(batch, lengths)
            # print(logits.shape); print(batch.shape)  # debug leftovers
            # next-token prediction: shift logits and targets by one position
            loss = loss_f(logits[:, :, :-1], batch[:, 1:])
            loss_list.append(loss.item())
            writer.add_scalar("CrossEntropyLoss", loss_list[-1],
                              iteration + epoch * len(train_loader))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            if iteration % args.print_every == 0 and iteration > 0:
                model.eval()
                number_generate = 100
                res = model.sample(number_generate, dataset.model)
                writer.add_text("Molecules after generator", json.dumps([res]))
                valid = len(res) * 100 / number_generate
                print(res)
                print("valid : {} %".format(valid))
                writer.add_scalar("Valid", valid, cnt)
                res = [robust_standardizer(mol) for mol in res]
                res = list(filter(lambda x: x is not None, res))
                unique = len([elem for elem in res if elem not in smiles_dataset])
                writer.add_text("Unique mols", json.dumps([res]))
                print(f"There are {unique} unique mols")
                print(res)
                writer.add_scalar("Unique", unique, cnt)
                cnt += 1
                model.train()
            writer.flush()

        epoch_loss = np.mean(loss_list)
        print(f"Loss on epoch {epoch} is {epoch_loss}")
        # simple early-stopping bookkeeping: count epochs without improvement
        if out_counter < args.stop_after and epoch > 0:
            if losses[-1] <= epoch_loss:
                out_counter += 1
            else:
                out_counter = 0
                torch.save(model, "experiments/" + args.name_task + "/model.pt")
        if epoch == 0:
            torch.save(model, "experiments/" + args.name_task + "/model.pt")
        losses.append(epoch_loss)
    return losses
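# `robust_standardizer` above is assumed to canonicalize a SMILES string and
# return None on failure. A minimal RDKit-based sketch:
from rdkit import Chem

def robust_standardizer(smiles):
    try:
        mol = Chem.MolFromSmiles(smiles)
        return Chem.MolToSmiles(mol) if mol is not None else None
    except Exception:
        return None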
print_every = 50

# training interface
step = 0
tracker = {'NLL': []}
start_time = time.time()
for ep in range(epoch):
    # learning rate decay
    if ep >= 10 and ep % 2 == 0:
        learning_rate = learning_rate * 0.5
        for param_group in optimizer.param_groups:
            param_group['lr'] = learning_rate

    for split in splits:
        dataloader = dataloaders[split]
        model.train() if split == 'train' else model.eval()
        totals = {'NLL': 0., 'words': 0}

        for itr, (_, dec_inputs, targets, lengths) in enumerate(dataloader):
            bsize = dec_inputs.size(0)
            dec_inputs = dec_inputs.to(device)
            targets = targets.to(device)
            lengths = lengths.to(device)

            # forward
            logp = model(dec_inputs, lengths)

            # calculate loss
            NLL_loss = NLL(logp, targets, lengths + 1)
            loss = NLL_loss / bsize
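# `NLL` above is not shown; given the later division by batch size, it is
# assumed to be a length-masked *sum* of token negative log-likelihoods.
# A sketch under that assumption (`logp` is (B, T, V) log-probabilities):
import torch
import torch.nn.functional as F

def NLL(logp, targets, lengths):
    B, T, V = logp.shape
    # mask out padded time steps beyond each sequence's length
    mask = torch.arange(T, device=lengths.device)[None, :] < lengths[:, None]
    token_nll = F.nll_loss(logp.reshape(-1, V), targets.reshape(-1),
                           reduction='none').reshape(B, T)
    return (token_nll * mask.float()).sum()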
def main(args):
    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr

    data_loader = get_data_loader(args.gt_path, args.descriptors_path,
                                  args.json_labels_path, args.bs)

    model = RNN(num_descriptors=args.num_descriptors,
                hidden_size=args.hidden_size,
                lstm_in_size=args.input_size)
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    # optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    optimizer = optim.Adam(model.parameters(), lr=args.lr)
    # model_loss = torch.nn.BCEWithLogitsLoss()
    model_loss = Loss()

    losses = []
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            loss_epoch = []
            for step, (descriptors, labels) in enumerate(data_loader):
                if torch.cuda.is_available():
                    descriptors = descriptors.cuda()
                    labels = labels.cuda()

                model.zero_grad()
                attention = model(descriptors)
                loss = model_loss(attention, labels)
                loss.backward()
                optimizer.step()

                loss_epoch.append(loss.cpu().detach().numpy())
                print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) +
                      ' - Step ' + str(step + 1) + '/' + str(len(data_loader)) +
                      ' - Loss: ' + str(float(loss)))

            loss_epoch_mean = np.mean(np.array(loss_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean))

            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/models_361_dropout', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)

    plt.plot(losses)
    plt.show()
                   data_loader.unique_chars)

# generate onehot representation of training data and label
X_onehot = np.zeros([rnn_net.K, data_len])
target_onehot = np.zeros([rnn_net.K, data_len])
X_int = [char2int[ch] for ch in file_data]
target_int = [char2int[ch] for ch in file_data[1:] + file_data[0]]
X_onehot[X_int, range(data_len)] = 1
target_onehot[target_int, range(data_len)] = 1
del file_data, X_int, target_int

# start training
smooth_loss_acc = rnn_net.train(X_onehot, target_onehot, h_prev, int2char,
                                char2int, epoch_num=cfg.EPOCH,
                                batch_size=cfg.BATCH_SIZE)
print("Smoothed loss:")
print(smooth_loss_acc)

# save results
loss_save_path = os.path.join(cfg.SAVE_PATH, tag + '_loss.npy')
fig_save_path = os.path.join(cfg.SAVE_PATH, tag + '_loss.png')
np.save(loss_save_path, smooth_loss_acc)
fig = plt.figure()
plt.plot(range(len(smooth_loss_acc)), smooth_loss_acc)
plt.xlabel("Iterations (x100)")
plt.ylabel("Smoothed Loss")
def main(args): print("in main") #creating tensorboard object tb_writer = SummaryWriter(log_dir=os.path.join(args.outdir, "tb/"), purge_step=0) #Loading data train_dl, val_dl, vocab, label_map = fetch_dataset(args.datapath) #Defining loss criterion = nn.CrossEntropyLoss() #Defining optimizer vocab_size = len(vocab) num_classes = len(label_map) model = RNN(vocab_size, num_classes, args.embed_dim, args.hidden_size) optimizer = optim.Adam(model.parameters(), lr=args.lr) #Looping training data for epoch in range(args.epochlen): running_loss, test_loss = 0.0, 0.0 count = 0 correct = 0 total_labels = 0 all_train_loss = [] all_test_loss = [] model.train() best_accuracy = 0 for i, batch in enumerate(train_dl): seqs, labels = batch #names = Vocab.get_string(batch) #zero the parameter gradients optimizer.zero_grad() #forward + backward + optimize pred_outputs = model(seqs) loss = criterion(pred_outputs, labels) loss.backward() optimizer.step() running_loss += loss.item() count += 1 correct += (torch.argmax(pred_outputs, dim=1) == labels).sum().item() total_labels += labels.size(0) total_loss = running_loss / count all_train_loss.append(total_loss) accuracy = (correct * 100) / total_labels tb_writer.add_scalar('Train_Loss', running_loss, epoch) tb_writer.add_scalar('Train_Accuracy', accuracy, epoch) count = 0 model.eval() for batch in val_dl: seqs, labels = batch pred_outputs = model(seqs) loss = criterion(pred_outputs, labels) test_loss += loss.item() count += 1 correct += (torch.argmax(pred_outputs, dim=1) == labels).sum().item() total_labels += labels.size(0) total_test_loss = test_loss / count all_test_loss.append(total_test_loss) test_accuracy = (correct * 100) / total_labels print( f"Epoch : {str(epoch).zfill(2)}, Training Loss : {round(total_loss, 4)}, Training Accuracy : {round(accuracy, 4)}," f" Test Loss : {round(total_test_loss, 4)}, Test Accuracy : {round(test_accuracy, 4)}" ) tb_writer.add_scalar('Test_Loss', test_loss, epoch) tb_writer.add_scalar('Test_Accuracy', test_accuracy, epoch) if best_accuracy < test_accuracy: best_accuracy = test_accuracy torch.save(model.state_dict(), args.outdir + args.modelname + str(epoch)) # Plot confusion matrix y_true = [] y_pred = [] for data in val_dl: seq, labels = data outputs = model(seq) predicted = torch.argmax(outputs, dim=1) y_true += labels.tolist() y_pred += predicted.tolist() cm = confusion_matrix(np.array(y_true), np.array(y_pred)) disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=label_map.keys()) disp.plot(include_values=True, cmap='viridis', ax=None, xticks_rotation='horizontal', values_format=None) plt.show()
def main(args):
    print(sys.argv)
    if not os.path.exists('models'):
        os.mkdir('models')

    num_epochs = args.ne
    lr_decay = args.decay
    learning_rate = args.lr

    data_loader = get_data_loader(args.gt_path, args.tensors_path,
                                  args.json_labels_path, args.bs)

    model = RNN(lstm_hidden_size=args.hidden_size)
    if torch.cuda.is_available():
        model.cuda()
    model.train()

    #optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=args.mm)
    if args.rms:
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr, momentum=args.mm)
    else:
        optimizer = optim.Adam(model.parameters(), lr=args.lr)
    model_loss = torch.nn.BCEWithLogitsLoss()
    # model_loss = Loss()

    losses = []
    p = 1  # annealed below and passed to the model along with gt
    try:
        for epoch in range(num_epochs):
            if epoch % args.decay_epoch == 0 and epoch > 0:
                learning_rate = learning_rate * lr_decay
                for param_group in optimizer.param_groups:
                    param_group['lr'] = learning_rate

            if epoch == 3:
                p = 2 / 3
            if epoch == 7:
                p = 1 / 3
            if epoch == 15:
                p = 0

            loss_epoch = []
            loss1_epoch = []
            loss2_epoch = []
            for step, (tensors, masks, gt) in enumerate(data_loader):
                if torch.cuda.is_available():
                    tensors = tensors.cuda()
                    masks = masks.cuda()
                    gt = gt.cuda()

                model.zero_grad()
                out, att = model(tensors, masks, gt, p)
                loss1 = model_loss(out, gt)
                # att[:, :-1, :] -> attention produced (location in the next frame) up to the second-to-last frame (49)
                # gt[:, 1:, :] -> gt from the second frame to the last frame (49)
                loss2 = model_loss(att[:, :-1, :], gt[:, 1:, :])
                loss = loss1 + loss2
                loss.backward()
                optimizer.step()

                loss_epoch.append(loss.cpu().detach().numpy())
                loss1_epoch.append(loss1.cpu().detach().numpy())
                loss2_epoch.append(loss2.cpu().detach().numpy())
                #print('Epoch ' + str(epoch + 1) + '/' + str(num_epochs) + ' - Step ' + str(step + 1) + '/' +
                #      str(len(data_loader)) + ' - Loss: ' + str(float(loss)) + " (Loss1: " + str(float(loss1))
                #      + ", Loss2: " + str(float(loss2)) + ")")

            loss_epoch_mean = np.mean(np.array(loss_epoch))
            loss1_epoch_mean = np.mean(np.array(loss1_epoch))
            loss2_epoch_mean = np.mean(np.array(loss2_epoch))
            losses.append(loss_epoch_mean)
            print('Total epoch loss: ' + str(loss_epoch_mean) + " (loss1: " +
                  str(loss1_epoch_mean) + ", loss2: " + str(loss2_epoch_mean) + ")")

            if (epoch + 1) % args.save_epoch == 0 and epoch > 0:
                filename = 'model-epoch-' + str(epoch + 1) + '.pth'
                model_path = os.path.join('models/', filename)
                torch.save(model.state_dict(), model_path)
    except KeyboardInterrupt:
        pass

    filename = 'model-epoch-last.pth'
    model_path = os.path.join('models', filename)
    torch.save(model.state_dict(), model_path)

    plt.plot(losses)
    plt.show()
def train(args, labeled, resume_from, ckpt_file):
    print("========== In the train step ==========")

    iterator, TEXT, LABEL, tabular_dataset = load_data(stage="train",
                                                       args=args, indices=labeled)
    print("Created the iterators")

    INPUT_DIM = len(TEXT.vocab)
    OUTPUT_DIM = 1
    BIDIRECTIONAL = True
    PAD_IDX = TEXT.vocab.stoi[TEXT.pad_token]

    model = RNN(
        INPUT_DIM,
        args["EMBEDDING_DIM"],
        args["HIDDEN_DIM"],
        OUTPUT_DIM,
        args["N_LAYERS"],
        BIDIRECTIONAL,
        args["DROPOUT"],
        PAD_IDX,
    )
    model = model.to(device=device)

    # initialise the embedding layer with pre-trained vectors;
    # zero out the <unk> and <pad> rows
    pretrained_embeddings = TEXT.vocab.vectors
    model.embedding.weight.data.copy_(pretrained_embeddings)
    unk_idx = TEXT.vocab.stoi["<unk>"]
    pad_idx = TEXT.vocab.stoi["<pad>"]
    model.embedding.weight.data[unk_idx] = torch.zeros(args["EMBEDDING_DIM"])
    model.embedding.weight.data[pad_idx] = torch.zeros(args["EMBEDDING_DIM"])

    optimizer = optim.Adam(model.parameters())
    criterion = nn.BCEWithLogitsLoss()

    model = model.to("cuda")
    criterion = criterion.to("cuda")

    if resume_from is not None:
        ckpt = torch.load(os.path.join(args["EXPT_DIR"], resume_from + ".pth"))
        model.load_state_dict(ckpt["model"])
        optimizer.load_state_dict(ckpt["optimizer"])
    else:
        getdatasetstate(args)

    model.train()  # turn on dropout, etc.

    for epoch in tqdm(range(args["train_epochs"]), desc="Training"):
        running_loss = 0
        i = 0
        for batch in iterator:
            # print("Batch is", batch.review[0])
            text, text_length = batch.review
            labels = batch.sentiment
            text = text.cuda()
            text_length = text_length.cuda()

            optimizer.zero_grad()
            output = model(text, text_length)
            loss = criterion(torch.squeeze(output).float(), labels.float())
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            # report the average loss every 10 batches
            if (i + 1) % 10 == 0:
                print(
                    "epoch: {} batch: {} running-loss: {}".format(
                        epoch + 1, i + 1, running_loss / 10),
                    end="\r",
                )
                running_loss = 0
            i += 1

    print("Finished Training. Saving the model as {}".format(ckpt_file))

    ckpt = {"model": model.state_dict(), "optimizer": optimizer.state_dict()}
    torch.save(ckpt, os.path.join(args["EXPT_DIR"], ckpt_file + ".pth"))

    return
def main():
    parser = argparse.ArgumentParser(description="==========[RNN]==========")
    parser.add_argument("--mode", default="train",
                        help="available modes: train, test, eval")
    parser.add_argument("--model", default="rnn",
                        help="available models: rnn, lstm")
    parser.add_argument("--dataset", default="all",
                        help="available datasets: all, MA, MI, TN")
    parser.add_argument("--rnn_layers", default=3, type=int,
                        help="number of stacked rnn layers")
    parser.add_argument("--hidden_dim", default=16, type=int,
                        help="number of hidden dimensions")
    parser.add_argument("--lin_layers", default=1, type=int,
                        help="number of linear layers before output")
    parser.add_argument("--epochs", default=100, type=int,
                        help="number of max training epochs")
    parser.add_argument("--dropout", default=0.0, type=float,
                        help="dropout probability")
    parser.add_argument("--learning_rate", default=0.01, type=float,
                        help="learning rate")
    parser.add_argument("--verbose", default=2, type=int,
                        help="how much training output?")
    options = parser.parse_args()

    verbose = options.verbose
    if torch.cuda.is_available():
        device = torch.device("cuda")
        if verbose > 0:
            print("GPU available, using cuda...")
            print()
    else:
        device = torch.device("cpu")
        if verbose > 0:
            print("No available GPU, using CPU...")
            print()

    params = {
        "MODE": options.mode,
        "MODEL": options.model,
        "DATASET": options.dataset,
        "RNN_LAYERS": options.rnn_layers,
        "HIDDEN_DIM": options.hidden_dim,
        "LIN_LAYERS": options.lin_layers,
        "EPOCHS": options.epochs,
        "DROPOUT_PROB": options.dropout,
        "LEARNING_RATE": options.learning_rate,
        "DEVICE": device,
        "OUTPUT_SIZE": 1
    }
    params["PATH"] = "models/" + params["MODEL"] + "_" + params["DATASET"] + "_" + \
        str(params["RNN_LAYERS"]) + "_" + str(params["HIDDEN_DIM"]) + "_" + \
        str(params["LIN_LAYERS"]) + "_" + str(params["LEARNING_RATE"]) + "_" + \
        str(params["DROPOUT_PROB"]) + "_" + str(params["EPOCHS"]) + "_model.pt"

    #if options.mode == "train":
    #    print("training placeholder...")
    train_data = utils.DistrictData(params["DATASET"], "train")
    val_data = utils.DistrictData(params["DATASET"], "val")
    params["INPUT_SIZE"] = train_data[0]['sequence'].size()[1]

    if params["MODEL"] == "rnn":
        model = RNN(params)
    elif params["MODEL"] == "lstm":
        model = LSTM(params)
    model.to(params["DEVICE"])

    criterion = nn.MSELoss(reduction='sum')
    optimizer = torch.optim.Adam(model.parameters(), lr=params["LEARNING_RATE"])

    if verbose == 0:
        print(params["PATH"])
    else:
        utils.print_params(params)
        print("Beginning training...")
        print()

    since = time.time()
    best_val_loss = 10.0
    for e in range(params["EPOCHS"]):
        running_loss = 0.0
        #model.zero_grad()
        model.train()
        train_loader = DataLoader(train_data, batch_size=32, shuffle=True,
                                  num_workers=4)
        for batch in train_loader:
            x = batch['sequence'].to(device)
            y = batch['target'].to(device)
            seq_len = batch['size'].to(device)

            optimizer.zero_grad()
            y_hat, hidden = model(x, seq_len)
            loss = criterion(y_hat, y)
            running_loss += loss.item()  # detach before accumulating
            loss.backward()
            optimizer.step()

        mean_loss = running_loss / len(train_data)
        val_loss = evaluate(val_data, model, params, criterion, validation=True)

        if verbose == 2 or (verbose == 1 and (e + 1) % 100 == 0):
            print('=' * 25 + ' EPOCH {}/{} '.format(e + 1, params["EPOCHS"]) + '=' * 25)
            print('Training Loss: {}'.format(mean_loss))
            print('Validation Loss: {}'.format(val_loss))
            print()

        # only start checkpointing after the first third of training
        if e > params["EPOCHS"] / 3:
            if val_loss < best_val_loss:
                best_val_loss = val_loss
                best_model = model.state_dict()
                torch.save(best_model, params["PATH"])

    time_elapsed = time.time() - since
    print('Training complete in {:.0f}m {:.0f}s'.format(
        time_elapsed // 60, time_elapsed % 60))
    print('Final Training Loss: {:.4f}'.format(mean_loss))
    print('Best Validation Loss: {:.4f}'.format(best_val_loss))

    test_data = utils.DistrictData(params["DATASET"], "test")
    test_loss = evaluate(test_data, model, params, criterion)
    print('Test Loss: {}'.format(test_loss))
    print()
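# `evaluate` above is not shown. A minimal sketch consistent with how it is
# called (sum-reduction MSE averaged over the dataset size); the batch size
# of 32 mirrors the training loop but is an assumption here:
import torch
from torch.utils.data import DataLoader

def evaluate(data, model, params, criterion, validation=False):
    model.eval()
    total = 0.0
    loader = DataLoader(data, batch_size=32)
    with torch.no_grad():
        for batch in loader:
            x = batch['sequence'].to(params["DEVICE"])
            y = batch['target'].to(params["DEVICE"])
            seq_len = batch['size'].to(params["DEVICE"])
            y_hat, _ = model(x, seq_len)
            total += criterion(y_hat, y).item()
    return total / len(data)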
class dl_model():

    def __init__(self, mode):
        # Read the config file which contains parameters
        self.config = config
        self.mode = mode

        # Architecture name decides prefix for storing models and plots
        feature_dim = self.config['vocab_size']
        self.arch_name = '_'.join(
            [self.config['rnn'], str(self.config['num_layers']),
             str(self.config['hidden_dim']), str(feature_dim)])
        print("Architecture:", self.arch_name)

        # Change paths for storing models
        self.config['models'] = self.config['models'].split('/')[0] + '_' + self.arch_name + '/'
        self.config['plots'] = self.config['plots'].split('/')[0] + '_' + self.arch_name + '/'

        # Make folders if they don't exist
        if not os.path.exists(self.config['models']):
            os.mkdir(self.config['models'])
        if not os.path.exists(self.config['plots']):
            os.mkdir(self.config['plots'])
        if not os.path.exists(self.config['pickle']):
            os.mkdir(self.config['pickle'])

        self.cuda = (self.config['cuda'] and torch.cuda.is_available())

        # load/initialise metrics to be stored and load model
        if mode == 'train' or mode == 'test':
            self.plots_dir = self.config['plots']

            # store hyperparameters
            self.total_epochs = self.config['epochs']
            self.test_every = self.config['test_every_epoch']
            self.test_per = self.config['test_per_epoch']
            self.print_per = self.config['print_per_epoch']
            self.save_every = self.config['save_every']
            self.plot_every = self.config['plot_every']

            # dataloaders which return batches of data
            self.train_loader = dataloader('train', self.config)
            self.test_loader = dataloader('test', self.config)

            # declare model
            self.model = RNN(self.config)

            self.start_epoch = 1
            self.edit_dist = []
            self.train_losses, self.test_losses = [], []
        else:
            self.model = RNN(self.config)

        if self.cuda:
            self.model.cuda()

        # resume training from a stored model
        if self.mode == 'train' and self.config['resume']:
            self.start_epoch, self.train_losses, self.test_losses = self.model.load_model(
                mode, self.model.rnn_name, self.model.num_layers, self.model.hidden_dim)
            self.start_epoch += 1
        # load best model for testing/inference
        elif self.mode == 'test' or mode == 'test_one':
            self.model.load_model(mode, self.config['rnn'],
                                  self.model.num_layers, self.model.hidden_dim)

        # whether to use embeddings
        if self.config['use_embedding']:
            self.use_embedding = True
        else:
            self.use_embedding = False

    # Train the model
    def train(self):
        print("Starting training at t =", datetime.datetime.now())
        print('Batches per epoch:', len(self.train_loader))
        self.model.train()

        # when to print losses during the epoch
        print_range = list(np.linspace(0, len(self.train_loader),
                                       self.print_per + 2, dtype=np.uint32)[1:-1])
        if self.test_per == 0:
            test_range = []
        else:
            test_range = list(np.linspace(0, len(self.train_loader),
                                          self.test_per + 2, dtype=np.uint32)[1:-1])

        for epoch in range(self.start_epoch, self.total_epochs + 1):
            try:
                print("Epoch:", str(epoch))
                epoch_loss = 0.0
                # i is used for monitoring the batch and printing loss, etc.
                i = 0
                while True:
                    i += 1

                    # Get batch of inputs, labels, missed_chars and lengths
                    # along with status (when to end epoch)
                    inputs, labels, miss_chars, input_lens, status = self.train_loader.return_batch()

                    if self.use_embedding:
                        inputs = torch.from_numpy(inputs).long()  # embeddings should be of dtype long
                    else:
                        inputs = torch.from_numpy(inputs).float()

                    # convert to torch tensors
                    labels = torch.from_numpy(labels).float()
                    miss_chars = torch.from_numpy(miss_chars).float()
                    input_lens = torch.from_numpy(input_lens).long()

                    if self.cuda:
                        inputs = inputs.cuda()
                        labels = labels.cuda()
                        miss_chars = miss_chars.cuda()
                        input_lens = input_lens.cuda()

                    # zero the parameter gradients
                    self.model.optimizer.zero_grad()

                    # forward + backward + optimize
                    outputs = self.model(inputs, input_lens, miss_chars)
                    loss, miss_penalty = self.model.calculate_loss(
                        outputs, labels, input_lens, miss_chars, self.cuda)
                    loss.backward()

                    # clip gradient
                    # torch.nn.utils.clip_grad_norm_(self.model.parameters(), self.config['grad_clip'])
                    self.model.optimizer.step()

                    # store loss
                    epoch_loss += loss.item()

                    # print loss
                    if i in print_range and epoch == 1:
                        print('After %i batches, Current Loss = %.7f' % (i, epoch_loss / i))
                    elif i in print_range and epoch > 1:
                        print('After %i batches, Current Loss = %.7f, Avg. Loss = %.7f, Miss Loss = %.7f' % (
                            i, epoch_loss / i,
                            np.mean(np.array([x[0] for x in self.train_losses])),
                            miss_penalty))

                    # test model periodically
                    if i in test_range:
                        self.test(epoch)
                        self.model.train()

                    # Reached end of dataset
                    if status == 1:
                        break

                # refresh dataset, i.e. generate a new dataset from the corpus
                if epoch % self.config['reset_after'] == 0:
                    self.train_loader.refresh_data(epoch)

                # take the last example from the epoch and print the incomplete word,
                # target characters and missed characters
                random_eg = min(np.random.randint(self.train_loader.batch_size),
                                inputs.shape[0] - 1)
                encoded_to_string(inputs.cpu().numpy()[random_eg],
                                  labels.cpu().numpy()[random_eg],
                                  miss_chars.cpu().numpy()[random_eg],
                                  input_lens.cpu().numpy()[random_eg],
                                  self.train_loader.char_to_id, self.use_embedding)

                # Store tuple of training loss and epoch number
                self.train_losses.append((epoch_loss / len(self.train_loader), epoch))

                # save model
                if epoch % self.save_every == 0:
                    self.model.save_model(False, epoch, self.train_losses, self.test_losses,
                                          self.model.rnn_name, self.model.num_layers,
                                          self.model.hidden_dim)

                # test every 5 epochs in the beginning and then every fixed number
                # of epochs specified in the config file; useful to see how the
                # loss stabilises in the beginning
                if epoch % 5 == 0 and epoch < self.test_every:
                    self.test(epoch)
                    self.model.train()
                elif epoch % self.test_every == 0:
                    self.test(epoch)
                    self.model.train()

                # plot loss and accuracy
                if epoch % self.plot_every == 0:
                    self.plot_loss_acc(epoch)

            except KeyboardInterrupt:
                # save model before exiting
                print("Saving model before quitting")
                self.model.save_model(False, epoch - 1, self.train_losses, self.test_losses,
                                      self.model.rnn_name, self.model.num_layers,
                                      self.model.hidden_dim)
                exit(0)

    # test model
    def test(self, epoch=None):
        self.model.eval()
        print("Testing...")
        print('Total batches:', len(self.test_loader))
        test_loss = 0

        # generate a new dataset from the corpus
        self.test_loader.refresh_data(epoch)

        with torch.no_grad():
            while True:
                # Get batch of inputs, labels, missed characters and lengths
                # along with status (when to end epoch)
                inputs, labels, miss_chars, input_lens, status = self.test_loader.return_batch()

                if self.use_embedding:
                    inputs = torch.from_numpy(inputs).long()
                else:
                    inputs = torch.from_numpy(inputs).float()

                labels = torch.from_numpy(labels).float()
                miss_chars = torch.from_numpy(miss_chars).float()
                input_lens = torch.from_numpy(input_lens).long()

                if self.cuda:
                    inputs = inputs.cuda()
                    labels = labels.cuda()
                    miss_chars = miss_chars.cuda()
                    input_lens = input_lens.cuda()

                # forward pass only; no gradients are tracked here
                outputs = self.model(inputs, input_lens, miss_chars)
                loss, miss_penalty = self.model.calculate_loss(
                    outputs, labels, input_lens, miss_chars, self.cuda)
                test_loss += loss.item()

                # Reached end of dataset
                if status == 1:
                    break

        # take a random example from the epoch and print the incomplete word,
        # target characters and missed characters
        # min, since the last batch may not be of length batch_size
        random_eg = min(np.random.randint(self.train_loader.batch_size),
                        inputs.shape[0] - 1)
        encoded_to_string(inputs.cpu().numpy()[random_eg],
                          labels.cpu().numpy()[random_eg],
                          miss_chars.cpu().numpy()[random_eg],
                          input_lens.cpu().numpy()[random_eg],
                          self.train_loader.char_to_id, self.use_embedding)

        # Average out the losses
        test_loss /= len(self.test_loader)
        print("Test Loss: %.7f, Miss Penalty: %.7f" % (test_loss, miss_penalty))

        # Store in a list for keeping track of model performance
        self.test_losses.append((test_loss, epoch))

        # if the testing loss is a minimum, store it as the 'best.pth' model,
        # which is used during inference; store only when doing train/test
        # together, i.e. mode is train
        if test_loss == min([x[0] for x in self.test_losses]) and self.mode == 'train':
            print("Best new model found!")
            self.model.save_model(True, epoch, self.train_losses, self.test_losses,
                                  self.model.rnn_name, self.model.num_layers,
                                  self.model.hidden_dim)

        return test_loss

    def predict(self, string, misses, char_to_id):
        """
        called during inference
        :param string: word with predicted characters and blanks at remaining places
        :param misses: list of characters which were predicted but game feedback
                       indicated that they are not present
        :param char_to_id: mapping from characters to id
        """
        id_to_char = {v: k for k, v in char_to_id.items()}

        # convert string into the desired input tensor
        if self.use_embedding:
            encoded = np.zeros((len(char_to_id)))
            for i, c in enumerate(string):
                if c == '*':
                    encoded[i] = len(id_to_char) - 1
                else:
                    encoded[i] = char_to_id[c]
            inputs = np.array(encoded)[None, :]
            inputs = torch.from_numpy(inputs).long()
        else:
            encoded = np.zeros((len(string), len(char_to_id)))
            for i, c in enumerate(string):
                if c == '*':
                    encoded[i][len(id_to_char) - 1] = 1
                else:
                    encoded[i][char_to_id[c]] = 1
            inputs = np.array(encoded)[None, :, :]
            inputs = torch.from_numpy(inputs).float()

        # encode the missed characters
        miss_encoded = np.zeros((len(char_to_id) - 1))
        for c in misses:
            miss_encoded[char_to_id[c]] = 1
        miss_encoded = np.array(miss_encoded)[None, :]
        miss_encoded = torch.from_numpy(miss_encoded).float()

        input_lens = np.array([len(string)])
        input_lens = torch.from_numpy(input_lens).long()

        # pass through the model
        output = self.model(inputs, input_lens, miss_encoded).detach().cpu().numpy()[0]

        # sort predictions; we cannot consider only the argmax, since a missed
        # character may also be assigned a high probability (a well-trained
        # model shouldn't do this, but it can happen)
        sorted_predictions = np.argsort(output)[::-1]

        return [id_to_char[x] for x in sorted_predictions]

    def plot_loss_acc(self, epoch):
        """
        take train/test loss and test accuracy input and plot it over time
        :param epoch: to track performance across epochs
        """
        plt.clf()
        fig, ax1 = plt.subplots()
        ax1.set_xlabel('Epoch')
        ax1.set_ylabel('Loss')
        ax1.plot([x[1] for x in self.train_losses], [x[0] for x in self.train_losses],
                 color='r', label='Train Loss')
        ax1.plot([x[1] for x in self.test_losses], [x[0] for x in self.test_losses],
                 color='b', label='Test Loss')
        ax1.tick_params(axis='y')
        ax1.legend(loc='upper left')

        fig.tight_layout()  # otherwise the right y-label is slightly clipped
        plt.grid(True)
        plt.legend()
        plt.title(self.arch_name)

        filename = self.plots_dir + 'plot_' + self.arch_name + '_' + str(epoch) + '.png'
        plt.savefig(filename)

        print("Saved plots")
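# A hedged usage sketch for dl_model; `config` is the module-level dict read
# in __init__, and the mode strings follow the ones handled there. The word
# string and `char_to_id` mapping in the predict call are illustrative only:
#
#   runner = dl_model('train')   # builds loaders, paths, and the RNN
#   runner.train()               # trains, periodically calling runner.test()
#   guesses = runner.predict('a**le', misses=['r'], char_to_id=char_to_id)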