def build_dcmn(batch_size, max_len, ctx):
    # Build the DCMN model plus train/dev dataloaders, trainer, and loss.
    train_sentences = './data_tmp/train_sentences.pkl'
    train_labels = './data_tmp/train_labels.pkl'
    test_sentences = './data_tmp/dev_sentences.pkl'
    test_labels = './data_tmp/dev_labels.pkl'
    max_pad_length = 16
    train_sample_num = None
    test_sample_num = None
    dataloader_train = preprocess.get_dataloader(
        sts=train_sentences, labels=train_labels,
        sample_num=train_sample_num, max_seq_length=max_len,
        batch_size=batch_size, max_pad_length=max_pad_length, ctx=ctx,
        dataset_load_path='./data_tmp/dataset-train.pkl')
    dataloader_test = preprocess.get_dataloader(
        sts=test_sentences, labels=test_labels,
        sample_num=test_sample_num, max_seq_length=max_len,
        batch_size=batch_size, max_pad_length=max_pad_length, ctx=ctx,
        dataset_load_path='./data_tmp/dataset-test.pkl')
    dcmn = model_dcmn.DCMN(num_candidates=max_pad_length - 2)
    dcmn.initialize(init=init.Uniform(.001), ctx=ctx)
    loss_func = gluon.loss.SoftmaxCrossEntropyLoss()
    lr, clip = 5e-5, 5
    trainer = gluon.Trainer(dcmn.collect_params(), 'adam', {
        'learning_rate': lr,
        'clip_gradient': clip
    })
    return dcmn, dataloader_train, dataloader_test, trainer, loss_func
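
# --- Sketch: a hypothetical driver for build_dcmn. The batch size, sequence
# length, and device choice below are illustrative assumptions, not values
# taken from this repo.
import mxnet as mx

ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
dcmn, dl_train, dl_test, trainer, loss_func = build_dcmn(
    batch_size=32, max_len=64, ctx=ctx)
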
def inference(model, samples):
    '''Run inference for a list of samples, each of the form
    [obs1, obs2, hyp1, hyp2].'''
    dataloader = preprocess.get_dataloader(samples)
    for i, embs in enumerate(dataloader):
        # embs is [emb(obs1), emb(obs2), emb(hyp1), emb(hyp2), ...]
        output = model(embs)
        pred = output.argmax(axis=-1).astype('int32').asscalar()
        # samples[i][pred + 2] is the hypothesis picked for the i-th sample
        print('Sample:\033[34m')
        print('Observation 1:', samples[i][0])
        print('Observation 2:', samples[i][1])
        print('Hypothesis 1:', samples[i][2])
        print('Hypothesis 2:', samples[i][3])
        print('\033[0mPred:\033[36m')
        print(samples[i][0], '\033[32m\n' + samples[i][pred + 2],
              output[0].asnumpy(), '\033[36m\n' + samples[i][1], '\033[0m')
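
# --- Sketch: a hypothetical inference call. The four strings (two
# observations, two candidate hypotheses) are made-up examples; it assumes a
# DCMN model `dcmn` with loaded parameters, as in the __main__ block below.
samples = [['Ana left home without an umbrella.',
            'She arrived at work completely soaked.',
            'The sun was shining all morning.',
            'It started to rain on her way to work.']]
inference(dcmn, samples)
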
    return gradient_penalty


if __name__ == "__main__":
    MAX_EPOCH = 100
    if torch.cuda.is_available():
        device = torch.device("cuda:0")
    else:
        device = torch.device("cpu")
    netG = Generator().to(device)
    netD = discriminator().to(device)
    optimizerD = optim.RMSprop(netD.parameters(), lr=1e-5)
    optimizerG = optim.RMSprop(netG.parameters(), lr=1e-5)
    config = clstm_config()
    train, test = get_dataloader(config.batch_size)
    # ----------------------------- train
    for epoch in range(MAX_EPOCH):
        print(f'====== epoch {epoch} ======')
        netD.train()
        netG.train()
        for x, y in train:
            # -------- train D: clip critic weights to keep it Lipschitz (WGAN)
            for parm in netD.parameters():
                parm.data.clamp_(-0.01, 0.01)
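
# --- The `return gradient_penalty` above is the tail of a function cut off
# by this excerpt. A standard WGAN-GP penalty (Gulrajani et al., 2017)
# typically looks like the sketch below; the names mirror this script, but
# the body is an assumption, not the author's exact code.
import torch

def calc_gradient_penalty(netD, real, fake, device, lambda_gp=10.0):
    # Interpolate between real and fake samples with a random weight.
    alpha = torch.rand(real.size(0), *([1] * (real.dim() - 1)), device=device)
    interp = (alpha * real + (1 - alpha) * fake).requires_grad_(True)
    d_interp = netD(interp)
    grads = torch.autograd.grad(outputs=d_interp, inputs=interp,
                                grad_outputs=torch.ones_like(d_interp),
                                create_graph=True, retain_graph=True)[0]
    # Penalize deviation of the critic's gradient norm from 1.
    return lambda_gp * ((grads.reshape(grads.size(0), -1)
                         .norm(2, dim=1) - 1) ** 2).mean()
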
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * images.size(0)
        epoch_loss = running_loss / data_size
        training_msg.text(f'epoch:{epoch+1} | Train loss: {epoch_loss:.4f}')
        pbar.progress(epoch + 1)


if __name__ == "__main__":
    if submit:
        assert isinstance(n_epochs, int)
        if not check_path(image_data_path):
            st.error(f'{image_data_path} doesn\'t exist!')
        else:
            image_ds = get_image_dataset(image_data_path)
            dataloader = get_dataloader(image_ds, batch_size=8)
            data_size = len(image_ds)
            model, criterion, optimizer = get_model(image_ds)
            train_loop(model, dataloader, n_epochs, optimizer, criterion,
                       data_size)
            st.text('Model trained successfully!')
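
# --- The excerpt above begins mid-way through train_loop; the gradient reset
# sits outside the cut. A minimal complete version, assuming the Streamlit
# widgets (training_msg, pbar) are created inside the function, would look
# roughly like this sketch.
import streamlit as st
import torch

def train_loop(model, dataloader, n_epochs, optimizer, criterion, data_size):
    device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
    training_msg = st.empty()
    pbar = st.progress(0)
    model.train()
    for epoch in range(n_epochs):
        running_loss = 0.0
        for images, labels in dataloader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()  # reset gradients before each step
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item() * images.size(0)
        epoch_loss = running_loss / data_size
        training_msg.text(f'epoch:{epoch+1} | Train loss: {epoch_loss:.4f}')
        pbar.progress(epoch + 1)
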
    f1 = 2 * precision * recall / (precision + recall)
    return "loss=%.4f, acc=%.4f, precision=%.4f, recall=%.4f, F1=%.4f" % (
        l / cnt, (tp + tn) / cnt, precision, recall, f1)


if __name__ == '__main__':
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    print(device)
    config = clstm_config()
    train, test = get_dataloader(config.batch_size, rate=0.4, split=0.8,
                                 use_sr=False, normalize=True)
    model = SRCNN(device)
    print(model)
    print('test: ', model.run_epoch(test, False))
    for epoch in range(1000):
        print('epoch %d:' % epoch)
        # train
        print('train: ', model.run_epoch(train, True))
        # test
        print('test: ', model.run_epoch(test, False))
        fn += ((pred != y) & (pred != 1)).sum().item()  # per-batch accumulation
    cnt = tp + tn + fp + fn
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    f1 = 2 * precision * recall / (precision + recall)
    return "loss=%.4f, acc=%.4f, precision=%.4f, recall=%.4f, F1=%.4f" % (
        l / cnt, (tp + tn) / cnt, precision, recall, f1)


if __name__ == '__main__':
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    print(device)
    config = clstm_config()
    train, test = get_dataloader(batch_size=256, rate=0.45, split=0.9,
                                 use_sr=False, normalize=True)
    model = BaseLSTM(device)
    print(model)
    print('test: ', model.run_epoch(test, False))
    for epoch in range(100):
        print('epoch %d:' % epoch)
        # train
        print('train: ', model.run_epoch(train, True))
        # test
        print('test: ', model.run_epoch(test, False))
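
# --- Sketch: precision and recall above divide by counts that can be zero
# early in training. A guarded variant (the zero-handling policy is an
# assumption) avoids the ZeroDivisionError.
def prf1(tp, fp, fn):
    # Return (precision, recall, f1), treating empty denominators as 0.
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = (2 * precision * recall / (precision + recall)
          if precision + recall else 0.0)
    return precision, recall, f1
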
    f1 = 2 * precision * recall / (precision + recall)
    return "loss=%.4f, acc=%.4f, precision=%.4f, recall=%.4f, F1=%.4f" % (
        l / cnt, (tp + tn) / cnt, precision, recall, f1)


if __name__ == '__main__':
    if torch.cuda.is_available():
        device = torch.device('cuda:0')
    else:
        device = torch.device('cpu')
    print(device)
    config = clstm_config()
    train, test = get_dataloader(batch_size=512, rate=0.4, split=0.9,
                                 use_sr=True, normalize=True)
    model = BaseLSTM(device)
    print(model)
    print('test: ', model.run_epoch(test, False))
    for epoch in range(300):
        print('epoch %d:' % epoch)
        # train
        print('train: ', model.run_epoch(train, True))
        # test
        print('test: ', model.run_epoch(test, False))
        print('\033[0mPred:\033[36m')
        print(samples[i][0], '\033[32m\n' + samples[i][pred + 2],
              output[0].asnumpy(), '\033[36m\n' + samples[i][1], '\033[0m')


if __name__ == '__main__':
    if args.inference:
        # do inference
        dcmn = model.DCMN()
        dcmn.load_parameters(args.model_params)
        sts = args.sample.split('|')
        samples = [[sentence.strip() for sentence in sts]]
        inference(dcmn, samples)
    else:
        # do training
        dataloader_train = preprocess.get_dataloader(sts=args.train_sentences,
                                                     labels=args.train_labels)
        dataloader_test = preprocess.get_dataloader(sts=args.test_sentences,
                                                    labels=args.test_labels)
        dcmn = model.DCMN()
        dcmn.initialize(init=init.Uniform(.001), ctx=mx.gpu())
        loss_func = gluon.loss.SoftmaxCrossEntropyLoss()
        lr, clip = 5e-4, 2.5
        trainer = gluon.Trainer(dcmn.collect_params(), 'adam', {
            'learning_rate': lr,
            'clip_gradient': clip
        })
        train.train_valid(dataloader_train, dataloader_test, dcmn, loss_func,
                          trainer,
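
# --- The script reads args.inference, args.model_params, args.sample, and
# the dataset paths, but the parser sits outside this excerpt. A plausible
# definition (flag names inferred from usage; defaults are assumptions):
import argparse

parser = argparse.ArgumentParser(description='DCMN training / inference')
parser.add_argument('--inference', action='store_true',
                    help='run inference instead of training')
parser.add_argument('--model_params', type=str,
                    help='path to saved DCMN parameters')
parser.add_argument('--sample', type=str,
                    help="one sample as a '|'-separated obs1|obs2|hyp1|hyp2 string")
parser.add_argument('--train_sentences', type=str)
parser.add_argument('--train_labels', type=str)
parser.add_argument('--test_sentences', type=str)
parser.add_argument('--test_labels', type=str)
args = parser.parse_args()
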
    model.load_parameters(args.param, ctx=model_ctx)
    sample = nd.normal(loc=0, scale=1, shape=(1, 64), ctx=model_ctx)
    print('\033[33mOriginal: \033[34m%s\033[0m' % args.org_sts)
    print('\033[31mResult: \033[35m%s\033[0m' %
          generate(model, args.org_sts, sample, vocab, ctx=model_ctx))
else:
    # load train / valid dataset
    train_dataset_str, valid_dataset_str = get_dataset_str(
        folder=args.dataset, length=args.nsample)
    if args.param:
        # start from existing parameters; reuse the saved vocab
        with open('data/' + args.dataset + '/vocab.json', 'r') as f:
            vocab = nlp.Vocab.from_json(json.load(f))
        train_ld, valid_ld = get_dataloader(train_dataset_str,
                                            valid_dataset_str,
                                            clip_length=args.seq_len,
                                            vocab=vocab,
                                            batch_size=args.batch_size)
        model = VAE_LSTM(emb_size=300, vocab_size=len(vocab))
        model.load_parameters(args.param, ctx=model_ctx)
    else:
        # new start: build the vocab, save it, randomly initialize the model
        train_ld, valid_ld, vocab = get_dataloader(train_dataset_str,
                                                   valid_dataset_str,
                                                   clip_length=args.seq_len,
                                                   vocab_size=20000,
                                                   batch_size=args.batch_size)
        vocab_js = vocab.to_json()
        with open('data/' + args.dataset + '/vocab.json', 'w') as f:
            json.dump(vocab_js, f)
        model = VAE_LSTM(emb_size=300, vocab_size=len(vocab))
        model.initialize(init=mx.initializer.Xavier(magnitude=.7),
                         ctx=model_ctx)
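
# --- Sketch: generation draws the latent code from the standard normal
# prior; the 64-dimensional latent matches the shape used above. The helper
# below (name and loop are assumptions) decodes k candidates with the
# script's own generate().
from mxnet import nd

def sample_sentences(model, org_sts, vocab, model_ctx, k=5):
    results = []
    for _ in range(k):
        z = nd.normal(loc=0, scale=1, shape=(1, 64), ctx=model_ctx)
        results.append(generate(model, org_sts, z, vocab, ctx=model_ctx))
    return results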