def train(epochs):
    vocab_size = loader.vocab_size
    num_classes = loader.num_classes
    model = TextCNN(vocab_size, num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    for epoch in range(epochs):
        print('-' * 40 + ' epoch {} '.format(epoch) + '-' * 40)
        train_iter(model, loader, criterion, optimizer)
        print()
    torch.save(model.state_dict(), 'cnn.state_dict.pth')
    return
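# A minimal sketch of the `train_iter` helper the wrapper above calls
# (hypothetical: the real helper is not shown here). It makes one pass over
# the loader with the standard zero_grad / backward / step update.
def train_iter(model, loader, criterion, optimizer):
    model.train()
    for inputs, labels in loader:
        optimizer.zero_grad()
        loss = criterion(model(inputs), labels)
        loss.backward()
        optimizer.step()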
def train():
    train_contents, train_labels = load_corpus('./dataset/train.txt', word2id, max_sen_len=50)
    val_contents, val_labels = load_corpus('./dataset/validation.txt', word2id, max_sen_len=50)
    # Merge the training and validation sets
    contents = np.vstack([train_contents, val_contents])
    labels = np.concatenate([train_labels, val_labels])
    # Build the training dataset and loader
    train_dataset = TensorDataset(torch.from_numpy(contents).type(torch.float),
                                  torch.from_numpy(labels).type(torch.long))
    train_dataloader = DataLoader(dataset=train_dataset, batch_size=config.batch_size,
                                  shuffle=True, num_workers=2)

    model = TextCNN(config)
    if config.model_path:
        model.load_state_dict(torch.load(config.model_path))
    model.to(device)

    # Optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    # Loss function
    criterion = nn.CrossEntropyLoss()

    # Training loop
    for epoch in range(config.epochs):
        for batch_idx, (batch_x, batch_y) in enumerate(train_dataloader):
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            output = model(batch_x)
            loss = criterion(output, batch_y)
            if batch_idx % 200 == 0 and config.verbose:
                print("Train Epoch:{}[{}/{} ({:.0f}%)]\tLoss:{:.6f}".format(
                    epoch + 1, batch_idx * len(batch_x), len(train_dataloader.dataset),
                    100. * batch_idx / len(train_dataloader), loss.item()))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Save the model
    torch.save(model.state_dict(), './models/model.pth')
def main():
    device = torch.device('cuda')

    embedding_vectors = torch.load(f'{EMBEDDINGS_DIR}/vectors.pkl')

    text_processor = TextProcessor(
        wti=pickle.load(open(f'{EMBEDDINGS_DIR}/wti.pkl', 'rb')),
        tokenizer=get_tokenizer('basic_english'),
        standardize=True,
        min_len=3,
    )

    dataset = TextDataset(CORPUS_DIR, text_processor)

    # split into training and test set; deriving the second length from the
    # first keeps the sizes summing to len(dataset) for any corpus size
    train_len = int(len(dataset) * DATA_SPLIT)
    train_set, test_set = torch.utils.data.random_split(
        dataset, [train_len, len(dataset) - train_len])

    # count number of samples in each class
    class_count = [0, 0]
    for data, label in dataset:
        class_count[int(label.item())] += 1

    # get relative weights for classes
    _sum = sum(class_count)
    class_count[0] /= _sum
    class_count[1] /= _sum

    # reverse the weights since we're getting the inverse for the sampler
    class_count = list(reversed(class_count))

    # set weight for every sample
    weights = [class_count[int(x[1].item())] for x in train_set]

    # weighted sampler
    sampler = torch.utils.data.WeightedRandomSampler(
        weights=weights, num_samples=len(train_set), replacement=True)

    train_loader = DataLoader(dataset=train_set, batch_size=32,
                              collate_fn=Sequencer(SEQUENCE_LEN),
                              sampler=sampler)
    test_loader = DataLoader(dataset=test_set, batch_size=32,
                             collate_fn=Sequencer(SEQUENCE_LEN))

    # number of filters for each filter size
    N_FILTERS = 64
    # kernel sizes of the convolutional layers (one layer per size)
    FILTER_SIZES = [2, 3]
    # dropout between conv and dense layers
    DROPOUT = 0.5

    model = TextCNN(
        embeddings=embedding_vectors,
        n_filters=N_FILTERS,
        filter_sizes=FILTER_SIZES,
        dropout=DROPOUT,
    ).to(device)
    print(model)
    print('Trainable params:',
          sum(p.numel() for p in model.parameters() if p.requires_grad))

    criterion = nn.BCELoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

    EPOCHS = 12
    best_acc = 0.0

    # training loop
    for epoch in range(EPOCHS):
        print('Epoch', epoch + 1)

        for i, data in tqdm(enumerate(train_loader), total=len(train_loader)):
            # get word indices vector and corresponding labels
            x, labels = data

            # send to device
            x = x.to(device)
            labels = labels.to(device)

            # make predictions
            predictions = model(x).squeeze()

            # calculate loss
            loss = criterion(predictions, labels)

            # learning stuff...
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

        # evaluate
        with torch.no_grad():
            model.eval()
            correct = 0
            wrong = 0
            m = [[0, 0], [0, 0]]
            for data in test_loader:
                x, label = data
                x = x.to(device)
                predictions = model(x).squeeze()
                for truth, prediction in zip(label, predictions):
                    y = int(truth.item())
                    y_pred = 1 if prediction.item() > 0.5 else 0
                    m[y][y_pred] += 1
                    if y == y_pred:
                        correct += 1
                    else:
                        wrong += 1
            model.train()

        acc = correct / (correct + wrong)
        if acc > best_acc:
            best_acc = acc
            # remove older checkpoints; the pattern matches the filename saved below
            for file in glob.glob('models/state_*.pth'):
                os.remove(file)
            torch.save(model.state_dict(), f'models/state_{epoch}.pth')

        print()
        print('Correct:', f'{correct}/{correct + wrong}', 'Accuracy:', acc)
        print('[[TN, FP], [FN, TP]]')
        print(m)
        print()

    # put into evaluation mode
    model.eval()
    text_processor.do_standardize = True

    with torch.no_grad():
        while True:
            text = input('Prompt: ')
            x = text_processor.process(text)
            x = torch.tensor(x).unsqueeze(dim=0)
            print(model(x.to(device)).squeeze())
        domain_t_loss = criterion(domain_outputs, event_labels)
        err = class_loss + domain_s_loss + domain_t_loss
        err.backward()
        optimizer.step()
        i += 1

        msg = 'epoch: %d, [iter: %d / all %d], class_loss: %f, domain_s_loss: %f, domain_t_loss: %f' \
              % (epoch, i, len_dataloader,
                 class_loss.item(), domain_s_loss.item(), domain_t_loss.item())
        print(msg)
        logging.info(msg)

    checkpoint_path = 'checkpoint/WithoutImage_' + str(epoch + 1) + '.pkl'
    torch.save(model.state_dict(), checkpoint_path)

    # test
    model = TextCNN(args, W)
    model.load_state_dict(torch.load(checkpoint_path))
    if torch.cuda.is_available():
        model.cuda()
    model.eval()
    test_sub = np.zeros((len(label_df['id']), 3), dtype=float)  # np.float was removed in NumPy 1.24
    batch = len(label_df['id']) // args.batch_size
    for i, (test_data, event_labels) in enumerate(test_loader):
        test_text, test_mask = to_var(test_data[0]), to_var(test_data[1])
        test_text = test_text.long()
        test_mask = test_mask.float()
def train():
    # Configuration file
    cf = Config('./config.yaml')

    # Use the GPU if one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Training data
    train_data = NewsDataset("./data/cnews_final_train.txt", cf.max_seq_len)
    train_dataloader = DataLoader(train_data, batch_size=cf.batch_size, shuffle=True)
    # Test data
    test_data = NewsDataset("./data/cnews_final_test.txt", cf.max_seq_len)
    test_dataloader = DataLoader(test_data, batch_size=cf.batch_size, shuffle=True)

    # Pre-trained embedding matrix
    embedding_matrix = get_pre_embedding_matrix("./data/final_vectors")

    # Model
    model = TextCNN(cf, torch.tensor(embedding_matrix))
    # Adam optimizer over the trainable parameters only
    optimizer = Adam(filter(lambda p: p.requires_grad, model.parameters()))
    # Move the model to the target device
    model.to(device)
    # Parallelize across GPUs when more than one is available
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)

    # Training
    start_time = time.time()
    total_batch = 0             # total number of batches seen
    best_acc_val = 0.0          # best validation accuracy so far
    last_improved = 0           # batch at which the last improvement happened
    require_improvement = 1000  # stop early after 1000 batches without improvement
    flag = False
    model.train()
    for epoch_id in trange(cf.epoch, desc="Epoch"):
        for step, batch in enumerate(tqdm(train_dataloader, "batch", total=len(train_dataloader))):
            label_id = batch['label_id'].squeeze(1).to(device)
            segment_ids = batch['segment_ids'].to(device)

            loss = model(segment_ids, label_id)

            loss.backward()
            optimizer.step()
            optimizer.zero_grad()

            total_batch += 1

            if total_batch % cf.print_per_batch == 0:
                model.eval()
                with torch.no_grad():
                    loss_train, acc_train = model.get_loss_acc(segment_ids, label_id)
                    loss_val, acc_val = evaluate(model, test_dataloader, device)

                if acc_val > best_acc_val:
                    # Save the best result so far
                    best_acc_val = acc_val
                    last_improved = total_batch
                    torch.save(model.state_dict(), "./output/model.bin")
                    improved_str = "*"
                else:
                    improved_str = ""

                time_dif = get_time_dif(start_time)
                msg = ('Iter: {0:>6}, Train Loss: {1:>6.2}, Train Acc: {2:>7.2%},'
                       ' Val Loss: {3:>6.2}, Val Acc: {4:>7.2%}, Time: {5} {6}')
                print(msg.format(total_batch, loss_train, acc_train, loss_val,
                                 acc_val, time_dif, improved_str))

                model.train()

            if total_batch - last_improved > require_improvement:
                print("No improvement for too long; stopping early")
                flag = True
                break
        if flag:
            break
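# A minimal sketch of the `evaluate` helper assumed above (hypothetical; it
# reuses the model's get_loss_acc, as the print block does, and averages
# loss/accuracy over the validation batches).
def evaluate(model, dataloader, device):
    total_loss, total_acc, n_batches = 0.0, 0.0, 0
    with torch.no_grad():
        for batch in dataloader:
            label_id = batch['label_id'].squeeze(1).to(device)
            segment_ids = batch['segment_ids'].to(device)
            loss, acc = model.get_loss_acc(segment_ids, label_id)
            total_loss += float(loss)
            total_acc += float(acc)
            n_batches += 1
    return total_loss / n_batches, total_acc / n_batches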
def train(args, states=None):
    config_obj = Config(args.config_file)
    config = config_obj.elements

    # make training runs deterministic
    set_seed(seed_value=config['random_seed'])

    logging.info("Loading datasets...")

    dataset, labels = load_embeddings(
        data_path=config['data'], label_path=config['labels'])

    train_loader, val_loader, test_loader = create_dataloaders(
        dataset,
        labels,
        batch_size=config['batch_size'],
        random_seed=config['random_seed'],
        balance=config['correct_imbalance'],
    )

    model = TextCNN(
        num_classes=config['num_classes'],
        embedding_size=config['embedding_size'],
        num_filters=config['num_filters'],
        dropout_rate=config['dropout'],
    )
    if torch.cuda.is_available():
        model.cuda()

    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])

    best_metric = 0

    # loop over the dataset multiple times
    for epoch in range(1, config['num_epochs'] + 1):
        logging.info(
            f"==================== Epoch: {epoch} ====================")
        running_losses = []
        for i, data in enumerate(train_loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients before each pass
            optimizer.zero_grad()

            # forward
            probs, classes = model(inputs)

            # backprop
            loss = loss_function(probs, labels)
            loss.backward()

            # update/optimize
            optimizer.step()

            # Log summary
            running_losses.append(loss.item())
            if i % args.log_interval == 0:
                interval_loss = sum(running_losses) / len(running_losses)
                logging.info(f"step = {i}, loss = {interval_loss}")
                running_losses = []

            if i % args.test_interval == 0:
                dev_metric = eval(
                    val_loader,
                    model,
                    loss_function,
                    args.eval_metric,
                )
                if dev_metric > best_metric:
                    best_metric = dev_metric
                    states = {
                        "epoch": epoch,
                        "step": i,
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict()
                    }
                    save_model_state(save_dir=args.model_dir, step=i, states=states)

    print(f"Finished Training, best {args.eval_metric}: {best_metric}")
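# A plausible sketch of the `eval` helper used above (hypothetical; the name
# shadows Python's builtin). It assumes `metric` is accuracy and that the
# model returns (probs, predicted classes), matching the training loop;
# `loss_function` is kept only for signature parity.
def eval(data_loader, model, loss_function, metric):
    model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        for inputs, labels in data_loader:
            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()
            probs, classes = model(inputs)
            correct += (classes == labels).sum().item()
            total += labels.size(0)
    model.train()
    return correct / total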
epoches = 2
emb_dim = 50          # word-embedding dimension
lr = 0.001
filter_num = 10       # number of convolutional filters
filtersizes = '3,4,5'
label_size = 4
dropout = 0.5
static = True         # use pre-trained word embeddings
fine_tune = False     # whether to fine-tune the pre-trained embeddings

# Load the pre-trained embedding information
vocab_array, word_to_ix, ix_to_word = get_embedding(glove_file)
train_iter = get_data_loader(sample_train_file, batch_size, word_to_ix, sentence_max_size)
test_iter = get_data_loader(sample_test_file, batch_size, word_to_ix, sentence_max_size)

# Define the model
model = TextCNN(vocab_array, label_size, filter_num, filtersizes,
                len(vocab_array), emb_dim, static, fine_tune, dropout)

# Training
logging.info('Starting model training')
train_model(model, train_iter, epoches, lr)
# Save the model parameters
torch.save(model.state_dict(), model_param_file)

logging.info('Starting model testing')
model_test(model, test_iter)
            y_hat = net(X)              # forward pass
            loss = criterion(y_hat, y)  # compute the loss
            optimizer.zero_grad()       # zero the gradients
            loss.backward()             # backpropagation
            optimizer.step()            # parameter update
            step += 1

            # evaluation
            if step % args.test_per_step == 0:
                net.eval()
                all_pre = []
                all_label = []
                for X, y in test_iter:
                    X, y = X.to(device), y.to(device)
                    y_hat = net(X)
                    y_pre = torch.argmax(y_hat, dim=-1)
                    all_pre.extend(y_pre.tolist())
                    all_label.extend(y.tolist())
                test_acc_sum = sum(
                    int(pre == label) for pre, label in zip(all_pre, all_label))
                test_acc = test_acc_sum / len(all_label)
                print('train_step %d, loss: %.4f, test_acc: %.4f'
                      % (step, loss.item(), test_acc))
                if test_acc > best_acc:
                    best_acc = test_acc
                    torch.save(net.state_dict(), './model/best_model.bin')
                    print('best_acc: ', best_acc)
                net.train()  # back to training mode
        if args.gpu:
            inputs = inputs.cuda()
            labels = labels.cuda()
        outputs = model(inputs)
        loss = loss_fn(outputs, labels).item()
        cum_loss += loss * labels.size(0)
        cum_cnt += labels.size(0)
    model.train()
    return cum_loss / cum_cnt

# Create the optimizer once, outside the loop, so Adam's running statistics
# are not reset on every iteration
optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)

while True:
    valid_loss = validate(net)
    if args.verbose:
        print('validation loss: %.5f' % (valid_loss))
    if ep == 0 or valid_loss < best_loss:
        best_loss = valid_loss
        best_model.load_state_dict(net.state_dict())
        no_improve_cnt = 0
    else:
        no_improve_cnt += 1
    if no_improve_cnt > 5 or ep > 1000:
        if args.verbose:
            print('final validation: %.5f' % (validate(best_model)))
            print('best validation: %.5f' % (best_loss))
        break

    # Train
    for it, data in enumerate(dataLoader, start=0):
        inputs, labels = data
        if args.gpu:
            inputs = inputs.cuda()
            labels = labels.cuda()
def train(self):
    best_valid_loss = 1e9
    all_valid_loss, all_valid_acc = 0, 0

    # CV loop
    for i in range(self.args.cv_num):
        model = TextCNN(self.vocab_size, self.pad_idx, self.args).to(device)

        # model variations (cf. "rand" is the default value)
        if self.args.mode == "static":
            model.static_embedding.weight.data.copy_(self.embeddings)
            model.static_embedding.weight.requires_grad = False
        elif self.args.mode == "non-static":
            model.static_embedding.weight.data.normal_(0, 1)
            model.static_embedding.weight.data.copy_(self.embeddings)
        elif self.args.mode == "multichannel":
            model.static_embedding.weight.data.copy_(self.embeddings)
            model.static_embedding.weight.requires_grad = False
            model.nonstatic_embedding.weight.data.copy_(self.embeddings)

        optimizer = optim.Adadelta(model.parameters())
        model.train()

        # generate the train dataset: fold i is held out as the test set
        print(f'>>> {i+1}th dataset is testset')
        dataset = self.dataset_list.copy()
        del dataset[i]  # remove the test fold
        dataset = functools.reduce(
            lambda x, y: x + y, dataset)  # Concatenate datasets consecutively.
        data_loader = DataLoader(dataset=dataset,
                                 batch_size=self.args.batch_size,
                                 shuffle=True,
                                 collate_fn=self.collate_fn)

        for epoch in range(self.args.epochs):  # Epoch loop
            pbar = tqdm(data_loader)
            for text, label in pbar:
                text = text.to(device)
                label = label.to(device)

                optimizer.zero_grad()
                predictions = model(text).squeeze(1)
                loss = self.criterion(predictions, label)
                acc = self._binary_accuracy(predictions, label)
                loss.backward()
                optimizer.step()

                # max-norm scaling: rescale the final layer's weights when
                # their l2 norm exceeds the constraint
                eps = 1e-7
                param = model.fc.weight
                norm = torch.norm(param)  # l2_norm
                if norm > self.args.l2_constraint:
                    param.data *= self.args.l2_constraint / (eps + norm)

                pbar.set_description(
                    f"loss : {loss.item():.4f}, acc : {acc.item():.4f}")

        valid_loss, valid_acc = self.evaluate(model, i)
        all_valid_loss += valid_loss.item()
        all_valid_acc += valid_acc.item()
        print(
            f'valid loss : {valid_loss.item():.3f}, valid acc : {valid_acc.item():.3f}'
        )

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            torch.save(
                model.state_dict(),
                osp.join(self.args.ck_path, f'{self.args.name}_best.pt'))

        if not self.args.cv:
            return

    print()
    print(f'Final loss : {all_valid_loss / self.args.cv_num:.3f}')
    print(f'Final acc : {all_valid_acc / self.args.cv_num:.3f}')
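# A plausible sketch of the `_binary_accuracy` method used in the CV loop
# above (hypothetical; it assumes `predictions` are raw logits for a binary
# task, consistent with a BCEWithLogits-style criterion).
def _binary_accuracy(self, predictions, labels):
    preds = torch.round(torch.sigmoid(predictions))  # threshold at 0.5
    return (preds == labels).float().mean()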
    return valid_loss / len(test_dataset), valid_acc / len(test_dataset)


if __name__ == "__main__":
    if args.mode == 'train':
        best_valid_acc = 0.0
        for epoch in range(args.epoch):
            start_time = time.time()
            train_loss, train_acc = train(args.train)
            valid_loss, valid_acc = test(args.dev)

            # save best model
            if valid_acc > best_valid_acc:
                best_valid_acc = valid_acc
                save_checkpoint({
                    'epoch': epoch + 1,
                    'state_dict': model.state_dict(),
                    'valid_acc': valid_acc
                }, True)

            secs = int(time.time() - start_time)
            mins = secs // 60
            secs = secs % 60

            writer.add_scalars("Loss", {
                'train': train_loss,
                'valid': valid_loss
            }, epoch)
            writer.add_scalars("Acc", {
                'train': train_acc,
                'valid': valid_acc
            }, epoch)
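# A minimal sketch of the `save_checkpoint` helper assumed above
# (hypothetical; it follows the common PyTorch pattern of writing the state
# and copying it to a "best" file when is_best is set).
import shutil

def save_checkpoint(state, is_best, filename='checkpoint.pth.tar'):
    torch.save(state, filename)
    if is_best:
        shutil.copyfile(filename, 'model_best.pth.tar')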