def train(**kwargs):
    # Override default options with any keyword arguments passed in.
    for k_, v_ in kwargs.items():
        setattr(options, k_, v_)

    training_set = TextDataset(path='data/train/train.csv',
                               model='wordvec/skipgram.bin',
                               max_length=options.max_length,
                               word_dim=options.word_dim)
    training_loader = Data.DataLoader(dataset=training_set,
                                      batch_size=options.batch_size,
                                      shuffle=True,
                                      drop_last=True)

    model = TextCNN(options.word_dim, options.max_length,
                    training_set.encoder.classes_.shape[0])
    if torch.cuda.is_available():
        model.cuda()

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=options.learning_rate)

    for epoch in tqdm(range(options.epochs)):
        loss_sum = 0
        for data, label in tqdm(training_loader):
            if torch.cuda.is_available():
                data = data.cuda()
                label = label.cuda()
            out = model(data)
            # Variable wrappers are deprecated; tensors track gradients directly.
            loss = criterion(out, label.squeeze().long())
            loss_sum += loss.item() / options.batch_size
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        tqdm.write(f'epoch {epoch + 1}: loss = {loss_sum / len(training_set.data)}')
        model.save(f'checkpoints/loss-{loss_sum / len(training_set.data)}.pt')
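# The train() above calls model.save(...), which assumes TextCNN exposes its own
# save method. A minimal sketch of such a method, assuming it simply wraps
# torch.save of the state dict (the body is an assumption, not the original code):
import torch
import torch.nn as nn

class TextCNN(nn.Module):
    # ... embedding, convolution layers, and forward() omitted ...

    def save(self, path):
        # Persist only the learned weights; restore with model.load_state_dict(torch.load(path)).
        torch.save(self.state_dict(), path)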
def evaluate():
    # test
    model = TextCNN(config)
    model.cuda()
    saved_model = torch.load(config.save_model)
    model.load_state_dict(saved_model["state_dict"])
    print("epoch:%s steps:%s best_valid_acc:%s" %
          (saved_model["epoch"], saved_model["steps"], saved_model["valid_acc"]))
    test_loss, test_acc, cm = test(config.test)
    print(f"\tLoss: {test_loss:.4f}(test)\t|\tAcc: {test_acc * 100:.1f}%(test)")
    print_confusion_matrix(cm, list(id2label.values()))
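# print_confusion_matrix is only called in evaluate() above. A minimal hypothetical
# sketch of such a helper, assuming cm is an N x N array of counts indexed
# [true_class, predicted_class]:
def print_confusion_matrix(cm, labels):
    col = 12
    # Header row listing the predicted-class labels.
    print('true\\pred'.ljust(col) + ''.join(str(l).ljust(col) for l in labels))
    # One row of raw counts per true class.
    for i, label in enumerate(labels):
        print(str(label).ljust(col) +
              ''.join(str(int(cm[i][j])).ljust(col) for j in range(len(labels))))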
def train(x, y):
    model = TextCNN()
    model = model.cuda()
    # Materialize the trainable parameters as a list so they can be reused every
    # step (a bare filter object would be exhausted after the first use).
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    # reduction='sum' replaces the deprecated size_average=False.
    criterion = nn.CrossEntropyLoss(reduction='sum')
    for epoch in range(100):
        total = 0
        # Integer division so range() receives an int.
        for i in range(0, len(x) // 64):
            batch_x = x[i * 64:(i + 1) * 64]
            batch_y = y[i * 64:(i + 1) * 64]
            batch_x = torch.FloatTensor(batch_x).cuda()
            batch_y = torch.LongTensor(batch_y).cuda()
            optimizer.zero_grad()
            model.train()
            pred = model(batch_x, 64)
            loss = criterion(pred, batch_y)
            #print(loss)
            loss.backward()
            # clip_grad_norm_ is the in-place replacement for the deprecated clip_grad_norm.
            nn.utils.clip_grad_norm_(parameters, max_norm=3)
            total += np.sum(
                pred.data.max(1)[1].cpu().numpy() == batch_y.data.cpu().numpy())
            optimizer.step()
        print("epoch ", epoch + 1, " acc: ", float(total) / len(x))
    return model
def train(args):
    # Split the data into training and validation iterators.
    train_iter, dev_iter = data_processor.load_data(args)
    print('Finished loading data')
    model = TextCNN(args)
    if args.cuda:
        model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    steps = 0
    best_acc = 0
    last_step = 0
    model.train()
    for epoch in range(1, args.epoch + 1):
        for batch in train_iter:
            feature, target = batch.text, batch.label
            # t_() would transpose (max_len, batch_size) to (batch_size, max_len) in place:
            # feature.data.t_(), target.data.sub_(1)  # subtract 1 from target
            # x.t() leaves x unchanged, so the result is assigned back instead.
            feature = feature.data.t()
            # target.data.sub_(1)
            if args.cuda:
                feature, target = feature.cuda(), target.cuda()
            optimizer.zero_grad()
            logits = model(feature)
            loss = F.cross_entropy(logits, target)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % args.log_interval == 0:
                # torch.max(logits, 1) returns each row's largest element together
                # with its column index, i.e. the predicted class.
                corrects = (torch.max(logits, 1)[1] == target).sum()
                train_acc = 100.0 * corrects / batch.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f} acc: {:.4f}%({}/{})'.format(
                        steps, loss.item(), train_acc, corrects, batch.batch_size))
            if steps % args.test_interval == 0:
                dev_acc = eval(dev_iter, model, args)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    last_step = steps
                    if args.save_best:
                        print('Saving best model, acc: {:.4f}%\n'.format(best_acc))
                        save(model, args.save_dir, 'best', steps)
                else:
                    if steps - last_step >= args.early_stopping:
                        print('\nearly stop by {} steps, acc: {:.4f}%'.format(
                            args.early_stopping, best_acc))
                        raise KeyboardInterrupt
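# The eval(dev_iter, model, args) helper called in train() above is not shown.
# A minimal sketch matching that call, assuming it returns dev accuracy as a
# percentage (the body is an assumption, not the original code):
def eval(data_iter, model, args):
    model.eval()
    corrects, total, loss_sum = 0, 0, 0.0
    with torch.no_grad():
        for batch in data_iter:
            feature, target = batch.text, batch.label
            feature = feature.data.t()
            if args.cuda:
                feature, target = feature.cuda(), target.cuda()
            logits = model(feature)
            loss_sum += F.cross_entropy(logits, target, reduction='sum').item()
            corrects += (torch.max(logits, 1)[1] == target).sum().item()
            total += target.size(0)
    model.train()  # restore training mode for the caller's loop
    accuracy = 100.0 * corrects / total
    print('\nEvaluation - loss: {:.6f} acc: {:.4f}%({}/{})'.format(
        loss_sum / total, accuracy, corrects, total))
    return accuracy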
def build_textcnn_model(vocab, config, train=True):
    model = TextCNN(vocab.vocab_size, config)
    if train:
        model.train()  # call train() before training the model
    else:
        # Call eval() before testing: it freezes BatchNorm and Dropout so they use
        # the values learned during training instead of per-batch statistics.
        model.eval()
    # train() and eval() exist because some layers behave differently during
    # training and evaluation, e.g. Batch Normalization and Dropout.
    # BatchNorm normalizes the activations of each intermediate layer and applies a
    # learned affine transform so the extracted feature distribution is preserved;
    # since its parameters are fixed after training, it behaves differently at
    # training and test time.
    # Dropout combats overfitting: by ignoring half of the feature detectors in
    # each training batch, it markedly reduces overfitting.
    if torch.cuda.is_available():
        model.cuda()
    else:
        model.cpu()
    return model
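# A small self-contained illustration of the train()/eval() distinction described
# in the comments above: Dropout is stochastic in training mode and becomes the
# identity in eval mode (layer size and dropout rate are arbitrary).
import torch
import torch.nn as nn

layer = nn.Dropout(p=0.5)
x = torch.ones(1, 8)

layer.train()
print(layer(x))  # roughly half the entries zeroed, survivors scaled by 1 / (1 - p)

layer.eval()
print(layer(x))  # identical to x: dropout is disabled at evaluation time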
dropout_rate = opts.dropout
kwargs = {
    'nb_classes': nb_classes,
    'vocab_size': vocab_size,
    'input_size': word_dim,
    'filter_shape': filter_shape,
    'pretrained_embed': pretrained_embed,
    'dropout_rate': dropout_rate
}

# Initialize the model
use_cuda = opts.cuda
text_cnn = TextCNN(kwargs)
print(text_cnn)
if use_cuda:
    text_cnn = text_cnn.cuda()
optimizer = torch.optim.Adam(text_cnn.parameters(), lr=0.001)
criterion = torch.nn.CrossEntropyLoss()

# Training
t0 = time()
nb_epoch = opts.nb_epoch
max_patience = opts.max_patience
current_patience = 0
root_model = opts.root_model
if not os.path.exists(root_model):
    os.makedirs(root_model)
path_model = os.path.join(root_model, 'textcnn.model')
best_dev_loss = 1000.
for epoch in range(nb_epoch):
    sys.stdout.write('epoch {0} / {1}: \r'.format(epoch, nb_epoch))
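    # --- the original snippet ends here, just inside the epoch loop ---
    # A minimal sketch (an assumption, not the original code) of how patience-based
    # early stopping with max_patience / current_patience / best_dev_loss / path_model
    # might continue; compute_dev_loss is a hypothetical helper returning this
    # epoch's validation loss.
    # ... forward/backward passes over the training batches go here ...
    dev_loss = compute_dev_loss(text_cnn, criterion, use_cuda)  # hypothetical helper
    if dev_loss < best_dev_loss:
        best_dev_loss = dev_loss
        current_patience = 0
        torch.save(text_cnn.state_dict(), path_model)  # keep the best checkpoint so far
    else:
        current_patience += 1
        if current_patience >= max_patience:
            print('\nearly stopping: dev loss has not improved '
                  'for {0} epochs'.format(max_patience))
            break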
deving_set = TextCharDataset(config,
                             path='data/dev.tsv',
                             vocab_file='./data/char2num.pkl')
training_iter = data.DataLoader(dataset=training_set,
                                batch_size=config.batch_size,
                                num_workers=2)
deving_iter = data.DataLoader(dataset=deving_set,
                              batch_size=config.batch_size,
                              num_workers=2)
config.word_num = len(training_set.tok2num)

model = TextCNN(config)
if torch.cuda.is_available():
    model.cuda()
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=config.lr)
training_losses = []

# Train the model
for epoch in range(config.epoch):
    model.train()
    for data, label in training_iter:
        if config.cuda and torch.cuda.is_available():
            data = data.cuda()
            label = label.cuda()
        out = model(data)
def train(args, states=None):
    config_obj = Config(args.config_file)
    config = config_obj.elements

    # make training runs deterministic
    set_seed(seed_value=config['random_seed'])

    logging.info("Loading datasets...")
    dataset, labels = load_embeddings(data_path=config['data'],
                                      label_path=config['labels'])
    train_loader, val_loader, test_loader = create_dataloaders(
        dataset,
        labels,
        batch_size=config['batch_size'],
        random_seed=config['random_seed'],
        balance=config['correct_imbalance'],
    )

    model = TextCNN(
        num_classes=config['num_classes'],
        embedding_size=config['embedding_size'],
        num_filters=config['num_filters'],
        dropout_rate=config['dropout'],
    )
    if torch.cuda.is_available():
        model.cuda()

    loss_function = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=config['lr'])
    best_metric = 0

    # loop over the dataset multiple times
    for epoch in range(1, config['num_epochs'] + 1):
        logging.info(f"==================== Epoch: {epoch} ====================")
        running_losses = []
        for i, data in enumerate(train_loader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data
            if torch.cuda.is_available():
                inputs, labels = inputs.cuda(), labels.cuda()

            # zero the parameter gradients before each pass
            optimizer.zero_grad()

            # forward
            probs, classes = model(inputs)

            # backprop
            loss = loss_function(probs, labels)
            loss.backward()

            # update/optimize
            optimizer.step()

            # Log summary
            running_losses.append(loss.item())
            if i % args.log_interval == 0:
                interval_loss = sum(running_losses) / len(running_losses)
                logging.info(f"step = {i}, loss = {interval_loss}")
                running_losses = []

            if i % args.test_interval == 0:
                dev_metric = eval(
                    val_loader,
                    model,
                    loss_function,
                    args.eval_metric,
                )
                if dev_metric > best_metric:
                    best_metric = dev_metric
                    states = {
                        "epoch": epoch,
                        "step": i,
                        "model": model.state_dict(),
                        "optimizer": optimizer.state_dict()
                    }
                    save_model_state(save_dir=args.model_dir, step=i, states=states)

    print(f"Finished Training, best {args.eval_metric}: {best_metric}")
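# save_model_state is only called in train() above. A minimal hypothetical sketch
# matching that call, assuming it simply serializes the states dict to save_dir:
import os
import torch

def save_model_state(save_dir, step, states):
    os.makedirs(save_dir, exist_ok=True)
    path = os.path.join(save_dir, f"textcnn_step_{step}.pt")
    # states holds epoch, step, model.state_dict() and optimizer.state_dict()
    torch.save(states, path)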
parser.add_argument('--without_unlabel', '-wu', action='store_true')
parser.add_argument('--learning_rate', '-lr', type=float, default=0.0001)
parser.add_argument('--save_dir', type=str, default='')
parser.add_argument('--gpu', action='store_true')
parser.add_argument('--verbose', action='store_true')
args = parser.parse_args()

# device = torch.device("cuda") if args.gpu else torch.device("cpu")
dataset = TextDataset(args.dataset, args.percent, wo_unlabel=args.without_unlabel)
print("Num of labeled data: ", len(dataset))
emb = dataset.get_emb()
num_class = dataset.num_class

net = TextCNN(emb, num_class)
best_model = TextCNN(emb, num_class)
if args.gpu:
    net.cuda()
    best_model.cuda()

indices = np.arange(len(dataset))
np.random.shuffle(indices)
split = round(0.1 * len(dataset))
train_idx, valid_idx = indices[split:], indices[:split]
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)
dataLoader = DataLoader(dataset, batch_size=32, num_workers=4, sampler=train_sampler)
validLoader = DataLoader(dataset, batch_size=32, num_workers=4, sampler=valid_sampler)

loss_fn = nn.CrossEntropyLoss()
softmax = nn.Softmax(dim=1)

# Print per iteration
len_print = 20
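# The setup above ends before the optimization loop. A minimal sketch of how
# training over dataLoader with net, loss_fn, and len_print might proceed; the
# Adam optimizer, the epoch count, and the (text, label) batch layout are
# assumptions, not the original code.
optimizer = torch.optim.Adam(net.parameters(), lr=args.learning_rate)

for epoch in range(10):  # epoch count is an assumption
    net.train()
    running_loss = 0.0
    for it, (text, label) in enumerate(dataLoader):
        if args.gpu:
            text, label = text.cuda(), label.cuda()
        optimizer.zero_grad()
        logits = net(text)
        loss = loss_fn(logits, label)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if (it + 1) % len_print == 0:  # print every len_print iterations
            print(f"epoch {epoch + 1} iter {it + 1}: "
                  f"loss = {running_loss / len_print:.4f}")
            running_loss = 0.0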