def main(**kwargs):
    """Train an LSTM text classifier, keep the best checkpoint by validation
    F1, then run it on the test set and dump probability / submission files.

    kwargs are forwarded to DefaultConfig.parse to override defaults.
    """
    t1 = time.time()  # BUGFIX: t1 was never defined; the final timing print raised NameError
    args = DefaultConfig()
    args.parse(kwargs)
    args.model = 'LSTM'
    args.device = 0
    args.id = 'word4'
    if not torch.cuda.is_available():
        args.cuda = False
        args.device = None
        torch.manual_seed(args.seed)  # set random seed for cpu

    train_iter, val_iter, test_iter, args.vocab_size, vectors = data.load_data(args)
    args.print_config()

    global best_score
    # NOTE(review): best_score is assumed to be initialised at module level;
    # fall back to 0.0 so a fresh interpreter does not raise NameError.
    if 'best_score' not in globals():
        best_score = 0.0

    # init model
    model = getattr(models, args.model)(args, vectors)
    print(model)

    # checkpoint location
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    save_path = os.path.join(args.save_dir, '{}_{}.pth'.format(args.model, args.id))

    if args.cuda:
        torch.cuda.set_device(args.device)
        torch.cuda.manual_seed(args.seed)  # set random seed for gpu
        model.cuda()

    # loss and optimizer
    criterion = F.cross_entropy
    lr1, lr2 = args.lr1, args.lr2
    optimizer = model.get_optimizer(lr1, lr2, args.weight_decay)

    for i in range(args.max_epochs):
        total_loss = 0.0
        correct = 0
        total = 0

        model.train()
        for idx, batch in enumerate(train_iter):
            # batch size must not be 1 when BatchNorm layers are present
            if len(batch) == 1:
                continue
            text, label = batch.text, batch.label
            if args.cuda:
                text, label = text.cuda(), label.cuda()
            optimizer.zero_grad()
            pred = model(text)
            loss = criterion(pred, label)
            loss.backward()
            optimizer.step()

            # running statistics
            total_loss += loss.item()
            predicted = pred.max(1)[1]
            total += label.size(0)
            correct += predicted.eq(label).sum().item()

            if idx % 80 == 79:
                # BUGFIX: loss is accumulated over 80 batches, so average
                # over 80 (the original divided by 20)
                print('[{}, {}] loss: {:.3f} | Acc: {:.3f}%({}/{})'.format(
                    i + 1, idx + 1, total_loss / 80,
                    100. * correct / total, correct, total))
                total_loss = 0.0

        # validate, keep the best checkpoint, and decay the learning rates
        f1score = val(model, val_iter, args)
        if f1score > best_score:
            best_score = f1score
            checkpoint = {
                'state_dict': model.state_dict(),
                'config': args
            }
            torch.save(checkpoint, save_path)
            print('Best tmp model f1score: {}'.format(best_score))
        if f1score < best_score:
            # roll back to the best checkpoint before decaying the learning rates
            model.load_state_dict(torch.load(
                save_path, map_location={'cuda:5': 'cuda:0'})['state_dict'])
            lr1 *= args.lr_decay
            lr2 = 2e-4 if lr2 == 0 else lr2 * 0.8
            optimizer = model.get_optimizer(lr1, lr2, 0)
            print('* load previous best model: {}'.format(best_score))
            print('* model lr:{} emb lr:{}'.format(lr1, lr2))
            if lr1 < args.min_lr:
                print('* training over, best f1 score: {}'.format(best_score))
                break

    # persist the final best model
    args.best_score = best_score
    final_model = {
        'state_dict': model.state_dict(),
        'config': args
    }
    best_model_path = os.path.join(
        args.save_dir,
        '{}_{}_{}.pth'.format(args.model, args.text_type, best_score))
    torch.save(final_model, best_model_path)
    print('Best Final Model saved in {}'.format(best_model_path))

    # run on the test set; dump probabilities and the submission csv
    if not os.path.exists('result/'):
        os.mkdir('result/')
    probs, test_pred = test(model, test_iter, args)
    result_path = 'result/' + '{}_{}_{}'.format(args.model, args.id, args.best_score)
    np.save('{}.npy'.format(result_path), probs)
    print('Prob result {}.npy saved!'.format(result_path))
    test_pred[['id', 'class']].to_csv('{}.csv'.format(result_path), index=None)
    print('Result {}.csv saved!'.format(result_path))

    t2 = time.time()
    print('time use: {}'.format(t2 - t1))
def main(**kwargs):
    """Train a stack of boosted models: each layer re-weights the loss with
    the per-class weights produced by the previous layer's validation run.

    kwargs are forwarded to DefaultConfig.parse to override defaults.
    """
    t1 = time.time()  # BUGFIX: t1 was never defined; the final timing print raised NameError
    args = DefaultConfig()
    args.parse(kwargs)
    # boosting: few epochs per layer
    args.max_epochs = 5
    if not torch.cuda.is_available():
        args.cuda = False
        args.device = None
        torch.manual_seed(args.seed)  # set random seed for cpu

    train_iter, val_iter, test_iter, args.vocab_size, vectors = util.load_data(args)
    args.print_config()

    # checkpoint location
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    save_path = os.path.join(
        args.save_dir,
        '{}_{}_{}.pth'.format(args.model, args.text_type, args.id))

    if args.cuda:
        torch.cuda.set_device(args.device)
        torch.cuda.manual_seed(args.seed)  # set random seed for gpu

    for lay_i in range(args.bo_layers):
        print('-------------- lay {} ---------------'.format(lay_i))
        model = getattr(models, args.model)(args, vectors)
        # BUGFIX: only move the model to the GPU when CUDA is actually in use;
        # the original called .cuda() unconditionally and crashed on CPU-only hosts
        if args.cuda:
            model = model.cuda()
        print(model)

        best_score = 0.0

        # loss and optimizer
        criterion = F.cross_entropy
        lr1 = args.lr1
        lr2 = args.lr2
        optimizer = model.get_optimizer(lr1, lr2, args.weight_decay)

        if lay_i != 0:
            # load the previous layer's loss weights
            saved_model = torch.load(args.model_path)
            loss_weight = saved_model['loss_weight']
            print(list(enumerate(loss_weight)))
            if args.cuda:  # BUGFIX: was unconditional .cuda()
                loss_weight = loss_weight.cuda()

        for i in range(args.max_epochs):
            total_loss = 0.0
            correct = 0
            total = 0

            model.train()
            for idx, batch in enumerate(train_iter):
                # batch size must not be 1 when BatchNorm layers are present
                if len(batch) == 1:
                    continue
                text, label = batch.text, batch.label
                if args.cuda:
                    text, label = text.cuda(), label.cuda()
                optimizer.zero_grad()
                pred = model(text)
                if lay_i != 0:
                    # re-centre the weights around 1 so the overall loss
                    # scale stays comparable between layers
                    loss = criterion(pred, label,
                                     weight=loss_weight + 1 - loss_weight.mean())
                else:
                    loss = criterion(pred, label)
                loss.backward()
                optimizer.step()

                # running statistics
                total_loss += loss.item()
                predicted = pred.max(1)[1]
                total += label.size(0)
                correct += predicted.eq(label).sum().item()

                if idx % 80 == 79:
                    # BUGFIX: average over the 80 accumulated batches
                    # (the original divided by 20)
                    print('[{}, {}] loss: {:.3f} | Acc: {:.3f}%({}/{})'.format(
                        i + 1, idx + 1, total_loss / 80,
                        100. * correct / total, correct, total))
                    total_loss = 0.0

            # validate, keep the best checkpoint, decay the learning rates
            f1score, tmp_loss_weight = val(model, val_iter, args)
            if f1score > best_score:
                best_score = f1score
                checkpoint = {'state_dict': model.state_dict(), 'config': args}
                torch.save(checkpoint, save_path)
                print('Best tmp model f1score: {}'.format(best_score))
            if f1score < best_score:
                model.load_state_dict(torch.load(save_path)['state_dict'])
                lr1 *= args.lr_decay
                lr2 = 2e-4 if lr2 == 0 else lr2 * 0.8
                optimizer = model.get_optimizer(lr1, lr2, 0)
                print('* load previous best model: {}'.format(best_score))
                print('* model lr:{} emb lr:{}'.format(lr1, lr2))
                if lr1 < args.min_lr:
                    print('* training over, best f1 score: {}'.format(best_score))
                    break

        # persist this layer's model together with its loss weights so the
        # next layer can pick them up
        loss_weight = tmp_loss_weight
        args.best_score = best_score
        final_model = {
            'state_dict': model.state_dict(),
            'config': args,
            'loss_weight': loss_weight
        }
        args.model_path = os.path.join(
            args.save_dir,
            '{}_{}_lay{}_{}.pth'.format(args.model, args.text_type, lay_i, best_score))
        torch.save(final_model, args.model_path)
        print('Best Final Model saved in {}'.format(args.model_path))

    t2 = time.time()
    print('time use: {}'.format(t2 - t1))
def main(**kwargs):
    """Train an entity-network model for aspect-level sentiment, plot the
    loss / val / test curves via visdom, and reload the best checkpoint at
    the end to report final test accuracy.

    kwargs are forwarded to DefaultConfig.parse to override defaults.
    """
    start_time = time.time()
    config = DefaultConfig()
    config.parse(kwargs)
    vis = Visualizer(config.env)
    if not torch.cuda.is_available():
        config.cuda = False
        config.device = None
        torch.manual_seed(config.seed)

    train_iter, val_iter, test_iter, config.vocab_size, config.target_vocab_size, \
        config.aspect_vocab_size, text_vectors, target_vectors, aspect_vectors = \
        data.load_data(config)
    # TODO: the classes are imbalanced; consider re-weighting the loss
    config.print_config()

    # init model
    model = getattr(models, config.model)(config, text_vectors, target_vectors,
                                          aspect_vectors)
    print(model)

    # checkpoint location
    if not os.path.exists(config.save_dir):
        os.mkdir(config.save_dir)
    tmp_save_path = os.path.join(config.save_dir, 'entnet_{}.pth'.format(config.id))

    if config.cuda:
        torch.cuda.set_device(config.device)
        torch.cuda.manual_seed(config.seed)  # set random seed for gpu
        model.cuda()

    # loss and optimizer
    criterion = F.cross_entropy
    lr1, lr2 = config.lr1, config.lr2
    optimizer = model.get_optimizer(lr1, lr2)

    global best_acc
    best_acc = 0.0

    # training loop
    for i in range(config.max_epoch):
        total_loss = 0.0
        correct = 0
        total = 0

        model.train()
        for idx, batch in enumerate(train_iter):
            text, target, aspect, label = batch.text, batch.target, batch.aspect, batch.label
            if config.cuda:
                text, target, aspect, label = \
                    text.cuda(), target.cuda(), aspect.cuda(), label.cuda()
            optimizer.zero_grad()
            pred = model(text, target, aspect)
            loss = criterion(pred, label)
            # NOTE(review): retain_graph=True suggests part of the graph is
            # reused across backward calls — confirm this is really required,
            # it grows memory usage otherwise
            loss.backward(retain_graph=True)
            optimizer.step()

            # running statistics
            total_loss += loss.item()
            predicted = pred.max(dim=1)[1]
            total += label.size(0)
            correct += predicted.eq(label).sum().item()

        # per-epoch training stats
        print('[Epoch {}] loss: {:.5f} | Acc: {:.3f}%({}/{})'.format(
            i + 1, total_loss, 100. * correct / total, correct, total))
        vis.plot('loss', total_loss)

        # every 5 epochs: validation accuracy, checkpointing, test accuracy
        if i % 5 == 4:
            acc, acc_n, val_n = val(model, val_iter, config)
            vis.plot('val_acc', acc)
            print('Epoch {} Val Acc: {:.3f}%({}/{})'.format(i + 1, acc, acc_n, val_n))
            # the model only approaches convergence after ~100 epochs; the
            # dataset is small, so earlier accuracy still fluctuates and
            # checkpointing before that point is not meaningful
            if i > 100:
                if acc >= best_acc:
                    best_acc = acc
                    checkpoint = {
                        'state_dict': model.state_dict(),
                        'config': config
                    }
                    torch.save(checkpoint, tmp_save_path)
            # test-set accuracy
            test_acc, test_acc_n, test_n = val(model, test_iter, config)
            vis.plot('test_acc', test_acc)
            print('Epoch {} Test Acc: {:.3f}%({}/{})\n'.format(
                i + 1, test_acc, test_acc_n, test_n))

    # reload the best checkpoint (if one was ever written) and report the
    # final test accuracy.
    # BUGFIX: the original loaded tmp_save_path unconditionally, which
    # crashes when max_epoch <= 101 because no checkpoint was ever saved.
    if os.path.exists(tmp_save_path):
        model.load_state_dict(torch.load(tmp_save_path)['state_dict'])
        print('Load tmp best model from {}'.format(tmp_save_path))
    test_acc, test_acc_n, test_n = val(model, test_iter, config)
    print('Finally Test Acc: {:.3f}%({}/{})'.format(test_acc, test_acc_n, test_n))
    print('Final cost time : {}s'.format(time.time() - start_time))
def main(**kwargs):
    """Train a classifier on tab-separated train/test files, pick the best
    model by validation F1, then write test-set probabilities and a
    submission csv.

    kwargs are forwarded to DefaultConfig.parse to override defaults.
    """
    t1 = time.time()  # BUGFIX: t1 was never defined; the final timing print raised NameError
    args = DefaultConfig()
    args.parse(kwargs)
    if not torch.cuda.is_available():
        args.cuda = False
        args.device = None
        torch.manual_seed(args.seed)  # set random seed for cpu

    # build the vocabulary over train + test so test tokens are covered
    train = pd.read_csv(args.train_path, sep='\t', encoding='utf-8', header=0)
    test_df = pd.read_csv(args.test_path, sep='\t', encoding='utf-8', header=0)
    corpus_all = pd.concat([train, test_df], axis=0)
    vocab = get_dictionary(corpus_all.text)
    args.vocab_size = len(vocab)

    train = list(zip(train.label, train.text))
    test = list(zip(test_df.label, test_df.text))
    train_data, val_data = train_test_split(train, test_size=0.1, random_state=1)
    train_iter = get_iter(train_data, vocab, args.batch_size, True, max_len=32)
    val_iter = get_iter(val_data, vocab, args.batch_size, True, max_len=32)
    test_iter = get_iter(test, vocab, args.batch_size, True, max_len=32)

    if args.pretrain_embeds_path is None:
        vectors = None
    else:
        # BUGFIX: pickle.load requires a binary file object; the original
        # passed the path string and raised TypeError
        with open(args.pretrain_embeds_path, 'rb') as f:
            vectors = pickle.load(f)
        assert len(vectors) == args.vocab_size, \
            '预训练的词向量shape[0]为%d,而字典大小为%d' % (
                len(vectors), args.vocab_size)
        assert vectors.shape[1] == args.embedding_dim, \
            '预训练词向量的shape[1]为%d,而设置的embedding_dim为%d' % (
                vectors.shape[1], args.embedding_dim)
    args.print_config()

    global best_score
    # NOTE(review): best_score is assumed to be initialised at module level;
    # fall back to 0.0 so a fresh interpreter does not raise NameError.
    if 'best_score' not in globals():
        best_score = 0.0

    # init model
    model = getattr(models, args.model)(args, vectors)
    print(model)

    # checkpoint location
    if not os.path.exists(args.save_dir):
        os.mkdir(args.save_dir)
    save_path = os.path.join(args.save_dir, '{}_{}.pth'.format(args.model, args.id))

    if args.cuda:
        torch.cuda.current_device()
        torch.cuda.set_device(args.device)
        torch.cuda.manual_seed(args.seed)  # set random seed for gpu
        model.cuda()

    # loss and optimizer
    criterion = F.cross_entropy
    lr1, lr2 = args.lr1, args.lr2
    optimizer = model.get_optimizer(lr1, lr2, args.weight_decay)

    for i in range(args.max_epochs):
        total_loss = 0.0
        pred_labels = []
        labels = []

        model.train()
        for idx, (b_x, b_y) in enumerate(train_iter):
            # batch size must not be 1 when BatchNorm layers are present
            if len(b_x) == 1:
                continue
            if args.cuda:
                b_x, b_y = b_x.cuda(), b_y.cuda()
            optimizer.zero_grad()
            pred = model(b_x)
            loss = criterion(pred, b_y)
            loss.backward()
            optimizer.step()

            # running statistics
            total_loss += loss.item()
            predicted = pred.max(1)[1]
            # BUGFIX: move tensors back to the CPU before .numpy(); the
            # original crashed when training on the GPU
            pred_labels.extend(predicted.cpu().numpy().tolist())
            labels.extend(b_y.cpu().numpy().tolist())

            if idx % 100 == 0:
                print('[{}, {}] loss: {:.3f}'.format(
                    i + 1, idx + 1, total_loss / (idx + 1)))
                tf1score = metrics.f1_score(labels, pred_labels)
                # BUGFIX: the original format string had one slot too few, so
                # the f1 score was silently dropped and the running loss was
                # printed under the tf1_score label
                print('[{}, {}] loss: {:.3f} tf1_score: {}'.format(
                    i + 1, idx + 1, total_loss / (idx + 1), tf1score))

        # validate, keep the best checkpoint, decay the learning rates
        f1score = val(model, val_iter, args)
        if f1score > best_score:
            best_score = f1score
            checkpoint = {'state_dict': model.state_dict(), 'config': args}
            torch.save(checkpoint, save_path)
            print('Best tmp model f1score: {}'.format(best_score))
        if f1score < best_score:
            model.load_state_dict(torch.load(save_path)['state_dict'])
            lr1 *= args.lr_decay
            lr2 = 2e-4 if lr2 == 0 else lr2 * 0.8
            optimizer = model.get_optimizer(lr1, lr2, 0)
            print('* load previous best model: {}'.format(best_score))
            print('* model lr:{} emb lr:{}'.format(lr1, lr2))
            if lr1 < args.min_lr:
                print('* training over, best f1 score: {}'.format(best_score))
                break

    # persist the final best model
    args.best_score = best_score
    final_model = {'state_dict': model.state_dict(), 'config': args}
    best_model_path = os.path.join(
        args.save_dir,
        '{}_{}_{}.pth'.format(args.model, args.text_type, best_score))
    torch.save(final_model, best_model_path)
    print('Best Final Model saved in {}'.format(best_model_path))

    # run on the test set; dump probabilities and the submission csv
    if not os.path.exists(args.result_path):
        os.mkdir(args.result_path)
    probs, pre_labels = predict(model, test_iter, args)
    result_path = args.result_path + '{}_{}_{}'.format(args.model, args.id,
                                                       args.best_score)
    np.save('{}.npy'.format(result_path), probs)
    print('Prob result {}.npy saved!'.format(result_path))
    test_df['label'] = np.array(pre_labels)
    test_df[['idx', 'seq_id', 'label']].to_csv('{}.csv'.format(result_path),
                                               index=None)
    print('Result {}.csv saved!'.format(result_path))

    t2 = time.time()
    print('time use: {}'.format(t2 - t1))
def main(**kwargs):
    """Train a text classifier and report train/test accuracy every epoch.

    kwargs are forwarded to DefaultConfig.parse to override defaults.
    """
    config = DefaultConfig()
    config.parse(kwargs)
    config.env = str(config.id)
    # NOTE(review): this binds the Visualizer class itself (no parentheses)
    # and `vis` is never used below — probably a leftover; kept as-is so we
    # don't open a visdom connection the original never opened
    vis = Visualizer

    # set random seed — cpu and gpu both need to be seeded
    torch.manual_seed(config.seed)
    torch.cuda.manual_seed(config.seed)  # no-op on CPU-only builds
    np.random.seed(config.seed)
    random.seed(config.seed)

    if not torch.cuda.is_available():
        config.cuda = False
        config.device = None

    train_iter, test_iter, emb_vectors = utils.load_data(config)
    config.print_config()

    model = getattr(models, config.model)(config, emb_vectors)
    print(model)
    if config.cuda:
        torch.cuda.set_device(config.device)
        model.cuda()

    # loss and optimizer
    loss_f = F.cross_entropy
    lr1, lr2 = config.lr1, config.lr2
    optimizer = model.get_optimizer(lr1, lr2)

    # print running stats roughly every 10000 training examples.
    # BUGFIX: guard against batch_size > 10000, which made the original
    # modulus zero and raised ZeroDivisionError
    log_every = max(1, 10000 // config.batch_size)

    for epoch in range(config.max_epochs):
        start_time = time.time()
        total_loss = 0.0
        correct = 0
        total = 0

        # BUGFIX: re-enter train mode every epoch. The original called
        # model.train() once before the loop; val() below presumably switches
        # the model to eval mode (standard practice — TODO confirm), so
        # epochs 2+ would have trained with dropout/batchnorm frozen.
        model.train()
        for batch_i, batch in enumerate(train_iter):
            text, label = batch.text[0], batch.label
            if config.cuda:
                text, label = text.cuda(), label.cuda()
            optimizer.zero_grad()
            pred = model(text)
            loss = loss_f(pred, label)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            predicted = pred.max(dim=1)[1]
            total += label.size(0)
            correct += predicted.eq(label).sum().item()

            if (batch_i + 1) % log_every == 0:
                print('[Epoch {}] loss: {:.5f} | Acc: {:.3f}%({}/{})'.format(
                    epoch + 1, total_loss, 100.0 * correct / total, correct, total))

        train_acc, train_acc_n, train_n = val(model, train_iter, config)
        print('Epoch {} time spends : {:.1f}s'.format(
            epoch + 1, time.time() - start_time))
        print('Epoch {} Train Acc: {:.2f}%({}/{})'.format(
            epoch + 1, train_acc, train_acc_n, train_n))
        test_acc, test_acc_n, test_n = val(model, test_iter, config)
        print('Epoch {} Test Acc: {:.2f}%({}/{})\n'.format(
            epoch + 1, test_acc, test_acc_n, test_n))