def main(self):
    processed_list, alphabet, _, emb_dim = pkl.load(
        open(self.config['res_path'].format(self.config['dataset']), 'rb'))
    if isinstance(processed_list, dict):
        processed_list = [processed_list]
    scores = []
    for data_list in processed_list:
        train_data = MyDatasetLoader(self.config, data_list, 'train').get_data()
        valid_data = MyDatasetLoader(self.config, data_list, 'valid').get_data()
        test_data = MyDatasetLoader(self.config, data_list, 'test').get_data()
        self.model = TextCNN(self.config, alphabet, emb_dim,
                             self.device).to(self.device)
        for w in self.model.parameters():
            print(w.shape, w.requires_grad)
        self.optimizer = Adam(
            filter(lambda x: x.requires_grad, self.model.parameters()),
            lr=self.config['lr'],
            weight_decay=float(self.config['l2']),
            eps=float(self.config['esp']))
        self.metrics = Metric()
        score = self.forward(train_data, valid_data, test_data)
        scores.append(score)
    print('| valid best | global best |')
    print('| --- | --- |')
    for w in scores:
        print("| {:.4f} | {:.4f} |".format(w[0], w[1]))
    if len(scores) > 1:
        print("| valid Avg | global Avg |")
        print("| {:.4f} | {:.4f} |".format(np.mean([w[0] for w in scores]),
                                           np.mean([w[1] for w in scores])))
def train_text_cnn(argv=None):
    # Load dataset
    train_dl, valid_dl, test_dl, TEXT, _ = get_dataloaders(SEED, args)

    # Create net
    filter_sizes = [int(i) for i in args.filter_sizes.split(',')]
    num_vocab = len(TEXT.vocab)
    EMB_DIM = 100
    pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
    output_dim = 2
    print('Dictionary size: {}'.format(num_vocab))
    text_cnn = TextCNN(num_vocab, EMB_DIM, args.num_filters, filter_sizes,
                       output_dim, args.dropout_r, pad_idx).to(args.device)

    # Load the pretrained embeddings
    pretrained_embeddings = TEXT.vocab.vectors
    text_cnn.embedding.weight.data.copy_(pretrained_embeddings)

    # Zero the embeddings of the unknown and padding tokens
    unk_idx = TEXT.vocab.stoi[TEXT.unk_token]
    text_cnn.embedding.weight.data[unk_idx] = torch.zeros(EMB_DIM)
    text_cnn.embedding.weight.data[pad_idx] = torch.zeros(EMB_DIM)
    # Freeze the embedding layer; requires_grad must be set on the weight
    # tensor, setting it on the module itself has no effect
    text_cnn.embedding.weight.requires_grad = False

    # Set up loss and optimizer
    loss_func = torch.nn.CrossEntropyLoss()
    acc_func = categorical_accuracy
    opt = torch.optim.Adam(text_cnn.parameters(), lr=args.lr)

    # Start training
    for epoch in range(args.epoch):
        train_single_epoch(text_cnn, loss_func, acc_func, train_dl, opt, epoch)
        evaluate(text_cnn, loss_func, acc_func, test_dl, epoch)
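# For reference, a minimal sketch of the Kim (2014)-style TextCNN module that
# constructor calls like the one above assume; the actual module used by these
# snippets may differ. Parallel convolutions slide over the embedded sequence,
# max-over-time pooling keeps the strongest feature per filter, and the
# concatenated features feed a dropout + linear classifier.
import torch
import torch.nn as nn
import torch.nn.functional as F

class TextCNN(nn.Module):
    def __init__(self, num_vocab, emb_dim, num_filters, filter_sizes,
                 output_dim, dropout, pad_idx):
        super().__init__()
        self.embedding = nn.Embedding(num_vocab, emb_dim, padding_idx=pad_idx)
        self.convs = nn.ModuleList(
            [nn.Conv2d(1, num_filters, (fs, emb_dim)) for fs in filter_sizes])
        self.dropout = nn.Dropout(dropout)
        self.fc = nn.Linear(num_filters * len(filter_sizes), output_dim)

    def forward(self, text):
        # text: (batch, seq_len) token ids
        embedded = self.embedding(text).unsqueeze(1)   # (batch, 1, seq, emb)
        conved = [F.relu(conv(embedded)).squeeze(3) for conv in self.convs]
        pooled = [F.max_pool1d(c, c.size(2)).squeeze(2) for c in conved]
        return self.fc(self.dropout(torch.cat(pooled, dim=1)))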
def main(*args, **kwargs):
    inputs = Inputs()
    config = Config()
    with tf.variable_scope("inference") as scope:
        m = TextCNN(config, inputs)
        scope.reuse_variables()
        # Pass the config instance, not the Config class
        mvalid = TextCNN(config, inputs)
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess = tf.Session()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    sess.run(init)
    try:
        index = 0
        while not coord.should_stop():
            _, loss_value = sess.run([m.train_op, m.cost])
            index += 1
            print("step: %d, loss: %f" % (index, loss_value))
            if index % 5 == 0:
                accuracy = sess.run(mvalid.validation_op)
                print("accuracy on validation is: " + str(accuracy))
    except tf.errors.OutOfRangeError:
        print("Done training: -------Epoch limit reached")
    except KeyboardInterrupt:
        print("keyboard interrupt detected, stop training")
    finally:
        coord.request_stop()
        coord.join(threads)
    sess.close()
    del sess
def build_textcnn_model(vocab, config, train=True):
    model = TextCNN(vocab.vocab_size, config)
    if train:
        model.train()
    else:
        model.eval()
    if torch.cuda.is_available():
        model.cuda()
    else:
        model.cpu()
    return model
def build_textcnn_model(vocab, config, train=True):
    model = TextCNN(vocab.vocab_size, config)
    if train:
        model.train()  # call train() before training the model
    else:
        # Call eval() before testing: it freezes BatchNorm and Dropout so
        # they use the values learned during training instead of batch
        # averages
        model.eval()
    if torch.cuda.is_available():
        model.cuda()
    else:
        model.cpu()
    return model
def predict():
    # Load the test set
    test_contents, test_labels = load_corpus('./dataset/test.txt', word2id,
                                             max_sen_len=50)
    test_dataset = TensorDataset(
        torch.from_numpy(test_contents).type(torch.float),
        torch.from_numpy(test_labels).type(torch.long))
    test_dataloader = DataLoader(dataset=test_dataset,
                                 batch_size=config.batch_size,
                                 shuffle=False,
                                 num_workers=2)
    # Load the model
    model = TextCNN(config)
    model.load_state_dict(torch.load(config.model_path))
    model.eval()
    model.to(device)

    # Testing loop
    count, correct = 0, 0
    with torch.no_grad():
        for batch_x, batch_y in test_dataloader:
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            output = model(batch_x)
            correct += (output.argmax(1) == batch_y).sum().item()
            count += len(batch_x)

    # Print the accuracy
    print('test accuracy is {:.2f}%.'.format(100 * correct / count))
def train(epochs):
    vocab_size = loader.vocab_size
    num_classes = loader.num_classes
    model = TextCNN(vocab_size, num_classes)
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters())
    for epoch in range(epochs):
        print('-' * 40 + ' epoch {} '.format(epoch) + '-' * 40)
        train_iter(model, loader, criterion, optimizer)
        print()
    torch.save(model.state_dict(), 'cnn.state_dict.pth')
def create_model(self, sess, config):
    text_cnn = TextCNN(config)
    saver = tf.train.Saver()
    if os.path.exists(FLAGS.ckpt_dir + "checkpoint"):
        print("Restoring Variables from Checkpoint.")
        saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        if FLAGS.decay_lr_flag:
            for i in range(2):  # decay learning rate if necessary
                print(i, "Going to decay learning rate by half.")
                sess.run(text_cnn.learning_rate_decay_half_op)
    else:
        print('Initializing Variables')
        sess.run(tf.global_variables_initializer())
        if not os.path.exists(FLAGS.ckpt_dir):
            os.makedirs(FLAGS.ckpt_dir)
        if FLAGS.use_pretrained_embedding:  # load pretrained word vectors
            print("===>>>going to use pretrained word embeddings...")
            old_emb_matrix = sess.run(text_cnn.Embedding.read_value())
            new_emb_matrix = load_word_embedding(old_emb_matrix,
                                                 FLAGS.word2vec_model_path,
                                                 FLAGS.embed_size,
                                                 self.index_to_word)
            # Convert to a tensor
            word_embedding = tf.constant(new_emb_matrix, dtype=tf.float32)
            # Assign word_embedding to text_cnn.Embedding
            t_assign_embedding = tf.assign(text_cnn.Embedding, word_embedding)
            sess.run(t_assign_embedding)
            print("using pre-trained word embedding. ended...")
    return text_cnn, saver
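# The load_word_embedding helper used above is not shown; here is a plausible
# sketch under the assumption that the word2vec file is gensim-readable and
# index_to_word maps row index to token. Rows for in-vocabulary words are
# overwritten with pretrained vectors; out-of-vocabulary words keep their
# random initialization from old_emb_matrix.
import numpy as np
from gensim.models import KeyedVectors

def load_word_embedding(old_emb_matrix, word2vec_model_path, embed_size,
                        index_to_word):
    w2v = KeyedVectors.load_word2vec_format(word2vec_model_path, binary=True)
    new_emb_matrix = np.array(old_emb_matrix, dtype=np.float32)
    for idx, word in index_to_word.items():
        if word in w2v:
            new_emb_matrix[idx] = w2v[word][:embed_size]
    return new_emb_matrix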
def main(*args, **kwargs):
    inputs = Inputs()
    print('inputs shape: %s' % str(inputs.inputs.shape))
    config = Config()
    with tf.variable_scope('inference') as scope:
        m = TextCNN(config, inputs)
        scope.reuse_variables()
    init = tf.group(tf.global_variables_initializer(),
                    tf.local_variables_initializer())
    sess = tf.Session()
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    sess.run(init)
    try:
        index = 0
        while not coord.should_stop() and index < 1:
            _, loss_value = sess.run([m.train_op, m.cost])
            index += 1
            print('step: %d, loss: %f' % (index, loss_value))
    except tf.errors.OutOfRangeError:
        print('Done training: -----Epoch limit reached')
    except KeyboardInterrupt:
        print('keyboard interrupt detected, stop training')
    finally:
        coord.request_stop()
        coord.join(threads)
    sess.close()
    del sess
def experiment_fn(run_config, params):
    text_cnn = TextCNN()
    estimator = tf.estimator.Estimator(model_fn=text_cnn.model_fn,
                                       model_dir=Config.train.model_dir,
                                       params=params,
                                       config=run_config)
    vocab = data_loader.load_vocab("vocab")
    Config.data.vocab_size = len(vocab)
    train_X, test_X, train_y, test_y = data_loader.make_train_and_test_set()
    train_input_fn, train_input_hook = dataset.get_train_inputs(train_X, train_y)
    test_input_fn, test_input_hook = dataset.get_test_inputs(test_X, test_y)
    experiment = tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=test_input_fn,
        train_steps=Config.train.train_steps,
        min_eval_frequency=Config.train.min_eval_frequency,
        train_monitors=[
            train_input_hook,
            hook.print_input(variables=['train/input_0'],
                             vocab=vocab,
                             every_n_iter=Config.train.check_hook_n_iter),
            hook.print_target(variables=['train/target_0', 'train/pred_0'],
                              every_n_iter=Config.train.check_hook_n_iter)
        ],
        eval_hooks=[test_input_hook])
    return experiment
def train_logicnn(argv=None):
    train_dl, valid_dl, test_dl, TEXT, _ = get_dataloaders(SEED, args,
                                                           rules=[but_rule])

    # Create net
    filter_sizes = [int(i) for i in args.filter_sizes.split(',')]
    num_vocab = len(TEXT.vocab)
    EMB_DIM = 100
    pad_idx = TEXT.vocab.stoi[TEXT.pad_token]
    output_dim = 2
    text_cnn = TextCNN(num_vocab, EMB_DIM, args.num_filters, filter_sizes,
                       output_dim, args.dropout_r, pad_idx).to(args.device)

    # Load the pretrained embeddings
    pretrained_embeddings = TEXT.vocab.vectors
    text_cnn.embedding.weight.data.copy_(pretrained_embeddings)

    # Zero the embeddings of the unknown and padding tokens
    unk_idx = TEXT.vocab.stoi[TEXT.unk_token]
    text_cnn.embedding.weight.data[unk_idx] = torch.zeros(EMB_DIM)
    text_cnn.embedding.weight.data[pad_idx] = torch.zeros(EMB_DIM)
    # Freeze the embedding layer; set requires_grad on the weight tensor,
    # not on the module
    text_cnn.embedding.weight.requires_grad = False

    logicnn = LogiCNN(text_cnn, 1)

    # Set up loss and optimizer
    loss_func = LogicLoss(args.pi_decay_factor, args.pi_lower_bound)
    acc_func = logic_categorical_accuracy
    opt = torch.optim.Adam(logicnn.parameters(), lr=args.lr)

    for epoch in range(args.epoch):
        train_logic_single_epoch(logicnn, loss_func, acc_func, train_dl, opt,
                                 epoch)
        evaluate_logic(logicnn, loss_func, acc_func, test_dl, epoch)
def create_model(sess, args, vocab_size, mode=constants.TRAIN,
                 load_pretrained_model=False, reuse=None):
    with tf.variable_scope(constants.CLS_VAR_SCOPE, reuse=reuse):
        model = TextCNN(mode, args.__dict__, vocab_size)
    if load_pretrained_model:
        try:
            model.saver.restore(sess, args.cls_model_save_dir)
            print("Loading model from", args.cls_model_save_dir)
        except Exception:
            model.saver.restore(
                sess, tf.train.latest_checkpoint(args.cls_model_save_dir))
            print("Loading model from",
                  tf.train.latest_checkpoint(args.cls_model_save_dir))
    else:
        if reuse is None:
            print("Creating model with new parameters.")
            sess.run(tf.global_variables_initializer())
        else:
            print('Reuse parameters.')
    return model
class Predictor:
    def __init__(self, config):
        self.config = config
        self.output_path = os.path.join(config.BASE_DIR, config.output_path)
        self.word_to_index, self.label_to_index = self.load_vocab()
        self.index_to_label = {
            value: key for key, value in self.label_to_index.items()
        }
        self.vocab_size = len(self.word_to_index)
        self.word_vectors = None
        self.sequence_length = self.config.sequence_length
        self.model = TextCNN(config=self.config,
                             vocab_size=self.vocab_size,
                             word_vectors=self.word_vectors)
        self.load_graph()

    def load_vocab(self):
        with open(os.path.join(self.output_path, 'word_to_index.pkl'), 'rb') as fr:
            word_to_index = pickle.load(fr)
        with open(os.path.join(self.output_path, 'label_to_index.pkl'), 'rb') as fr:
            label_to_index = pickle.load(fr)
        return word_to_index, label_to_index

    def load_graph(self):
        self.sess = tf.Session()
        ckpt = tf.train.get_checkpoint_state(
            os.path.join(self.config.BASE_DIR, self.config.ckpt_model_path))
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print("Reloading model parameters..")
            self.model.saver.restore(self.sess, ckpt.model_checkpoint_path)
        else:
            raise ValueError("No such file: [{}]".format(
                self.config.ckpt_model_path))

    def sentence_to_ids(self, sentence):
        sentence_ids = [
            self.word_to_index.get(token, self.word_to_index.get("<UNK>"))
            for token in sentence
        ]
        # Truncate or zero-pad to sequence_length, wrapped as a batch of one
        sentence_padded = [
            sentence_ids[:self.sequence_length]
            if len(sentence_ids) > self.sequence_length
            else sentence_ids + [0] * (self.sequence_length - len(sentence_ids))
        ]
        return sentence_padded

    def predict(self, sentence):
        sentence_ids = self.sentence_to_ids(sentence)
        prediction = self.model.infer(self.sess, sentence_ids).tolist()
        label = self.index_to_label[prediction[0][0]]
        return label
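# Hypothetical usage of the Predictor above; the tokenized input and the
# attributes carried by config are assumptions, not part of the original.
if __name__ == '__main__':
    predictor = Predictor(config)
    tokens = "this movie is great".split()  # Predictor expects a token list
    print(predictor.predict(tokens))        # e.g. 'positive'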
def train(x, y):
    model = TextCNN()
    model = model.cuda()
    # Materialize as a list: a filter object would be exhausted after the
    # first gradient-clipping call
    parameters = [p for p in model.parameters() if p.requires_grad]
    optimizer = optim.SGD(model.parameters(), lr=1e-3)
    criterion = nn.CrossEntropyLoss(reduction='sum')
    batch_size = 64
    for epoch in range(100):
        total = 0
        for i in range(len(x) // batch_size):  # integer division for Python 3
            batch_x = x[i * batch_size:(i + 1) * batch_size]
            batch_y = y[i * batch_size:(i + 1) * batch_size]
            batch_x = torch.FloatTensor(batch_x).cuda()
            batch_y = torch.LongTensor(batch_y).cuda()
            optimizer.zero_grad()
            model.train()
            pred = model(batch_x, batch_size)
            loss = criterion(pred, batch_y)
            loss.backward()
            nn.utils.clip_grad_norm_(parameters, max_norm=3)
            total += np.sum(pred.data.max(1)[1].cpu().numpy() ==
                            batch_y.data.cpu().numpy())
            optimizer.step()
        print("epoch ", epoch + 1, " acc: ", float(total) / len(x))
    return model
def train(**kwargs):
    for k_, v_ in kwargs.items():
        setattr(options, k_, v_)
    training_set = TextDataset(path='data/train/train.csv',
                               model='wordvec/skipgram.bin',
                               max_length=options.max_length,
                               word_dim=options.word_dim)
    training_loader = Data.DataLoader(dataset=training_set,
                                      batch_size=options.batch_size,
                                      shuffle=True,
                                      drop_last=True)
    model = TextCNN(options.word_dim, options.max_length,
                    training_set.encoder.classes_.shape[0])
    if torch.cuda.is_available():
        model.cuda()
    # Cross-entropy loss over the class logits
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=options.learning_rate)
    for epoch in tqdm(range(options.epochs)):
        loss_sum = 0
        for data, label in tqdm(training_loader):
            if torch.cuda.is_available():
                data = data.cuda()
                label = label.cuda()
            out = model(data)
            loss = criterion(out, label.squeeze().long())
            loss_sum += loss.item() / options.batch_size
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
        tqdm.write(f'epoch {epoch + 1}: loss = {loss_sum/len(training_set.data)}')
    model.save(f'checkpoints/loss-{loss_sum/len(training_set.data)}.pt')
def __init__(self, config):
    self.config = config
    self.output_path = os.path.join(config.BASE_DIR, config.output_path)
    self.word_to_index, self.label_to_index = self.load_vocab()
    self.index_to_label = {
        value: key for key, value in self.label_to_index.items()
    }
    self.vocab_size = len(self.word_to_index)
    self.word_vectors = None
    self.sequence_length = self.config.sequence_length
    self.model = TextCNN(config=self.config,
                         vocab_size=self.vocab_size,
                         word_vectors=self.word_vectors)
    self.load_graph()
def test():
    # Configuration file
    cf = Config('./config.yaml')
    # Use the GPU if one is available
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    # Test data
    test_data = NewsDataset("./data/cnews_final_test.txt", cf.max_seq_len)
    test_dataloader = DataLoader(test_data, batch_size=cf.batch_size,
                                 shuffle=True)
    # Pretrained word-vector matrix
    embedding_matrix = get_pre_embedding_matrix("./data/final_vectors")
    # Model
    model = TextCNN(cf, torch.tensor(embedding_matrix))
    # model.load_state_dict(torch.load("./output/model.bin", map_location='cpu'))
    model.load_state_dict(torch.load("./output/model.bin"))
    # Move the model to the chosen device
    model.to(device)
    # Parallelize the model across GPUs
    if torch.cuda.device_count() > 1:
        model = torch.nn.DataParallel(model)
    # Evaluation
    model.eval()
    y_pred = np.array([])
    y_test = np.array([])
    for step, batch in enumerate(tqdm(test_dataloader, "batch",
                                      total=len(test_dataloader))):
        label_id = batch['label_id'].squeeze(1).to(device)
        segment_ids = batch['segment_ids'].to(device)
        with torch.no_grad():
            pred = model.get_labels(segment_ids)
        y_pred = np.hstack((y_pred, pred))
        y_test = np.hstack((y_test, label_id.to("cpu").numpy()))
    # Metrics
    print("Precision, Recall and F1-Score...")
    print(metrics.classification_report(y_test, y_pred,
                                        target_names=get_labels('./data/label')))
    # Confusion matrix
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_test, y_pred)
    print(cm)
def build_textcnn_model(vocab, config, train=True):
    model = TextCNN(vocab.vocab_size, config)
    if train:
        model.train()  # call train() before training the model
    else:
        # Call eval() before testing: it freezes BatchNorm and Dropout so
        # they use the values learned during training instead of batch
        # averages.
        # train() and eval() exist because some layers behave differently
        # at train and eval time, e.g. Batch Normalization and Dropout.
        # BN normalizes each intermediate layer and applies a learned
        # transform so the extracted feature distribution is preserved;
        # since all parameters are fixed after training, BN behaves
        # differently at train and test time.
        # Dropout combats overfitting: by ignoring half of the feature
        # detectors in each training batch, it markedly reduces overfitting.
        model.eval()
    if torch.cuda.is_available():
        model.cuda()
    else:
        model.cpu()
    return model
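# A quick demonstration of the train()/eval() difference described in the
# comments above: in train mode Dropout zeroes activations at random, while
# in eval mode it is the identity.
import torch
import torch.nn as nn

drop = nn.Dropout(p=0.5)
x = torch.ones(1, 8)
drop.train()
print(drop(x))  # roughly half the entries zeroed, survivors scaled by 2
drop.eval()
print(drop(x))  # unchanged: a tensor of ones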
def setUp(self):
    super().setUp()
    self.train_dataset = Word2vecStaticDataset(
        is_train=True,
        label_path='../data/train_label.npy',
        data_path='../data/word2vec_martix.npy')
    self.test_dataset = Word2vecStaticDataset(
        is_train=False,
        label_path='../data/train_label.npy',
        data_path='../data/word2vec_martix.npy')
    model = TextCNN()
    self.tct = NormTrainer(model=model,
                           train_dataset=self.train_dataset,
                           test_dataset=self.test_dataset)
def __init__(self):
    self.config = TCNNConfig()
    self.categories, self.cat_to_id = read_category()
    self.words, self.word_to_id = read_vocab(vocabdir)
    self.config.vocab_size = len(self.words)
    self.model = TextCNN(self.config)
    self.session = tf.Session()
    self.session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess=self.session, save_path=save_path)
def train(**kwargs):
    opt.parse(kwargs)
    device = torch.device(
        "cuda:{}".format(opt.gpu_id) if torch.cuda.is_available() else "cpu")
    opt.device = device
    x_text, y = load_data_and_labels("./data/rt-polarity.pos",
                                     "./data/rt-polarity.neg")
    x_train, x_test, y_train, y_test = train_test_split(
        x_text, y, test_size=opt.test_size)
    train_data = Data(x_train, y_train)
    test_data = Data(x_test, y_test)
    train_loader = DataLoader(train_data, batch_size=opt.batch_size,
                              shuffle=True, collate_fn=collate_fn)
    test_loader = DataLoader(test_data, batch_size=opt.batch_size,
                             shuffle=False, collate_fn=collate_fn)
    print("{} train data: {}, test data: {}".format(now(), len(train_data),
                                                    len(test_data)))
    model = TextCNN(opt)
    print("{} init model finished".format(now()))
    if opt.use_gpu:
        model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=opt.lr,
                           weight_decay=opt.weight_decay)
    for epoch in range(opt.epochs):
        total_loss = 0.0
        model.train()
        for step, batch_data in enumerate(train_loader):
            x, labels = batch_data
            labels = torch.LongTensor(labels)
            if opt.use_gpu:
                labels = labels.to(device)
            optimizer.zero_grad()
            output = model(x)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        acc = test(model, test_loader)
        print("{} {} epoch: loss: {}, acc: {}".format(now(), epoch,
                                                      total_loss, acc))
def __init__(self, config):
    self.config = config
    self.train_data_loader = None
    self.eval_data_loader = None

    # Load the datasets
    self.load_data()
    self.train_inputs, self.train_labels, label_to_idx = \
        self.train_data_loader.gen_data()
    self.vocab_size = self.train_data_loader.vocab_size
    self.word_vectors = self.train_data_loader.word_vectors
    print(f"train data size: {len(self.train_labels)}")
    print(f"vocab size: {self.vocab_size}")
    self.label_list = [value for key, value in label_to_idx.items()]
    self.eval_inputs, self.eval_labels = self.eval_data_loader.gen_data()

    # Initialize the model
    self.model = TextCNN(config=self.config,
                         vocab_size=self.vocab_size,
                         word_vectors=self.word_vectors)
def main():
    model = TextCNN()
    trainer = NormTrainer(model=model,
                          train_dataset=test_dataset,
                          test_dataset=test_dataset)
    for epoch in range(Config.CNN_EPOCH):
        print("=============================== EPOCH {:d} "
              "===============================".format(epoch))
        trainer.train()
        trainer.test()
    trainer.save_model('../pretrained/text_cnn_static.h5')
def train_TextCNN():
    model = TextCNN(TextCNNConfig)
    loss = CrossEntropyLoss(pred="pred", target="target")
    metrics = AccuracyMetric(pred="pred", target="target")
    trainer = Trainer(model=model,
                      train_data=dataset_train,
                      dev_data=dataset_dev,
                      loss=loss,
                      metrics=metrics,
                      batch_size=16,
                      n_epochs=15)
    trainer.train()
    tester = Tester(dataset_test, model, metrics)
    tester.test()
def test(config):
    device = 'cuda' if config['cuda'] else 'cpu'
    model = TextCNN.load(config['model_path']).to(device)
    with open(f"{config['text_vocab']}", "rb") as f:
        TEXT = dill.load(f)
    with open(f"{config['label_vocab']}", "rb") as f:
        LABEL = dill.load(f)
    _, test_data = IMDB.splits(TEXT, LABEL, root=config['data_path'])
    test_iter = torchtext.data.Iterator(test_data,
                                        batch_size=config['batch_size'],
                                        device=device)
    loss_fn = nn.CrossEntropyLoss(
        weight=torch.tensor(config['class_weight'], device=device))
    val_loss, accuracy = evaluate(model, test_iter, loss_fn)
    print(f"val_loss:{val_loss} - accuracy:{accuracy}")
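# A sketch of the save-side counterpart that test() above assumes: after the
# torchtext TEXT and LABEL fields build their vocabularies at training time,
# they are serialized with dill to the same config paths. The function name
# is an assumption for illustration.
import dill

def save_vocabs(config, TEXT, LABEL):
    with open(config['text_vocab'], 'wb') as f:
        dill.dump(TEXT, f)
    with open(config['label_vocab'], 'wb') as f:
        dill.dump(LABEL, f)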
def train():
    train_contents, train_labels = load_corpus('./dataset/train.txt', word2id,
                                               max_sen_len=50)
    val_contents, val_labels = load_corpus('./dataset/validation.txt', word2id,
                                           max_sen_len=50)
    # Merge the training and validation sets
    contents = np.vstack([train_contents, val_contents])
    labels = np.concatenate([train_labels, val_labels])

    # Build the training data loader
    train_dataset = TensorDataset(
        torch.from_numpy(contents).type(torch.float),
        torch.from_numpy(labels).type(torch.long))
    train_dataloader = DataLoader(dataset=train_dataset,
                                  batch_size=config.batch_size,
                                  shuffle=True,
                                  num_workers=2)
    model = TextCNN(config)
    if config.model_path:
        model.load_state_dict(torch.load(config.model_path))
    model.to(device)

    # Set up the optimizer
    optimizer = torch.optim.Adam(model.parameters(), lr=config.learning_rate)
    # Set up the loss function
    criterion = nn.CrossEntropyLoss()

    # Training loop
    for epoch in range(config.epochs):
        for batch_idx, (batch_x, batch_y) in enumerate(train_dataloader):
            batch_x, batch_y = batch_x.to(device), batch_y.to(device)
            output = model(batch_x)
            loss = criterion(output, batch_y)
            # Log progress every 200 batches when verbose
            if batch_idx % 200 == 0 and config.verbose:
                print("Train Epoch:{}[{}/{} ({:.0f}%)]\tLoss:{:.6f}".format(
                    epoch + 1, batch_idx * len(batch_x),
                    len(train_dataloader.dataset),
                    100. * batch_idx / len(train_dataloader), loss.item()))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

    # Save the model
    torch.save(model.state_dict(), './models/model.pth')
def train(args):
    # Split the data into training and validation sets
    train_iter, dev_iter = data_processor.load_data(args)
    print('Data loading finished')
    model = TextCNN(args)
    if args.cuda:
        model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    steps = 0
    best_acc = 0
    last_step = 0
    model.train()
    for epoch in range(1, args.epoch + 1):
        for batch in train_iter:
            feature, target = batch.text, batch.label
            # t_() would transpose (max_len, batch_size) to
            # (batch_size, max_len) in place:
            # feature.data.t_(), target.data.sub_(1)  # subtract 1 from target
            feature = feature.data.t()  # x.t() leaves x unchanged, so reassign
            # target.data.sub_(1)
            if args.cuda:
                feature, target = feature.cuda(), target.cuda()
            optimizer.zero_grad()
            logits = model(feature)
            loss = F.cross_entropy(logits, target)
            loss.backward()
            optimizer.step()
            steps += 1
            if steps % args.log_interval == 0:
                # torch.max(logits, 1) returns each row's maximum value and
                # its column index
                corrects = (torch.max(logits, 1)[1] == target).sum()
                train_acc = 100.0 * corrects / batch.batch_size
                sys.stdout.write(
                    '\rBatch[{}] - loss: {:.6f} acc: {:.4f}%({}/{})'.format(
                        steps, loss.item(), train_acc, corrects,
                        batch.batch_size))
            if steps % args.test_interval == 0:
                dev_acc = eval(dev_iter, model, args)
                if dev_acc > best_acc:
                    best_acc = dev_acc
                    last_step = steps
                    if args.save_best:
                        print('Saving best model, acc: {:.4f}%\n'.format(
                            best_acc))
                        save(model, args.save_dir, 'best', steps)
                else:
                    if steps - last_step >= args.early_stopping:
                        print('\nearly stop by {} steps, acc: {:.4f}%'.format(
                            args.early_stopping, best_acc))
                        raise KeyboardInterrupt
def evaluate():
    # test
    model = TextCNN(config)
    model.cuda()
    saved_model = torch.load(config.save_model)
    model.load_state_dict(saved_model["state_dict"])
    print("epoch:%s steps:%s best_valid_acc:%s" %
          (saved_model["epoch"], saved_model["steps"],
           saved_model["valid_acc"]))
    test_loss, test_acc, cm = test(config.test)
    print(f"\tLoss: {test_loss:.4f}(test)\t|\tAcc: {test_acc * 100:.1f}%(test)")
    print_confusion_matrix(cm, list(id2label.values()))
def main():
    print("Get pre-trained embedding weight...")
    word2vec_util = Word2vecUtil(word2vec_path=Config.WORD2VEC_PATH)
    wordembedding_util = WordEmbeddingUtil()
    pre_weight = word2vec_util.get_weight()
    emb_weight = wordembedding_util.get_embedding_weight(pre_weight)
    emb_weight = torch.tensor(emb_weight)
    gc.collect()
    print("Get pre-trained embedding weight finished")

    print("Build model...")
    model = TextCNN(pretrained_weight=emb_weight, is_static=False).double()
    print("Build model finished")
    trainer = NormTrainer(model=model,
                          train_dataset=test_dataset,
                          test_dataset=test_dataset)

    print("Begin Train Text-CNN")
    for epoch in range(Config.CNN_EPOCH):
        print("=============================== EPOCH {:d} "
              "===============================".format(epoch))
        trainer.train()
        if epoch % 5 == 0:
            print("=============================== Test "
                  "===============================")
            trainer.test()
    trainer.save_model('../../pretrained/text_cnn_static.h5')
def train(conf):
    data_train = pd.read_csv(conf['train_file'])
    data_val = pd.read_csv(conf['val_file'])
    processor = Processor(conf)
    processor.init(conf['w2v_path'])
    train_x = processor.get_features(data_train)
    val_x = processor.get_features(data_val)
    labels = conf['labels']
    grade2idx, idx2grade = grade_map(data_train[labels[0]].tolist())
    with codecs.open('./data/grade_idx.map', 'w') as f:
        json.dump(grade2idx, f)
    for label in labels:
        train_y = processor.get_labels(data_train, label, grade2idx)
        val_y = processor.get_labels(data_val, label, grade2idx)
        model = TextCNN(conf['num_class'], conf['seq_len'],
                        processor.to_embedding(), conf['num_filters'],
                        conf['filter_sizes']).model
        model.compile(loss='categorical_crossentropy', optimizer='adam')
        mtr = Metrics()
        model_checkpoint = ModelCheckpoint(
            './save_model/{}.krs.save_model'.format(label),
            monitor='val_loss', verbose=1, save_best_only=True, mode='min')
        early_stopping = EarlyStopping(monitor='val_loss', patience=3,
                                       verbose=1, mode='min')
        # model.summary() prints and returns None, so route it to the logger
        model.summary(print_fn=logging.info)
        logging.info('start train for label : {}'.format(label))
        history = model.fit(x=train_x, y=train_y, batch_size=256, epochs=20,
                            verbose=1,
                            callbacks=[mtr, model_checkpoint, early_stopping],
                            validation_data=(val_x, val_y), shuffle=True)
        logging.info('model train history for label : {}'.format(label))
        logging.info(str(history.history))
    logging.info('all labels model train finished')