def run(config, output_dir, num_splits=5, patience=0):
    use_cuda = torch.cuda.is_available() and config.cuda_device >= 0
    vocab_file = 'data/twitter_hashtag/1kthashtag.vocab'
    dataset_file = 'data/DataSetsEraldo/dataSetSupernatural.txt'
    # The returned embedding tensor is kept unchanged to init each split model.
    emb_ = load_glove_embedding('data/twitter_hashtag/1kthashtag.glove')
    criterion = nn.CrossEntropyLoss()
    corpus = CorpusTE(train_file=dataset_file, vocab_file=vocab_file)
    metrics = {
        'accuracy': skmetrics.accuracy_score,
        'fscore_class1': skmetrics.f1_score
    }
    if config.stratified:
        def fun_split(vs):
            return corpus.stratified_split(vs)
    else:
        def fun_split(vs):
            return corpus.split(vs)

    mean = 0.0
    for split in range(1, num_splits + 1):
        # Create a copy of the embedding tensor to avoid information leak between splits.
        # It is important to call detach(), since clone() is recorded in the computation graph
        # (gradients propagated to the cloned tensor will be propagated to the original tensor).
        emb = emb_.clone().detach()
        model = TextCNN(config=config, pre_trained_emb=emb)
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
        train_corpus, valid_corpus = fun_split(config.valid_split)
        output_dir_split = os.path.join(output_dir, "split{}".format(split))
        t = Trainer(train_corpus=train_corpus,
                    valid_corpus=valid_corpus,
                    test_corpus=None,
                    model=model,
                    config=config,
                    criterion=criterion,
                    optimizer=optimizer,
                    verbose=False,
                    output_dir=output_dir_split,
                    train_metrics=metrics,
                    val_metrics=metrics,
                    selection_metric='fscore_class1',
                    use_cuda=use_cuda)
        res = t.train(tqdm_prefix="Split {}/{}".format(split, num_splits),
                      patience=patience,
                      init_res_dict={"split": split})
        pprint(res["best"])
        mean = mean + res['best']['selection_metric']

    mean = mean / num_splits
    print(mean)
def train():
    vocab_file = 'data/twitter_hashtag/1kthashtag.vocab'
    dataset_file = 'data/DataSetsEraldo/dataSetSupernatural.txt'
    config = TextCNNConfig()
    config.batch_size = 128
    config.stratified = False
    config.balanced = True
    config.stratified_batch = False
    corpus = CorpusTE(train_file=dataset_file, vocab_file=vocab_file)
    if config.stratified:
        train_corpus, valid_corpus = corpus.stratified_split(
            valid_split=config.valid_split)
    else:
        train_corpus, valid_corpus = corpus.split(
            valid_split=config.valid_split)

    num_epochs = 12
    num_iter = num_epochs * ceil(len(train_corpus.y_data) / config.batch_size)
    lr_min = 1e-5
    lr_max = 1
    config.learning_rate = lr_min
    config.num_epochs = num_epochs
    emb = load_glove_embedding('data/twitter_hashtag/1kthashtag.glove')
    model = TextCNN(config=config, pre_trained_emb=emb)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
    output_dir = "results/out_train_{}".format(
        datetime.datetime.now().strftime("%Y%m%d-%H%M%S-%f"))
    metrics = {
        'accuracy': skmetrics.accuracy_score,
        'fscore_class1': skmetrics.f1_score
    }
    lr_scheduler = LambdaLR(
        optimizer, lambda it: (lr_max / lr_min)**(it / (num_iter - 1)))
    t = Trainer(train_corpus=train_corpus,
                valid_corpus=valid_corpus,
                test_corpus=None,
                model=model,
                config=config,
                criterion=criterion,
                optimizer=optimizer,
                verbose=True,
                output_dir=output_dir,
                train_metrics=metrics,
                val_metrics=metrics,
                selection_metric='fscore_class1',
                lr_scheduler=lr_scheduler)
    res = t.train(patience=0)
    pprint(res["best"])
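# Note on the LambdaLR schedule above: it multiplies the base learning rate
# (lr_min) by (lr_max / lr_min)**(it / (num_iter - 1)), so the effective rate
# sweeps geometrically from lr_min up to lr_max over the run -- a standard
# learning-rate range test. A minimal, self-contained sketch of the same idea;
# the model, optimizer, and iteration count below are placeholders, not the
# objects used in the function above.
import torch
from torch.optim.lr_scheduler import LambdaLR

_model = torch.nn.Linear(10, 2)
_opt = torch.optim.Adam(_model.parameters(), lr=1e-5)   # lr == lr_min
_num_iter, _lr_min, _lr_max = 100, 1e-5, 1.0
_sched = LambdaLR(_opt, lambda it: (_lr_max / _lr_min) ** (it / (_num_iter - 1)))
for _ in range(_num_iter):
    _opt.step()       # normally: forward pass, loss, backward, then step
    _sched.step()
print(_opt.param_groups[0]['lr'])  # ends near lr_max (1.0)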
def run(config, output_dir, num_rep=5, valid_split=0.2, patience=0):
    use_cuda = torch.cuda.is_available()
    mean = 0.0
    ## barbara
    vocab_file = 'data/twitter_hashtag/1kthashtag.vocab'
    dataset_file = 'data/twitter_hashtag/multiple.txt'
    emb = load_glove_embedding('data/twitter_hashtag/1kthashtag.glove')
    criterion = nn.CrossEntropyLoss()
    corpus = TwitterHashtagCorpus(train_file=dataset_file, vocab_file=vocab_file)
    config.vocab_size = corpus.vocab_size
    train_corpus = Corpus()
    train_corpus.x_data = corpus.x_train[:1000]
    train_corpus.y_data = corpus.y_train[:1000]
    valid_corpus = Corpus()
    valid_corpus.x_data = corpus.x_validation[:1000]
    valid_corpus.y_data = corpus.y_validation[:1000]
    metrics = {'accuracy': skmetrics.accuracy_score}
    for rep in range(1, num_rep + 1):
        model = TextCNN(config=config, pre_trained_emb=emb)
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
        # train_corpus, valid_corpus = corpus.split(valid_split=valid_split)
        output_dir_rep = os.path.join(output_dir, "rep{}".format(rep))
        t = Trainer(train_corpus=train_corpus,
                    valid_corpus=valid_corpus,
                    test_corpus=None,
                    model=model,
                    config=config,
                    criterion=criterion,
                    optimizer=optimizer,
                    verbose=False,
                    output_dir=output_dir_rep,
                    train_metrics=metrics,
                    val_metrics=metrics,
                    selection_metric='accuracy',
                    use_cuda=use_cuda)
        res = t.train(tqdm_prefix="Rep {}/{}".format(rep, num_rep),
                      patience=patience,
                      init_res_dict={"rep": rep})
        pprint(res["best"])
        mean = mean + res['best']['selection_metric']

    mean = mean / num_rep
    print(mean)
def __init__(self, emb_dim, filter_num, filter_sizes, dropout_p=0.5):
    super(Discriminator, self).__init__()
    # TODO: add dropout
    self.query_cnn = TextCNN(emb_dim, filter_num, filter_sizes)
    self.response_cnn = TextCNN(emb_dim, filter_num, filter_sizes)
    self.dropout = nn.Dropout(p=dropout_p)
    self.judger = nn.Sequential(
        nn.Linear(2 * filter_num * len(filter_sizes), 128),
        # nn.ReLU(),
        # nn.Linear(256, 128),
        nn.ReLU(),
        self.dropout,
        nn.Linear(128, 1),
        nn.Sigmoid())
def __init__(self, vocab_size, emb_dim, filter_num, filter_sizes, dropout=0.0):
    super(Discriminator, self).__init__()
    self.query_cnn = TextCNN(emb_dim, filter_num, filter_sizes)
    self.response_cnn = TextCNN(emb_dim, filter_num, filter_sizes)
    self.dropout = nn.Dropout(p=dropout)
    self.embeddings = nn.Embedding(vocab_size, emb_dim)
    self.judger = nn.Sequential(
        nn.Linear(2 * filter_num * len(filter_sizes), 128),
        nn.ReLU(),
        self.dropout,
        nn.Linear(128, 2),
        nn.Softmax(dim=1))
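# The two Discriminator __init__ variants above imply a forward pass that
# encodes query and response separately and concatenates the two CNN feature
# vectors before the judger MLP. The forward method is not included in this
# collection; the sketch below is an assumption consistent with the layer
# shapes (2 * filter_num * len(filter_sizes) input features), not the
# authors' actual code.
def forward(self, query_ids, response_ids):
    # (batch, seq_len) -> (batch, seq_len, emb_dim)
    query_emb = self.embeddings(query_ids)
    response_emb = self.embeddings(response_ids)
    # each TextCNN is assumed to return (batch, filter_num * len(filter_sizes))
    query_feat = self.query_cnn(query_emb)
    response_feat = self.response_cnn(response_emb)
    features = torch.cat([query_feat, response_feat], dim=1)
    return self.judger(features)  # (batch, 2) class probabilities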
def test():
    with tf.Session() as sess:
        vocab = load_vocab(TRAIN_VOCAB_FILENAME)
        cnn = TextCNN(SEQUENCE_LENGTH, NUM_CLASS, len(vocab), 128, [3, 4, 5], 128)
        saver = tf.train.Saver()
        saver.restore(sess, './textcnn.ckpt')
        print('model restored')
        # 'str' and 'masterName' arrive in the body of an HTTP POST request
        input_text = request.form['str']
        masterName = request.form['masterName']
        tokens = tokenize(input_text)
        print('Input sentence split into the following tokens:')
        print(tokens)
        sequence = [get_token_id(t, vocab) for t in tokens]
        x = []
        while len(sequence) > 0:
            seq_seg = sequence[:SEQUENCE_LENGTH]
            sequence = sequence[SEQUENCE_LENGTH:]
            padding = [1] * (SEQUENCE_LENGTH - len(seq_seg))
            seq_seg = seq_seg + padding
            x.append(seq_seg)
        feed_dict = {cnn.input: x, cnn.dropout_keep_prob: 1.0}
        predict = sess.run([cnn.predictions], feed_dict)
        result = np.mean(predict)
        if result > 0.75:
            print('Recommended')
        elif result < 0.25:
            print('Not recommended')
        else:
            print('Cannot evaluate')
        MyMentorDB.update_item(
            Key={'Username': masterName},
            UpdateExpression='ADD grade :val',
            ExpressionAttributeValues={':val': int(result)})
        tf.reset_default_graph()
        return str(result)
def test():
    with tf.Session() as sess:
        vocab = load_vocab(TRAIN_VOCAB_FILENAME)
        cnn = TextCNN(SEQUENCE_LENGTH, NUM_CLASS, len(vocab), 128, [3, 4, 5], 128)
        saver = tf.train.Saver()
        saver.restore(sess, './textcnn.ckpt')
        print('model restored')
        input_text = input('Enter a user review as a sentence: ')
        tokens = tokenize(input_text)
        print('Input sentence split into the following tokens:')
        print(tokens)
        sequence = [get_token_id(t, vocab) for t in tokens]
        x = []
        while len(sequence) > 0:
            # take the first SEQUENCE_LENGTH tokens (the original sliced from
            # index 1, which silently dropped the first token of every segment)
            seq_seg = sequence[:SEQUENCE_LENGTH]
            sequence = sequence[SEQUENCE_LENGTH:]
            padding = [1] * (SEQUENCE_LENGTH - len(seq_seg))
            seq_seg = seq_seg + padding
            x.append(seq_seg)
        feed_dict = {cnn.input: x, cnn.dropout_keep_prob: 1.0}
        predict = sess.run([cnn.predictions], feed_dict)
        result = np.mean(predict)
        if result > 0.75:
            print('Recommended')
        elif result < 0.25:
            print('Not recommended')
        else:
            print('Cannot evaluate')
def test(x_train, y_train, tokenizer, x_dev, y_dev, batch_size=64):
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(tokenizer.vocab),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            out_dir = FLAGS.out_dir
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)
            ckpt_file = tf.train.latest_checkpoint(checkpoint_dir)
            saver.restore(sess, ckpt_file)

            def test_step(x_batch, y_batch, writer=None):
                """Evaluates model on a dev set."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                loss, accuracy, predict = sess.run(
                    [cnn.loss, cnn.accuracy, cnn.predictions], feed_dict)
                auc = calAUC(predict, y_batch)
                time_str = datetime.datetime.now().isoformat()
                print("{}: loss {:g}, acc {:g}, auc {:g}".format(
                    time_str, loss, accuracy, auc))

            test_step(x_dev, y_dev)
            test_step(x_train[:batch_size], y_train[:batch_size])
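# calAUC is called above (and again in the train() variant further down) but
# is not defined in this collection. A minimal sketch of what it likely does,
# assuming `predict` holds predicted class indices and `y_batch` holds one-hot
# binary labels; this uses scikit-learn rather than the original author's
# implementation.
from sklearn.metrics import roc_auc_score
import numpy as np

def calAUC(predict, y_batch):
    y_true = np.argmax(np.asarray(y_batch), axis=1)   # one-hot -> class index
    y_score = np.asarray(predict).reshape(-1)
    return roc_auc_score(y_true, y_score)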
def test():
    with tf.Session() as sess:
        vocab = load_vocab(TRAIN_VOCAB_FILENAME)
        cnn = TextCNN(SEQUENCE_LENGTH, NUM_CLASS, len(vocab), 128, [3, 4, 5], 128)
        saver = tf.train.Saver()
        saver.restore(sess, './textcnn.ckpt')
        print('model restored')
        while 1:
            input_text = input('Enter a user review as a sentence (Z to quit): ')
            if input_text in ['z', 'Z']:
                break
            tokens = tokenize(input_text)
            print('Input sentence split into the following tokens:')
            print(tokens)
            sequence = [get_token_id(t, vocab) for t in tokens]
            x = []
            while len(sequence) > 0:
                seq_seg = sequence[:SEQUENCE_LENGTH]
                sequence = sequence[SEQUENCE_LENGTH:]
                padding = [1] * (SEQUENCE_LENGTH - len(seq_seg))
                seq_seg = seq_seg + padding
                x.append(seq_seg)
            feed_dict = {cnn.input: x, cnn.dropout_keep_prob: 1.0}
            # predict the rating
            predict = sess.run([cnn.predictions], feed_dict)
            result = np.array(predict)
            result = result[0][0]
            print("=========================Result==========================")
            print("Rating: ", result)
            if result in [0]:
                print("Unsatisfied")
            elif result in [1]:
                print("Neutral")
            elif result in [2]:
                print("Satisfied")
def train(inputs, labels):
    """
    :param inputs:
    :param labels:
    :return:
    """
    with tf.Session() as sess:
        cnn = TextCNN(flag.setence, flag.num_classes, flag.vocab_size,
                      flag.embedding_size, flag.filter_sizes, flag.num_filters,
                      flag.keep_prob)
        output = cnn(inputs)
        loss = tf.nn.softmax_cross_entropy_with_logits_v2(logits=output,
                                                          labels=labels)
        total_loss = loss + flag.decay_rate * tf.nn.l2_loss(cnn.final_weight +
                                                            cnn.final_bias)
        global_step = tf.train.get_or_create_global_step()
        optimizer = tf.train.AdamOptimizer(flag.learning_rate)
        gradients_vars = optimizer.compute_gradients(total_loss)
        for i, (grad, var) in enumerate(gradients_vars):
            if grad is not None:
                gradients_vars[i] = (tf.clip_by_value(grad, -10, 10), var)
                tf.summary.histogram(var.name + '/grad', grad)  # tf.histogram_summary
        tf.summary.scalar('loss', total_loss)
        sum_merge = tf.summary.merge_all()
        train_op = optimizer.apply_gradients(gradients_vars,
                                             global_step=global_step)
        saver = tf.train.Saver()
        ckpt = tf.train.get_checkpoint_state(flag.model_saved_dir)
        if ckpt and tf.train.checkpoint_exists(ckpt.model_checkpoint_path):
            print('reloading model parameters..')
        else:
            print('create model from scratch..')
        sess.run(tf.global_variables_initializer())
        summarizer = tf.summary.FileWriter(flag.model_saved_dir, sess.graph)
        for i in range(flag.num_loop):
            step_loss, summary, _ = sess.run([total_loss, sum_merge, train_op])
            if i % 1000 == 0:
                print('check points {}'.format(i))
                saver.save(sess, flag.model_saved_path, global_step=global_step)
                summarizer.add_summary(summary, global_step=global_step)
def train():
    # Sample data files
    positive_data_file = "./rt-polaritydata/rt-polarity.pos"
    negative_data_file = "./rt-polaritydata/rt-polarity.neg"

    # Training parameters
    num_steps = 50  # number of training epochs
    SaveFileName = "text_cnn_model"  # folder name for the saved model

    # Model parameters
    num_classes = 2  # number of classes
    l2_reg_lambda = 0.1  # L2 regularization coefficient
    filter_sizes = "3,4,5"  # multi-channel convolution kernel sizes
    num_filters = 64  # number of filters per kernel size

    # Load the dataset
    data, vocab_processor, max_len = dataset(positive_data_file,
                                             negative_data_file)

    # Build the model
    text_cnn = TextCNN(seq_length=max_len,
                       num_classes=num_classes,
                       vocab_size=len(vocab_processor.vocabulary_),
                       embeding_size=128,
                       filter_sizes=list(map(int, filter_sizes.split(','))),
                       num_filters=num_filters)

    def l2_loss(y_true, y_pred):
        l2_loss = tf.constant(0.0)
        for tf_var in text_cnn.trainable_weights:
            if tf_var.name == "fully_connected":  # originally misspelled "fully_connecred"
                l2_loss += tf.reduce_mean(tf.nn.l2_loss(tf_var))
        loss = tf.nn.softmax_cross_entropy_with_logits(logits=y_pred,
                                                       labels=y_true)
        return loss + l2_reg_lambda * l2_loss

    text_cnn.compile(loss=l2_loss,
                     optimizer=tf.keras.optimizers.Adam(lr=1e-3),
                     metrics=['acc'])
    text_cnn.fit(data, epochs=num_steps)
    text_cnn.save("textcnn.h5")
def train(train_dir, val_dir, labels_file, word2vec_path, batch_size,
          max_steps, log_step, val_step, snapshot, out_dir):
    '''
    Training...
    :param train_dir: training data directory
    :param val_dir: validation data directory
    :param labels_file: labels file path
    :param word2vec_path: word-vector model file
    :param batch_size: batch size
    :param max_steps: maximum number of training steps
    :param log_step: logging interval
    :param val_step: validation interval
    :param snapshot: model-saving interval
    :param out_dir: output directory for model ckpts and summaries
    :return:
    '''
    max_sentence_length = 300
    embedding_dim = 50
    filter_sizes = [3, 4, 5, 6]
    num_filters = 200  # Number of filters per filter size
    base_lr = 0.001  # learning rate
    dropout_keep_prob = 0.5
    l2_reg_lambda = 0.0  # L2 regularization lambda (default: 0.0)
    allow_soft_placement = True  # let TF pick another device if the requested one does not exist
    log_device_placement = False  # whether to log device placement

    print("Loading data...")
    w2vModel = create_word2vec.load_wordVectors(word2vec_path)
    labels_set = files_processing.read_txt(labels_file)
    labels_nums = len(labels_set)
    train_file_list = create_batch_data.get_file_list(file_dir=train_dir,
                                                      postfix='*.npy')
    train_batch = create_batch_data.get_data_batch(train_file_list,
                                                   labels_nums=labels_nums,
                                                   batch_size=batch_size,
                                                   shuffle=False,
                                                   one_hot=True)
    val_file_list = create_batch_data.get_file_list(file_dir=val_dir,
                                                    postfix='*.npy')
    val_batch = create_batch_data.get_data_batch(val_file_list,
                                                 labels_nums=labels_nums,
                                                 batch_size=batch_size,
                                                 shuffle=False,
                                                 one_hot=True)
    print("train data info *****************************")
    train_nums = create_word2vec.info_npy(train_file_list)
    print("val data info *****************************")
    val_nums = create_word2vec.info_npy(val_file_list)
    print("labels_set info *****************************")
    files_processing.info_labels_set(labels_set)

    # Training
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=allow_soft_placement,
            log_device_placement=log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=max_sentence_length,
                          num_classes=labels_nums,
                          embedding_size=embedding_dim,
                          filter_sizes=filter_sizes,
                          num_filters=num_filters,
                          l2_reg_lambda=l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(learning_rate=base_lr)
            # optimizer = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=3)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """A single training step."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                if step % log_step == 0:
                    print("training: step {}, loss {:g}, acc {:g}".format(
                        step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """Evaluates model on a dev set."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy = sess.run(
                    [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                    feed_dict)
                if writer:
                    writer.add_summary(summaries, step)
                return loss, accuracy

            for i in range(max_steps):
                train_batch_data, train_batch_label = create_batch_data.get_next_batch(
                    train_batch)
                train_batch_data = create_word2vec.indexMat2vector_lookup(
                    w2vModel, train_batch_data)
                train_step(train_batch_data, train_batch_label)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % val_step == 0:
                    val_losses = []
                    val_accs = []
                    # for k in range(int(val_nums / batch_size)):
                    for k in range(100):
                        val_batch_data, val_batch_label = create_batch_data.get_next_batch(
                            val_batch)
                        val_batch_data = create_word2vec.indexMat2vector_lookup(
                            w2vModel, val_batch_data)
                        val_loss, val_acc = dev_step(val_batch_data,
                                                     val_batch_label,
                                                     writer=dev_summary_writer)
                        val_losses.append(val_loss)
                        val_accs.append(val_acc)
                    mean_loss = np.array(val_losses, dtype=np.float32).mean()
                    mean_acc = np.array(val_accs, dtype=np.float32).mean()
                    print("--------Evaluation:step {}, loss {:g}, acc {:g}".
                          format(current_step, mean_loss, mean_acc))
                if current_step % snapshot == 0:
                    path = saver.save(sess, checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
print('Loading data...')
# get data
x_train, y_train, x_test, y_test, word2index = data_helpers.preprocess()
max_features = len(word2index)
max_len = max(len(x) for x in x_train)
print(max_len)

print('Pad sequences...')
x_train = sequence.pad_sequences(x_train, maxlen=max_len, value=0)
x_test = sequence.pad_sequences(x_test, maxlen=max_len, value=0)

print('Build model...')
model = TextCNN(max_len,
                embedding_dim,
                batch_size=batch_size,
                class_num=2,
                max_features=max_features,
                epochs=epochs)

print('Train...')
model.fit(x_train, x_test, y_train, y_test)

print('Test...')
result = model.predict(x_test)
result = np.argmax(np.array(result), axis=1)
y_test = np.argmax(np.array(y_test), axis=1)
print('f1:', f1_score(y_test, result, average='macro'))
print('accuracy:', accuracy_score(y_test, result))
print('classification report:\n', classification_report(y_test, result))
print('confusion matrix:\n', confusion_matrix(y_test, result))
print('Pad sequences (samples x time)...')
x_train = pad_sequences(x_train, maxlen=maxlen, padding='post')
x_test = pad_sequences(x_test, maxlen=maxlen, padding='post')
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

train_ds = tf.data.Dataset.from_tensor_slices(
    (x_train, y_train)).batch(batch_size)
test_ds = tf.data.Dataset.from_tensor_slices(
    (x_test, y_test)).batch(batch_size)

print('Build model...')
model = TextCNN(maxlen=maxlen,
                max_features=max_features,
                embedding_dims=embedding_dims,
                class_num=class_num,
                kernel_sizes=[2, 3, 5],
                kernel_regularizer=None,
                last_activation='softmax')

# Choose the optimizer and loss function for training
loss_object = tf.keras.losses.SparseCategoricalCrossentropy()
optimizer = tf.keras.optimizers.Adam()

# Metrics to measure the model's loss and accuracy. They accumulate values
# over an epoch, after which the aggregate result is printed.
train_loss = tf.keras.metrics.Mean(name='train_loss')
train_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='train_accuracy')
test_loss = tf.keras.metrics.Mean(name='test_loss')
test_accuracy = tf.keras.metrics.SparseCategoricalAccuracy(
    name='test_accuracy')
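# The datasets, loss object, optimizer, and metrics above are typically wired
# together with tf.function train/test steps; those steps are not part of this
# snippet, so the following is only a sketch of the usual pattern (assuming
# `model` returns softmax probabilities, matching last_activation='softmax').
@tf.function
def train_step(texts, labels):
    with tf.GradientTape() as tape:
        predictions = model(texts, training=True)
        loss = loss_object(labels, predictions)
    gradients = tape.gradient(loss, model.trainable_variables)
    optimizer.apply_gradients(zip(gradients, model.trainable_variables))
    train_loss(loss)
    train_accuracy(labels, predictions)

@tf.function
def test_step(texts, labels):
    predictions = model(texts, training=False)
    test_loss(loss_object(labels, predictions))
    test_accuracy(labels, predictions)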
def build_graph(self):
    """Build graph."""
    self.tf_graph = TextCNN(self.flags, self.embedding)
    self.tf_graph.build(self.word_ids, self.word_label)
# training hyperparameters
BATCH_SIZE = 128
EPOCHS = 10

# load data
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=MAX_WORD_NUM)

# pad sequences
x_train = sequence.pad_sequences(x_train, maxlen=MAX_SENT_LEN)
x_test = sequence.pad_sequences(x_test, maxlen=MAX_SENT_LEN)

# build model
model = TextCNN(max_sent_len=MAX_SENT_LEN,
                max_word_num=MAX_WORD_NUM,
                embedding_dims=EMBEDDING_DIMS,
                class_num=CLASS_NUM,
                last_activation=LAST_ACTIVATION).build_model()
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

# train
early_stopping = EarlyStopping(monitor='val_acc', patience=3, mode='max')
model.fit(x_train,
          y_train,
          batch_size=BATCH_SIZE,
          epochs=EPOCHS,
          callbacks=[early_stopping],
          validation_data=(x_test, y_test))

# save model
# model.save('textcnn_model.h5')
def train(x_train, y_train, vocab_processor, x_dev, y_dev, x_test, y_test):
    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement
        )  # session config: choose GPU/CPU automatically as set above
        sess = tf.Session(config=session_conf)
        print("")
        with sess.as_default():
            print("vocab_size:", len(vocab_processor.vocabulary_))
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(vocab_processor.vocabulary_),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            out_dir = os.path.abspath(
                os.path.join(os.path.curdir, "runs", timestamp))
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            vocab_processor.save(os.path.join(out_dir, "vocab"))

            # Initialize all variables
            sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """A single training step."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                # time_str = datetime.datetime.now().isoformat()
                # print("{}: step {}, loss {:g}, acc {:g}".format(time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """Evaluates model on a dev set."""
                num = 20
                x_batch = x_batch.tolist()
                y_batch = y_batch.tolist()
                l = len(y_batch)
                l_20 = int(l / num)
                x_set = []
                y_set = []
                for i in range(num - 1):
                    x_temp = x_batch[i * l_20:(i + 1) * l_20]
                    x_set.append(x_temp)
                    y_temp = y_batch[i * l_20:(i + 1) * l_20]
                    y_set.append(y_temp)
                x_temp = x_batch[(num - 1) * l_20:]
                x_set.append(x_temp)
                y_temp = y_batch[(num - 1) * l_20:]
                y_set.append(y_temp)
                # compute loss/accuracy on each validation chunk, then average
                # over the num chunks
                lis_loss = []
                lis_accu = []
                for i in range(num):
                    feed_dict = {
                        cnn.input_x: np.array(x_set[i]),
                        cnn.input_y: np.array(y_set[i]),
                        cnn.dropout_keep_prob: 1.0
                    }
                    step, summaries, loss, accuracy = sess.run(
                        [global_step, dev_summary_op, cnn.loss, cnn.accuracy],
                        feed_dict)
                    lis_loss.append(loss)
                    lis_accu.append(accuracy)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                print("test_loss and test_acc" + "\t\t" +
                      str(sum(lis_loss) / num) + "\t\t" +
                      str(sum(lis_accu) / num))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            batches = data_helper.batch_iter(list(zip(x_train, y_train)),
                                             FLAGS.batch_size,
                                             FLAGS.num_epochs)
            # Training loop. For each batch...
            for batch in batches:
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))

            print("testing:......")
            list_acc = []
            count = 0
            test_batches = data_helper.batch_iter(list(zip(x_test, y_test)),
                                                  FLAGS.batch_size,
                                                  num_epochs=1,
                                                  shuffle=False)
            for batch in test_batches:
                x_batch, y_batch = zip(*batch)
                count += 1
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                accuracy = sess.run(cnn.accuracy, feed_dict)
                list_acc.append(accuracy)
            print("test acc:", str(sum(list_acc) / count))
def predict(self):
    """Predict line."""
    word_id_list = tf.placeholder(tf.int32, shape=[None, None])
    model = TextCNN(self.flags, self.embedding)
    model.build_predictor(word_id_list)
    return model, word_id_list
def train(x_train, y_train, tokenizer, x_dev, y_dev):
    # Training
    # ==================================================
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with sess.as_default():
            cnn = TextCNN(sequence_length=x_train.shape[1],
                          num_classes=y_train.shape[1],
                          vocab_size=len(tokenizer.vocab),
                          embedding_size=FLAGS.embedding_dim,
                          filter_sizes=list(
                              map(int, FLAGS.filter_sizes.split(","))),
                          num_filters=FLAGS.num_filters,
                          l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Keep track of gradient values and sparsity (optional)
            grad_summaries = []
            for g, v in grads_and_vars:
                if g is not None:
                    grad_hist_summary = tf.summary.histogram(
                        "{}/grad/hist".format(v.name), g)
                    sparsity_summary = tf.summary.scalar(
                        "{}/grad/sparsity".format(v.name),
                        tf.nn.zero_fraction(g))
                    grad_summaries.append(grad_hist_summary)
                    grad_summaries.append(sparsity_summary)
            grad_summaries_merged = tf.summary.merge(grad_summaries)

            # Output directory for models and summaries
            timestamp = str(int(time.time()))
            # out_dir = os.path.abspath(os.path.join(os.path.curdir, "runs", timestamp))
            out_dir = FLAGS.out_dir
            print("Writing to {}\n".format(out_dir))

            # Summaries for loss and accuracy
            loss_summary = tf.summary.scalar("loss", cnn.loss)
            acc_summary = tf.summary.scalar("accuracy", cnn.accuracy)

            # Train Summaries
            train_summary_op = tf.summary.merge(
                [loss_summary, acc_summary, grad_summaries_merged])
            train_summary_dir = os.path.join(out_dir, "summaries", "train")
            train_summary_writer = tf.summary.FileWriter(
                train_summary_dir, sess.graph)

            # Dev summaries
            dev_summary_op = tf.summary.merge([loss_summary, acc_summary])
            dev_summary_dir = os.path.join(out_dir, "summaries", "dev")
            dev_summary_writer = tf.summary.FileWriter(dev_summary_dir,
                                                       sess.graph)

            # Checkpoint directory. Tensorflow assumes this directory already
            # exists so we need to create it
            checkpoint_dir = os.path.abspath(
                os.path.join(out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.global_variables(),
                                   max_to_keep=FLAGS.num_checkpoints)

            # Write vocabulary
            # vocab_processor.save(os.path.join(out_dir, "vocab"))

            ckpt_file = tf.train.latest_checkpoint(checkpoint_dir)
            if ckpt_file:
                saver.restore(sess, ckpt_file)
                print('restoring model from %s' % ckpt_file)
            else:
                # Initialize all variables
                sess.run(tf.global_variables_initializer())

            def train_step(x_batch, y_batch):
                """A single training step."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: FLAGS.dropout_keep_prob
                }
                _, step, summaries, loss, accuracy = sess.run([
                    train_op, global_step, train_summary_op, cnn.loss,
                    cnn.accuracy
                ], feed_dict)
                time_str = datetime.datetime.now().isoformat()
                if step % FLAGS.log_every == 0:
                    print("{}: step {}, loss {:g}, acc {:g}".format(
                        time_str, step, loss, accuracy))
                train_summary_writer.add_summary(summaries, step)

            def dev_step(x_batch, y_batch, writer=None):
                """Evaluates model on a dev set."""
                feed_dict = {
                    cnn.input_x: x_batch,
                    cnn.input_y: y_batch,
                    cnn.dropout_keep_prob: 1.0
                }
                step, summaries, loss, accuracy, predict = sess.run([
                    global_step, dev_summary_op, cnn.loss, cnn.accuracy,
                    cnn.predictions
                ], feed_dict)
                auc = calAUC(predict, y_batch)
                time_str = datetime.datetime.now().isoformat()
                print("{}: step {}, loss {:g}, acc {:g}, auc {:g}".format(
                    time_str, step, loss, accuracy, auc))
                if writer:
                    writer.add_summary(summaries, step)

            # Generate batches
            iter = iterData(x_train,
                            y_train,
                            batch_size=FLAGS.train_batch_size,
                            epoch=FLAGS.num_epochs)
            # Training loop. For each batch...
            data = next(iter)
            step = 0
            epoch = 0
            print('training begin')
            while data != '__RETURN__':
                if data == '__STOP__':
                    data = next(iter)
                    epoch += 1
                    continue
                x_batch, y_batch = data
                train_step(x_batch, y_batch)
                data = next(iter)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % FLAGS.evaluate_every == 0:
                    print("\nEvaluation:")
                    dev_step(x_dev, y_dev, writer=dev_summary_writer)
                    print("")
                if current_step % FLAGS.checkpoint_every == 0:
                    path = saver.save(sess, checkpoint_prefix,
                                      global_step=current_step)
                    print("Saved model checkpoint to {}\n".format(path))
def main():
    for i in range(10):
        # Load the configuration
        config = Config()
        if torch.cuda.is_available():
            torch.cuda.set_device(0)
        # Load the datasets
        early_stopping = EarlyStopping(patience=10, verbose=True, cv_index=i)
        kwargs = {'num_workers': 2, 'pin_memory': True}
        dataset_train = MR_dataset(config=config, state="train", k=i,
                                   embedding_state=True)
        train_data_batch = DataLoader(dataset_train,
                                      batch_size=config.batch_size,
                                      shuffle=False,
                                      drop_last=False,
                                      **kwargs)
        dataset_valid = MR_dataset(config=config, state="valid", k=i,
                                   embedding_state=False)
        valid_data_batch = DataLoader(dataset_valid,
                                      batch_size=config.batch_size,
                                      shuffle=False,
                                      drop_last=False,
                                      **kwargs)
        dataset_test = MR_dataset(config=config, state="test", k=i,
                                  embedding_state=False)
        test_data_batch = DataLoader(dataset_test,
                                     batch_size=config.batch_size,
                                     shuffle=False,
                                     drop_last=False,
                                     **kwargs)
        print(len(dataset_train), len(dataset_valid), len(dataset_test))
        if config.use_pretrained_embed:
            config.embedding_pretrained = torch.from_numpy(
                dataset_train.weight).float().cuda()
            print("load pretrained models.")
        else:
            config.embedding_pretrained = None
        config.vocab_size = dataset_train.vocab_size

        model = TextCNN(config)
        print(model)
        if config.use_cuda and torch.cuda.is_available():
            # print("load data to CUDA")
            model.cuda()
            # config.embedding_pretrained.cuda()

        criterion = nn.CrossEntropyLoss()  # cross-entropy loss
        optimizer = optim.Adam(model.parameters(), lr=config.learning_rate)
        count = 0
        loss_sum = 0.0
        for epoch in range(config.epoch):
            # start training
            model.train()
            for data, label in train_data_batch:
                if config.use_cuda and torch.cuda.is_available():
                    data = data.to(torch.int64).cuda()
                    label = label.cuda()
                else:
                    data.to(torch.int64)
                # data = torch.autograd.Variable(data).long().cuda()
                # label = torch.autograd.Variable(label).squeeze()
                out = model(data)
                l2_loss = config.l2_weight * torch.sum(
                    torch.pow(list(model.parameters())[1], 2))
                loss = criterion(out, autograd.Variable(label.long())) + l2_loss
                loss_sum += loss.data.item()
                count += 1
                if count % 100 == 0:
                    print("epoch", epoch, end=' ')
                    print("The loss is: %.5f" % (loss_sum / 100))
                    loss_sum = 0
                    count = 0
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()
            # after each training epoch, evaluate on the validation set
            valid_loss, valid_acc = get_test_result(model, valid_data_batch,
                                                    dataset_valid, config,
                                                    criterion)
            early_stopping(valid_loss, model, config)
            print("The valid acc is: %.5f" % valid_acc)
            if early_stopping.early_stop:
                print("Early stopping")
                break
        # results for this fold
        model.load_state_dict(
            torch.load(
                os.path.abspath(
                    os.path.join(config.checkpoint_path,
                                 'checkpoint%d.pt' % i))))
        test_loss, test_acc = get_test_result(model, test_data_batch,
                                              dataset_test, config, criterion)
        print("The test acc is: %.5f" % test_acc)
def train():
    config = KDConfig()
    logger = get_logger(config.log_path, "train_KD")
    device = config.device

    # Load the BERT model as the teacher
    logger.info("load bert .....")
    bert = Bert(config.bert_config)
    bert.load_state_dict(torch.load(config.bert_config.model_path))
    bert.to(device)
    bert.eval()

    # Freeze the BERT parameters
    for name, p in bert.named_parameters():
        p.requires_grad = False

    # Load the TextCNN model as the student
    textcnn = TextCNN(config.textcnn_config)
    textcnn.to(device)
    textcnn.train()

    # Load the datasets
    logger.info("load train/dev data .....")
    train_loader = DataLoader(KDdataset(config.base_config.train_data_path),
                              batch_size=config.batch_size, shuffle=True)
    dev_loader = DataLoader(KDdataset(config.base_config.dev_data_path),
                            batch_size=config.batch_size, shuffle=False)
    optimizer = Adam(textcnn.parameters(), lr=config.lr)

    # Start training
    logger.info("start training .....")
    best_acc = 0.
    for epoch in range(config.epochs):
        for i, batch in enumerate(train_loader):
            cnn_ids, labels, input_ids, token_type_ids, attention_mask = \
                batch[0].to(device), batch[1].to(device), batch[2].to(device), \
                batch[3].to(device), batch[4].to(device)
            optimizer.zero_grad()
            students_output = textcnn(cnn_ids)
            teacher_output = bert(input_ids, token_type_ids, attention_mask)
            loss = loss_fn_kd(students_output, labels, teacher_output,
                              config.T, config.alpha)
            loss.backward()
            optimizer.step()
            # log training info
            if i % 100 == 0:
                labels = labels.data.cpu().numpy()
                preds = torch.argmax(students_output, dim=1)
                preds = preds.data.cpu().numpy()
                acc = np.sum(preds == labels) * 1. / len(preds)
                logger.info(
                    "TRAIN: epoch: {} step: {} acc: {} loss: {} ".format(
                        epoch + 1, i, acc, loss.item()))
        acc, table = dev(textcnn, dev_loader, config)
        logger.info("DEV: acc: {} ".format(acc))
        logger.info("DEV classification report: \n{}".format(table))
        if acc > best_acc:
            torch.save(textcnn.state_dict(), config.model_path)
            best_acc = acc

    logger.info("start testing ......")
    test_loader = DataLoader(KDdataset(config.base_config.test_data_path),
                             batch_size=config.batch_size, shuffle=False)
    best_model = TextCNN(config.textcnn_config)
    best_model.load_state_dict(torch.load(config.model_path))
    acc, table = dev(best_model, test_loader, config)
    logger.info("TEST acc: {}".format(acc))
    logger.info("TEST classification report:\n{}".format(table))
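# loss_fn_kd above is the knowledge-distillation loss but is not defined in
# this collection. A common formulation (Hinton-style distillation) combines
# soft-target KL divergence at temperature T with ordinary cross-entropy on
# the hard labels; the sketch below follows that convention and is an
# assumption, not necessarily the authors' exact implementation.
import torch.nn.functional as F

def loss_fn_kd(student_logits, labels, teacher_logits, T, alpha):
    # soft targets: KL(student || teacher) at temperature T, scaled by T^2
    soft_loss = F.kl_div(F.log_softmax(student_logits / T, dim=1),
                         F.softmax(teacher_logits / T, dim=1),
                         reduction='batchmean') * (T * T)
    # hard targets: standard cross-entropy against the ground-truth labels
    hard_loss = F.cross_entropy(student_logits, labels)
    return alpha * soft_loss + (1.0 - alpha) * hard_loss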
def train():
    config = TextCNNConfig()
    logger = get_logger(config.log_path, "train_textcnn")
    model = TextCNN(config)
    train_loader = DataLoader(CnnDataSet(config.base_config.train_data_path),
                              batch_size=config.batch_size, shuffle=True)
    dev_loader = DataLoader(CnnDataSet(config.base_config.dev_data_path),
                            batch_size=config.batch_size, shuffle=False)
    model.train()
    model.to(config.device)
    optimizer = Adam(model.parameters(), lr=config.learning_rate)
    best_acc = 0.
    for epoch in range(config.num_epochs):
        for i, (texts, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            texts = texts.to(config.device)
            labels = labels.to(config.device)
            logits = model(texts)
            loss = F.cross_entropy(logits, labels)
            loss.backward()
            optimizer.step()
            if i % 100 == 0:
                labels = labels.data.cpu().numpy()
                preds = torch.argmax(logits, dim=1)
                preds = preds.data.cpu().numpy()
                acc = np.sum(preds == labels) * 1. / len(preds)
                logger.info("TRAIN: epoch: {} step: {} acc: {} loss: {} ".format(
                    epoch + 1, i, acc, loss.item()))
        acc, table = dev(model, dev_loader, config)
        logger.info("DEV: acc: {} ".format(acc))
        logger.info("DEV classification report: \n{}".format(table))
        if acc > best_acc:
            torch.save(model.state_dict(), config.model_path)
            best_acc = acc

    test_loader = DataLoader(CnnDataSet(config.base_config.test_data_path),
                             batch_size=config.batch_size, shuffle=False)
    best_model = TextCNN(config)
    best_model.load_state_dict(torch.load(config.model_path))
    acc, table = dev(best_model, test_loader, config)
    logger.info("TEST acc: {}".format(acc))
    logger.info("TEST classification report:\n{}".format(table))
def trainModel(self):
    with tf.Graph().as_default():
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement)
        sess = tf.Session(config=session_conf)
        with tf.name_scope("readfile"):
            processing = Processing.Processing()
            articles, tags = processing.loadPracticeFile("data/train_all.txt")
            self.data_embedding_new, self.tags_new = processing.embedding(
                articles, tags)
            X_train, X_val, y_train, y_val = train_test_split(
                self.data_embedding_new, self.tags_new,
                test_size=0.2, random_state=0)
        # Load the vocabulary
        vocab = learn.preprocessing.VocabularyProcessor.restore('model/vocab.pickle')

        with sess.as_default():
            textcnn = TextCNN.TextCNN(
                max_length=len(self.data_embedding_new[0]),
                num_classes=len(y_train[0]),
                vocab_size=len(vocab.vocabulary_),
                embedding_size=FLAGS.embedding_dim,
                filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
                num_filters=FLAGS.num_filters,
                l2_reg_lambda=FLAGS.l2_reg_lambda)

            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(1e-3)
            # compute gradients of the loss w.r.t. the variables;
            # returns a list of (gradient, variable) tuples
            grads_and_vars = optimizer.compute_gradients(textcnn.loss)
            # apply the computed gradients to the variables
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

            # Initialize all variables
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver()

            best_f1 = 0.0
            for time in range(config.epoch):
                batch_size = config.Batch_Size
                for trainX_batch, trainY_batch in self.get_batches(
                        X_train, y_train, batch_size):
                    feed_dict = {
                        textcnn.input_x: np.array(trainX_batch),
                        textcnn.input_y: np.array(trainY_batch),
                        textcnn.drop_keep_prob: FLAGS.dropout_keep_prob
                    }
                    _, loss, train_accuracy = sess.run(
                        [train_op, textcnn.loss, textcnn.accuracy], feed_dict)
                print("Training set: epoch " + str(time + 1) + " loss: " +
                      str(loss) + "; accuracy: " + str(train_accuracy))

                all_dev = []
                for devX_batch, devY_batch in self.get_batches(
                        X_val, y_val, batch_size):
                    feed_dict = {
                        textcnn.input_x: np.array(devX_batch),
                        textcnn.input_y: np.array(devY_batch),
                        textcnn.drop_keep_prob: 1.0
                    }
                    dev_loss, dev_predictions = sess.run(
                        [textcnn.loss, textcnn.predictions], feed_dict)
                    all_dev.extend(dev_predictions.tolist())

                # F1 score
                y_true = []
                for x in y_val:
                    if x[0] == 1:
                        y_true.append(0)
                    else:
                        y_true.append(1)
                dev_f1 = f1_score(np.array(y_true), np.array(all_dev))
                dev_recall = recall_score(np.array(y_true), np.array(all_dev))
                dev_acc = accuracy_score(np.array(y_true), np.array(all_dev))
                print("Validation set: f1:{}, recall:{}, acc:{}\n".format(
                    dev_f1, dev_recall, dev_acc))
                if dev_f1 > best_f1:
                    best_f1 = dev_f1
                    saver.save(sess, "model/TextCNNModel.ckpt")
                    print("saved\n")
def train():
    if (os.path.exists(TRAIN_DATA_FILENAME)
            and os.path.exists(TRAIN_VOCAB_FILENAME)):
        print('load prebuilt train data & vocab file')
        input = load_data(TRAIN_DATA_FILENAME)
        vocab = load_vocab(TRAIN_VOCAB_FILENAME)
    else:
        print('build train data & vocab from raw text')
        data, newscore = read_raw_data(TRAIN_FILENAME)
        tokens = [t for d in data for t in d[0]]
        vocab = build_vocab(tokens)
        input = build_input(data, vocab, newscore)
        print('save train data & vocab file')
        save_data(TRAIN_DATA_FILENAME, input)
        save_vocab(TRAIN_VOCAB_FILENAME, vocab)

    if (os.path.exists(TEST_DATA_FILENAME)
            and os.path.exists(TEST_VOCAB_FILENAME)):
        print('load prebuilt test data & vocab file ')
        test_input = load_data(TEST_DATA_FILENAME)
        test_vocab = load_vocab(TEST_VOCAB_FILENAME)
    else:
        print('build test data & vocab from raw text')
        data, newscore = read_raw_data(TEST_FILENAME)
        tokens = [t for d in data for t in d[0]]
        test_vocab = build_vocab(tokens)
        test_input = build_input(data, test_vocab, newscore)
        print('save test data & vocab file')
        save_data(TEST_DATA_FILENAME, test_input)
        save_vocab(TEST_VOCAB_FILENAME, test_vocab)

    with tf.Session() as sess:
        seq_length = np.shape(input[0][0])[0]
        print("SS", seq_length)
        num_class = np.shape(input[0][1])[0]
        print("NN", num_class)
        print('initialize cnn filter')
        print('sequence length %d, number of class %d, vocab size %d' %
              (seq_length, num_class, len(vocab)))
        cnn = TextCNN(seq_length, num_class, len(vocab), 128, [3, 4, 5], 128)
        global_step = tf.Variable(0, name='global_step', trainable=False)
        optimizer = tf.train.AdamOptimizer(1e-3)
        grads_and_vars = optimizer.compute_gradients(cnn.loss)
        train_op = optimizer.apply_gradients(grads_and_vars,
                                             global_step=global_step)

        def train_step(x_batch, y_batch):
            feed_dict = {
                cnn.input: x_batch,
                cnn.label: y_batch,
                cnn.dropout_keep_prob: 0.5
            }
            _, step, loss, accuracy = sess.run(
                [train_op, global_step, cnn.loss, cnn.accuracy], feed_dict)

        def evaluate(x_batch, y_batch):
            feed_dict = {
                cnn.input: x_batch,
                cnn.label: y_batch,
                cnn.dropout_keep_prob: 1.0
            }
            step, loss, accuracy = sess.run(
                [global_step, cnn.loss, cnn.accuracy], feed_dict)
            print("step %d, loss %f, acc %f" % (step, loss, accuracy))

        saver = tf.train.Saver()
        sess.run(tf.global_variables_initializer())
        for i in range(10000):
            try:
                batch = random.sample(input, 64)
                x_batch, y_batch = zip(*batch)
                train_step(x_batch, y_batch)
                current_step = tf.train.global_step(sess, global_step)
                if current_step % 100 == 0:
                    batch = random.sample(test_input, 64)
                    x_test, y_test = zip(*batch)
                    evaluate(x_test, y_test)
                if current_step % 1000 == 0:
                    save_path = saver.save(sess, './textcnn.ckpt')
                    print('model saved : %s' % save_path)
            except:
                print("Unexpected error:", sys.exc_info()[0])
                raise
class ModelHandler():
    """Build train process."""

    def __init__(self, flags):
        """Init class."""
        self.flags = flags
        self.embedding, self.embedding_size = read_embedding(
            self.flags.model_dir + self.flags.embedding_path)

    def add_tensor(self):
        """Add data and embedding."""
        self.train_dat = DataSet(self.flags.train_file, self.flags.model_dir,
                                 self.flags.batch_size, self.flags.num_class,
                                 self.flags.seq_length)
        iterator = self.train_dat.init_iterator()
        self.word_ids, self.word_label = iterator.get_next()
        self.dev_dat = DataSet(self.flags.dev_file, self.flags.model_dir,
                               self.flags.batch_size, self.flags.num_class,
                               self.flags.seq_length)
        self.train_data_init = iterator.make_initializer(self.train_dat.dataset)
        self.dev_data_init = iterator.make_initializer(self.dev_dat.dataset)
        print('add_dev_tensor')

    def train(self, sess, saver):
        """Train process."""
        self.step = 0
        best_accuracy = 0
        patient_passes = 0
        # sess.run(self.train_graph.embedding_init)
        for epoch in range(self.flags.epoch):
            self.train_dat.sample_data()
            sess.run(self.train_data_init)
            tf.local_variables_initializer().run()
            self.current_epoch = epoch
            print("epoch is :", epoch + 1)
            self.train_epoch(sess, self.tf_graph)
            self.dev_dat.read_text(self)
            sess.run(self.dev_data_init)
            accuracy, losses = self.evaluate(sess, self.tf_graph)
            if accuracy < best_accuracy:
                patient_passes += 1
                if patient_passes == self.flags.patient_passes:
                    print("without improvement, break")
                    break
                else:
                    print("without improvement")
            else:
                print("new best acc {}".format(accuracy))
                best_accuracy = accuracy
                patient_passes = 0
                saver.save(sess, os.path.join(self.flags.model_dir, "model"),
                           global_step=self.step)

    def build_graph(self):
        """Build graph."""
        self.tf_graph = TextCNN(self.flags, self.embedding)
        self.tf_graph.build(self.word_ids, self.word_label)

    def train_epoch(self, sess, graph):
        """Operation in one epoch."""
        while True:
            self.step += 1
            try:
                _, loss, pred, ids, labels = sess.run(
                    [graph.train_op, graph.loss, graph.pred, graph.word_ids,
                     graph.labels])
                if self.step % 10 == 0:
                    print("training epoch:{}, step:{}, loss:{}".format(
                        self.current_epoch + 1, self.step, loss))
            except tf.errors.OutOfRangeError:
                print('finish')
                break

    def evaluate(self, sess, graph):
        """Evaluate process."""
        correct_preds = 0
        total_preds = 0
        accuracy = 0
        losses = 0
        while True:
            try:
                batch_correct_pred, pred, batch_loss = sess.run(
                    [graph.correct_pred, graph.pred, graph.loss])
                correct_preds += batch_correct_pred
                total_preds += pred.shape[0]
                losses += batch_loss * pred.shape[0]
            except tf.errors.OutOfRangeError:
                break
        accuracy = float(correct_preds / (total_preds + 0.1))
        losses = float(losses / (total_preds + 0.1))
        return accuracy, losses

    def predict(self):
        """Predict line."""
        word_id_list = tf.placeholder(tf.int32, shape=[None, None])
        model = TextCNN(self.flags, self.embedding)
        model.build_predictor(word_id_list)
        return model, word_id_list
config['embedding_size'] = 300
config['keep_prob'] = 1.0
config['filter_sizes'] = [7, 8, 9]
config['num_filters'] = 300
config['sentence_length'] = 2500

# init data path
train_data_path = '../../corpus/newdata.clean.dat'
test_data_path = '../../corpus/stdtestSet.dat'
channel2id_path = '../../corpus/channel2cid.yaml'
cid2channel_path = '../../corpus/cid2channel.yaml'
dict_path = '../../corpus/dict_texts'

# loading data (the .npy file paths are not given in this snippet)
X_train = np.load()
y_train = np.load()

# build model
model = TextCNN(config)
model.build_graph()

# running model
config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=True)
config.gpu_options.per_process_gpu_memory_fraction = 0.8
session = tf.Session(config=config)
saver = tf.train.Saver(max_to_keep=5)
with tf.Session(config=config) as sess:
    sess.run(tf.global_variables_initializer())
    check_restore_parameters(sess, saver)
    if mode == 'train':
        print('starting training...')
        train_model(sess, model, epochs=20)
    if mode == 'test':
        print('start testing...')
        test_model(sess, model)
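# check_restore_parameters is called above but not defined in this collection.
# A minimal sketch of the usual behaviour, assuming a checkpoint directory
# named 'checkpoints/' (the directory name is a guess, not from the source):
def check_restore_parameters(sess, saver, ckpt_dir='checkpoints/'):
    """Restore the latest checkpoint if one exists, otherwise keep the fresh init."""
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and ckpt.model_checkpoint_path:
        print('Loading parameters from {}'.format(ckpt.model_checkpoint_path))
        saver.restore(sess, ckpt.model_checkpoint_path)
    else:
        print('No checkpoint found, using freshly initialized parameters')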
def main(_):
    X_train, X_val, y_train, y_val, n_classes = train_test_loader(
        FLAGS.just_train)
    with open('data/vocab.dic', 'rb') as f:
        vocab = pickle.load(f)
    vocab_size = len(vocab) + 1
    print('size of vocabulary: {}'.format(vocab_size))

    # padding sentences
    X_train = pad_sequences(X_train, maxlen=FLAGS.sentence_len,
                            value=float(vocab_size - 1))
    if not FLAGS.just_train:
        X_val = pad_sequences(X_val, maxlen=FLAGS.sentence_len,
                              value=float(vocab_size - 1))

    # convert label to one-hot encode
    # to_categorical(y_train, n_classes)
    # to_categorical(y_val, n_classes)

    # create session
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Instantiate Model
        textcnn = TextCNN(filter_sizes, FLAGS.num_filters, FLAGS.num_classes,
                          FLAGS.learning_rate, FLAGS.batch_size,
                          FLAGS.decay_steps, FLAGS.decay_rate,
                          FLAGS.sentence_len, vocab_size, FLAGS.embed_size,
                          FLAGS.is_training, multi_label_flag=False)
        # Initialize saver
        saver = tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir + 'checkpoint'):
            print('restoring variables from checkpoint')
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print('Initializing Variables')
            sess.run(tf.global_variables_initializer())
            if FLAGS.use_embedding:
                assign_pretrained_word_embedding(sess, vocab, vocab_size,
                                                 textcnn)
        curr_epoch = sess.run(textcnn.epoch_step)

        # feed data and training
        number_of_training_data = len(X_train)
        batch_size = FLAGS.batch_size
        best_val_acc = 0.0
        total_epochs = 0
        if not FLAGS.just_train:
            total_epochs = FLAGS.num_epochs
        else:
            total_epochs = 20
        for epoch in range(curr_epoch, total_epochs):
            loss, acc, counter = .0, .0, 0
            for start, end in zip(
                    range(0, number_of_training_data, batch_size),
                    range(batch_size, number_of_training_data, batch_size)):
                if epoch == 0 or counter == 0:
                    pass
                    # print('X_train[start:end]: {}'.format(X_train[start:end]))
                feed_dict = {
                    textcnn.input_x: X_train[start:end],
                    textcnn.dropout_keep_prob: 0.5
                }
                if not FLAGS.multi_label_flag:
                    feed_dict[textcnn.input_y] = y_train[start:end]
                else:
                    feed_dict[textcnn.input_y_multilabel] = y_train[start:end]
                curr_loss, curr_acc, _ = sess.run(
                    [textcnn.loss_val, textcnn.accuracy, textcnn.train_op],
                    feed_dict)
                loss, counter, acc = loss + curr_loss, counter + 1, acc + curr_acc
                if counter % 50 == 0:
                    print('Epoch {}\tBatch {}\tTrain Loss {}\tTrain Accuracy {}'.
                          format(epoch, counter, loss / float(counter),
                                 acc / float(counter)))
            print('going to increment epoch counter ...')
            sess.run(textcnn.epoch_increment)

            # validation
            if not FLAGS.just_train and epoch % FLAGS.validate_every == 0:
                eval_loss, eval_acc = do_eval(sess, textcnn, X_val, y_val,
                                              batch_size)
                unmatched_sample(sess, textcnn, X_val, y_val, batch_size)
                print("Epoch {} Validation Loss: {}\tValidation Accuracy: {}".
                      format(epoch, eval_loss, eval_acc))
                if eval_acc > best_val_acc:
                    if os.path.exists(FLAGS.ckpt_dir):
                        shutil.rmtree(FLAGS.ckpt_dir)
                    best_val_acc = eval_acc
                    # save model to checkpoint
                    save_path = FLAGS.ckpt_dir + "model.ckpt"
                    saver.save(sess, save_path, global_step=epoch)
                else:
                    break

        # report result
        if not FLAGS.just_train:
            test_loss, test_acc = do_eval(sess, textcnn, X_val, y_val,
                                          batch_size)
            unmatched_sample(sess, textcnn, X_val, y_val, batch_size)
        else:
            save_path = FLAGS.ckpt_dir + "model.ckpt"
            saver.save(sess, save_path, global_step=20)
padding_x, max_document_length = padding(x, maxlen=FLAGS.pad_seq_len)
int_y = [int(_y) for _y in y]
encoded_y = one_hot_encode(int_y)
train_x, test_x, train_y, test_y = train_test_data_split(padding_x, encoded_y)

# 3. define session
with tf.Graph().as_default():
    # session_config = tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)
    # sess = tf.Session(config=session_config)
    session_config = tf.compat.v1.ConfigProto(allow_soft_placement=True,
                                              log_device_placement=False)
    sess = tf.compat.v1.Session(config=session_config)
    with sess.as_default():
        model = TextCNN(FLAGS.pad_seq_len, FLAGS.num_classes,
                        len(data_helper.token2idx), FLAGS.embedding_dim,
                        FLAGS.learning_rate, FLAGS.filter_sizes,
                        FLAGS.num_filters, FLAGS.random_embedding,
                        FLAGS.l2_reg_lambda)

        # 4. define important variables
        global_step = tf.Variable(initial_value=0, trainable=False,
                                  name="global_step")
        optimizer = tf.compat.v1.train.AdamOptimizer(FLAGS.learning_rate)
        grads_and_vars = optimizer.compute_gradients(model.loss)
        train_op = optimizer.apply_gradients(grads_and_vars, global_step)

        # 5. record `summaries`, like: scalars, graph, histogram
        # I. keep track of gradient values and sparsity
        grad_summaries = []
        for g, v in grads_and_vars:
logger.info('loading data...')
try:
    (x_train, y_train), (x_test, y_test) = imdb.load_data(
        num_words=max_features)
except:
    logger.info('np bug occur...')
    (x_train, y_train), (x_test, y_test) = load_data(num_words=max_features)
logger.info('train data length: {}'.format(len(x_train)))
logger.info('test data length: {}'.format(len(x_test)))

logger.info('padding data...')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)

logger.info('build model...')
model = TextCNN(max_features=max_features, maxlen=maxlen,
                emb_dim=emb_dim).build_model()

logger.info('training...')
earlystop = EarlyStopping(patience=3, mode='max', monitor='val_acc')
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['acc'])
model.fit(x_train,
          y_train,
          batch_size=batch_size,
          epochs=epochs,
          callbacks=[earlystop],
          validation_data=(x_test, y_test))

logger.info('test...')
pred = model.predict(x_test[:10])
logger.info(list(zip(pred, y_test[:10])))
import os

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data.dataloader import DataLoader

import data_preparation
from config import Config
from data_preparation import THCNewsDataSet, batch_iter
from textcnn import TextCNN

device = torch.device(
    "cuda:0") if torch.cuda.is_available() else torch.device("cpu")
model = TextCNN()
model = model.to(device)
opt = optim.Adam(model.parameters())
criterion = nn.CrossEntropyLoss()


def save_model(model, model_name="best_model_sofa.pkl",
               model_save_dir="./trained_models/"):
    if not os.path.exists(model_save_dir):
        os.makedirs(model_save_dir)
    path = os.path.join(model_save_dir, model_name)
    torch.save(model.state_dict(), path)
    print("saved model state dict at :" + path)
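# The snippet above only sets up the model, optimizer, loss, and save_model
# helper; the training loop itself is not shown. A minimal sketch of how these
# pieces are typically combined, assuming THCNewsDataSet yields (text, label)
# tensors and batch_iter behaves like a DataLoader-style batch generator
# (both assumptions, since their definitions are not included here):
def train_one_epoch(train_dataset, batch_size=64):
    model.train()
    total_loss = 0.0
    for texts, labels in batch_iter(train_dataset, batch_size):
        texts, labels = texts.to(device), labels.to(device)
        opt.zero_grad()
        logits = model(texts)
        loss = criterion(logits, labels)
        loss.backward()
        opt.step()
        total_loss += loss.item()
    return total_loss

# usage sketch (dataset construction omitted; THCNewsDataSet's constructor
# arguments are not shown in this snippet):
# train_set = THCNewsDataSet(...)
# best = float("inf")
# for epoch in range(10):
#     epoch_loss = train_one_epoch(train_set)
#     if epoch_loss < best:
#         best = epoch_loss
#         save_model(model)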