Example no. 1
    def test(self, sess, x, y):
        batch_test = batch_iter(x, y, batch_size=pm.batch_size)
        for x_batch, y_batch in batch_test:
            real_seq_len = seq_length(x_batch)
            # keep_prob of 1.0 disables dropout at evaluation time
            feed_dict = self.feed_data(x_batch, y_batch, real_seq_len, 1.0)
            test_loss, test_accuracy = sess.run([self.loss, self.accuracy], feed_dict=feed_dict)

        # note: only the metrics of the last batch are returned
        return test_loss, test_accuracy
Example no. 2
    def test(self, sess, x, y):
        batch_test = batch_iter(x, y, batch_size=pm.batch_size)
        for x_batch, y_batch in batch_test:
            x_batch, seq_length_x = process(x_batch)
            y_batch, seq_length_y = process(y_batch)
            feed_dict = self.feed_data(x_batch, y_batch, seq_length_x, 1.0)
            loss = sess.run(self.loss, feed_dict=feed_dict)
        # note: only the loss of the last batch is returned
        return loss
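The test() methods above return the loss (and accuracy) of only the last batch. A minimal sketch of an averaged alternative, assuming the same batch_iter, process, and feed_data helpers used in these examples (test_avg is a hypothetical name, not part of the original code):

    def test_avg(self, sess, x, y):
        # hypothetical variant: average the loss over all test batches
        total_loss, num_batches = 0.0, 0
        for x_batch, y_batch in batch_iter(x, y, batch_size=pm.batch_size):
            x_batch, seq_length_x = process(x_batch)
            y_batch, _ = process(y_batch)
            feed_dict = self.feed_data(x_batch, y_batch, seq_length_x, 1.0)
            total_loss += sess.run(self.loss, feed_dict=feed_dict)
            num_batches += 1
        return total_loss / max(num_batches, 1)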
Example no. 3
def train():

    """使用tensorboard创建视图"""
    tensorboard_dir = './tensorboard/Lstm_CNN'
    save_dir = './checkpoints/Lstm_CNN'
    if not os.path.exists(os.path.join(tensorboard_dir, 'train')):
        os.makedirs(os.path.join(tensorboard_dir, 'train'))
    if not os.path.exists(os.path.join(tensorboard_dir, 'test')):
        os.makedirs(os.path.join(tensorboard_dir, 'test'))
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    #  Attach summary ops to the nodes you want to track; for example, record scalars:
    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.accuracy)
    #  Bundle all desired node summaries into a single op with tf.summary.merge_all(), here named merged_summary, then create writers as the interface for the later writes to disk;
    merged_summary = tf.summary.merge_all()
    writer_train = tf.summary.FileWriter(os.path.join(tensorboard_dir, 'train'))         # save the graph under this path
    writer_test = tf.summary.FileWriter(os.path.join(tensorboard_dir, 'test'))
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer_train.add_graph(session.graph)

    """处理训练集、测试集数据"""
    x_train, y_train = process(pm.train_filename, wordid, cat_to_id, max_length=300)
    x_test, y_test = process(pm.test_filename, wordid, cat_to_id, max_length=300)
    for epoch in range(pm.num_epochs):
        print('Epoch:', epoch+1)
        num_batchs = int((len(x_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(x_train, y_train, batch_size=pm.batch_size)
        for x_batch, y_batch in batch_train:
            real_seq_len = seq_length(x_batch)          # get the true length of each sentence
            feed_dict = model.feed_data(x_batch, y_batch, real_seq_len, pm.keep_prob)
            _, global_step, _summary, train_loss, train_accuracy = session.run([model.optimizer, model.global_step, merged_summary,
                                                                                model.loss, model.accuracy], feed_dict=feed_dict)
            summary = tf.Summary(value=[tf.Summary.Value(tag="loss", simple_value=train_loss)])
            writer_train.add_summary(summary, global_step)
            summary = tf.Summary(value=[tf.Summary.Value(tag="accuracy", simple_value=train_accuracy)])
            writer_train.add_summary(summary, global_step)
            if global_step % 5 == 0:
                test_loss, test_accuracy = model.test(session, x_test, y_test)
                print('global_step:', global_step, 'train_loss:', train_loss, 'train_accuracy:', train_accuracy,
                      'test_loss:', test_loss, 'test_accuracy:', test_accuracy)
                summary = tf.Summary(value=[tf.Summary.Value(tag="accuracy", simple_value=test_accuracy)])
                writer_test.add_summary(summary, global_step)
                summary = tf.Summary(value=[tf.Summary.Value(tag="loss", simple_value=test_loss)])
                writer_test.add_summary(summary, global_step)

            if global_step % num_batchs == 0:
                print('Saving Model...')
                saver.save(session, save_path, global_step=global_step)

        pm.learning_rate *= pm.lr_decay
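Each of these train() loops decays pm.learning_rate on the Python side once per epoch. In TF1 that only changes the effective rate if the graph reads the value back each step, for example through a placeholder; a minimal sketch of that pattern (lr_pl and train_op are assumed names, not from these examples):

# hypothetical sketch: feed the Python-side learning rate into the graph each step
lr_pl = tf.placeholder(tf.float32, shape=[], name='learning_rate')
train_op = tf.train.AdamOptimizer(learning_rate=lr_pl).minimize(model.loss)

# inside the batch loop, add the current rate to feed_dict
feed_dict[lr_pl] = pm.learning_rate
session.run(train_op, feed_dict=feed_dict)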
Example no. 4
def train():

    tensorboard_dir = './tensorboard/Rnn_Attention'
    save_dir = './checkpoints/Rnn_Attention'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.accuracy)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    x_train, y_train = process(pm.train_filename,
                               wordid,
                               cat_to_id,
                               max_length=250)
    x_test, y_test = process(pm.test_filename,
                             wordid,
                             cat_to_id,
                             max_length=250)
    for epoch in range(pm.num_epochs):
        print('Epoch:', epoch + 1)
        num_batchs = int((len(x_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(x_train, y_train, batch_size=pm.batch_size)
        for x_batch, y_batch in batch_train:
            seq_len = sequence(x_batch)
            feed_dict = model.feed_data(x_batch, y_batch, seq_len,
                                        pm.keep_prob)
            _, global_step, _summary, train_loss, train_accuracy = session.run(
                [
                    model.optimizer, model.global_step, merged_summary,
                    model.loss, model.accuracy
                ],
                feed_dict=feed_dict)
            if global_step % 100 == 0:
                test_loss, test_accuracy = model.evaluate(
                    session, x_test, y_test)
                print('global_step:', global_step, 'train_loss:', train_loss,
                      'train_accuracy:', train_accuracy, 'test_loss:',
                      test_loss, 'test_accuracy:', test_accuracy)

            if global_step % num_batchs == 0:
                print('Saving Model...')
                saver.save(session, save_path, global_step=global_step)

        pm.learning_rate *= pm.lr_decay  # decay the learning rate once per epoch
Example no. 5
def train():
    tensorboard_dir = './tensorboard/biLstm_crf'
    save_dir = './checkpoints/biLstm_crf'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    tf.summary.scalar('loss', model.loss)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    content_train, label_train = sequence2id(pm.train)
    content_test, label_test = sequence2id(pm.test)

    for epoch in range(pm.epochs):
        print('Epoch:', epoch + 1)
        num_batchs = int((len(content_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(content_train, label_train)
        for x_batch, y_batch in batch_train:
            x_batch, seq_leng_x = process(x_batch)
            y_batch, seq_leng_y = process(y_batch)
            feed_dict = model.feed_data(x_batch, y_batch, seq_leng_x,
                                        pm.keep_pro)
            _, global_step, loss, train_summary = session.run(
                [
                    model.optimizer, model.global_step, model.loss,
                    merged_summary
                ],
                feed_dict=feed_dict)
            if global_step % 100 == 0:
                test_loss = model.test(session, content_test, label_test)
                print('global_step:', global_step, 'train_loss:', loss,
                      'test_loss:', test_loss)

            if global_step % (2 * num_batchs) == 0:
                print('Saving Model...')
                saver.save(session,
                           save_path=save_path,
                           global_step=global_step)
        pm.learning_rate *= pm.lr
Example no. 6
def val():

    pre_label = []
    label = []
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    save_path = tf.train.latest_checkpoint('./checkpoints/Lstm_CNN')
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=save_path)

    val_x, val_y = process(pm.val_filename, wordid, cat_to_id, max_length=pm.seq_length)
    batch_val = batch_iter(val_x, val_y, batch_size=64)
    for x_batch, y_batch in batch_val:
        real_seq_len = seq_length(x_batch)
        feed_dict = model.feed_data(x_batch, y_batch, real_seq_len, 1.0)
        pre_lab = session.run(model.predict, feed_dict=feed_dict)
        pre_label.extend(pre_lab)
        label.extend(y_batch)
    return pre_label, label
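val() returns parallel lists of predicted and true labels. A hedged sketch of turning them into metrics, assuming scikit-learn is available and that the rows of y_batch are one-hot (hence the argmax):

# hypothetical post-processing of the val() outputs
import numpy as np
from sklearn import metrics

pre_label, label = val()
true_label = np.argmax(label, axis=1)  # assumes one-hot label rows
print(metrics.classification_report(true_label, pre_label))
print(metrics.confusion_matrix(true_label, pre_label))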
Example no. 7
def val():

    pre_label = []
    label = []
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    save_path = tf.train.latest_checkpoint('./checkpoints/Rnn_Attention')
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=save_path)

    val_x, val_y = process(pm.val_filename, wordid, cat_to_id, max_length=250)
    batch_val = batch_iter(val_x, val_y, batch_size=64)
    for x_batch, y_batch in batch_val:
        seq_len = sequence(x_batch)
        pre_lab = session.run(model.predict,
                              feed_dict={
                                  model.input_x: x_batch,
                                  model.seq_length: seq_len,
                                  model.keep_pro: 1.0
                              })
        pre_label.extend(pre_lab)
        label.extend(y_batch)
    return pre_label, label
Example no. 8
def train():

    tensorboard_dir = './tensorboard/Seq2Seq'
    save_dir = './checkpoints/Seq2Seq'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    tf.summary.scalar('loss', model.loss)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    x, y = label2id(pm.train_data)
    # note: the train and test slices overlap (50001:80000 lies inside 1:100001)
    x_train, y_train = x[1:100001], y[1:100001]
    x_test, y_test = x[50001:80000], y[50001:80000]
    for epoch in range(pm.num_epochs):
        print('Epoch:', epoch+1)
        num_batchs = int((len(x_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(x_train, y_train, batch_size=pm.batch_size)
        for x_batch, y_batch in batch_train:
            feed_dict = model.feed_data(x_batch, y_batch, pm.keep_pro)
            _, global_step, _summary, train_loss = session.run([model.optimizer, model.global_step, merged_summary,
                                                                model.loss], feed_dict=feed_dict)
            if global_step % 100 == 0:
                test_loss = model.test(session, x_test, y_test)
                print('global_step:', global_step, 'train_loss:', train_loss, 'test_loss:', test_loss)

            if global_step % (3*num_batchs) == 0:
                print('Saving Model...')
                saver.save(session, save_path, global_step=global_step)
        pm.learning_rate *= pm.lr
Example no. 9
def train(
    x_word_train,
    y_word_train,
    x_word_test,
    y_word_test,
    x_selfentity_train,
    y_selfentity_train,
    x_selfentity_test,
    y_selfentity_test,
    x_fatherentity_train,
    y_fatherentity_train,
    x_fatherentity_test,
    y_fatherentity_test,
    x_cnn_train,
    y_cnn_train,
    x_cnn_test,
    y_cnn_test,
):
    tensorboard_dir = './tensorboard/Rnn_Attention'
    save_dir = './checkpoints/Rnn_Attention'
    if not os.path.exists(tensorboard_dir):  # just creates the directory
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.accuracy)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)

    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    T_loss = []
    L_loss = []

    for epoch in range(pm.num_epochs):
        # print('Epoch:', epoch + 1)
        num_batchs = int((len(x_word_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(x_word_train,
                                 y_word_train,
                                 x_selfentity_train,
                                 y_selfentity_train,
                                 x_fatherentity_train,
                                 y_fatherentity_train,
                                 x_cnn_train,
                                 y_cnn_train,
                                 batch_size=pm.batch_size)

        for x1, y1, x2, y2, x3, y3, x4, y4 in batch_train:  # pitfall: these loop names must not shadow the variables passed to batch_iter above

            word_seq_len = sequence(x1)
            selfentity_seq_len = sequence(x2)
            fatherentity_seq_len = sequence(x3)
            cnn_inter_seq_len = sequence(x4)

            feed_dict = model.feed_data(x1, y1, word_seq_len, x2, y2,
                                        selfentity_seq_len, x3, y3,
                                        fatherentity_seq_len, x4, y4,
                                        cnn_inter_seq_len, pm.keep_prob)

            _, global_step, _summary, train_loss, train_accuracy = session.run(
                [
                    model.optimizer, model.global_step, merged_summary,
                    model.loss, model.accuracy
                ],
                feed_dict=feed_dict)

            # print('global_step:', global_step, 'train_loss:', train_loss, 'train_accuracy:', train_accuracy)
            if global_step % 50 == 0:
                test_loss, test_accuracy = model.evaluate(
                    session, x_word_test, y_word_test, x_selfentity_test,
                    y_selfentity_test, x_fatherentity_test,
                    y_fatherentity_test, x_cnn_test, y_cnn_test)
                print('global_step:', global_step, 'train_loss:', train_loss,
                      'train_accuracy:', train_accuracy, 'test_loss:',
                      test_loss, 'test_accuracy:', test_accuracy)
                T_loss.append(train_loss)
                L_loss.append(test_loss)

            if global_step % num_batchs == 0:
                print('Saving Model...')
                saver.save(session, save_path, global_step=global_step)
        pm.learning_rate *= pm.lr_decay  # decay the learning rate once per epoch

    x = range(len(T_loss))
    plt.plot(x, T_loss)         # blue: training loss
    plt.plot(x, L_loss, "r--")  # red dashed: test loss
    plt.show()
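plt.show() requires an interactive display; a small sketch of the same plotting block that also labels the axes and writes the figure to disk (the output filename is an assumption):

# hypothetical variant of the plotting block above
x = range(len(T_loss))
plt.plot(x, T_loss, label='train loss')
plt.plot(x, L_loss, 'r--', label='test loss')
plt.xlabel('evaluation step (every 50 batches)')
plt.ylabel('loss')
plt.legend()
plt.savefig('loss_curves.png', dpi=150)  # assumed output path
plt.show()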
Example no. 10
    def test(self, sess, x, y):
        batch_test = batch_iter(x, y, batch_size=pm.batch_size)
        for x_batch, y_batch in batch_test:
            feed_dict = self.feed_data(x_batch, y_batch, 1.0)
            test_loss = sess.run(self.loss, feed_dict=feed_dict)
        # note: only the loss of the last batch is returned
        return test_loss
Example no. 11
# ## Train and Evaluate Model

learning_rate = 0.001
num_epochs = 30
batch_size = 100

# ### Human Results
vocab_size = 23  # number of words in the vocabulary
data_size = len(human_train_tensors)
num_labels = human_GO_terms.shape[0]

lstm = LSTM(vocab_size, emb_dim, hidden_dim, num_labels, batch_size)
criterion = nn.MultiLabelSoftMarginLoss()
optimizer = torch.optim.Adam(lstm.parameters(),
                             lr=learning_rate,
                             weight_decay=L2_penalty)

data_iter = dp.batch_iter(batch_size, human_train_tensors, human_train_labels,
                          human_train_lengths)
dev_batches = dp.eval_iter(batch_size, human_valid_tensors, human_valid_labels)

if torch.cuda.is_available():
    lstm = lstm.cuda()
    criterion = criterion.cuda()

# Model Training
organism = 'Human'
train_test(num_epochs, optimizer, data_iter, dev_batches, lstm, data_size,
           output_file)
Example no. 12
# ## Train and Evaluate Model

learning_rate = 0.001
num_epochs = 30
batch_size = 100

# ### Yeast Results
vocab_size = 21  # number of words in the vocabulary
data_size = len(yeast_train_tensors)
num_labels = yeast_GO_terms.shape[0]

lstm = LSTM(vocab_size, emb_dim, hidden_dim, num_labels, batch_size)
criterion = nn.MultiLabelSoftMarginLoss()
optimizer = torch.optim.Adam(lstm.parameters(),
                             lr=learning_rate,
                             weight_decay=L2_penalty)

data_iter = dp.batch_iter(batch_size, yeast_train_tensors, yeast_train_labels,
                          yeast_train_lengths)
dev_batches = dp.eval_iter(batch_size, yeast_valid_tensors, yeast_valid_labels)

if torch.cuda.is_available():
    lstm = lstm.cuda()
    criterion = criterion.cuda()

# Model Training
organism = 'Yeast'
train_test(num_epochs, optimizer, data_iter, dev_batches, lstm, data_size,
           output_file)
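Examples 11 and 12 differ only in the vocabulary size and the dataset tensors. A sketch of factoring the shared setup into one helper, assuming the same LSTM class, dp module, and train_test function (the helper's name and signature are assumptions):

# hypothetical helper collapsing the duplicated Human/Yeast setup
def build_and_train(vocab_size, train_tensors, train_labels, train_lengths,
                    valid_tensors, valid_labels, num_labels):
    lstm = LSTM(vocab_size, emb_dim, hidden_dim, num_labels, batch_size)
    criterion = nn.MultiLabelSoftMarginLoss()
    optimizer = torch.optim.Adam(lstm.parameters(), lr=learning_rate,
                                 weight_decay=L2_penalty)
    data_iter = dp.batch_iter(batch_size, train_tensors, train_labels,
                              train_lengths)
    dev_batches = dp.eval_iter(batch_size, valid_tensors, valid_labels)
    if torch.cuda.is_available():
        lstm, criterion = lstm.cuda(), criterion.cuda()
    train_test(num_epochs, optimizer, data_iter, dev_batches, lstm,
               len(train_tensors), output_file)

# usage, e.g.:
# build_and_train(23, human_train_tensors, human_train_labels,
#                 human_train_lengths, human_valid_tensors,
#                 human_valid_labels, human_GO_terms.shape[0])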
Example no. 13
def val():
    """----word level----"""
    all_word_file = "./word/all_clean.txt"
    train_word_file = "./word/train_clean.txt"
    test_word_file = "./word/test_clean.txt"
    """----self entity level----"""
    all_selfentity_file = "./self_entity/all_good_selfentity_last.txt"
    train_selfentity_file = "./self_entity/train_good_selfentity_last.txt"
    test_selfentity_file = "./self_entity/test_good_selfentity_last.txt"
    """----father entity level----"""
    all_fatherentity_file = "./father_entity/all_good_fatherentity_last.txt"
    train_fatherentity_file = "./father_entity/train_good_fatherentity_last.txt"
    test_fatherentity_file = "./father_entity/test_good_fatherentity_last.txt"
    """---------CNN--interaction----------"""

    all_cnn_file = "./interaction_data/all_label.txt"
    train_cnn_file = "./interaction_data/train_label.txt"
    test_cnn_file = "./interaction_data/test_label.txt"

    pre_label = []
    label = []
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    save_path = tf.train.latest_checkpoint('./checkpoints/Rnn_Attention')
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=save_path)

    x_word_train, y_word_train, vocab_word_processor, x_word_test, y_word_test, \
    x_selfentity_train, y_selfentity_train, vocab_selfentity_processor, x_selfentity_test, y_selfentity_test, \
    x_fatherentity_train, y_fatherentity_train, vocab_fatherentity_processor, x_fatherentity_test, y_fatherentity_test, \
    x_cnn_train, y_cnn_train, vocab_cnn_processor, x_cnn_test, y_cnn_test \
        = shuffle_data(all_word_file, train_word_file, test_word_file,
                       all_selfentity_file, train_selfentity_file, test_selfentity_file,
                       all_fatherentity_file, train_fatherentity_file, test_fatherentity_file,
                       all_cnn_file, train_cnn_file, test_cnn_file
                       )

    batch_val = batch_iter(x_word_test,
                           y_word_test,
                           x_selfentity_test,
                           y_selfentity_test,
                           x_fatherentity_test,
                           y_fatherentity_test,
                           x_cnn_test,
                           y_cnn_test,
                           batch_size=64)

    # pitfall (see Example no. 9): the loop variables must not shadow the
    # test-set names passed to batch_iter above, so use fresh names here
    for x1, y1, x2, y2, x3, y3, x4, y4 in batch_val:

        seq_len = sequence(x1)
        selfentity_len = sequence(x2)
        fatherentity_len = sequence(x3)
        cnninter_len = sequence(x4)

        pre_lab = session.run(model.predict,
                              feed_dict={
                                  model.input_word_x1: x1,
                                  model.input_y1: y1,
                                  model.input_selfentity_x1: x2,
                                  model.input_selfentity_y1: y2,
                                  model.input_fatherentity_x1: x3,
                                  model.input_fatherentity_y1: y3,
                                  model.input_cnn_x1: x4,
                                  model.input_cnn_y1: y4,
                                  model.word_seq_length: seq_len,
                                  model.self_entity_seq_length: selfentity_len,
                                  model.father_entity_seq_length: fatherentity_len,
                                  model.cnn_inter_seq_length: cnninter_len,
                                  model.keep_pro: 1.0
                              })
        pre_label.extend(pre_lab)
        label.extend(y1)
    return pre_label, label