def test(self, sess, x, y):
    # Average the loss over every batch of the evaluation set
    # (the original returned only the final batch's loss).
    batch_test = batch_iter(x, y, batch_size=pm.batch_size)
    losses = []
    for x_batch, y_batch in batch_test:
        x_batch, seq_length_x = process(x_batch)
        y_batch, seq_length_y = process(y_batch)
        feed_dict = self.feed_data(x_batch, y_batch, seq_length_x, 1.0)
        losses.append(sess.run(self.loss, feed_dict=feed_dict))
    return sum(losses) / len(losses)
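The single-argument `process` used here is not shown on this page. A minimal sketch of what it is assumed to do, namely pad each batch to a common length and return the true lengths alongside (the `pad_id` default is a hypothetical choice):

def process(batch, pad_id=0):
    # Pad every sequence to the length of the longest one in the batch,
    # and return the original (unpadded) lengths for the feed_dict.
    seq_lengths = [len(seq) for seq in batch]
    max_len = max(seq_lengths)
    padded = [list(seq) + [pad_id] * (max_len - len(seq)) for seq in batch]
    return padded, seq_lengths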
Example #2
def train():

    """使用tensorboard创建视图"""
    tensorboard_dir = './tensorboard/Lstm_CNN'
    save_dir = './checkpoints/Lstm_CNN'
    if not os.path.exists(os.path.join(tensorboard_dir, 'train')):
        os.makedirs(os.path.join(tensorboard_dir, 'train'))
    if not os.path.exists(os.path.join(tensorboard_dir, 'test')):
        os.makedirs(os.path.join(tensorboard_dir, 'test'))
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    # Attach summary ops to the nodes you want to track; for example, record scalars:
    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.accuracy)
    # Bundle all the desired summaries into a single node via tf.summary.merge_all()
    # (named merged_summary here), then create writers as the interface for later flushing to disk.
    merged_summary = tf.summary.merge_all()
    writer_train = tf.summary.FileWriter(os.path.join(tensorboard_dir, 'train'))         # write the graph and summaries to this directory
    writer_test = tf.summary.FileWriter(os.path.join(tensorboard_dir, 'test'))
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer_train.add_graph(session.graph)

    """处理训练集、测试集数据"""
    x_train, y_train = process(pm.train_filename, wordid, cat_to_id, max_length=300)
    x_test, y_test = process(pm.test_filename, wordid, cat_to_id, max_length=300)
    for epoch in range(pm.num_epochs):
        print('Epoch:', epoch+1)
        num_batchs = int((len(x_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(x_train, y_train, batch_size=pm.batch_size)
        for x_batch, y_batch in batch_train:
            real_seq_len = seq_length(x_batch)          # get each sentence's true (unpadded) length
            feed_dict = model.feed_data(x_batch, y_batch, real_seq_len, pm.keep_prob)
            _, global_step, _summary, train_loss, train_accuracy = session.run([model.optimizer, model.global_step, merged_summary,
                                                                                model.loss, model.accuracy], feed_dict=feed_dict)
            summary = tf.Summary(value=[tf.Summary.Value(tag="loss", simple_value=train_loss)])
            writer_train.add_summary(summary, global_step)
            summary = tf.Summary(value=[tf.Summary.Value(tag="accuracy", simple_value=train_accuracy)])
            writer_train.add_summary(summary, global_step)
            if global_step % 5 == 0:
                test_loss, test_accuracy = model.test(session, x_test, y_test)
                print('global_step:', global_step, 'train_loss:', train_loss, 'train_accuracy:', train_accuracy,
                      'test_loss:', test_loss, 'test_accuracy:', test_accuracy)
                summary = tf.Summary(value=[tf.Summary.Value(tag="accuracy", simple_value=test_accuracy)])
                writer_test.add_summary(summary, global_step)
                summary = tf.Summary(value=[tf.Summary.Value(tag="loss", simple_value=test_loss)])
                writer_test.add_summary(summary, global_step)

            if global_step % num_batchs == 0:
                print('Saving Model...')
                saver.save(session, save_path, global_step=global_step)

        pm.learning_rate *= pm.lr_decay
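Every training loop on this page iterates over a `batch_iter` helper that is not shown. A minimal sketch, assuming it reshuffles the data on each call and yields aligned (x, y) batches:

import numpy as np

def batch_iter(x, y, batch_size=64):
    # Shuffle x and y with one shared permutation, then yield batches.
    x, y = np.asarray(x), np.asarray(y)
    indices = np.random.permutation(len(x))
    x, y = x[indices], y[indices]
    num_batchs = int((len(x) - 1) / batch_size) + 1
    for i in range(num_batchs):
        start = i * batch_size
        end = min(start + batch_size, len(x))
        yield x[start:end], y[start:end]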
Example #3
def train():

    tensorboard_dir = './tensorboard/Rnn_Attention'
    save_dir = './checkpoints/Rnn_Attention'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    tf.summary.scalar('loss', model.loss)
    tf.summary.scalar('accuracy', model.accuracy)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    x_train, y_train = process(pm.train_filename,
                               wordid,
                               cat_to_id,
                               max_length=250)
    x_test, y_test = process(pm.test_filename,
                             wordid,
                             cat_to_id,
                             max_length=250)
    for epoch in range(pm.num_epochs):
        print('Epoch:', epoch + 1)
        num_batchs = int((len(x_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(x_train, y_train, batch_size=pm.batch_size)
        for x_batch, y_batch in batch_train:
            seq_len = sequence(x_batch)
            feed_dict = model.feed_data(x_batch, y_batch, seq_len,
                                        pm.keep_prob)
            _, global_step, _summary, train_loss, train_accuracy = session.run(
                [
                    model.optimizer, model.global_step, merged_summary,
                    model.loss, model.accuracy
                ],
                feed_dict=feed_dict)
            if global_step % 100 == 0:
                test_loss, test_accuracy = model.evaluate(
                    session, x_test, y_test)
                print('global_step:', global_step, 'train_loss:', train_loss,
                      'train_accuracy:', train_accuracy, 'test_loss:',
                      test_loss, 'test_accuracy:', test_accuracy)

            if global_step % num_batchs == 0:
                print('Saving Model...')
                saver.save(session, save_path, global_step=global_step)

        pm.learning_rate *= pm.lr_decay
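Example #2 calls `seq_length(x_batch)` and this example calls `sequence(x_batch)`; both appear to return the true (pre-padding) length of each sentence. A minimal sketch under the assumption that id 0 is the padding token:

import numpy as np

def seq_length(x_batch, pad_id=0):
    # Count the non-padding tokens in each row of the padded batch.
    return np.sum(np.asarray(x_batch) != pad_id, axis=1)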
Example #4
def train():
    tensorboard_dir = './tensorboard/biLstm_crf'
    save_dir = './checkpoints/biLstm_crf'
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)
    if not os.path.exists(save_dir):
        os.makedirs(save_dir)
    save_path = os.path.join(save_dir, 'best_validation')

    tf.summary.scalar('loss', model.loss)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)
    saver = tf.train.Saver()
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    writer.add_graph(session.graph)

    content_train, label_train = sequence2id(pm.train)
    content_test, label_test = sequence2id(pm.test)

    for epoch in range(pm.epochs):
        print('Epoch:', epoch + 1)
        num_batchs = int((len(content_train) - 1) / pm.batch_size) + 1
        batch_train = batch_iter(content_train, label_train)
        for x_batch, y_batch in batch_train:
            x_batch, seq_leng_x = process(x_batch)
            y_batch, seq_leng_y = process(y_batch)
            feed_dict = model.feed_data(x_batch, y_batch, seq_leng_x,
                                        pm.keep_pro)
            _, global_step, loss, train_summary = session.run(
                [
                    model.optimizer, model.global_step, model.loss,
                    merged_summary
                ],
                feed_dict=feed_dict)
            if global_step % 100 == 0:
                test_loss = model.test(session, content_test, label_test)
                print('global_step:', global_step, 'train_loss:', loss,
                      'test_loss:', test_loss)

            if global_step % (2 * num_batchs) == 0:
                print('Saving Model...')
                saver.save(session,
                           save_path=save_path,
                           global_step=global_step)
        pm.learning_rate *= pm.lr
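`sequence2id` is not defined on this page. A plausible sketch for a character-level NER corpus where each line holds one character and its tag, sentences are separated by blank lines, and `word2id`/`tag2id` are preloaded lookup dicts (all of these are assumptions about the data format, not taken from the original repo):

def sequence2id(filename):
    # Convert each tagged sentence into parallel lists of character ids
    # and label ids.
    contents, labels = [], []
    chars, tags = [], []
    with open(filename, encoding='utf-8') as f:
        for line in f:
            line = line.strip()
            if not line:  # blank line marks a sentence boundary
                if chars:
                    contents.append(chars)
                    labels.append(tags)
                    chars, tags = [], []
                continue
            char, tag = line.split()
            chars.append(word2id.get(char, 0))  # 0 for out-of-vocabulary
            tags.append(tag2id[tag])
    if chars:  # flush the last sentence if the file has no trailing blank line
        contents.append(chars)
        labels.append(tags)
    return contents, labels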
Example #5
    def __init__(self):

        self.train_x, self.train_y = process(train_filename,
                                             wordid,
                                             cat_to_id,
                                             max_length=300)
        self.test_x, self.test_y = process(test_filename,
                                           wordid,
                                           cat_to_id,
                                           max_length=300)
        self.train_x = self.train_x.astype(np.int32)
        self.test_x = self.test_x.astype(np.int32)
        self.train_y = self.train_y.astype(np.float32)
        self.test_y = self.test_y.astype(np.float32)
        self.num_train = self.train_x.shape[0]
        self.num_test = self.test_x.shape[0]
        self.db_train = tf.data.Dataset.from_tensor_slices(
            (self.train_x, self.train_y))
        self.db_train = self.db_train.shuffle(self.num_train).batch(
            batch_size, drop_remainder=True)
        self.db_test = tf.data.Dataset.from_tensor_slices(
            (self.test_x, self.test_y))
        self.db_test = self.db_test.shuffle(self.num_test).batch(
            batch_size, drop_remainder=True)
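A typical way to consume the two pipelines built above, sketched for TF 2.x eager execution; the model is assumed to be a `tf.keras.Model`, and the optimizer/loss choices here are illustrative, not from the original:

import tensorflow as tf

optimizer = tf.keras.optimizers.Adam(1e-3)
loss_fn = tf.keras.losses.CategoricalCrossentropy(from_logits=True)

def train_one_epoch(model, db_train):
    # One pass over the shuffled, batched training set.
    for x_batch, y_batch in db_train:
        with tf.GradientTape() as tape:
            logits = model(x_batch, training=True)
            loss = loss_fn(y_batch, logits)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))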
Example #6
def predict(self, sess, x_batch):
    seq_pad, seq_length = process(x_batch)
    logits, transition_params = sess.run(
        [self.logits, self.transition_params],
        feed_dict={
            self.input_x: seq_pad,
            self.seq_length: seq_length,
            self.keep_pro: 1.0
        })
    label_ = []
    for logit, length in zip(logits, seq_length):
        # logit holds the per-token scores for one sentence and length is its
        # true length, so logit[:length] drops the padded positions.
        # Run the Viterbi algorithm to recover the optimal tag sequence.
        viterbi_seq, _ = viterbi_decode(logit[:length], transition_params)
        label_.append(viterbi_seq)
    return label_
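For context, the `transition_params` consumed by `viterbi_decode` are normally learned by the CRF loss. A minimal sketch of how the model's `loss` and `transition_params` attributes could be wired inside the model's graph-building code in TF 1.x (assumed wiring, not taken from the original model):

import tensorflow as tf
from tensorflow.contrib import crf

# self.logits: [batch, max_len, num_tags]; self.input_y: [batch, max_len] gold tag ids
log_likelihood, self.transition_params = crf.crf_log_likelihood(
    inputs=self.logits,
    tag_indices=self.input_y,
    sequence_lengths=self.seq_length)
self.loss = tf.reduce_mean(-log_likelihood)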
Example #7
def val():
    pre_label = []
    label = []
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    save_path = tf.train.latest_checkpoint('./checkpoints/Text_cnn')
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=save_path)

    val_x, val_y = process(pm.val_filename, wordid, cat_to_id, max_length=600)
    batch_val = batch_iter(val_x, val_y, batch_size=64)
    for x_batch, y_batch in batch_val:
        pre_lab = session.run(model.predicitions, feed_dict={model.input_x: x_batch,
                                                             model.keep_pro: 1.0})
        pre_label.extend(pre_lab)
        label.extend(y_batch)
    return pre_label, label
Example #8
def val():

    pre_label = []
    label = []
    session = tf.Session()
    session.run(tf.global_variables_initializer())
    save_path = tf.train.latest_checkpoint('./checkpoints/Lstm_CNN')
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=save_path)

    val_x, val_y = process(pm.val_filename, wordid, cat_to_id, max_length=pm.seq_length)
    batch_val = batch_iter(val_x, val_y, batch_size=64)
    for x_batch, y_batch in batch_val:
        real_seq_len = seq_length(x_batch)
        feed_dict = model.feed_data(x_batch, y_batch, real_seq_len, 1.0)
        pre_lab = session.run(model.predict, feed_dict=feed_dict)
        pre_label.extend(pre_lab)
        label.extend(y_batch)
    return pre_label, label
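The (pre_label, label) pairs returned by these val() functions can be scored directly; note that label holds one-hot rows while pre_label holds class indices. A minimal scoring sketch (assumed usage, not part of the original):

import numpy as np

pre_label, label = val()
y_pred = np.asarray(pre_label)
y_true = np.argmax(np.asarray(label), axis=1)  # one-hot -> class index
accuracy = np.mean(y_pred == y_true)
print('val accuracy: {:.4f}'.format(accuracy))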
Example #9
##########################

comparison = []

for sbj in S:  # for each subject

    print('Subject: ' + str(sbj))

    for t in T:  # for each time-segmentation criterion

        t_seg = t

        # File name
        file_ID = "_S" + str(sbj) + '_' + t_seg

        df, ch_col, n_ch, fs, chN_to_drop, n_ch_tot, col_filt, nperseg, task_names, folder = data_processing.process(
            t_seg, sbj, save_fig, file_ID)

        # Save dataframe
        df_file_ID = folder + "saved_files/" + 'df' + file_ID
        save_files.save_wp(df, df_file_ID)

        for w in W:  # for each time-frequency batch

            f_W = w[1]
            t_W = w[0]

            for c in C:  # for each penalty term

                # File name with parameters
                file_ID_wp = "_S" + str(sbj) + "_C" + str(c) + "_F" + str(
                    f_W) + "_T" + str(t_W) + "_" + t_seg