Example #1
import numpy as np

import dataUtils  # project-local data loading module used by these examples

# Hyper-parameters
k1 = 19
num_filters = [6, 14]
dev = 300
batch_size = 50
n_epochs = 30
num_hidden = 100
sentence_length = 37
num_class = 6
lr = 0.01
evaluate_every = 100
checkpoint_every = 100
num_checkpoints = 5

# Load data
print("Loading data...")
x_, y_, vocabulary, vocabulary_inv, test_size = dataUtils.load_data()
#x_: np.array of length 5952 (5452 training samples plus 500 test samples).
#    Each sentence is padded to a list of length 37 (the padding index is 0).
#y_: np.array of length 5952; each entry is a one-hot vector of length 6 encoding the class label.
#vocabulary: dict of length 8789, i.e. the corpus contains 8789 distinct words; keys are words, values are indices.
#vocabulary_inv: list of length 8789, ordered by word frequency: <PAD?>, \\?, the, what, is, of, in, a, ...
#test_size: 500, the size of the test set.
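
# Quick sanity check (sketch, not part of the original example; it assumes the
# shapes described in the comments above): recover the tokens of the first
# padded sentence via vocabulary_inv.
print(x_.shape, y_.shape)  # expected: (5952, 37) and (5952, 6)
first_tokens = [vocabulary_inv[idx] for idx in x_[0] if idx != 0]  # index 0 is padding
print(first_tokens)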

# Split off the test set, then randomly shuffle the remaining training data
x, x_test = x_[:-test_size], x_[-test_size:]
y, y_test = y_[:-test_size], y_[-test_size:]
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]

x_train, x_dev = x_shuffled[:-dev], x_shuffled[-dev:]
y_train, y_dev = y_shuffled[:-dev], y_shuffled[-dev:]
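
# Sketch (not part of the original example): how the splits above would
# typically be iterated, using the batch_size and n_epochs settings defined
# at the top; the actual training op is project-specific and omitted here.
num_batches = int((len(y_train) - 1) / batch_size) + 1
for epoch in range(n_epochs):
    for b in range(num_batches):
        x_batch = x_train[b * batch_size:(b + 1) * batch_size]
        y_batch = y_train[b * batch_size:(b + 1) * batch_size]
        # feed x_batch / y_batch to the model's train step here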
Example #2
            # Restore the checkpoint that scored best on the dev set, then
            # evaluate once on the held-out test set.
            saver.restore(sess, checkpoint_prefix + '-' + str(best_at_step))
            print('Finished training. On test set:')
            acc, loss = dev_step(self.x_test, self.y_test, writer=None)
            print(acc, loss)


if __name__ == "__main__":
    for num_aug in range(0, 17):
        aug_split(num_aug=num_aug)
        train_path = './sougou/train_aug' + str(num_aug) + '.txt'
        test_path = './sougou/test_aug' + str(num_aug) + '.txt'
        dev_percent = 0.05
        # Load data
        print("Loading data...")
        x_, y_, vocabulary, vocabulary_inv, train_size, test_size, sent_length, num_class = dataUtils.load_data(
            train_path, test_path)

        x, x_test = x_[:-test_size], x_[-test_size:]
        y, y_test = y_[:-test_size], y_[-test_size:]
        shuffle_indices = np.random.permutation(np.arange(len(y)))
        x_shuffled = x[shuffle_indices]
        y_shuffled = y[shuffle_indices]

        # Hold out the last dev_percent of the shuffled training data as the dev set.
        n_dev = int(dev_percent * train_size)
        x_train, x_dev = x_shuffled[:-n_dev], x_shuffled[-n_dev:]
        y_train, y_dev = y_shuffled[:-n_dev], y_shuffled[-n_dev:]

        print("Train/Dev/Test split: {:d}/{:d}/{:d}".format(
Example #3
import os

import numpy as np
import tensorflow as tf

# dataUtils, Blstm_att and the module-level settings used below (batch_size,
# n_epochs, sentence_length, num_class, num_label, regularizer_rate, grad_clip,
# batch_dev, model_path) are defined elsewhere in the project this snippet is
# taken from.


def train(fliter_size, stride, embeding_size):
    embedding = np.load(r"G:\MR\embedding_matrix_{}.npy".format(embeding_size))

    sent = tf.placeholder(tf.int64, [None, sentence_length])
    y = tf.placeholder(tf.float64, [None, num_class])
    seq_len = tf.placeholder(tf.int32, [None])
    # Separate dropout keep-probabilities for the BLSTM layer and the word embeddings.
    dropout_blstm_prob = tf.placeholder(tf.float32, name="dropout_blstm")
    dropout_word_prob = tf.placeholder(tf.float32, name="dropout_word")

    model = Blstm_att(batch_size, sentence_length, embeding_size,
                      embeding_size, num_label, embedding, seq_len,
                      regularizer_rate, dropout_blstm_prob, dropout_word_prob)

    out_trian = model.Blstm_att(sent, fliter_size, stride, True)
    global_step = tf.Variable(0, name="global_step", trainable=False)

    with tf.name_scope("cost"):
        reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        cost_train = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=out_trian,
                                                    labels=y))
        loss_train = cost_train + regularizer_rate * sum(reg_loss)

    with tf.name_scope("acc"):
        Acc = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(out_trian, 1)),
                    tf.float32))

    optimizer = tf.train.AdadeltaOptimizer(1.0)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss_train, tvars),
                                      grad_clip)
    grads_and_vars = tuple(zip(grads, tvars))
    train_op = optimizer.apply_gradients(grads_and_vars,
                                         global_step=global_step)

    with tf.Session() as sess:
        train_seq_len = np.ones(batch_size) * sentence_length
        sess.run(tf.global_variables_initializer())

        #=========save_the_model======
        checkpoint_dir = os.path.abspath(
            os.path.join(model_path, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver()

        #=============================
        # Evaluation pass: dropout disabled (keep probabilities fed as 1.0).
        def dev(x_batch, y_batch):
            feed_dict = {
                sent: x_batch,
                y: y_batch,
                seq_len: train_seq_len,
                dropout_blstm_prob: 1.0,
                dropout_word_prob: 1.0
            }
            step, loss, acc = sess.run([global_step, cost_train, Acc],
                                       feed_dict)
            return loss, acc

        # One optimisation step with dropout enabled (keep probability 0.5).
        def train_step(x_batch, y_batch):
            feed_dict = {
                sent: x_batch,
                y: y_batch,
                seq_len: train_seq_len,
                dropout_blstm_prob: 0.5,
                dropout_word_prob: 0.5
            }
            _, step, loss, acc = sess.run(
                [train_op, global_step, loss_train, Acc], feed_dict)
            print("Train Step{},loss {:g},acc {:g} ".format(step, loss, acc))
            return loss, acc

        max_acc = 0
        valid = []
        for epoch in range(n_epochs):
            training_step, test_npochs, x_train, y_train, x_val, y_val = dataUtils.load_data(
                batch_size, r"G:\MR", embeding_size)
            for step in range(training_step):
                current_step = tf.train.global_step(sess, global_step)
                start = step * batch_size
                end = start + batch_size
                batch_train = x_train[start:end]
                train_label = y_train[start:end]
                train_step(batch_train, train_label)

                if current_step % 500 == 0:
                    dev_loss = 0
                    dev_acc = 0
                    for j in range(test_npochs):
                        start = j * batch_dev
                        end = start + batch_dev
                        batch_val = x_val[start:end]
                        val_label = y_val[start:end]
                        batch_loss, batch_acc = dev(batch_val, val_label)
                        dev_loss = batch_loss + dev_loss
                        dev_acc = batch_acc + dev_acc
                    print("\nValid Step{},loss {:g},acc {:g} \n".format(
                        current_step, dev_loss / test_npochs,
                        dev_acc / test_npochs))

                    if dev_acc / test_npochs > max_acc:
                        max_acc = dev_acc / test_npochs
                        print("the max acc: {:g}".format(max_acc))

                        path = saver.save(sess,
                                          checkpoint_prefix,
                                          global_step=current_step)
                    valid.append("Valid Step:{},acc:{:g},loss:{:g}".format(
                        current_step, dev_acc / test_npochs,
                        dev_loss / test_npochs))
                    np.save("MR_{}_.npy".format(embeding_size), valid)
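
        # Sketch (not part of the original snippet): after training, the most
        # recent checkpoint saved above can be restored for a final evaluation
        # pass, mirroring Example #2.
        latest_ckpt = tf.train.latest_checkpoint(checkpoint_dir)
        if latest_ckpt is not None:
            saver.restore(sess, latest_ckpt)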
Example #4
import tensorflow as tf

import dataUtils  # project-local data loading module used by this example

# Hyper-parameters
ws = [7, 5]
top_k = 4
num_filters = [6, 14]
dev = 300
batch_size = 512
n_epochs = 50
num_hidden = 100
sentence_length = 52
lr = 1e-3
evaluate_every = 100
checkpoint_every = 100
num_checkpoints = 5

# Load data
print("Loading data...")
x_, y_, vocabulary, vocabulary_inv, test_size, dev_size = dataUtils.load_data(
    num_class)  # num_class is defined elsewhere in the source file
print(y_)
# Split into train/dev/test (the loaded data is ordered as train + dev + test)
x_train = x_[:-(test_size + dev_size)]
x_dev = x_[-(test_size + dev_size):-test_size]
x_test = x_[-test_size:]
y_train = y_[:-(test_size + dev_size)]
y_dev = y_[-(test_size + dev_size):-test_size]
y_test = y_[-test_size:]

print("Train/Dev/Test split: {:d}/{:d}/{:d}".format(len(y_train), len(y_dev),
                                                    len(y_test)))
#--------------------------------------------------------------------------------------#


def init_weights(shape, name):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.01), name=name)
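
# Usage sketch (hypothetical shapes; a 300-dimensional embedding is assumed):
# an embedding matrix and a first convolutional filter bank initialised with
# small truncated-normal noise.
W_embed = init_weights([len(vocabulary), 300], "W_embed")
W_conv1 = init_weights([ws[0], 300, 1, num_filters[0]], "W_conv1")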