k1 = 19
num_filters = [6, 14]
dev = 300
batch_size = 50
n_epochs = 30
num_hidden = 100
sentence_length = 37
num_class = 6
lr = 0.01
evaluate_every = 100
checkpoint_every = 100
num_checkpoints = 5

# Load data
print("Loading data...")
x_, y_, vocabulary, vocabulary_inv, test_size = dataUtils.load_data()
# x_: np.array of length 5952 (5452 training + 500 test examples); every sentence
#     is padded to a list of length 37 (padding index 0)
# y_: np.array of length 5952; each entry is a one-hot vector of length 6 encoding the class
# vocabulary: dict of length 8789, i.e. the corpus contains 8789 distinct words;
#             keys are words, values are indices
# vocabulary_inv: list of length 8789, ordered by word frequency:
#                 <PAD/>, \\?, the, what, is, of, in, a, ...
# test_size: 500, the size of the test set

# Randomly shuffle data
x, x_test = x_[:-test_size], x_[-test_size:]
y, y_test = y_[:-test_size], y_[-test_size:]
shuffle_indices = np.random.permutation(np.arange(len(y)))
x_shuffled = x[shuffle_indices]
y_shuffled = y[shuffle_indices]
x_train, x_dev = x_shuffled[:-dev], x_shuffled[-dev:]
y_train, y_dev = y_shuffled[:-dev], y_shuffled[-dev:]
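# Hedged sanity check of the shapes described above: a minimal sketch, assuming
# the loader builds `vocabulary` as a word -> index map over `vocabulary_inv`
# (these asserts are not part of the original script).
assert x_.shape[1] == sentence_length           # every sentence padded to 37 tokens
assert y_.shape[1] == num_class                 # one-hot labels over 6 classes
assert len(vocabulary) == len(vocabulary_inv)   # forward and inverse vocab agree
print("examples:", len(x_), "vocab size:", len(vocabulary), "test size:", test_size)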
        saver.restore(sess, checkpoint_prefix + '-' + str(best_at_step))
        print('Finish training. On test set:')
        acc, loss = dev_step(self.x_test, self.y_test, writer=None)
        print(acc, loss)


if __name__ == "__main__":
    for num_aug in range(0, 17):
        aug_split(num_aug=num_aug)
        train_path = './sougou/train_aug' + str(num_aug) + '.txt'
        test_path = './sougou/test_aug' + str(num_aug) + '.txt'
        dev_percent = 0.05

        # Load data
        print("Loading data...")
        (x_, y_, vocabulary, vocabulary_inv, train_size, test_size,
         sent_length, num_class) = dataUtils.load_data(train_path, test_path)
        x, x_test = x_[:-test_size], x_[-test_size:]
        y, y_test = y_[:-test_size], y_[-test_size:]
        shuffle_indices = np.random.permutation(np.arange(len(y)))
        x_shuffled = x[shuffle_indices]
        y_shuffled = y[shuffle_indices]
        x_train, x_dev = (x_shuffled[:int(-dev_percent * train_size)],
                          x_shuffled[int(-dev_percent * train_size):])
        y_train, y_dev = (y_shuffled[:int(-dev_percent * train_size)],
                          y_shuffled[int(-dev_percent * train_size):])
        print("Train/Dev/Test split: {:d}/{:d}/{:d}".format(
            len(y_train), len(y_dev), len(y_test)))
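        # Worked example of the dev split arithmetic above (hypothetical
        # numbers, not taken from the Sougou data): with train_size = 10000
        # and dev_percent = 0.05, int(-dev_percent * train_size) == -500, so
        # the slices keep the first 9500 shuffled rows for training and the
        # last 500 for validation:
        #   cut = int(-0.05 * 10000)            # -500
        #   x_shuffled[:cut], x_shuffled[cut:]  # 9500 train rows, 500 dev rows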
def train(filter_size, stride, embedding_size):
    embedding = np.load(r"G:\MR\embedding_matrix_{}.npy".format(embedding_size))
    sent = tf.placeholder(tf.int64, [None, sentence_length])
    # labels must share the float32 dtype of the logits for the softmax cross-entropy op
    y = tf.placeholder(tf.float32, [None, num_class])
    seq_len = tf.placeholder(tf.int32, [None])
    dropout_blstm_prob = tf.placeholder(tf.float32, name="dropout_blstm")
    dropout_word_prob = tf.placeholder(tf.float32, name="dropout_word")
    model = Blstm_att(batch_size, sentence_length, embedding_size, embedding_size,
                      num_label, embedding, seq_len, regularizer_rate,
                      dropout_blstm_prob, dropout_word_prob)
    out_train = model.Blstm_att(sent, filter_size, stride, True)
    global_step = tf.Variable(0, name="global_step", trainable=False)

    with tf.name_scope("cost"):
        reg_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        cost_train = tf.reduce_mean(
            tf.nn.softmax_cross_entropy_with_logits(logits=out_train, labels=y))
        loss_train = cost_train + regularizer_rate * sum(reg_loss)
    with tf.name_scope("acc"):
        Acc = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(y, 1), tf.argmax(out_train, 1)), tf.float32))

    optimizer = tf.train.AdadeltaOptimizer(1.0)
    tvars = tf.trainable_variables()
    grads, _ = tf.clip_by_global_norm(tf.gradients(loss_train, tvars), grad_clip)
    grads_and_vars = tuple(zip(grads, tvars))
    train_op = optimizer.apply_gradients(grads_and_vars, global_step=global_step)

    with tf.Session() as sess:
        train_seq_len = np.ones(batch_size) * sentence_length
        sess.run(tf.global_variables_initializer())

        # ========= save the model =========
        checkpoint_dir = os.path.abspath(os.path.join(model_path, "checkpoints"))
        checkpoint_prefix = os.path.join(checkpoint_dir, "model")
        if not os.path.exists(checkpoint_dir):
            os.makedirs(checkpoint_dir)
        saver = tf.train.Saver()
        # ==================================

        def dev_step(x_batch, y_batch):
            feed_dict = {
                sent: x_batch,
                y: y_batch,
                seq_len: train_seq_len,
                dropout_blstm_prob: 1.0,
                dropout_word_prob: 1.0
            }
            step, loss, acc = sess.run([global_step, cost_train, Acc], feed_dict)
            return loss, acc

        def train_step(x_batch, y_batch):
            feed_dict = {
                sent: x_batch,
                y: y_batch,
                seq_len: train_seq_len,
                dropout_blstm_prob: 0.5,
                dropout_word_prob: 0.5
            }
            _, step, loss, acc = sess.run(
                [train_op, global_step, loss_train, Acc], feed_dict)
            print("Train Step {}, loss {:g}, acc {:g}".format(step, loss, acc))
            return loss, acc

        max_acc = 0
        valid = []
        for epoch in range(n_epochs):
            (training_step, test_epochs, x_train, y_train,
             x_val, y_val) = dataUtils.load_data(batch_size, r"G:\MR", embedding_size)
            for i in range(training_step):
                current_step = tf.train.global_step(sess, global_step)
                start = i * batch_size
                end = start + batch_size
                batch_train = x_train[start:end]
                train_label = y_train[start:end]
                train_step(batch_train, train_label)
                if current_step % 500 == 0:
                    dev_loss = 0
                    dev_acc = 0
                    for j in range(test_epochs):
                        start = j * batch_dev
                        end = start + batch_dev
                        batch_val = x_val[start:end]
                        val_label = y_val[start:end]
                        m, n = dev_step(batch_val, val_label)
                        dev_loss = m + dev_loss
                        dev_acc = n + dev_acc
                    print("\nValid Step {}, loss {:g}, acc {:g}\n".format(
                        current_step, dev_loss / test_epochs, dev_acc / test_epochs))
                    if dev_acc / test_epochs > max_acc:
                        max_acc = dev_acc / test_epochs
                        print("the max acc: {:g}".format(max_acc))
                        path = saver.save(sess, checkpoint_prefix,
                                          global_step=current_step)
                    valid.append("Valid Step:{},acc:{:g},loss:{:g}".format(
                        current_step, dev_acc / test_epochs, dev_loss / test_epochs))
        np.save("MR_{}_.npy".format(embedding_size), valid)
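# A small NumPy sketch (an illustration added here, not part of the original
# script) of what tf.clip_by_global_norm does inside train() above: every
# gradient is rescaled by the same factor so the joint L2 norm of all
# gradients never exceeds grad_clip.
def _clip_by_global_norm_np(grads, clip_norm):
    global_norm = np.sqrt(sum(np.sum(np.square(g)) for g in grads))
    scale = clip_norm / max(global_norm, clip_norm)  # <= 1; identity when norm is already small
    return [g * scale for g in grads], global_norm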
ws = [7, 5]
top_k = 4
num_filters = [6, 14]
dev = 300
batch_size = 512
n_epochs = 50
num_hidden = 100
sentence_length = 52
lr = 1e-3
evaluate_every = 100
checkpoint_every = 100
num_checkpoints = 5

# Load data
print("Loading data...")
x_, y_, vocabulary, vocabulary_inv, test_size, dev_size = dataUtils.load_data(
    num_class)
print(y_)

# Split data into train/dev/test
x_train, x_dev, x_test = (x_[:-(test_size + dev_size)],
                          x_[-(test_size + dev_size):-test_size],
                          x_[-test_size:])
y_train, y_dev, y_test = (y_[:-(test_size + dev_size)],
                          y_[-(test_size + dev_size):-test_size],
                          y_[-test_size:])
print("Train/Dev/Test split: {:d}/{:d}/{:d}".format(len(y_train), len(y_dev),
                                                    len(y_test)))

#--------------------------------------------------------------------------------------#


def init_weights(shape, name):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.01), name=name)
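# Hedged usage sketch for init_weights (the filter shape below is an assumption
# for illustration; `embedding_dim` is hypothetical and the real shapes are set
# where the model graph is built):
#   W1 = init_weights([ws[0], embedding_dim, 1, num_filters[0]], "W1")
# tf.truncated_normal re-draws any sample more than two standard deviations from
# the mean, so with stddev=0.01 the initial weights stay within roughly +/- 0.02.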