# Load the vocabulary mapping and the embedding values from `embedding_path`.
word2idx, word_embeddings = data_helper.load_embedding2(embedding_path, True)

# word to id: every call returns a pair that is unpacked into the converted
# sentences and a companion "length" value.
# NOTE(review): presumably sequences are padded/truncated to `seq_length` --
# confirm in utils.word2id.
train_sources, train_sources_length = utils.word2id(train_sources, word2idx, seq_length)
train_targets, train_targets_length = utils.word2id(train_targets, word2idx, seq_length)
dev_sources, dev_sources_length = utils.word2id(dev_sources, word2idx, seq_length)
dev_targets, dev_targets_length = utils.word2id(dev_targets, word2idx, seq_length)
test_sources, test_sources_length = utils.word2id(test_sources, word2idx, seq_length)
test_targets, test_targets_length = utils.word2id(test_targets, word2idx, seq_length)

# Turn the raw scores into the label arrays used for training/evaluation.
# NOTE(review): presumably a distribution over `class_num` classes -- confirm
# in utils.build_porbs.
train_score_probs = utils.build_porbs(train_scores, class_num)
dev_score_probs = utils.build_porbs(dev_scores, class_num)
test_score_probs = utils.build_porbs(test_scores, class_num)


def kl_distance(y_true, y_pred):
    """Return the mean symmetrised KL divergence between two distributions.

    Both inputs are clipped to ``[1e-10, 1]`` first so that neither the
    logarithm nor the division sees a zero. The two directed divergences are
    summed over axis 1, averaged with each other, and then averaged over the
    remaining entries.
    """
    lower, upper = 1e-10, 1.
    p = kb.clip(y_true, lower, upper)
    q = kb.clip(y_pred, lower, upper)

    true_to_pred = kb.sum(p * kb.log(p / q), axis=1)
    pred_to_true = kb.sum(q * kb.log(q / p), axis=1)

    symmetric = (true_to_pred + pred_to_true) / 2.0
    return kb.mean(symmetric)


def pearson(y_true, y_pred):
    # NOTE(review): this definition is cut off in the excerpt under review;
    # only the visible statements are reproduced here.
    # NOTE(review): the score vector has a hard-coded length of 6 but is
    # reshaped to [class_num, 1], which only works when class_num == 6. It is
    # also built through `K` while everything else uses `kb` -- confirm both
    # aliases are imported.
    scores = kb.reshape(K.arange(0, 6, dtype='float32'), [class_num, 1])
    # Weight each row by the score vector and flatten to one value per row.
    y_true = kb.reshape(kb.dot(y_true, scores), [-1])
    y_pred = kb.reshape(kb.dot(y_pred, scores), [-1])
# Hyper-parameters defined in this fragment.
drop_out_rate = 0.5
regularizer_rate = 0.004

print("loading data...")
# Load the sentence pairs and their scores from `graph_path`.
graph_sources, graph_targets, graph_scores = data_helper.load_cross_lang_sentence_data(
    graph_path,
    False,
)
# Load the vocabulary mapping and the embedding values from `embedding_path`.
word2idx, word_embeddings = data_helper.load_embedding(embedding_path, True)

# Each call returns a pair that is unpacked into the converted sentences and
# a companion "length" value.
# NOTE(review): presumably sequences are padded/truncated to `seq_length` --
# confirm in utils.word2id.
graph_sources, graph_sources_length = utils.word2id(graph_sources, word2idx, seq_length)
graph_targets, graph_targets_length = utils.word2id(graph_targets, word2idx, seq_length)

# NOTE(review): presumably a distribution over `class_num` classes -- confirm
# in utils.build_porbs.
graph_score_probs = utils.build_porbs(graph_scores, class_num)


def kl_distance(y_true, y_pred):
    """Return the mean symmetrised KL divergence between two distributions.

    Both inputs are clipped to ``[1e-10, 1]`` first so that neither the
    logarithm nor the division sees a zero. The two directed divergences are
    summed over axis 1, averaged with each other, and then averaged over the
    remaining entries.
    """
    lower, upper = 1e-10, 1.
    p = kb.clip(y_true, lower, upper)
    q = kb.clip(y_pred, lower, upper)

    true_to_pred = kb.sum(p * kb.log(p / q), axis=1)
    pred_to_true = kb.sum(q * kb.log(q / p), axis=1)

    symmetric = (true_to_pred + pred_to_true) / 2.0
    return kb.mean(symmetric)


def pearson(y_true, y_pred):
    # NOTE(review): this definition is cut off in the excerpt under review;
    # only the visible statements are reproduced here.
    # NOTE(review): the score vector has a hard-coded length of 6 but is
    # reshaped to [class_num, 1], which only works when class_num == 6. It is
    # also built through `K` while everything else uses `kb` -- confirm both
    # aliases are imported.
    scores = kb.reshape(K.arange(0, 6, dtype='float32'), [class_num, 1])
    # Weight each row by the score vector and flatten to one value per row.
    y_true = kb.reshape(kb.dot(y_true, scores), [-1])
    y_pred = kb.reshape(kb.dot(y_pred, scores), [-1])
train_sources, train_sources_length = utils.word2id(train_sources, word2idx, FLAGS.seq_length) train_targets, train_targets_length = utils.word2id(train_targets, word2idx, FLAGS.seq_length) dev_sources, dev_sources_length = utils.word2id(dev_sources, word2idx, FLAGS.seq_length) dev_targets, dev_targets_length = utils.word2id(dev_targets, word2idx, FLAGS.seq_length) test_sources, test_sources_length = utils.word2id(test_sources, word2idx, FLAGS.seq_length) test_targets, test_targets_length = utils.word2id(test_targets, word2idx, FLAGS.seq_length) train_scores_prob = utils.build_porbs(train_scores, FLAGS.class_num) dev_scores_prob = utils.build_porbs(dev_scores, FLAGS.class_num) test_scores_prob = utils.build_porbs(test_scores, FLAGS.class_num) # train_scores = utils.normalize_probs(train_scores) # dev_scores = utils.normalize_probs(dev_scores) # test_scores = utils.normalize_probs(test_scores) time_stamp = str(int(time.time())) # Training # ================================================== with tf.Graph().as_default(): session = tf.Session() with session.as_default():
FLAGS.test_path) # train_source_features, train_target_features = utils.get_all_handcraft_features(train_sources, train_targets, FLAGS.seq_length) # dev_source_features, dev_target_features = utils.get_all_handcraft_features(dev_sources, dev_targets, FLAGS.seq_length) # test_source_features, test_target_features = utils.get_all_handcraft_features(test_sources, test_targets, FLAGS.seq_length) word2idx, word_embeddings = data_helper.load_embedding(FLAGS.embedding_path, True) train_sources = utils.word2id(train_sources, word2idx, FLAGS.seq_length) train_targets = utils.word2id(train_targets, word2idx, FLAGS.seq_length) dev_sources = utils.word2id(dev_sources, word2idx, FLAGS.seq_length) dev_targets = utils.word2id(dev_targets, word2idx, FLAGS.seq_length) test_sources = utils.word2id(test_sources, word2idx, FLAGS.seq_length) test_targets = utils.word2id(test_targets, word2idx, FLAGS.seq_length) dev_score_probs = utils.build_porbs(dev_scores, FLAGS.class_num) test_score_probs = utils.build_porbs(test_scores, FLAGS.class_num) print("Train/Dev split: {:d}/{:d}".format(len(train_scores), len(dev_scores))) time_stamp = str(int(time.time())) # Training # ================================================== with tf.Graph().as_default(): config = tf.ConfigProto() # config.gpu_options.per_process_gpu_memory_fraction = 0.7 # 占用GPU70%的显存 config.gpu_options.allow_growth = True session = tf.Session(config=config) with session.as_default(): # Define training procedure