def main():
    # Path where the model is saved
    path_save = 'models/se_' + str(args.hidden_size) + 'fb_' + str(
        args.forget_bias) + 'nl_' + str(args.number_layers)
    if not os.path.exists(path_save):
        os.makedirs(path_save)
    path_save += '/model'

    # Load datasets
    pretrain, train, val, test = load_dataset()

    # Get [ emb1, lengths1, emb2, lengths2, y ]
    pretrain = data_util.prepare_data(pretrain, maxlen=args.max_length, training=True)
    train = data_util.prepare_data(train, maxlen=args.max_length, training=True)
    val = data_util.prepare_data(val, maxlen=args.max_length, training=False)
    test = data_util.prepare_data(test, maxlen=args.max_length, training=False)

    # Create network
    network = siamese_neural_network_local_context.SiameseLSTMCNN(
        sequence_embedding=args.hidden_size,
        forget_bias=args.forget_bias,
        learning_rate=args.learning_rate,
        number_layers=args.number_layers,
        max_length=args.max_length,
        word_emb_size=args.word_emb_size,
        local_context_size=args.local_context_size,
        dropout=args.dropout)

    # Initialize TensorFlow
    with tf.Session() as sess:
        # Initialize variables
        sess.run(network.initialize_variables)
        if args.bool_load_model and os.path.isfile(path_save + ".index"):
            network.saver.restore(sess, path_save)
            print("Model restored!")
        if args.bool_pretrain:
            # Pretraining network
            print("Pretraining network ...")
            training_network(sess, network, pretrain, val, path_save,
                             args.nb_epochs_pretrain)
        if args.bool_train:
            # Training network
            print("Training network ...")
            training_network(sess, network, train, val, path_save,
                             args.nb_epochs)
        if args.bool_test:
            # Testing network
            print("Test network ...")
            loss, prediction, reference = test_network(sess, network, test)
            calculate_correlation(prediction, reference)
def test_prepare_data():
    data_dir = os.path.join(root_dir, "data/ptb")
    train_path = os.path.join(data_dir, "train")
    dev_path = os.path.join(data_dir, "valid")
    vocab_size = 20000
    data_util.prepare_data(data_dir, train_path, dev_path, vocab_size)
def main(_):
    conf = tf.flags.FLAGS
    word_to_idx, idx_to_word, seq_ids = data_util.prepare_data(conf)
    tf.reset_default_graph()
    input_data = tf.placeholder(tf.int64, [conf.batch_size, conf.max_seq_length])
    # labels = tf.placeholder(tf.int64, [conf.batch_size, conf.max_seq_length])
    labels = tf.slice(input_data, [0, 1], [-1, -1])
    print(len(word_to_idx))
    print(len(idx_to_word))
    embedding = tf.get_variable("embedding",
                                [conf.vocab_size, conf.embedding_size],
                                dtype=tf.float32)
    data = tf.nn.embedding_lookup(embedding, input_data)
    label_one_hot_embedding = tf.eye(conf.vocab_size)
    # data = tf.tensordot(data, embedding, 1)
    # data = tf.reshape(data, [conf.batch_size, conf.max_seq_length, conf.embedding_size])
    # data = tf.cast(data, dtype=tf.float32)
    data_labels = tf.nn.embedding_lookup(label_one_hot_embedding, labels)
    print("placeholders complete")

    # Convolution over the embedded sequence, with a gating convolution
    data = tf.reshape(
        data, [conf.batch_size, 1, conf.max_seq_length, conf.embedding_size])
    filter = tf.Variable(
        tf.random_normal(
            [1, conf.filter_h, conf.embedding_size, conf.hidden_size]))
    value = tf.nn.conv2d(data, filter, [1, 1, 1, 1], padding="SAME")
    w_filter = tf.Variable(
        tf.random_normal([1, conf.filter_h, conf.embedding_size, 1]))
    w_value = tf.nn.conv2d(data, w_filter, [1, 1, 1, 1], padding="SAME")
    value = value * w_value
    value = tf.reshape(
        value, [conf.batch_size, conf.max_seq_length, conf.hidden_size])
    value = tf.slice(value, [0, 0, 0], [-1, conf.max_seq_length - 1, -1])

    # Output projection
    weight = tf.get_variable("softmax_w", [conf.hidden_size, conf.vocab_size])
    bias = tf.get_variable("softmax_b", [conf.vocab_size])
    # value = tf.cast(value, tf.float32)
    # prediction = tf.nn.softmax(tf.tensordot(value, weight, 1) + bias)
    prediction = tf.tensordot(value, weight, 1) + bias
    prediction_softmax = tf.nn.softmax(prediction)
    # The softmax itself is applied inside tf.nn.softmax_cross_entropy_with_logits below.
    print(value)
    print(prediction)
    guess = tf.argmax(prediction, 2)
    correctPred = tf.equal(tf.argmax(prediction, 2), labels)
    accuracy = tf.reduce_mean(tf.cast(correctPred, tf.float32))
    # tv = tf.trainable_variables()  # all trainable parameters, i.e. every tf.Variable/tf.get_variable with trainable=True
    # regularization_lost = 0.001 * tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])  # 0.001 is the lambda hyperparameter
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction,
                                                labels=data_labels))
    # l2_loss = loss + regularization_lost
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    # optimizer_l2 = tf.train.AdamOptimizer(learning_rate=0.001).minimize(l2_loss)
    optimizer_null = tf.train.AdamOptimizer(learning_rate=0).minimize(loss)  # used for the dev set

    sess = tf.Session()
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(conf.ckpt_path)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print(ckpt.model_checkpoint_path)
    else:
        print("checkpoint read failed")

    f = open(conf.closefile, 'w')
    f2 = open(conf.openfile, 'w')
    nextBatch = data_util.getTrainBatch(seq_ids, conf)
    for i in range(conf.iterations):
        nextBatch = data_util.getTrainBatch(seq_ids, conf)
        print(i)
        print(nextBatch)
        now_prediction, now_loss, _ = sess.run(
            [prediction_softmax, loss, optimizer], {input_data: nextBatch})
        # if (i + 1) % checkpoint_steps == 0:
        #     saver.save(sess, checkpoint_dir + 'model.ckpt', global_step=i + 1)
        print("now_loss:")
        print(now_loss)
        f.write(str(i) + ":" + str(now_loss))
        f.write('\n')
def main(_):
    conf = tf.flags.FLAGS
    tf.reset_default_graph()
    input_data = tf.placeholder(tf.int64, [conf.batch_size, conf.max_seq_length])
    labels = tf.placeholder(tf.int64, [conf.batch_size, conf.max_seq_length])
    word_to_idx, idx_to_word, label_to_idx, idx_to_label, seq_ids, labels_ids, \
        words_embedding_index, labels_embedding_index = data_util.prepare_data(conf)
    print(len(word_to_idx))
    print(len(label_to_idx))
    print(len(idx_to_word))
    print(len(idx_to_label))

    embedding = tf.get_variable("embedding",
                                [conf.vocab_size, conf.embedding_size],
                                dtype=tf.float32)
    data = tf.nn.embedding_lookup(embedding, input_data)
    # data = tf.tensordot(data, embedding, 1)
    # data = tf.reshape(data, [conf.batch_size, conf.max_seq_length, conf.embedding_size])
    # data = tf.cast(data, dtype=tf.float32)
    print(data)
    data_labels = tf.nn.embedding_lookup(labels_embedding_index, labels)
    print("placeholders complete")

    # Stacked bidirectional LSTM encoder
    lstm_Cell_fw = tf.contrib.rnn.MultiRNNCell(
        [lstm_cell(conf.hidden_size, conf.dropout) for _ in range(conf.num_layers)],
        state_is_tuple=True)
    lstm_Cell_bw = tf.contrib.rnn.MultiRNNCell(
        [lstm_cell(conf.hidden_size, conf.dropout) for _ in range(conf.num_layers)],
        state_is_tuple=True)
    value, _ = tf.nn.bidirectional_dynamic_rnn(lstm_Cell_fw, lstm_Cell_bw, data,
                                               dtype=tf.float32)
    print(value)
    print(tf.shape(value))
    value = tf.concat(value, axis=2)
    print(value)
    value = tf.cast(value, dtype=tf.float32)
    print(value)
    print(len(label_to_idx))

    # Concatenate each position with its left and right neighbours (hidden_size * 6 features per step)
    weight = tf.get_variable("softmax_w", [conf.hidden_size * 6, len(label_to_idx)])
    bias = tf.get_variable("softmax_b", [len(label_to_idx)])
    value_mae = tf.slice(value, [0, 0, 0],
                         [conf.batch_size, conf.max_seq_length - 1, conf.hidden_size * 2])
    value_ushiro = tf.slice(value, [0, 1, 0],
                            [conf.batch_size, conf.max_seq_length - 1, conf.hidden_size * 2])
    value_mae = tf.concat(
        [tf.zeros([conf.batch_size, 1, conf.hidden_size * 2], dtype=tf.float32), value_mae], 1)
    value_ushiro = tf.concat(
        [value_ushiro, tf.zeros([conf.batch_size, 1, conf.hidden_size * 2], dtype=tf.float32)], 1)
    value = tf.concat([value_mae, value, value_ushiro], 2)

    # value = tf.cast(value, tf.float32)
    # prediction = tf.nn.softmax(tf.tensordot(value, weight, 1) + bias)
    prediction = tf.tensordot(value, weight, 1) + bias
    prediction_softmax = tf.nn.softmax(prediction)
    # The softmax itself is applied inside tf.nn.softmax_cross_entropy_with_logits below.
    print(value)
    print(prediction)
    guess = tf.argmax(prediction, 2)
    correctPred = tf.equal(tf.argmax(prediction, 2), labels)
    accuracy = tf.reduce_mean(tf.cast(correctPred, tf.float32))
    # tv = tf.trainable_variables()  # all trainable parameters, i.e. every tf.Variable/tf.get_variable with trainable=True
    # regularization_lost = 0.001 * tf.reduce_sum([tf.nn.l2_loss(v) for v in tv])  # 0.001 is the lambda hyperparameter
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=prediction, labels=data_labels))
    # l2_loss = loss + regularization_lost
    optimizer = tf.train.AdamOptimizer(learning_rate=0.001).minimize(loss)
    # optimizer_l2 = tf.train.AdamOptimizer(learning_rate=0.001).minimize(l2_loss)
    optimizer_null = tf.train.AdamOptimizer(learning_rate=0).minimize(loss)  # used for the dev set

    sess = tf.Session()
    saver = tf.train.Saver()
    sess.run(tf.global_variables_initializer())
    ckpt = tf.train.get_checkpoint_state(conf.ckpt_path)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)
        print(ckpt.model_checkpoint_path)
    else:
        print("checkpoint read failed")

    f = open(conf.closefile, 'w')
    f2 = open(conf.openfile, 'w')
    nextBatch, nextBatchLabels = data_util.getTrainBatch(labels_ids, seq_ids, conf)
    batch_total_error_count = 0
    guess_total_error_count = 0
    nicety_guess_error_count = 0
    for i in range(conf.iterations):
        nextBatch, nextBatchLabels = data_util.getTrainBatch(labels_ids, seq_ids, conf)
        print(i)
        print(nextBatch)
        print("true label:")
        print(nextBatchLabels)
        time.sleep(5)
        now_prediction, now_loss, now_accuracy, now_guess, _ = sess.run(
            [prediction, loss, accuracy, guess, optimizer],
            {input_data: nextBatch, labels: nextBatchLabels})
        # if (i + 1) % checkpoint_steps == 0:
        #     saver.save(sess, checkpoint_dir + 'model.ckpt', global_step=i + 1)
        print("now_guess:")
        print(now_guess)
        print("now_loss:")
        print(now_loss)
        f.write(str(i))
        for a in now_guess:
            guess_line = []
            for x in a:
                if x != 0:
                    guess_line.append(idx_to_label[x])
            f.write(str(guess_line))
            f.write('\n')
        if i % 100 == 0:
            f2.write(str(i))
            f2.write(str(nextBatch))
            f2.write('\n')
            f2.write("true label:")
            f2.write(str(nextBatchLabels))
            f2.write("now_guess:")
            f2.write(str(now_guess))
            print("save")
        f.write('\n')
def train(): """Train a nl->name translation model using identifier-nl data.""" print("Preparing the data in %s" % FLAGS.data_dir) nl_train, name_train, nl_dev, name_dev, _, _ = data_util.prepare_data( FLAGS.data_dir, FLAGS.nl_vocab_size, FLAGS.name_vocab_size) with tf.Session() as sess: # Create model. print("Creating %d layers of %d units." % (FLAGS.num_layers, FLAGS.size)) model = create_model(sess, False) # Read data into buckets and compute their sizes. print("Reading development and training data (limit: %d)." % FLAGS.max_train_data_size) dev_set = read_data(nl_dev, name_dev) train_set = read_data(nl_train, name_train, FLAGS.max_train_data_size) train_bucket_sizes = [len(train_set[b]) for b in range(len(_buckets))] train_total_size = float(sum(train_bucket_sizes)) # A bucket scale is a list of increasing numbers from 0 to 1 that we'll use # to select a bucket. Length of [scale[i], scale[i+1]] is proportional to # the size if i-th training bucket, as used later. train_buckets_scale = [ sum(train_bucket_sizes[:i + 1]) / train_total_size for i in range(len(train_bucket_sizes)) ] # This is the training loop. step_time, loss = 0.0, 0.0 current_step = 0 previous_losses = [] for i in range(50000): # Choose a bucket according to data distribution. We pick a random number # in [0, 1] and use the corresponding interval in train_buckets_scale. random_number_01 = np.random.random_sample() bucket_id = min([ i for i in range(len(train_buckets_scale)) if train_buckets_scale[i] > random_number_01 ]) # Get a batch and make a step. start_time = time.time() encoder_inputs, decoder_inputs, target_weights = model.get_batch( train_set, bucket_id) _, step_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, False) step_time += (time.time() - start_time) / FLAGS.steps_per_checkpoint loss += step_loss / FLAGS.steps_per_checkpoint current_step += 1 # Once in a while, we save checkpoint, print statistics, and run evals. if current_step % FLAGS.steps_per_checkpoint == 0: # Print statistics for the previous epoch. perplexity = math.exp( float(loss)) if loss < 300 else float("inf") print( "global step %d learning rate %.4f step-time %.2f perplexity " "%.2f" % (model.global_step.eval(), model.learning_rate.eval(), step_time, perplexity)) # Decrease learning rate if no improvement was see1000n over last 3 times. if len(previous_losses) > 2 and loss > max( previous_losses[-3:]): sess.run(model.learning_rate_decay_op) previous_losses.append(loss) # Save checkpoint and zero timer and loss. checkpoint_path = os.path.join(FLAGS.model_dir, "translate.ckpt") model.saver.save(sess, checkpoint_path, global_step=model.global_step) step_time, loss = 0.0, 0.0 # Run evals on development set and print their perplexity. for bucket_id in range(len(_buckets)): if len(dev_set[bucket_id]) == 0: print(" eval: empty bucket %d" % bucket_id) continue encoder_inputs, decoder_inputs, target_weights = model.get_batch( dev_set, bucket_id) _, eval_loss, _ = model.step(sess, encoder_inputs, decoder_inputs, target_weights, bucket_id, True) eval_ppx = math.exp( float(eval_loss)) if eval_loss < 300 else float("inf") print(" eval: bucket %d perplexity %.2f" % (bucket_id, eval_ppx)) sys.stdout.flush()
def train():
    # 1. Read the training and dev data.
    mylog_section('READ DATA')

    from_train = None
    to_train = None
    from_dev = None
    to_dev = None

    from_train, to_train, from_dev, to_dev, _, _ = data_util.prepare_data(
        FLAGS.data_cache_dir, FLAGS.train_path_from, FLAGS.train_path_to,
        FLAGS.dev_path_from, FLAGS.dev_path_to, FLAGS.from_vocab_size,
        FLAGS.to_vocab_size)

    train_data_bucket = read_data(from_train, to_train)
    dev_data_bucket = read_data(from_dev, to_dev)
    _, _, real_vocab_size_from, real_vocab_size_to = data_util.get_vocab_info(
        FLAGS.data_cache_dir)

    FLAGS._buckets = _buckets
    FLAGS.real_vocab_size_from = real_vocab_size_from
    FLAGS.real_vocab_size_to = real_vocab_size_to

    # train_n_tokens = total number of target tokens in the training data
    train_n_tokens = np.sum(
        [np.sum([len(items[1]) for items in x]) for x in train_data_bucket])
    train_bucket_sizes = [
        len(train_data_bucket[b]) for b in xrange(len(_buckets))
    ]
    train_total_size = float(sum(train_bucket_sizes))
    train_buckets_scale = [
        sum(train_bucket_sizes[:i + 1]) / train_total_size
        for i in xrange(len(train_bucket_sizes))
    ]
    dev_bucket_sizes = [len(dev_data_bucket[b]) for b in xrange(len(_buckets))]
    dev_total_size = int(sum(dev_bucket_sizes))

    mylog_section("REPORT")
    # steps
    batch_size = FLAGS.batch_size
    n_epoch = FLAGS.n_epoch
    steps_per_epoch = int(train_total_size / batch_size)
    steps_per_dev = int(dev_total_size / batch_size)
    steps_per_checkpoint = int(steps_per_epoch / 2)
    total_steps = steps_per_epoch * n_epoch

    # reports
    mylog("from_vocab_size: {}".format(FLAGS.from_vocab_size))
    mylog("to_vocab_size: {}".format(FLAGS.to_vocab_size))
    mylog("_buckets: {}".format(FLAGS._buckets))
    mylog("Train:")
    mylog("total: {}".format(train_total_size))
    mylog("bucket sizes: {}".format(train_bucket_sizes))
    mylog("Dev:")
    mylog("total: {}".format(dev_total_size))
    mylog("bucket sizes: {}".format(dev_bucket_sizes))
    mylog("Steps_per_epoch: {}".format(steps_per_epoch))
    mylog("Total_steps: {}".format(total_steps))
    mylog("Steps_per_checkpoint: {}".format(steps_per_checkpoint))

    mylog_section("IN TENSORFLOW")

    config = tf.ConfigProto(allow_soft_placement=True,
                            log_device_placement=False)
    config.gpu_options.allow_growth = FLAGS.allow_growth

    with tf.Session(config=config) as sess:
        # runtime profile
        if FLAGS.profile:
            run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
            run_metadata = tf.RunMetadata()
        else:
            run_options = None
            run_metadata = None

        mylog_section("MODEL/SUMMARY/WRITER")
        mylog("Creating Model.. (this can take a few minutes)")
        model = create_model(sess, run_options, run_metadata)

        mylog_section("All Variables")
        show_all_variables()

        # Data iterators
        mylog_section("Data Iterators")
        dite = DataIterator(model, train_data_bucket,
                            len(train_buckets_scale), batch_size,
                            train_buckets_scale)
        iteType = 0
        if iteType == 0:
            mylog("Itetype: withRandom")
            ite = dite.next_random()
        elif iteType == 1:
            mylog("Itetype: withSequence")
            ite = dite.next_sequence()

        # statistics during training
        step_time, loss = 0.0, 0.0
        current_step = 0
        low_ppx = float("inf")
        steps_per_report = 30
        n_targets_report = 0
        report_time = 0
        n_valid_sents = 0
        n_valid_words = 0
        patience = FLAGS.patience

        mylog_section("TRAIN")

        while current_step < total_steps:
            # start
            start_time = time.time()

            # data and train
            source_inputs, target_inputs, target_outputs, target_weights, bucket_id = ite.next()
            L = model.step(sess, source_inputs, target_inputs, target_outputs,
                           target_weights, bucket_id)

            # loss and time
            step_time += (time.time() - start_time) / steps_per_checkpoint
            loss += L
            current_step += 1

            # Here len(target_weights) == sentence length and
            # len(target_weights[0]) == batch size.
            n_valid_sents += np.sum(np.sign(target_weights[0]))
            n_valid_words += np.sum(target_weights)

            # for report
            report_time += (time.time() - start_time)
            n_targets_report += np.sum(target_weights)

            # progress report
            if current_step % steps_per_report == 0:
                sect_name = "STEP {}".format(current_step)
                msg = "StepTime: {:.2f} sec Speed: {:.2f} targets/s Total_targets: {}".format(
                    report_time / steps_per_report,
                    n_targets_report * 1.0 / report_time, train_n_tokens)
                mylog_line(sect_name, msg)
                report_time = 0
                n_targets_report = 0

                # Create the Timeline object, and write it to a json file
                if FLAGS.profile:
                    tl = timeline.Timeline(run_metadata.step_stats)
                    ctf = tl.generate_chrome_trace_format()
                    with open('timeline.json', 'w') as f:
                        f.write(ctf)
                    exit()

            # Every half epoch, compute perplexity on the dev set.
            if current_step % steps_per_checkpoint == 0:
                i_checkpoint = int(current_step / steps_per_checkpoint)

                # train_ppx
                loss = loss / n_valid_words
                train_ppx = math.exp(float(loss)) if loss < 300 else float("inf")
                learning_rate = model.learning_rate.eval()

                # dev_ppx
                dev_loss, dev_ppx = evaluate(sess, model, dev_data_bucket)

                # report
                sect_name = "CHECKPOINT {} STEP {}".format(i_checkpoint, current_step)
                msg = "Learning_rate: {:.4f} Dev_ppx: {:.2f} Train_ppx: {:.2f}".format(
                    learning_rate, dev_ppx, train_ppx)
                mylog_line(sect_name, msg)

                # save model per checkpoint
                if FLAGS.saveCheckpoint:
                    checkpoint_path = os.path.join(FLAGS.saved_model_dir, "model")
                    s = time.time()
                    model.saver.save(sess, checkpoint_path,
                                     global_step=i_checkpoint,
                                     write_meta_graph=False)
                    msg = "Model saved using {:.2f} sec at {}".format(
                        time.time() - s, checkpoint_path)
                    mylog_line(sect_name, msg)

                # save best model
                if dev_ppx < low_ppx:
                    patience = FLAGS.patience
                    low_ppx = dev_ppx
                    checkpoint_path = os.path.join(FLAGS.saved_model_dir, "best")
                    s = time.time()
                    model.best_saver.save(sess, checkpoint_path,
                                          global_step=0,
                                          write_meta_graph=False)
                    msg = "Model saved using {:.2f} sec at {}".format(
                        time.time() - s, checkpoint_path)
                    mylog_line(sect_name, msg)
                else:
                    patience -= 1
                    # Whenever dev_ppx >= low_ppx, halve the learning rate.
                    sess.run(model.learning_rate_decay_op)
                    msg = 'dev_ppx: {}, low_ppx: {}'.format(str(dev_ppx), str(low_ppx))
                    mylog_line(sect_name, msg)
                    msg = 'dev_ppx >= low_ppx, patience = {}, learning_rate = {}'.format(
                        str(patience), str(model.learning_rate.eval()))
                    mylog_line(sect_name, msg)

                if patience <= 0:
                    mylog("Training finished. Running out of patience.")
                    break

                # Zero timer and loss counters for the next checkpoint period.
                step_time, loss, n_valid_sents, n_valid_words = 0.0, 0.0, 0, 0