import os

import numpy as np
import tensorflow as tf

# Project-level helpers (FLAGS, filter_sizes, load_vocabulary, load_test_data,
# DualBilstmCnnModel, get_label_by_logits, write_predict_result) are assumed to
# be defined or imported elsewhere in this repository.


def predict_bilstm(inpath, tokenize_style, ckpt_dir, model_name, name_scope, graph):
    """Run one BiLSTM/CNN model on the test file and return its raw logits."""
    logits_result = None
    with graph.as_default():
        # 1. Load vocabulary and test data.
        vocabulary_word2index, vocabulary_index2label = load_vocabulary(
            FLAGS.traning_data_path, FLAGS.vocab_size,
            name_scope=name_scope, tokenize_style=tokenize_style)
        vocab_size = len(vocabulary_word2index)
        print(model_name + ".vocab_size:", vocab_size)
        num_classes = len(vocabulary_index2label)
        print("num_classes:", num_classes)
        lineno_list, X1, X2 = load_test_data(
            inpath, vocabulary_word2index, FLAGS.sentence_len,
            tokenize_style=tokenize_style)

        # 2. Create session.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            # Instantiate model.
            model = DualBilstmCnnModel(
                filter_sizes, FLAGS.num_filters, num_classes, FLAGS.learning_rate,
                FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate,
                FLAGS.sentence_len, vocab_size, FLAGS.embed_size, FLAGS.is_training,
                model=model_name, similiarity_strategy=FLAGS.similiarity_strategy,
                top_k=FLAGS.top_k, max_pooling_style=FLAGS.max_pooling_style)

            # Restore variables from checkpoint.
            saver = tf.train.Saver()
            if os.path.exists(ckpt_dir + "checkpoint"):
                print(model_name + ".Restoring Variables from Checkpoint.")
                saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))
            else:
                raise FileNotFoundError(
                    model_name + ": not able to find checkpoint. Stopping now.")

            # 3. Feed data batch by batch and collect logits.
            number_of_test_data = len(X1)
            print(model_name + ".number_of_test_data:", number_of_test_data)
            batch_size = FLAGS.batch_size
            iteration = 0
            divide_equally = (number_of_test_data % batch_size == 0)
            if divide_equally:
                steps = int(number_of_test_data / batch_size)
            else:
                steps = int(number_of_test_data / batch_size) + 1
            print("steps:", steps)
            logits_result = np.zeros((number_of_test_data, len(vocabulary_index2label)))
            for i in range(steps):
                print("i:", i)
                start = i * batch_size
                # The last batch may be smaller than batch_size.
                end = min((i + 1) * batch_size, number_of_test_data)
                feed_dict = {model.input_x1: X1[start:end],
                             model.input_x2: X2[start:end],
                             model.dropout_keep_prob: FLAGS.dropout_keep_prob,
                             model.iter: iteration,
                             model.tst: not FLAGS.is_training}
                print("start:", start, ";end:", end)
                logits_batch = sess.run(model.logits, feed_dict)  # [batch_size, num_classes]
                logits_result[start:end] = logits_batch
    print("logits_result:", logits_result)
    return logits_result, lineno_list, vocabulary_index2label
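# Usage sketch: the `graph` argument lets each checkpoint be restored into its
# own tf.Graph, so several model variants can be scored in one process without
# their variables colliding. The paths, model name and name scope below are
# hypothetical placeholders, not values taken from this repository's flags:
#
#     graph_word = tf.Graph()
#     logits_word, lineno_list, index2label = predict_bilstm(
#         "test.txt", "word", "checkpoint_word/", "bilstm_word", "bilstm_word",
#         graph_word)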
def predict_bilstm(inpath, outpath):
    """Predict a label for every line of the test file and append the results to outpath.

    Note: if this definition lives in the same module as the six-argument
    predict_bilstm above, it shadows that one; both are kept here as written.
    """
    # 1. Load vocabulary and test data.
    vocabulary_word2index, vocabulary_index2label = load_vocabulary(
        FLAGS.traning_data_path, FLAGS.vocab_size,
        name_scope=FLAGS.name_scope, tokenize_style=FLAGS.tokenize_style)
    vocab_size = len(vocabulary_word2index)
    print("cnn_model.vocab_size:", vocab_size)
    num_classes = len(vocabulary_index2label)
    print("num_classes:", num_classes)
    lineno_list, X1, X2 = load_test_data(
        inpath, vocabulary_word2index, FLAGS.sentence_len,
        tokenize_style=FLAGS.tokenize_style)

    # 2. Create session.
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    with tf.Session(config=config) as sess:
        # Instantiate model.
        textCNN = DualBilstmCnnModel(
            filter_sizes, FLAGS.num_filters, num_classes, FLAGS.learning_rate,
            FLAGS.batch_size, FLAGS.decay_steps, FLAGS.decay_rate,
            FLAGS.sentence_len, vocab_size, FLAGS.embed_size, FLAGS.hidden_size,
            FLAGS.is_training, model=FLAGS.model,
            similiarity_strategy=FLAGS.similiarity_strategy,
            top_k=FLAGS.top_k, max_pooling_style=FLAGS.max_pooling_style)

        # Restore variables from checkpoint.
        saver = tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir + "checkpoint"):
            print("Restoring Variables from Checkpoint.")
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            raise FileNotFoundError("Not able to find checkpoint. Stopping now.")

        # 3. Feed data batch by batch and write predictions.
        number_of_test_data = len(X1)
        print("number_of_test_data:", number_of_test_data)
        batch_size = FLAGS.batch_size
        iteration = 0
        file_object = open(outpath, 'a')
        divide_equally = (number_of_test_data % batch_size == 0)
        if divide_equally:
            steps = int(number_of_test_data / batch_size)
        else:
            steps = int(number_of_test_data / batch_size) + 1
        print("steps:", steps)
        for i in range(steps):
            print("i:", i)
            start = i * batch_size
            # The last batch may be smaller than batch_size.
            end = min((i + 1) * batch_size, number_of_test_data)
            feed_dict = {textCNN.input_x1: X1[start:end],
                         textCNN.input_x2: X2[start:end],
                         textCNN.dropout_keep_prob: FLAGS.dropout_keep_prob,
                         textCNN.iter: iteration,
                         textCNN.tst: not FLAGS.is_training}
            print("start:", start, ";end:", end)
            logits = sess.run(textCNN.logits, feed_dict)
            label_list = get_label_by_logits(logits, vocabulary_index2label)
            write_predict_result(lineno_list[start:end], label_list, file_object)
        file_object.close()
def predict_siamese(inpath):
    """Run the saved siamese model on the test file and return its similarity scores."""
    logits_result = None
    checkpoint_file = FLAGS.model
    graph = tf.Graph()
    with graph.as_default():
        print("1.load vocabulary...")
        vocabulary_word2index, vocabulary_index2label = load_vocabulary(
            FLAGS.traning_data_path, FLAGS.vocab_size,
            name_scope='siamese', tokenize_style='word')
        vocab_size = len(vocabulary_word2index)
        print("siamese.vocab_size:", vocab_size)
        num_classes = len(vocabulary_index2label)
        print("num_classes:", num_classes)

        print("2.load data....")
        lineno_list, X1, X2, BLUESCORE = load_test_data(
            inpath, vocabulary_word2index, FLAGS.sentence_len, tokenize_style='word')
        length_data_mining_features = len(BLUESCORE[0])
        print("length_data_mining_features:", length_data_mining_features)

        print("3.construct model...")
        # Create session.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            # Load the saved meta graph and restore variables.
            saver = tf.train.import_meta_graph("{}.meta".format(checkpoint_file))
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name.
            input_x1 = graph.get_operation_by_name("input_x1").outputs[0]
            input_x2 = graph.get_operation_by_name("input_x2").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]
            dropout_keep_prob = graph.get_operation_by_name("dropout_keep_prob").outputs[0]

            # Tensors we want to evaluate.
            predictions = graph.get_operation_by_name("output/distance").outputs[0]
            accuracy = graph.get_operation_by_name("accuracy/accuracy").outputs[0]
            sim = graph.get_operation_by_name("accuracy/temp_sim").outputs[0]
            # emb = graph.get_operation_by_name("embedding/W").outputs[0]
            # embedded_chars = tf.nn.embedding_lookup(emb, input_x)

            # Feed data batch by batch and collect similarity scores.
            number_of_test_data = len(X1)
            print("siamese.number_of_test_data:", number_of_test_data)
            batch_size = FLAGS.batch_size
            iteration = 0
            divide_equally = (number_of_test_data % batch_size == 0)
            if divide_equally:
                steps = int(number_of_test_data / batch_size)
            else:
                steps = int(number_of_test_data / batch_size) + 1
            print("steps:", steps)
            logits_result = np.zeros((number_of_test_data, len(vocabulary_index2label)))
            for i in range(steps):
                print("i:", i)
                start = i * batch_size
                # The last batch may be smaller than batch_size.
                end = min((i + 1) * batch_size, number_of_test_data)
                # Disable dropout at prediction time.
                feed_dict = {input_x1: X1[start:end],
                             input_x2: X2[start:end],
                             dropout_keep_prob: 1.0}
                print("start:", start, ";end:", end)
                logits_batch = sess.run(sim, feed_dict)  # [batch_size,num_classes]
                logits_result[start:end] = logits_batch
    print("logits_result:", logits_result)
    return logits_result, lineno_list, vocabulary_index2label
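# A minimal ensemble sketch, assuming the six-argument predict_bilstm defined at
# the top of this file is the live binding (the two-argument definition above
# shadows it if both share one module). The function name, checkpoint
# directories, model names, name scopes and the plain averaging weights are all
# hypothetical placeholders, not values taken from this repository.
def ensemble_predict_example(inpath, outpath):
    # Each BiLSTM/CNN checkpoint is restored into its own graph.
    graph_word = tf.Graph()
    graph_char = tf.Graph()
    logits_word, lineno_list, index2label = predict_bilstm(
        inpath, "word", "checkpoint_word/", "bilstm_word", "bilstm_word", graph_word)
    logits_char, _, _ = predict_bilstm(
        inpath, "char", "checkpoint_char/", "bilstm_char", "bilstm_char", graph_char)
    logits_siamese, _, _ = predict_siamese(inpath)

    # Average the three [num_test, num_classes] score matrices and take the
    # highest-scoring class for every input line.
    averaged = (logits_word + logits_char + logits_siamese) / 3.0
    with open(outpath, "w") as f:
        for lineno, row in zip(lineno_list, averaged):
            f.write("%s\t%s\n" % (lineno, index2label[int(np.argmax(row))]))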