import os

import numpy as np
import tensorflow as tf

# FLAGS, filter_sizes, load_vocabulary, load_test_data, DualBilstmCnnModel,
# get_label_by_logits and write_predict_result are defined elsewhere in this project.

def predict_bilstm(inpath,tokenize_style,ckpt_dir,model_name,name_scope,graph):
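    """Restore a DualBilstmCnnModel checkpoint from ckpt_dir and return the raw logits for every line in inpath."""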
    logits_result=None
    with graph.as_default():
        vocabulary_word2index, vocabulary_index2label= load_vocabulary(FLAGS.traning_data_path,FLAGS.vocab_size,
                                                              name_scope=name_scope,tokenize_style=tokenize_style)
        vocab_size = len(vocabulary_word2index)
        print(model_name + ".vocab_size:", vocab_size)
        num_classes = len(vocabulary_index2label)
        print("num_classes:", num_classes)
        lineno_list, X1, X2=load_test_data(inpath, vocabulary_word2index, FLAGS.sentence_len, tokenize_style=tokenize_style)
        #2.create session.
        config=tf.ConfigProto()
        config.gpu_options.allow_growth=True
        with tf.Session(config=config) as sess:
            #Instantiate Model
            model=DualBilstmCnnModel(filter_sizes,FLAGS.num_filters,num_classes, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps,
                            FLAGS.decay_rate,FLAGS.sentence_len,vocab_size,FLAGS.embed_size,FLAGS.is_training,model=model_name,
                                       similiarity_strategy=FLAGS.similiarity_strategy,top_k=FLAGS.top_k,max_pooling_style=FLAGS.max_pooling_style)
            #Initialize Save
            saver=tf.train.Saver()
            if os.path.exists(ckpt_dir+"checkpoint"):
                print(model_name+".Restoring Variables from Checkpoint.")
                saver.restore(sess, tf.train.latest_checkpoint(ckpt_dir))
            else:
                print(model_name+".Not able to find Checkpoint. Going to stop now...")
                iii=0
                iii/0
            #3.feed data & training
            number_of_test_data=len(X1)
            print(model_name+".number_of_test_data:",number_of_test_data)
            batch_size=FLAGS.batch_size
            iteration=0
            divide_equally=(number_of_test_data%batch_size==0)
            steps=0
            if divide_equally:
                steps=int(number_of_test_data/batch_size)
            else:
                steps=int(number_of_test_data/batch_size)+1

            print("steps:",steps)
            start=0
            end=0
            logits_result=np.zeros((number_of_test_data,len(vocabulary_index2label)))
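            # run the model batch by batch and collect one row of logits per test example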
            for i in range(steps):
                print("i:",i)
                start=i*batch_size
                if i != steps - 1 or divide_equally:
                    end=(i+1)*batch_size
                else:
                    # last, smaller batch when the data does not divide evenly
                    end=number_of_test_data
                feed_dict = {model.input_x1: X1[start:end],model.input_x2: X2[start:end],
                             model.dropout_keep_prob: FLAGS.dropout_keep_prob,
                             model.iter: iteration,model.tst: not FLAGS.is_training}
                print("start:",start,";end:",end)
                logits_batch=sess.run(model.logits,feed_dict) #[batch_size,num_classes]
                logits_result[start:end]=logits_batch

        print("logits_result:",logits_result)
        return logits_result,lineno_list,vocabulary_index2label
# Example 2
def predict_bilstm(inpath, outpath):
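    """Predict labels for every line in inpath with the checkpoint in FLAGS.ckpt_dir and append the results to outpath."""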
    vocabulary_word2index, vocabulary_index2label= load_vocabulary(FLAGS.traning_data_path,FLAGS.vocab_size,
                                                          name_scope=FLAGS.name_scope,tokenize_style=FLAGS.tokenize_style)
    vocab_size = len(vocabulary_word2index);print("cnn_model.vocab_size:",vocab_size);num_classes=len(vocabulary_index2label);print("num_classes:",num_classes)
    lineno_list, X1, X2=load_test_data(inpath, vocabulary_word2index, FLAGS.sentence_len, tokenize_style=FLAGS.tokenize_style)
    #2.create session.
    config=tf.ConfigProto()
    config.gpu_options.allow_growth=True
    with tf.Session(config=config) as sess:
        #Instantiate Model
        textCNN=DualBilstmCnnModel(filter_sizes,FLAGS.num_filters,num_classes, FLAGS.learning_rate, FLAGS.batch_size, FLAGS.decay_steps,
                        FLAGS.decay_rate,FLAGS.sentence_len,vocab_size,FLAGS.embed_size,FLAGS.hidden_size,FLAGS.is_training,model=FLAGS.model,
                                   similiarity_strategy=FLAGS.similiarity_strategy,top_k=FLAGS.top_k,max_pooling_style=FLAGS.max_pooling_style)
        #Initialize Save
        saver=tf.train.Saver()
        if os.path.exists(FLAGS.ckpt_dir+"checkpoint"):
            print("Restoring Variables from Checkpoint.")
            saver.restore(sess, tf.train.latest_checkpoint(FLAGS.ckpt_dir))
        else:
            print("Not able to find Checkpoint. Going to stop now...")
            iii=0
            iii/0
        #3.feed data & training
        number_of_test_data=len(X1)
        print("number_of_test_data:",number_of_test_data)
        batch_size=FLAGS.batch_size
        iteration=0
        file_object=open(outpath, 'a')
        divide_equally=(number_of_test_data%batch_size==0)
        steps=0
        if divide_equally:
            steps=int(number_of_test_data/batch_size)
        else:
            steps=int(number_of_test_data/batch_size)+1

        print("steps:",steps)
        start=0
        end=0
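        # predict batch by batch and append each batch of labels to the output file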
        for i in range(steps):
            print("i:",i)
            start=i*batch_size
            if i != steps - 1 or divide_equally:
                end=(i+1)*batch_size
            else:
                # last, smaller batch when the data does not divide evenly
                end=number_of_test_data
            feed_dict = {textCNN.input_x1: X1[start:end],textCNN.input_x2: X2[start:end],
                         textCNN.dropout_keep_prob: FLAGS.dropout_keep_prob,
                         textCNN.iter: iteration,textCNN.tst: not FLAGS.is_training}
            print("start:",start,";end:",end)
            logits=sess.run(textCNN.logits,feed_dict)
            label_list=get_label_by_logits(logits,vocabulary_index2label)
            write_predict_result(lineno_list[start:end],label_list,file_object)
        file_object.close()
def predict_siamese(inpath):
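    """Load the saved siamese meta graph named by FLAGS.model and return its similarity scores for every line in inpath."""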
    logits_result = None
    checkpoint_file = FLAGS.model
    graph = tf.Graph()
    with graph.as_default():
        print("1.load vocabulary...")
        vocabulary_word2index, vocabulary_index2label = load_vocabulary(
            FLAGS.traning_data_path,
            FLAGS.vocab_size,
            name_scope='siamese',
            tokenize_style='word')
        vocab_size = len(vocabulary_word2index)
        print(".vocab_size:", vocab_size)
        num_classes = len(vocabulary_index2label)
        print("num_classes:", num_classes)
        print("2.load data....")
        lineno_list, X1, X2, BLUESCORE = load_test_data(inpath,
                                                        vocabulary_word2index,
                                                        FLAGS.sentence_len,
                                                        tokenize_style='word')
        length_data_mining_features = len(BLUESCORE[0])
        print("length_data_mining_features:", length_data_mining_features)

        print("3.construct model...")
        #2.create session.
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        with tf.Session(config=config) as sess:
            # Load the saved meta graph and restore variables
            saver = tf.train.import_meta_graph(
                "{}.meta".format(checkpoint_file))
            sess.run(tf.global_variables_initializer())
            saver.restore(sess, checkpoint_file)

            # Get the placeholders from the graph by name
            input_x1 = graph.get_operation_by_name("input_x1").outputs[0]
            input_x2 = graph.get_operation_by_name("input_x2").outputs[0]
            input_y = graph.get_operation_by_name("input_y").outputs[0]

            dropout_keep_prob = graph.get_operation_by_name(
                "dropout_keep_prob").outputs[0]
            # Tensors we want to evaluate
            predictions = graph.get_operation_by_name(
                "output/distance").outputs[0]

            accuracy = graph.get_operation_by_name(
                "accuracy/accuracy").outputs[0]

            sim = graph.get_operation_by_name("accuracy/temp_sim").outputs[0]

            # emb = graph.get_operation_by_name("embedding/W").outputs[0]
            # embedded_chars = tf.nn.embedding_lookup(emb,input_x)
            # Generate batches for one epoch
            # 3.feed data & training
            number_of_test_data = len(X1)
            print(".number_of_test_data:", number_of_test_data)
            batch_size = FLAGS.batch_size
            iteration = 0
            divide_equally = (number_of_test_data % batch_size == 0)
            steps = 0
            if divide_equally:
                steps = int(number_of_test_data / batch_size)
            else:
                steps = int(number_of_test_data / batch_size) + 1

            print("steps:", steps)
            start = 0
            end = 0
            logits_result = np.zeros(
                (number_of_test_data, len(vocabulary_index2label)))
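            # run the restored graph batch by batch and collect a similarity score for each example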
            for i in range(steps):
                print("i:", i)
                start = i * batch_size
                if i != steps - 1 or divide_equally:
                    end = (i + 1) * batch_size
                else:
                    # last, smaller batch when the data does not divide evenly
                    end = number_of_test_data
                feed_dict = {
                    input_x1: X1[start:end],
                    input_x2: X2[start:end],
                    dropout_keep_prob: 1.0
                }
                print("start:", start, ";end:", end)
                logits_batch = sess.run(sim,
                                        feed_dict)  # [batch_size,num_classes]
                logits_result[start:end] = logits_batch

        print("logits_result:", logits_result)
        return logits_result, lineno_list, vocabulary_index2label
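
# A minimal usage sketch, not part of the original source: it only shows how the
# (inpath, outpath) variant of predict_bilstm and predict_siamese above might be
# invoked once the project's FLAGS have been configured. The file paths here are
# illustrative assumptions, not paths from the original project.
if __name__ == "__main__":
    # appends one predicted label per input line to the output file
    predict_bilstm("data/test.txt", "data/predictions.txt")
    # returns the similarity scores, line numbers and the index-to-label mapping
    scores, linenos, index2label = predict_siamese("data/test.txt")
    print("siamese scores for the first lines:", scores[:3])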