Example 1
    def _predict_pos_tags(self, session, model, words, data_path):
        '''
        Predict POS tags for a sequence of words with an RNN tagger.

        Args:
            session: active tf.Session holding the trained model variables.
            model: tagger exposing input_data/targets placeholders,
                initial_state/final_state, cost and logits tensors, plus
                batch_size and num_steps attributes.
            words: list of word tokens to tag.
            data_path: path used by pos_reader to load vocabulary maps.

        Returns:
            list of (word, predicted_tag) tuples.
        '''
        word_data = pos_reader.sentence_to_word_ids(data_path, words)
        # Dummy targets: only the logits are used for prediction.
        tag_data = [0] * len(word_data)
        state = session.run(model.initial_state)

        predict_id = []
        for step, (x, y) in enumerate(
                pos_reader.iterator(word_data, tag_data, model.batch_size,
                                    model.num_steps)):
            fetches = [model.cost, model.final_state, model.logits]
            feed_dict = {
                model.input_data: x,
                model.targets: y,
            }
            # Feed the carried LSTM state tuple (c, h) for each layer.
            for i, (c, h) in enumerate(model.initial_state):
                feed_dict[c] = state[i].c
                feed_dict[h] = state[i].h

            # BUG FIX: capture final_state so the RNN state carries across
            # steps; the original discarded it and re-fed the initial state
            # on every iteration.
            _, state, logits = session.run(fetches, feed_dict)
            predict_id.append(int(np.argmax(logits)))
        predict_tag = pos_reader.word_ids_to_sentence(data_path, predict_id)
        # Materialize: zip() is a single-use lazy iterator in Python 3.
        return list(zip(words, predict_tag))
Example 2
def predict(words):
    """
    Predict POS tags for a tokenized sentence with a BiLSTM-CRF tagger.

    Loads checkpointed parameters when available (fresh parameters
    otherwise), runs the network once over the padded sentence, and
    Viterbi-decodes the CRF output.

    Args:
        words: list of word tokens.

    Returns:
        The tag sequence produced by reader.word_ids_to_sentence, one tag
        per input word (sentences longer than config.num_steps are
        truncated to num_steps).
    """
    word_data = reader.sentence_to_word_ids(FLAGS.pos_data_path, words)
    # Dummy targets: the CRF decode path ignores them.
    tag_data = [0] * len(word_data)

    config = get_config(FLAGS.pos_lang)
    config.batch_size = 1
    # num_steps is left untouched: the CRF decodes the whole sequence at once.

    ckpt_path = os.path.join(FLAGS.pos_train_dir, "pos_bilstm_crf.ckpt")

    with tf.Graph().as_default(), tf.Session() as session:
        # Scope name must match the one used when the model was saved.
        with tf.variable_scope("pos_var_scope"):
            model = POSTagger(is_training=False, config=config)

        # Checkpoints are sharded with the pattern 'pos_bilstm_crf.ckpt.data*'.
        if glob.glob(ckpt_path + '.data*'):
            print("Loading model parameters from %s" % ckpt_path)
            model_vars = [
                v for v in tf.global_variables()
                if v.name.startswith("pos_var_scope")
            ]
            tf.train.Saver(model_vars).restore(session, ckpt_path)
        else:
            print("Model not found, created with fresh parameters.")
            session.run(tf.global_variables_initializer())

        # Pad (or truncate) the sentence to exactly num_steps ids.
        # BUG FIX: the original copied all len(word_data) ids and raised
        # IndexError for sentences longer than num_steps.
        word_data = np.array(word_data, dtype=np.int32)
        sequence_len = min(len(word_data), config.num_steps)
        x = np.zeros([1, config.num_steps], dtype=np.int32)
        x[0, :sequence_len] = word_data[:sequence_len]
        y = np.zeros([1, config.num_steps], dtype=np.int32)

        fetches = [model.cost, model.logits, model.transition_params]
        feed_dict = {model.input_data: x, model.targets: y}
        _, logits, transition_params = session.run(fetches, feed_dict)

        # Strip padding before decoding: logits is
        # [batch_size, num_steps, target_num]; keep only the real time steps.
        tf_unary_scores = logits[0, :sequence_len, :]

        # transition_params holds the learned tag-transition scores [Aij];
        # viterbi_decode finds the highest-scoring tag path.
        viterbi_sequence, _ = tf.contrib.crf.viterbi_decode(
            tf_unary_scores, transition_params)
        predict_tag = reader.word_ids_to_sentence(FLAGS.pos_data_path,
                                                  viterbi_sequence)
    return predict_tag