# Module-level imports assumed by the code below (numpy, tensorflow, os/glob;
# `pos_reader` / `reader` are the project's own helper modules, imported elsewhere).
import glob
import os

import numpy as np
import tensorflow as tf


def _predict_pos_tags(self, session, model, words, data_path):
    '''Prediction function for POS tagging; returns a list of (word, tag) tuples.'''
    word_data = pos_reader.sentence_to_word_ids(data_path, words)
    tag_data = [0] * len(word_data)  # dummy targets; only the logits are used
    state = session.run(model.initial_state)
    predict_id = []
    for step, (x, y) in enumerate(
            pos_reader.iterator(word_data, tag_data, model.batch_size,
                                model.num_steps)):
        fetches = [model.cost, model.final_state, model.logits]
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.targets] = y
        for i, (c, h) in enumerate(model.initial_state):
            feed_dict[c] = state[i].c
            feed_dict[h] = state[i].h
        # Fetch final_state into `state` so the LSTM state is carried across
        # steps (the original run discarded it).
        _, state, logits = session.run(fetches, feed_dict)
        # With batch_size = num_steps = 1, `logits` scores a single word, so
        # one argmax yields that word's tag id.
        predict_id.append(int(np.argmax(logits)))
    predict_tag = pos_reader.word_ids_to_sentence(data_path, predict_id)
    return list(zip(words, predict_tag))
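# --- Illustrative sketch (not part of the original module) -----------------
# The loop above decodes greedily: each step's tag is the argmax over that
# step's logits, with no sequence-level constraints. A minimal, self-contained
# NumPy equivalent of that decoding step, assuming `logits` has shape
# [num_steps, tag_num] (shapes and names here are assumptions, not this
# project's API):

def _greedy_decode_sketch(logits):
    """Return per-step argmax tag ids for a [num_steps, tag_num] score array."""
    return np.argmax(logits, axis=1).tolist()

# Example (three steps, four candidate tags -> tag ids [1, 0, 3]):
# demo = np.array([[0.1, 2.0, 0.3, 0.0],
#                  [1.5, 0.2, 0.1, 0.0],
#                  [0.0, 0.1, 0.2, 3.0]])
# assert _greedy_decode_sketch(demo) == [1, 0, 3]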
def predict(words):
    '''Tag `words` with the BiLSTM-CRF model; returns the predicted tag sequence.'''
    word_data = reader.sentence_to_word_ids(FLAGS.pos_data_path, words)
    config = get_config(FLAGS.pos_lang)
    config.batch_size = 1
    # Leave config.num_steps unchanged: num_steps is the sequence length T,
    # and the CRF decodes the whole sequence at once, so it must not be 1.

    # Initialize the model.
    ckpt_path = os.path.join(FLAGS.pos_train_dir, "pos_bilstm_crf.ckpt")
    with tf.Graph().as_default(), tf.Session() as session:
        # The scope name must match the one used when saving the tagger.
        with tf.variable_scope("pos_var_scope"):
            model = POSTagger(is_training=False, config=config)
        if len(glob.glob(ckpt_path + '.data*')) > 0:
            # Checkpoint files exist with the pattern 'pos_bilstm_crf.ckpt.data*'.
            print("Loading model parameters from %s" % ckpt_path)
            all_vars = tf.global_variables()
            model_vars = [
                k for k in all_vars if k.name.startswith("pos_var_scope")
            ]
            tf.train.Saver(model_vars).restore(session, ckpt_path)
        else:
            print("Model not found, created with fresh parameters.")
            session.run(tf.global_variables_initializer())

        # Make the prediction.
        word_data = np.array(word_data, dtype=np.int32)
        sequence_len = len(word_data)
        x = np.zeros([1, config.num_steps], dtype=np.int32)
        for i in range(sequence_len):  # pad word_data to num_steps with zeros
            x[0, i] = word_data[i]
        y = np.zeros([1, config.num_steps], dtype=np.int32)  # dummy targets

        fetches = [model.cost, model.logits, model.transition_params]
        feed_dict = {}
        feed_dict[model.input_data] = x
        feed_dict[model.targets] = y
        cost, logits, transition_params = session.run(fetches, feed_dict)

        # Remove padding from the unary scores before decoding.
        tf_unary_scores = logits[0, :, :]  # one example: [num_steps, target_num]
        tf_unary_scores = tf_unary_scores[:sequence_len]

        # transition_params holds the tag-transition scores A[i][j].
        viterbi_sequence, _ = tf.contrib.crf.viterbi_decode(
            tf_unary_scores, transition_params)
        print("optimal sequence found by viterbi algorithm")
        print(viterbi_sequence)

        predict_tag = reader.word_ids_to_sentence(FLAGS.pos_data_path,
                                                  viterbi_sequence)
        return predict_tag
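# --- Illustrative sketch (not part of the original module) -----------------
# tf.contrib.crf.viterbi_decode runs outside the TF graph, in plain NumPy.
# The sketch below shows what it computes, assuming `score` is the
# [seq_len, num_tags] unary-score matrix and `transition_params` the
# [num_tags, num_tags] transition matrix A[i][j] (score of moving from
# tag i to tag j):

def _viterbi_decode_sketch(score, transition_params):
    """Return (best tag sequence, its score) via dynamic programming."""
    trellis = np.zeros_like(score)
    backpointers = np.zeros_like(score, dtype=np.int32)
    trellis[0] = score[0]
    for t in range(1, score.shape[0]):
        # v[i, j]: score of being in tag i at step t-1 and moving to tag j.
        v = np.expand_dims(trellis[t - 1], 1) + transition_params
        trellis[t] = score[t] + np.max(v, axis=0)
        backpointers[t] = np.argmax(v, axis=0)
    # Backtrace from the best final tag.
    viterbi = [int(np.argmax(trellis[-1]))]
    for bp in reversed(backpointers[1:]):
        viterbi.append(int(bp[viterbi[-1]]))
    viterbi.reverse()
    return viterbi, float(np.max(trellis[-1]))

# Example: transitions favor staying on the same tag, so the decoded path
# sticks with tag 0 even though step 1 locally prefers tag 1.
# demo_scores = np.array([[4.0, 1.0], [1.0, 3.0], [2.0, 2.0]])
# demo_trans = np.array([[2.0, -1.0], [-1.0, 2.0]])
# assert _viterbi_decode_sketch(demo_scores, demo_trans)[0] == [0, 0, 0]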