def decode():
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        fr_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.to" % FLAGS.to_vocab_size)
        en_vocab, _ = data_utils.initialize_vocabulary(en_vocab_path)
        _, rev_fr_vocab = data_utils.initialize_vocabulary(fr_vocab_path)

        # Decode from standard input.
        # changed by Kaifeng, for test
        testTableFile = FLAGS.test_dir + '/test.json'
        offset = 0  # the test data is the last 20000 items in the table
        if FLAGS.enable_table_test:
            print('loading database table')
            with open(testTableFile) as testTables:
                tables = json.load(testTables)
            answerOutput = open(FLAGS.test_dir + '/answer.out', 'w')

        testQuestionFile = FLAGS.data_dir + '/%s_test.qu.ids1500' % subset
        testLogicFile = FLAGS.data_dir + '/%s_test.lon.ids150' % subset  # For tagging model, Hongyu
        # 0530 newly added
        geoQuestionFile = FLAGS.data_dir + '/%s_train.qu.ids1500' % subset
        geoLogicFile = FLAGS.data_dir + '/%s_train.lon.ids150' % subset  # For tagging model, Hongyu

        logicalTemp_geo = open(FLAGS.test_dir + '/%s_train.out' % subset, 'w')
        logicalTemp_test = open(FLAGS.test_dir + '/%s_test.out' % subset, 'w')

        print('======= start testing =======')
        print('=== train dataset ===')
        with tf.gfile.GFile(geoQuestionFile, mode='r') as geoQuestions, \
                tf.gfile.GFile(geoLogicFile, mode='r') as geoLogics:
            q_index = 0
            sentence, logic_sen = geoQuestions.readline(), geoLogics.readline()
            while sentence and logic_sen:
                if q_index % 200 == 0:
                    print(" reading data line %d" % q_index)
                    sys.stdout.flush()
                qid = 'qID_' + str(q_index)
                print('training question: ', qid)
                # Get token-ids for the input sentence.
                token_ids = [int(x) for x in sentence.split()]
                logic_ids = [int(x) for x in logic_sen.split()]
                print(token_ids)
                print(logic_ids)
                # Which bucket does it belong to?
                bucket_id = len(_buckets) - 1
                for i, bucket in enumerate(_buckets):
                    if bucket[0] > len(token_ids) and bucket[1] > len(logic_ids):
                        bucket_id = i
                        break
                else:
                    logging.warning("Sentence truncated: %s", sentence)
                # Get a 1-element batch to feed the sentence to the model.
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    {bucket_id: [(token_ids, [])]}, bucket_id)
                # Get output logits for the sentence.
                _, _, output_logits, _ = model.step(sess, encoder_inputs,
                                                    decoder_inputs,
                                                    target_weights, bucket_id,
                                                    True)
                # This is a greedy decoder - outputs are just argmaxes of output_logits.
                outputs = [
                    int(np.argmax(logit, axis=1)) for logit in output_logits
                ]
                # If there is an EOS symbol in outputs, cut them at that point.
                if data_utils.EOS_ID in outputs:
                    outputs = outputs[:outputs.index(data_utils.EOS_ID)]
                # Print out French sentence corresponding to outputs.
                print(outputs)
                # Clamp out-of-vocabulary ids to UNK (id 3) before lookup.
                outputs = [
                    output if output < len(rev_fr_vocab) else 3
                    for output in outputs
                ]
                resultLogical = " ".join([
                    tf.compat.as_str(rev_fr_vocab[output]) for output in outputs
                ])
                if FLAGS.enable_table_test:
                    resultAnswer = logicalParser(tables[qid], resultLogical)
                    answerOutput.write(str(resultAnswer) + '\n')
                logicalTemp_geo.write(str(resultLogical) + '\n')
                q_index += 1
                sentence, logic_sen = geoQuestions.readline(), geoLogics.readline()

        print('=== test dataset ===')
        with tf.gfile.GFile(testQuestionFile, mode='r') as testQuestions, \
                tf.gfile.GFile(testLogicFile, mode='r') as testLogics:
            q_index = 0
            sentence, logic_sen = testQuestions.readline(), testLogics.readline()
            while sentence and logic_sen:
                if q_index % 200 == 0:
                    print(" reading data line %d" % q_index)
                    sys.stdout.flush()
                qid = 'qID_' + str(q_index)
                print('testing question: ', qid)
                # Get token-ids for the input sentence.
                token_ids = [int(x) for x in sentence.split()]
                logic_ids = [int(x) for x in logic_sen.split()]
                # Which bucket does it belong to?
                bucket_id = len(_buckets) - 1
                for i, bucket in enumerate(_buckets):
                    if bucket[0] > len(token_ids) and bucket[1] > len(logic_ids):
                        bucket_id = i
                        break
                else:
                    logging.warning("Sentence truncated: %s", sentence)
                # Get a 1-element batch to feed the sentence to the model.
                encoder_inputs, decoder_inputs, target_weights = model.get_batch(
                    {bucket_id: [(token_ids, [])]}, bucket_id)
                # Get output logits for the sentence.
                _, _, output_logits, _ = model.step(sess, encoder_inputs,
                                                    decoder_inputs,
                                                    target_weights, bucket_id,
                                                    True)
                # This is a greedy decoder - outputs are just argmaxes of output_logits.
                outputs = [
                    int(np.argmax(logit, axis=1)) for logit in output_logits
                ]
                # If there is an EOS symbol in outputs, cut them at that point.
                print(outputs)
                if data_utils.EOS_ID in outputs:
                    outputs = outputs[:outputs.index(data_utils.EOS_ID)]
                # Print out French sentence corresponding to outputs.
                # Clamp out-of-vocabulary ids to UNK (id 3) before lookup.
                outputs = [
                    output if output < len(rev_fr_vocab) else 3
                    for output in outputs
                ]
                resultLogical = " ".join([
                    tf.compat.as_str(rev_fr_vocab[output]) for output in outputs
                ])
                if FLAGS.enable_table_test:
                    resultAnswer = logicalParser(tables[qid], resultLogical)
                    answerOutput.write(str(resultAnswer) + '\n')
                logicalTemp_test.write(str(resultLogical) + '\n')
                q_index += 1
                sentence, logic_sen = testQuestions.readline(), testLogics.readline()

        logicalTemp_geo.close()
        # logicalTemp_train.close()
        # logicalTemp_dev.close()
        logicalTemp_test.close()
        if FLAGS.enable_table_test:
            answerOutput.close()
'''
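
# The decode() variants in this file all repeat the same post-processing:
# argmax over the step logits, truncation at the first EOS, clamping of
# out-of-vocabulary ids, and detokenization through the reverse vocabulary.
# Below is a minimal sketch of that shared logic, assuming the default ids
# match data_utils(_tag).EOS_ID / UNK_ID; the name greedy_decode is
# hypothetical and not part of this codebase.
def greedy_decode(output_logits, rev_vocab, eos_id=2, unk_id=3):
    """Greedily pick the argmax token at each step and detokenize."""
    outputs = [int(np.argmax(logit, axis=1)) for logit in output_logits]
    if eos_id in outputs:
        # Cut the sequence at the first end-of-sentence symbol.
        outputs = outputs[:outputs.index(eos_id)]
    # Map any id outside the vocabulary onto UNK before lookup.
    outputs = [o if o < len(rev_vocab) else unk_id for o in outputs]
    return " ".join(tf.compat.as_str(rev_vocab[o]) for o in outputs)
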
def decode():
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        fr_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.to" % FLAGS.to_vocab_size)
        en_vocab, _ = data_utils_tag.initialize_vocabulary(en_vocab_path)
        fr_vocab, rev_fr_vocab = data_utils_tag.initialize_vocabulary(
            fr_vocab_path)

        # Decode from standard input.
        # changed by Kaifeng, for test
        offset = 0  # the test data is the last 20000 items in the table
        testTableFile = FLAGS.test_dir + '/test.json'
        if FLAGS.enable_table_test:
            print('loading database table')
            with open(testTableFile) as testTables:
                tables = json.load(testTables)
            answerOutput = open(FLAGS.test_dir + '/answer.out', 'w')

        trainQuestionFile = FLAGS.data_dir + '/rand_train.qu'
        trainTagFile = FLAGS.data_dir + '/rand_train.ta'  # For tagging model, Hongyu
        devQuestionFile = FLAGS.data_dir + '/rand_dev.qu'
        devTagFile = FLAGS.data_dir + '/rand_dev.ta'  # For tagging model, Hongyu
        testQuestionFile = FLAGS.data_dir + '/rand_test.qu'
        testTagFile = FLAGS.data_dir + '/rand_test.ta'  # For tagging model, Hongyu
        # 0530 newly added
        geoQuestionFile = FLAGS.data_dir + '/GeoQuery/geo880.qu'
        geoTagFile = FLAGS.data_dir + '/GeoQuery/geo880.ta'  # For tagging model, Hongyu

        logicalTemp_geo = open(FLAGS.test_dir + '/logicalTemp_geo.out', 'w')
        logicalTemp_train = open(FLAGS.test_dir + '/logicalTemp_train.out', 'w')
        logicalTemp_dev = open(FLAGS.test_dir + '/logicalTemp_dev.out', 'w')
        logicalTemp_test = open(FLAGS.test_dir + '/logicalTemp_test.out', 'w')

        ### evaluating tagging model, Hongyu
        print('======= start testing =======')
        print('=== train dataset ===')
        with open(trainQuestionFile, 'r') as trainQuestions:
            with open(trainTagFile, 'r') as trainTags:
                q_index = 0
                sentence, tag_sen = trainQuestions.readline(), trainTags.readline()
                while sentence and tag_sen:
                    if q_index % 200 == 0:
                        print(" reading data line %d" % q_index)
                        sys.stdout.flush()
                    qid = 'qID_' + str(q_index)
                    print('training question: ', qid)
                    # Get token-ids for the input sentence.
                    token_ids = data_utils_tag.sentence_to_token_ids(
                        tf.compat.as_bytes(sentence), en_vocab)
                    tag_ids = data_utils_tag.sentence_to_token_ids(
                        tf.compat.as_bytes(tag_sen), fr_vocab)
                    # Which bucket does it belong to?
                    bucket_id = len(_buckets) - 1
                    for i, bucket in enumerate(_buckets):
                        if bucket[0] >= len(token_ids):
                            bucket_id = i
                            break
                    else:
                        logging.warning("Sentence truncated: %s", sentence)
                    # Get a 1-element batch to feed the sentence to the model.
                    encoder_inputs, tag_inputs, decoder_inputs, target_weights = model.get_batch(
                        {bucket_id: [(token_ids, tag_ids, [])]}, bucket_id)
                    # Get output logits for the sentence.
                    _, _, output_logits, _ = model.step(
                        sess, encoder_inputs, tag_inputs, decoder_inputs,
                        target_weights, bucket_id, True)
                    # This is a greedy decoder - outputs are just argmaxes of output_logits.
                    outputs = [
                        int(np.argmax(logit, axis=1)) for logit in output_logits
                    ]
                    # If there is an EOS symbol in outputs, cut them at that point.
                    if data_utils_tag.EOS_ID in outputs:
                        outputs = outputs[:outputs.index(data_utils_tag.EOS_ID)]
                    # Print out French sentence corresponding to outputs.
resultLogical = " ".join([ tf.compat.as_str(rev_fr_vocab[output]) for output in outputs ]) if FLAGS.enable_table_test: resultAnswer = logicalParser(tables[qid], resultLogical) answerOutput.write(str(resultAnswer) + '\n') logicalTemp_train.write(str(resultLogical) + '\n') q_index += 1 sentence, tag_sen = trainQuestions.readline( ), trainTags.readline() print('=== dev dataset ===') with open(devQuestionFile, 'r') as devQuestions: with open(devTagFile, 'r') as devTags: q_index = 0 sentence, tag_sen = devQuestions.readline(), devTags.readline() while sentence and tag_sen: if q_index % 200 == 0: print(" reading data line %d" % q_index) sys.stdout.flush() qid = 'qID_' + str(q_index) print('deving question: ', qid) # Get token-ids for the input sentence. token_ids = data_utils_tag.sentence_to_token_ids( tf.compat.as_bytes(sentence), en_vocab) tag_ids = data_utils_tag.sentence_to_token_ids( tf.compat.as_bytes(tag_sen), fr_vocab) # Which bucket does it belong to? bucket_id = len(_buckets) - 1 for i, bucket in enumerate(_buckets): if bucket[0] >= len(token_ids): bucket_id = i break else: logging.warning("Sentence truncated: %s", sentence) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, tag_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, tag_ids, [])]}, bucket_id) # Get output logits for the sentence. _, _, output_logits, _ = model.step( sess, encoder_inputs, tag_inputs, decoder_inputs, target_weights, bucket_id, True) # This is a greedy decoder - outputs are just argmaxes of output_logits. outputs = [ int(np.argmax(logit, axis=1)) for logit in output_logits ] # If there is an EOS symbol in outputs, cut them at that point. if data_utils_tag.EOS_ID in outputs: outputs = outputs[:outputs.index(data_utils_tag.EOS_ID )] # Print out French sentence corresponding to outputs. resultLogical = " ".join([ tf.compat.as_str(rev_fr_vocab[output]) for output in outputs ]) if FLAGS.enable_table_test: resultAnswer = logicalParser(tables[qid], resultLogical) answerOutput.write(str(resultAnswer) + '\n') logicalTemp_dev.write(str(resultLogical) + '\n') q_index += 1 sentence, tag_sen = devQuestions.readline( ), devTags.readline() print('=== test dataset ===') with open(testQuestionFile, 'r') as testQuestions: with open(testTagFile, 'r') as testTags: q_index = 0 sentence, tag_sen = testQuestions.readline( ), testTags.readline() while sentence and tag_sen: if q_index % 200 == 0: print(" reading data line %d" % q_index) sys.stdout.flush() qid = 'qID_' + str(q_index) print('testing question: ', qid) # Get token-ids for the input sentence. token_ids = data_utils_tag.sentence_to_token_ids( tf.compat.as_bytes(sentence), en_vocab) tag_ids = data_utils_tag.sentence_to_token_ids( tf.compat.as_bytes(tag_sen), fr_vocab) # Which bucket does it belong to? bucket_id = len(_buckets) - 1 for i, bucket in enumerate(_buckets): if bucket[0] >= len(token_ids): bucket_id = i break else: logging.warning("Sentence truncated: %s", sentence) # Get a 1-element batch to feed the sentence to the model. encoder_inputs, tag_inputs, decoder_inputs, target_weights = model.get_batch( {bucket_id: [(token_ids, tag_ids, [])]}, bucket_id) # Get output logits for the sentence and CONFUSION matrix. 
                    # 0531 newly added
                    filename = "confusion_matrix.txt"
                    confusion_path = os.path.join("./PCA-visual/", filename)
                    f_con = open(confusion_path, 'a+')
                    _, _, output_logits, confusion_matrix = model.step(
                        sess, encoder_inputs, tag_inputs, decoder_inputs,
                        target_weights, bucket_id, True)
                    f_con.write('*** example: ' + str(q_index) + ' ***\n')
                    for i in range(confusion_matrix.shape[1]):
                        words = [str(x) for x in confusion_matrix[0][i]]
                        f_con.write(','.join(words) + '\n')
                    f_con.close()
                    # This is a greedy decoder - outputs are just argmaxes of output_logits.
                    outputs = [
                        int(np.argmax(logit, axis=1)) for logit in output_logits
                    ]
                    # If there is an EOS symbol in outputs, cut them at that point.
                    if data_utils_tag.EOS_ID in outputs:
                        outputs = outputs[:outputs.index(data_utils_tag.EOS_ID)]
                    # Print out French sentence corresponding to outputs.
                    resultLogical = " ".join([
                        tf.compat.as_str(rev_fr_vocab[output])
                        for output in outputs
                    ])
                    if FLAGS.enable_table_test:
                        resultAnswer = logicalParser(tables[qid], resultLogical)
                        answerOutput.write(str(resultAnswer) + '\n')
                    logicalTemp_test.write(str(resultLogical) + '\n')
                    q_index += 1
                    sentence, tag_sen = testQuestions.readline(), testTags.readline()

        print('=== geo dataset ===')
        with open(geoQuestionFile, 'r') as geoQuestions:
            with open(geoTagFile, 'r') as geoTags:
                q_index = 0
                sentence, tag_sen = geoQuestions.readline(), geoTags.readline()
                while sentence and tag_sen:
                    if q_index % 200 == 0:
                        print(" reading data line %d" % q_index)
                        sys.stdout.flush()
                    qid = 'qID_' + str(q_index)
                    print('geo question: ', qid)
                    # Get token-ids for the input sentence.
                    token_ids = data_utils_tag.sentence_to_token_ids(
                        tf.compat.as_bytes(sentence), en_vocab)
                    tag_ids = data_utils_tag.sentence_to_token_ids(
                        tf.compat.as_bytes(tag_sen), fr_vocab)
                    # Which bucket does it belong to?
                    bucket_id = len(_buckets) - 1
                    for i, bucket in enumerate(_buckets):
                        if bucket[0] >= len(token_ids):
                            bucket_id = i
                            break
                    else:
                        logging.warning("Sentence truncated: %s", sentence)
                    # Get a 1-element batch to feed the sentence to the model.
                    encoder_inputs, tag_inputs, decoder_inputs, target_weights = model.get_batch(
                        {bucket_id: [(token_ids, tag_ids, [])]}, bucket_id)
                    # Get output logits for the sentence.
                    _, _, output_logits, _ = model.step(
                        sess, encoder_inputs, tag_inputs, decoder_inputs,
                        target_weights, bucket_id, True)
                    # This is a greedy decoder - outputs are just argmaxes of output_logits.
                    outputs = [
                        int(np.argmax(logit, axis=1)) for logit in output_logits
                    ]
                    # If there is an EOS symbol in outputs, cut them at that point.
                    if data_utils_tag.EOS_ID in outputs:
                        outputs = outputs[:outputs.index(data_utils_tag.EOS_ID)]
                    # Print out French sentence corresponding to outputs.
                    resultLogical = " ".join([
                        tf.compat.as_str(rev_fr_vocab[output])
                        for output in outputs
                    ])
                    if FLAGS.enable_table_test:
                        resultAnswer = logicalParser(tables[qid], resultLogical)
                        answerOutput.write(str(resultAnswer) + '\n')
                    logicalTemp_geo.write(str(resultLogical) + '\n')
                    q_index += 1
                    sentence, tag_sen = geoQuestions.readline(), geoTags.readline()

        logicalTemp_geo.close()
        logicalTemp_train.close()
        logicalTemp_dev.close()
        logicalTemp_test.close()
        if FLAGS.enable_table_test:
            answerOutput.close()
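
# The test loop above re-opens ./PCA-visual/confusion_matrix.txt in append
# mode once per question. Below is a minimal sketch of the same dump written
# against a file handle the caller opens once, assuming confusion_matrix
# keeps the (1, steps, width) shape indexed above; dump_confusion_matrix is
# a hypothetical name, not part of this codebase.
def dump_confusion_matrix(f_con, q_index, confusion_matrix):
    """Append one example's matrix as comma-separated rows."""
    f_con.write('*** example: ' + str(q_index) + ' ***\n')
    for i in range(confusion_matrix.shape[1]):
        f_con.write(','.join(str(x) for x in confusion_matrix[0][i]) + '\n')
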
def decode():
    with tf.Session() as sess:
        # Create model and load parameters.
        model = create_model(sess, True)
        model.batch_size = 1  # We decode one sentence at a time.

        # Load vocabularies.
        en_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.from" % FLAGS.from_vocab_size)
        fr_vocab_path = os.path.join(FLAGS.data_dir,
                                     "vocab%d.to" % FLAGS.to_vocab_size)
        en_vocab, _ = data_utils_tag.initialize_vocabulary(en_vocab_path)
        fr_vocab, rev_fr_vocab = data_utils_tag.initialize_vocabulary(
            fr_vocab_path)

        # Decode from standard input.
        # changed by Kaifeng, for test
        offset = 0  # the test data is the last 20000 items in the table
        testTableFile = FLAGS.test_dir + '/test.json'
        if FLAGS.enable_table_test:
            print('loading database table')
            with open(testTableFile) as testTables:
                tables = json.load(testTables)
            answerOutput = open(FLAGS.test_dir + '/answer.out', 'w')

        # trainQuestionFile = FLAGS.data_dir + '/rand_train.qu'
        # trainTagFile = FLAGS.data_dir + '/rand_train.ta'  # For tagging model, Hongyu
        # devQuestionFile = FLAGS.data_dir + '/rand_dev.qu'
        # devTagFile = FLAGS.data_dir + '/rand_dev.ta'  # For tagging model, Hongyu
        testQuestionFile = FLAGS.data_dir + '/%s_test.qu.ids1500' % subset
        testTagFile = FLAGS.data_dir + '/%s_test.ta.ids150' % subset  # For tagging model, Hongyu
        testLogicFile = FLAGS.data_dir + '/%s_test.lox.ids150' % subset  # For tagging model, Hongyu
        # 0530 newly added
        geoQuestionFile = FLAGS.data_dir + '/%s_train.qu.ids1500' % subset
        geoTagFile = FLAGS.data_dir + '/%s_train.ta.ids150' % subset  # For tagging model, Hongyu
        geoLogicFile = FLAGS.data_dir + '/%s_train.lox.ids150' % subset  # For tagging model, Hongyu

        logicalTemp_geo = open(FLAGS.test_dir + '/%s_train.out' % subset, 'w')
        # logicalTemp_train = open(FLAGS.test_dir + '/logicalTemp_train.out', 'w')
        # logicalTemp_dev = open(FLAGS.test_dir + '/logicalTemp_dev.out', 'w')
        logicalTemp_test = open(FLAGS.test_dir + '/%s_test.out' % subset, 'w')

        ### evaluating tagging model, Hongyu
        print('======= start testing =======')
        print('=== testing dataset ===')
        with gfile.GFile(testQuestionFile, mode='r') as testQuestions, \
                gfile.GFile(testLogicFile, mode='r') as testLogics:
            with gfile.GFile(testTagFile, mode='r') as testTags:
                q_index = 0
                sentence, tag_sen, logic_sen = (testQuestions.readline(),
                                                testTags.readline(),
                                                testLogics.readline())
                while sentence and tag_sen and logic_sen:
                    if q_index % 200 == 0:
                        print(" reading data line %d" % q_index)
                        sys.stdout.flush()
                    qid = 'qID_' + str(q_index)
                    print('testing question: ', qid)
                    # Get token-ids for the input sentence.
                    # token_ids = data_utils_tag.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab)
                    # tag_ids = data_utils_tag.sentence_to_token_ids(tf.compat.as_bytes(tag_sen), fr_vocab)
                    # logic_ids = data_utils_tag.sentence_to_token_ids(tf.compat.as_bytes(logic_sen), fr_vocab)
                    token_ids = [int(x) for x in sentence.split()]
                    tag_ids = [int(x) for x in tag_sen.split()]
                    logic_ids = [int(x) for x in logic_sen.split()]
                    # Which bucket does it belong to?
                    bucket_id = len(_buckets) - 1
                    for i, bucket in enumerate(_buckets):
                        if bucket[0] > len(token_ids) and bucket[1] > len(logic_ids):
                            bucket_id = i
                            break
                    else:
                        logging.warning("Sentence truncated: %s", sentence)
                    # Get a 1-element batch to feed the sentence to the model.
                    encoder_inputs, tag_inputs, decoder_inputs, target_weights = model.get_batch(
                        {bucket_id: [(token_ids, tag_ids, [])]}, bucket_id)
                    # Get output logits for the sentence and CONFUSION matrix.
                    # 0531 newly added
                    _, _, output_logits, confusion_matrix = model.step(
                        sess, encoder_inputs, tag_inputs, decoder_inputs,
                        target_weights, bucket_id, True)
                    # Newly modified 0624: This is a Constraint-Greedy decoder -
                    # outputs are just argmaxes of output_logits.
                    # resultLogical = []
                    # for i in range(len(output_logits)):
                    #     output = int(np.argmax(output_logits[i], axis=1))
                    #     # Constraint 1: advanced ending
                    #     if i < len(logic_ids) - 1 and output == data_utils_tag.EOS_ID:
                    #         output = int(np.argmax(output_logits[i][:, data_utils_tag.EOS_ID + 1:], axis=1)) + data_utils_tag.EOS_ID + 1
                    #     if i == 0:
                    #         prev_idx = output
                    #         if output >= len(rev_fr_vocab):
                    #             output = data_utils_tag.UNK_ID
                    #         prev = tf.compat.as_str(rev_fr_vocab[output])
                    #         resultLogical.append(prev)
                    #     else:  # i > 0
                    #         if str(prev) in ['equal', 'less', 'greater', 'neq', 'nl', 'ng']:
                    #             # Constraint 2: after 'equal' should be 'value'
                    #             output = int(np.argmax(output_logits[i][:, 5:17], axis=1)) + 5
                    #         if output == 2:  # data_utils_tag.EOS_ID
                    #             if i < len(logic_ids) - 1:
                    #                 output = int(np.argmax(output_logits[i][:, 3:], axis=1)) + 3
                    #             else:
                    #                 break
                    #         pre_idx = output
                    #         if output >= len(rev_fr_vocab):
                    #             output = data_utils_tag.UNK_ID
                    #         prev = tf.compat.as_str(rev_fr_vocab[output])
                    #         resultLogical.append(prev)
                    # if str(resultLogical[-1]) in ['equal', 'less', 'greater', 'neq', 'nl', 'ng']:
                    #     resultLogical.append(resultLogical[-2])
                    # # Constraint 3: formats
                    # resultLogical = " ".join(resultLogical)
                    # resultLogical = resultLogical.replace('<field>:1 equal <field>:1', '<field>:1')
                    # resultLogical = resultLogical.replace('<value>:1 where <field>', '<value>:1 and <field>')
                    # resultLogical = resultLogical.replace('and where', 'and')
                    outputs = [
                        int(np.argmax(logit, axis=1)) for logit in output_logits
                    ]
                    if data_utils_tag.EOS_ID in outputs:
                        outputs = outputs[:outputs.index(data_utils_tag.EOS_ID)]
                    resultLogical = " ".join([
                        tf.compat.as_str(rev_fr_vocab[output])
                        for output in outputs
                    ])
                    if FLAGS.enable_table_test:
                        resultAnswer = logicalParser(tables[qid], resultLogical)
                        answerOutput.write(str(resultAnswer) + '\n')
                    logicalTemp_test.write(str(resultLogical) + '\n')
                    q_index += 1
                    sentence, tag_sen, logic_sen = (testQuestions.readline(),
                                                    testTags.readline(),
                                                    testLogics.readline())

        print('=== train dataset ===')
        with gfile.GFile(geoQuestionFile, mode='r') as geoQuestions, \
                gfile.GFile(geoLogicFile, mode='r') as geoLogics:
            with gfile.GFile(geoTagFile, mode='r') as geoTags:
                q_index = 0
                sentence, tag_sen, logic_sen = (geoQuestions.readline(),
                                                geoTags.readline(),
                                                geoLogics.readline())
                while sentence and tag_sen and logic_sen:
                    if q_index % 200 == 0:
                        print(" reading data line %d" % q_index)
                        sys.stdout.flush()
                    qid = 'qID_' + str(q_index)
                    print('training question: ', qid)
                    # Get token-ids for the input sentence.
                    # token_ids = data_utils_tag.sentence_to_token_ids(tf.compat.as_bytes(sentence), en_vocab)
                    # tag_ids = data_utils_tag.sentence_to_token_ids(tf.compat.as_bytes(tag_sen), fr_vocab)
                    # logic_ids = data_utils_tag.sentence_to_token_ids(tf.compat.as_bytes(logic_sen), fr_vocab)
                    token_ids = [int(x) for x in sentence.split()]
                    tag_ids = [int(x) for x in tag_sen.split()]
                    logic_ids = [int(x) for x in logic_sen.split()]
                    # Which bucket does it belong to?
                    bucket_id = len(_buckets) - 1
                    for i, bucket in enumerate(_buckets):
                        if bucket[0] > len(token_ids) and bucket[1] > len(logic_ids):
                            bucket_id = i
                            break
                    else:
                        logging.warning("Sentence truncated: %s", sentence)
                    # Get a 1-element batch to feed the sentence to the model.
                    encoder_inputs, tag_inputs, decoder_inputs, target_weights = model.get_batch(
                        {bucket_id: [(token_ids, tag_ids, [])]}, bucket_id)
                    # Get output logits for the sentence.
                    _, _, output_logits, _ = model.step(
                        sess, encoder_inputs, tag_inputs, decoder_inputs,
                        target_weights, bucket_id, True)
                    # Newly modified 0624: Constraint-Greedy decoder (same
                    # commented-out sketch as in the testing loop above).
                    outputs = [
                        int(np.argmax(logit, axis=1)) for logit in output_logits
                    ]
                    if data_utils_tag.EOS_ID in outputs:
                        outputs = outputs[:outputs.index(data_utils_tag.EOS_ID)]
                    resultLogical = " ".join([
                        tf.compat.as_str(rev_fr_vocab[output])
                        for output in outputs
                    ])
                    if FLAGS.enable_table_test:
                        resultAnswer = logicalParser(tables[qid], resultLogical)
                        answerOutput.write(str(resultAnswer) + '\n')
                    logicalTemp_geo.write(str(resultLogical) + '\n')
                    q_index += 1
                    sentence, tag_sen, logic_sen = (geoQuestions.readline(),
                                                    geoTags.readline(),
                                                    geoLogics.readline())

        logicalTemp_geo.close()
        # logicalTemp_train.close()
        # logicalTemp_dev.close()
        logicalTemp_test.close()
        if FLAGS.enable_table_test:
            answerOutput.close()
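
# Every loop above re-implements the same linear search over _buckets. Below
# is a minimal sketch of that selection, assuming _buckets holds
# (source_size, target_size) pairs as used in the loops above; select_bucket
# is a hypothetical name, not part of this codebase.
def select_bucket(token_ids, logic_ids, buckets):
    """Return the id of the smallest bucket that fits both sequences."""
    for i, (source_size, target_size) in enumerate(buckets):
        if source_size > len(token_ids) and target_size > len(logic_ids):
            return i
    # No bucket fits; fall back to the largest one (the sentence is truncated).
    logging.warning("Sentence of length %d truncated", len(token_ids))
    return len(buckets) - 1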