def evaluate(self, release=False):
    eval_start_time = time.time()
    if self.eval_queue is None:
        self.eval_queue = androidreader.Reader(
            subtoken_to_index=self.subtoken_to_index,
            node_to_index=self.node_to_index,
            target_to_index=self.target_to_index,
            config=self.config,
            is_evaluating=True)
        reader_output = self.eval_queue.get_output()
        self.eval_predicted_indices_op, self.eval_topk_values, _, _, self.method_embedding = \
            self.build_test_graph(reader_output)
        self.eval_true_target_strings_op = reader_output[androidreader.TARGET_STRING_KEY]
        self.eval_tag_key_op = reader_output[androidreader.TARGET_TAG_KEY]
        self.saver = tf.train.Saver(max_to_keep=10)

    if self.config.LOAD_PATH and not self.config.TRAIN_PATH:
        self.initialize_session_variables(self.sess)
        self.load_model(self.sess)
        if release:
            release_name = self.config.LOAD_PATH + '.release'
            print('Releasing model, output model: %s' % release_name)
            self.saver.save(self.sess, release_name)
            shutil.copyfile(src=self.config.LOAD_PATH + '.dict',
                            dst=release_name + '.dict')
            return None

    model_dirname = os.path.dirname(
        self.config.SAVE_PATH if self.config.SAVE_PATH else self.config.LOAD_PATH)
    ref_file_name = model_dirname + '/ref.txt'
    predicted_file_name = model_dirname + '/pred.txt'
    embedding_file_name = model_dirname + '/embedding.txt'
    if not os.path.exists(model_dirname):
        os.makedirs(model_dirname)

    with open(model_dirname + '/log.txt', 'w') as output_file, \
            open(ref_file_name, 'w') as ref_file, \
            open(predicted_file_name, 'w') as pred_file, \
            open(embedding_file_name, 'w') as embedding_file:
        num_correct_predictions = 0 if self.config.BEAM_WIDTH == 0 \
            else np.zeros([self.config.BEAM_WIDTH], dtype=np.int32)
        total_predictions = 0
        total_prediction_batches = 0
        true_positive, false_positive, false_negative = 0, 0, 0
        self.eval_queue.reset(self.sess)
        start_time = time.time()

        try:
            while True:
                predicted_indices, true_target_strings, top_values, method_embeddings, tag = self.sess.run(
                    [
                        self.eval_predicted_indices_op,
                        self.eval_true_target_strings_op,
                        self.eval_topk_values,
                        self.method_embedding,
                        self.eval_tag_key_op
                    ],
                )
                true_target_strings = Common.binary_to_string_list(true_target_strings)
                ref_file.write('\n'.join([
                    name.replace(Common.internal_delimiter, ' ')
                    for name in true_target_strings
                ]) + '\n')

                if self.config.BEAM_WIDTH > 0:
                    # predicted indices: (batch, time, beam_width)
                    predicted_strings = [[[self.index_to_target[i] for i in timestep]
                                          for timestep in example]
                                         for example in predicted_indices]
                    # transpose each example to (top-k, target_length)
                    predicted_strings = [list(map(list, zip(*example)))
                                         for example in predicted_strings]
                    pred_file.write('\n'.join([
                        ' '.join(Common.filter_impossible_names(words))
                        for words in predicted_strings[0]
                    ]) + '\n')
                else:
                    predicted_strings = [[self.index_to_target[i] for i in example]
                                         for example in predicted_indices]
                    pred_file.write('\n'.join([
                        ' '.join(Common.filter_impossible_names(words))
                        for words in predicted_strings
                    ]) + '\n')

                num_correct_predictions = self.update_correct_predictions(
                    num_correct_predictions, output_file,
                    zip(true_target_strings, predicted_strings))
                true_positive, false_positive, false_negative = self.update_per_subtoken_statistics(
                    zip(true_target_strings, predicted_strings),
                    true_positive, false_positive, false_negative)
                total_predictions += len(true_target_strings)
                total_prediction_batches += 1
                if total_prediction_batches % self.num_batches_to_log == 0:
                    elapsed = time.time() - start_time
                    self.trace_evaluation(output_file, num_correct_predictions,
                                          total_predictions, elapsed)

                # Write one method embedding per example, tagged with its method name.
                embedding_file.write('\n'.join([
                    Common.binary_to_string(tag[i]) + ',' + ','.join([
                        str(method_embeddings[i, j])
                        for j in range(method_embeddings.shape[1])
                    ]) for i in range(method_embeddings.shape[0])
                ]) + '\n')
        except tf.errors.OutOfRangeError:
            pass

        print('Done testing, epoch reached')
        output_file.write(str(num_correct_predictions / total_predictions) + '\n')
    # Common.compute_bleu(ref_file_name, predicted_file_name)

    elapsed = int(time.time() - eval_start_time)
    precision, recall, f1 = self.calculate_results(true_positive, false_positive,
                                                   false_negative)
    try:
        files_rouge = FilesRouge()
        rouge = files_rouge.get_scores(hyp_path=predicted_file_name,
                                       ref_path=ref_file_name,
                                       avg=True, ignore_empty=True)
    except ValueError:
        rouge = 0
    print("Evaluation time: %sh%sm%ss" % ((elapsed // 60 // 60),
                                          (elapsed // 60) % 60, elapsed % 60))
    return num_correct_predictions / total_predictions, \
        precision, recall, f1, rouge
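# Both evaluate() variants accumulate subtoken-level true_positive, false_positive
# and false_negative counts and hand them to self.calculate_results, whose body is
# not shown in this excerpt.  A minimal sketch of what it is assumed to compute
# (standard precision/recall/F1; the helper name below is hypothetical):
def calculate_results_sketch(true_positive, false_positive, false_negative):
    # Guard against division by zero when nothing was predicted or nothing was expected.
    if true_positive + false_positive == 0 or true_positive + false_negative == 0:
        return 0.0, 0.0, 0.0
    precision = true_positive / (true_positive + false_positive)
    recall = true_positive / (true_positive + false_negative)
    f1 = 2 * precision * recall / (precision + recall) if precision + recall > 0 else 0.0
    return precision, recall, f1

# Example: 8 correctly predicted subtokens, 2 spurious, 4 missed
# -> precision 0.8, recall ~0.667, F1 ~0.727.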
def evaluate(self, release=False):
    eval_start_time = time.time()
    if self.eval_queue is None:
        self.eval_queue = reader.Reader(
            subtoken_to_index=self.subtoken_to_index,
            node_to_index=self.node_to_index,
            target_to_index=self.target_to_index,
            config=self.config,
            is_evaluating=True)
        reader_output = self.eval_queue.get_output()
        self.eval_predicted_indices_op, self.eval_topk_values, _, _ = \
            self.build_test_graph(reader_output)
        self.eval_true_target_strings_op = reader_output[reader.TARGET_STRING_KEY]
        self.saver = tf.train.Saver(max_to_keep=10)

    if self.config.LOAD_PATH and not self.config.TRAIN_PATH:
        self.initialize_session_variables(self.sess)
        self.load_model(self.sess)
        if release:
            release_name = self.config.LOAD_PATH + '.release'
            print('Releasing model, output model: %s' % release_name)
            self.saver.save(self.sess, release_name)
            shutil.copyfile(src=self.config.LOAD_PATH + '.dict',
                            dst=release_name + '.dict')
            return None

    model_dirname = os.path.dirname(
        self.config.SAVE_PATH if self.config.SAVE_PATH else self.config.LOAD_PATH)
    ref_file_name = model_dirname + '/ref.txt'
    predicted_file_name = model_dirname + '/pred.txt'
    if not os.path.exists(model_dirname):
        os.makedirs(model_dirname)

    with open(model_dirname + '/log.txt', 'w') as output_file, \
            open(ref_file_name, 'w') as ref_file, \
            open(predicted_file_name, 'w') as pred_file:
        num_correct_predictions = 0
        total_predictions = 0
        total_prediction_batches = 0
        true_positive, false_positive, false_negative = 0, 0, 0
        self.eval_queue.reset(self.sess)
        start_time = time.time()

        try:
            while True:
                predicted_indices, true_target_strings, top_values = self.sess.run(
                    [
                        self.eval_predicted_indices_op,
                        self.eval_true_target_strings_op,
                        self.eval_topk_values
                    ],
                )
                true_target_strings = Common.binary_to_string_list(true_target_strings)
                ref_file.write('\n'.join([
                    name.replace(Common.internal_delimiter, ' ')
                    for name in true_target_strings
                ]) + '\n')

                if self.config.BEAM_WIDTH > 0:
                    # predicted indices: (batch, time, beam_width)
                    predicted_strings = [[[self.index_to_target[i] for i in timestep]
                                          for timestep in example]
                                         for example in predicted_indices]
                    # (batch, top-k, target_length)
                    predicted_strings = [list(map(list, zip(*example)))
                                         for example in predicted_strings]
                    pred_file.write('\n'.join([
                        ' '.join(Common.filter_impossible_names(words))
                        for words in predicted_strings[0]
                    ]) + '\n')
                else:
                    predicted_strings = [[self.index_to_target[i] for i in example]
                                         for example in predicted_indices]
                    pred_file.write('\n'.join([
                        ' '.join(Common.filter_impossible_names(words))
                        for words in predicted_strings
                    ]) + '\n')

                num_correct_predictions = self.update_correct_predictions(
                    num_correct_predictions, output_file,
                    zip(true_target_strings, predicted_strings))
                true_positive, false_positive, false_negative = self.update_per_subtoken_statistics(
                    zip(true_target_strings, predicted_strings),
                    true_positive, false_positive, false_negative)
                total_predictions += len(true_target_strings)
                total_prediction_batches += 1
                if total_prediction_batches % self.num_batches_to_log == 0:
                    elapsed = time.time() - start_time
                    self.trace_evaluation(output_file, num_correct_predictions,
                                          total_predictions, elapsed)
        except tf.errors.OutOfRangeError:
            pass

        print('Done testing, epoch reached')
        output_file.write(str(num_correct_predictions / total_predictions) + '\n')
    # Common.compute_bleu(ref_file_name, predicted_file_name)

    elapsed = int(time.time() - eval_start_time)
    precision, recall, f1 = self.calculate_results(true_positive, false_positive,
                                                   false_negative)
    print("Evaluation time: %sh%sm%ss" % ((elapsed // 60 // 60),
                                          (elapsed // 60) % 60, elapsed % 60))
    return num_correct_predictions / total_predictions, precision, recall, f1
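# In both evaluate() variants the beam-search predictions arrive per example as a
# (time, beam_width) list of lists, and zip(*example) transposes them to
# (beam_width, time) so that each beam candidate becomes one subtoken sequence.
# A small standalone illustration of that transposition (the toy data is hypothetical):
example = [['get', 'set', 'is'],        # timestep 0: the word each of 3 beams picked
           ['name', 'value', 'empty'],  # timestep 1
           ['</S>', '</S>', '</S>']]    # timestep 2
candidates = list(map(list, zip(*example)))
# candidates == [['get', 'name', '</S>'],
#                ['set', 'value', '</S>'],
#                ['is', 'empty', '</S>']]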
         tf.local_variables_initializer(),
         tf.tables_initializer()).run()
reader.reset(sess)
try:
    while True:
        target_indices, target_strings, target_lengths, path_source_indices, \
            node_indices, path_target_indices, valid_context_mask, path_source_lengths, \
            path_lengths, path_target_lengths, path_source_strings, path_strings, \
            path_target_strings = sess.run(
                [target_index_op, target_string_op, target_length_op,
                 path_source_indices_op, node_indices_op, path_target_indices_op,
                 valid_context_mask_op, path_source_lengths_op, path_lengths_op,
                 path_target_lengths_op, path_source_strings_op, path_strings_op,
                 path_target_strings_op])

        print('Target strings: ', Common.binary_to_string_list(target_strings))
        print('Context strings: ',
              Common.binary_to_string_3d(
                  np.concatenate([path_source_strings, path_strings,
                                  path_target_strings], -1)))
        print('Target indices: ', target_indices)
        print('Target lengths: ', target_lengths)
        print('Path source strings: ', Common.binary_to_string_3d(path_source_strings))
        print('Path source indices: ', path_source_indices)
        print('Path source lengths: ', path_source_lengths)
        print('Path strings: ', Common.binary_to_string_3d(path_strings))
        print('Node indices: ', node_indices)
        print('Path lengths: ', path_lengths)
sess = tf.InteractiveSession()
tf.group(tf.global_variables_initializer(),
         tf.local_variables_initializer(),
         tf.tables_initializer()).run()
reader.reset(sess)
try:
    while True:
        target_indices, target_strings, target_lengths, path_source_indices, \
            node_indices, path_target_indices, valid_context_mask, path_source_lengths, \
            path_lengths, path_target_lengths, path_source_strings, path_strings, \
            path_target_strings = sess.run(
                [target_index_op, target_string_op, target_length_op,
                 path_source_indices_op, node_indices_op, path_target_indices_op,
                 valid_context_mask_op, path_source_lengths_op, path_lengths_op,
                 path_target_lengths_op, path_source_strings_op, path_strings_op,
                 path_target_strings_op])

        print('Target strings: ', Common.binary_to_string_list(target_strings))
        print('Context strings: ',
              Common.binary_to_string_3d(
                  np.concatenate([path_source_strings, path_strings,
                                  path_target_strings], -1)))
        print('Target indices: ', target_indices)
        print('Target lengths: ', target_lengths)
        print('Path source strings: ', Common.binary_to_string_3d(path_source_strings))
        print('Path source indices: ', path_source_indices)
        print('Path source lengths: ', path_source_lengths)
        print('Path strings: ', Common.binary_to_string_3d(path_strings))
        print('Node indices: ', node_indices)
        print('Path lengths: ', path_lengths)
        print('Path target strings: ', Common.binary_to_string_3d(path_target_strings))
        print('Path target indices: ', path_target_indices)
        print('Path target lengths: ', path_target_lengths)
        print('Valid context mask: ', valid_context_mask)
except tf.errors.OutOfRangeError:
    # Iterator exhausted: the whole dataset has been printed.
    pass
def get_the_processed_input(self, input_tensors):
    target_string = input_tensors[reader.TARGET_STRING_KEY]
    target_index = input_tensors[reader.TARGET_INDEX_KEY]
    target_lengths = input_tensors[reader.TARGET_LENGTH_KEY]
    path_source_indices = input_tensors[reader.PATH_SOURCE_INDICES_KEY]
    node_indices = input_tensors[reader.NODE_INDICES_KEY]
    path_target_indices = input_tensors[reader.PATH_TARGET_INDICES_KEY]
    valid_context_mask = input_tensors[reader.VALID_CONTEXT_MASK_KEY]
    path_source_lengths = input_tensors[reader.PATH_SOURCE_LENGTHS_KEY]
    path_lengths = input_tensors[reader.PATH_LENGTHS_KEY]
    path_target_lengths = input_tensors[reader.PATH_TARGET_LENGTHS_KEY]

    with tf.variable_scope('model'):
        source_word_embed = tf.nn.embedding_lookup(
            params=self.subtoken_vocab,
            ids=path_source_indices)  # (batch, max_contexts, max_name_parts, dim)
        path_embed = tf.nn.embedding_lookup(
            params=self.nodes_vocab,
            ids=node_indices)  # (batch, max_contexts, max_path_length+1, dim)
        target_word_embed = tf.nn.embedding_lookup(
            params=self.subtoken_vocab,
            ids=path_target_indices)  # (batch, max_contexts, max_name_parts, dim)

    sess = tf.InteractiveSession()
    tf.group(tf.global_variables_initializer(),
             tf.local_variables_initializer(),
             tf.tables_initializer()).run()
    self.predict_queue.reset(sess)
    path_source_indices_matrix, node_indices_matrix, path_target_indices_matrix, \
        source_word_embed_matrix, path_embed_matrix, target_word_embed_matrix, \
        np_target_string = sess.run(
            [path_source_indices, node_indices, path_target_indices,
             source_word_embed, path_embed, target_word_embed, target_string])

    self.predict_path_source_indices_matrix, self.predict_node_indices_matrix, self.predict_path_target_indices_matrix = \
        np.array(path_source_indices_matrix), np.array(node_indices_matrix), np.array(path_target_indices_matrix)
    self.predict_source_word_embed_matrix, self.predict_path_embed_matrix, self.predict_target_word_embed_matrix = \
        np.array(source_word_embed_matrix), np.array(path_embed_matrix), np.array(target_word_embed_matrix)

    print("=============================================================\n")
    print('Path source indices: ', self.predict_path_source_indices_matrix.shape)
    print("------------------------------\n")
    print('Node indices: ', self.predict_node_indices_matrix.shape)
    print("------------------------------\n")
    print('Path target indices: ', self.predict_path_target_indices_matrix.shape)
    print("------------------------------\n")
    print('source_word_embed: ', self.predict_source_word_embed_matrix.shape)
    print("------------------------------\n")
    print('path_embed: ', self.predict_path_embed_matrix.shape)
    print("------------------------------\n")
    print('target_word_embed: ', self.predict_target_word_embed_matrix.shape)
    print("=============================================================\n")
    print('np_target_string: ', Common.binary_to_string_list(np_target_string))
    print('np_target_string shape: ', np_target_string.shape)

    self.predict_group_for_each_data = np.full(
        len(self.predict_source_word_embed_matrix), 0)
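# get_the_processed_input relies on tf.nn.embedding_lookup appending the embedding
# dimension to the shape of the id tensor: ids of shape
# (batch, max_contexts, max_name_parts) looked up in a (vocab_size, dim) table give
# (batch, max_contexts, max_name_parts, dim).  A tiny standalone check of that shape
# behaviour with hypothetical sizes (TF 1.x graph mode, as in the code above):
import numpy as np
import tensorflow as tf

vocab = tf.constant(np.random.rand(100, 128), dtype=tf.float32)  # (vocab_size, dim)
ids = tf.zeros((4, 200, 5), dtype=tf.int32)                      # (batch, max_contexts, max_name_parts)
embedded = tf.nn.embedding_lookup(params=vocab, ids=ids)
print(embedded.shape)                                            # (4, 200, 5, 128)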
def get_the_processed_data(self, input_tensors):
    source_string = input_tensors[reader.PATH_SOURCE_STRINGS_KEY]
    path_string = input_tensors[reader.PATH_STRINGS_KEY]
    target_string = input_tensors[reader.PATH_TARGET_STRINGS_KEY]
    # NOTE: the assignment below overwrites the PATH_TARGET_STRINGS fetch above,
    # so target_string ends up holding the method-name targets.
    target_string = input_tensors[reader.TARGET_STRING_KEY]
    target_index = input_tensors[reader.TARGET_INDEX_KEY]
    target_lengths = input_tensors[reader.TARGET_LENGTH_KEY]
    path_source_indices = input_tensors[reader.PATH_SOURCE_INDICES_KEY]
    node_indices = input_tensors[reader.NODE_INDICES_KEY]
    path_target_indices = input_tensors[reader.PATH_TARGET_INDICES_KEY]
    valid_context_mask = input_tensors[reader.VALID_CONTEXT_MASK_KEY]
    path_source_lengths = input_tensors[reader.PATH_SOURCE_LENGTHS_KEY]
    path_lengths = input_tensors[reader.PATH_LENGTHS_KEY]
    path_target_lengths = input_tensors[reader.PATH_TARGET_LENGTHS_KEY]

    with tf.variable_scope('model'):
        subtoken_vocab = tf.get_variable(
            'SUBTOKENS_VOCAB',
            shape=(self.subtoken_vocab_size, self.config.EMBEDDINGS_SIZE),
            dtype=tf.float32,
            initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=1.0, mode='FAN_OUT', uniform=True))
        target_words_vocab = tf.get_variable(
            'TARGET_WORDS_VOCAB',
            shape=(self.target_vocab_size, self.config.EMBEDDINGS_SIZE),
            dtype=tf.float32,
            initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=1.0, mode='FAN_OUT', uniform=True))
        nodes_vocab = tf.get_variable(
            'NODES_VOCAB',
            shape=(self.nodes_vocab_size, self.config.EMBEDDINGS_SIZE),
            dtype=tf.float32,
            initializer=tf.contrib.layers.variance_scaling_initializer(
                factor=1.0, mode='FAN_OUT', uniform=True))

        # (batch, max_contexts, decoder_size)
        source_word_embed, path_embed, target_word_embed = self.compute_contexts(
            subtoken_vocab=subtoken_vocab,
            nodes_vocab=nodes_vocab,
            source_input=path_source_indices,
            nodes_input=node_indices,
            target_input=path_target_indices,
            valid_mask=valid_context_mask,
            path_source_lengths=path_source_lengths,
            path_lengths=path_lengths,
            path_target_lengths=path_target_lengths)

        self.subtoken_vocab, self.target_words_vocab, self.nodes_vocab = \
            subtoken_vocab, target_words_vocab, nodes_vocab
        batch_size = tf.shape(target_index)[0]

    # Here the program gives us the processed data sets.
    # Every context, by definition, has the form [source, path, target]:
    #   path_source_indices is the index matrix for all sources,
    #   node_indices is the index matrix for all paths,
    #   path_target_indices is the index matrix for all targets,
    #   source_word_embed is the embedding for path_source_indices,
    #   path_embed is the embedding for node_indices,
    #   target_word_embed is the embedding for path_target_indices.
    # Now we transfer all these needed variables to NumPy arrays.
    sess = tf.InteractiveSession()
    tf.group(tf.global_variables_initializer(),
             tf.local_variables_initializer(),
             tf.tables_initializer()).run()
    self.queue_thread.reset(sess)
    path_source_indices_matrix, node_indices_matrix, path_target_indices_matrix, \
        source_word_embed_matrix, path_embed_matrix, target_word_embed_matrix, \
        np_target_string, self.source_string, self.path_string, self.target_string = sess.run(
            [path_source_indices, node_indices, path_target_indices,
             source_word_embed, path_embed, target_word_embed,
             target_string, source_string, path_string, target_string])
    sess.close()

    self.path_source_indices_matrix, self.node_indices_matrix, self.path_target_indices_matrix = \
        np.array(path_source_indices_matrix), np.array(node_indices_matrix), np.array(path_target_indices_matrix)
    self.source_word_embed_matrix, self.path_embed_matrix, self.target_word_embed_matrix = \
        np.array(source_word_embed_matrix), np.array(path_embed_matrix), np.array(target_word_embed_matrix)

    print("=============================================================\n")
    print('Path source indices: ', self.path_source_indices_matrix.shape)
    print("------------------------------\n")
    print('Node indices: ', self.node_indices_matrix.shape)
    print("------------------------------\n")
    print('Path target indices: ', self.path_target_indices_matrix.shape)
    print("------------------------------\n")
    print('source_word_embed: ', self.source_word_embed_matrix.shape)
    print("------------------------------\n")
    print('path_embed: ', self.path_embed_matrix.shape)
    print("------------------------------\n")
    print('target_word_embed: ', self.target_word_embed_matrix.shape)
    print("=============================================================\n")

    self.target_string = Common.binary_to_string_list(np_target_string)
    print('np_target_string: ', self.target_string)
    print('np_target_string shape: ', np_target_string.shape)

    # temp = self.calculate_total_distance(
    #     self.source_word_embed_matrix[0], self.path_embed_matrix[0], self.target_word_embed_matrix[0],
    #     self.source_word_embed_matrix[1], self.path_embed_matrix[1], self.target_word_embed_matrix[1])
    # print(temp)

    # Every example starts in cluster 0.
    self.group_for_each_data = np.full(len(self.source_word_embed_matrix), 0)
    # Generate random K centers for clustering.
    self.index_of_initial_center = self.generate_center(
        0, len(self.source_word_embed_matrix) - 1)
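# get_the_processed_data ends by putting every example in group 0 and calling
# self.generate_center(0, n - 1) to pick the initial cluster centers.  That method
# is not shown in this excerpt; a plausible minimal sketch, assuming it simply
# samples K distinct example indices uniformly from [low, high] (K is hypothetical
# here, e.g. taken from the config in the real code):
import numpy as np

def generate_center_sketch(low, high, k):
    # Choose k distinct indices in [low, high] to serve as initial centers.
    return np.random.choice(np.arange(low, high + 1), size=k, replace=False)

# Example: pick 3 initial centers among 10 examples.
# centers = generate_center_sketch(0, 9, 3)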