def get_attention_per_path(source_strings, path_strings, target_strings, attention_weights):
    """Pair every decoded path-context with its attention weight, per time step.

    NOTE(review): there is no ``self`` parameter, yet callers in this file
    invoke ``self.get_attention_per_path(...)``; presumably a
    ``@staticmethod`` decorator sits just above this definition -- confirm.

    Args:
        source_strings: per-context values accepted by
            ``Common.binary_to_string`` (the source side of each context).
        path_strings: per-context path values, aligned with
            ``source_strings``.
        target_strings: per-context target values, aligned likewise.
        attention_weights: iterable of per-time-step weight sequences; the
            original comment gives its layout as (time, contexts).

    Returns:
        A list with one dict per time step. Each dict maps a
        ``(source, path, target)`` tuple of decoded strings to the weight of
        that context at that step. Contexts that decode to the same triplet
        share one key, and the last weight wins.
    """
    # The decoded triplets do not depend on the time step, so build them once
    # instead of re-decoding every context for every step.
    triplets = [
        (Common.binary_to_string(source),
         Common.binary_to_string(path),
         Common.binary_to_string(target))
        for source, path, target in zip(source_strings, path_strings, target_strings)
    ]
    # zip() stops at the shorter of (triplets, weights), which matches the
    # truncation of the four-way zip this replaces.
    return [dict(zip(triplets, time_step)) for time_step in attention_weights]
def predict(self, predict_data_lines):
    """Run the prediction graph on each input line and collect the results.

    On the first call (``self.predict_queue is None``) this builds the
    reader, the string placeholder and the test graph, initializes the
    session variables, creates a saver and loads the model.

    Args:
        predict_data_lines: iterable of lines; each one is fed to
            ``self.predict_placeholder`` in its own ``sess.run`` call.

    Returns:
        A list with one tuple per input line:
        ``(true_target_string, predicted_strings, top_scores,
        attention_per_path)``. ``attention_per_path`` is ``None`` unless
        ``self.config.BEAM_WIDTH == 0``.
    """
    if self.predict_queue is None:
        self.predict_queue = reader.Reader(
            subtoken_to_index=self.subtoken_to_index,
            node_to_index=self.node_to_index,
            target_to_index=self.target_to_index,
            config=self.config,
            is_evaluating=True)
        self.predict_placeholder = tf.placeholder(tf.string)
        unbatched_output = self.predict_queue.process_from_placeholder(
            self.predict_placeholder)
        # Give every reader tensor a leading axis of size one before it is
        # handed to the test graph.
        reader_output = {}
        for key, tensor in unbatched_output.items():
            reader_output[key] = tf.expand_dims(tensor, 0)
        (self.predict_top_indices_op,
         self.predict_top_scores_op,
         _,
         self.attention_weights_op) = self.build_test_graph(reader_output)
        self.predict_source_string = reader_output[reader.PATH_SOURCE_STRINGS_KEY]
        self.predict_path_string = reader_output[reader.PATH_STRINGS_KEY]
        self.predict_path_target_string = reader_output[reader.PATH_TARGET_STRINGS_KEY]
        self.predict_target_strings_op = reader_output[reader.TARGET_STRING_KEY]

        self.initialize_session_variables(self.sess)
        self.saver = tf.train.Saver()
        self.load_model(self.sess)

    results = []
    for line in predict_data_lines:
        fetched = self.sess.run(
            [
                self.predict_top_indices_op,
                self.predict_top_scores_op,
                self.predict_target_strings_op,
                self.attention_weights_op,
                self.predict_source_string,
                self.predict_path_string,
                self.predict_path_target_string,
            ],
            feed_dict={self.predict_placeholder: line})
        (predicted_indices, top_scores, true_target_strings, attention_weights,
         path_source_string, path_strings, path_target_string) = fetched

        # Drop the leading size-one axis and flatten the context strings.
        top_scores = np.squeeze(top_scores, axis=0)
        path_source_string = path_source_string.reshape(-1)
        path_strings = path_strings.reshape(-1)
        path_target_string = path_target_string.reshape(-1)
        predicted_indices = np.squeeze(predicted_indices, axis=0)
        true_target_strings = Common.binary_to_string(true_target_strings[0])

        beam_width = self.config.BEAM_WIDTH
        if beam_width > 0:
            # (target_length, top-k)
            per_step_strings = [
                [self.index_to_target[sugg] for sugg in timestep]
                for timestep in predicted_indices
            ]
            # (top-k, target_length)
            predicted_strings = [list(candidate) for candidate in zip(*per_step_strings)]
            # One score per candidate: exp of the summed per-step scores.
            top_scores = [np.exp(np.sum(s)) for s in zip(*top_scores)]
        else:
            # (batch, target_length)
            predicted_strings = [self.index_to_target[idx] for idx in predicted_indices]

        # Attention is only reported when BEAM_WIDTH is exactly 0.
        attention_per_path = (
            self.get_attention_per_path(
                path_source_string, path_strings, path_target_string, attention_weights)
            if beam_width == 0 else None)

        results.append(
            (true_target_strings, predicted_strings, top_scores, attention_per_path))
    return results
def evaluate(self, release=False):
    """Evaluate the model over the evaluation reader and write result files.

    On the first call (``self.eval_queue is None``) the evaluation reader
    and test graph are built. When ``config.LOAD_PATH`` is set and
    ``config.TRAIN_PATH`` is not, session variables are initialized and the
    model is loaded.

    Files written next to the model (the directory of ``config.SAVE_PATH``,
    or of ``config.LOAD_PATH`` when no save path is set): ``log.txt``,
    ``ref.txt``, ``pred.txt`` and ``embedding.txt``.

    Args:
        release: when true (and the model was just loaded), re-save the
            model under ``LOAD_PATH + '.release'``, copy the ``.dict`` file
            alongside it and return without evaluating.

    Returns:
        ``None`` on the release path; otherwise the tuple
        ``(accuracy, precision, recall, f1, rouge)``, where ``rouge`` is
        ``0`` if ``FilesRouge.get_scores`` raises ``ValueError``.
    """
    eval_start_time = time.time()
    if self.eval_queue is None:
        self.eval_queue = androidreader.Reader(
            subtoken_to_index=self.subtoken_to_index,
            node_to_index=self.node_to_index,
            target_to_index=self.target_to_index,
            config=self.config,
            is_evaluating=True)
        reader_output = self.eval_queue.get_output()
        self.eval_predicted_indices_op, self.eval_topk_values, _, _, self.method_embedding = \
            self.build_test_graph(reader_output)
        self.eval_true_target_strings_op = reader_output[androidreader.TARGET_STRING_KEY]
        self.eval_tag_key_op = reader_output[androidreader.TARGET_TAG_KEY]
        self.saver = tf.train.Saver(max_to_keep=10)

    if self.config.LOAD_PATH and not self.config.TRAIN_PATH:
        self.initialize_session_variables(self.sess)
        self.load_model(self.sess)
        if release:
            release_name = self.config.LOAD_PATH + '.release'
            print('Releasing model, output model: %s' % release_name)
            self.saver.save(self.sess, release_name)
            shutil.copyfile(src=self.config.LOAD_PATH + '.dict',
                            dst=release_name + '.dict')
            return None

    model_dirname = os.path.dirname(
        self.config.SAVE_PATH if self.config.SAVE_PATH else self.config.LOAD_PATH)
    # os.path.dirname() returns '' for a bare file name. Joining instead of
    # concatenating with '/' keeps the outputs in the working directory in
    # that case rather than pointing them at the filesystem root.
    log_file_name = os.path.join(model_dirname, 'log.txt')
    ref_file_name = os.path.join(model_dirname, 'ref.txt')
    predicted_file_name = os.path.join(model_dirname, 'pred.txt')
    embedding_file_name = os.path.join(model_dirname, 'embedding.txt')
    # os.makedirs('') raises, so only create a directory that has a name.
    if model_dirname and not os.path.exists(model_dirname):
        os.makedirs(model_dirname)

    with open(log_file_name, 'w') as output_file, \
            open(ref_file_name, 'w') as ref_file, \
            open(predicted_file_name, 'w') as pred_file, \
            open(embedding_file_name, 'w') as embedding_file:
        # A scalar counter without beam search, otherwise one counter per
        # beam position.
        num_correct_predictions = 0 if self.config.BEAM_WIDTH == 0 \
            else np.zeros([self.config.BEAM_WIDTH], dtype=np.int32)
        total_predictions = 0
        total_prediction_batches = 0
        true_positive, false_positive, false_negative = 0, 0, 0
        self.eval_queue.reset(self.sess)
        start_time = time.time()

        try:
            # Runs until the reader is exhausted (OutOfRangeError below).
            while True:
                predicted_indices, true_target_strings, _top_values, method_embeddings, tag = \
                    self.sess.run([
                        self.eval_predicted_indices_op,
                        self.eval_true_target_strings_op,
                        self.eval_topk_values,
                        self.method_embedding,
                        self.eval_tag_key_op,
                    ])
                true_target_strings = Common.binary_to_string_list(true_target_strings)
                ref_file.write('\n'.join([
                    name.replace(Common.internal_delimiter, ' ')
                    for name in true_target_strings
                ]) + '\n')

                if self.config.BEAM_WIDTH > 0:
                    # predicted indices: (batch, time, beam_width)
                    predicted_strings = [
                        [[self.index_to_target[i] for i in timestep] for timestep in example]
                        for example in predicted_indices
                    ]
                    # (batch, top-k, target_length)
                    predicted_strings = [
                        list(map(list, zip(*example))) for example in predicted_strings
                    ]
                    # Only the candidates of the first example in the batch
                    # are written to the prediction file.
                    pred_file.write('\n'.join([
                        ' '.join(Common.filter_impossible_names(words))
                        for words in predicted_strings[0]
                    ]) + '\n')
                else:
                    predicted_strings = [
                        [self.index_to_target[i] for i in example]
                        for example in predicted_indices
                    ]
                    pred_file.write('\n'.join([
                        ' '.join(Common.filter_impossible_names(words))
                        for words in predicted_strings
                    ]) + '\n')

                num_correct_predictions = self.update_correct_predictions(
                    num_correct_predictions, output_file,
                    zip(true_target_strings, predicted_strings))
                true_positive, false_positive, false_negative = \
                    self.update_per_subtoken_statistics(
                        zip(true_target_strings, predicted_strings),
                        true_positive, false_positive, false_negative)

                total_predictions += len(true_target_strings)
                total_prediction_batches += 1
                if total_prediction_batches % self.num_batches_to_log == 0:
                    elapsed = time.time() - start_time
                    self.trace_evaluation(output_file, num_correct_predictions,
                                          total_predictions, elapsed)

                # NOTE(review): the source this was recovered from had lost
                # its indentation; the embedding dump is assumed to run for
                # every batch rather than only on logging batches -- confirm.
                # One line per example: "<tag>,<v0>,<v1>,...".
                embedding_file.write('\n'.join([
                    Common.binary_to_string(tag[i]) + ',' + ','.join(str(value) for value in row)
                    for i, row in enumerate(method_embeddings)
                ]) + '\n')
        except tf.errors.OutOfRangeError:
            pass

        print('Done testing, epoch reached')
        # NOTE(review): total_predictions is still 0 here if the reader
        # produced no batches -- confirm callers never evaluate an empty set.
        output_file.write(str(num_correct_predictions / total_predictions) + '\n')

    elapsed = int(time.time() - eval_start_time)
    precision, recall, f1 = self.calculate_results(true_positive, false_positive,
                                                   false_negative)
    try:
        files_rouge = FilesRouge()
        rouge = files_rouge.get_scores(hyp_path=predicted_file_name,
                                       ref_path=ref_file_name,
                                       avg=True,
                                       ignore_empty=True)
    except ValueError:
        rouge = 0
    print("Evaluation time: %sh%sm%ss" % ((elapsed // 60 // 60), (elapsed // 60) % 60,
                                          elapsed % 60))
    return num_correct_predictions / total_predictions, \
        precision, recall, f1, rouge
def predict(self, predict_data_lines):
    """Predict a target for every input line.

    The first call (``self.predict_queue is None``) builds the reader, the
    string placeholder and the test graph, initializes the session
    variables, creates a saver and loads the model. Later calls reuse them.

    Args:
        predict_data_lines: iterable of lines; each is fed to
            ``self.predict_placeholder`` in a separate ``sess.run`` call.

    Returns:
        A list with one tuple per input line:
        ``(true_target_string, predicted_strings, top_scores,
        attention_per_path)``. With ``config.BEAM_WIDTH > 0``,
        ``predicted_strings`` is a list of candidates and ``top_scores``
        holds one score per candidate. ``attention_per_path`` is ``None``
        unless ``config.BEAM_WIDTH == 0``.
    """
    if self.predict_queue is None:
        self.predict_queue = reader.Reader(
            subtoken_to_index=self.subtoken_to_index,
            node_to_index=self.node_to_index,
            target_to_index=self.target_to_index,
            config=self.config,
            is_evaluating=True)
        self.predict_placeholder = tf.placeholder(tf.string)
        reader_output = self.predict_queue.process_from_placeholder(
            self.predict_placeholder)
        # Add a leading axis of size one to every reader tensor before
        # building the test graph on it.
        reader_output = {
            key: tf.expand_dims(tensor, 0)
            for key, tensor in reader_output.items()
        }
        self.predict_top_indices_op, self.predict_top_scores_op, _, self.attention_weights_op = \
            self.build_test_graph(reader_output)
        self.predict_source_string = reader_output[reader.PATH_SOURCE_STRINGS_KEY]
        self.predict_path_string = reader_output[reader.PATH_STRINGS_KEY]
        self.predict_path_target_string = reader_output[reader.PATH_TARGET_STRINGS_KEY]
        self.predict_target_strings_op = reader_output[reader.TARGET_STRING_KEY]

        self.initialize_session_variables(self.sess)
        self.saver = tf.train.Saver()
        self.load_model(self.sess)

    results = []
    for line in predict_data_lines:
        (predicted_indices, top_scores, true_target_strings, attention_weights,
         path_source_string, path_strings, path_target_string) = self.sess.run(
            [
                self.predict_top_indices_op,
                self.predict_top_scores_op,
                self.predict_target_strings_op,
                self.attention_weights_op,
                self.predict_source_string,
                self.predict_path_string,
                self.predict_path_target_string,
            ],
            feed_dict={self.predict_placeholder: line})

        # Remove the leading size-one axis and flatten the context strings.
        top_scores = np.squeeze(top_scores, axis=0)
        path_source_string = path_source_string.reshape(-1)
        path_strings = path_strings.reshape(-1)
        path_target_string = path_target_string.reshape(-1)
        predicted_indices = np.squeeze(predicted_indices, axis=0)
        true_target_strings = Common.binary_to_string(true_target_strings[0])

        if self.config.BEAM_WIDTH > 0:
            # (target_length, top-k)
            predicted_strings = [
                [self.index_to_target[sugg] for sugg in timestep]
                for timestep in predicted_indices
            ]
            # (top-k, target_length)
            predicted_strings = list(map(list, zip(*predicted_strings)))
            # Collapse the per-step scores into one score per candidate.
            # This must run exactly once: its result is a flat list of
            # scalars, and unpacking those into zip() a second time raises
            # TypeError.
            top_scores = [np.exp(np.sum(s)) for s in zip(*top_scores)]
        else:
            # (batch, target_length)
            predicted_strings = [self.index_to_target[idx] for idx in predicted_indices]

        attention_per_path = None
        if self.config.BEAM_WIDTH == 0:
            attention_per_path = self.get_attention_per_path(
                path_source_string, path_strings, path_target_string, attention_weights)

        results.append(
            (true_target_strings, predicted_strings, top_scores, attention_per_path))
    return results