def predict(self, predict_data_lines):
    """Run the prediction graph over ``predict_data_lines``.

    On the first call (``self.predict_queue`` is None) the reader, its input
    placeholder and the test-graph ops are created, session variables are
    initialized and the stored model is loaded into ``self.sess``.

    The lines are fed through ``common.split_to_batches(..., 1)`` and one
    result tuple is recorded per batch:
    ``(original_name, top_words, top_scores, attention_per_path)``, each
    taken from index 0 of the batch output.

    Returns a ``(results, code_vectors)`` pair. ``code_vectors`` is the
    ``np.vstack`` of the per-batch vectors when at least one batch was
    collected under ``config.EXPORT_CODE_VECTORS``; otherwise it is the
    empty list.
    """
    if self.predict_queue is None:
        self.predict_queue = PathContextReader.PathContextReader(
            word_to_index=self.word_to_index,
            path_to_index=self.path_to_index,
            target_word_to_index=self.target_word_to_index,
            config=self.config,
            is_evaluating=True)
        self.predict_placeholder = self.predict_queue.get_input_placeholder()
        (self.predict_top_words_op,
         self.predict_top_values_op,
         self.predict_original_names_op,
         self.attention_weights_op,
         self.predict_source_string,
         self.predict_path_string,
         self.predict_path_target_string,
         self.predict_code_vectors) = self.build_test_graph(
            self.predict_queue.get_filtered_batches(), normalize_scores=True)

        self.initialize_session_variables(self.sess)
        self.saver = tf.train.Saver()
        self.load_model(self.sess)

    results = []
    collected_vectors = []
    for single_batch in common.split_to_batches(predict_data_lines, 1):
        fetches = [
            self.predict_top_words_op,
            self.predict_top_values_op,
            self.predict_original_names_op,
            self.attention_weights_op,
            self.predict_source_string,
            self.predict_path_string,
            self.predict_path_target_string,
            self.predict_code_vectors,
        ]
        (raw_top_words, top_scores, raw_names, attention_weights,
         source_strings, path_strings, target_strings,
         batch_code_vectors) = self.sess.run(
            fetches, feed_dict={self.predict_placeholder: single_batch})

        top_words = common.binary_to_string_matrix(raw_top_words)
        name_matrix = common.binary_to_string_matrix(raw_names)
        attention_per_path = self.get_attention_per_path(
            source_strings, path_strings, target_strings, attention_weights)

        # Flatten original names from [[]] to [].
        flat_names = [name for row in name_matrix for name in row]

        results.append(
            (flat_names[0], top_words[0], top_scores[0], attention_per_path))
        if self.config.EXPORT_CODE_VECTORS:
            collected_vectors.append(batch_code_vectors)

    code_vectors = collected_vectors
    if collected_vectors:
        code_vectors = np.vstack(collected_vectors)
    return results, code_vectors
def predict_dataset(self):
    """Predict names for every line of ``config.TEST_PATH`` and append them
    to ``config.OUTPUT_FILE``.

    On the first call (``self.predict_queue`` is None) the reader, its input
    placeholder and the test-graph ops are created, session variables are
    initialized and the stored model is loaded into ``self.sess``.

    The test lines are loaded into ``self.predict_data_lines`` and processed
    in batches of ``config.TEST_BATCH_SIZE``. For each example one line of
    the form ``original_name;word_1;word_2;...`` is written. The output file
    is opened in ``'a+'`` mode, so repeated runs append to existing content.
    A progress line is printed after each batch.

    Returns None.
    """
    if self.predict_queue is None:
        self.predict_queue = PathContextReader.PathContextReader(
            word_to_index=self.word_to_index,
            path_to_index=self.path_to_index,
            target_word_to_index=self.target_word_to_index,
            config=self.config,
            is_evaluating=True)
        self.predict_placeholder = self.predict_queue.get_input_placeholder()
        # Unpack eight values, exactly as predict() does for the same call.
        # This method and predict() share the `predict_queue` guard, so
        # whichever runs first must leave every predict_* op set, including
        # `predict_code_vectors`, for the other one to use.
        (self.predict_top_words_op,
         self.predict_top_values_op,
         self.predict_original_names_op,
         self.attention_weights_op,
         self.predict_source_string,
         self.predict_path_string,
         self.predict_path_target_string,
         self.predict_code_vectors) = self.build_test_graph(
            self.predict_queue.get_filtered_batches(), normalize_scores=True)

        self.initialize_session_variables(self.sess)
        self.saver = tf.train.Saver()
        self.load_model(self.sess)

    self.predict_data_lines = common.load_file_lines(self.config.TEST_PATH)

    # The fetch list does not change between batches; build it once.
    fetches = [
        self.predict_top_words_op,
        self.predict_top_values_op,
        self.predict_original_names_op,
        self.attention_weights_op,
        self.predict_source_string,
        self.predict_path_string,
        self.predict_path_target_string,
    ]

    with open(self.config.OUTPUT_FILE, 'a+') as output_file:
        batches = common.split_to_batches(
            self.predict_data_lines, self.config.TEST_BATCH_SIZE)
        for batch_num, batch in enumerate(batches, start=1):
            # Only the top words and the original names are written out;
            # the remaining fetched values are discarded.
            top_words, _, original_names, _, _, _, _ = self.sess.run(
                fetches, feed_dict={self.predict_placeholder: batch})
            top_words = common.binary_to_string_matrix(top_words)
            original_names = common.binary_to_string_matrix(original_names)
            # Flatten original names from [[]] to [].
            original_names = [
                name for row in original_names for name in row]

            output_file.writelines(
                "%s;%s\n" % (name, ";".join(words))
                for name, words in zip(original_names, top_words))
            print("Finished batch %s with %s elements"
                  % (batch_num, len(original_names)))
def evaluate(self) -> Optional[ModelEvaluationResults]:
    """Evaluate the model over the dataset produced by the evaluation reader.

    On the first call (``self.eval_reader`` is None) the reader, an
    initializable iterator over its dataset, the test-graph ops and a
    ``Saver`` are created. When ``config.MODEL_LOAD_PATH`` is set and
    ``config.TRAIN_DATA_PATH_PREFIX`` is not, session variables are
    initialized and the stored model is loaded before evaluating.

    Side effects:
        * ``log.txt`` (relative path) is overwritten with whatever
          ``_log_predictions_during_evaluation`` writes, followed by the
          final top-k correct-predictions value.
        * When ``config.EXPORT_CODE_VECTORS`` is set, code vectors are
          written to ``config.TEST_DATA_PATH + '.vectors'``.

    Returns:
        ``None`` when the model was loaded and ``config.RELEASE`` is set:
        the model is saved as ``MODEL_LOAD_PATH + '.release'`` and no
        evaluation is run. Otherwise a ``ModelEvaluationResults`` holding
        top-k accuracy and subtoken precision / recall / F1.
    """
    eval_start_time = time.time()
    if self.eval_reader is None:
        self.eval_reader = PathContextReader(
            vocabs=self.vocabs,
            model_input_tensors_former=_TFEvaluateModelInputTensorsFormer(),
            config=self.config,
            estimator_action=EstimatorAction.Evaluate)
        input_iterator = tf.compat.v1.data.make_initializable_iterator(
            self.eval_reader.get_dataset())
        self.eval_input_iterator_reset_op = input_iterator.initializer
        input_tensors = input_iterator.get_next()

        (self.eval_top_words_op, self.eval_top_values_op,
         self.eval_original_names_op, _, _, _, _, _, _,
         self.eval_code_vectors) = self._build_tf_test_graph(input_tensors)
        self.saver = tf.compat.v1.train.Saver()

    if self.config.MODEL_LOAD_PATH and not self.config.TRAIN_DATA_PATH_PREFIX:
        self._initialize_session_variables()
        self._load_inner_model(self.sess)
        if self.config.RELEASE:
            release_name = self.config.MODEL_LOAD_PATH + '.release'
            self.log('Releasing model, output model: %s' % release_name)
            self.saver.save(self.sess, release_name)
            return None  # FIXME: why do we return none here?

    subtokens_evaluation_metric = SubtokensEvaluationMetric(
        partial(common.filter_impossible_names,
                self.vocabs.target_vocab.special_words))
    topk_accuracy_evaluation_metric = TopKAccuracyEvaluationMetric(
        self.config.TOP_K_WORDS_CONSIDERED_DURING_PREDICTION,
        partial(common.get_first_match_word_from_top_predictions,
                self.vocabs.target_vocab.special_words))
    eval_ops = [
        self.eval_top_words_op,
        self.eval_top_values_op,
        self.eval_original_names_op,
        self.eval_code_vectors,
    ]

    # The vectors file is opened conditionally, so it cannot share the
    # `with` statement below; the try/finally guarantees it is closed even
    # if evaluation raises part-way through.
    code_vectors_file = None
    try:
        with open('log.txt', 'w') as log_output_file:
            if self.config.EXPORT_CODE_VECTORS:
                code_vectors_file = open(
                    self.config.TEST_DATA_PATH + '.vectors', 'w')
            total_predictions = 0
            total_prediction_batches = 0
            start_time = time.time()

            self.sess.run(self.eval_input_iterator_reset_op)

            self.log('Starting evaluation')

            # Run evaluation in a loop until the iterator is exhausted.
            # Each iteration = one batch; the tf iterator signals the end
            # of the data by raising OutOfRangeError.
            try:
                while True:
                    top_words, _, original_names, code_vectors = \
                        self.sess.run(eval_ops)

                    # shapes:
                    #   top_words: (batch, top_k); top_scores: (batch, top_k)
                    #   original_names: (batch, );
                    #   code_vectors: (batch, code_vector_size)
                    top_words = common.binary_to_string_matrix(
                        top_words)  # (batch, top_k)
                    original_names = common.binary_to_string_list(
                        original_names)  # (batch,)

                    # zip() yields a one-shot iterator, so each consumer
                    # gets its own fresh one.
                    self._log_predictions_during_evaluation(
                        zip(original_names, top_words), log_output_file)
                    topk_accuracy_evaluation_metric.update_batch(
                        zip(original_names, top_words))
                    subtokens_evaluation_metric.update_batch(
                        zip(original_names, top_words))

                    total_predictions += len(original_names)
                    total_prediction_batches += 1
                    if code_vectors_file is not None:
                        self._write_code_vectors(
                            code_vectors_file, code_vectors)
                    if (total_prediction_batches
                            % self.config.NUM_BATCHES_TO_LOG_PROGRESS == 0):
                        # Elapsed time is measured from the start of the
                        # loop, not from the previous progress report.
                        self._trace_evaluation(
                            total_predictions, time.time() - start_time)
            except tf.errors.OutOfRangeError:
                # Reader iterator is exhausted; no more batches to produce.
                pass
            self.log('Done evaluating, epoch reached')
            log_output_file.write(
                str(topk_accuracy_evaluation_metric.topk_correct_predictions)
                + '\n')
    finally:
        if code_vectors_file is not None:
            code_vectors_file.close()

    elapsed = int(time.time() - eval_start_time)
    self.log("Evaluation time: %sH:%sM:%sS" % (
        (elapsed // 60 // 60), (elapsed // 60) % 60, elapsed % 60))
    return ModelEvaluationResults(
        topk_acc=topk_accuracy_evaluation_metric.topk_correct_predictions,
        subtoken_precision=subtokens_evaluation_metric.precision,
        subtoken_recall=subtokens_evaluation_metric.recall,
        subtoken_f1=subtokens_evaluation_metric.f1)
def evaluate(self):
    """Evaluate the model on the lines of ``config.TEST_PATH``.

    On the first call (``self.eval_queue`` is None) the reader, its input
    placeholder, the test-graph ops and a ``Saver`` are created. When
    ``config.LOAD_PATH`` is set and ``config.TRAIN_PATH`` is not, session
    variables are initialized and the stored model is loaded first.

    Side effects:
        * ``log.txt`` (relative path) is overwritten with whatever
          ``update_correct_predictions`` / ``trace_evaluation`` write,
          followed by the final accuracy value.
        * When ``config.EXPORT_CODE_VECTORS`` is set, code vectors are
          written to ``config.TEST_PATH + '.vectors'``.
        * ``self.eval_data_lines`` is reset to None on completion, so the
          next call reloads the test file.

    Returns:
        ``None`` when the model was loaded and ``config.RELEASE`` is set:
        the model is saved as ``LOAD_PATH + '.release'`` and no evaluation
        is run. Otherwise the tuple
        ``(num_correct_predictions / total_predictions, precision, recall, f1)``.
    """
    eval_start_time = time.time()
    if self.eval_queue is None:
        self.eval_queue = PathContextReader.PathContextReader(
            word_to_index=self.word_to_index,
            path_to_index=self.path_to_index,
            target_word_to_index=self.target_word_to_index,
            config=self.config,
            is_evaluating=True)
        self.eval_placeholder = self.eval_queue.get_input_placeholder()
        (self.eval_top_words_op, self.eval_top_values_op,
         self.eval_original_names_op, _, _, _, _,
         self.eval_code_vectors) = self.build_test_graph(
            self.eval_queue.get_filtered_batches())
        self.saver = tf.train.Saver()

    if self.config.LOAD_PATH and not self.config.TRAIN_PATH:
        self.initialize_session_variables(self.sess)
        self.load_model(self.sess)
        if self.config.RELEASE:
            release_name = self.config.LOAD_PATH + '.release'
            print('Releasing model, output model: %s' % release_name)
            self.saver.save(self.sess, release_name)
            return None

    if self.eval_data_lines is None:
        print('Loading test data from: ' + self.config.TEST_PATH)
        self.eval_data_lines = common.load_file_lines(self.config.TEST_PATH)
        print('Done loading test data')

    total_examples = len(self.eval_data_lines)
    eval_ops = [
        self.eval_top_words_op,
        self.eval_top_values_op,
        self.eval_original_names_op,
        self.eval_code_vectors,
    ]

    # The vectors file is opened conditionally, so it cannot share the
    # `with` statement below; the try/finally guarantees it is closed even
    # if evaluation raises part-way through.
    code_vectors_file = None
    try:
        with open('log.txt', 'w') as output_file:
            if self.config.EXPORT_CODE_VECTORS:
                code_vectors_file = open(
                    self.config.TEST_PATH + '.vectors', 'w')
            num_correct_predictions = np.zeros(self.topk)
            total_predictions = 0
            total_prediction_batches = 0
            true_positive, false_positive, false_negative = 0, 0, 0
            start_time = time.time()

            for batch in common.split_to_batches(
                    self.eval_data_lines, self.config.TEST_BATCH_SIZE):
                top_words, _, original_names, code_vectors = self.sess.run(
                    eval_ops, feed_dict={self.eval_placeholder: batch})
                top_words = common.binary_to_string_matrix(top_words)
                original_names = common.binary_to_string_matrix(
                    original_names)
                # Flatten original names from [[]] to [].
                original_names = [
                    name for row in original_names for name in row]

                # zip() yields a one-shot iterator, so each consumer gets
                # its own fresh one.
                num_correct_predictions = self.update_correct_predictions(
                    num_correct_predictions, output_file,
                    zip(original_names, top_words))
                true_positive, false_positive, false_negative = \
                    self.update_per_subtoken_statistics(
                        zip(original_names, top_words),
                        true_positive, false_positive, false_negative)

                total_predictions += len(original_names)
                total_prediction_batches += 1
                if code_vectors_file is not None:
                    self.write_code_vectors(code_vectors_file, code_vectors)
                if total_prediction_batches % self.num_batches_to_log == 0:
                    # Elapsed time is measured from the start of the loop,
                    # not from the previous progress report.
                    self.trace_evaluation(
                        output_file, num_correct_predictions,
                        total_predictions, time.time() - start_time,
                        total_examples)

            print('Done testing, epoch reached')
            # Computed once here and reused for the return value below.
            accuracy = num_correct_predictions / total_predictions
            output_file.write(str(accuracy) + '\n')
    finally:
        if code_vectors_file is not None:
            code_vectors_file.close()

    elapsed = int(time.time() - eval_start_time)
    precision, recall, f1 = self.calculate_results(
        true_positive, false_positive, false_negative)
    print("Evaluation time: %sH:%sM:%sS" % (
        (elapsed // 60 // 60), (elapsed // 60) % 60, elapsed % 60))
    # Drop the reference to the loaded lines; the next call reloads them.
    self.eval_data_lines = None
    return accuracy, precision, recall, f1