Ejemplo n.º 1
0
 def get_attention_per_path(source_strings, path_strings, target_strings, attention_weights):
     """Map every path-context to its attention weight, one dict per step.

     Args:
         source_strings: per-context source values; each is passed through
             ``Common.binary_to_string``.
         path_strings: per-context path values, aligned with
             ``source_strings`` and converted the same way.
         target_strings: per-context target values, aligned likewise.
         attention_weights: iterable of per-step weight sequences
             (layout ``(time, contexts)`` according to the original comment).

     Returns:
         A list with one dict per entry of ``attention_weights``. Each dict
         maps the converted ``(source, path, target)`` triplet to the weight
         found at the same position in that step. ``zip`` stops at the
         shortest input, and a repeated triplet keeps its last weight.
     """
     return [
         {
             (Common.binary_to_string(src),
              Common.binary_to_string(route),
              Common.binary_to_string(dst)): score
             for src, route, dst, score in zip(source_strings, path_strings, target_strings, step_weights)
         }
         for step_weights in attention_weights
     ]
Ejemplo n.º 2
0
    def predict(self, predict_data_lines):
        """Run the model on each input line and collect the predictions.

        On the first call (``self.predict_queue is None``) the prediction
        pipeline is set up: a reader fed through a string placeholder, the
        test graph built on the reader output, session variables are
        initialised and the saved model is loaded into ``self.sess``.

        Args:
            predict_data_lines: iterable of lines; every line is fed on its
                own to the string placeholder.

        Returns:
            A list holding one tuple per input line:
            ``(true_target, predicted_strings, top_scores, attention_per_path)``.
            ``attention_per_path`` is ``None`` unless
            ``self.config.BEAM_WIDTH == 0``.
        """
        if self.predict_queue is None:
            self.predict_queue = reader.Reader(
                subtoken_to_index=self.subtoken_to_index,
                node_to_index=self.node_to_index,
                target_to_index=self.target_to_index,
                config=self.config,
                is_evaluating=True)
            self.predict_placeholder = tf.placeholder(tf.string)
            parsed_example = self.predict_queue.process_from_placeholder(self.predict_placeholder)
            # Give every reader tensor a new leading axis of size 1.
            batched = {}
            for name, tensor in parsed_example.items():
                batched[name] = tf.expand_dims(tensor, 0)
            (self.predict_top_indices_op,
             self.predict_top_scores_op,
             _,
             self.attention_weights_op) = self.build_test_graph(batched)
            self.predict_source_string = batched[reader.PATH_SOURCE_STRINGS_KEY]
            self.predict_path_string = batched[reader.PATH_STRINGS_KEY]
            self.predict_path_target_string = batched[reader.PATH_TARGET_STRINGS_KEY]
            self.predict_target_strings_op = batched[reader.TARGET_STRING_KEY]

            self.initialize_session_variables(self.sess)
            self.saver = tf.train.Saver()
            self.load_model(self.sess)

        results = []
        for line in predict_data_lines:
            fetched = self.sess.run(
                [self.predict_top_indices_op,
                 self.predict_top_scores_op,
                 self.predict_target_strings_op,
                 self.attention_weights_op,
                 self.predict_source_string,
                 self.predict_path_string,
                 self.predict_path_target_string],
                feed_dict={self.predict_placeholder: line})
            (predicted_indices, top_scores, raw_targets, attention_weights,
             source_terminals, paths, target_terminals) = fetched

            # Drop the leading size-1 axis and flatten the context strings.
            predicted_indices = np.squeeze(predicted_indices, axis=0)
            top_scores = np.squeeze(top_scores, axis=0)
            source_terminals = source_terminals.reshape(-1)
            paths = paths.reshape(-1)
            target_terminals = target_terminals.reshape(-1)
            true_target = Common.binary_to_string(raw_targets[0])

            beam_width = self.config.BEAM_WIDTH
            if beam_width > 0:
                # (target_length, top-k) per the original comment
                per_step = [[self.index_to_target[candidate] for candidate in step]
                            for step in predicted_indices]
                # transpose to (top-k, target_length)
                predicted_strings = [list(beam) for beam in zip(*per_step)]
                # one score per beam: exp of the summed per-step scores
                top_scores = [np.exp(np.sum(beam_scores)) for beam_scores in zip(*top_scores)]
            else:
                predicted_strings = [self.index_to_target[idx] for idx in predicted_indices]

            if beam_width == 0:
                attention_per_path = self.get_attention_per_path(
                    source_terminals, paths, target_terminals, attention_weights)
            else:
                attention_per_path = None

            results.append((true_target, predicted_strings, top_scores, attention_per_path))
        return results
Ejemplo n.º 3
0
    def evaluate(self, release=False):
        """Evaluate the model on the evaluation queue and write result files.

        On the first call the evaluation reader and test graph are built.
        When ``config.LOAD_PATH`` is set and ``config.TRAIN_PATH`` is not,
        the saved model is loaded first; with ``release=True`` the model is
        then re-saved as ``LOAD_PATH + '.release'`` (its ``.dict`` file is
        copied alongside) and the method returns without evaluating.

        Files written next to the model (directory of ``SAVE_PATH`` or,
        failing that, ``LOAD_PATH``): ``log.txt``, ``ref.txt``, ``pred.txt``
        and ``embedding.txt`` (one ``tag,v0,v1,...`` line per example).

        Args:
            release: if true, only export a release copy of the loaded model.

        Returns:
            ``None`` after a release export, otherwise the tuple
            ``(accuracy, precision, recall, f1, rouge)``. ``rouge`` is ``0``
            when the ROUGE computation raises ``ValueError``. With an empty
            evaluation set the accuracy is reported as 0 instead of raising
            ``ZeroDivisionError``.
        """
        eval_start_time = time.time()
        if self.eval_queue is None:
            self.eval_queue = androidreader.Reader(
                subtoken_to_index=self.subtoken_to_index,
                node_to_index=self.node_to_index,
                target_to_index=self.target_to_index,
                config=self.config,
                is_evaluating=True)
            reader_output = self.eval_queue.get_output()
            self.eval_predicted_indices_op, self.eval_topk_values, _, _, self.method_embedding = \
                self.build_test_graph(reader_output)
            self.eval_true_target_strings_op = reader_output[
                androidreader.TARGET_STRING_KEY]
            self.eval_tag_key_op = reader_output[androidreader.TARGET_TAG_KEY]
            self.saver = tf.train.Saver(max_to_keep=10)

        if self.config.LOAD_PATH and not self.config.TRAIN_PATH:
            self.initialize_session_variables(self.sess)
            self.load_model(self.sess)
            if release:
                release_name = self.config.LOAD_PATH + '.release'
                print('Releasing model, output model: %s' % release_name)
                self.saver.save(self.sess, release_name)
                shutil.copyfile(src=self.config.LOAD_PATH + '.dict',
                                dst=release_name + '.dict')
                return None

        model_dirname = os.path.dirname(
            self.config.SAVE_PATH if self.config.SAVE_PATH else self.config.LOAD_PATH)
        # os.path.join keeps the files in the current directory when the model
        # path has no directory part (plain concatenation produced '/ref.txt').
        log_file_name = os.path.join(model_dirname, 'log.txt')
        ref_file_name = os.path.join(model_dirname, 'ref.txt')
        predicted_file_name = os.path.join(model_dirname, 'pred.txt')
        embedding_file_name = os.path.join(model_dirname, 'embedding.txt')
        if model_dirname:
            # os.makedirs('') raises, so only create a real directory.
            os.makedirs(model_dirname, exist_ok=True)

        with open(log_file_name, 'w') as output_file, \
                open(ref_file_name, 'w') as ref_file, \
                open(predicted_file_name, 'w') as pred_file, \
                open(embedding_file_name, 'w') as embedding_file:
            # Scalar counter without beam search, one counter per beam otherwise.
            num_correct_predictions = 0 if self.config.BEAM_WIDTH == 0 \
                else np.zeros([self.config.BEAM_WIDTH], dtype=np.int32)
            total_predictions = 0
            total_prediction_batches = 0
            true_positive, false_positive, false_negative = 0, 0, 0
            self.eval_queue.reset(self.sess)
            start_time = time.time()

            try:
                # Loop until the input pipeline signals exhaustion.
                while True:
                    predicted_indices, true_target_strings, top_values, method_embeddings, tag = self.sess.run(
                        [
                            self.eval_predicted_indices_op,
                            self.eval_true_target_strings_op,
                            self.eval_topk_values,
                            self.method_embedding,
                            self.eval_tag_key_op,
                        ])
                    true_target_strings = Common.binary_to_string_list(
                        true_target_strings)
                    ref_file.write('\n'.join([
                        name.replace(Common.internal_delimiter, ' ')
                        for name in true_target_strings
                    ]) + '\n')
                    if self.config.BEAM_WIDTH > 0:
                        # predicted indices: (batch, time, beam_width)
                        predicted_strings = [[[
                            self.index_to_target[i] for i in timestep
                        ] for timestep in example]
                                             for example in predicted_indices]
                        predicted_strings = [
                            list(map(list, zip(*example)))
                            for example in predicted_strings
                        ]  # (batch, top-k, target_length)
                        # NOTE(review): only the first example of the batch is
                        # written here, while ref_file receives every example -
                        # confirm this is intended.
                        pred_file.write('\n'.join([
                            ' '.join(Common.filter_impossible_names(words))
                            for words in predicted_strings[0]
                        ]) + '\n')
                    else:
                        predicted_strings = [[
                            self.index_to_target[i] for i in example
                        ] for example in predicted_indices]
                        pred_file.write('\n'.join([
                            ' '.join(Common.filter_impossible_names(words))
                            for words in predicted_strings
                        ]) + '\n')

                    num_correct_predictions = self.update_correct_predictions(
                        num_correct_predictions, output_file,
                        zip(true_target_strings, predicted_strings))
                    true_positive, false_positive, false_negative = self.update_per_subtoken_statistics(
                        zip(true_target_strings, predicted_strings),
                        true_positive, false_positive, false_negative)

                    total_predictions += len(true_target_strings)
                    total_prediction_batches += 1
                    if total_prediction_batches % self.num_batches_to_log == 0:
                        elapsed = time.time() - start_time
                        self.trace_evaluation(output_file,
                                              num_correct_predictions,
                                              total_predictions, elapsed)
                    # One line per example: its tag followed by the embedding values.
                    embedding_file.write('\n'.join([
                        Common.binary_to_string(tag[i]) + ',' + ','.join([
                            str(method_embeddings[i, j])
                            for j in range(method_embeddings.shape[1])
                        ]) for i in range(method_embeddings.shape[0])
                    ]) + '\n')
            except tf.errors.OutOfRangeError:
                # Expected end-of-data signal from the input pipeline.
                pass

            print('Done testing, epoch reached')
            # Guard the denominator so an empty evaluation set yields 0
            # instead of a ZeroDivisionError; unchanged for non-empty sets.
            accuracy = num_correct_predictions / max(total_predictions, 1)
            output_file.write(str(accuracy) + '\n')

        elapsed = int(time.time() - eval_start_time)
        precision, recall, f1 = self.calculate_results(true_positive,
                                                       false_positive,
                                                       false_negative)
        try:
            files_rouge = FilesRouge()
            rouge = files_rouge.get_scores(hyp_path=predicted_file_name,
                                           ref_path=ref_file_name,
                                           avg=True,
                                           ignore_empty=True)
        except ValueError:
            # Best effort: fall back to 0 when ROUGE cannot be computed.
            rouge = 0
        print("Evaluation time: %sh%sm%ss" %
              ((elapsed // 60 // 60), (elapsed // 60) % 60, elapsed % 60))
        return accuracy, precision, recall, f1, rouge
Ejemplo n.º 4
0
    def predict(self, predict_data_lines):
        """Run the model on each input line and collect the predictions.

        On the first call (``self.predict_queue is None``) the prediction
        pipeline is set up: a reader fed through a string placeholder, the
        test graph built on the reader output, session variables are
        initialised and the saved model is loaded into ``self.sess``.

        Args:
            predict_data_lines: iterable of lines; every line is fed on its
                own to the string placeholder.

        Returns:
            A list holding one tuple per input line:
            ``(true_target, predicted_strings, top_scores, attention_per_path)``.
            With ``config.BEAM_WIDTH > 0``, ``top_scores`` holds one value per
            beam (exp of the summed per-step scores). ``attention_per_path``
            is ``None`` unless ``config.BEAM_WIDTH == 0``.
        """
        if self.predict_queue is None:
            self.predict_queue = reader.Reader(
                subtoken_to_index=self.subtoken_to_index,
                node_to_index=self.node_to_index,
                target_to_index=self.target_to_index,
                config=self.config,
                is_evaluating=True)
            self.predict_placeholder = tf.placeholder(tf.string)
            reader_output = self.predict_queue.process_from_placeholder(
                self.predict_placeholder)
            # Give every reader tensor a new leading axis of size 1.
            reader_output = {
                key: tf.expand_dims(tensor, 0)
                for key, tensor in reader_output.items()
            }
            self.predict_top_indices_op, self.predict_top_scores_op, _, self.attention_weights_op = \
                self.build_test_graph(reader_output)
            self.predict_source_string = reader_output[
                reader.PATH_SOURCE_STRINGS_KEY]
            self.predict_path_string = reader_output[reader.PATH_STRINGS_KEY]
            self.predict_path_target_string = reader_output[
                reader.PATH_TARGET_STRINGS_KEY]
            self.predict_target_strings_op = reader_output[
                reader.TARGET_STRING_KEY]

            self.initialize_session_variables(self.sess)
            self.saver = tf.train.Saver()
            self.load_model(self.sess)

        results = []
        for line in predict_data_lines:
            predicted_indices, top_scores, true_target_strings, attention_weights, path_source_string, path_strings, path_target_string = self.sess.run(
                [
                    self.predict_top_indices_op, self.predict_top_scores_op,
                    self.predict_target_strings_op, self.attention_weights_op,
                    self.predict_source_string, self.predict_path_string,
                    self.predict_path_target_string
                ],
                feed_dict={self.predict_placeholder: line})

            # Drop the leading size-1 axis and flatten the context strings.
            top_scores = np.squeeze(top_scores, axis=0)
            path_source_string = path_source_string.reshape((-1))
            path_strings = path_strings.reshape((-1))
            path_target_string = path_target_string.reshape((-1))
            predicted_indices = np.squeeze(predicted_indices, axis=0)
            true_target_strings = Common.binary_to_string(
                true_target_strings[0])

            if self.config.BEAM_WIDTH > 0:
                predicted_strings = [[
                    self.index_to_target[sugg] for sugg in timestep
                ] for timestep in predicted_indices]  # (target_length, top-k)
                predicted_strings = list(map(
                    list, zip(*predicted_strings)))  # (top-k, target_length)
                # Reduce the per-step scores to one score per beam. This must
                # run exactly once: a second pass would try to unpack the
                # resulting scalars with zip(*...) and raise TypeError.
                top_scores = [np.exp(np.sum(s)) for s in zip(*top_scores)]
            else:
                predicted_strings = [
                    self.index_to_target[idx] for idx in predicted_indices
                ]

            attention_per_path = None
            if self.config.BEAM_WIDTH == 0:
                attention_per_path = self.get_attention_per_path(
                    path_source_string, path_strings, path_target_string,
                    attention_weights)

            results.append((true_target_strings, predicted_strings, top_scores,
                            attention_per_path))
        return results