def _get_single_story_features(self, story_headline, articles):
        """Builds the ordered feature mapping for one story.

        The headline is converted with ``self.len_title`` as the maximum
        sequence length and registered under suffix "a". Articles are then
        converted in input order with ``self.len_passage`` and registered
        under suffixes "b", "c", ...; iteration stops once
        ``self.max_num_articles`` articles have been handled. If fewer
        articles were supplied, empty-text examples are registered for the
        remaining suffixes.

        When ``self.include_text_snippet_in_example`` is truthy, every
        feature's ``label_id`` is overwritten with the raw text it came from:
        the headline, the article snippet (``text_a`` joined to ``text_b``
        with " [SEP] " when ``text_b`` is truthy), or "" for placeholders.

        Args:
            story_headline: Text used as ``text_a`` of the headline example.
            articles: Sized iterable of examples passed as-is to
                ``classifier_data_lib.convert_single_example``. Items need
                ``text_a`` / ``text_b`` attributes when snippets are included.

        Returns:
            The ``collections.OrderedDict`` that
            ``self._add_feature_with_suffix`` was asked to fill.
        """
        features_by_suffix = collections.OrderedDict()
        first_article_code = ord("b")

        def snippet_of(item):
            if not item.text_b:
                return item.text_a
            return " [SEP] ".join([item.text_a, item.text_b])

        def text_only_example(text):
            return classifier_data_lib.InputExample(guid=self.guid,
                                                    text_a=text,
                                                    label=self.label)

        def register(example, max_len, suffix, raw_text):
            # `raw_text` is a zero-argument callable so the snippet is only
            # computed when the include-snippet flag is actually set.
            converted = classifier_data_lib.convert_single_example(
                ex_index=self.ex_index,
                example=example,
                label_list=[self.label],
                max_seq_length=max_len,
                tokenizer=self.tokenizer)
            if self.include_text_snippet_in_example:
                converted.label_id = raw_text()
            self._add_feature_with_suffix(feature=converted,
                                          suffix=suffix,
                                          story_features=features_by_suffix)

        # Headline always occupies the "a" slot.
        register(text_only_example(story_headline), self.len_title, "a",
                 lambda: story_headline)

        # Real articles take "b", "c", ... up to the configured maximum.
        for position, item in enumerate(articles):
            if position == self.max_num_articles:
                break
            register(item, self.len_passage,
                     chr(first_article_code + position),
                     lambda item=item: snippet_of(item))

        # Pad the remaining slots with empty-text placeholders.
        for position in range(len(articles), self.max_num_articles):
            register(text_only_example(""), self.len_passage,
                     chr(first_article_code + position),
                     lambda: "")

        return features_by_suffix
    def __to_feature(self, text, label):
        """Converts one (text, label) pair into classifier feature fields.

        Both arguments must expose ``.numpy()`` (presumably eager tensors --
        confirm against the caller). The extracted values become ``text_a``
        and ``label`` of a single-sentence ``InputExample`` which is then
        converted using this instance's ``label_list``, ``max_seq_length``
        and ``tokenizer``.

        Returns:
            A 4-tuple ``(input_ids, input_mask, segment_ids, label_id)``
            taken from the converted feature.
        """
        converted = classifier_data_lib.convert_single_example(
            ex_index=0,
            example=classifier_data_lib.InputExample(guid=None,
                                                     text_a=text.numpy(),
                                                     text_b=None,
                                                     label=label.numpy()),
            label_list=self.label_list,
            max_seq_length=self.max_seq_length,
            tokenizer=self.tokenizer)

        return (converted.input_ids,
                converted.input_mask,
                converted.segment_ids,
                converted.label_id)
# Example #3
# 0
def to_feature(text,
               label,
               label_list=label_list,
               max_seq_length=max_seq_length,
               tokenizer=tokenizer):
    """Converts one (text, label) pair into classifier feature fields.

    ``text`` and ``label`` must expose ``.numpy()`` (presumably eager
    tensors -- confirm against the caller). Their values become ``text_a``
    and ``label`` of a single-sentence ``InputExample``.

    Args:
        text: Object whose ``.numpy()`` value is the input text.
        label: Object whose ``.numpy()`` value is the example's label.
        label_list: Labels passed to the converter; defaults to the
            module-level ``label_list`` captured at definition time.
        max_seq_length: Maximum sequence length passed to the converter;
            defaults to the module-level value captured at definition time.
        tokenizer: Tokenizer passed to the converter; defaults to the
            module-level ``tokenizer`` captured at definition time.

    Returns:
        A 4-tuple ``(input_ids, input_mask, segment_ids, label_id)`` taken
        from the converted feature.
    """
    converted = classifier_data_lib.convert_single_example(
        ex_index=0,
        example=classifier_data_lib.InputExample(guid=None,
                                                 text_a=text.numpy(),
                                                 text_b=None,
                                                 label=label.numpy()),
        label_list=label_list,
        max_seq_length=max_seq_length,
        tokenizer=tokenizer)

    fields = (converted.input_ids,
              converted.input_mask,
              converted.segment_ids,
              converted.label_id)
    return fields