예제 #1
0
 def _info(self):
     """Return the dataset metadata, including the per-example feature schema.

     Each example carries two records, ``"source"`` and ``"derived"``, that
     use exactly the same field layout.
     """

     def article_schema():
         # Built anew on every call so the two records never share feature
         # objects with each other.
         scalar_fields = (
             ("filename", "string"),
             ("headline", "string"),
             ("body", "string"),
             ("total_number_of_words", "int64"),
             ("total_number_of_sentences", "int64"),
             ("number_of_words_with_swr", "int64"),
             ("newspaper", "string"),
             ("newsdate", "string"),
         )
         schema = {field: datasets.Value(dtype) for field, dtype in scalar_fields}
         schema["domain"] = datasets.ClassLabel(
             names=["business", "sports", "national", "foreign", "showbiz"]
         )
         schema["classification"] = datasets.ClassLabel(
             names=["wholly_derived", "partially_derived", "not_derived"]
         )
         return schema

     paired_features = datasets.Features(
         {
             "source": article_schema(),
             "derived": article_schema(),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=paired_features,
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
     )
예제 #2
0
    def _info(self):
        """Return the dataset metadata for the active configuration.

        The feature schema depends on ``self.config.name``:

        * ``"NER"`` and ``"ROLES"``: ``input_ids``, ``labels`` and ``tag_mask``.
        * ``"BORING"`` and ``"PANELIZATION"``: ``input_ids`` and ``labels`` only.

        ``labels`` is a sequence of class labels whose names come from the
        label-name list that belongs to the selected configuration.

        Raises:
            ValueError: if ``self.config.name`` is not one of the four
                configurations listed above.
        """
        config_name = self.config.name

        # Select the label vocabulary and whether the schema carries a tag mask.
        if config_name == "NER":
            label_names, has_tag_mask = _NER_LABEL_NAMES, True
        elif config_name == "ROLES":
            label_names, has_tag_mask = _SEMANTIC_ROLES_LABEL_NAMES, True
        elif config_name == "BORING":
            label_names, has_tag_mask = _BORING_LABEL_NAMES, False
        elif config_name == "PANELIZATION":
            label_names, has_tag_mask = _PANEL_START_NAMES, False
        else:
            # Fail loudly here instead of hitting an unbound local further down.
            raise ValueError(
                f"Unknown config name: {config_name!r}. "
                "Expected one of 'NER', 'ROLES', 'BORING', 'PANELIZATION'."
            )

        schema = {
            "input_ids": datasets.Sequence(feature=datasets.Value("int32")),
            "labels": datasets.Sequence(
                feature=datasets.ClassLabel(num_classes=len(label_names), names=label_names)
            ),
        }
        if has_tag_mask:
            schema["tag_mask"] = datasets.Sequence(feature=datasets.Value("int8"))
        features = datasets.Features(schema)

        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=features,
            supervised_keys=("input_ids", "labels"),
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )
예제 #3
0
 def _info(self):
     """Return the dataset metadata and its feature schema.

     Each example has three sequence-valued fields: ``dialog`` (strings),
     ``act`` and ``emotion`` (class labels named after the values of
     ``act_label`` and ``emotion_label`` respectively).
     """
     act_names = list(act_label.values())
     emotion_names = list(emotion_label.values())

     schema = datasets.Features(
         {
             "dialog": datasets.features.Sequence(datasets.Value("string")),
             "act": datasets.features.Sequence(datasets.ClassLabel(names=act_names)),
             "emotion": datasets.features.Sequence(datasets.ClassLabel(names=emotion_names)),
         }
     )
     info = datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="http://yanran.li/dailydialog",
         citation=_CITATION,
     )
     return info