def _info(self):
    """Return the ``datasets.DatasetInfo`` describing this dataset.

    Every example consists of two records, ``"source"`` and ``"derived"``,
    that share one schema: string fields (``filename``, ``headline``,
    ``body``, ``newspaper``, ``newsdate``), ``int64`` counters
    (``total_number_of_words``, ``total_number_of_sentences``,
    ``number_of_words_with_swr``) and two class labels (``domain`` and
    ``classification``).
    """

    def _record_schema():
        # Called once per record so "source" and "derived" each get their
        # own feature objects, exactly as if the mapping were written twice.
        return {
            "filename": datasets.Value("string"),
            "headline": datasets.Value("string"),
            "body": datasets.Value("string"),
            "total_number_of_words": datasets.Value("int64"),
            "total_number_of_sentences": datasets.Value("int64"),
            "number_of_words_with_swr": datasets.Value("int64"),
            "newspaper": datasets.Value("string"),
            "newsdate": datasets.Value("string"),
            "domain": datasets.ClassLabel(
                names=["business", "sports", "national", "foreign", "showbiz"]
            ),
            "classification": datasets.ClassLabel(
                names=["wholly_derived", "partially_derived", "not_derived"]
            ),
        }

    schema = datasets.Features(
        {
            "source": _record_schema(),
            "derived": _record_schema(),
        }
    )

    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``datasets.DatasetInfo`` for the selected configuration.

    The schema depends on ``self.config.name``:

    * ``"NER"`` and ``"ROLES"`` expose ``input_ids`` (sequence of ``int32``),
      ``labels`` (sequence of class labels) and ``tag_mask`` (sequence of
      ``int8``).
    * ``"BORING"`` and ``"PANELIZATION"`` expose only ``input_ids`` and
      ``labels``.

    The label names come from the module-level constant matching the
    configuration. ``("input_ids", "labels")`` is declared as the supervised
    key pair for every configuration.

    Raises:
        ValueError: if ``self.config.name`` is not one of the four supported
            configuration names.
    """
    config_name = self.config.name

    if config_name == "NER":
        label_names, has_tag_mask = _NER_LABEL_NAMES, True
    elif config_name == "ROLES":
        label_names, has_tag_mask = _SEMANTIC_ROLES_LABEL_NAMES, True
    elif config_name == "BORING":
        label_names, has_tag_mask = _BORING_LABEL_NAMES, False
    elif config_name == "PANELIZATION":
        label_names, has_tag_mask = _PANEL_START_NAMES, False
    else:
        # Without this branch an unknown name would only surface later as an
        # UnboundLocalError on `features`, hiding the real cause.
        raise ValueError(
            f"Unknown config name {config_name!r}; expected one of "
            "'NER', 'ROLES', 'BORING', 'PANELIZATION'."
        )

    schema = {
        "input_ids": datasets.Sequence(feature=datasets.Value("int32")),
        "labels": datasets.Sequence(
            feature=datasets.ClassLabel(num_classes=len(label_names), names=label_names)
        ),
    }
    if has_tag_mask:
        # Added last so the field order stays input_ids, labels, tag_mask.
        schema["tag_mask"] = datasets.Sequence(feature=datasets.Value("int8"))

    features = datasets.Features(schema)

    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=features,
        supervised_keys=("input_ids", "labels"),
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``datasets.DatasetInfo`` describing this dataset.

    Each example carries three sequence-valued fields: ``dialog`` (strings),
    ``act`` and ``emotion`` (class labels whose names are the values of the
    module-level ``act_label`` and ``emotion_label`` mappings, respectively).
    """
    act_names = list(act_label.values())
    emotion_names = list(emotion_label.values())

    schema = datasets.Features(
        {
            "dialog": datasets.features.Sequence(datasets.Value("string")),
            "act": datasets.features.Sequence(datasets.ClassLabel(names=act_names)),
            "emotion": datasets.features.Sequence(datasets.ClassLabel(names=emotion_names)),
        }
    )

    info = datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage="http://yanran.li/dailydialog",
        citation=_CITATION,
    )
    return info