Beispiel #1
0
 def _info(self):
     """Build the ``datasets.DatasetInfo`` for this dataset.

     ``input_text`` is a nested feature holding a ``table`` (a sequence of
     ``column_header`` / ``row_number`` / ``content`` records) and a
     ``context`` string; ``target_text`` is a plain string. The two top-level
     columns are also declared as the supervised (input, target) keys.
     """
     table_records = datasets.Sequence(
         {
             "column_header": datasets.Value("string"),
             "row_number": datasets.Value("int16"),
             "content": datasets.Value("string"),
         }
     )
     schema = datasets.Features(
         {
             "input_text": {
                 "table": table_records,
                 "context": datasets.Value("string"),
             },
             "target_text": datasets.Value("string"),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=("input_text", "target_text"),
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
     )
Beispiel #2
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Columns: an int32 ``abstract_id``, the ``text`` string, and two
     sequences, ``location`` (int32 values) and ``label`` (strings).
     """
     schema = datasets.Features(
         {
             "abstract_id": datasets.Value("int32"),
             "text": datasets.Value("string"),
             "location": datasets.Sequence(datasets.Value("int32")),
             "label": datasets.Sequence(datasets.Value("string")),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for supervised use.
         supervised_keys=None,
         homepage="https://github.com/BruceWen120/medal",
         citation=_CITATION,
     )
Beispiel #3
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Every example has three string columns: ``complex_sentence`` and the
     pair ``simple_sentence_1`` / ``simple_sentence_2``.
     """
     schema = datasets.Features(
         {
             "complex_sentence": datasets.Value("string"),
             "simple_sentence_1": datasets.Value("string"),
             "simple_sentence_2": datasets.Value("string"),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for supervised use.
         supervised_keys=None,
         # Real project page; the builder template's placeholder URL
         # ("https://dataset-homepage/") had never been filled in.
         homepage="https://github.com/google-research-datasets/wiki-split",
         citation=_CITATION,
     )
Beispiel #4
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Besides ``ID``, ``Text`` and ``URL``, each record has a ``Pronoun`` and
     two columns ``A`` and ``B``; all three come with an int32 ``-offset``
     column, and ``A`` / ``B`` additionally carry a boolean ``-coref`` column.
     """
     schema = datasets.Features(
         {
             "ID": datasets.Value("string"),
             "Text": datasets.Value("string"),
             # Pronoun and its offset.
             "Pronoun": datasets.Value("string"),
             "Pronoun-offset": datasets.Value("int32"),
             # Column A with offset and coreference flag.
             "A": datasets.Value("string"),
             "A-offset": datasets.Value("int32"),
             "A-coref": datasets.Value("bool"),
             # Column B with offset and coreference flag.
             "B": datasets.Value("string"),
             "B-offset": datasets.Value("int32"),
             "B-coref": datasets.Value("bool"),
             "URL": datasets.Value("string"),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://github.com/google-research-datasets/gap-coreference",
         citation=_CITATION,
     )
Beispiel #5
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Identifier and text columns are strings, the rating and vote counts
     are int32, and ``vine`` / ``verified_purchase`` are two-class labels
     with names ``"N"`` and ``"Y"``.
     """
     schema = datasets.Features(
         {
             "marketplace": datasets.Value("string"),
             "customer_id": datasets.Value("string"),
             "review_id": datasets.Value("string"),
             "product_id": datasets.Value("string"),
             "product_parent": datasets.Value("string"),
             "product_title": datasets.Value("string"),
             "product_category": datasets.Value("string"),
             "star_rating": datasets.Value("int32"),
             "helpful_votes": datasets.Value("int32"),
             "total_votes": datasets.Value("int32"),
             "vine": datasets.features.ClassLabel(names=["N", "Y"]),
             "verified_purchase": datasets.features.ClassLabel(names=["N", "Y"]),
             "review_headline": datasets.Value("string"),
             "review_body": datasets.Value("string"),
             "review_date": datasets.Value("string"),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://s3.amazonaws.com/amazon-reviews-pds/readme.html",
         citation=_CITATION,
     )
Beispiel #6
0
    def _info(self):
        """Return the ``datasets.DatasetInfo`` describing this dataset.

        Each example is a ``premise`` / ``hypothesis`` string pair with a
        two-class ``label`` and a six-class ``topic``.
        """
        entailment_label = datasets.ClassLabel(names=["not-entailment", "entailment"])
        topic_label = datasets.ClassLabel(
            names=["india", "news", "international", "entertainment", "sport", "science"]
        )
        schema = datasets.Features(
            {
                "premise": datasets.Value("string"),
                "hypothesis": datasets.Value("string"),
                "label": entailment_label,
                "topic": topic_label,
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=schema,
            supervised_keys=None,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )
Beispiel #7
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Columns: an int32 ``fold``, a string ``subfold``, and three string
     sequences ``words``, ``segments`` and ``pos_tags``.
     """
     schema = datasets.Features(
         {
             "fold": datasets.Value("int32"),
             "subfold": datasets.Value("string"),
             "words": datasets.Sequence(datasets.Value("string")),
             "segments": datasets.Sequence(datasets.Value("string")),
             "pos_tags": datasets.Sequence(datasets.Value("string")),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for supervised use.
         supervised_keys=None,
         homepage="https://alt.qcri.org/resources/da_resources/",
         citation=_CITATION,
     )
Beispiel #8
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` for the selected configuration.

     The ``"alignments"`` configuration pairs a ``source_id`` with a list of
     target ids. Every other configuration uses the nested question schema
     (``id``, ``question`` with ``stem`` and ``choices``, ``answerKey`` and
     ``info``).
     """
     if self.config.name != "alignments":
         # One answer choice inside a question.
         choice_record = {
             "text": datasets.Value("string"),
             "label": datasets.Value("string"),
             "para": datasets.Value("string"),
         }
         schema = datasets.Features(
             {
                 "id": datasets.Value("string"),
                 "question": {
                     "stem": datasets.Value("string"),
                     "choices": datasets.Sequence(choice_record),
                 },
                 "answerKey": datasets.Value("string"),
                 "info": {
                     "grade": datasets.Value("int32"),
                     "subject": datasets.Value("string"),
                     "language": datasets.Value("string"),
                 },
             }
         )
     else:
         schema = datasets.Features(
             {
                 "source_id": datasets.Value("string"),
                 "target_id_list": datasets.Sequence(datasets.Value("string")),
             }
         )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
     )
    def _info(self):
        """Return the ``datasets.DatasetInfo`` describing this dataset.

        Each question has an id, a raw and a processed form, and a sequence of
        ``Parses``; every parse in turn holds a sequence of ``Answers``.
        """
        # Innermost record: one answer of a parse.
        answer_record = {
            "AnswersMid": datasets.Value("string"),
            "AnswersName": datasets.Sequence(datasets.Value("string")),
        }
        # One parse of a question, including its answers.
        parse_record = {
            "Parse-Id": datasets.Value("string"),
            "PotentialTopicEntityMention": datasets.Value("string"),
            "TopicEntityName": datasets.Value("string"),
            "TopicEntityMid": datasets.Value("string"),
            "InferentialChain": datasets.Value("string"),
            "Answers": datasets.Sequence(answer_record),
        }
        schema = datasets.Features(
            {
                "Question-ID": datasets.Value("string"),
                "RawQuestion": datasets.Value("string"),
                "ProcessedQuestion": datasets.Value("string"),
                "Parses": datasets.Sequence(parse_record),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=schema,
            supervised_keys=None,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )
Beispiel #10
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Most columns are strings or string sequences. ``suggestions`` is a
     sequence of records with one string field per entry of ``_TASK``, and
     ``label_candidates`` is a sequence of string sequences.
     """
     # One string field per task name listed in _TASK.
     suggestion_record = {task: datasets.Value("string") for task in _TASK}
     schema = datasets.Features(
         {
             "personas": datasets.features.Sequence(datasets.Value("string")),
             "additional_context": datasets.Value("string"),
             "previous_utterance": datasets.features.Sequence(datasets.Value("string")),
             "context": datasets.Value("string"),
             "free_messages": datasets.features.Sequence(datasets.Value("string")),
             "guided_messages": datasets.features.Sequence(datasets.Value("string")),
             "suggestions": datasets.features.Sequence(suggestion_record),
             "guided_chosen_suggestions": datasets.features.Sequence(datasets.Value("string")),
             "label_candidates": datasets.features.Sequence(
                 datasets.features.Sequence(datasets.Value("string"))
             ),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for supervised use.
         supervised_keys=None,
         homepage="https://parl.ai/projects/bst/",
         citation=_CITATION,
     )
    def _info(self):
        """Return the ``datasets.DatasetInfo`` for the selected configuration.

        ``first_domain`` exposes ``sequence`` together with a seven-class
        ``ECnumber`` label and registers a text-classification task template
        over those two columns. ``second_domain`` exposes ``sequence`` with two
        string columns and registers no template, because it has no
        ``ClassLabel`` column for a classification template to target.

        Raises:
            ValueError: If ``self.config.name`` is not one of the two known
                configurations (previously this surfaced as an
                ``UnboundLocalError`` on ``features``).
        """
        config_name = self.config.name
        if config_name == "first_domain":
            features = datasets.Features(
                {
                    "sequence": datasets.Value("string"),
                    # TODO: specify the main classes of enzymes by name?
                    "ECnumber": datasets.features.ClassLabel(names=["1", "2", "3", "4", "5", "6", "7"]),
                }
            )
            # The template has to name columns that actually exist in
            # `features`; the former "text"/"label" names matched neither
            # configuration's schema.
            task_templates = [TextClassification(text_column="sequence", label_column="ECnumber")]
        elif config_name == "second_domain":
            features = datasets.Features(
                {
                    "sequence": datasets.Value("string"),
                    "ECnumber_one": datasets.Value("string"),
                    "ECnumber_two": datasets.Value("string"),
                }
            )
            task_templates = None
        else:
            raise ValueError(
                f"Unknown configuration name: {config_name!r}; "
                "expected 'first_domain' or 'second_domain'."
            )
        return datasets.DatasetInfo(
            # This is the description that will appear on the datasets page.
            description=_DESCRIPTION,
            # Columns and their types; they differ between the two configurations.
            features=features,
            # No (input, target) pair is declared for supervised use.
            supervised_keys=None,
            # Homepage of the dataset for documentation
            homepage=_HOMEPAGE,
            # License for the dataset if available
            license=_LICENSE,
            # Citation for the dataset
            citation=_CITATION,
            task_templates=task_templates,
        )
Beispiel #12
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Scalar columns are strings; ``topics``, ``places``, ``people``,
     ``orgs`` and ``exchanges`` are string sequences.
     """
     schema = datasets.Features(
         {
             "text": datasets.Value("string"),
             "text_type": datasets.Value("string"),
             "topics": datasets.Sequence(datasets.Value("string")),
             "lewis_split": datasets.Value("string"),
             "cgis_split": datasets.Value("string"),
             "old_id": datasets.Value("string"),
             "new_id": datasets.Value("string"),
             "places": datasets.Sequence(datasets.Value("string")),
             "people": datasets.Sequence(datasets.Value("string")),
             "orgs": datasets.Sequence(datasets.Value("string")),
             "exchanges": datasets.Sequence(datasets.Value("string")),
             "date": datasets.Value("string"),
             "title": datasets.Value("string"),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for supervised use.
         supervised_keys=None,
         homepage="https://kdd.ics.uci.edu/databases/reuters21578/reuters21578.html",
         citation=_CITATION,
     )
Beispiel #13
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     All scalar columns are strings. ``answer_options`` is a sequence of
     ``answer_id`` / ``answer_str`` records; the correct option is repeated
     in ``correct_answer_id`` / ``correct_answer_str``.
     """
     answer_option = {
         "answer_id": datasets.Value("string"),
         "answer_str": datasets.Value("string"),
     }
     schema = datasets.Features(
         {
             "topic_id": datasets.Value("string"),
             "topic_name": datasets.Value("string"),
             "test_id": datasets.Value("string"),
             "document_id": datasets.Value("string"),
             "document_str": datasets.Value("string"),
             "question_id": datasets.Value("string"),
             "question_str": datasets.Value("string"),
             "answer_options": datasets.features.Sequence(answer_option),
             "correct_answer_id": datasets.Value("string"),
             "correct_answer_str": datasets.Value("string"),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # Both the passage and the question are needed to pick the answer,
         # so no single (input, target) pair is declared.
         supervised_keys=None,
         homepage="http://nlp.uned.es/clef-qa/repository/pastCampaigns.php",
         citation=_CITATION,
     )
Beispiel #14
0
    def _info(self):
        """Return the ``datasets.DatasetInfo`` describing this dataset.

        Declares an ``audio`` column at a 48 kHz sampling rate next to string
        metadata columns and int64 vote counts, and registers a speech
        recognition task template on the ``path`` and ``sentence`` columns.
        """
        schema = datasets.Features(
            {
                "client_id": datasets.Value("string"),
                "path": datasets.Value("string"),
                "audio": datasets.Audio(sampling_rate=48_000),
                "sentence": datasets.Value("string"),
                "up_votes": datasets.Value("int64"),
                "down_votes": datasets.Value("int64"),
                "age": datasets.Value("string"),
                "gender": datasets.Value("string"),
                "accent": datasets.Value("string"),
                "locale": datasets.Value("string"),
                "segment": datasets.Value("string"),
            }
        )
        asr_template = AutomaticSpeechRecognition(
            audio_file_path_column="path",
            transcription_column="sentence",
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=schema,
            supervised_keys=None,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
            task_templates=[asr_template],
        )
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Each example is a premise/hypothesis pair, both stored as raw text and
     as two parse strings, plus ids, a ``genre`` and a three-class ``label``.
     """
     schema = datasets.Features(
         {
             "promptID": datasets.Value("int32"),
             "pairID": datasets.Value("string"),
             "premise": datasets.Value("string"),
             # Parses in unlabeled binary-branching format.
             "premise_binary_parse": datasets.Value("string"),
             # Sentence as parsed by the Stanford PCFG Parser 3.5.2.
             "premise_parse": datasets.Value("string"),
             "hypothesis": datasets.Value("string"),
             # Parses in unlabeled binary-branching format.
             "hypothesis_binary_parse": datasets.Value("string"),
             # Sentence as parsed by the Stanford PCFG Parser 3.5.2.
             "hypothesis_parse": datasets.Value("string"),
             "genre": datasets.Value("string"),
             "label": datasets.features.ClassLabel(names=["entailment", "neutral", "contradiction"]),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # Premise and hypothesis are both inputs, so no single
         # (input, target) pair is declared.
         supervised_keys=None,
         homepage="https://www.nyu.edu/projects/bowman/multinli/",
         citation=_CITATION,
     )
Beispiel #16
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Columns: a string ``cluster_id`` and two string sequences,
     ``questions`` and ``answers``.
     """
     schema = datasets.Features(
         {
             "cluster_id": datasets.Value("string"),
             "questions": datasets.features.Sequence(datasets.Value("string")),
             "answers": datasets.features.Sequence(datasets.Value("string")),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         # No (input, target) pair is declared for supervised use.
         supervised_keys=None,
         homepage="http://qa.mpi-inf.mpg.de/comqa/",
         citation=_CITATION,
     )
Beispiel #17
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Each example has a category name, a question query, a keyword query and
     a sequence of ``answers`` records (``id``, ``title``, ``exact_answer``).
     """
     answer_record = {
         "id": datasets.Value("string"),
         "title": datasets.Value("string"),
         "exact_answer": datasets.Value("string"),
     }
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=datasets.Features(
             {
                 "category_name": datasets.Value("string"),
                 "question_query": datasets.Value("string"),
                 "keyword_query": datasets.Value("string"),
                 "answers": datasets.features.Sequence(answer_record),
             }
         ),
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
     )
Beispiel #18
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Each example is a ``lemma`` with a sequence of ``forms``. A form record
     has a ``word`` string, one class-label sequence per entry of
     ``_CATEGORIES``, and a free-text ``Other`` sequence.
     """
     # Fields are inserted in the order word -> categories -> Other.
     form_fields = {"word": datasets.Value("string")}
     for category, tag_names in _CATEGORIES.items():
         form_fields[category] = datasets.Sequence(datasets.ClassLabel(names=tag_names))
     # Catch-all column for misspecified tags.
     form_fields["Other"] = datasets.Sequence(datasets.Value("string"))

     schema = datasets.Features(
         {
             "lemma": datasets.Value("string"),
             "forms": datasets.Sequence(form_fields),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
     )
Beispiel #19
0
    def _info(self):
        """Return the ``datasets.DatasetInfo`` describing this dataset.

        Each example holds an ``en``/``hi`` ``translation`` pair along with
        string columns for its id, source and alignment type/quality.
        """
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=datasets.Features(
                {
                    "id": datasets.Value("string"),
                    "source": datasets.Value("string"),
                    "alignment_type": datasets.Value("string"),
                    "alignment_quality": datasets.Value("string"),
                    "translation": datasets.features.Translation(languages=["en", "hi"]),
                }
            ),
            supervised_keys=None,
            homepage=_HOMEPAGE,
            license=_LICENSE,
            citation=_CITATION,
        )
Beispiel #20
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Each example is a ``text`` string with a nine-class ``label``.
     """
     label_names = [
         "Web",
         "Panorama",
         "International",
         "Wirtschaft",
         "Sport",
         "Inland",
         "Etat",
         "Wissenschaft",
         "Kultur",
     ]
     schema = datasets.Features(
         {
             "text": datasets.Value("string"),
             "label": datasets.features.ClassLabel(names=label_names),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         homepage="https://tblock.github.io/10kGNAD/",
     )
Beispiel #21
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Each example is a ``tweet`` with four int64 count columns and a
     three-class ``class`` label; ``("tweet", "class")`` is declared as the
     supervised (input, target) pair.
     """
     class_label = datasets.ClassLabel(names=["hate speech", "offensive language", "neither"])
     schema = datasets.Features(
         {
             "count": datasets.Value("int64"),
             "hate_speech_count": datasets.Value("int64"),
             "offensive_language_count": datasets.Value("int64"),
             "neither_count": datasets.Value("int64"),
             "class": class_label,
             "tweet": datasets.Value("string"),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=("tweet", "class"),
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
     )
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Each example has a ``review_body``, a ``review_summary`` and a
     ``star_rating`` class label named ``"1"`` through ``"5"``. Two
     text-classification templates are registered, one per text column, both
     targeting ``star_rating``.
     """
     # Class names "1" .. "5".
     rating_names = list(map(str, range(1, 6)))
     schema = datasets.Features(
         {
             "review_body": datasets.Value("string"),
             "review_summary": datasets.Value("string"),
             "star_rating": datasets.ClassLabel(names=rating_names),
         }
     )
     body_template = TextClassification(text_column="review_body", label_column="star_rating")
     summary_template = TextClassification(text_column="review_summary", label_column="star_rating")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="http://www.lsi.us.es/~fermin/index.php/Datasets",
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[body_template, summary_template],
     )
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Each example is a ``tweet`` with four int32 annotation-count columns
     and a three-class ``label``; ``("tweet", "label")`` is declared as the
     supervised (input, target) pair.
     """
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=datasets.Features(
             {
                 "total_annotation_count": datasets.Value("int32"),
                 "hate_speech_annotations": datasets.Value("int32"),
                 "offensive_language_annotations": datasets.Value("int32"),
                 "neither_annotations": datasets.Value("int32"),
                 "label": datasets.ClassLabel(names=["hate-speech", "offensive-language", "neither"]),
                 "tweet": datasets.Value("string"),
             }
         ),
         supervised_keys=("tweet", "label"),
         homepage=_HOMEPAGE,
         citation=_CITATION,
     )
Beispiel #24
0
    def _info(self):
        """Return the ``datasets.DatasetInfo`` for the selected configuration.

        Configurations whose name starts with ``"tlc"`` expose ``ch_num``,
        ``title`` and a doubly nested string sequence ``text``; all other
        configurations expose only a flat string sequence ``text``.
        """
        if not self.config.name.startswith("tlc"):
            schema = datasets.Features(
                {"text": datasets.Sequence(datasets.Value("string"))}
            )
        else:
            schema = datasets.Features(
                {
                    "ch_num": datasets.Value("string"),
                    "title": datasets.Value("string"),
                    "text": datasets.Sequence(
                        datasets.Sequence(datasets.Value("string"))
                    ),
                }
            )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=schema,
            supervised_keys=None,
            homepage=_HOMEPAGE,
            citation=_CITATION,
        )
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Each example is a question over a table: the table is given by its file
     name, a header sequence and a nested sequence of string cells, and the
     answer by a sequence of (row, column) coordinates plus answer texts.
     """
     coordinate = {
         "row_index": datasets.Value("int32"),
         "column_index": datasets.Value("int32"),
     }
     schema = datasets.Features(
         {
             "id": datasets.Value("string"),
             "annotator": datasets.Value("int32"),
             "position": datasets.Value("int32"),
             "question": datasets.Value("string"),
             "table_file": datasets.Value("string"),
             "table_header": datasets.features.Sequence(datasets.Value("string")),
             "table_data": datasets.features.Sequence(
                 datasets.features.Sequence(datasets.Value("string"))
             ),
             "answer_coordinates": datasets.features.Sequence(coordinate),
             "answer_text": datasets.features.Sequence(datasets.Value("string")),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
     )
Beispiel #26
0
 def _info(self):
     """Return the ``ds.DatasetInfo`` describing this dataset.

     Description, citation, homepage and license are left empty. Each
     example has ``id``, ``title``, ``context`` and ``question`` strings and
     a sequence of ``answers`` (``text`` plus int32 ``answer_start``). An
     extractive question-answering template is registered on the
     ``question``, ``context`` and ``answers`` columns.
     """
     answer_record = {
         "text": ds.Value("string"),
         "answer_start": ds.Value("int32"),
     }
     schema = ds.Features(
         {
             "id": ds.Value("string"),
             "title": ds.Value("string"),
             "context": ds.Value("string"),
             "question": ds.Value("string"),
             "answers": ds.features.Sequence(answer_record),
         }
     )
     qa_template = QuestionAnsweringExtractive(
         question_column="question",
         context_column="context",
         answers_column="answers",
     )
     return ds.DatasetInfo(
         description="",
         citation="",
         homepage="",
         license="",
         features=schema,
         task_templates=[qa_template],
     )
Beispiel #27
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Each example consists of two string columns, ``question`` and
     ``answer``.
     """
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=datasets.Features(
             {
                 "question": datasets.Value("string"),
                 "answer": datasets.Value("string"),
             }
         ),
         # No (input, target) pair is declared for supervised use.
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
     )
Beispiel #28
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Columns: string ``id`` and ``url``, an int32 ``qid``, the ``question``
     string, a string sequence ``answers`` and a ``correct`` string.
     """
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=datasets.Features(
             {
                 "id": datasets.Value("string"),
                 "url": datasets.Value("string"),
                 "qid": datasets.Value("int32"),
                 "question": datasets.Value("string"),
                 "answers": datasets.Sequence(datasets.Value("string")),
                 "correct": datasets.Value("string"),
             }
         ),
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
     )
Beispiel #29
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Each example has a string ``id``, a ``tokens`` sequence, a nested int64
     ``bboxes`` sequence, a sequence of seven-class ``ner_tags`` and a uint8
     ``image`` array of shape ``(3, 224, 224)``.
     """
     tag_label = datasets.features.ClassLabel(
         names=[
             "O",
             "B-HEADER",
             "I-HEADER",
             "B-QUESTION",
             "I-QUESTION",
             "B-ANSWER",
             "I-ANSWER",
         ]
     )
     schema = datasets.Features(
         {
             "id": datasets.Value("string"),
             "tokens": datasets.Sequence(datasets.Value("string")),
             "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))),
             "ner_tags": datasets.Sequence(tag_label),
             "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"),
         }
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://guillaumejaume.github.io/FUNSD/",
         citation=_CITATION,
     )
Beispiel #30
0
 def _info(self):
     """Return the ``datasets.DatasetInfo`` describing this dataset.

     Declares an ``audio`` column at a 22050 Hz sampling rate next to string
     ``id``, ``file``, ``text`` and ``normalized_text`` columns. ``("file",
     "text")`` is the supervised pair, and a speech-recognition template is
     registered on the same two columns.
     """
     schema = datasets.Features(
         {
             "id": datasets.Value("string"),
             "audio": datasets.features.Audio(sampling_rate=22050),
             "file": datasets.Value("string"),
             "text": datasets.Value("string"),
             "normalized_text": datasets.Value("string"),
         }
     )
     asr_template = AutomaticSpeechRecognition(
         audio_file_path_column="file",
         transcription_column="text",
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=("file", "text"),
         homepage=_URL,
         citation=_CITATION,
         task_templates=[asr_template],
     )