def _info(self):
    """Describe the dataset schema and metadata.

    Each example has a nested ``input_text`` (a ``table`` sequence of
    ``column_header`` / ``row_number`` / ``content`` records plus a
    ``context`` string) and a ``target_text`` string. The pair is also
    declared as the supervised (input, target) keys.
    """
    table_record = {
        "column_header": datasets.Value("string"),
        "row_number": datasets.Value("int16"),
        "content": datasets.Value("string"),
    }
    input_side = {
        "table": datasets.Sequence(table_record),
        "context": datasets.Value("string"),
    }
    schema = datasets.Features(
        {
            "input_text": input_side,
            "target_text": datasets.Value("string"),
        }
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=("input_text", "target_text"),
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Examples carry an integer ``abstract_id``, the ``text`` itself, and two
    list-valued columns, ``location`` (int32) and ``label`` (string).
    No supervised keys are declared.
    """
    columns = {
        "abstract_id": datasets.Value("int32"),
        "text": datasets.Value("string"),
        "location": datasets.Sequence(datasets.Value("int32")),
        "label": datasets.Sequence(datasets.Value("string")),
    }
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(columns),
        supervised_keys=None,
        homepage="https://github.com/BruceWen120/medal",
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for the wiki_split builder.

    Every example is three string columns: one ``complex_sentence`` and the
    two simple sentences (``simple_sentence_1``, ``simple_sentence_2``).
    No supervised keys are declared.
    """
    sentence_columns = {
        "complex_sentence": datasets.Value("string"),
        "simple_sentence_1": datasets.Value("string"),
        "simple_sentence_2": datasets.Value("string"),
    }
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(sentence_columns),
        supervised_keys=None,
        # The loader template's placeholder ("https://dataset-homepage/") was
        # never filled in; point at the dataset's actual repository instead.
        homepage="https://github.com/google-research-datasets/wiki-split",
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Besides ``ID``, ``Text`` and ``URL``, each example has a ``Pronoun`` and
    two candidate names (``A``, ``B``); each of the three has an int32
    ``-offset`` column, and the candidates also have a boolean ``-coref``
    column.
    """
    columns = {
        "ID": datasets.Value("string"),
        "Text": datasets.Value("string"),
        # Pronoun and its offset.
        "Pronoun": datasets.Value("string"),
        "Pronoun-offset": datasets.Value("int32"),
        # Candidate A.
        "A": datasets.Value("string"),
        "A-offset": datasets.Value("int32"),
        "A-coref": datasets.Value("bool"),
        # Candidate B.
        "B": datasets.Value("string"),
        "B-offset": datasets.Value("int32"),
        "B-coref": datasets.Value("bool"),
        "URL": datasets.Value("string"),
    }
    schema = datasets.Features(columns)
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage="https://github.com/google-research-datasets/gap-coreference",
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    The schema is flat: string identifiers and text columns, three int32
    counters (``star_rating``, ``helpful_votes``, ``total_votes``) and two
    ``N``/``Y`` class labels (``vine``, ``verified_purchase``).
    """
    columns = {
        "marketplace": datasets.Value("string"),
        "customer_id": datasets.Value("string"),
        "review_id": datasets.Value("string"),
        "product_id": datasets.Value("string"),
        "product_parent": datasets.Value("string"),
        "product_title": datasets.Value("string"),
        "product_category": datasets.Value("string"),
        "star_rating": datasets.Value("int32"),
        "helpful_votes": datasets.Value("int32"),
        "total_votes": datasets.Value("int32"),
        "vine": datasets.features.ClassLabel(names=["N", "Y"]),
        "verified_purchase": datasets.features.ClassLabel(names=["N", "Y"]),
        "review_headline": datasets.Value("string"),
        "review_body": datasets.Value("string"),
        "review_date": datasets.Value("string"),
    }
    schema = datasets.Features(columns)
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage="https://s3.amazonaws.com/amazon-reviews-pds/readme.html",
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Examples are ``premise`` / ``hypothesis`` string pairs with a two-way
    ``label`` and a six-way ``topic`` class label.
    """
    entailment_label = datasets.ClassLabel(names=["not-entailment", "entailment"])
    topic_label = datasets.ClassLabel(
        names=[
            "india",
            "news",
            "international",
            "entertainment",
            "sport",
            "science",
        ]
    )
    schema = datasets.Features(
        {
            "premise": datasets.Value("string"),
            "hypothesis": datasets.Value("string"),
            "label": entailment_label,
            "topic": topic_label,
        }
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Each example records its ``fold`` (int32) and ``subfold`` (string) and
    three string sequences: ``words``, ``segments`` and ``pos_tags``.
    """
    columns = {
        "fold": datasets.Value("int32"),
        "subfold": datasets.Value("string"),
    }
    # The remaining columns are all sequences of strings.
    for sequence_column in ("words", "segments", "pos_tags"):
        columns[sequence_column] = datasets.Sequence(datasets.Value("string"))

    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(columns),
        supervised_keys=None,
        homepage="https://alt.qcri.org/resources/da_resources/",
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` matching the selected configuration.

    The ``"alignments"`` configuration maps a ``source_id`` to a list of
    target ids; every other configuration uses the question schema
    (``id``, nested ``question`` with its ``choices``, ``answerKey`` and a
    nested ``info`` record).
    """
    if self.config.name != "alignments":
        choice = {
            "text": datasets.Value("string"),
            "label": datasets.Value("string"),
            "para": datasets.Value("string"),
        }
        question = {
            "stem": datasets.Value("string"),
            "choices": datasets.Sequence(choice),
        }
        metadata = {
            "grade": datasets.Value("int32"),
            "subject": datasets.Value("string"),
            "language": datasets.Value("string"),
        }
        schema = datasets.Features(
            {
                "id": datasets.Value("string"),
                "question": question,
                "answerKey": datasets.Value("string"),
                "info": metadata,
            }
        )
    else:
        schema = datasets.Features(
            {
                "source_id": datasets.Value("string"),
                "target_id_list": datasets.Sequence(datasets.Value("string")),
            }
        )

    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    A question (id, raw and processed text) owns a sequence of ``Parses``;
    each parse in turn owns a sequence of ``Answers``.
    """
    answer = {
        "AnswersMid": datasets.Value("string"),
        "AnswersName": datasets.Sequence(datasets.Value("string")),
    }
    parse = {
        "Parse-Id": datasets.Value("string"),
        "PotentialTopicEntityMention": datasets.Value("string"),
        "TopicEntityName": datasets.Value("string"),
        "TopicEntityMid": datasets.Value("string"),
        "InferentialChain": datasets.Value("string"),
        "Answers": datasets.Sequence(answer),
    }
    schema = datasets.Features(
        {
            "Question-ID": datasets.Value("string"),
            "RawQuestion": datasets.Value("string"),
            "ProcessedQuestion": datasets.Value("string"),
            "Parses": datasets.Sequence(parse),
        }
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Most columns are sequences of strings; ``suggestions`` is a sequence of
    records with one string field per entry of ``_TASK``, and
    ``label_candidates`` is a sequence of string sequences.
    """
    suggestion_record = {task: datasets.Value("string") for task in _TASK}
    columns = {
        "personas": datasets.features.Sequence(datasets.Value("string")),
        "additional_context": datasets.Value("string"),
        "previous_utterance": datasets.features.Sequence(datasets.Value("string")),
        "context": datasets.Value("string"),
        "free_messages": datasets.features.Sequence(datasets.Value("string")),
        "guided_messages": datasets.features.Sequence(datasets.Value("string")),
        "suggestions": datasets.features.Sequence(suggestion_record),
        "guided_chosen_suggestions": datasets.features.Sequence(datasets.Value("string")),
        "label_candidates": datasets.features.Sequence(
            datasets.features.Sequence(datasets.Value("string"))
        ),
    }
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(columns),
        supervised_keys=None,
        homepage="https://parl.ai/projects/bst/",
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` matching the selected configuration.

    * ``"first_domain"``: ``sequence`` (string) and ``ECnumber`` as a
      seven-way class label. A text-classification task template is
      attached, bound to exactly those two columns.
    * ``"second_domain"``: ``sequence`` plus two free-form string columns
      (``ECnumber_one``, ``ECnumber_two``). There is no class-label column,
      so no task template is attached.

    Raises:
        ValueError: if ``self.config.name`` is neither of the two
            configurations above.
    """
    config_name = self.config.name
    if config_name == "first_domain":
        features = datasets.Features(
            {
                "sequence": datasets.Value("string"),
                # TODO: specify the main classes of enzymes by name?
                "ECnumber": datasets.features.ClassLabel(names=["1", "2", "3", "4", "5", "6", "7"]),
            }
        )
        # The template must name columns that exist in `features`; the
        # previous "text"/"label" pair matched neither configuration.
        task_templates = [TextClassification(text_column="sequence", label_column="ECnumber")]
    elif config_name == "second_domain":
        features = datasets.Features(
            {
                "sequence": datasets.Value("string"),
                "ECnumber_one": datasets.Value("string"),
                "ECnumber_two": datasets.Value("string"),
            }
        )
        task_templates = None
    else:
        # Fail with a clear message instead of an unbound-local error below.
        raise ValueError(
            f"Unknown configuration {config_name!r}; expected 'first_domain' or 'second_domain'."
        )

    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=features,
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
        task_templates=task_templates,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Scalar string columns hold the document text and its metadata, while
    ``topics``, ``places``, ``people``, ``orgs`` and ``exchanges`` are
    sequences of strings. No supervised keys are declared.
    """
    columns = {
        "text": datasets.Value("string"),
        "text_type": datasets.Value("string"),
        "topics": datasets.Sequence(datasets.Value("string")),
        "lewis_split": datasets.Value("string"),
        "cgis_split": datasets.Value("string"),
        "old_id": datasets.Value("string"),
        "new_id": datasets.Value("string"),
        "places": datasets.Sequence(datasets.Value("string")),
        "people": datasets.Sequence(datasets.Value("string")),
        "orgs": datasets.Sequence(datasets.Value("string")),
        "exchanges": datasets.Sequence(datasets.Value("string")),
        "date": datasets.Value("string"),
        "title": datasets.Value("string"),
    }
    schema = datasets.Features(columns)
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage="https://kdd.ics.uci.edu/databases/reuters21578/reuters21578.html",
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Every example identifies its topic, test, document and question, lists
    the ``answer_options`` (id + text records) and names the correct answer
    by id and by text.
    """
    answer_option = {
        "answer_id": datasets.Value("string"),
        "answer_str": datasets.Value("string"),
    }
    columns = {
        "topic_id": datasets.Value("string"),
        "topic_name": datasets.Value("string"),
        "test_id": datasets.Value("string"),
        "document_id": datasets.Value("string"),
        "document_str": datasets.Value("string"),
        "question_id": datasets.Value("string"),
        "question_str": datasets.Value("string"),
        "answer_options": datasets.features.Sequence(answer_option),
        "correct_answer_id": datasets.Value("string"),
        "correct_answer_str": datasets.Value("string"),
    }
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(columns),
        # Passage and question are both inputs, so there is no single
        # (input, target) pair to declare.
        supervised_keys=None,
        homepage="http://nlp.uned.es/clef-qa/repository/pastCampaigns.php",
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    The schema combines the clip (``path`` and a 48 kHz ``audio`` feature),
    its ``sentence``, vote counts and string metadata columns. A speech
    recognition task template maps ``path`` to ``sentence``.
    """
    columns = {
        "client_id": datasets.Value("string"),
        "path": datasets.Value("string"),
        "audio": datasets.Audio(sampling_rate=48_000),
        "sentence": datasets.Value("string"),
        "up_votes": datasets.Value("int64"),
        "down_votes": datasets.Value("int64"),
        "age": datasets.Value("string"),
        "gender": datasets.Value("string"),
        "accent": datasets.Value("string"),
        "locale": datasets.Value("string"),
        "segment": datasets.Value("string"),
    }
    schema = datasets.Features(columns)
    asr_template = AutomaticSpeechRecognition(
        audio_file_path_column="path",
        transcription_column="sentence",
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
        task_templates=[asr_template],
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Premise and hypothesis are each stored three times: as plain text, as
    an unlabeled binary-branching parse (``*_binary_parse``) and as the
    Stanford PCFG Parser 3.5.2 parse (``*_parse``). ``label`` is a three-way
    class label.
    """
    columns = {
        "promptID": datasets.Value("int32"),
        "pairID": datasets.Value("string"),
        "premise": datasets.Value("string"),
        # Unlabeled binary-branching parse.
        "premise_binary_parse": datasets.Value("string"),
        # Parse produced by the Stanford PCFG Parser 3.5.2.
        "premise_parse": datasets.Value("string"),
        "hypothesis": datasets.Value("string"),
        # Unlabeled binary-branching parse.
        "hypothesis_binary_parse": datasets.Value("string"),
        # Parse produced by the Stanford PCFG Parser 3.5.2.
        "hypothesis_parse": datasets.Value("string"),
        "genre": datasets.Value("string"),
        "label": datasets.features.ClassLabel(
            names=["entailment", "neutral", "contradiction"]
        ),
    }
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(columns),
        # Both premise and hypothesis are inputs, so no default
        # (input, target) pair is declared.
        supervised_keys=None,
        homepage="https://www.nyu.edu/projects/bowman/multinli/",
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    An example is a cluster: its ``cluster_id`` plus string sequences of
    ``questions`` and ``answers``.
    """
    columns = {
        "cluster_id": datasets.Value("string"),
        "questions": datasets.features.Sequence(datasets.Value("string")),
        "answers": datasets.features.Sequence(datasets.Value("string")),
    }
    schema = datasets.Features(columns)
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage="http://qa.mpi-inf.mpg.de/comqa/",
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Each example has a category name, a question-style and a keyword-style
    query, and a sequence of ``answers`` records (``id``, ``title``,
    ``exact_answer``).
    """
    answer_record = {
        "id": datasets.Value("string"),
        "title": datasets.Value("string"),
        "exact_answer": datasets.Value("string"),
    }
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(
            {
                "category_name": datasets.Value("string"),
                "question_query": datasets.Value("string"),
                "keyword_query": datasets.Value("string"),
                "answers": datasets.features.Sequence(answer_record),
            }
        ),
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    An example is a ``lemma`` with a sequence of ``forms``. Each form has
    the inflected ``word``, one class-label sequence per entry of
    ``_CATEGORIES`` and an ``Other`` string sequence.
    """
    form_record = {"word": datasets.Value("string")}
    for category, tag_names in _CATEGORIES.items():
        form_record[category] = datasets.Sequence(datasets.ClassLabel(names=tag_names))
    # Catch-all column for misspecified tags.
    form_record["Other"] = datasets.Sequence(datasets.Value("string"))

    schema = datasets.Features(
        {
            "lemma": datasets.Value("string"),
            "forms": datasets.Sequence(form_record),
        }
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Each example is an ``en``/``hi`` translation pair together with its id,
    source and alignment type/quality strings.
    """
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(
            {
                "id": datasets.Value("string"),
                "source": datasets.Value("string"),
                "alignment_type": datasets.Value("string"),
                "alignment_quality": datasets.Value("string"),
                "translation": datasets.features.Translation(languages=["en", "hi"]),
            }
        ),
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Examples are a ``text`` string with a nine-way ``label``. Only the
    description, features and homepage are set.
    """
    category_label = datasets.features.ClassLabel(
        names=[
            "Web",
            "Panorama",
            "International",
            "Wirtschaft",
            "Sport",
            "Inland",
            "Etat",
            "Wissenschaft",
            "Kultur",
        ]
    )
    schema = datasets.Features(
        {
            "text": datasets.Value("string"),
            "label": category_label,
        }
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        homepage="https://tblock.github.io/10kGNAD/",
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Four int64 count columns are followed by the three-way ``class`` label
    and the ``tweet`` text; ``("tweet", "class")`` is the supervised pair.
    """
    count_columns = (
        "count",
        "hate_speech_count",
        "offensive_language_count",
        "neither_count",
    )
    columns = {name: datasets.Value("int64") for name in count_columns}
    columns["class"] = datasets.ClassLabel(
        names=["hate speech", "offensive language", "neither"]
    )
    columns["tweet"] = datasets.Value("string")

    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(columns),
        supervised_keys=("tweet", "class"),
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    ``star_rating`` is a class label over the strings ``"1"``..``"5"``.
    Two text-classification templates are attached, one per text column
    (``review_body``, ``review_summary``), both targeting ``star_rating``.
    """
    rating_names = list(map(str, range(1, 6)))
    schema = datasets.Features(
        {
            "review_body": datasets.Value("string"),
            "review_summary": datasets.Value("string"),
            "star_rating": datasets.ClassLabel(names=rating_names),
        }
    )
    classification_templates = [
        TextClassification(text_column=text_column, label_column="star_rating")
        for text_column in ("review_body", "review_summary")
    ]
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage="http://www.lsi.us.es/~fermin/index.php/Datasets",
        license=_LICENSE,
        citation=_CITATION,
        task_templates=classification_templates,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Four int32 annotation-count columns are followed by the three-way
    ``label`` and the ``tweet`` text; ``("tweet", "label")`` is the
    supervised pair.
    """
    annotation_columns = (
        "total_annotation_count",
        "hate_speech_annotations",
        "offensive_language_annotations",
        "neither_annotations",
    )
    columns = {name: datasets.Value("int32") for name in annotation_columns}
    columns["label"] = datasets.ClassLabel(
        names=["hate-speech", "offensive-language", "neither"]
    )
    columns["tweet"] = datasets.Value("string")

    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(columns),
        supervised_keys=("tweet", "label"),
        homepage=_HOMEPAGE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` matching the selected configuration.

    Configurations whose name starts with ``"tlc"`` expose ``ch_num``,
    ``title`` and a nested (sequence of sequences) ``text``; all other
    configurations expose only a flat string sequence ``text``.
    """
    is_tlc = self.config.name.startswith("tlc")
    if not is_tlc:
        columns = {
            "text": datasets.Sequence(datasets.Value("string")),
        }
    else:
        columns = {
            "ch_num": datasets.Value("string"),
            "title": datasets.Value("string"),
            "text": datasets.Sequence(datasets.Sequence(datasets.Value("string"))),
        }

    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(columns),
        supervised_keys=None,
        homepage=_HOMEPAGE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Each example holds a question, the table it refers to (file name,
    header row and a sequence-of-sequences ``table_data``) and the answer
    as both cell coordinates and text.
    """
    cell_coordinate = {
        "row_index": datasets.Value("int32"),
        "column_index": datasets.Value("int32"),
    }
    columns = {
        "id": datasets.Value("string"),
        "annotator": datasets.Value("int32"),
        "position": datasets.Value("int32"),
        "question": datasets.Value("string"),
        "table_file": datasets.Value("string"),
        "table_header": datasets.features.Sequence(datasets.Value("string")),
        "table_data": datasets.features.Sequence(
            datasets.features.Sequence(datasets.Value("string"))
        ),
        "answer_coordinates": datasets.features.Sequence(cell_coordinate),
        "answer_text": datasets.features.Sequence(datasets.Value("string")),
    }
    schema = datasets.Features(columns)
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Description, citation, homepage and license are left as empty strings.
    The schema has ``id``, ``title``, ``context``, ``question`` and an
    ``answers`` sequence of (``text``, ``answer_start``) records, and an
    extractive question-answering template is bound to those columns.
    """
    answer_record = {
        "text": ds.Value("string"),
        "answer_start": ds.Value("int32"),
    }
    schema = ds.Features(
        {
            "id": ds.Value("string"),
            "title": ds.Value("string"),
            "context": ds.Value("string"),
            "question": ds.Value("string"),
            "answers": ds.features.Sequence(answer_record),
        }
    )
    qa_template = QuestionAnsweringExtractive(
        question_column="question",
        context_column="context",
        answers_column="answers",
    )
    return ds.DatasetInfo(
        description="",
        citation="",
        homepage="",
        license="",
        features=schema,
        task_templates=[qa_template],
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    The schema is two string columns, ``question`` and ``answer``; no
    supervised keys are declared.
    """
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(
            {
                "question": datasets.Value("string"),
                "answer": datasets.Value("string"),
            }
        ),
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Each example has ``id``, ``url``, an int32 ``qid``, the ``question``, a
    string sequence of ``answers`` and the ``correct`` string.
    """
    columns = {
        "id": datasets.Value("string"),
        "url": datasets.Value("string"),
        "qid": datasets.Value("int32"),
        "question": datasets.Value("string"),
        "answers": datasets.Sequence(datasets.Value("string")),
        "correct": datasets.Value("string"),
    }
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(columns),
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Each example has ``tokens`` with ``bboxes`` and ``ner_tags`` sequences
    (the tags are a seven-way class label), plus an ``image`` declared as a
    ``uint8`` array of shape ``(3, 224, 224)``.
    """
    tag_label = datasets.features.ClassLabel(
        names=[
            "O",
            "B-HEADER",
            "I-HEADER",
            "B-QUESTION",
            "I-QUESTION",
            "B-ANSWER",
            "I-ANSWER",
        ]
    )
    columns = {
        "id": datasets.Value("string"),
        "tokens": datasets.Sequence(datasets.Value("string")),
        "bboxes": datasets.Sequence(datasets.Sequence(datasets.Value("int64"))),
        "ner_tags": datasets.Sequence(tag_label),
        "image": datasets.Array3D(shape=(3, 224, 224), dtype="uint8"),
    }
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(columns),
        supervised_keys=None,
        homepage="https://guillaumejaume.github.io/FUNSD/",
        citation=_CITATION,
    )
def _info(self):
    """Return the ``DatasetInfo`` for this builder.

    Each example has an ``id``, a 22050 Hz ``audio`` feature, the ``file``
    path and both raw and normalized transcripts. ``("file", "text")`` is
    the supervised pair, and a speech-recognition template maps ``file`` to
    ``text``.
    """
    columns = {
        "id": datasets.Value("string"),
        "audio": datasets.features.Audio(sampling_rate=22050),
        "file": datasets.Value("string"),
        "text": datasets.Value("string"),
        "normalized_text": datasets.Value("string"),
    }
    schema = datasets.Features(columns)
    asr_template = AutomaticSpeechRecognition(
        audio_file_path_column="file",
        transcription_column="text",
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=("file", "text"),
        homepage=_URL,
        citation=_CITATION,
        task_templates=[asr_template],
    )