def test_column_mapping(self):
    """Check that ``column_mapping`` maps custom column names to the template's names.

    The template is built with non-default names for its three columns; the
    resulting ``column_mapping`` must send each of them to ``"context"``,
    ``"question"`` and ``"answers"`` respectively.
    """
    expected_mapping = {
        "input_context": "context",
        "input_question": "question",
        "input_answers": "answers",
    }
    template = QuestionAnsweringExtractive(
        context_column="input_context",
        question_column="input_question",
        answers_column="input_answers",
    )
    self.assertDictEqual(expected_mapping, template.column_mapping)
def _info(self):
    """Build the ``datasets.DatasetInfo`` for this dataset.

    The schema has string ``id``, ``context`` and ``question`` columns and an
    ``answers`` sequence of ``text`` (string) / ``answer_start`` (int32)
    entries. An extractive question-answering task template is registered
    over the ``question``, ``context`` and ``answers`` columns.
    """
    answer_spans = datasets.features.Sequence(
        {
            "text": datasets.Value("string"),
            "answer_start": datasets.Value("int32"),
        }
    )
    schema = datasets.Features(
        {
            "id": datasets.Value("string"),
            "context": datasets.Value("string"),
            "question": datasets.Value("string"),
            "answers": answer_spans,
        }
    )
    qa_template = QuestionAnsweringExtractive(
        question_column="question",
        context_column="context",
        answers_column="answers",
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        # No (input, target) pair is declared for supervised loading.
        supervised_keys=None,
        homepage="https://github.com/deepmind/xquad",
        citation=_CITATION,
        task_templates=[qa_template],
    )
def _info(self):
    """Build the ``datasets.DatasetInfo`` for this dataset.

    Columns ``id``, ``title``, ``context`` and ``question`` are strings;
    ``answers`` is a sequence of ``text`` (string) / ``answer_start`` (int32)
    entries. An extractive question-answering task template is registered
    over the ``question``, ``context`` and ``answers`` columns.
    """
    # Each string column gets its own Value instance, in declaration order.
    schema = {
        column: datasets.Value("string")
        for column in ("id", "title", "context", "question")
    }
    schema["answers"] = datasets.features.Sequence(
        {
            "text": datasets.Value("string"),
            "answer_start": datasets.Value("int32"),
        }
    )
    qa_template = QuestionAnsweringExtractive(
        question_column="question",
        context_column="context",
        answers_column="answers",
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(schema),
        # No default supervised_keys: both question and context have to be
        # passed as input.
        supervised_keys=None,
        homepage="https://rajpurkar.github.io/SQuAD-explorer/",
        citation=_CITATION,
        task_templates=[qa_template],
    )
def test_map_on_task_template(self):
    """Check how ``Dataset.map`` treats task templates stored in the dataset info.

    A mapping function that returns its input unchanged must leave
    ``info.task_templates`` equal to the source dataset's templates. A mapping
    function that adds an extra ``"foobar"`` key under ``"answers"`` must
    leave ``info.task_templates`` empty. Each case is run twice; the second
    run is the reload-from-cache pass.
    """

    def keep_task(example):
        return example

    def dont_keep_task(example):
        # The input is ignored: a modified copy of the sample is returned.
        altered = deepcopy(SAMPLE_QUESTION_ANSWERING_EXTRACTIVE)
        altered["answers"]["foobar"] = 0
        return altered

    # A single template (not a list) is passed on purpose; the assertions
    # below expect the info object to expose it as a one-element list.
    info = DatasetInfo(task_templates=QuestionAnsweringExtractive())
    single_row_columns = {
        column: [value]
        for column, value in SAMPLE_QUESTION_ANSWERING_EXTRACTIVE.items()
    }
    dataset = Dataset.from_dict(single_row_columns, info=info)

    assert isinstance(dataset.info.task_templates, list)
    assert len(dataset.info.task_templates) == 1

    # First pass computes the mapping, second pass reloads it from cache.
    for _ in range(2):
        mapped_dataset = dataset.map(keep_task)
        assert mapped_dataset.info.task_templates == dataset.info.task_templates

    # Same two passes for the function that changes the "answers" structure.
    for _ in range(2):
        mapped_dataset = dataset.map(dont_keep_task)
        assert mapped_dataset.info.task_templates == []
def _info(self):
    """Build the ``datasets.DatasetInfo`` for this dataset.

    ``id`` is an int32 column; ``title``, ``context`` and ``question`` are
    strings; ``answers`` is a sequence of ``text`` (string) /
    ``answer_start`` (int32) entries. An extractive question-answering task
    template is registered over ``question``, ``context`` and ``answers``.
    """
    # Assemble the schema incrementally, keeping the original column order.
    columns = {"id": datasets.Value("int32")}
    for text_column in ("title", "context", "question"):
        columns[text_column] = datasets.Value("string")
    columns["answers"] = datasets.features.Sequence(
        {
            "text": datasets.Value("string"),
            "answer_start": datasets.Value("int32"),
        }
    )

    qa_template = QuestionAnsweringExtractive(
        question_column="question",
        context_column="context",
        answers_column="answers",
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(columns),
        supervised_keys=None,
        homepage="",
        citation=_CITATION,
        task_templates=[qa_template],
    )
def _info(self):
    """Build the ``ds.DatasetInfo`` for this dataset.

    Description, citation, homepage and license are all empty strings. The
    schema has string ``id``, ``title``, ``context`` and ``question`` columns
    and an ``answers`` sequence of ``text`` (string) / ``answer_start``
    (int32) entries. An extractive question-answering task template is
    registered over ``question``, ``context`` and ``answers``.
    """
    answer_spans = ds.features.Sequence(
        {
            "text": ds.Value("string"),
            "answer_start": ds.Value("int32"),
        }
    )
    schema = ds.Features(
        {
            "id": ds.Value("string"),
            "title": ds.Value("string"),
            "context": ds.Value("string"),
            "question": ds.Value("string"),
            "answers": answer_spans,
        }
    )
    qa_template = QuestionAnsweringExtractive(
        question_column="question",
        context_column="context",
        answers_column="answers",
    )
    return ds.DatasetInfo(
        description="",
        citation="",
        homepage="",
        license="",
        features=schema,
        task_templates=[qa_template],
    )
def _info(self):
    """Build the ``datasets.DatasetInfo`` for this dataset.

    Scalar columns are ``document_id`` (int32), ``context`` (string),
    ``question`` (string), ``is_impossible`` (bool) and ``id`` (int32);
    ``answers`` is a sequence of ``text`` (string) / ``answer_start`` (int32)
    entries. An extractive question-answering task template is registered
    over ``question``, ``context`` and ``answers``.
    """
    # (column name, dtype) pairs, listed in the order the columns are declared.
    scalar_columns = (
        ("document_id", "int32"),
        ("context", "string"),
        ("question", "string"),
        ("is_impossible", "bool"),
        ("id", "int32"),
    )
    schema = {name: datasets.Value(dtype) for name, dtype in scalar_columns}
    schema["answers"] = datasets.features.Sequence(
        {
            "text": datasets.Value("string"),
            "answer_start": datasets.Value("int32"),
        }
    )
    features = datasets.Features(schema)

    qa_template = QuestionAnsweringExtractive(
        question_column="question",
        context_column="context",
        answers_column="answers",
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=features,
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
        task_templates=[qa_template],
    )
def _info(self):
    """Build the ``datasets.DatasetInfo`` for this dataset.

    String columns are ``squad_v2_id``, ``original question``,
    ``disfluent question``, ``title`` and ``context``; ``answers`` is a
    sequence of ``text`` (string) / ``answer_start`` (int32) entries. The
    extractive question-answering task template uses the
    ``disfluent question`` column as its question column.
    """
    # Column names contain spaces, so they are kept as plain string keys.
    text_columns = (
        "squad_v2_id",
        "original question",
        "disfluent question",
        "title",
        "context",
    )
    schema = {name: datasets.Value("string") for name in text_columns}
    schema["answers"] = datasets.features.Sequence(
        {
            "text": datasets.Value("string"),
            "answer_start": datasets.Value("int32"),
        }
    )
    features = datasets.Features(schema)

    qa_template = QuestionAnsweringExtractive(
        question_column="disfluent question",
        context_column="context",
        answers_column="answers",
    )
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=features,
        # No (input, target) pair is declared for supervised loading.
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
        task_templates=[qa_template],
    )
def _info(self):
    """Build the ``datasets.DatasetInfo`` for the selected configuration.

    ``primary_task`` declares passage candidates, question text, document
    title, language, annotations, document plaintext and document URL, with
    no task template. ``secondary_task`` declares string ``id``, ``title``,
    ``context`` and ``question`` columns plus an ``answers`` sequence, and
    registers an extractive question-answering task template. Any other
    configuration name falls through and returns ``None``.
    """
    config_name = self.config.name

    if config_name == "primary_task":
        passage_candidates = datasets.features.Sequence(
            {
                "plaintext_start_byte": datasets.Value("int32"),
                "plaintext_end_byte": datasets.Value("int32"),
            }
        )
        # NOTE: an 'annotation_id' field is deliberately not declared here.
        annotations = datasets.features.Sequence(
            {
                "passage_answer_candidate_index": datasets.Value("int32"),
                "minimal_answers_start_byte": datasets.Value("int32"),
                "minimal_answers_end_byte": datasets.Value("int32"),
                "yes_no_answer": datasets.Value("string"),
            }
        )
        # NOTE: an 'example_id' field is deliberately not declared here.
        primary_features = datasets.Features(
            {
                "passage_answer_candidates": passage_candidates,
                "question_text": datasets.Value("string"),
                "document_title": datasets.Value("string"),
                "language": datasets.Value("string"),
                "annotations": annotations,
                "document_plaintext": datasets.Value("string"),
                "document_url": datasets.Value("string"),
            }
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=primary_features,
            # No (input, target) pair is declared for supervised loading.
            supervised_keys=None,
            homepage="https://github.com/google-research-datasets/tydiqa",
            citation=_CITATION,
        )

    if config_name == "secondary_task":
        answer_spans = datasets.features.Sequence(
            {
                "text": datasets.Value("string"),
                "answer_start": datasets.Value("int32"),
            }
        )
        secondary_features = datasets.Features(
            {
                "id": datasets.Value("string"),
                "title": datasets.Value("string"),
                "context": datasets.Value("string"),
                "question": datasets.Value("string"),
                "answers": answer_spans,
            }
        )
        qa_template = QuestionAnsweringExtractive(
            question_column="question",
            context_column="context",
            answers_column="answers",
        )
        return datasets.DatasetInfo(
            description=_DESCRIPTION,
            features=secondary_features,
            # No default supervised_keys: both question and context have to
            # be passed as input.
            supervised_keys=None,
            homepage="https://github.com/google-research-datasets/tydiqa",
            citation=_CITATION,
            task_templates=[qa_template],
        )

    # Neither known configuration matched: no info object is built.
    return None