def _info(self):
    """Describe the dataset: plain-text articles keyed by Wikidata id."""
    feature_spec = nlp.Features(
        {
            "wikidata_id": nlp.Value("string"),
            "text": nlp.Value("string"),
            "version_id": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        supervised_keys=None,  # no canonical (input, target) pair
        homepage=_URL,
        citation=_CITATION,
    )
def _info(self):
    """Describe the CFQ dataset: a natural-language question paired with a query."""
    feature_spec = nlp.Features(
        {
            _QUESTION: nlp.Value("string"),
            _QUERY: nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        # Default supervised pair: question in, query out.
        supervised_keys=(_QUESTION, _QUERY),
        homepage="https://github.com/google-research/google-research/tree/master/cfq",
        citation=_CITATION,
    )
def _info(self):
    """Describe the long-summarization dataset (document, summary, section names)."""
    feature_spec = nlp.Features(
        {
            _DOCUMENT: nlp.Value("string"),
            _SUMMARY: nlp.Value("string"),
            "section_names": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        supervised_keys=None,  # no canonical (input, target) pair declared
        homepage="https://github.com/armancohan/long-summarization",
        citation=_CITATION,
    )
def _info(self):
    """Describe the dataset: id/text records, with an explicit license field."""
    feature_spec = nlp.Features(
        {
            "id": nlp.Value("string"),
            "text": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        supervised_keys=None,
        homepage=_URL,
        citation=_CITATION,
        license=_LICENSE,
    )
def _info(self):
    """Describe the Wikipedia dump dataset (article title + body text)."""
    feature_spec = nlp.Features(
        {
            "title": nlp.Value("string"),
            "text": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        # No default supervised_keys.
        supervised_keys=None,
        homepage="https://dumps.wikimedia.org",
        citation=_CITATION,
    )
def _info(self):
    """Describe the XSum dataset: document in, single-sentence summary out."""
    feature_spec = nlp.Features(
        {
            _DOCUMENT: nlp.Value("string"),
            _SUMMARY: nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        # Default supervised pair: document in, summary out.
        supervised_keys=(_DOCUMENT, _SUMMARY),
        homepage="https://github.com/EdinburghNLP/XSum/tree/master/XSum-Dataset",
        citation=_CITATION,
    )
def _info(self):
    """Describe the SciFact dataset.

    The feature schema depends on which config is selected: the 'corpus'
    config holds abstracts; every other config holds claims with evidence.
    """
    if self.config.name == 'corpus':
        # Corpus records: one scientific abstract per row.
        feature_spec = {
            "doc_id": nlp.Value('int32'),  # The document's S2ORC ID.
            "title": nlp.Value('string'),  # The title.
            # The abstract, written as a list of sentences.
            "abstract": nlp.features.Sequence({'sentence': nlp.Value('string')}),
            # Indicator for whether this is a structured abstract.
            "structured": nlp.Value('bool'),
        }
    else:
        # Claim records: a claim plus its supporting/refuting evidence.
        feature_spec = {
            "id": nlp.Value('int32'),  # An integer claim ID.
            "claim": nlp.Value('string'),  # The text of the claim.
            "evidence_doc_id": nlp.Value('string'),
            "evidence_label": nlp.Value('string'),  # Label for the rationale.
            # Rationale sentence indices.
            "evidence_sentences": nlp.features.Sequence({'sentence': nlp.Value('int32')}),
            # The claim's "cited documents".
            "cited_doc_ids": nlp.features.Sequence({'doc_id': nlp.Value('int32')}),
        }
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        supervised_keys=None,  # no canonical (input, target) pair
        homepage='https://scifact.apps.allenai.org/',
        citation=_CITATION,
    )
def _info(self):
    """Describe the BLEU metric: tokenized predictions vs. multi-reference sets."""
    # Predictions: one token sequence per example.
    prediction_feature = nlp.Sequence(nlp.Value('string', id='token'), id='sequence')
    # References: several token sequences per example.
    reference_feature = nlp.Sequence(
        nlp.Sequence(nlp.Value('string', id='token'), id='sequence'),
        id='references',
    )
    return nlp.MetricInfo(
        description=_DESCRIPTION,
        citation=_CITATION,
        inputs_description=_KWARGS_DESCRIPTION,
        features=nlp.Features(
            {
                'predictions': prediction_feature,
                'references': reference_feature,
            }
        ),
        codebase_urls=["https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py"],
        reference_urls=[
            "https://en.wikipedia.org/wiki/BLEU",
            "https://towardsdatascience.com/evaluating-text-output-in-nlp-bleu-at-your-own-risk-e8609665a213",
        ],
    )
def _info(self):
    """Describe a GLUE task from its config.

    Builds one string feature per configured text field, a label feature
    (ClassLabel for classification tasks, float32 for regression such as
    STS-B), and an integer example index.
    """
    # Iterating the dict directly yields its keys; the six.iterkeys()
    # Python-2 shim is unnecessary on Python 3.
    features = {
        text_feature: nlp.Value("string")
        for text_feature in self.config.text_features
    }
    if self.config.label_classes:
        # Classification task: label is one of a fixed set of classes.
        features["label"] = nlp.features.ClassLabel(names=self.config.label_classes)
    else:
        # Regression task: label is a real-valued score.
        features["label"] = nlp.Value("float32")
    features["idx"] = nlp.Value("int32")
    return nlp.DatasetInfo(
        description=_GLUE_DESCRIPTION,
        features=nlp.Features(features),
        homepage=self.config.url,
        citation=self.config.citation + "\n" + _GLUE_CITATION,
    )
def _info(self):
    """Describe the dataset as generic (source_text, target_text) pairs."""
    feature_spec = nlp.Features(
        {
            "source_text": nlp.Value("string"),
            "target_text": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        # No default supervised_keys (as we have to pass both question
        # and context as input).
        supervised_keys=None,
        homepage="https://rajpurkar.github.io/SQuAD-explorer/",
        citation=_CITATION,
    )
def _info(self):
    """Describe the dataset, including redistribution/license information."""
    feature_spec = nlp.Features(
        {
            "wikidata_id": nlp.Value('string'),
            "text": nlp.Value('string'),
            "version_id": nlp.Value('string'),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        supervised_keys=None,
        homepage=_URL,
        citation=_CITATION,
        redistribution_info={"license": _LICENSE},
    )
def _info(self):
    """Describe the CNN/DailyMail dataset (article, highlights, example id)."""
    feature_spec = nlp.Features(
        {
            _ARTICLE: nlp.Value("string"),
            _HIGHLIGHTS: nlp.Value("string"),
            "id": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        supervised_keys=None,  # no canonical (input, target) pair declared
        homepage="https://github.com/abisee/cnn-dailymail",
        citation=_CITATION,
    )
def _info(self):
    """Describe the Opinosis dataset: review sentences with multiple gold summaries."""
    feature_spec = nlp.Features(
        {
            _REVIEW_SENTS: nlp.Value("string"),
            _SUMMARIES: nlp.features.Sequence(nlp.Value("string")),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        # Default supervised pair: review sentences in, summaries out.
        supervised_keys=(_REVIEW_SENTS, _SUMMARIES),
        homepage="http://kavita-ganesan.com/opinosis/",
        citation=_CITATION,
    )
def _info(self):
    """Describe the seqeval metric: per-token label sequences for both inputs."""
    # Predictions and references share the same shape: one label per token.
    label_sequence = nlp.Sequence(nlp.Value('string', id='label'), id='sequence')
    return nlp.MetricInfo(
        description=_DESCRIPTION,
        citation=_CITATION,
        homepage="https://github.com/chakki-works/seqeval",
        inputs_description=_KWARGS_DESCRIPTION,
        features=nlp.Features(
            {
                'predictions': label_sequence,
                'references': nlp.Sequence(nlp.Value('string', id='label'), id='sequence'),
            }
        ),
        codebase_urls=["https://github.com/chakki-works/seqeval"],
        reference_urls=["https://github.com/chakki-works/seqeval"],
    )
def _info(self):
    """Describe the WNUT-17 dataset: tokens with aligned entity labels."""
    feature_spec = nlp.Features(
        {
            "id": nlp.Value("string"),
            "tokens": nlp.Sequence(nlp.Value("string")),
            "labels": nlp.Sequence(nlp.Value("string")),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        supervised_keys=None,  # no canonical (input, target) pair declared
        homepage="http://noisy-text.github.io/2017/emerging-rare-entities.html",
        citation=_CITATION,
    )
def _info(self):
    """Describe Cosmos QA: a context, a question, four answer choices, a label."""
    feature_spec = nlp.Features(
        {
            "id": nlp.Value("string"),
            "context": nlp.Value("string"),
            "question": nlp.Value("string"),
            "answer0": nlp.Value("string"),
            "answer1": nlp.Value("string"),
            "answer2": nlp.Value("string"),
            "answer3": nlp.Value("string"),
            # Index of the correct answer among answer0..answer3.
            "label": nlp.Value("int32"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        supervised_keys=None,  # no canonical (input, target) pair declared
        homepage="https://wilburone.github.io/cosmos/",
        citation=_CITATION,
    )
def _info(self):
    """Describe the Amazon QA dataset: text-to-text pairs tagged with a task name."""
    feature_spec = nlp.Features(
        {
            "source_text": nlp.Value("string"),
            "target_text": nlp.Value("string"),
            "task": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        # No default supervised_keys (as we have to pass both question
        # and context as input).
        supervised_keys=None,
        homepage="http://jmcauley.ucsd.edu/data/amazon/qa/",
        citation=_CITATION,
    )
def _info(self):
    """Describe the CoVal coreference metric: string inputs in CoNLL format."""
    feature_spec = nlp.Features(
        {
            'predictions': nlp.Value('string', id='sequence'),
            'references': nlp.Value('string', id='sequence'),
        }
    )
    return nlp.MetricInfo(
        description=_DESCRIPTION,
        citation=_CITATION,
        inputs_description=_KWARGS_DESCRIPTION,
        features=feature_spec,
        codebase_urls=["https://github.com/ns-moosavi/coval"],
        reference_urls=[
            "https://github.com/ns-moosavi/coval",
            "https://www.aclweb.org/anthology/P16-1060",
            "http://www.conll.cemantix.org/2012/data.html",
        ],
    )
def _info(self):
    """Describe the CRD3 dataset: dialogue chunks with aligned speaker turns."""
    # Each turn pairs speaker names with their utterances.
    turn_feature = nlp.features.Sequence(
        {
            "names": nlp.Value("string"),
            "utterances": nlp.Value("string"),
        }
    )
    feature_spec = nlp.Features(
        {
            "chunk": nlp.Value("string"),
            "chunk_id": nlp.Value("int32"),
            "turn_start": nlp.Value("int32"),
            "turn_end": nlp.Value("int32"),
            "alignment_score": nlp.Value("float32"),
            "turn_num": nlp.Value("int32"),
            "turns": turn_feature,
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        homepage="https://github.com/RevanthRameshkumar/CRD3",
        citation=_CITATION,
    )
def _info(self):
    """Describe the NewsQA dataset: context, answer, and question strings."""
    feature_spec = nlp.Features(
        {
            "context": nlp.Value("string"),
            "answer": nlp.Value("string"),
            "question": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        # No default supervised_keys (as we have to pass both question
        # and context as input).
        supervised_keys=None,
        homepage="https://datasets.maluuba.com/NewsQA",
        citation=_CITATION,
    )
def _info(self):
    """Describe DiscoFuse: coherent/incoherent sentence pairs with fusion metadata."""
    feature_spec = nlp.Features(
        {
            "connective_string": nlp.Value("string"),
            "discourse_type": nlp.Value("string"),
            "coherent_second_sentence": nlp.Value("string"),
            "has_coref_type_pronoun": nlp.Value("float32"),
            "incoherent_first_sentence": nlp.Value("string"),
            "incoherent_second_sentence": nlp.Value("string"),
            "has_coref_type_nominal": nlp.Value("float32"),
            "coherent_first_sentence": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        supervised_keys=None,  # no canonical (input, target) pair declared
        homepage="https://github.com/google-research-datasets/discofuse",
        citation=_CITATION,
    )
def _info(self):
    """Describe EmpatheticDialogues: per-utterance rows with conversation metadata."""
    feature_spec = nlp.Features(
        {
            "conv_id": nlp.Value("string"),
            "utterance_idx": nlp.Value("int32"),
            "context": nlp.Value("string"),
            "prompt": nlp.Value("string"),
            "speaker_idx": nlp.Value("int32"),
            "utterance": nlp.Value("string"),
            "selfeval": nlp.Value("string"),
            "tags": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        supervised_keys=None,  # no canonical (input, target) pair declared
        homepage="https://github.com/facebookresearch/EmpatheticDialogues",
        citation=_CITATION,
    )
def _info(self):
    """Describe the MultiNLI dataset: premise/hypothesis pairs with a label."""
    feature_spec = nlp.Features(
        {
            "premise": nlp.Value("string"),
            "hypothesis": nlp.Value("string"),
            "label": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        # No default supervised_keys (as we have to pass both premise
        # and hypothesis as input).
        supervised_keys=None,
        homepage="https://www.nyu.edu/projects/bowman/multinli/",
        citation=_CITATION,
    )
def _info(self):
    """Describe BERTScore: string predictions vs. one-or-more reference strings."""
    feature_spec = nlp.Features(
        {
            'predictions': nlp.Value('string', id='sequence'),
            'references': nlp.Sequence(nlp.Value('string', id='sequence'), id='references'),
        }
    )
    return nlp.MetricInfo(
        description=_DESCRIPTION,
        citation=_CITATION,
        homepage="https://github.com/Tiiiger/bert_score",
        inputs_description=_KWARGS_DESCRIPTION,
        features=feature_spec,
        codebase_urls=["https://github.com/Tiiiger/bert_score"],
        reference_urls=[
            "https://github.com/Tiiiger/bert_score",
            "https://arxiv.org/abs/1904.09675",
        ],
    )
def _info(self):
    """Describe the Jeopardy! dataset.

    Each example is one clue: its category, air date, question text, dollar
    value, answer, round, and show number.
    """
    # NOTE: the original dict literal listed "category" twice; in Python the
    # second entry silently overwrites the first, so the duplicate is removed.
    feature_spec = nlp.Features(
        {
            "category": nlp.Value("string"),
            "air_date": nlp.Value("string"),
            "question": nlp.Value("string"),
            "value": nlp.Value("int32"),
            "answer": nlp.Value("string"),
            "round": nlp.Value("string"),
            "show_number": nlp.Value("int32"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        supervised_keys=None,  # no canonical (input, target) pair declared
        homepage=_URL,
        citation=_CITATION,
    )
def _info(self):
    """Describe the multi-doc multilingual summarization dataset."""
    feature_spec = nlp.Features(
        {
            _TITLE: nlp.Value("string"),
            _DOCUMENT: nlp.Value("string"),
            _SUMMARY: nlp.Value("string"),
            _CLEAN_DOCUMENT: nlp.Value("string"),
            _CLEAN_SUMMARY: nlp.Value("string"),
        }
    )
    # supervised_keys deliberately unset (was: (_TITLE, _DOCUMENT, _SUMMARY)).
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        homepage="https://github.com/airKlizz/MultiDocMultiLingualSum",
        citation=_CITATION,
    )
def _info(self):
    """Describe the CFQ dataset: a natural-language question paired with a query.

    Note: `supervised_keys` must be an (input, output) tuple for the nlp
    library's `as_supervised=True` mode; the previous dict form
    ({"input": ..., "output": ...}) is not a valid SupervisedKeysData value.
    """
    feature_spec = nlp.Features(
        {
            _QUESTION: nlp.Value('string'),
            _QUERY: nlp.Value('string'),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        # Supervised pair: question in, query out.
        supervised_keys=(_QUESTION, _QUERY),
        homepage='https://github.com/google-research/google-research/tree/master/cfq',
        citation=_CITATION,
    )
def _info(self):
    """Describe the Newsroom dataset: text fields plus float metadata fields."""
    # All text columns (document, summary, and extra text metadata) are strings.
    feature_spec = {
        name: nlp.Value("string")
        for name in [_DOCUMENT, _SUMMARY] + _ADDITIONAL_TEXT_FEATURES
    }
    # Numeric metadata columns (e.g. extraction scores) are float32.
    for name in _ADDITIONAL_FLOAT_FEATURES:
        feature_spec[name] = nlp.Value("float32")
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=nlp.Features(feature_spec),
        # Default supervised pair: document in, summary out.
        supervised_keys=(_DOCUMENT, _SUMMARY),
        homepage="http://lil.nlp.cornell.edu/newsroom/",
        citation=_CITATION,
    )
def _info(self):
    """Describe the dataset: text input with hierarchical integer labels."""
    feature_spec = nlp.Features(
        {
            "input_data": nlp.Value("string"),
            "label": nlp.Value("int32"),
            "label_level_1": nlp.Value("int32"),
            "label_level_2": nlp.Value("int32"),
        }
    )
    return nlp.DatasetInfo(
        # Append the per-config description to the shared one.
        description=_DESCRIPTION + self.config.description,
        features=feature_spec,
        # No default supervised_keys (as we have to pass both premise
        # and hypothesis as input).
        supervised_keys=None,
        homepage="https://data.mendeley.com/datasets/9rw3vkcfy4/6",
        citation=_CITATION,
    )
def _info(self):
    """Describe the C4 dataset: web text with crawl metadata, all strings."""
    feature_spec = nlp.Features(
        {
            "text": nlp.Value("string"),
            "url": nlp.Value("string"),
            "content-type": nlp.Value("string"),
            "content-length": nlp.Value("string"),
            "timestamp": nlp.Value("string"),
        }
    )
    return nlp.DatasetInfo(
        description=_DESCRIPTION,
        features=feature_spec,
        citation=_CITATION,
        homepage="https://github.com/google-research/text-to-text-transfer-transformer#datasets",
    )