Beispiel #1
0
 def _info(self):
     """Return the dataset metadata.

     The schema consists of three string columns; no supervised keys are
     declared.
     """
     column_names = ("wikidata_id", "text", "version_id")
     schema = nlp.Features(
         {name: nlp.Value("string") for name in column_names})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage=_URL,
         features=schema,
         supervised_keys=None,
     )
Beispiel #2
0
 def _info(self):
     """Return the dataset metadata.

     Both columns are strings and together they form the supervised
     ``(input, target)`` pair.
     """
     pair = (_QUESTION, _QUERY)
     schema = nlp.Features({key: nlp.Value("string") for key in pair})
     project_page = (
         "https://github.com/google-research/google-research/tree/master/cfq"
     )
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage=project_page,
         features=schema,
         supervised_keys=pair,
     )
Beispiel #3
0
 def _info(self):
     """Return the dataset metadata.

     The schema holds the document, its summary and the section names,
     all stored as strings. No supervised keys are declared.
     """
     column_names = (_DOCUMENT, _SUMMARY, "section_names")
     schema = nlp.Features(
         {name: nlp.Value("string") for name in column_names})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="https://github.com/armancohan/long-summarization",
         features=schema,
         supervised_keys=None,
     )
Beispiel #4
0
 def _info(self):
     """Return the dataset metadata, including its license.

     The schema has two string columns, ``id`` and ``text``; no supervised
     keys are declared.
     """
     schema = nlp.Features(
         {name: nlp.Value("string") for name in ("id", "text")})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         license=_LICENSE,
         homepage=_URL,
         features=schema,
         supervised_keys=None,
     )
Beispiel #5
0
 def _info(self):
     """Return the dataset metadata.

     The schema has two string columns, ``title`` and ``text``.
     """
     schema = nlp.Features(
         {name: nlp.Value("string") for name in ("title", "text")})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="https://dumps.wikimedia.org",
         features=schema,
         # This dataset declares no default supervised keys.
         supervised_keys=None,
     )
Beispiel #6
0
 def _info(self):
     """Return the dataset metadata.

     Document and summary are both string columns and are also exposed as
     the supervised ``(input, target)`` pair.
     """
     pair = (_DOCUMENT, _SUMMARY)
     schema = nlp.Features({key: nlp.Value("string") for key in pair})
     project_page = (
         "https://github.com/EdinburghNLP/XSum/tree/master/XSum-Dataset"
     )
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage=project_page,
         features=schema,
         supervised_keys=pair,
     )
Beispiel #7
0
    def _info(self):
        """Return the ``nlp.DatasetInfo`` for the selected configuration.

        The ``'corpus'`` configuration uses the document schema; every other
        configuration name uses the claim schema.
        """
        if self.config.name != 'corpus':
            schema = {
                # An integer claim ID.
                "id": nlp.Value('int32'),
                # The text of the claim.
                "claim": nlp.Value('string'),
                "evidence_doc_id": nlp.Value('string'),
                # Label for the rationale.
                "evidence_label": nlp.Value('string'),
                # Rationale sentences.
                "evidence_sentences": nlp.features.Sequence(
                    {'sentence': nlp.Value('int32')}
                ),
                # The claim's "cited documents".
                "cited_doc_ids": nlp.features.Sequence(
                    {'doc_id': nlp.Value('int32')}
                ),
            }
        else:
            schema = {
                # The document's S2ORC ID.
                "doc_id": nlp.Value('int32'),
                # The title.
                "title": nlp.Value('string'),
                # The abstract, written as a list of sentences.
                "abstract": nlp.features.Sequence(
                    {'sentence': nlp.Value('string')}
                ),
                # Indicator for whether this is a structured abstract.
                "structured": nlp.Value('bool'),
            }

        return nlp.DatasetInfo(
            description=_DESCRIPTION,
            citation=_CITATION,
            homepage='https://scifact.apps.allenai.org/',
            features=nlp.Features(schema),
            # No (input, target) pair is declared for this dataset.
            supervised_keys=None,
        )
Beispiel #8
0
 def _info(self):
     """Return the metric metadata.

     ``predictions`` is a sequence of string tokens; ``references`` is a
     sequence of such token sequences.
     """

     def token_sequence():
         # Build a new feature object on every call, as the original
         # literal did for each column.
         return nlp.Sequence(nlp.Value('string', id='token'), id='sequence')

     schema = nlp.Features({
         'predictions': token_sequence(),
         'references': nlp.Sequence(token_sequence(), id='references'),
     })
     code_links = [
         "https://github.com/tensorflow/nmt/blob/master/nmt/scripts/bleu.py",
     ]
     reading_links = [
         "https://en.wikipedia.org/wiki/BLEU",
         "https://towardsdatascience.com/evaluating-text-output-in-nlp-bleu-at-your-own-risk-e8609665a213",
     ]
     return nlp.MetricInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         inputs_description=_KWARGS_DESCRIPTION,
         features=schema,
         codebase_urls=code_links,
         reference_urls=reading_links,
     )
Beispiel #9
0
 def _info(self):
     """Return the dataset metadata for the active configuration.

     Every key of ``self.config.text_features`` becomes a string column.
     ``label`` is a ``ClassLabel`` when the config lists label classes and
     a ``float32`` value otherwise. ``idx`` is always an ``int32`` column.
     """
     schema = {}
     for column in six.iterkeys(self.config.text_features):
         schema[column] = nlp.Value("string")

     # Use a class label when label classes are given, otherwise a float.
     schema["label"] = (
         nlp.features.ClassLabel(names=self.config.label_classes)
         if self.config.label_classes
         else nlp.Value("float32")
     )
     schema["idx"] = nlp.Value("int32")

     # The config-specific citation is followed by the shared one.
     full_citation = self.config.citation + "\n" + _GLUE_CITATION
     return nlp.DatasetInfo(
         description=_GLUE_DESCRIPTION,
         citation=full_citation,
         homepage=self.config.url,
         features=nlp.Features(schema),
     )
Beispiel #10
0
 def _info(self):
     """Return the dataset metadata.

     The schema has two string columns, ``source_text`` and
     ``target_text``.
     """
     column_names = ("source_text", "target_text")
     schema = nlp.Features(
         {name: nlp.Value("string") for name in column_names})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="https://rajpurkar.github.io/SQuAD-explorer/",
         features=schema,
         # No default supervised keys are declared.
         supervised_keys=None,
     )
Beispiel #11
0
 def _info(self):
     """Return the dataset metadata.

     The schema has three string columns. The license is passed through
     ``redistribution_info``.
     """
     column_names = ("wikidata_id", "text", "version_id")
     schema = nlp.Features(
         {name: nlp.Value('string') for name in column_names})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage=_URL,
         redistribution_info={"license": _LICENSE},
         features=schema,
         supervised_keys=None,
     )
Beispiel #12
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` describing this dataset.

     Article, highlights and ``id`` are all string columns; no supervised
     keys are declared.
     """
     column_names = (_ARTICLE, _HIGHLIGHTS, "id")
     schema = nlp.Features(
         {name: nlp.Value("string") for name in column_names})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="https://github.com/abisee/cnn-dailymail",
         features=schema,
         supervised_keys=None,
     )
 def _info(self):
     """Return the dataset metadata.

     The review sentences are a single string column and the summaries are
     a sequence of strings. The two form the supervised pair.
     """
     schema = nlp.Features({
         _REVIEW_SENTS: nlp.Value("string"),
         _SUMMARIES: nlp.features.Sequence(nlp.Value("string")),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="http://kavita-ganesan.com/opinosis/",
         features=schema,
         supervised_keys=(_REVIEW_SENTS, _SUMMARIES),
     )
Beispiel #14
0
 def _info(self):
     """Return the metric metadata.

     ``predictions`` and ``references`` share the same layout: a sequence
     of string labels.
     """
     repo = "https://github.com/chakki-works/seqeval"
     schema = nlp.Features({
         column: nlp.Sequence(nlp.Value('string', id='label'), id='sequence')
         for column in ('predictions', 'references')
     })
     return nlp.MetricInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage=repo,
         inputs_description=_KWARGS_DESCRIPTION,
         features=schema,
         codebase_urls=[repo],
         reference_urls=[repo],
     )
Beispiel #15
0
 def _info(self):
     """Return the dataset metadata.

     ``id`` is a string column; ``tokens`` and ``labels`` are sequences of
     strings. No supervised keys are declared.
     """
     schema = {"id": nlp.Value("string")}
     for column in ("tokens", "labels"):
         schema[column] = nlp.Sequence(nlp.Value("string"))
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="http://noisy-text.github.io/2017/emerging-rare-entities.html",
         features=nlp.Features(schema),
         supervised_keys=None,
     )
Beispiel #16
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` describing this dataset.

     There are seven string columns (id, context, question and four answer
     options) followed by an ``int32`` ``label`` column.
     """
     text_columns = (
         "id",
         "context",
         "question",
         "answer0",
         "answer1",
         "answer2",
         "answer3",
     )
     schema = {name: nlp.Value("string") for name in text_columns}
     schema["label"] = nlp.Value("int32")
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="https://wilburone.github.io/cosmos/",
         features=nlp.Features(schema),
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
     )
 def _info(self):
     """Return the dataset metadata.

     The schema has three string columns: ``source_text``,
     ``target_text`` and ``task``.
     """
     column_names = ("source_text", "target_text", "task")
     schema = nlp.Features(
         {name: nlp.Value("string") for name in column_names})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="http://jmcauley.ucsd.edu/data/amazon/qa/",
         features=schema,
         # No default supervised keys are declared.
         supervised_keys=None,
     )
Beispiel #18
0
 def _info(self):
     """Return the metric metadata.

     ``predictions`` and ``references`` are both plain string values tagged
     with the id ``'sequence'``.
     """
     schema = nlp.Features({
         column: nlp.Value('string', id='sequence')
         for column in ('predictions', 'references')
     })
     repo = "https://github.com/ns-moosavi/coval"
     reading_links = [
         repo,
         "https://www.aclweb.org/anthology/P16-1060",
         "http://www.conll.cemantix.org/2012/data.html",
     ]
     return nlp.MetricInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         inputs_description=_KWARGS_DESCRIPTION,
         features=schema,
         codebase_urls=[repo],
         reference_urls=reading_links,
     )
Beispiel #19
0
 def _info(self):
     """Return the dataset metadata.

     Scalar columns describe a chunk (text, id, turn range, alignment score,
     turn count). ``turns`` is a sequence of records with ``names`` and
     ``utterances`` string fields.
     """
     # (column name, dtype) pairs, in schema order.
     scalar_columns = (
         ("chunk", "string"),
         ("chunk_id", "int32"),
         ("turn_start", "int32"),
         ("turn_end", "int32"),
         ("alignment_score", "float32"),
         ("turn_num", "int32"),
     )
     schema = {name: nlp.Value(dtype) for name, dtype in scalar_columns}
     schema["turns"] = nlp.features.Sequence({
         "names": nlp.Value("string"),
         "utterances": nlp.Value("string"),
     })
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="https://github.com/RevanthRameshkumar/CRD3",
         features=nlp.Features(schema),
     )
 def _info(self):
     """Return the dataset metadata.

     The schema has three string columns: ``context``, ``answer`` and
     ``question``.
     """
     column_names = ("context", "answer", "question")
     schema = nlp.Features(
         {name: nlp.Value("string") for name in column_names})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="https://datasets.maluuba.com/NewsQA",
         features=schema,
         # No default supervised keys: question and context are both
         # needed as input.
         supervised_keys=None,
     )
Beispiel #21
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` describing this dataset.

     The schema has six string columns and two ``float32`` coreference
     indicator columns, in the order listed below.
     """
     # (column name, dtype) pairs, in schema order.
     column_types = (
         ("connective_string", "string"),
         ("discourse_type", "string"),
         ("coherent_second_sentence", "string"),
         ("has_coref_type_pronoun", "float32"),
         ("incoherent_first_sentence", "string"),
         ("incoherent_second_sentence", "string"),
         ("has_coref_type_nominal", "float32"),
         ("coherent_first_sentence", "string"),
     )
     schema = nlp.Features(
         {name: nlp.Value(dtype) for name, dtype in column_types})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="https://github.com/google-research-datasets/discofuse",
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
     )
Beispiel #22
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` describing this dataset.

     The schema mixes string columns with two ``int32`` index columns
     (``utterance_idx`` and ``speaker_idx``).
     """
     # (column name, dtype) pairs, in schema order.
     column_types = (
         ("conv_id", "string"),
         ("utterance_idx", "int32"),
         ("context", "string"),
         ("prompt", "string"),
         ("speaker_idx", "int32"),
         ("utterance", "string"),
         ("selfeval", "string"),
         ("tags", "string"),
     )
     schema = nlp.Features(
         {name: nlp.Value(dtype) for name, dtype in column_types})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="https://github.com/facebookresearch/EmpatheticDialogues",
         features=schema,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
     )
Beispiel #23
0
 def _info(self):
     """Return the dataset metadata.

     ``premise``, ``hypothesis`` and ``label`` are all string columns.
     """
     column_names = ("premise", "hypothesis", "label")
     schema = nlp.Features(
         {name: nlp.Value("string") for name in column_names})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="https://www.nyu.edu/projects/bowman/multinli/",
         features=schema,
         # No default supervised keys: premise and hypothesis are both
         # needed as input.
         supervised_keys=None,
     )
Beispiel #24
0
 def _info(self):
     """Return the metric metadata.

     ``predictions`` is a single string value; ``references`` is a sequence
     of string values.
     """
     repo = "https://github.com/Tiiiger/bert_score"
     schema = nlp.Features({
         'predictions': nlp.Value('string', id='sequence'),
         'references': nlp.Sequence(
             nlp.Value('string', id='sequence'), id='references'),
     })
     return nlp.MetricInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage=repo,
         inputs_description=_KWARGS_DESCRIPTION,
         features=schema,
         codebase_urls=[repo],
         reference_urls=[repo, "https://arxiv.org/abs/1904.09675"],
     )
Beispiel #25
0
 def _info(self):
     """Return the ``nlp.DatasetInfo`` describing this dataset.

     The schema has seven columns: five strings (``category``,
     ``air_date``, ``question``, ``answer``, ``round``) and two ``int32``
     values (``value``, ``show_number``). No supervised keys are declared.
     """
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         features=nlp.Features(
             {
                 # "category" used to be listed twice in this literal; the
                 # duplicate silently overwrote the first entry. It is
                 # declared once here, in its original (first) position,
                 # so the resulting columns and their order are unchanged.
                 "category": nlp.Value("string"),
                 "air_date": nlp.Value("string"),
                 "question": nlp.Value("string"),
                 "value": nlp.Value("int32"),
                 "answer": nlp.Value("string"),
                 "round": nlp.Value("string"),
                 "show_number": nlp.Value("int32"),
             }
         ),
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage=_URL,
         citation=_CITATION,
     )
Beispiel #26
0
 def _info(self):
     """Return the dataset metadata.

     Title, document, summary and the two cleaned variants are all string
     columns. No supervised keys are passed.
     """
     column_names = (
         _TITLE,
         _DOCUMENT,
         _SUMMARY,
         _CLEAN_DOCUMENT,
         _CLEAN_SUMMARY,
     )
     schema = nlp.Features(
         {name: nlp.Value("string") for name in column_names})
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="https://github.com/airKlizz/MultiDocMultiLingualSum",
         features=schema,
     )
Beispiel #27
0
 def _info(self):
     """Return the dataset metadata.

     Question and query are string columns. The supervised keys are given
     as a mapping with ``"input"`` and ``"output"`` entries.
     """
     schema = nlp.Features(
         {key: nlp.Value('string') for key in (_QUESTION, _QUERY)})
     supervised = {"input": _QUESTION, "output": _QUERY}
     project_page = (
         'https://github.com/google-research/google-research/tree/master/cfq'
     )
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage=project_page,
         features=schema,
         supervised_keys=supervised,
     )
Beispiel #28
0
 def _info(self):
     """Return the dataset metadata.

     Document, summary and the additional text features are string columns;
     the additional float features are ``float32`` columns. Document and
     summary form the supervised pair.
     """
     schema = {}
     for name in [_DOCUMENT, _SUMMARY] + _ADDITIONAL_TEXT_FEATURES:
         schema[name] = nlp.Value("string")
     # Float columns are added after the text columns.
     for name in _ADDITIONAL_FLOAT_FEATURES:
         schema[name] = nlp.Value("float32")
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         citation=_CITATION,
         homepage="http://lil.nlp.cornell.edu/newsroom/",
         features=nlp.Features(schema),
         supervised_keys=(_DOCUMENT, _SUMMARY),
     )
Beispiel #29
0
 def _info(self):
     """Return the dataset metadata for the active configuration.

     The description is the shared text followed by the config-specific
     one. The schema has one string column (``input_data``) and three
     ``int32`` label columns.
     """
     schema = {"input_data": nlp.Value("string")}
     for column in ("label", "label_level_1", "label_level_2"):
         schema[column] = nlp.Value("int32")
     return nlp.DatasetInfo(
         description=_DESCRIPTION + self.config.description,
         citation=_CITATION,
         homepage="https://data.mendeley.com/datasets/9rw3vkcfy4/6",
         features=nlp.Features(schema),
         # No default supervised keys are declared.
         supervised_keys=None,
     )
Beispiel #30
0
 def _info(self):
     """Return the dataset metadata.

     All five columns (``text``, ``url``, ``content-type``,
     ``content-length``, ``timestamp``) are stored as strings.
     """
     column_names = (
         "text",
         "url",
         "content-type",
         "content-length",
         "timestamp",
     )
     schema = {name: nlp.Value("string") for name in column_names}
     project_page = (
         "https://github.com/google-research/text-to-text-transfer-transformer#datasets"
     )
     return nlp.DatasetInfo(
         description=_DESCRIPTION,
         homepage=project_page,
         citation=_CITATION,
         features=nlp.Features(schema),
     )