def _info(self):
     """Assemble and return the ``datasets.DatasetInfo`` metadata.

     Returns:
         A ``datasets.DatasetInfo`` carrying the description, the feature
         schema, no supervised keys, the homepage URL, the citation and a
         ``LanguageModeling`` task template over the ``"text"`` column.
     """
     # NOTE(review): ``features`` is not assigned inside this method, so it
     # has to resolve from an outer scope -- confirm it is defined there.
     info_kwargs = {
         "description": _DESCRIPTION,
         "features": features,
         "supervised_keys": None,
         "homepage": "https://www.bl.uk/collection-guides/digitised-printed-books",
         "citation": _CITATION,
         "task_templates": [LanguageModeling(text_column="text")],
     }
     return datasets.DatasetInfo(**info_kwargs)
Ejemplo n.º 2
0
 def _info(self):
     """Describe the dataset: its column schema plus descriptive metadata.

     Returns:
         A ``datasets.DatasetInfo`` holding the description, the feature
         schema built below, no supervised keys, the homepage, license and
         citation, and a ``LanguageModeling`` task template over the
         ``"text"`` column.
     """
     # Label vocabulary for the "article_type" column; the list order is
     # passed to ClassLabel unchanged.
     article_type_names = [
         "ADVERTISEMENT_SECTION",
         "BIBLIOGRAPHY",
         "CHAPTER",
         "INDEX",
         "CONTRIBUTION",
         "TABLE_OF_CONTENTS",
         "WEATHER",
         "SHIPPING",
         "SECTION",
         "ARTICLE",
         "TITLE_SECTION",
         "DEATH_NOTICE",
         "SUPPLEMENT",
         "TABLE",
         "ADVERTISEMENT",
         "CHART_DIAGRAM",
         "ILLUSTRATION",
         "ISSUE",
     ]

     # The mapping is filled in the same column order as the schema is
     # declared: the leading string columns first, then the remaining ones.
     schema = {
         column: datasets.Value("string")
         for column in ("id", "source", "url", "title", "ispartof", "text")
     }
     schema["pub_date"] = datasets.Value("timestamp[s]")
     schema["publisher"] = datasets.Value("string")
     schema["language"] = datasets.Value("string")
     schema["article_type"] = datasets.ClassLabel(names=article_type_names)
     schema["extent"] = datasets.Value("int32")
     features = datasets.Features(schema)

     info = datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=features,
         # No (input, target) pair is declared for this dataset.
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[LanguageModeling(text_column="text")],
     )
     return info
 def _info(self):
     """Return the ``datasets.DatasetInfo`` for this dataset.

     The feature schema is declared as an ordered list of
     ``(column name, feature type)`` pairs and then turned into
     ``datasets.Features``.

     Returns:
         A ``datasets.DatasetInfo`` with the description, feature schema,
         homepage, license, citation and a ``LanguageModeling`` task
         template over the ``"text"`` column.
     """
     # Class names accepted by the "article_type" label column.
     article_type_labels = [
         "ADVERTISEMENT_SECTION",
         "BIBLIOGRAPHY",
         "CHAPTER",
         "INDEX",
         "CONTRIBUTION",
         "TABLE_OF_CONTENTS",
         "WEATHER",
         "SHIPPING",
         "SECTION",
         "ARTICLE",
         "TITLE_SECTION",
         "DEATH_NOTICE",
         "SUPPLEMENT",
         "TABLE",
         "ADVERTISEMENT",
         "CHART_DIAGRAM",
         "ILLUSTRATION",
         "ISSUE",
     ]

     # One pair per column, listed in schema order.
     columns = [
         ("id", datasets.Value("string")),
         ("source", datasets.Value("string")),
         ("url", datasets.Value("string")),
         ("title", datasets.Value("string")),
         ("ispartof", datasets.Value("string")),
         ("text", datasets.Value("string")),
         ("pub_date", datasets.Value("timestamp[s]")),
         ("publisher", datasets.Value("string")),
         ("language", datasets.Value("string")),
         ("article_type", datasets.ClassLabel(names=article_type_labels)),
         ("extent", datasets.Value("int32")),
     ]
     features = datasets.Features(dict(columns))

     info = datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=features,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[LanguageModeling(text_column="text")],
     )
     return info
Ejemplo n.º 4
0
 def test_from_dict(self):
     """Check the task name and input schema of a template built via ``from_dict``."""
     expected_schema = Features({"text": Value("string")})
     # Build the template from a plain dict that names a custom text column.
     template = LanguageModeling.from_dict({"text_column": "input_text"})
     assert "language-modeling" == template.task
     assert expected_schema == template.input_schema
Ejemplo n.º 5
0
 def test_column_mapping(self):
     """Check that a custom text column is mapped to ``"text"``."""
     expected_mapping = {"input_text": "text"}
     template = LanguageModeling(text_column="input_text")
     assert expected_mapping == template.column_mapping