def _info(self):
    """Build the ``DatasetInfo`` describing this dataset.

    The schema is a string ``tweet`` column plus a ``label`` class label with
    eight names; a text-classification template maps those two columns.
    """
    class_names = ["none", "anger", "joy", "sadness", "love", "sympathy", "surprise", "fear"]
    schema = datasets.Features(
        {
            "tweet": datasets.Value("string"),
            "label": datasets.features.ClassLabel(names=class_names),
        }
    )
    classification = TextClassification(text_column="tweet", label_column="label")
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        homepage=_HOMEPAGE,
        citation=_CITATION,
        task_templates=[classification],
    )
def _info(self):
    """Build the ``DatasetInfo``: a string ``text`` column and a binary ``label``.

    The class names are the strings "0" and "1".
    """
    # Labels: 0="Non-hate Speech", 1="Hate Speech"
    binary_label = datasets.features.ClassLabel(names=["0", "1"])
    classification = TextClassification(text_column="text", label_column="label")
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(
            {
                "text": datasets.Value("string"),
                "label": binary_label,
            }
        ),
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
        task_templates=[classification],
    )
def _info(self):
    """Build the ``DatasetInfo`` for the news-title classification schema.

    Columns: a string ``news_title`` and a ``label`` class label with five
    names. The homepage URL is hard-coded in this method.
    """
    topic_names = ["Africa", "Health", "Nigeria", "Politics", "World"]
    schema = datasets.Features(
        {
            "news_title": datasets.Value("string"),
            "label": datasets.features.ClassLabel(names=topic_names),
        }
    )
    classification = TextClassification(text_column="news_title", label_column="label")
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        homepage="https://github.com/uds-lsv/transfer-distant-transformer-african",
        citation=_CITATION,
        task_templates=[classification],
    )
def _info(self):
    """Build the ``DatasetInfo``: string ``text`` and a two-class ``label``.

    The class names are "Negative" and "Positive", in that order.
    """
    polarity = datasets.features.ClassLabel(names=["Negative", "Positive"])
    schema = datasets.Features({"text": datasets.Value("string"), "label": polarity})
    classification = TextClassification(text_column="text", label_column="label")
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage="https://github.com/komari6/Arabic-twitter-corpus-AJGT",
        citation=_CITATION,
        task_templates=[classification],
    )
def _info(self):
    """Build the ``DatasetInfo`` for the review schema.

    Columns: a string ``review_body`` and a ``star_rating`` class label whose
    names are the strings "1" through "5".
    """
    rating_names = ["1", "2", "3", "4", "5"]
    classification = TextClassification(text_column="review_body", label_column="star_rating")
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(
            {
                "review_body": datasets.Value("string"),
                "star_rating": datasets.features.ClassLabel(names=rating_names),
            }
        ),
        supervised_keys=None,
        homepage="https://github.com/wongnai/wongnai-corpus",
        license=_LICENSE,
        citation=_CITATION,
        task_templates=[classification],
    )
def test_remove_and_map_on_task_template(self):
    """Removing the label column and then mapping must leave no task templates."""

    def identity(example):
        # No-op mapper: the test only cares about the info carried through ``map``.
        return example

    schema = Features({"text": Value("string"), "label": ClassLabel(names=("pos", "neg"))})
    template = TextClassification(text_column="text", label_column="label")
    dataset_info = DatasetInfo(features=schema, task_templates=template)
    rows = {"text": ["A sentence."], "label": ["pos"]}
    original = Dataset.from_dict(rows, info=dataset_info)

    without_label = original.remove_columns("label")
    remapped = without_label.map(identity)

    assert remapped.info.task_templates == []
def _info(self):
    """Build the ``DatasetInfo`` for the news-category schema.

    Columns: a string ``text`` and a ``category`` class label with four names.
    """
    category_label = datasets.ClassLabel(names=["Sport", "Politic", "Business", "Entertainment"])
    schema = datasets.Features(
        {
            "text": datasets.Value("string"),
            "category": category_label,
        }
    )
    classification = TextClassification(text_column="text", label_column="category")
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage="https://github.com/ayehninnkhine/MyanmarNewsClassificationSystem",
        license=_LICENSE,
        citation=_CITATION,
        task_templates=[classification],
    )
def _info(self):
    """Build the ``DatasetInfo``: string ``text`` and a five-class ``label``.

    The class names are the strings "1" through "5".
    """
    # Yields exactly ["1", "2", "3", "4", "5"].
    rating_names = [str(rating) for rating in range(1, 6)]
    schema = datasets.Features(
        {
            "text": datasets.Value("string"),
            "label": datasets.features.ClassLabel(names=rating_names),
        }
    )
    classification = TextClassification(text_column="text", label_column="label")
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage="https://github.com/mohamedadaly/LABR",
        citation=_CITATION,
        task_templates=[classification],
    )
def _info(self):
    """Build the ``DatasetInfo`` for the selected builder configuration.

    * ``first_domain``: a string ``sequence`` column and an ``ECnumber`` class
      label with names "1" through "7". A text-classification template maps
      ``sequence``/``ECnumber``.
    * ``second_domain``: ``sequence`` plus two string columns ``ECnumber_one``
      and ``ECnumber_two``. There is no class-label column, so no task
      template is attached.

    Returns:
        datasets.DatasetInfo: metadata and feature typings for the dataset.

    Raises:
        ValueError: if ``self.config.name`` is not a known configuration.
    """
    config_name = self.config.name
    if config_name == "first_domain":
        features = datasets.Features(
            {
                "sequence": datasets.Value("string"),
                # TODO: specify the main classes of enzymes by name?
                "ECnumber": datasets.features.ClassLabel(names=["1", "2", "3", "4", "5", "6", "7"]),
            }
        )
        # The template must reference columns that exist in ``features``; the
        # previous "text"/"label" names match no column of this schema.
        task_templates = [TextClassification(text_column="sequence", label_column="ECnumber")]
    elif config_name == "second_domain":
        features = datasets.Features(
            {
                "sequence": datasets.Value("string"),
                "ECnumber_one": datasets.Value("string"),
                "ECnumber_two": datasets.Value("string"),
            }
        )
        # Both EC columns are plain strings, so there is no ClassLabel column a
        # text-classification template could point at.
        task_templates = None
    else:
        # Fail with a clear message instead of an unbound-local error below.
        raise ValueError(
            f"Unknown configuration name: {config_name!r} "
            "(expected 'first_domain' or 'second_domain')"
        )

    return datasets.DatasetInfo(
        # This is the description that will appear on the datasets page.
        description=_DESCRIPTION,
        # Columns and their types; they differ between the two configurations.
        features=features,
        # No common (input, target) tuple is declared.
        supervised_keys=None,
        # Homepage of the dataset for documentation
        homepage=_HOMEPAGE,
        # License for the dataset if available
        license=_LICENSE,
        # Citation for the dataset
        citation=_CITATION,
        task_templates=task_templates,
    )
def _info(self):
    """Build the ``DatasetInfo`` for the news-category schema.

    Columns, in order: a ``category`` class label with seven names, then a
    string ``text``.
    """
    category_names = ["siyaset", "dunya", "ekonomi", "kultur", "saglik", "spor", "teknoloji"]
    # ``category`` is declared before ``text``; keep that column order.
    schema = datasets.Features(
        {
            "category": datasets.features.ClassLabel(names=category_names),
            "text": datasets.Value("string"),
        }
    )
    classification = TextClassification(text_column="text", label_column="category")
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        # Homepage of the dataset for documentation
        homepage=_HOMEPAGE,
        # License for the dataset if available
        license=_LICENSE,
        # Citation for the dataset
        citation=_CITATION,
        task_templates=[classification],
    )
def _info(self):
    """Build the ``DatasetInfo``: a five-class ``label`` followed by string ``text``."""
    # NOTE(review): "2 star" is singular unlike "3 stars".."5 stars"; the names
    # are part of the schema, so they are reproduced verbatim.
    star_names = ["1 star", "2 star", "3 stars", "4 stars", "5 stars"]
    classification = TextClassification(text_column="text", label_column="label")
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=datasets.Features(
            {
                "label": datasets.features.ClassLabel(names=star_names),
                "text": datasets.Value("string"),
            }
        ),
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
        task_templates=[classification],
    )
def _info(self):
    """Build the ``DatasetInfo``: string ``text`` and a nine-class ``label``.

    Only features, homepage and task templates are set here.
    """
    section_names = [
        "Web",
        "Panorama",
        "International",
        "Wirtschaft",
        "Sport",
        "Inland",
        "Etat",
        "Wissenschaft",
        "Kultur",
    ]
    schema = datasets.Features(
        {
            "text": datasets.Value("string"),
            "label": datasets.features.ClassLabel(names=section_names),
        }
    )
    classification = TextClassification(text_column="text", label_column="label")
    return datasets.DatasetInfo(
        features=schema,
        homepage="https://tblock.github.io/10kGNAD/",
        task_templates=[classification],
    )
def _info(self):
    """Build the ``DatasetInfo`` for the selected label granularity.

    Both configurations expose a string ``tweet`` and a ``label`` class label;
    ``Coarse_Grained`` has two class names and ``Fine_Grained`` has five.
    """
    config_name = self.config.name
    if config_name == "Coarse_Grained":
        coarse_names = ["Abusive/Offensive", "Normal"]
        features = datasets.Features(
            {
                "tweet": datasets.Value("string"),
                "label": datasets.features.ClassLabel(names=coarse_names),
            }
        )
    elif config_name == "Fine_Grained":
        fine_names = ["Abusive/Offensive", "Normal", "Religious Hate", "Sexism", "Profane/Untargeted"]
        features = datasets.Features(
            {
                "tweet": datasets.Value("string"),
                "label": datasets.features.ClassLabel(names=fine_names),
            }
        )

    classification = TextClassification(text_column="tweet", label_column="label")
    return datasets.DatasetInfo(
        # This is the description that will appear on the datasets page.
        description=_DESCRIPTION,
        # Columns and their types; they differ between the two configurations.
        features=features,
        # supervised_keys is intentionally left unset here.
        # Homepage of the dataset for documentation
        homepage=_HOMEPAGE,
        # License for the dataset if available
        license=_LICENSE,
        # Citation for the dataset
        citation=_CITATION,
        task_templates=[classification],
    )
def test_align_with_features(self):
    """``align_with_features`` must swap the generic label type for the concrete one."""
    unaligned = TextClassification(text_column="input_text", label_column="input_label")
    # Before alignment the schema holds the ClassLabel class itself.
    self.assertEqual(unaligned.label_schema["labels"], ClassLabel)

    dataset_features = Features({"input_label": ClassLabel(names=self.labels)})
    aligned = unaligned.align_with_features(dataset_features)
    # After alignment it holds a ClassLabel built from the dataset's names.
    self.assertEqual(aligned.label_schema["labels"], ClassLabel(names=self.labels))
def test_column_mapping(self):
    """Dataset columns must map onto the template's ``text``/``labels`` names."""
    template = TextClassification(text_column="input_text", label_column="input_label")
    expected_mapping = {"input_text": "text", "input_label": "labels"}
    self.assertDictEqual(expected_mapping, template.column_mapping)
def _info(self):
    """Build the ``DatasetInfo``: string ``text`` and a 77-class ``label``.

    The class names are kept exactly as written (including their casing and
    punctuation) because they are part of the schema.
    """
    intent_names = [
        "activate_my_card",
        "age_limit",
        "apple_pay_or_google_pay",
        "atm_support",
        "automatic_top_up",
        "balance_not_updated_after_bank_transfer",
        "balance_not_updated_after_cheque_or_cash_deposit",
        "beneficiary_not_allowed",
        "cancel_transfer",
        "card_about_to_expire",
        "card_acceptance",
        "card_arrival",
        "card_delivery_estimate",
        "card_linking",
        "card_not_working",
        "card_payment_fee_charged",
        "card_payment_not_recognised",
        "card_payment_wrong_exchange_rate",
        "card_swallowed",
        "cash_withdrawal_charge",
        "cash_withdrawal_not_recognised",
        "change_pin",
        "compromised_card",
        "contactless_not_working",
        "country_support",
        "declined_card_payment",
        "declined_cash_withdrawal",
        "declined_transfer",
        "direct_debit_payment_not_recognised",
        "disposable_card_limits",
        "edit_personal_details",
        "exchange_charge",
        "exchange_rate",
        "exchange_via_app",
        "extra_charge_on_statement",
        "failed_transfer",
        "fiat_currency_support",
        "get_disposable_virtual_card",
        "get_physical_card",
        "getting_spare_card",
        "getting_virtual_card",
        "lost_or_stolen_card",
        "lost_or_stolen_phone",
        "order_physical_card",
        "passcode_forgotten",
        "pending_card_payment",
        "pending_cash_withdrawal",
        "pending_top_up",
        "pending_transfer",
        "pin_blocked",
        "receiving_money",
        "Refund_not_showing_up",
        "request_refund",
        "reverted_card_payment?",
        "supported_cards_and_currencies",
        "terminate_account",
        "top_up_by_bank_transfer_charge",
        "top_up_by_card_charge",
        "top_up_by_cash_or_cheque",
        "top_up_failed",
        "top_up_limits",
        "top_up_reverted",
        "topping_up_by_card",
        "transaction_charged_twice",
        "transfer_fee_charged",
        "transfer_into_account",
        "transfer_not_received_by_recipient",
        "transfer_timing",
        "unable_to_verify_identity",
        "verify_my_identity",
        "verify_source_of_funds",
        "verify_top_up",
        "virtual_card_not_working",
        "visa_or_mastercard",
        "why_verify_identity",
        "wrong_amount_of_cash_received",
        "wrong_exchange_rate_for_cash_withdrawal",
    ]
    schema = datasets.Features(
        {
            "text": datasets.Value("string"),
            "label": datasets.features.ClassLabel(names=intent_names),
        }
    )
    classification = TextClassification(text_column="text", label_column="label")
    return datasets.DatasetInfo(
        description=_DESCRIPTION,
        features=schema,
        supervised_keys=None,
        homepage=_HOMEPAGE,
        license=_LICENSE,
        citation=_CITATION,
        task_templates=[classification],
    )