Python TextClassification 예제들, datasets.tasks.TextClassification Python 예제들

예제 #1

0

파일 보기

 def _info(self):
     """Return the dataset metadata: a string ``tweet`` column and an eight-class ``label``."""
     label_names = [
         "none", "anger", "joy", "sadness", "love", "sympathy", "surprise", "fear",
     ]
     schema = datasets.Features({
         "tweet": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=label_names),
     })
     # Declare which columns hold the input text and the target class.
     classification = TextClassification(text_column="tweet", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         homepage=_HOMEPAGE,
         citation=_CITATION,
         task_templates=[classification],
     )

예제 #2

0

파일 보기

 def _info(self):
     """Return the dataset metadata: a string ``text`` column and a two-class ``label``."""
     # Labels: 0="Non-hate Speech", 1="Hate Speech"
     binary_label = datasets.features.ClassLabel(names=["0", "1"])
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": binary_label,
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[classification],
     )

예제 #3

0

파일 보기

 def _info(self):
     """Return the dataset metadata: a string ``news_title`` column and a five-class ``label``."""
     topic_names = ["Africa", "Health", "Nigeria", "Politics", "World"]
     schema = datasets.Features({
         "news_title": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=topic_names),
     })
     classification = TextClassification(text_column="news_title", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         homepage="https://github.com/uds-lsv/transfer-distant-transformer-african",
         citation=_CITATION,
         task_templates=[classification],
     )

예제 #4

0

파일 보기

 def _info(self):
     """Return the dataset metadata: a string ``text`` column and a Negative/Positive ``label``."""
     polarity = datasets.features.ClassLabel(names=["Negative", "Positive"])
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": polarity,
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://github.com/komari6/Arabic-twitter-corpus-AJGT",
         citation=_CITATION,
         task_templates=[classification],
     )

예제 #5

0

파일 보기

 def _info(self):
     """Return the dataset metadata: a string ``review_body`` and a five-class ``star_rating``."""
     rating_names = ["1", "2", "3", "4", "5"]
     schema = datasets.Features({
         "review_body": datasets.Value("string"),
         "star_rating": datasets.features.ClassLabel(names=rating_names),
     })
     # The label column is "star_rating" here, not the more common "label".
     classification = TextClassification(
         text_column="review_body",
         label_column="star_rating",
     )
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://github.com/wongnai/wongnai-corpus",
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[classification],
     )

예제 #6

0

파일 보기

    def test_remove_and_map_on_task_template(self):
        """Check that removing the label column and then mapping leaves ``task_templates`` empty."""
        schema = Features({"text": Value("string"), "label": ClassLabel(names=("pos", "neg"))})
        # The template is handed to DatasetInfo bare (not wrapped in a list), exactly as written here.
        template = TextClassification(text_column="text", label_column="label")
        dataset = Dataset.from_dict(
            {"text": ["A sentence."], "label": ["pos"]},
            info=DatasetInfo(features=schema, task_templates=template),
        )

        def identity(example):
            return example

        without_label = dataset.remove_columns("label")
        # The template refers to the "label" column, which no longer exists at this point.
        assert without_label.map(identity).info.task_templates == []

예제 #7

0

파일 보기

파일: myanmar_news.py 프로젝트: albertvillanova/huggingface_datasets

 def _info(self):
     """Return the dataset metadata: a string ``text`` column and a four-class ``category``."""
     category_label = datasets.ClassLabel(
         names=["Sport", "Politic", "Business", "Entertainment"]
     )
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "category": category_label,
     })
     classification = TextClassification(text_column="text", label_column="category")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://github.com/ayehninnkhine/MyanmarNewsClassificationSystem",
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[classification],
     )

예제 #8

0

파일 보기

 def _info(self):
     """Return the dataset metadata: a string ``text`` column and a five-class ``label``."""
     rating_names = ["1", "2", "3", "4", "5"]
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=rating_names),
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage="https://github.com/mohamedadaly/LABR",
         citation=_CITATION,
         task_templates=[classification],
     )

예제 #9

0

파일 보기

파일: new_dataset.py 프로젝트: UgurDURA/Protein-Active-Site-w-ML

    def _info(self):
        """Build the ``datasets.DatasetInfo`` describing the selected configuration.

        The feature schema depends on ``self.config.name``:

        * ``"first_domain"``: ``sequence`` (string) and ``ECnumber`` (ClassLabel "1".."7").
        * ``"second_domain"``: ``sequence``, ``ECnumber_one`` and ``ECnumber_two`` (all strings).

        Returns:
            The ``datasets.DatasetInfo`` for the active configuration.

        Raises:
            ValueError: if ``self.config.name`` is not one of the configurations above.
        """
        config_name = self.config.name  # One of the names declared in BUILDER_CONFIGS.
        if config_name == "first_domain":
            features = datasets.Features(
                {
                    "sequence": datasets.Value("string"),
                    "ECnumber": datasets.features.ClassLabel(names=["1", "2", "3", "4", "5", "6", "7"]),
                    # TODO: specify the main classes of enzymes by name?
                }
            )
            # The template has to reference columns that exist in `features`; the
            # previous "text"/"label" pair matched neither configuration.
            task_templates = [TextClassification(text_column="sequence", label_column="ECnumber")]
        elif config_name == "second_domain":
            # Different features from "first_domain": both EC-number columns are plain strings.
            features = datasets.Features(
                {
                    "sequence": datasets.Value("string"),
                    "ECnumber_one": datasets.Value("string"),
                    "ECnumber_two": datasets.Value("string"),
                }
            )
            # No ClassLabel column in this configuration, so no classification
            # template is attached.
            task_templates = None
        else:
            # Fail with a clear message instead of an unbound-local error further down.
            raise ValueError(
                f"Unknown config name {config_name!r}; expected 'first_domain' or 'second_domain'."
            )
        return datasets.DatasetInfo(
            # This is the description that will appear on the datasets page.
            description=_DESCRIPTION,
            # Column names and types; built above because they differ per configuration.
            features=features,
            # If there's a common (input, target) tuple from the features,
            # specify them here. They'll be used if as_supervised=True in
            # builder.as_dataset.
            supervised_keys=None,
            # Homepage of the dataset for documentation
            homepage=_HOMEPAGE,
            # License for the dataset if available
            license=_LICENSE,
            # Citation for the dataset
            citation=_CITATION,
            task_templates=task_templates,
        )

예제 #10

0

파일 보기

파일: ttc4900.py 프로젝트: albertvillanova/huggingface_datasets

 def _info(self):
     """Return the dataset metadata: a seven-class ``category`` column followed by string ``text``."""
     category_names = [
         "siyaset", "dunya", "ekonomi", "kultur", "saglik", "spor", "teknoloji",
     ]
     # Key order is kept as category-then-text.
     schema = datasets.Features({
         "category": datasets.features.ClassLabel(names=category_names),
         "text": datasets.Value("string"),
     })
     classification = TextClassification(text_column="text", label_column="category")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         # Homepage of the dataset for documentation
         homepage=_HOMEPAGE,
         # License for the dataset if available
         license=_LICENSE,
         # Citation for the dataset
         citation=_CITATION,
         task_templates=[classification],
     )

예제 #11

0

파일 보기

 def _info(self):
     """Return the dataset metadata: a five-class ``label`` column followed by string ``text``."""
     # Class names are reproduced exactly as given, including the singular "2 star".
     star_names = ["1 star", "2 star", "3 stars", "4 stars", "5 stars"]
     # Key order is kept as label-then-text.
     schema = datasets.Features({
         "label": datasets.features.ClassLabel(names=star_names),
         "text": datasets.Value("string"),
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[classification],
     )

예제 #12

0

파일 보기

파일: gnad10.py 프로젝트: albertvillanova/huggingface_datasets

 def _info(self):
     """Return the dataset metadata: a string ``text`` column and a nine-class ``label``."""
     section_names = [
         "Web",
         "Panorama",
         "International",
         "Wirtschaft",
         "Sport",
         "Inland",
         "Etat",
         "Wissenschaft",
         "Kultur",
     ]
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=section_names),
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         homepage="https://tblock.github.io/10kGNAD/",
         task_templates=[classification],
     )

예제 #13

0

파일 보기

파일: roman_urdu_hate_speech.py 프로젝트: albertvillanova/huggingface_datasets

    def _info(self):
        """Build the ``datasets.DatasetInfo`` describing the selected configuration.

        Both configurations expose a string ``tweet`` column and a ClassLabel
        ``label`` column; they differ only in the set of class names:

        * ``"Coarse_Grained"``: two classes.
        * ``"Fine_Grained"``: five classes.

        Returns:
            The ``datasets.DatasetInfo`` for the active configuration.

        Raises:
            ValueError: if ``self.config.name`` is neither of the names above.
        """
        config_name = self.config.name
        if config_name == "Coarse_Grained":
            label_names = ["Abusive/Offensive", "Normal"]
        elif config_name == "Fine_Grained":
            label_names = ["Abusive/Offensive", "Normal", "Religious Hate", "Sexism", "Profane/Untargeted"]
        else:
            # Previously an unexpected name fell through both checks and surfaced
            # as an unbound-local error; fail with an explicit message instead.
            raise ValueError(
                f"Unknown config name {config_name!r}; expected 'Coarse_Grained' or 'Fine_Grained'."
            )

        features = datasets.Features(
            {
                "tweet": datasets.Value("string"),
                "label": datasets.features.ClassLabel(names=label_names),
            }
        )
        return datasets.DatasetInfo(
            # This is the description that will appear on the datasets page.
            description=_DESCRIPTION,
            # Column names and types; the label classes depend on the configuration.
            features=features,
            # supervised_keys is intentionally not passed (the original left it commented out).
            # Homepage of the dataset for documentation
            homepage=_HOMEPAGE,
            # License for the dataset if available
            license=_LICENSE,
            # Citation for the dataset
            citation=_CITATION,
            task_templates=[TextClassification(text_column="tweet", label_column="label")],
        )

예제 #14

0

파일 보기

파일: test_tasks.py 프로젝트: cahya-wirawan/datasets

 def test_align_with_features(self):
     """Check the label schema before and after aligning the template with concrete features."""
     unaligned = TextClassification(text_column="input_text", label_column="input_label")
     # Before alignment the schema entry is compared against the ClassLabel class itself.
     self.assertEqual(unaligned.label_schema["labels"], ClassLabel)

     dataset_features = Features({"input_label": ClassLabel(names=self.labels)})
     aligned = unaligned.align_with_features(dataset_features)
     # After alignment it is compared against a ClassLabel built from self.labels.
     self.assertEqual(aligned.label_schema["labels"], ClassLabel(names=self.labels))

예제 #15

0

파일 보기

파일: test_tasks.py 프로젝트: cahya-wirawan/datasets

 def test_column_mapping(self):
     """Check that ``column_mapping`` maps the configured column names to "text" and "labels"."""
     expected_mapping = {"input_text": "text", "input_label": "labels"}
     template = TextClassification(text_column="input_text", label_column="input_label")
     self.assertDictEqual(expected_mapping, template.column_mapping)

예제 #16

0

파일 보기

 def _info(self):
     """Return the dataset metadata: a string ``text`` column and a many-class ``label``."""
     # Class names are reproduced exactly as given (including the capitalised
     # "Refund_not_showing_up" and the trailing "?" on "reverted_card_payment?").
     label_names = [
         "activate_my_card",
         "age_limit",
         "apple_pay_or_google_pay",
         "atm_support",
         "automatic_top_up",
         "balance_not_updated_after_bank_transfer",
         "balance_not_updated_after_cheque_or_cash_deposit",
         "beneficiary_not_allowed",
         "cancel_transfer",
         "card_about_to_expire",
         "card_acceptance",
         "card_arrival",
         "card_delivery_estimate",
         "card_linking",
         "card_not_working",
         "card_payment_fee_charged",
         "card_payment_not_recognised",
         "card_payment_wrong_exchange_rate",
         "card_swallowed",
         "cash_withdrawal_charge",
         "cash_withdrawal_not_recognised",
         "change_pin",
         "compromised_card",
         "contactless_not_working",
         "country_support",
         "declined_card_payment",
         "declined_cash_withdrawal",
         "declined_transfer",
         "direct_debit_payment_not_recognised",
         "disposable_card_limits",
         "edit_personal_details",
         "exchange_charge",
         "exchange_rate",
         "exchange_via_app",
         "extra_charge_on_statement",
         "failed_transfer",
         "fiat_currency_support",
         "get_disposable_virtual_card",
         "get_physical_card",
         "getting_spare_card",
         "getting_virtual_card",
         "lost_or_stolen_card",
         "lost_or_stolen_phone",
         "order_physical_card",
         "passcode_forgotten",
         "pending_card_payment",
         "pending_cash_withdrawal",
         "pending_top_up",
         "pending_transfer",
         "pin_blocked",
         "receiving_money",
         "Refund_not_showing_up",
         "request_refund",
         "reverted_card_payment?",
         "supported_cards_and_currencies",
         "terminate_account",
         "top_up_by_bank_transfer_charge",
         "top_up_by_card_charge",
         "top_up_by_cash_or_cheque",
         "top_up_failed",
         "top_up_limits",
         "top_up_reverted",
         "topping_up_by_card",
         "transaction_charged_twice",
         "transfer_fee_charged",
         "transfer_into_account",
         "transfer_not_received_by_recipient",
         "transfer_timing",
         "unable_to_verify_identity",
         "verify_my_identity",
         "verify_source_of_funds",
         "verify_top_up",
         "virtual_card_not_working",
         "visa_or_mastercard",
         "why_verify_identity",
         "wrong_amount_of_cash_received",
         "wrong_exchange_rate_for_cash_withdrawal",
     ]
     schema = datasets.Features({
         "text": datasets.Value("string"),
         "label": datasets.features.ClassLabel(names=label_names),
     })
     classification = TextClassification(text_column="text", label_column="label")
     return datasets.DatasetInfo(
         description=_DESCRIPTION,
         features=schema,
         supervised_keys=None,
         homepage=_HOMEPAGE,
         license=_LICENSE,
         citation=_CITATION,
         task_templates=[classification],
     )