Exemple #1
0
class CommitmentBankTask(Task):
    """CommitmentBank (CB): 3-way NLI over premise/hypothesis pairs."""

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    TASK_TYPE = TaskTypes.CLASSIFICATION
    LABELS = ["neutral", "entailment", "contradiction"]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Convert parsed JSONL records into Example objects.

        Test records carry no gold label, so the last LABELS entry is used
        as a placeholder there.
        """
        placeholder = cls.LABELS[-1]
        return [
            Example(
                guid="%s-%s" % (set_type, record["idx"]),
                input_premise=record["premise"],
                input_hypothesis=record["hypothesis"],
                label=record["label"] if set_type != "test" else placeholder,
            )
            for record in lines
        ]
Exemple #2
0
class BoolQTask(Task):
    """BoolQ (SuperGLUE): yes/no question answering over a passage."""

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    TASK_TYPE = TaskTypes.CLASSIFICATION
    LABELS = [False, True]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Convert parsed JSONL records into Example objects.

        Test records have no gold label; the last LABELS entry stands in.
        """
        is_test = set_type == "test"
        examples = []
        for idx, record in enumerate(lines):
            examples.append(
                Example(
                    guid="%s-%s" % (set_type, idx),
                    input_question=record["question"],
                    input_passage=record["passage"],
                    label=cls.LABELS[-1] if is_test else record["label"],
                )
            )
        return examples
Exemple #3
0
class MutualPlusTask(mc_template.AbstractMultipleChoiceTask):
    """MuTual-plus: 4-way multiple-choice dialogue reasoning.

    Note: unlike most sibling tasks, no test-set label placeholder is
    substituted here; the raw "answers" field is used for every split.
    """

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = ["A", "B", "C", "D"]
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path),
                                     set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path),
                                     set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path),
                                     set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Convert parsed JSONL records into multiple-choice Examples."""
        examples = []
        for i, line in enumerate(lines):
            examples.append(
                Example(
                    guid="%s-%s" % (set_type, i),
                    prompt=line["article"],
                    # list(...) copies the options directly; the previous
                    # identity comprehension was a needless re-loop.
                    choice_list=list(line["options"]),
                    label=line["answers"],
                ))
        return examples
Exemple #4
0
class SentevalTenseTask(Task):
    """SentEval-style probing task: verb tense (PAST vs. PRES)."""

    Example = Example
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    TASK_TYPE = TaskTypes.CLASSIFICATION
    LABELS = ["PAST", "PRES"]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(path=self.train_path, set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(path=self.val_path, set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(path=self.test_path, set_type="test")

    @classmethod
    def _create_examples(cls, path, set_type):
        """Read the headerless CSV at *path* and emit one Example per row.

        Test rows get the last LABELS entry as a placeholder label.
        """
        frame = pd.read_csv(path,
                            index_col=0,
                            names=["split", "label", "text", "unk_1", "unk_2"])
        fallback = cls.LABELS[-1]
        return [
            Example(
                guid="%s-%s" % (set_type, idx),
                text=row.text,
                label=fallback if set_type == "test" else row.label,
            )
            for idx, row in frame.iterrows()
        ]
Exemple #5
0
class CosmosQATask(mc_template.AbstractMultipleChoiceTask):
    """CosmosQA: 4-way multiple-choice commonsense reading comprehension."""

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = [0, 1, 2, 3]
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(path=self.train_path, set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(path=self.val_path, set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(path=self.test_path, set_type="test")

    @classmethod
    def _create_examples(cls, path, set_type):
        """Read the CSV at *path* and emit one 4-way Example per row.

        Test rows get the last CHOICE_KEYS entry as a placeholder label.
        """
        frame = pd.read_csv(path)
        is_test = set_type == "test"
        examples = []
        for idx, row in enumerate(frame.itertuples()):
            choices = [row.answer0, row.answer1, row.answer2, row.answer3]
            examples.append(
                Example(
                    guid="%s-%s" % (set_type, idx),
                    prompt=row.context + " " + row.question,
                    choice_list=choices,
                    label=cls.CHOICE_KEYS[-1] if is_test else row.label,
                ))
        return examples
Exemple #6
0
class AdversarialNliTask(Task):
    """Adversarial NLI (ANLI): 3-way classification over context/hypothesis."""

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    TASK_TYPE = TaskTypes.CLASSIFICATION
    LABELS = ["c", "e", "n"]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Convert JSONL records ("context"/"hypothesis") into NLI Examples.

        Test records get the last LABELS entry as a placeholder label.
        """
        fallback = cls.LABELS[-1]
        return [
            Example(
                guid="%s-%s" % (set_type, idx),
                input_premise=record["context"],
                input_hypothesis=record["hypothesis"],
                label=record["label"] if set_type != "test" else fallback,
            )
            for idx, record in enumerate(lines)
        ]
Exemple #7
0
class MrpcTask(GlueMixin, Task):
    """MRPC (GLUE): binary paraphrase classification of sentence pairs."""

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    TASK_TYPE = TaskTypes.CLASSIFICATION
    LABELS = ["0", "1"]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_jsonl(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_jsonl(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_jsonl(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Build sentence-pair Examples from parsed JSONL records.

        Test records get the last LABELS entry as a placeholder label.
        """
        fallback = cls.LABELS[-1]
        return [
            Example(
                # NOTE: get_glue_preds() is dependent on this guid format.
                guid="%s-%s" % (set_type, idx),
                text_a=record["text_a"],
                text_b=record["text_b"],
                label=record["label"] if set_type != "test" else fallback,
            )
            for idx, record in enumerate(lines)
        ]
Exemple #8
0
class SpatialTask(Task):
    """Binary sentence classification for spatial language."""

    # NOTE(review): unlike sibling tasks, this class defines no
    # Example/TokenizedExample/DataRow aliases — confirm that is intentional.
    TASK_TYPE = TaskTypes.CLASSIFICATION
    LABELS = ["0", "1"]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)
    Batch = Batch

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(path=self.train_path, set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(path=self.val_path, set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(path=self.test_path, set_type="test")

    @classmethod
    def _create_examples(cls, path, set_type):
        """Read the indexed CSV at *path* and emit one Example per row.

        Test rows get the last LABELS entry as a placeholder label.
        """
        frame = pd.read_csv(path, index_col=0)
        fallback = cls.LABELS[-1]
        return [
            Example(
                guid="%s-%s" % (set_type, idx),
                sentence=row.sentence,
                label=fallback if set_type == "test" else row.label,
            )
            for idx, row in frame.iterrows()
        ]
Exemple #9
0
class SentEvalTopConstituentsTask(base.BaseSentEvalTask):
    """SentEval probing task: top-constituent sequence of the parse tree.

    Declares only the label set; data loading comes from BaseSentEvalTask.
    """

    Example = Example
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    # The 19 most frequent top-constituent sequences plus "OTHER".
    LABELS = [
        "ADVP_NP_VP_.",
        "CC_ADVP_NP_VP_.",
        "CC_NP_VP_.",
        "IN_NP_VP_.",
        "NP_ADVP_VP_.",
        "NP_NP_VP_.",
        "NP_PP_.",
        "NP_VP_.",
        "OTHER",
        "PP_NP_VP_.",
        "RB_NP_VP_.",
        "SBAR_NP_VP_.",
        "SBAR_VP_.",
        "S_CC_S_.",
        "S_NP_VP_.",
        "S_VP_.",
        "VBD_NP_VP_.",
        "VP_.",
        "WHADVP_SQ_.",
        "WHNP_SQ_.",
    ]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)
Exemple #10
0
class WinograndeTask(mc_template.AbstractMultipleChoiceTask):
    """Winogrande: binary-choice pronoun/coreference resolution.

    Each sentence contains a "_" placeholder; the two candidate completions
    are formed by substituting option1/option2 before the remaining text.
    """

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = [1, 2]
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Split each sentence at its "_" placeholder and build Examples.

        Test records get the last CHOICE_KEYS entry as a placeholder label.
        """
        examples = []

        for i, line in enumerate(lines):
            # str.partition splits on the FIRST "_" only; the previous
            # split("_") raised ValueError whenever the sentence happened to
            # contain a second underscore.
            choice_pre, _, option = line["sentence"].partition("_")

            examples.append(
                Example(
                    guid="%s-%s" % (set_type, i),
                    prompt=option,
                    choice_list=[choice_pre + line["option1"], choice_pre + line["option2"]],
                    label=int(line["answer"]) if set_type != "test" else cls.CHOICE_KEYS[-1],
                )
            )
        return examples
Exemple #11
0
class QuailTask(mc_template.AbstractMultipleChoiceTask):
    """QuAIL: 4-way multiple-choice reading comprehension.

    Note: no test-set label placeholder is substituted here; the raw
    "correct_answer_id" field is used for every split.
    """

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = [0, 1, 2, 3]
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path),
                                     set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path),
                                     set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path),
                                     set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Convert parsed JSONL records into multiple-choice Examples."""
        examples = []
        for i, line in enumerate(lines):
            examples.append(
                Example(
                    guid="%s-%s" % (set_type, i),
                    prompt=line["context"] + " " + line["question"],
                    # list(...) copies the answers directly; the previous
                    # identity comprehension was a needless re-loop.
                    choice_list=list(line["answers"]),
                    label=line["correct_answer_id"],
                ))
        return examples
Exemple #12
0
class RuHumorTask(Task):
    """Russian humor detection: binary text classification."""

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    TASK_TYPE = TaskTypes.CLASSIFICATION
    LABELS = ["0", "1"]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_jsonl(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        # NOTE(review): sibling tasks read self.val_path here; this one pulls
        # path_dict['validation'] directly — confirm the key is intentional.
        return self._create_examples(lines=read_jsonl(self.path_dict['validation']), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_jsonl(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Convert JSONL records to Examples; gold labels are stringified.

        Test records get the last LABELS entry as a placeholder label.
        """
        is_test = set_type == "test"
        examples = []
        for idx, record in enumerate(lines):
            examples.append(
                Example(
                    guid="%s-%s" % (set_type, idx),
                    text=record["text"],
                    label=cls.LABELS[-1] if is_test else str(record["label"]),
                )
            )
        return examples
Exemple #13
0
class WiCTask(SuperGlueMixin, Task):
    """WiC (SuperGLUE): is a word used in the same sense in two sentences?

    Span-comparison classification over one marked span per sentence.
    """

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    TASK_TYPE = TaskTypes.SPAN_COMPARISON_CLASSIFICATION
    LABELS = [False, True]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    @property
    def num_spans(self):
        """One span per sentence, two sentences."""
        return 2

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Build span-comparison Examples from parsed JSONL records.

        Test records get the last LABELS entry as a placeholder label.
        """
        examples = []
        for record in lines:
            first_span = ExclusiveSpan(int(record["start1"]), int(record["end1"]))
            second_span = ExclusiveSpan(int(record["start2"]), int(record["end2"]))
            # The target word may surface differently (e.g. tense) in the two
            # sentences, so no equality assertion is made here.
            examples.append(
                Example(
                    # NOTE: WiCTask.super_glue_format_preds() is dependent on this guid format.
                    guid="%s-%s" % (set_type, record["idx"]),
                    sentence1=record["sentence1"],
                    sentence2=record["sentence2"],
                    word=record["word"],
                    span1=first_span,
                    span2=second_span,
                    label=cls.LABELS[-1] if set_type == "test" else record["label"],
                ))
        return examples

    @classmethod
    def super_glue_format_preds(cls, pred_dict):
        """Reformat this task's raw predictions to have the structure expected by SuperGLUE."""
        return [
            {
                "idx": int(guid.split("-")[1]),
                "label": str(cls.LABELS[pred]).lower(),
            }
            for pred, guid in zip(list(pred_dict["preds"]), list(pred_dict["guids"]))
        ]
Exemple #14
0
class SemevalTask(edge_probing_two_span.AbstractProbingTask):
    """SemEval relation classification as a two-span edge-probing task."""

    Example = Example
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    # Directed relation labels (e1,e2)/(e2,e1) plus the catch-all "Other".
    LABELS = [
        "Cause-Effect(e1,e2)",
        "Cause-Effect(e2,e1)",
        "Component-Whole(e1,e2)",
        "Component-Whole(e2,e1)",
        "Content-Container(e1,e2)",
        "Content-Container(e2,e1)",
        "Entity-Destination(e1,e2)",
        "Entity-Destination(e2,e1)",
        "Entity-Origin(e1,e2)",
        "Entity-Origin(e2,e1)",
        "Instrument-Agency(e1,e2)",
        "Instrument-Agency(e2,e1)",
        "Member-Collection(e1,e2)",
        "Member-Collection(e2,e1)",
        "Message-Topic(e1,e2)",
        "Message-Topic(e2,e1)",
        "Other",
        "Product-Producer(e1,e2)",
        "Product-Producer(e2,e1)",
    ]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    @property
    def num_spans(self):
        """Two entity spans per target."""
        return 2

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Emit one Example per (line, target) pair, each with two spans.

        Test targets get a single-element placeholder label list.
        """
        examples = []
        for line_num, line in enumerate(lines):
            for target_num, target in enumerate(line["targets"]):
                gold = [target["label"]] if set_type != "test" else [cls.LABELS[-1]]
                examples.append(
                    Example(
                        guid="%s-%s-%s" % (set_type, line_num, target_num),
                        text=line["text"],
                        span1=target["span1"],
                        span2=target["span2"],
                        labels=gold,
                    )
                )
        return examples
Exemple #15
0
class NerTask(edge_probing_single_span.AbstractProbingTask):
    """Named-entity recognition as a single-span edge-probing task."""

    Example = Example
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    # OntoNotes-style entity label set.
    LABELS = [
        "CARDINAL",
        "DATE",
        "EVENT",
        "FAC",
        "GPE",
        "LANGUAGE",
        "LAW",
        "LOC",
        "MONEY",
        "NORP",
        "ORDINAL",
        "ORG",
        "PERCENT",
        "PERSON",
        "PRODUCT",
        "QUANTITY",
        "TIME",
        "WORK_OF_ART",
    ]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    @property
    def num_spans(self):
        """A single entity span per target."""
        return 1

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Emit one Example per (line, target) pair, each with one span.

        Test targets get a single-element placeholder label list.
        """
        examples = []
        for line_num, line in enumerate(lines):
            for target_num, target in enumerate(line["targets"]):
                gold = [target["label"]] if set_type != "test" else [cls.LABELS[-1]]
                examples.append(
                    Example(
                        guid="%s-%s-%s" % (set_type, line_num, target_num),
                        text=line["text"],
                        span=target["span1"],
                        labels=gold,
                    )
                )
        return examples
Exemple #16
0
class MultiRCTask(Task):
    """MultiRC (SuperGLUE): binary classification of answer candidates.

    Each passage/question pair yields one Example per candidate answer,
    labeled 0/1 for whether that candidate is a correct answer.
    """

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    TASK_TYPE = TaskTypes.CLASSIFICATION
    LABELS = [0, 1]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    def __init__(self, name, path_dict, filter_sentences=True):
        """
        Args:
            name: task name.
            path_dict: mapping from split name to data path.
            filter_sentences: if True, keep only the passage sentences marked
                as used for each question; otherwise use the full passage.
        """
        super().__init__(name=name, path_dict=path_dict)
        self.name = name
        self.path_dict = path_dict
        self.filter_sentences = filter_sentences

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    def _create_examples(self, lines, set_type):
        """Flatten passages into one Example per (question, answer) pair.

        Test answers get the last LABELS entry as a placeholder label.
        """
        examples = []
        question_id = 0
        for line in lines:
            # The passage text carries HTML markup; keep only the plain-text
            # sentence strings between tags.
            soup = bs4.BeautifulSoup(line["passage"]["text"], features="lxml")
            sentence_ls = []
            # The previous enumerate() index here was unused — iterate directly.
            for elem in soup.html.body.contents:
                if isinstance(elem, bs4.element.NavigableString):
                    sentence_ls.append(str(elem).strip())

            for question_dict in line["passage"]["questions"]:
                question = question_dict["question"]
                if self.filter_sentences:
                    # "sentences_used" holds 1-based indices, hence start=1.
                    paragraph = " ".join(
                        sentence
                        for i, sentence in enumerate(sentence_ls, start=1)
                        if i in question_dict["sentences_used"]
                    )
                else:
                    paragraph = " ".join(sentence_ls)
                for answer_dict in question_dict["answers"]:
                    answer = answer_dict["text"]
                    examples.append(
                        Example(
                            guid="%s-%s" % (set_type, line["idx"]),
                            paragraph=paragraph,
                            question=question,
                            answer=answer,
                            label=answer_dict["label"] if set_type != "test" else self.LABELS[-1],
                            question_id=question_id,
                        )
                    )
                question_id += 1
        return examples
Exemple #17
0
class SentEvalObjNumberTask(base.BaseSentEvalTask):
    """SentEval probing task: object number (singular NN vs. plural NNS).

    Declares only the label set; data loading comes from BaseSentEvalTask.
    """

    Example = Example
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    LABELS = ["NN", "NNS"]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)
Exemple #18
0
class SentEvalPastPresentTask(base.BaseSentEvalTask):
    """SentEval probing task: main-verb tense (PAST vs. PRES).

    Declares only the label set; data loading comes from BaseSentEvalTask.
    """

    Example = Example
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    LABELS = ["PAST", "PRES"]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)
Exemple #19
0
class SentEvalBigramShiftTask(base.BaseSentEvalTask):
    """SentEval probing task: bigram shift detection (labels "I"/"O").

    Declares only the label set; data loading comes from BaseSentEvalTask.
    """

    Example = Example
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    LABELS = ["I", "O"]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)
Exemple #20
0
class SentEvalSentenceLengthTask(base.BaseSentEvalTask):
    """SentEval probing task: binned sentence length (7 integer bins).

    Declares only the label set; data loading comes from BaseSentEvalTask.
    """

    Example = Example
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    LABELS = [0, 1, 2, 3, 4, 5, 6]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)
class SentEvalCoordinationInversionTask(base.BaseSentEvalTask):
    """SentEval probing task: coordination inversion (labels "I"/"O").

    Declares only the label set; data loading comes from BaseSentEvalTask.
    """

    Example = Example
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    LABELS = ["I", "O"]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)
Exemple #22
0
class SentEvalTreeDepthTask(base.BaseSentEvalTask):
    """SentEval probing task: parse-tree depth (depths 5 through 11).

    Declares only the label set; data loading comes from BaseSentEvalTask.
    """

    Example = Example
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    LABELS = [5, 6, 7, 8, 9, 10, 11]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)
Exemple #23
0
class ReCoRDTask(Task):
    """ReCoRD (SuperGLUE), cast as binary classification.

    Every candidate entity of a passage becomes one Example per query,
    labeled True when its character span matches a gold answer span.
    """

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    TASK_TYPE = TaskTypes.CLASSIFICATION
    LABELS = [False, True]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Expand each (query, entity) pair into a binary-labeled Example.

        On non-test splits an entity is labeled True when its (start, end)
        span is among the gold answer spans; on the test split every label
        is False and answers_dict stays empty.
        """
        has_labels = set_type != "test"
        examples = []
        for line in lines:
            passage_text = line["passage"]["text"]
            for qas in line["qas"]:
                answers_dict = {}
                if has_labels:
                    answers_dict = {
                        (answer["start"], answer["end"]): answer["text"]
                        for answer in qas["answers"]
                    }
                for entity in line["passage"]["entities"]:
                    start, end = entity["start"], entity["end"]
                    label = False
                    if has_labels and (start, end) in answers_dict:
                        # Sanity check: the span text must equal the gold
                        # answer text (entity end indices are inclusive).
                        assert passage_text[start:end + 1] == answers_dict[(start, end)]
                        label = True
                    examples.append(
                        Example(
                            guid="%s-%s" % (set_type, len(examples)),
                            passage_text=passage_text,
                            query_text=qas["query"],
                            entity_start_char_idx=start,
                            entity_end_char_idx=end + 1,  # make exclusive
                            entity_str=passage_text[start:end + 1],
                            passage_idx=line["idx"],
                            question_idx=qas["idx"],
                            answers_dict=answers_dict,
                            label=label,
                        ))
        return examples
Exemple #24
0
class WSCTask(SuperGlueMixin, Task):
    """WSC (SuperGLUE): Winograd-style coreference as span comparison."""

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    TASK_TYPE = TaskTypes.SPAN_COMPARISON_CLASSIFICATION
    LABELS = [False, True]
    LABEL_TO_ID, ID_TO_LABEL = labels_to_bimap(LABELS)

    @property
    def num_spans(self):
        """The pronoun span and the candidate-antecedent span."""
        return 2

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Build span-pair Examples from parsed JSONL records.

        Span fields ("span1_index" etc.) are nested under the "target" key,
        not at the record's top level. Test records get the last LABELS
        entry as a placeholder label.
        """
        examples = []
        for record in lines:
            assert "target" in record
            target = record["target"]
            examples.append(
                Example(
                    # NOTE: WSCTask.super_glue_format_preds() is dependent on this guid format.
                    guid="%s-%s" % (set_type, record["idx"]),
                    text=record["text"],
                    span1_idx=target["span1_index"],
                    span2_idx=target["span2_index"],
                    span1_text=target["span1_text"],
                    span2_text=target["span2_text"],
                    label=cls.LABELS[-1] if set_type == "test" else record["label"],
                ))
        return examples

    @classmethod
    def super_glue_format_preds(cls, pred_dict):
        """Reformat this task's raw predictions to have the structure expected by SuperGLUE."""
        return [
            {
                "idx": int(guid.split("-")[1]),
                "label": str(cls.LABELS[pred]),
            }
            for pred, guid in zip(list(pred_dict["preds"]), list(pred_dict["guids"]))
        ]
Exemple #25
0
class ArctTask(mc_template.AbstractMultipleChoiceTask):
    """ARCT (Argument Reasoning Comprehension): pick the correct warrant."""

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = [0, 1]
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(self.train_path, set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(self.val_path, set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(self.test_path, set_type="test")

    @classmethod
    def _create_examples(cls, path, set_type):
        """Parse the TSV at *path* and emit one binary-choice Example per row.

        Each choice is "And since <warrant>, <claim>". Test rows get the
        last CHOICE_KEYS entry as a placeholder label.
        Dataset description:
        https://github.com/UKPLab/argument-reasoning-comprehension-task
        """
        column_names = [
            "#id",
            "warrant0",
            "warrant1",
            "gold_label",
            "reason",
            "claim",
            "debateTitle",
            "debateInfo",
        ]
        frame = pd.read_csv(
            path,
            sep="\t",
            header=0,
            names=column_names,
        )
        prefix = "And since "
        is_test = set_type == "test"
        examples = []
        for idx, row in enumerate(frame.itertuples()):
            examples.append(
                Example(
                    guid="%s-%s" % (set_type, idx),
                    prompt=row.reason + " ",
                    choice_list=[
                        prefix + row.warrant0 + ", " + row.claim,
                        prefix + row.warrant1 + ", " + row.claim,
                    ],
                    label=cls.CHOICE_KEYS[-1] if is_test else row.gold_label,
                ))
        return examples
Exemple #26
0
class CopaTask(SuperGlueMixin, mc_template.AbstractMultipleChoiceTask):
    """COPA (SuperGLUE): binary-choice causal reasoning (cause vs. effect)."""

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = [0, 1]
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    # Natural-language question appended to the premise for each question type.
    _QUESTION_DICT = {
        "cause": "What was the cause of this?",
        "effect": "What happened as a result?",
    }

    def get_train_examples(self):
        """Load and convert the training split."""
        return self._create_examples(lines=read_json_lines(self.train_path), set_type="train")

    def get_val_examples(self):
        """Load and convert the validation split."""
        return self._create_examples(lines=read_json_lines(self.val_path), set_type="val")

    def get_test_examples(self):
        """Load and convert the test split."""
        return self._create_examples(lines=read_json_lines(self.test_path), set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Build binary-choice Examples from parsed JSONL records.

        Test records get the last CHOICE_KEYS entry as a placeholder label.
        """
        examples = []
        for record in lines:
            question_text = cls._QUESTION_DICT[record["question"]]
            examples.append(
                Example(
                    # NOTE: CopaTask.super_glue_format_preds() is dependent on this guid format.
                    guid="%s-%s" % (set_type, record["idx"]),
                    prompt=record["premise"] + " " + question_text,
                    choice_list=[record["choice1"], record["choice2"]],
                    label=cls.CHOICE_KEYS[-1] if set_type == "test" else record["label"],
                ))
        return examples

    @classmethod
    def super_glue_format_preds(cls, pred_dict):
        """Reformat this task's raw predictions to have the structure expected by SuperGLUE."""
        return [
            {
                "idx": int(guid.split("-")[1]),
                "label": cls.CHOICE_KEYS[pred],
            }
            for pred, guid in zip(list(pred_dict["preds"]), list(pred_dict["guids"]))
        ]
Exemple #27
0
class SocialIQATask(mc_template.AbstractMultipleChoiceTask):
    """SocialIQA: three-way multiple choice over social commonsense questions.
    """

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = ["A", "B", "C"]
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        return self._create_examples(lines=read_json_lines(self.train_path),
                                     set_type="train")

    def get_val_examples(self):
        return self._create_examples(lines=read_json_lines(self.val_path),
                                     set_type="val")

    def get_test_examples(self):
        return self._create_examples(lines=read_json_lines(self.test_path),
                                     set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Build Examples, accepting both HF Datasets rows (with a "label"
        key) and original-format rows (with a "correct" key)."""
        answer_fields = ("answerA", "answerB", "answerC")
        # HF Datasets encodes the gold answer as "1\n"/"2\n"/"3\n".
        hf_label_to_choice = {
            "1\n": "A",
            "2\n": "B",
            "3\n": "C",
        }
        examples = []
        for idx, row in enumerate(lines):
            if "label" in row:
                # Loading from HF Datasets data
                gold = hf_label_to_choice[row["label"]]
            else:
                # Loading from original data
                gold = row["correct"]
            examples.append(
                Example(
                    guid="%s-%s" % (set_type, idx),
                    prompt=row["context"] + " " + row["question"],
                    choice_list=[row[field] for field in answer_fields],
                    label=gold,
                ))
        return examples

    @classmethod
    def _read_labels(cls, path):
        """Read one integer label per line from *path*."""
        return [int(raw.strip()) for raw in read_file_lines(path)]
# Example #28
class MCTestTask(mc_template.AbstractMultipleChoiceTask):
    """MCTest: four-way multiple-choice reading comprehension over short
    stories, loaded from tab-separated story and answer files."""

    Example = Example
    TokenizedExample = TokenizedExample
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = ["A", "B", "C", "D"]
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        return self._create_examples(
            lines=read_file_lines(self.train_path, strip_lines=True),
            ans_lines=read_file_lines(self.path_dict["train_ans"],
                                      strip_lines=True),
            set_type="train",
        )

    def get_val_examples(self):
        return self._create_examples(
            lines=read_file_lines(self.val_path, strip_lines=True),
            ans_lines=read_file_lines(self.path_dict["val_ans"],
                                      strip_lines=True),
            set_type="val",
        )

    def get_test_examples(self):
        # The test split ships without answers; _create_examples fills in
        # placeholder labels when ans_lines is None.
        return self._create_examples(
            lines=read_file_lines(self.test_path, strip_lines=True),
            ans_lines=None,
            set_type="test",
        )

    @classmethod
    def _create_examples(cls, lines, ans_lines, set_type):
        """Build Examples from tab-separated story rows and answer rows.

        Each story row packs 4 questions; question j starts at column
        3 + j*5 (the question text) followed by its four choice columns.
        """
        if ans_lines is None:
            # No gold answers (test set): repeat the last choice key as filler.
            placeholder = "\t".join([cls.CHOICE_KEYS[-1]] * 4)
            ans_lines = [placeholder for _ in lines]
        examples = []
        for story_idx, (raw_row, raw_ans) in enumerate(zip(lines, ans_lines)):
            cols = raw_row.split("\t")
            answers = raw_ans.split("\t")
            # Column 2 is the passage; "\newline" markers become spaces.
            passage = cols[2].replace("\\newline", " ")
            for q in range(4):
                base = 3 + q * 5
                examples.append(
                    Example(
                        guid="%s-%s" % (set_type, story_idx * 4 + q),
                        prompt=passage + " " + cols[base],
                        choice_list=cols[base + 1:base + 5],
                        label=answers[q],
                    ))
        return examples
# Example #29
class ArcChallengeTask(mc_template.AbstractMultipleChoiceTask):
    """ARC-Challenge: multiple-choice science questions with up to five
    answer options; shorter option lists are padded to a uniform length."""

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = ["A", "B", "C", "D", "E"]
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        return self._create_examples(lines=read_json_lines(self.train_path),
                                     set_type="train")

    def get_val_examples(self):
        return self._create_examples(lines=read_json_lines(self.val_path),
                                     set_type="val")

    def get_test_examples(self):
        return self._create_examples(lines=read_json_lines(self.test_path),
                                     set_type="test")

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Build Examples from JSONL rows.

        Some rows use numeric answer keys ("1".."5"); these are normalized to
        letter keys. Rows with fewer than five choices are padded with "."
        filler so every example has exactly the same number of options.

        Raises:
            ValueError: if a row has more choices than supported.
        """
        numeric_to_letter = {
            "1": "A",
            "2": "B",
            "3": "C",
            "4": "D",
            "5": "E",
        }
        num_choices = len(numeric_to_letter)
        examples = []
        for i, line in enumerate(lines):
            # Normalize numeric answer keys; letter keys pass through as-is.
            label = numeric_to_letter.get(line["answerKey"], line["answerKey"])
            choice_list = list(line["choices"]["text"])
            if len(choice_list) > num_choices:
                raise ValueError(
                    "Example %d has %d choices (max %d)"
                    % (i, len(choice_list), num_choices))
            # Pad short option lists with "." so all examples are uniform.
            choice_list = choice_list + ["."] * (num_choices - len(choice_list))

            examples.append(
                Example(
                    guid="%s-%s" % (set_type, i),
                    prompt=line["question"],
                    choice_list=choice_list,
                    label=label,
                ))
        return examples
# Example #30
class PiqaTask(mc_template.AbstractMultipleChoiceTask):
    """PIQA (Physical Interaction QA): choose which of two solutions better
    achieves a physical goal. Gold labels live in a separate file, one per
    line, aligned with the JSONL examples."""

    Example = Example
    TokenizedExample = Example
    DataRow = DataRow
    Batch = Batch

    CHOICE_KEYS = [0, 1]
    CHOICE_TO_ID, ID_TO_CHOICE = labels_to_bimap(CHOICE_KEYS)
    NUM_CHOICES = len(CHOICE_KEYS)

    def get_train_examples(self):
        return self._create_examples(
            lines=zip(
                read_json_lines(self.train_path),
                read_file_lines(self.path_dict["train_labels"],
                                strip_lines=True),
            ),
            set_type="train",
        )

    def get_val_examples(self):
        return self._create_examples(
            lines=zip(
                read_json_lines(self.val_path),
                read_file_lines(self.path_dict["val_labels"],
                                strip_lines=True),
            ),
            set_type="val",
        )

    def get_test_examples(self):
        # The test split ships without labels. Pair each row with a None
        # placeholder (previously the test file was read twice, zipping it
        # with itself, just to fill this slot); _create_examples never
        # evaluates the label string when set_type == "test".
        return self._create_examples(
            lines=((ex, None) for ex in read_json_lines(self.test_path)),
            set_type="test",
        )

    @classmethod
    def _create_examples(cls, lines, set_type):
        """Build Examples from (json_row, label_string) pairs.

        For the test set the label string is ignored and the last choice key
        is used as a placeholder.
        """
        examples = []

        for i, (ex, label_string) in enumerate(lines):
            examples.append(
                Example(
                    guid="%s-%s" % (set_type, i),
                    prompt=ex["goal"],
                    choice_list=[ex["sol1"], ex["sol2"]],
                    # int() is only evaluated for train/val, where
                    # label_string is a real "0"/"1" string.
                    label=int(label_string)
                    if set_type != "test" else cls.CHOICE_KEYS[-1],
                ))

        return examples