Esempio n. 1
0
    def post_process_question(self, q: Question):
        """Record on ``q`` whether the modification affects this question.

        Two views of the relevant events are compared: the one produced by
        ``StoryGenerator.get_relevant_events`` (the base view) and the one
        produced by this instance's own ``get_relevant_events``.  The verdict
        is stored in ``q.question_data['modified']``.

        * DIRECT questions compare the sentence number of the n-th event
          (``q.question_data['n']`` is 1-based) in both views.
        * OVERALL questions compare how many events of ``q.event_type`` each
          view holds; when ``q.target`` is one of ``self.ATTRIBUTES`` only
          events for which the attribute is realised are counted.

        Raises:
            NotImplementedError: if ``q.type`` is neither DIRECT nor OVERALL.
        """
        baseline = list(StoryGenerator.get_relevant_events(self))
        current = list(self.get_relevant_events())

        if q.type == QuestionTypes.DIRECT:
            position = q.question_data['n'] - 1
            q.question_data['modified'] = (
                baseline[position].sentence_nr !=
                current[position].sentence_nr)
        elif q.type == QuestionTypes.OVERALL:
            if q.target in self.ATTRIBUTES:

                def realised_only(candidates):
                    # Keep the events in which the target attribute is realised.
                    return [
                        e for e in candidates
                        if self.is_realised(q.target, e)
                    ]

                baseline = realised_only(baseline)
                current = realised_only(current)

            def count_of_type(candidates):
                # Number of events matching the question's event type.
                return len(
                    [e for e in candidates if e.event_type == q.event_type])

            # Counts are compared per event type instead of comparing the
            # overall lengths: with several modifiable actions the total can
            # differ while the count for this specific event type is the same.
            q.question_data['modified'] = (
                count_of_type(baseline) != count_of_type(current))
        else:
            raise NotImplementedError()
Esempio n. 2
0
def test_fix_units():
    """``_fix_units`` turns the bare answer ``'14'`` into ``'14 metres'``."""
    passage = [
        "Tricia Lusk almost opened the action when she nearly slotted in a 15 metres goal "
        "from Tracy Hahn 's soft clearance .",

        'The stadium went wild as Pok Formosa was withdrawn in minute 29 with her ankle in a brace following a '
        'harsh challenge from Margaretta Sabins .',

        'Further pressure on the attack resulted in Caryl Yacullo fouling Devin Mockler '
        'for an auspiciously looking free-kick chance for her opponents .',

        "14 minutes after that Wendy Miners nearly scored on the 49 th minute , all but putting in "
        "the ball from 14 metres away under the bar after she ran 11 metres and intercepted "
        "Dynamo Whalesharks goalkeeper's goal kick .",

        'Rita Sander scored the next goal for Red-blue Elephants from 18 metres to '
        'continue where they left off after Pauline Hunter played the ball into her path .',
        'The stadium went wild seeing Claudia Johnson winning the ball out wide for Red-blue '
        'Elephants and drawing a foul play from Sharon Schoolfield .'
    ]
    # The evidence index 3 is the sentence that contains "14 metres".
    question = Question(
        type='overall',
        target='distance',
        evidence=[3],
        event_type='goal',
        reasoning='argmin-distance',
        answer='14',
        question_data={},
        realized='The closest goal was scored from how far away ?',
    )
    fixed_answer = Realizer(**templates, validate=False)._fix_units(
        question, passage)
    assert fixed_answer == '14 metres'
Esempio n. 3
0
def test_fix_units_2():
    """``_fix_units`` turns the bare time answer ``'42'`` into ``'minute 42'``."""
    passage = [
        'The match started as Terra Miller scythed down Maria Forest '
        'for a promisingly looking free-kick opportunity for her opponents .',

        'On the 11 th minute a spectacular 12 metres strike from Susan White almost '
        'flying in the lower left corner past the woman between the posts for her 2 nd league '
        'goal of the season advanced the action .',

        'The stadium went wild as Lajuana Loader fouled Pearle Giebel on the 35 th minute .',

        "7 minutes after that Trish Oieda scored in minute 42 , hitting the ball from 14 "
        "metres away off the post and in the middle of the goal after she intercepted "
        "FC Monkeys goalkeeper's goal kick .",

        'Things proceeded with Marlene Croom winning the ball in the attacking '
        'third and drawing a foul play from Mellisa Winnett .',

        "Dynamo Whalesharks advanced the action with a 12 metres goal as "
        "Silvana Waugaman put in Tabetha Bowe 's risky through ball ."
    ]
    # The evidence index 3 is the sentence that contains "minute 42".
    question = Question(
        type='direct',
        target='time',
        evidence=[3],
        event_type='goal',
        reasoning='retrieval',
        answer='42',
        question_data={'n': 1},
        realized='When did they score the 1 st goal ?',
    )
    fixed_answer = Realizer(**templates, validate=False)._fix_units(
        question, passage)
    assert fixed_answer == 'minute 42'
Esempio n. 4
0
def to_question(events: List[Event],
                is_modified,
                generator,
                event_types,
                modify_event_type,
                target='actor',
                reverse=False) -> Question:
    """Build the DIRECT bridge question for a planned sequence of events.

    ``event_types`` is iterated as ``(mod, _)`` pairs.  Two indices form the
    evidence:

    * the bridge event: the first entry tagged ``'just'`` (the last one when
      ``reverse`` is set);
    * the actual event: when ``is_modified``, the first other ``'just'``
      entry; otherwise the first (last when ``reverse``) entry tagged
      ``'modified'``.

    The answer is read from the later of the two events (the earlier one when
    ``reverse``): its ``actor`` if ``target == 'actor'``, else
    ``attributes[target]``.  ``generator`` is not used.

    Raises:
        StopIteration: if a required entry is missing from ``event_types``.
        AssertionError: if the resulting answer is falsy.
    """
    step = -1 if reverse else 1

    def first_tagged(tag):
        # First index (last when reversed) whose plan entry carries ``tag``.
        tagged = [i for i, (mod, _) in enumerate(event_types) if mod == tag]
        return next(iter(tagged[::step]))

    bridge_idx = first_tagged('just')
    if is_modified:
        # The other appearance of 'just' is the actual event.
        actual_idx = next(i for i, (mod, _) in enumerate(event_types)
                          if mod == 'just' and i != bridge_idx)
    else:
        actual_idx = first_tagged('modified')

    evidence = sorted([bridge_idx, actual_idx])
    earliest, latest = evidence[0], evidence[-1]
    if reverse:
        answer_event, bridge_event = events[earliest], events[latest]
    else:
        answer_event, bridge_event = events[latest], events[earliest]

    if target == 'actor':
        answer = answer_event.actor
    else:
        answer = answer_event.attributes[target]
    assert answer

    reasoning_type = bridge_reverse if reverse else bridge
    return Question(
        type=QuestionTypes.DIRECT,
        target=target,
        evidence=evidence,
        event_type=modify_event_type,
        reasoning=reasoning_type.name,
        question_data={"bridge-event": bridge_event},
        answer=answer,
    )
Esempio n. 5
0
 def to_question(events,
                 is_modified,
                 generator,
                 fm=first_modification,
                 md=modification_distance):
     """Build the DIRECT actor question about the first event (``n == 1``).

     The evidence index is ``fm`` when the passage is not modified and
     ``fm + md`` when it is; with ``reverse`` (taken from the enclosing
     scope) the index is counted from the end of ``events`` instead.
     ``generator`` is not used.
     """
     offset = fm + md if is_modified else fm
     evidence = (len(events) - 1 - offset) if reverse else offset
     reasoning_type = retrieval_reverse if reverse else retrieval
     return Question(
         type=QuestionTypes.DIRECT,
         target='actor',
         evidence=[evidence],
         event_type=modify_event_type,
         reasoning=reasoning_type.name,
         question_data={"n": 1},
         answer=events[evidence].actor,
     )
Esempio n. 6
0
 def to_question(
     events,
     is_modified,
     generator,
     ets=event_types,
     # fm=first_modification, md=modification_distance,
     attr=attribute):
     """Build the DIRECT attribute question about the second event (``n == 2``).

     Candidate indices are the entries of ``ets`` equal to ``non_modified``
     and, when the passage is not modified, also those equal to ``modified``.
     The first two candidates (the last two when ``reverse``) become the
     evidence; the answer is ``attributes[attr]`` of the last evidence event,
     or of the first one when ``reverse``.

     ``reverse``, ``non_modified``, ``modified`` and ``modify_event_type``
     come from the enclosing scope.  ``generator`` is not used.
     """

     def counts(kind):
         # Modified entries only count while the passage itself is unmodified.
         return kind == non_modified or (not is_modified
                                         and kind == modified)

     candidates = [i for i, kind in enumerate(ets) if counts(kind)]
     if reverse:
         evidence = candidates[-2:]
         answer_idx = evidence[0]
         reasoning = 'retrieval-reverse'
     else:
         evidence = candidates[:2]
         answer_idx = evidence[-1]
         reasoning = 'retrieval'
     return Question(
         type=QuestionTypes.DIRECT,
         target=attr,
         evidence=evidence,
         event_type=modify_event_type,
         reasoning=reasoning,
         question_data={"n": 2},
         answer=events[answer_idx].attributes[attr],
     )
Esempio n. 7
0
def to_question(events: List[Event], is_modified, generator, event_types,
                argselect_attribute,
                modify_event_type, target='actor', reverse=False) -> Question:
    """Build the OVERALL argmin/argmax question over ``argselect_attribute``.

    Every entry of ``event_types`` is unpacked as
    ``(_, ((mod, _), _, order))``.  When the passage is modified the evidence
    is every index whose ``mod`` equals ``EventPlan.Just``; otherwise it is
    every index whose ``mod`` equals ``EventPlan.Mod`` and whose ``order`` is
    0 (``reverse``) or 3.  The answer comes from the first evidence event:
    its ``actor`` for ``target == 'actor'``, else ``attributes[target]``.
    ``generator`` is not used.

    Raises:
        IndexError: if no entry qualifies as evidence.
    """
    wanted_order = 0 if reverse else 3
    evidence = []
    for idx, (_, ((mod, _), _, order)) in enumerate(event_types):
        if is_modified:
            selected = mod == EventPlan.Just
        else:
            selected = mod == EventPlan.Mod and order == wanted_order
        if selected:
            evidence.append(idx)

    answer_event = events[evidence[0]]
    if target == 'actor':
        answer = answer_event.actor
    else:
        answer = answer_event.attributes[target]

    selector = argmin if reverse else argmax
    return Question(
        type=QuestionTypes.OVERALL,
        target=target,
        evidence=evidence,
        event_type=modify_event_type,
        reasoning=f"{selector.name}-{argselect_attribute}",
        question_data={},
        answer=answer,
    )
Esempio n. 8
0
def test_fix_units_3():
    """``_fix_units`` turns the bare answer ``'31'`` into ``'31 metres'``.

    Both evidence sentences contain the number 31 (once as a minute, once as
    a distance); the expected result carries the distance unit.
    """
    passage = [
        "Ethelyn Capello scored Arctic Monkeys 's first goal from 20 metres away "
        "to set the tone for the match after Annmarie Dibiase inadvertently prodded the ball into her path .",
        'Things proceeded with Mary Millwood being withdrawn in the 31 st minute with her hip '
        'in a brace following a challenge from Amanda Testa .',
        "Pale Lilac Elephants almost advanced the action with a 31 metres goal as Carol Nehls "
        "all but curled in Cynthia Kittredge 's soft clearance .",
        "Shannon Garber almost added more insult to the injury when she almost slotted in a "
        "21 metres goal from Virginia Sheekey 's pass .",
        "In the 52 nd minute a soft clearance went to Pale Lilac Elephants 's Ida Webb on the flank "
        "and the player swept low to the 6-yard-area for Mamie Swart to poke past the goalkeeper "
        "for a wonderful 25 metres goal .",
        'Things proceeded with Judith Odougherty winning the ball in the middle field for '
        'Pale Lilac Elephants and drawing a foul from Brenda Uttech .'
    ]
    question = Question(
        type='direct',
        target='distance',
        evidence=[1, 2],
        event_type='goal',
        reasoning='bridge',
        answer='31',
        question_data={},
        realized='After the foul on Mary Millwood , '
                 'from how far away was the next goal scored ?',
    )
    fixed_answer = Realizer(**templates, validate=False)._fix_units(
        question, passage)
    assert fixed_answer == '31 metres'
Esempio n. 9
0
 def to_question(events,
                 is_modified,
                 generator,
                 ets=event_types,
                 met=modify_event_type):
     """Build the DIRECT actor question about the second event (``n == 2``).

     Candidate indices are the entries of ``ets`` equal to ``non_modified``
     and, when the passage is not modified, also those equal to ``modified``.
     The first two candidates (the last two when ``reverse``) become the
     evidence; the answer is the actor of the last evidence event, or of the
     first one when ``reverse``.

     ``met`` captures the event type at definition time and is what the
     question is tagged with.  ``reverse``, ``non_modified`` and ``modified``
     come from the enclosing scope.  ``generator`` is not used.

     Raises:
         AssertionError: if fewer than two candidate events exist.
     """

     def counts(kind):
         # Modified entries only count while the passage itself is unmodified.
         return kind == non_modified or (not is_modified
                                         and kind == modified)

     candidates = [i for i, kind in enumerate(ets) if counts(kind)]
     evidence = candidates[-2:] if reverse else candidates[:2]
     assert len(evidence) == 2
     answer_idx = evidence[0] if reverse else evidence[-1]
     return Question(
         type=QuestionTypes.DIRECT,
         target='actor',
         evidence=evidence,
         # Use the value bound in the signature rather than the closure
         # variable, which may be rebound after this function is defined.
         event_type=met,
         reasoning='retrieval-reverse' if reverse else 'retrieval',
         question_data={"n": 2},
         answer=events[answer_idx].actor,
     )
Esempio n. 10
0
    def generate_questions(
        self, events: List[Event], visits: Dict[int, List[str]]
    ) -> Tuple[List[Question], List[Question], List[Question], List[Question]]:
        """Generate the direct and overall questions for every event type.

        For each entry of ``self.EVENT_TYPES`` the relevant events are taken
        from ``self.get_relevant_events()`` and these questions are created,
        each one passed through ``self.post_process_question``:

        * a DIRECT actor question per relevant event;
        * a DIRECT question per relevant event and attribute in
          ``self.ATTRIBUTES``;
        * one OVERALL actor question;
        * one OVERALL question per attribute.

        Side effects: ``self.all_events``, ``self.visits``,
        ``self.event_type`` and ``self.relevant_events`` are overwritten.

        Args:
            events: stored as ``self.all_events``.
            visits: strings recorded per sentence number; an actor question
                is answerable only if one of the strings for the event's
                sentence contains ``"sent.actor"``.

        Returns:
            ``(single_span, multi_span, unanswerable, abstractive)`` question
            lists.  Nothing is ever added to the abstractive list here.
        """
        # extractive buckets
        single_span = []
        multi_span = []
        unanswerable = []
        abstractive = []
        self.all_events = events
        self.event_type = None
        # per sentence per attribute
        self.visits = visits

        for self.event_type in self.EVENT_TYPES:
            self.relevant_events = self.get_relevant_events()

            # ---- per-sentence (direct) questions ----
            for position, event in enumerate(self.relevant_events):
                ordinal = position + 1
                reasoning = (ReasoningTypes.OrderingEasy
                             if position else ReasoningTypes.Retrieval)

                # actor
                # TODO: WHAT IF COREF ETC
                actor_answer = self.post_process_actor_answer(event.actor)
                actor_question = Question(
                    type=QuestionTypes.DIRECT,
                    target="actor",
                    evidence=[event.sentence_nr],
                    event_type=self.event_type,
                    answer=actor_answer,
                    reasoning=reasoning,
                    question_data={"n": ordinal})
                self.post_process_question(actor_question)

                actor_mentioned = any(
                    "sent.actor" in v for v in visits[event.sentence_nr])
                if not actor_mentioned:
                    actor_question.answer = None
                    unanswerable.append(actor_question)
                else:
                    single_span.append(actor_question)

                # attributes
                for attribute in self.ATTRIBUTES:
                    attribute_question = Question(
                        type=QuestionTypes.DIRECT,
                        target=attribute,
                        event_type=self.event_type,
                        reasoning=reasoning,
                        question_data={"n": ordinal},
                    )
                    if self.is_realised(attribute, event):
                        attribute_question.answer = (
                            self.post_process_attribute_answers(
                                attribute, event.attributes[attribute]))
                        attribute_question.evidence = [event.sentence_nr]
                        bucket = single_span
                    else:
                        attribute_question.answer = None
                        attribute_question.evidence = []
                        bucket = unanswerable
                    bucket.append(attribute_question)
                    self.post_process_question(attribute_question)

            # ---- overall question, target = actor ----
            overall = Question(
                type=QuestionTypes.OVERALL,
                target='actor',
                event_type=self.event_type,
            )
            overall.evidence = [e.sentence_nr for e in self.relevant_events]
            relevant_count = len(self.relevant_events)
            if relevant_count > 1:
                overall.reasoning = ReasoningTypes.MultiRetrieval
                overall.answer = [
                    self.post_process_actor_answer(e.actor)
                    for e in self.relevant_events
                ]
                multi_span.append(overall)
            elif relevant_count == 1:
                overall.reasoning = ReasoningTypes.Retrieval
                overall.answer = self.post_process_actor_answer(
                    self.relevant_events[0].actor)
                single_span.append(overall)
            else:
                overall.answer = None
                unanswerable.append(overall)
            self.post_process_question(overall)

            # ---- overall questions, target = attribute ----
            for attribute in self.ATTRIBUTES:
                overall = Question(type=QuestionTypes.OVERALL,
                                   target=attribute,
                                   event_type=self.event_type)
                # Only events in which the attribute is realised count.
                realised_events = [
                    e for e in self.relevant_events
                    if self.is_realised(attribute, e)
                ]
                overall.evidence = [e.sentence_nr for e in realised_events]
                answers = [
                    self.post_process_attribute_answers(
                        attribute, e.attributes[attribute])
                    for e in realised_events
                ]
                realised_count = len(realised_events)
                if realised_count > 1:
                    overall.reasoning = ReasoningTypes.MultiRetrieval
                    overall.answer = answers
                    multi_span.append(overall)
                elif realised_count == 1:
                    overall.reasoning = ReasoningTypes.Retrieval
                    overall.answer = answers[0]
                    single_span.append(overall)
                else:
                    overall.answer = None
                    unanswerable.append(overall)
                self.post_process_question(overall)

        return single_span, multi_span, unanswerable, abstractive
Esempio n. 11
0
    def realise_question(self,
                         q: Question,
                         passage: List[str],
                         ignore_missing_keys=True):
        """Realise ``q`` as a question string and fix up its answer.

        A template is looked up under
        ``question_templates[type][target][reasoning][event_type]`` and, if
        that key is missing, under ``question_templates[type][target][event_type]``.
        The template words are then expanded one by one:

        * ``(...)`` words are expanded by ``self.processor.process_option``;
        * ``#key`` words are replaced by ``q.question_data[key]``;
        * ``!...`` words are expanded by ``self.processor.process_function``;
        * every other word is emitted as is.

        Expanded words are pushed back and processed again, so expansions may
        themselves contain further directives.  The result is stored in
        ``q.realized`` (with ``" ?"`` appended) and ``q.answer`` is replaced
        by the output of ``self._fix_units``.

        Side effect: ``self.processor.chooser`` is replaced by a fresh
        ``RandomChooser``.

        Args:
            q: the question to realise; modified in place.
            passage: passed on to ``self._fix_units``.
            ignore_missing_keys: when no template is found, return ``None``
                instead of raising.

        Returns:
            ``(q.realized, q.answer)``, or ``None`` if no template exists and
            ``ignore_missing_keys`` is true.

        Raises:
            YouIdiotException: no template exists and ``ignore_missing_keys``
                is false.
            NotImplementedError: a ``#key`` word is not in ``q.question_data``.
            AssertionError: ``self._fix_units`` returned a falsy answer.
        """
        self.processor.chooser = RandomChooser()
        logger.debug(f"Question: {q}")
        try:
            # first see if there's a reasoning key
            template, _ = self.question_templates[q.type][q.target][
                q.reasoning][q.event_type].random()
        except KeyError as e:
            logger.debug(str(e))
            # str() each part so that a missing (None) component cannot make
            # the diagnostics raise TypeError while handling the KeyError.
            missing_key = '.'.join(
                map(str, (q.type, q.target, q.reasoning, q.event_type)))
            logger.warning(
                f"{missing_key} not found, trying without reasoning key....")
            try:
                # if not, try without reasoning
                template, _ = self.question_templates[q.type][q.target][
                    q.event_type].random()
            except KeyError:
                # if still not: ¯\_(ツ)_/¯
                if ignore_missing_keys:
                    return None
                raise YouIdiotException(
                    f"Question templates are missing the key {missing_key}")
        logger.debug(f'Template: {template}')

        question_words = []
        # Work on a reversed copy: the stack is consumed below, and doing that
        # on the object handed out by the template store would empty the
        # template for later calls if the store returns it by reference.
        stack = list(reversed(template))
        while stack:
            logger.debug(f"Current stack is: {stack}")
            word = stack.pop()
            logger.debug(word)

            # option as in ()
            if word.startswith("(") and word.endswith(")"):
                new_words = self.processor.process_option(word)
                stack.extend(new_words[::-1])
            # context access
            elif word.startswith("#"):
                try:
                    new_word = str(q.question_data[word[1:]])
                except KeyError as err:
                    raise NotImplementedError(
                        f"{word} is not in question data!") from err
                stack.append(new_word)
            elif word.startswith("!"):
                new_words = self.processor.process_function(
                    word, args=q.question_data)
                stack.extend(new_words[::-1])
            else:
                question_words.append(word)
        logger.debug(question_words)

        # Join, collapse repeated whitespace, and append the question mark.
        q.realized = " ".join(" ".join(
            self.post_process(question_words)).split()) + " ?"
        answer = self._fix_units(q, passage)
        assert answer, f"{q}, {passage}"
        q.answer = answer

        return q.realized, q.answer
Esempio n. 12
0
def to_question(events: List[Event],
                is_modified,
                generator,
                event_types,
                comparison_attribute,
                modify_event_type,
                target='actor',
                reverse=False,
                temp_ordered=False) -> Question:
    """Build the OVERALL comparison question over ``comparison_attribute``.

    Two event indices are located in ``event_types`` with the ``_is`` helper:

    * the answer if the passage is modified: the single entry that is
      ``EventPlan.Just`` but not ``EventPlan.SameActor``;
    * the answer if it is not modified, chosen among the
      ``EventPlan.SameActor`` entries according to ``temp_ordered``,
      ``is_modified`` and ``reverse`` (see the branches below).

    Both indices form the (sorted) evidence, both actors are stored in
    ``question_data``, and ``is_modified`` selects which one is the answer.
    ``generator`` is not used.

    Raises:
        AssertionError: if ``target`` is not ``'actor'`` or if a lookup that
            must be unique does not match exactly one entry.
    """
    assert target == 'actor'
    for plan in event_types:
        logger.debug(plan)
    logger.debug(f"temp ordered: {temp_ordered}")
    logger.debug(f"is modified: {is_modified}")
    logger.debug(f"comparison attribute: {comparison_attribute}")
    logger.debug(f"reverse: {reverse}")

    def only(indices):
        # The plan must contain exactly one matching entry.
        assert len(indices) == 1
        return indices[0]

    idx_of_true_when_modified = only([
        pos for pos, plan in enumerate(event_types)
        if _is(plan, EventPlan.Just, '_')
        and not _is(plan, EventPlan.SameActor, '_')
    ])

    if temp_ordered:
        same_actor = [
            pos for pos, plan in enumerate(event_types)
            if _is(plan, EventPlan.SameActor, '_')
        ]
        if is_modified:
            idx_of_true_when_not_modified = only([
                pos for pos in same_actor
                if _is(event_types[pos], EventPlan.Just, '_')
            ])
        else:
            # First same-actor entry when reversed, otherwise the last one.
            idx_of_true_when_not_modified = same_actor[0 if reverse else -1]
    elif is_modified:
        idx_of_true_when_not_modified = only([
            pos for pos, plan in enumerate(event_types)
            if _is(plan, EventPlan.Just, '_')
            and _is(plan, EventPlan.SameActor, '_')
        ])
    else:
        wanted_order = -99 if reverse else 99
        idx_of_true_when_not_modified = only([
            pos for pos, plan in enumerate(event_types)
            if _is(plan, EventPlan.SameActor, '_')
            and plan[0] == EventPlan.Order and plan[1][2] == wanted_order
        ])

    evidence = [idx_of_true_when_modified, idx_of_true_when_not_modified]
    logger.debug(f"Evidence: {evidence}")
    logger.debug(f"len(events): {len(events)}")

    answer_when_modified = events[idx_of_true_when_modified].actor
    answer_when_not_modified = events[idx_of_true_when_not_modified].actor
    if is_modified:
        answer = answer_when_modified
    else:
        answer = answer_when_not_modified

    separator = '-reverse-' if reverse else '-'
    return Question(
        type=QuestionTypes.OVERALL,
        target=target,
        evidence=sorted(evidence),
        event_type=modify_event_type,
        reasoning=f"{comparison.name}{separator}{comparison_attribute}",
        question_data={
            "answer-when-modified": answer_when_modified,
            'answer-when-not-modified': answer_when_not_modified
        },
        answer=answer,
    )