Ejemplo n.º 1
0
    def generate_questions(
        self, events: List[Event], visits: Dict[int, List[str]]
    ) -> Tuple[List[Question], List[Question], List[Question], List[Question]]:
        """Build direct and overall questions for every event type.

        For each entry of ``self.EVENT_TYPES`` the relevant events are fetched
        via ``self.get_relevant_events()`` and two families of questions are
        created:

        * DIRECT questions: one about the actor and one per entry of
          ``self.ATTRIBUTES`` for every relevant event. The first relevant
          event gets ``ReasoningTypes.Retrieval``, later ones
          ``ReasoningTypes.OrderingEasy``; ``question_data["n"]`` holds the
          1-based position of the event among the relevant events.
        * OVERALL questions: one about the actor and one per attribute for
          the event type as a whole. More than one supporting event yields a
          multi-span question, exactly one a single-span question, and none
          an unanswerable question.

        Side effects: stores ``events`` in ``self.all_events`` and ``visits``
        in ``self.visits``, and updates ``self.event_type`` and
        ``self.relevant_events`` while iterating.

        Args:
            events: Events to ask about.
            visits: Maps a sentence number to the visit strings recorded for
                that sentence; a direct actor question is answerable only if
                one of them contains ``"sent.actor"``.

        Returns:
            ``(single_span, multi_span, unanswerable, abstractive)`` question
            lists. The abstractive list is never filled by this method.
        """
        single_span: List[Question] = []
        multi_span: List[Question] = []
        unanswerable: List[Question] = []
        abstractive: List[Question] = []

        self.all_events = events
        self.event_type = None
        self.visits = visits

        for current_type in self.EVENT_TYPES:
            self.event_type = current_type
            self.relevant_events = self.get_relevant_events()

            # ---- direct (per-event) questions ----
            for position, event in enumerate(self.relevant_events):
                if position == 0:
                    reasoning = ReasoningTypes.Retrieval
                else:
                    reasoning = ReasoningTypes.OrderingEasy
                ordinal = position + 1

                # TODO: WHAT IF COREF ETC
                actor_question = Question(
                    type=QuestionTypes.DIRECT,
                    target="actor",
                    evidence=[event.sentence_nr],
                    event_type=self.event_type,
                    answer=self.post_process_actor_answer(event.actor),
                    reasoning=reasoning,
                    question_data={"n": ordinal},
                )
                # The actor question is post-processed before it is sorted
                # into a bucket (and before its answer is possibly reset).
                self.post_process_question(actor_question)

                actor_was_visited = any(
                    "sent.actor" in visit
                    for visit in visits[event.sentence_nr])
                if not actor_was_visited:
                    actor_question.answer = None
                    unanswerable.append(actor_question)
                else:
                    single_span.append(actor_question)

                for attribute in self.ATTRIBUTES:
                    attribute_question = Question(
                        type=QuestionTypes.DIRECT,
                        target=attribute,
                        event_type=self.event_type,
                        reasoning=reasoning,
                        question_data={"n": ordinal},
                    )
                    if not self.is_realised(attribute, event):
                        attribute_question.answer = None
                        attribute_question.evidence = []
                        unanswerable.append(attribute_question)
                    else:
                        raw_value = event.attributes[attribute]
                        attribute_question.answer = (
                            self.post_process_attribute_answers(
                                attribute, raw_value))
                        attribute_question.evidence = [event.sentence_nr]
                        single_span.append(attribute_question)

                    # Attribute questions are post-processed after bucketing.
                    self.post_process_question(attribute_question)

            # ---- overall (per-event-type) questions ----
            overall_actor = Question(
                type=QuestionTypes.OVERALL,
                target='actor',
                event_type=self.event_type,
            )
            overall_actor.evidence = [
                relevant.sentence_nr for relevant in self.relevant_events
            ]

            relevant_count = len(self.relevant_events)
            if relevant_count > 1:
                overall_actor.reasoning = ReasoningTypes.MultiRetrieval
                overall_actor.answer = [
                    self.post_process_actor_answer(relevant.actor)
                    for relevant in self.relevant_events
                ]
                multi_span.append(overall_actor)
            elif relevant_count == 1:
                overall_actor.reasoning = ReasoningTypes.Retrieval
                only_event = self.relevant_events[0]
                overall_actor.answer = self.post_process_actor_answer(
                    only_event.actor)
                single_span.append(overall_actor)
            else:
                # No relevant events at all: nothing to retrieve.
                overall_actor.answer = None
                unanswerable.append(overall_actor)

            self.post_process_question(overall_actor)

            for attribute in self.ATTRIBUTES:
                overall_attribute = Question(type=QuestionTypes.OVERALL,
                                             target=attribute,
                                             event_type=self.event_type)

                # Only events that actually realise the attribute count as
                # evidence / contribute an answer.
                realised_events = [
                    candidate for candidate in self.relevant_events
                    if self.is_realised(attribute, candidate)
                ]
                overall_attribute.evidence = [
                    realised.sentence_nr for realised in realised_events
                ]
                realised_answers = [
                    self.post_process_attribute_answers(
                        attribute, realised.attributes[attribute])
                    for realised in realised_events
                ]

                realised_count = len(realised_events)
                if realised_count > 1:
                    overall_attribute.reasoning = ReasoningTypes.MultiRetrieval
                    overall_attribute.answer = realised_answers
                    multi_span.append(overall_attribute)
                elif realised_count == 1:
                    overall_attribute.reasoning = ReasoningTypes.Retrieval
                    overall_attribute.answer = realised_answers[0]
                    single_span.append(overall_attribute)
                else:
                    overall_attribute.answer = None
                    unanswerable.append(overall_attribute)

                self.post_process_question(overall_attribute)

        return (single_span, multi_span, unanswerable, abstractive)
Ejemplo n.º 2
0
    def realise_question(self,
                         q: Question,
                         passage: List[str],
                         ignore_missing_keys=True):
        """Turn a question into text using a randomly chosen template.

        A template is looked up in ``self.question_templates`` under
        ``[q.type][q.target][q.reasoning][q.event_type]``; if that path is
        missing, the lookup is retried without the reasoning key. The
        template's tokens are then expanded one by one:

        * ``(...)`` tokens are expanded by ``self.processor.process_option``,
        * ``#key`` tokens are replaced by ``str(q.question_data[key])``,
        * ``!...`` tokens are expanded by ``self.processor.process_function``
          with ``q.question_data`` as ``args``,
        * every other token is emitted as is.

        Expansions are pushed back onto the work stack, so their results are
        themselves subject to further expansion.

        Side effects: replaces ``self.processor.chooser`` with a fresh
        ``RandomChooser`` and sets ``q.realized`` and ``q.answer``.

        Args:
            q: The question to realise.
            passage: Passed through to ``self._fix_units`` together with
                ``q`` to obtain the final answer.
            ignore_missing_keys: If true, return ``None`` when no template is
                found; otherwise raise ``YouIdiotException``.

        Returns:
            ``(q.realized, q.answer)``, or ``None`` if no template exists and
            ``ignore_missing_keys`` is true.

        Raises:
            YouIdiotException: No template found and ``ignore_missing_keys``
                is false.
            NotImplementedError: A ``#key`` token refers to a key missing
                from ``q.question_data``.
            AssertionError: ``self._fix_units`` returned a falsy answer.
        """
        self.processor.chooser = RandomChooser()
        logger.debug(f"Question: {q}")

        # Human-readable lookup path for log/error messages. Non-string parts
        # (e.g. a reasoning of None) are stringified so that building the
        # message cannot itself fail with a TypeError and hide the real
        # "missing key" problem; string parts are joined unchanged.
        template_key = '.'.join(
            part if isinstance(part, str) else str(part)
            for part in (q.type, q.target, q.reasoning, q.event_type))

        try:
            # first see if there's a reasoning key
            template, _ = self.question_templates[q.type][q.target][
                q.reasoning][q.event_type].random()
        except KeyError as e:
            # if not, try without reasoning
            logger.debug(str(e))
            logger.warning(
                f"{template_key} "
                'not found, trying without reasoning key....')
            try:
                template, _ = self.question_templates[q.type][
                    q.target][q.event_type].random()
            except KeyError:
                # if still not: ¯\_(ツ)_/¯
                if ignore_missing_keys:
                    return None
                raise YouIdiotException(
                    f"Question templates are missing the key "
                    f"{template_key}"
                )
        logger.debug(f'Template: {template}')

        question_words = []
        # Work on a reversed *copy*: the stack is consumed with pop(), and
        # reversing/popping the template object itself would mutate (and
        # eventually empty) whatever list the template store handed out.
        stack = list(template)[::-1]
        while stack:
            logger.debug(f"Current stack is: {stack}")
            word = stack.pop()
            logger.debug(word)

            # option as in ()
            if word.startswith("(") and word.endswith(")"):
                new_words = self.processor.process_option(word)
                # reversed so that the first new word is popped first
                stack.extend(new_words[::-1])
            # context access
            elif word.startswith("#"):
                try:
                    new_word = str(q.question_data[word[1:]])
                except KeyError as err:
                    raise NotImplementedError(
                        f"{word} is not in question data!") from err
                stack.append(new_word)
            # function call
            elif word.startswith("!"):
                new_words = self.processor.process_function(
                    word, args=q.question_data)
                stack.extend(new_words[::-1])
            else:
                question_words.append(word)
        logger.debug(question_words)

        # Join, then split/join again to collapse any repeated whitespace.
        q.realized = " ".join(" ".join(
            self.post_process(question_words)).split()) + " ?"
        answer = self._fix_units(q, passage)
        assert answer, f"{q}, {passage}"
        q.answer = answer

        return q.realized, q.answer