def test_fix_units_2():
    """Check that `_fix_units` yields 'minute 42' for a time question whose raw answer is '42'."""
    story_sentences = [
        'The match started as Terra Miller scythed down Maria Forest '
        'for a promisingly looking free-kick opportunity for her opponents .',
        'On the 11 th minute a spectacular 12 metres strike from Susan White almost '
        'flying in the lower left corner past the woman between the posts for her 2 nd league '
        'goal of the season advanced the action .',
        'The stadium went wild as Lajuana Loader fouled Pearle Giebel on the 35 th minute .',
        "7 minutes after that Trish Oieda scored in minute 42 , hitting the ball from 14 "
        "metres away off the post and in the middle of the goal after she intercepted "
        "FC Monkeys goalkeeper's goal kick .",
        'Things proceeded with Marlene Croom winning the ball in the attacking '
        'third and drawing a foul play from Mellisa Winnett .',
        "Dynamo Whalesharks advanced the action with a 12 metres goal as "
        "Silvana Waugaman put in Tabetha Bowe 's risky through ball ."
    ]
    question = Question(
        type='direct',
        target='time',
        evidence=[3],
        event_type='goal',
        reasoning='retrieval',
        answer='42',
        question_data={'n': 1},
        realized='When did they score the 1 st goal ?',
    )
    # Template validation is switched off; only the unit fixing is exercised.
    unit_fixer = Realizer(**templates, validate=False)
    fixed_answer = unit_fixer._fix_units(question, story_sentences)
    expected = 'minute 42'
    assert fixed_answer == expected
def showcase_all(out_file, given_bundle: Bundle = None, do_x_repetitions=12):
    """Collect showcase output for every goal and foul template, print it and write it to a file.

    Args:
        out_file: Path of the file the collected lines are written to.
        given_bundle: Bundle to showcase; the module-level ``bundle`` is used
            when this is falsy.
        do_x_repetitions: Number of realisations per foul template; goal
            templates are showcased ``do_x_repetitions // 6`` times each.
    """
    active_bundle = given_bundle or bundle
    output = []

    goal_count = len(active_bundle.templates_modifier['sentences']['goal'])
    for template_idx in range(goal_count):
        output.append(click.style(f"goal[{template_idx}]", fg='blue', bold=True))
        for _ in range(do_x_repetitions // 6):
            for sentence in showcase(active_bundle, template_idx, False):
                output.append(highlight(text=sentence, colors=colors))

    foul_count = len(active_bundle.templates_modifier['sentences']['foul'])
    for template_idx in range(foul_count):
        # Only the generator and the events of the environment are needed here.
        generator, _cfg, events, _realizer, _story, _questions, _visits = \
            interactive_env_football_modifier(
                active_bundle,
                cfg={"world.num_sentences": 3},
                do_print=False,
                do_realise=False,
                first_modification=1)
        output.append(click.style(f"foul[{template_idx}]", fg='blue', bold=True))
        for _ in range(do_x_repetitions):
            foul_templates = only(active_bundle.templates_modifier, n=template_idx, action='foul')
            foul_realizer = Realizer(**foul_templates, unique_sentences=False)
            realised, _story_visits = foul_realizer.realise_story(events, generator.world)
            output.append(realised[0])

    rendered = "\n".join(output)
    print(rendered)
    with open(out_file, "w+") as f:
        f.write(rendered)
def test_fix_units():
    """Check that `_fix_units` yields '14 metres' for a distance question whose raw answer is '14'."""
    story_sentences = [
        "Tricia Lusk almost opened the action when she nearly slotted in a 15 metres goal "
        "from Tracy Hahn 's soft clearance .",
        'The stadium went wild as Pok Formosa was withdrawn in minute 29 with her ankle in a brace following a '
        'harsh challenge from Margaretta Sabins .',
        'Further pressure on the attack resulted in Caryl Yacullo fouling Devin Mockler '
        'for an auspiciously looking free-kick chance for her opponents .',
        "14 minutes after that Wendy Miners nearly scored on the 49 th minute , all but putting in "
        "the ball from 14 metres away under the bar after she ran 11 metres and intercepted "
        "Dynamo Whalesharks goalkeeper's goal kick .",
        'Rita Sander scored the next goal for Red-blue Elephants from 18 metres to '
        'continue where they left off after Pauline Hunter played the ball into her path .',
        'The stadium went wild seeing Claudia Johnson winning the ball out wide for Red-blue '
        'Elephants and drawing a foul play from Sharon Schoolfield .'
    ]
    question = Question(
        type='overall',
        target='distance',
        evidence=[3],
        event_type='goal',
        reasoning='argmin-distance',
        answer='14',
        question_data={},
        realized='The closest goal was scored from how far away ?',
    )
    # Template validation is switched off; only the unit fixing is exercised.
    unit_fixer = Realizer(**templates, validate=False)
    fixed_answer = unit_fixer._fix_units(question, story_sentences)
    expected = '14 metres'
    assert fixed_answer == expected
def test_different_dollar_templates_nested():
    """Realise a single 'test' event with template set 1 and expect '1', '2' and '3' in the sentence."""
    restricted_templates = only(templates, 1)
    realizer = Realizer(**restricted_templates, unique_sentences=False)

    event = Event(0)
    event.event_type = 'test'

    story, _visits = realizer.realise_story([event], {})
    first_sentence = story[0]
    for expected_token in ('1', '2', '3'):
        assert expected_token in first_sentence
def test_realise_choices_works():
    """Every generated template choice must realise to a distinct story."""
    permutation_targets = (['t1'], ['t2'], ['t1', 't2'])
    for to_permute in permutation_targets:
        all_choices = generate_all_possible_template_choices(test_events, sentences, to_permute)
        realised_stories = []
        world = World()
        for choice in all_choices:
            # A fresh realizer is built for every choice.
            realizer = Realizer(sentences, {}, dollar, {}, {}, {}, True, False)
            story, _visits = realizer.realise_with_sentence_choices(test_events, world, choice)
            realised_stories.append(" ".join(story))
        distinct_stories = set(realised_stories)
        assert len(realised_stories) == len(distinct_stories)
def interactive_env(bundle: Bundle, cfg=None, modifier=False, do_print=True, do_realise=True,
                    generator_kwargs=None):
    """Generate a story with the bundle's modifier generator and optionally realise it.

    Args:
        bundle: Provides ``generator_modifier`` (the generator class) and
            ``templates_modifier`` (keyword arguments for ``Realizer``).
        cfg: Configuration. A falsy value becomes ``Config({})``; a ``str``
            or ``dict`` is wrapped in ``Config``; any other truthy object is
            used as is.
        modifier: Must be truthy; the non-modifier path is not implemented.
        do_print: When truthy (and the story is realised), print the actors,
            co-actors, story, questions and chosen templates.
        do_realise: When falsy, return right after event generation.
        generator_kwargs: Extra keyword arguments for the generator class.

    Returns:
        ``(generator, cfg, events, realizer, story, all_questions, visits)``.
        The last four entries are ``None`` when ``do_realise`` is falsy.

    Raises:
        NotImplementedError: If ``modifier`` is falsy.
    """
    if not modifier:
        raise NotImplementedError()

    if not cfg:
        cfg = Config({})
    elif isinstance(cfg, (str, dict)):
        cfg = Config(cfg)
    cfg.pprint()

    # Only the modifier variant exists; the plain variant would have used
    # bundle.generator / bundle.templates instead.
    g_class = bundle.generator_modifier
    templates = bundle.templates_modifier

    generator = g_class(cfg, **(generator_kwargs or {}))
    events = generator.generate_story()

    # Sanity checks on the generated events. The co-actor check must not
    # depend on names bound by the actor check: previously it raised a
    # NameError when only `unique_coactors` was set.
    unique_actors = generator.unique_actors
    unique_coactors = getattr(generator, 'unique_coactors', False)
    if unique_actors:
        event_actors = [e.actor for e in events]
        assert len(set(event_actors)) == generator.world.num_sentences
    if unique_coactors:
        event_coactors = [e.attributes['coactor'] for e in events]
        assert len(set(event_coactors)) == generator.world.num_sentences
        if unique_actors:
            # Actors and co-actors together must be pairwise distinct.
            assert len(set(event_actors + event_coactors)) == 2 * generator.world.num_sentences

    if not do_realise:
        return generator, cfg, events, None, None, None, None

    realizer = Realizer(**templates, unique_sentences=False)
    story, visits = realizer.realise_story(events, generator.world)
    ssq, maq, uaq, abq = get_questions(generator, realizer, events, visits, story)
    all_questions = (ssq, maq, uaq, abq)

    if do_print:
        actors = [" ".join([e.actor.first, e.actor.last]) for e in events]
        coactors = [
            " ".join([e.attributes['coactor'].first, e.attributes['coactor'].last])
            for e in events
        ]
        print('Actors', actors)
        print('Coactors', coactors)
        print_out(story, ssq, maq, uaq, abq, highlights=actors + coactors)
        print(realizer.context.chosen_templates)

    return generator, cfg, events, realizer, story, all_questions, visits
def test_different_sentences():
    """Four 'unique_sentence_test' events must realise so that each index 0-3 appears in the story."""
    sents = sentences  # nested
    realizer = Realizer(**templates)

    indices = [0, 1, 2, 3]
    events = [Event(idx) for idx in indices]
    for event in events:
        event.event_type = "unique_sentence_test"

    story, _visits = realizer.realise_story(events, {})
    for i in indices:
        marker = str(i)
        assert any(marker in sentence for sentence in story), f"{i} not in story!"
def test_different_dollar_templates_flat_with_multiple_sentences_when_not_leaf():
    """Realise two 'test' events with template set 2 and expect '1', '2' and 'b' in the first sentence."""
    restricted_templates = only(templates, 2)
    realizer = Realizer(**restricted_templates, unique_sentences=False)

    events = [Event(0), Event(1)]
    for event in events:
        event.event_type = 'test'

    story, _visits = realizer.realise_story(events, {})
    first_sentence = story[0]
    for expected_token in ('1', '2', 'b'):
        assert expected_token in first_sentence
def test_different_dollar_templates_flat():
    """Realise a single 'test' event with template set 0 (flat) and expect '1', '2' and '3' in the sentence."""
    flat_templates = only(templates, 0)
    realizer = Realizer(**flat_templates)

    event = Event(0)
    event.event_type = 'test'

    story, _visits = realizer.realise_story([event], {})
    first_sentence = story[0]
    for expected_token in ('1', '2', '3'):
        assert expected_token in first_sentence
def _do_realize(config, event_plan, events, modifier_types, template_choices, templates, world, seed=None):
    """Realise the modified story, its baseline and the control story for one event plan.

    The modified story is realised from ``events`` with the given template
    choices. The baseline is realised from a deep copy of the events with all
    features removed, replaying the choices recorded by the first realizer.
    The control story is the modified story without the sentences whose
    events carry one of ``modifier_types``.

    Args:
        config: Passed to ``PlannedFootballModifierGenerator``.
        event_plan: Plan used to generate the questions.
        events: Events of the modified story (not mutated).
        modifier_types: Feature names that mark an event as modified.
        template_choices: Sentence template choices for realisation.
        templates: Keyword arguments for ``Realizer``.
        world: World object handed to the realizer.
        seed: Seed for the global ``random`` generator; ``None`` leaves the
            generator state untouched.

    Returns:
        ``(baseline_story, mqs, qs, modified_story, control_story)`` where
        ``qs`` are the baseline questions and ``mqs`` the questions for the
        modified story.

    Raises:
        AssertionError: If one of the consistency checks on stories,
            questions or answers fails.
    """
    # `if seed:` would silently skip seeding for the valid seed 0.
    if seed is not None:
        random.seed(seed)

    realizer = Realizer(**templates, validate=False)
    modified_story, _ = realizer.realise_with_sentence_choices(events, world, template_choices)
    choices = realizer.context.choices

    indices_to_remove = [
        i for i, e in enumerate(events)
        if any(mt in e.features for mt in modifier_types)
    ]

    # The baseline uses the same events stripped of every feature.
    baseline_events = deepcopy(events)
    for event in baseline_events:
        event.features = []
    realizer = Realizer(**templates, unique_sentences=True)
    baseline_story, _ = realizer.realise_with_choices(baseline_events, world, choices, template_choices)

    generator_instance: PlannedFootballModifierGenerator = PlannedFootballModifierGenerator(
        config=config, modifier_types=modifier_types, event_plan=event_plan)
    # this is for single span extraction only atm
    qs = generator_instance.generate_questions_from_plan(event_plan, baseline_events)[0]
    mqs = generator_instance.generate_questions_from_plan(event_plan, events, True)[0]

    control_story = [s for i, s in enumerate(modified_story) if i not in indices_to_remove]
    assert len(control_story) == len(baseline_story) - len(indices_to_remove)

    for q, mq in zip(qs, mqs):
        realizer.realise_question(q, baseline_story, ignore_missing_keys=False)
        assert q.answer, f"{q}\n{baseline_story}"
        # The modified question reuses the surface form of the baseline question.
        mq.realized = q.realized
        assert mq.realized, f"{mq}\n{modified_story}"
        mq.answer = realizer._fix_units(mq, modified_story)
        assert mq.answer, f"{mq}\n{modified_story}"
        assert mq.answer != q.answer
        modified_text = " ".join(modified_story)
        assert q.answer in modified_text, (
            f"{q}\n{baseline_story}\n{event_plan.event_types}\n{event_plan.must_haves}\n{template_choices}")
        assert mq.answer in modified_text, (
            f"{mq}\n{modified_story}\n{event_plan.event_types}\n{event_plan.must_haves}\n{template_choices}")

    return baseline_story, mqs, qs, modified_story, control_story
def validate(domain: Bundle):
    """Build a validating ``Realizer`` from the domain's modifier templates and report the result.

    A ``ValueError`` raised during construction is printed and reported as
    a failure; otherwise success is reported.
    """
    print("Validating...")
    try:
        Realizer(**domain.templates_modifier, validate=True)
    except ValueError as error:
        print(str(error))
        click.secho("FAIL!", fg='red')
    else:
        click.secho("SUCCESS!", fg='green')
def showcase(given_bundle=None, n=0, do_print=True):
    """Realise goal template ``n`` once per modifier type and return the first sentences.

    Args:
        given_bundle: Bundle to use; the module-level ``bundle`` is used when
            this is falsy.
        n: Index handed to ``only`` together with the action ``'goal'``.
        do_print: When truthy, print a header and the first two sentences of
            each realised story.

    Returns:
        A list with the first sentence of the story realised for each entry
        of ``MODIFIER_TYPES``.
    """
    source_bundle = given_bundle if given_bundle else bundle
    test_bundle = only(source_bundle, n, 'goal')
    templates = test_bundle.templates_modifier

    # Only the generator and the events of the environment are needed here.
    generator, _cfg, events, _realizer, _story, _questions, _visits = \
        interactive_env_football_modifier(
            test_bundle,
            cfg={"world.num_sentences": 2},
            do_print=False,
            do_realise=False)

    first_sentences = []
    for modifier in MODIFIER_TYPES:
        # Apply the modifier to the first event and to the generator.
        events[0].features = [modifier]
        generator.modifier_type = modifier
        modifier_realizer = Realizer(**templates, unique_sentences=False)
        realised, _story_visits = modifier_realizer.realise_story(events, generator.world)
        if do_print:
            print(f"==== {modifier} ====")
            print(realised[0])
            print(realised[1])
        first_sentences.append(realised[0])
    return first_sentences
def test_fix_units_3():
    """Check that `_fix_units` yields '31 metres' for a bridge distance question whose raw answer is '31'."""
    story_sentences = [
        "Ethelyn Capello scored Arctic Monkeys 's first goal from 20 metres away "
        "to set the tone for the match after Annmarie Dibiase inadvertently prodded the ball into her path .",
        'Things proceeded with Mary Millwood being withdrawn in the 31 st minute with her hip '
        'in a brace following a challenge from Amanda Testa .',
        "Pale Lilac Elephants almost advanced the action with a 31 metres goal as Carol Nehls "
        "all but curled in Cynthia Kittredge 's soft clearance .",
        "Shannon Garber almost added more insult to the injury when she almost slotted in a "
        "21 metres goal from Virginia Sheekey 's pass .",
        "In the 52 nd minute a soft clearance went to Pale Lilac Elephants 's Ida Webb on the flank "
        "and the player swept low to the 6-yard-area for Mamie Swart to poke past the goalkeeper "
        "for a wonderful 25 metres goal .",
        'Things proceeded with Judith Odougherty winning the ball in the middle field for '
        'Pale Lilac Elephants and drawing a foul from Brenda Uttech .'
    ]
    question = Question(
        type='direct',
        target='distance',
        evidence=[1, 2],
        event_type='goal',
        reasoning='bridge',
        answer='31',
        question_data={},
        realized='After the foul on Mary Millwood , '
                 'from how far away was the next goal scored ?',
    )
    # Template validation is switched off; only the unit fixing is exercised.
    unit_fixer = Realizer(**templates, validate=False)
    fixed_answer = unit_fixer._fix_units(question, story_sentences)
    expected = '31 metres'
    assert fixed_answer == expected
def generate_and_realise(bundle, config, modify_event_type, modifier_types, reasonings: Dict[Reasoning, int],
                         max_modifiers, use_mod_distance=False, mute=False, num_workers=8, deterministic=True):
    """Generate balanced event plans and realise each of them via ``_do_realize``.

    Args:
        bundle: Supplies ``templates_modifier`` and is passed to
            ``generate_balanced``.
        config, modify_event_type, modifier_types, reasonings, max_modifiers,
        use_mod_distance: Forwarded to ``generate_balanced`` (``config`` and
            ``modifier_types`` also to ``_do_realize``).
        mute: Disables the progress bar and is forwarded to
            ``generate_balanced``.
        num_workers: With more than one worker the realisation runs through
            ``Parallel``; otherwise it runs in a plain loop.
        deterministic: When truthy, every item gets a seed drawn from the
            global ``random`` generator; otherwise the seed is ``None``.

    Returns:
        One tuple per generated item: the item's fields followed by the
        fields returned by ``_do_realize``.
    """
    # TODO: do parallel
    result = generate_balanced(modify_event_type, config, bundle, reasonings, modifier_types,
                               max_modifiers, use_mod_distance, mute, num_workers=num_workers)
    templates = bundle.templates_modifier
    # validate templates here real quick
    Realizer(**templates, validate=True)

    seeds = [random.randint(0, sys.maxsize) if deterministic else None for _ in result]

    def realisation_args(item, seed):
        # Positional arguments for one `_do_realize` call.
        event_plan, events, template_choices, world = item
        return (config, event_plan, events, modifier_types, template_choices, templates, world, seed)

    jobs = zip(tqdm(result, desc='Realising...', disable=mute), seeds)
    if num_workers > 1:
        realized = Parallel(num_workers)(
            delayed(_do_realize)(*realisation_args(item, seed)) for item, seed in jobs)
    else:
        realized = [_do_realize(*realisation_args(item, seed)) for item, seed in jobs]

    return [(*generated, *realised) for generated, realised in zip(result, realized)]
def _passes_question_filters(logical, question_types, target_event_types):
    """Return whether a question is realised and matches the reasoning and event-type filters."""
    return bool(
        logical.realized
        and (not question_types or logical.reasoning in question_types)
        and logical.event_type in target_event_types)


def _qa_record_for(story_id, logical, modification_data):
    """Build the QA dictionary (without the ``answers`` entry) for one logical question."""
    question_data_str = "/".join(
        f"{k}:{v}" for k, v in logical.question_data.items()
        if k not in ['modified', 'easier'])
    return {
        "id": f"{story_id}/{logical.reasoning}/{logical.type}/{logical.target}/{logical.event_type}/"
              f"{question_data_str}",
        "question": logical.realized,
        "answer": logical.answer,
        "reasoning": logical.reasoning,
        'type': logical.type,
        'target': logical.target,
        'evidence': logical.evidence,
        'event_type': logical.event_type,
        'question_data': logical.question_data,
        'modification_data': modification_data
    }


def _realize_events(generator_class, target_event_types, events, world, arranged_sentences, question_types,
                    answer_types, modifier_type, templates, uuid4, modification_data=None, seed=None):
    """Realise a modified story, its baseline and a control variant, each with QA records.

    The modified paragraph is realised from ``events`` using
    ``arranged_sentences``. The baseline paragraph is realised from a deep
    copy of the events with all features removed, replaying the recorded
    choices. The control paragraph takes the last realised story, drops the
    sentences at the positions of events that carried a feature starting
    with ``modifier_type``, and reuses the QA records of the modified
    paragraph.

    Args:
        generator_class: Called with ``{}`` to obtain a question generator.
        target_event_types: Only questions on these event types are kept.
        events: Events of the modified story (not mutated).
        world: World object handed to the realizer.
        arranged_sentences: Sentence choices for the modified story.
        question_types: Reasoning types to keep; a falsy value keeps all.
        answer_types: Zipped with the four question groups; only as many
            groups as there are entries are emitted.
        modifier_type: Feature prefix identifying modified events.
        templates: Keyword arguments for ``Realizer``.
        uuid4: Callable returning the story id.
        modification_data: Stored verbatim in every QA record.
        seed: Seed for the global ``random`` generator; ``None`` leaves the
            generator state untouched.

    Returns:
        ``(baseline, modified, control_deleted)``; each is a dict with the
        keys ``'title'`` and ``'paragraphs'``.
    """
    # `if seed:` would silently skip seeding for the valid seed 0.
    if seed is not None:
        random.seed(seed)
    # NOTE(review): presumably removes the configured log handlers - confirm.
    logger.remove()

    story_id = uuid4()

    # --- modified story -----------------------------------------------------
    paragraph = {"id": story_id, 'qas': []}
    realizer = Realizer(**templates, unique_sentences=True)
    story, visits = realizer.realise_with_sentence_choices(events, world, arranged_sentences)
    paragraph["context"] = ' '.join(story)
    paragraph['passage_sents'] = story
    choices = realizer.context.choices
    template_choices = realizer.context.chosen_templates

    generator = generator_class({})
    (single_span_questions, multi_span_questions, unanswerable_questions, abstractive_questions) = \
        generator.generate_questions(events, visits)
    modified_groups = (single_span_questions, multi_span_questions,
                       unanswerable_questions, abstractive_questions)
    modified_questions = (single_span_questions + multi_span_questions
                          + unanswerable_questions + abstractive_questions)

    for q in modified_questions:
        realizer.realise_question(q, story)

    for _label, logical_qs in zip(answer_types, modified_groups):
        for logical in logical_qs:
            if not _passes_question_filters(logical, question_types, target_event_types):
                continue
            qa = _qa_record_for(story_id, logical, modification_data)
            try:
                qa['answers'] = [{
                    'answer_start': match_answer_in_paragraph(qa=qa, datum=paragraph),
                    'text': qa['answer']
                }]
            except NotImplementedError:
                # The record is kept without answer spans in this case.
                pass
            paragraph['qas'].append(qa)
    modified = {'title': paragraph['id'], 'paragraphs': [paragraph]}

    # Lookup used to reuse the modified questions' surface forms for the baseline.
    question_map = {}
    for question in modified_questions:
        key = (question.event_type, question.target, question.question_data.get("n", None))
        question_map[key] = question

    # remove modifier
    idx_to_remove = [
        i for i, e in enumerate(events)
        if any(f.startswith(modifier_type) for f in e.features)
    ]
    events = deepcopy(events)
    for event in events:
        event.features = []

    # --- baseline story -----------------------------------------------------
    baseline_paragraph = {"id": story_id, "qas": []}
    realizer = Realizer(**templates, unique_sentences=True)
    story, visits = realizer.realise_with_choices(events, world, choices, template_choices)
    baseline_paragraph["context"] = ' '.join(story)
    baseline_paragraph['passage_sents'] = story

    generator = generator_class({})
    (single_span_questions, multi_span_questions, unanswerable_questions, abstractive_questions) = \
        generator.generate_questions(events, visits)
    baseline_groups = (single_span_questions, multi_span_questions,
                       unanswerable_questions, abstractive_questions)

    for q in (single_span_questions + multi_span_questions
              + unanswerable_questions + abstractive_questions):
        try:
            key = (q.event_type, q.target, q.question_data.get("n", None))
            q.realized = question_map[key].realized
            q.answer = realizer._fix_units(q, story)
        except Exception:
            # Best-effort fallback: realise the question from scratch. A bare
            # `except:` would also swallow KeyboardInterrupt and SystemExit.
            realizer.realise_question(q, story)

    for _label, logical_qs in zip(answer_types, baseline_groups):
        for logical in logical_qs:
            if not _passes_question_filters(logical, question_types, target_event_types):
                continue
            qa = _qa_record_for(story_id, logical, modification_data)
            qa['answers'] = [{
                'answer_start': match_answer_in_paragraph(qa=qa, datum=baseline_paragraph),
                'text': qa['answer']
            }]
            baseline_paragraph['qas'].append(qa)
    baseline = {
        'title': baseline_paragraph['id'],
        'paragraphs': [baseline_paragraph]
    }

    # --- control story ------------------------------------------------------
    # TODO: move modifier into parameter and from there into config/cli option
    control_deleted_story = [s for i, s in enumerate(story) if i not in idx_to_remove]
    control_deleted_paragraph = {
        "id": story_id,
        "qas": modified['paragraphs'][0]['qas'],
        "context": ' '.join(control_deleted_story),
        'passage_sents': control_deleted_story
    }
    # TODO: fix evidence?
    control_deleted = {
        'title': control_deleted_paragraph['id'],
        'paragraphs': [control_deleted_paragraph]
    }
    # NOTE: re-realising the control story from the reduced event list was
    # considered but is not that easy: whole template branches might be
    # deleted, so the choices would have to be tree-like rather than
    # sequential.
    return baseline, modified, control_deleted