def test_default_format(self, lazy):
    """Spot-check instances read from the small Event2Mind fixture.

    Builds an ``Event2MindDatasetReader`` with the given ``lazy`` setting,
    reads ``event2mind_small.csv``, expects 12 instances in total, and
    compares the ``source`` / ``xintent`` / ``xreact`` / ``oreact`` text of
    instances 0, 3, 4, 10 and 11 against hand-written token lists.
    """
    dataset_reader = Event2MindDatasetReader(lazy=lazy)
    fixture_path = AllenNlpTestCase.FIXTURES_ROOT / 'data' / 'event2mind_small.csv'
    all_instances = ensure_list(dataset_reader.read(str(fixture_path)))
    assert len(all_instances) == 12

    # Fields are checked in this order for every row of the table below.
    field_names = ("source", "xintent", "xreact", "oreact")

    # Expected token lists that occur in more than one row.
    none_text = ["@start@", "none", "@end@"]
    truck_source = [
        "@start@", "personx", "drives", "persony", "'s", "truck", "@end@"
    ]
    move_text = ["@start@", "move", "@end@"]
    grateful_text = ["@start@", "grateful", "@end@"]
    enraged_text = ["@start@", "enraged", "@end@"]

    # One row per checked instance: (index, source, xintent, xreact, oreact).
    expectations = (
        (
            0,
            ["@start@", "it", "is", "personx", "'s", "favorite", "animal", "@end@"],
            none_text,
            ["@start@", "excited", "to", "see", "it", "@end@"],
            none_text,
        ),
        (
            3,
            truck_source,
            move_text,
            grateful_text,
            ["@start@", "charitable", "@end@"],
        ),
        # Interestingly, taking all combinations doesn't make much sense if the
        # original source is ambiguous.
        (4, truck_source, move_text, grateful_text, enraged_text),
        (
            10,
            truck_source,
            ["@start@", "steal", "@end@"],
            ["@start@", "guilty", "@end@"],
            enraged_text,
        ),
        (
            11,
            ["@start@", "personx", "gets", "persony", "'s", "mother", "@end@"],
            ["@start@", "helpful", "@end@"],
            ["@start@", "useful", "@end@"],
            grateful_text,
        ),
    )

    for position, *expected_texts in expectations:
        checked = all_instances[position]
        for field_name, expected in zip(field_names, expected_texts):
            assert get_text(field_name, checked) == expected
def test_read_with_dummy_instances_for_vocab_generation(self, lazy):
    """Spot-check instances read with ``dummy_instances_for_vocab_generation``.

    Builds an ``Event2MindDatasetReader`` with the given ``lazy`` setting and
    ``dummy_instances_for_vocab_generation=True``, reads
    ``event2mind_small.csv``, expects 17 instances in total, and compares the
    ``source`` / ``xintent`` / ``xreact`` / ``oreact`` text of instances
    0, 5, 7, 9 and 14 against hand-written token lists.
    """
    dataset_reader = Event2MindDatasetReader(
        lazy=lazy,
        dummy_instances_for_vocab_generation=True,
    )
    fixture_path = AllenNlpTestCase.FIXTURES_ROOT / 'data' / 'event2mind_small.csv'
    all_instances = ensure_list(dataset_reader.read(str(fixture_path)))
    assert len(all_instances) == 17

    # Fields are checked in this order for every row of the table below.
    field_names = ("source", "xintent", "xreact", "oreact")

    # Expected token lists that occur in more than one row.
    none_text = ["@start@", "none", "@end@"]
    truck_source = [
        "@start@", "personx", "drives", "persony", "'s", "truck", "@end@"
    ]

    # One row per checked instance: (index, source, xintent, xreact, oreact).
    expectations = (
        (
            0,
            ["@start@", "it", "is", "personx", "'s", "favorite", "animal", "@end@"],
            none_text,
            none_text,
            none_text,
        ),
        (5, truck_source, ["@start@", "move", "@end@"], none_text, none_text),
        (7, none_text, none_text, ["@start@", "grateful", "@end@"], none_text),
        (9, none_text, none_text, none_text, ["@start@", "charitable", "@end@"]),
        (
            14,
            truck_source,
            ["@start@", "for", "fun", "@end@"],
            none_text,
            none_text,
        ),
    )

    for position, *expected_texts in expectations:
        checked = all_instances[position]
        for field_name, expected in zip(field_names, expected_texts):
            assert get_text(field_name, checked) == expected