def test_srl_reader_can_filter_by_domain(self):
    """Read the conll_2012 fixtures with ``domain_identifier="subdomain2"`` and expect two instances."""
    reader = SrlReader(domain_identifier="subdomain2")
    filtered = ensure_list(reader.read('tests/fixtures/conll_2012/'))
    # If we'd included the folder, we'd have 9 instances.
    assert len(filtered) == 2
def test_srl_reader_can_filter_by_domain(self):
    """Read the conll_2012 fixtures with ``domain_identifier="subdomain2"`` and expect two instances."""
    fixture_dir = AllenNlpTestCase.FIXTURES_ROOT / 'conll_2012'
    reader = SrlReader(domain_identifier="subdomain2")
    filtered = ensure_list(reader.read(fixture_dir))
    # If we'd included the folder, we'd have 9 instances.
    assert len(filtered) == 2
def test_read_from_file(self):
    """Read the conll_2012 fixtures and check tokens, predicate index and tags of the first five instances."""
    reader = SrlReader()
    instances = reader.read('tests/fixtures/conll_2012/').instances

    mali_words = ["Mali", "government", "officials", "say", "the", "woman",
                  "'s", "confession", "was", "forced", "."]
    prosecution_words = ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month',
                         'after', 'four', 'months', 'of', 'hearings', '.']

    # One (tokens, predicate index, tags) triple per instance, in reading order.
    expectations = [
        (mali_words, 3,
         ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
          'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']),
        (mali_words, 8,
         ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1',
          'I-ARG1', 'I-ARG1', 'B-V', 'B-ARG2', 'O']),
        (prosecution_words, 2,
         ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'B-ARGM-TMP', 'I-ARGM-TMP',
          'B-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'O']),
        (prosecution_words, 11,
         ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-V', 'O']),
    ]
    for position, (words, verb_index, tags) in enumerate(expectations):
        fields = instances[position].fields()
        assert fields["tokens"].tokens() == words
        assert fields["verb_indicator"].sequence_index() == verb_index
        assert fields["tags"].tags() == tags

    # Tests a sentence with no verbal predicates.
    fields = instances[4].fields()
    assert fields["tokens"].tokens() == ["Denise", "Dillon", "Headline", "News", "."]
    assert fields["verb_indicator"].sequence_index() is None
    assert fields["tags"].tags() == ['O', 'O', 'O', 'O', 'O']
def test_read_from_file(self, lazy):
    """Read the conll_2012 ``subdomain`` fixtures and check tokens, verb indicator and tags of five instances."""
    reader = SrlReader(lazy=lazy)
    instances = ensure_list(reader.read('tests/fixtures/conll_2012/subdomain'))

    mali_words = ["Mali", "government", "officials", "say", "the", "woman",
                  "'s", "confession", "was", "forced", "."]
    prosecution_words = ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month',
                         'after', 'four', 'months', 'of', 'hearings', '.']

    # One (tokens, position flagged in verb_indicator, tags) triple per instance, in order.
    expectations = [
        (mali_words, 3,
         ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
          'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O']),
        (mali_words, 8,
         ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1',
          'I-ARG1', 'I-ARG1', 'B-V', 'B-ARG2', 'O']),
        (prosecution_words, 2,
         ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'B-ARGM-TMP', 'I-ARGM-TMP',
          'B-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP', 'O']),
        (prosecution_words, 11,
         ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-V', 'O']),
    ]
    for position, (words, verb_index, tags) in enumerate(expectations):
        fields = instances[position].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == words
        assert fields["verb_indicator"].labels[verb_index] == 1
        assert fields["tags"].labels == tags

    # Tests a sentence with no verbal predicates.
    fields = instances[4].fields
    tokens = [t.text for t in fields['tokens'].tokens]
    assert tokens == ["Denise", "Dillon", "Headline", "News", "."]
    assert fields["verb_indicator"].labels == [0, 0, 0, 0, 0]
    assert fields["tags"].labels == ['O', 'O', 'O', 'O', 'O']
def test_read_from_file(self, lazy):
    """Read the conll_2012 ``subdomain`` fixtures and check tokens, verb indicator and tags of five instances."""
    fixture_dir = AllenNlpTestCase.FIXTURES_ROOT / 'conll_2012' / 'subdomain'
    reader = SrlReader(lazy=lazy)
    instances = ensure_list(reader.read(fixture_dir))

    def check_instance(position, words, verb_index, tags):
        # Compare one instance against its expected tokens, flagged verb position and tags.
        fields = instances[position].fields
        tokens = [t.text for t in fields['tokens'].tokens]
        assert tokens == words
        assert fields["verb_indicator"].labels[verb_index] == 1
        assert fields["tags"].labels == tags

    mali_words = ["Mali", "government", "officials", "say", "the", "woman",
                  "'s", "confession", "was", "forced", "."]
    prosecution_words = ['The', 'prosecution', 'rested', 'its', 'case', 'last', 'month',
                         'after', 'four', 'months', 'of', 'hearings', '.']

    check_instance(0, mali_words, 3,
                   ['B-ARG0', 'I-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1',
                    'I-ARG1', 'I-ARG1', 'I-ARG1', 'I-ARG1', 'O'])
    check_instance(1, mali_words, 8,
                   ['O', 'O', 'O', 'O', 'B-ARG1', 'I-ARG1',
                    'I-ARG1', 'I-ARG1', 'B-V', 'B-ARG2', 'O'])
    check_instance(2, prosecution_words, 2,
                   ['B-ARG0', 'I-ARG0', 'B-V', 'B-ARG1', 'I-ARG1', 'B-ARGM-TMP',
                    'I-ARGM-TMP', 'B-ARGM-TMP', 'I-ARGM-TMP', 'I-ARGM-TMP',
                    'I-ARGM-TMP', 'I-ARGM-TMP', 'O'])
    check_instance(3, prosecution_words, 11,
                   ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-V', 'O'])

    # Tests a sentence with no verbal predicates.
    fields = instances[4].fields
    tokens = [t.text for t in fields['tokens'].tokens]
    assert tokens == ["Denise", "Dillon", "Headline", "News", "."]
    assert fields["verb_indicator"].labels == [0, 0, 0, 0, 0]
    assert fields["tags"].labels == ['O', 'O', 'O', 'O', 'O']
def test_read_from_file(self, lazy):
    u"""Read the conll_2012 ``subdomain`` fixtures and check tokens, verb indicator and tags of five instances."""
    fixture_dir = AllenNlpTestCase.FIXTURES_ROOT / u'conll_2012' / u'subdomain'
    reader = SrlReader(lazy=lazy)
    instances = ensure_list(reader.read(fixture_dir))

    mali_words = [u"Mali", u"government", u"officials", u"say", u"the", u"woman",
                  u"'s", u"confession", u"was", u"forced", u"."]
    prosecution_words = [u'The', u'prosecution', u'rested', u'its', u'case', u'last', u'month',
                         u'after', u'four', u'months', u'of', u'hearings', u'.']

    # One (tokens, position flagged in verb_indicator, tags) triple per instance, in order.
    expectations = [
        (mali_words, 3,
         [u'B-ARG0', u'I-ARG0', u'I-ARG0', u'B-V', u'B-ARG1', u'I-ARG1',
          u'I-ARG1', u'I-ARG1', u'I-ARG1', u'I-ARG1', u'O']),
        (mali_words, 8,
         [u'O', u'O', u'O', u'O', u'B-ARG1', u'I-ARG1',
          u'I-ARG1', u'I-ARG1', u'B-V', u'B-ARG2', u'O']),
        (prosecution_words, 2,
         [u'B-ARG0', u'I-ARG0', u'B-V', u'B-ARG1', u'I-ARG1', u'B-ARGM-TMP',
          u'I-ARGM-TMP', u'B-ARGM-TMP', u'I-ARGM-TMP', u'I-ARGM-TMP',
          u'I-ARGM-TMP', u'I-ARGM-TMP', u'O']),
        (prosecution_words, 11,
         [u'O', u'O', u'O', u'O', u'O', u'O', u'O', u'O', u'O', u'O', u'O', u'B-V', u'O']),
    ]
    for position, (words, verb_index, tags) in enumerate(expectations):
        fields = instances[position].fields
        tokens = [t.text for t in fields[u'tokens'].tokens]
        assert tokens == words
        assert fields[u"verb_indicator"].labels[verb_index] == 1
        assert fields[u"tags"].labels == tags

    # Tests a sentence with no verbal predicates.
    fields = instances[4].fields
    tokens = [t.text for t in fields[u'tokens'].tokens]
    assert tokens == [u"Denise", u"Dillon", u"Headline", u"News", u"."]
    assert fields[u"verb_indicator"].labels == [0, 0, 0, 0, 0]
    assert fields[u"tags"].labels == [u'O', u'O', u'O', u'O', u'O']
def test_read_from_file(self, lazy):
    """Read the conll_2012 ``subdomain`` fixtures and check tokens, verb indicator, tags and metadata of five instances."""
    fixture_dir = AllenNlpTestCase.FIXTURES_ROOT / "conll_2012" / "subdomain"
    reader = SrlReader(lazy=lazy)
    instances = ensure_list(reader.read(fixture_dir))

    mali_words = [
        "Mali", "government", "officials", "say", "the", "woman",
        "'s", "confession", "was", "forced", ".",
    ]
    prosecution_words = [
        "The", "prosecution", "rested", "its", "case", "last", "month",
        "after", "four", "months", "of", "hearings", ".",
    ]

    # One (tokens, position flagged in verb_indicator, tags) triple per instance, in order.
    expectations = [
        (mali_words, 3, [
            "B-ARG0", "I-ARG0", "I-ARG0", "B-V", "B-ARG1", "I-ARG1",
            "I-ARG1", "I-ARG1", "I-ARG1", "I-ARG1", "O",
        ]),
        (mali_words, 8, [
            "O", "O", "O", "O", "B-ARG1", "I-ARG1",
            "I-ARG1", "I-ARG1", "B-V", "B-ARG2", "O",
        ]),
        (prosecution_words, 2, [
            "B-ARG0", "I-ARG0", "B-V", "B-ARG1", "I-ARG1", "B-ARGM-TMP",
            "I-ARGM-TMP", "B-ARGM-TMP", "I-ARGM-TMP", "I-ARGM-TMP",
            "I-ARGM-TMP", "I-ARGM-TMP", "O",
        ]),
        (prosecution_words, 11, [
            "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "O", "B-V", "O",
        ]),
    ]
    for position, (words, verb_index, tags) in enumerate(expectations):
        fields = instances[position].fields
        tokens = [t.text for t in fields["tokens"].tokens]
        assert tokens == words
        assert fields["verb_indicator"].labels[verb_index] == 1
        assert fields["tags"].labels == tags
        # The metadata must mirror what the other fields of the same instance hold.
        metadata = fields["metadata"].metadata
        assert metadata["words"] == tokens
        assert metadata["verb"] == tokens[verb_index]
        assert metadata["gold_tags"] == fields["tags"].labels

    # Tests a sentence with no verbal predicates.
    fields = instances[4].fields
    tokens = [t.text for t in fields["tokens"].tokens]
    assert tokens == ["Denise", "Dillon", "Headline", "News", "."]
    assert fields["verb_indicator"].labels == [0, 0, 0, 0, 0]
    assert fields["tags"].labels == ["O", "O", "O", "O", "O"]
    metadata = fields["metadata"].metadata
    assert metadata["words"] == tokens
    assert metadata["verb"] is None
    assert metadata["gold_tags"] == fields["tags"].labels