Beispiel #1
0
def main():
    """Print the noun heads found in a pickled collection of passages.

    The pickle file path is read from the first command-line argument.
    For each passage, every possible scene is reduced to its head, scenes
    without a head are dropped, and whatever ``scenes.filter_noun_heads``
    keeps is collected into a set. The collected items are printed one per
    line; because they are held in a set, the output order is unspecified.
    """
    # NOTE(review): unpickling can execute arbitrary code -- only feed this
    # script files from a trusted source.
    with open(sys.argv[1], 'rb') as db:
        passages = pickle.load(db)

    noun_heads = set()
    for passage in passages:
        candidates = (scenes.extract_head(scene)
                      for scene in scenes.extract_possible_scenes(passage))
        found = [head for head in candidates if head is not None]
        noun_heads.update(scenes.filter_noun_heads(found))

    print('\n'.join(noun_heads))
Beispiel #2
0
def run_file(path, eng, stats):
    """Sort the possible scenes of a site XML file into the lists on ``stats``.

    The file at ``path`` is parsed, converted to a passage with
    ``convert.from_site``, and each possible scene is paired with its
    extracted head. Each scene then yields one ``Result`` appended to:

    * ``stats.heads``   -- no head was found, or ``eng.get_categories``
      returned ``'implicit'``;
    * ``stats.lemmas``  -- ``eng.get_categories`` returned ``'no base form'``;
    * ``stats.fulls``   -- the third item of the returned value is truthy;
    * ``stats.no_cats`` -- otherwise.

    Nothing is returned; ``stats`` is updated in place.
    """
    with open(path) as xml_file:
        root = ETree.ElementTree().parse(xml_file)
    passage = convert.from_site(root)

    candidates = scenes.extract_possible_scenes(passage)
    heads = list(map(scenes.extract_head, candidates))

    for scene, head in zip(candidates, heads):
        if head is None:
            stats.heads.append(Result(scene))
            continue

        categories = eng.get_categories(scene, head)
        if categories == 'implicit':
            bucket, result = stats.heads, Result(scene)
        elif categories == 'no base form':
            bucket, result = stats.lemmas, Result(scene, head)
        else:
            # Any other return value is indexable; its third item decides
            # between the "full" and "no categories" buckets.
            bucket = stats.fulls if categories[2] else stats.no_cats
            result = Result(scene, head, *categories)
        bucket.append(result)
Beispiel #3
0
 def test_extract_head(self):
     """Smoke test: extract_head runs on every possible scene.

     Only checks that the calls do not raise; the returned heads are
     not inspected.
     """
     candidates = scenes.extract_possible_scenes(TestUtil.create_passage())
     for scene in candidates:
         scenes.extract_head(scene)
Beispiel #4
0
 def test_possible_scenes(self):
     """Smoke test: extract_possible_scenes runs on a converted site file.

     Only checks that the call does not raise; the result is discarded.
     """
     passage = convert.from_site(TestUtil.load_xml('test_files/site3.xml'))
     scenes.extract_possible_scenes(passage)
Beispiel #5
0
 def test_extract_head(self):
     """Smoke test: extract_head runs on every possible scene.

     Only checks that the calls do not raise; the returned heads are
     not inspected.
     """
     candidates = scenes.extract_possible_scenes(TestUtil.create_passage())
     for scene in candidates:
         scenes.extract_head(scene)
Beispiel #6
0
 def test_possible_scenes(self):
     """Smoke test: extract_possible_scenes runs on a converted site file.

     Only checks that the call does not raise; the result is discarded.
     """
     passage = convert.from_site(TestUtil.load_xml('test_files/site3.xml'))
     scenes.extract_possible_scenes(passage)