Example #1
0
def count_training():
    """Print a per-level tally of thesaurus nodes found in the training data.

    Every sense yielded by a ``PickleLoader`` over ``training_dir`` is
    visited; each entry of its ``thesaurus_nodes`` is resolved through
    ``tdb.get_thesclass`` and the ``level`` of the result is counted.

    Output is one line per level from 0 to 16, formatted as
    ``<level><TAB><count>``. A level outside that range raises ``KeyError``.
    """
    # Pre-seed every level so that levels never encountered still print 0.
    tally = dict.fromkeys(range(17), 0)
    loader = PickleLoader(training_dir)
    for sense in loader.iterate():
        for node in sense.thesaurus_nodes:
            level = tdb.get_thesclass(node).level
            tally[level] = tally[level] + 1
    for level in range(17):
        # Single parenthesised argument: same output under Python 2's print
        # statement and Python 3's print function.
        print('%d\t%d' % (level, tally[level]))
Example #2
0
def count_classified():
    """Print a per-level tally of the classes assigned to classified senses.

    For each directory in ``parent_directories`` the ``classified``
    subdirectory is loaded with ``PickleLoader``. Senses that have no
    ``class_id`` attribute are ignored; for the rest, ``class_id`` is
    resolved through ``tdb.get_thesclass`` and the ``level`` of the result
    is counted.

    Output is one line per level from 0 to 16, formatted as
    ``<level><TAB><count>``. A level outside that range raises ``KeyError``.
    """
    # Private marker distinguishing "attribute absent" from any real value
    # (including None) that class_id might hold.
    absent = object()
    # Pre-seed every level so that levels never encountered still print 0.
    tally = dict.fromkeys(range(17), 0)
    for parent in parent_directories:
        loader = PickleLoader(os.path.join(parent, 'classified'))
        for sense in loader.iterate():
            class_id = getattr(sense, 'class_id', absent)
            if class_id is absent:
                continue
            level = tdb.get_thesclass(class_id).level
            tally[level] = tally[level] + 1
    for level in range(17):
        # Single parenthesised argument: same output under Python 2's print
        # statement and Python 3's print function.
        print('%d\t%d' % (level, tally[level]))
Example #3
0
    # Interactive lookup: prompt for a lemma, then print a trace of every
    # matching sense found in the pickled data under parent_directories.
    # NOTE(review): the enclosing function header is not visible in this
    # snippet, so nothing is documented here about its parameters or return.
    print """
===========================================================


Enter lemma (optionally followed by '-c' or '-u' to specify
    classified or unclassified):
"""
    lemma = raw_input(">>>")
    lemma = lemma.strip()
    # A trailing " -c" / " -u" flag restricts the search to one
    # subdirectory; with no flag both subdirectories are searched.
    if lemma.endswith(" -c"):
        dirs = ["classified"]
    elif lemma.endswith(" -u"):
        dirs = ["unclassified"]
    else:
        dirs = ["classified", "unclassified"]

    # Drop the trailing flag so only the lemma text remains.
    # NOTE(review): the pattern removes ANY trailing " -<one char>", not just
    # -c/-u, so an unrecognised flag is silently discarded and both
    # subdirectories are searched.
    lemma = re.sub(r" +-.$", "", lemma)
    # NOTE(review): an empty input (user just presses Enter) leaves lemma
    # as "" and lemma[0] raises IndexError here.
    initial = lemma[0].upper()

    # Only search when the upper-cased first character is in `letters`
    # (defined outside this snippet).
    if initial in letters:
        # node_ids already printed, so a sense that turns up in more than one
        # directory is traced only once.
        seen = set()
        for p in parent_directories:
            for d in dirs:
                subdir = os.path.join(p, d)
                # presumably letters=initial limits loading to data for that
                # initial letter - TODO confirm against PickleLoader.
                pl = PickleLoader(subdir, letters=initial)
                for sense in pl.iterate():
                    # Exact (case-sensitive) comparison against the typed lemma.
                    if sense.lemma == lemma and sense.node_id not in seen:
                        print "----------------------------------------"
                        print trace_sense(sense)
                        seen.add(sense.node_id)