Example #1
0
def main():
    """Score predicted item ids against the ids tsdb returns for one p-id.

    Reads the command-line arguments, asks tsdb for every i-id in
    ``arg.profile`` whose p-id equals ``arg.pid``, collects the predicted
    ids via ``get_predicted_iids`` and prints whatever ``evaluate`` returns
    for (predicted ids, gold ids, number of ids in the profile).
    """
    arg = argparser().parse_args()
    query = "select i-id where p-id = {}".format(arg.pid)
    tsdb_results = delphin.tsdb_query(query, arg.profile)
    # The query output is whitespace-separated i-ids; compare them as ints.
    gold_iids = {int(x) for x in tsdb_results.split()}
    all_iids = delphin.get_profile_ids(arg.profile)
    predicted_iids = get_predicted_iids(arg)
    # Parenthesised single-argument form prints identically under the
    # Python 2 print statement and the Python 3 print function.
    print(evaluate(predicted_iids, gold_iids, len(all_iids)))
Example #2
0
def index(profiles, treebank, in_grammar):
    """Accumulate per-type statistics over profiles and pickle the result.

    For each profile path, queries tsdb for the (i-id, derivation) pairs of
    items with ``t-active > 0``, obtains type counts for each derivation via
    ``get_types`` and folds them into a ``defaultdict`` of
    ``delphin.TypeStats`` keyed by type name.

    Args:
        profiles: iterable of profile directory paths.
        treebank: treebank label; spaces are replaced by underscores when
            it is used in the output filename.
        in_grammar: grammar object used for every profile, except those
            whose basename is in ``ERG_SPEECH_PROFILES``, which use the
            grammar registered under ``<alias>-speech``.

    Side effects:
        Prints progress to stdout and errors to stderr. If any derivation
        raised ``delphin.AceError``, writes all such errors to
        ``type-stats-errors.txt``. Always writes the statistics to
        ``<alias>--<treebank>--<trees>.pickle`` in the current directory.
    """
    stats_dict = defaultdict(delphin.TypeStats)
    trees = 0
    failures = []
    # Bind ``grammar`` before the loop so the output filename can still be
    # built when ``profiles`` is empty.
    grammar = in_grammar

    for path in profiles:
        grammar = in_grammar
        items_seen = set()
        print("processing {}".format(path))
        profile = os.path.basename(path)

        if profile in ERG_SPEECH_PROFILES:
            alias = grammar.alias+'-speech'
            grammar = gram.get_grammar(alias)

        try:
            # for treebanked profiles:
            out = delphin.tsdb_query('select i-id derivation where t-active > 0', path)
            # for non-treebanked profiles
            # out = delphin.tsdb_query('select i-id derivation where readings > 0', path)
        except delphin.TsdbError as e:
            sys.stderr.write(str(e)+'\n')
            continue

        # Strip first so whitespace-only output is skipped as well, instead
        # of failing on the unpacking below.
        out = out.strip()
        if not out:
            continue

        for result in out.split('\n'):
            # Split only on the first separator: the derivation text may
            # itself contain ' | '.
            iid, derivation = result.split(' | ', 1)

            if iid in items_seen or iid in BLACKLIST:
                continue

            try:
                counts = get_types(derivation, grammar)
                for name, count in counts.items():
                    stats_dict[name].update(count)
            except delphin.AceError as e:
                # Record which item and profile failed, then keep going.
                e.other_data.append(iid)
                e.other_data.append(path)
                failures.append(e)
                sys.stderr.write(str(e) + '\n')
            else:
                items_seen.add(iid)
                trees += 1
                print(trees, iid)

    print("Processed {} trees".format(trees))

    num_failures = len(failures)
    if num_failures > 0:
        print("Failed to reconstruct {} trees".format(num_failures))
        print("See type-stats-errors.txt for details.")

        # Encode the complete text once and write bytes to a binary file;
        # mixing encoded bytes with a str literal fails on Python 3.
        with open('type-stats-errors.txt', 'wb') as f:
            errors_str = '\n'.join(str(e) for e in failures)
            f.write((errors_str + '\n\n').encode('utf8'))

    treebank_str = treebank.replace(' ', '_')
    # NOTE(review): the alias comes from the grammar used for the last
    # profile processed, which may be the '-speech' variant -- confirm
    # this is intended rather than in_grammar.alias.
    filename = '{}--{}--{}.pickle'.format(grammar.alias, treebank_str, trees)

    with open(filename, 'wb') as f:
        pickle.dump(stats_dict, f)