def main():
    """Evaluate predicted item ids against the gold ids of one profile.

    Parses the command line, queries the profile given by ``arg.profile``
    for the i-ids whose ``p-id`` equals ``arg.pid`` (the gold set), obtains
    the predicted ids via ``get_predicted_iids`` and prints whatever
    ``evaluate`` returns for (predicted, gold, total number of profile ids).
    """
    arg = argparser().parse_args()

    # Gold standard: every i-id in the profile carrying the requested p-id.
    query = "select i-id where p-id = {}".format(arg.pid)
    tsdb_results = delphin.tsdb_query(query, arg.profile)
    # split() with no argument splits on any whitespace, so empty output
    # simply yields an empty gold set.
    gold_iids = set(int(x) for x in tsdb_results.split())

    all_iids = delphin.get_profile_ids(arg.profile)
    predicted_iids = get_predicted_iids(arg)

    # Parenthesised single-argument form prints identically on Python 2 and
    # is valid on Python 3, matching the print(...) calls used by index().
    print(evaluate(predicted_iids, gold_iids, len(all_iids)))
def index(profiles, treebank, in_grammar):
    """Accumulate type statistics over the derivations in ``profiles``.

    For each profile path, the stored derivations are fetched with a tsdb
    query, ``get_types`` is run on each derivation, and the resulting
    per-type counts are folded into a ``defaultdict`` of
    ``delphin.TypeStats``. Profiles whose basename is listed in
    ``ERG_SPEECH_PROFILES`` are processed with the ``<alias>-speech``
    variant of ``in_grammar``.

    Args:
        profiles: iterable of profile directory paths.
        treebank: label for this collection; spaces are replaced with
            underscores when it is used in the output filename.
        in_grammar: grammar object (must expose ``.alias``) used for every
            non-speech profile.

    Side effects:
        * Progress is printed to stdout; tsdb and ACE errors go to stderr.
        * If any derivation fails with ``delphin.AceError``, the errors are
          written to ``type-stats-errors.txt`` in the working directory.
        * The statistics are pickled to
          ``<grammar alias>--<treebank>--<tree count>.pickle``.

    Returns:
        None.
    """
    stats_dict = defaultdict(delphin.TypeStats)
    trees = 0
    failures = []
    # Bind before the loop: the output filename below reads ``grammar`` and
    # would otherwise raise NameError when ``profiles`` is empty.
    grammar = in_grammar

    for path in profiles:
        grammar = in_grammar
        items_seen = set()
        print("processing {}".format(path))
        profile = os.path.basename(path)

        if profile in ERG_SPEECH_PROFILES:
            alias = grammar.alias+'-speech'
            grammar = gram.get_grammar(alias)

        try:
            # Query for treebanked profiles. For non-treebanked profiles use
            # 'select i-id derivation where readings > 0' instead.
            out = delphin.tsdb_query('select i-id derivation where t-active > 0', path)
        except delphin.TsdbError as e:
            sys.stderr.write(str(e)+'\n')
            continue

        # Skip empty *and* whitespace-only output; the latter would produce a
        # single '' row that cannot be unpacked below.
        if not out.strip():
            continue

        for result in out.strip().split('\n'):
            # Split once only, so a ' | ' occurring inside the derivation text
            # stays part of the derivation instead of breaking the unpack.
            iid, derivation = result.split(' | ', 1)

            if iid in items_seen or iid in BLACKLIST:
                continue

            try:
                counts = get_types(derivation, grammar)
                for name, count in counts.items():
                    stats_dict[name].update(count)
            except delphin.AceError as e:
                # Record where the failure happened for the error report.
                e.other_data.append(iid)
                e.other_data.append(path)
                failures.append(e)
                sys.stderr.write(str(e) + '\n')
            else:
                # Only successfully processed items count as seen/trees.
                items_seen.add(iid)
                trees += 1
                print(trees, iid)

    print("Processed {} trees".format(trees))

    num_failures = len(failures)
    if num_failures > 0:
        print("Failed to reconstruct {} trees".format(num_failures))
        print("See type-stats-errors.txt for details.")
        report = '\n'.join(str(e) for e in failures) + '\n\n'
        # Write UTF-8 bytes explicitly. On Python 2 ``str`` is already a byte
        # string (calling .encode on it would implicitly ASCII-decode and can
        # fail); on Python 3 it is text and must be encoded before being
        # written to a binary file.
        if not isinstance(report, bytes):
            report = report.encode('utf8')
        with open('type-stats-errors.txt', 'wb') as f:
            f.write(report)

    treebank_str = treebank.replace(' ', '_')
    # NOTE(review): ``grammar`` is whatever the last processed profile used,
    # so the filename carries the '-speech' alias when the final profile is a
    # speech profile. Confirm whether ``in_grammar.alias`` is intended here.
    filename = '{}--{}--{}.pickle'.format(grammar.alias, treebank_str, trees)
    with open(filename, 'wb') as f:
        pickle.dump(stats_dict, f)