import json
import os
import pickle
import sys
from itertools import chain

# NOTE: these functions also rely on project-local modules (delphin, gram,
# config) and helpers (txt_output, json_output, compare_types,
# pretty_print_types, UnknownFeatureException) imported at module level.


def output(pickle_path, output_type):
    with open(pickle_path, 'rb') as f:
        type_stats = pickle.load(f)

    # Pickle basenames encode their provenance as "grammar--treebank--trees".
    x = os.path.splitext(os.path.basename(pickle_path))[0].split('--')
    grammar_name = x[0]
    treebank = x[1]
    trees = x[2]
    metadata = {'grammar': grammar_name, 'treebank': treebank, 'trees': trees}

    grammar = gram.get_grammar(grammar_name)
    hierarchy = delphin.load_hierarchy(grammar.types_path)
    signs = [x.name for x in hierarchy['sign'].descendants()
             if not x.name.startswith('glb')]

    if output_type == 'txt':
        lex_entries = filter(lambda x: x.endswith('_le'), signs)
        rules = filter(lambda x: not x.endswith('_le'), signs)
        # Unknown-word predicate names retain a trailing double quote,
        # hence the '"' in the suffix being matched.
        unknowns = filter(lambda x: x.endswith('unknown_rel"'),
                          type_stats.keys())
        # All types in the hierarchy, excluding glb types.
        all_types = [x for x in hierarchy.types if not x.startswith('glb')]
        txt_output(lex_entries, type_stats, metadata, 'lex')
        txt_output(rules, type_stats, metadata, 'rule')
        txt_output(type_stats.keys(), type_stats, metadata, 'everything')
        txt_output(unknowns, type_stats, metadata, 'unknowns')
    elif output_type == 'json':
        json_output(type_stats, metadata)
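
# Illustrative usage sketch (not part of the original module): output()
# expects the pickle's basename to follow the "grammar--treebank--trees"
# naming convention parsed above. The path below is hypothetical.
#
#     output('erg--redwoods--1000.pickle', 'txt')   # writes txt summaries
#     output('erg--redwoods--1000.pickle', 'json')  # writes JSON via json_output
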
def typediff(pos_input, neg_input, grammar, arg):
    parse = lambda x: delphin.Fragment(x, grammar, ace_path=config.ACEBIN,
                                       dat_path=grammar.dat_path, count=arg.n,
                                       typifier=config.TYPIFIERBIN,
                                       fragments=arg.frags,
                                       logpath=config.LOGPATH)
    pos = [parse(x) for x in pos_input]
    neg = [parse(x) for x in neg_input]
    hierarchy = None

    if arg.all:
        # Collect types/supertypes from every reading of each item.
        tfunc = lambda x: x.types.keys()
        sfunc = lambda x: x.supers
    else:
        # Otherwise use only the best reading of each item.
        tfunc = lambda x: x.best.types.keys()
        sfunc = lambda x: x.best.supers

    pos_types = set(chain.from_iterable(tfunc(x) for x in pos))
    neg_types = set(chain.from_iterable(tfunc(x) for x in neg))

    if len(pos) + len(neg) > 1:
        typelist = list(compare_types(pos_types, neg_types, arg))
    else:
        # Only a single item was supplied, so one of the two sets is empty;
        # just take the non-empty one.
        typelist = list(max(pos_types, neg_types))

    if arg.supers:
        hierarchy = delphin.load_hierarchy(grammar.types_path)
        for p in pos:
            p.load_supers(hierarchy)
        for n in neg:
            n.load_supers(hierarchy)
        pos_supers = set(chain.from_iterable(sfunc(x) for x in pos))
        neg_supers = set(chain.from_iterable(sfunc(x) for x in neg))
        supers = compare_types(pos_supers, neg_supers, arg)
        typelist.extend('^' + t for t in supers)

    if arg.raw:
        return '\n'.join(typelist)

    if hierarchy is None:
        hierarchy = delphin.load_hierarchy(grammar.types_path)

    return pretty_print_types(typelist, hierarchy)
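
# Illustrative usage sketch (an assumption, not part of the original module):
# typediff() only needs `arg` to expose the attributes read above (n, frags,
# all, supers, raw), so a SimpleNamespace works as well as an
# argparse.Namespace. The sentences and grammar name are hypothetical.
#
#     from types import SimpleNamespace
#     arg = SimpleNamespace(n=10, frags=False, all=False, supers=False, raw=True)
#     grammar = gram.get_grammar('erg')
#     print(typediff(['We relied on him.'], ['We relied.'], grammar, arg))
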
def export_json(pos_input, neg_input, grammar, count, frags, supers, load_desc,
                tagger):
    hierarchy = delphin.load_hierarchy(grammar.types_path)
    parse = lambda x: delphin.Fragment(x, grammar, ace_path=config.ACEBIN,
                                       dat_path=grammar.dat_path, count=count,
                                       tnt=(tagger == 'tnt'),
                                       typifier=config.TYPIFIERBIN,
                                       fragments=frags,
                                       logpath=config.LOGPATH)
    try:
        pos = [parse(x) for x in pos_input]
        neg = [parse(x) for x in neg_input]
    except delphin.AceError as err:
        data = {
            'success': False,
            'error': err.msg,
        }
        return json.dumps(data)

    if supers:
        for p in pos:
            p.load_supers(hierarchy)
        for n in neg:
            n.load_supers(hierarchy)

    data = {
        'success': True,
        'pos-items': pos,
        'neg-items': neg,
    }

    if load_desc:
        descendants = lambda x: set(t.name for t in hierarchy[x].descendants())
        types = ('sign', 'head', 'synsem', 'cat', 'relation', 'predsort')
        data['descendants'] = {t: descendants(t) for t in types}
    else:
        data['descendants'] = False

    data['typeOrdering'] = [t for t, tc, wc in config.TYPES]
    data['typeColors'] = {t: wc for t, tc, wc in config.TYPES}
    return json.dumps(data, cls=delphin.JSONEncoder)
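
# Sketch of the JSON payload shape produced above (field names taken from the
# code; the values shown are illustrative only):
#
#     {
#       "success": true,
#       "pos-items": [...],          # serialised Fragment objects
#       "neg-items": [...],
#       "descendants": {"sign": [...], "head": [...], ...},  # or false
#       "typeOrdering": [...],       # from config.TYPES
#       "typeColors": {...}
#     }
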
def update_reading_counts(reading, feature, counts, ancestor):
    if feature == 'lextypes':
        counts.update(reading.lextypes)
    elif feature == 'rules':
        counts.update(reading.rules)
    elif feature == 'types':
        if ancestor is None:
            counts.update(reading.types)
        else:
            # Restrict the counts to types that descend from the requested
            # ancestor type.
            hierarchy = delphin.load_hierarchy(reading.grammar.types_path)
            try:
                descendant_types = set(t.name for t in
                                       hierarchy[ancestor].descendants())
            except delphin.TypeNotFoundError as e:
                sys.stderr.write(str(e))
                sys.exit()
            for t in reading.types:
                if t in descendant_types:
                    counts[t] += reading.types[t]
    else:
        raise UnknownFeatureException(feature)
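
# Illustrative usage sketch (an assumption): `counts` is expected to behave
# like collections.Counter, since both Counter.update() with a mapping and
# `counts[t] += n` are used above. The `readings` iterable is hypothetical.
#
#     from collections import Counter
#     counts = Counter()
#     for reading in readings:
#         update_reading_counts(reading, 'types', counts, ancestor='sign')
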