Esempio n. 1
0
def output(pickle_path, output_type):
    """Write the type statistics stored in a pickle file as text or JSON.

    The pickle's base name (extension stripped) is split on '--'; its first
    three fields are read, in order, as the grammar name, the treebank and
    the trees, and are handed to the writers as a metadata dict.

    Args:
        pickle_path: Path to the pickled type statistics.
        output_type: 'txt' produces four reports through txt_output
            (lexical entries, rules, everything, unknowns); 'json' produces
            one through json_output.  Any other value writes nothing.

    Raises:
        IndexError: If the file name has fewer than three '--' fields.
    """
    # NOTE(review): pickle.load can execute arbitrary code; only feed this
    # function pickle files that come from a trusted run.
    with open(pickle_path, 'rb') as f:
        type_stats = pickle.load(f)

    base_name = os.path.splitext(os.path.basename(pickle_path))[0]
    name_parts = base_name.split('--')
    grammar_name = name_parts[0]
    treebank = name_parts[1]
    trees = name_parts[2]
    metadata = {'grammar': grammar_name, 'treebank': treebank, 'trees': trees}

    if output_type == 'txt':
        # The type hierarchy is only needed to enumerate the sign types for
        # the text reports, so it is loaded here instead of unconditionally.
        grammar = gram.get_grammar(grammar_name)
        hierarchy = delphin.load_hierarchy(grammar.types_path)
        signs = [t.name for t in hierarchy['sign'].descendants()
                 if not t.name.startswith('glb')]

        # Build real lists: on Python 3 filter() yields a one-shot iterator,
        # which would break a consumer that iterates twice or calls len().
        lex_entries = [s for s in signs if s.endswith('_le')]
        rules = [s for s in signs if not s.endswith('_le')]
        unknowns = [t for t in type_stats.keys()
                    if t.endswith('unknown_rel"')]

        txt_output(lex_entries, type_stats, metadata, 'lex')
        txt_output(rules, type_stats, metadata, 'rule')
        txt_output(type_stats.keys(), type_stats, metadata, 'everything')
        txt_output(unknowns, type_stats, metadata, 'unknowns')
    elif output_type == 'json':
        json_output(type_stats, metadata)
Esempio n. 2
0
def typediff(pos_input, neg_input, grammar, arg):
    """Parse the positive and negative inputs and return their type listing.

    Every input is parsed into a delphin.Fragment.  The types of the
    positive and negative items are collected into two sets and passed to
    compare_types; when at most one item was parsed in total, the types of
    that single item are listed directly instead.

    Args:
        pos_input: Iterable of positive inputs to parse.
        neg_input: Iterable of negative inputs to parse.
        grammar: Grammar object; its dat_path and types_path are read.
        arg: Options object.  The attributes read here are n, frags, all,
            supers and raw; it is also passed through to compare_types.

    Returns:
        When arg.raw is set, the type names joined by newlines; otherwise
        the result of pretty_print_types for the type list.
    """
    def parse(text):
        return delphin.Fragment(text, grammar, ace_path=config.ACEBIN,
                                dat_path=grammar.dat_path,
                                count=arg.n,
                                typifier=config.TYPIFIERBIN,
                                fragments=arg.frags,
                                logpath=config.LOGPATH)

    pos = [parse(x) for x in pos_input]
    neg = [parse(x) for x in neg_input]
    hierarchy = None

    # arg.all selects the item's own types/supers; otherwise only those of
    # its `best` attribute are used.
    if arg.all:
        def types_of(item):
            return item.types.keys()

        def supers_of(item):
            return item.supers
    else:
        def types_of(item):
            return item.best.types.keys()

        def supers_of(item):
            return item.best.supers

    pos_types = set(chain.from_iterable(types_of(x) for x in pos))
    neg_types = set(chain.from_iterable(types_of(x) for x in neg))

    if len(pos) + len(neg) > 1:
        typelist = list(compare_types(pos_types, neg_types, arg))
    else:
        # With at most one parsed item, at least one of the two sets is
        # empty, so simply take whichever side has types.  (This replaces a
        # max() over the two sets, which leaned on subset ordering.)
        typelist = list(pos_types or neg_types)

    if arg.supers:
        hierarchy = delphin.load_hierarchy(grammar.types_path)
        for p in pos:
            p.load_supers(hierarchy)
        for n in neg:
            n.load_supers(hierarchy)
        pos_supers = set(chain.from_iterable(supers_of(x) for x in pos))
        neg_supers = set(chain.from_iterable(supers_of(x) for x in neg))
        supers = compare_types(pos_supers, neg_supers, arg)
        # Supertypes are marked with a leading '^' in the listing.
        typelist.extend('^' + t for t in supers)

    if arg.raw:
        return '\n'.join(typelist)

    # Reuse the hierarchy loaded for the supertypes when there is one.
    if hierarchy is None:
        hierarchy = delphin.load_hierarchy(grammar.types_path)
    return pretty_print_types(typelist, hierarchy)
Esempio n. 3
0
def export_json(pos_input, neg_input, grammar, count, frags, supers, load_desc,
                tagger):
    """Parse the inputs and return the result as a JSON string.

    Args:
        pos_input: Iterable of positive inputs to parse.
        neg_input: Iterable of negative inputs to parse.
        grammar: Grammar object; its dat_path and types_path are read.
        count: Passed to delphin.Fragment as `count`.
        frags: Passed to delphin.Fragment as `fragments`.
        supers: When true, load_supers is called on every parsed item.
        load_desc: When true, the payload's 'descendants' entry maps a fixed
            set of type names to the names of their descendants; otherwise
            that entry is False.
        tagger: Tagger name; `tnt` is enabled only when this equals 'tnt'.

    Returns:
        A JSON document.  On success it has 'success': True plus the parsed
        items, descendants, type ordering and type colors.  If parsing
        raises delphin.AceError it has 'success': False and the error
        message under 'error'.
    """
    def parse(text):
        return delphin.Fragment(text, grammar, ace_path=config.ACEBIN,
                                dat_path=grammar.dat_path,
                                count=count,
                                tnt=(tagger == 'tnt'),
                                typifier=config.TYPIFIERBIN,
                                fragments=frags,
                                logpath=config.LOGPATH)

    try:
        pos = [parse(x) for x in pos_input]
        neg = [parse(x) for x in neg_input]
    except delphin.AceError as err:
        # Use the same 'success' key as the success payload so consumers
        # can test a single field (the key used to be misspelled here).
        return json.dumps({
            'success': False,
            'error': err.msg,
        })

    # The hierarchy is only consulted for supertypes and descendants, so it
    # is loaded after a successful parse and only when one of them is asked
    # for.
    hierarchy = None
    if supers or load_desc:
        hierarchy = delphin.load_hierarchy(grammar.types_path)

    if supers:
        for p in pos:
            p.load_supers(hierarchy)
        for n in neg:
            n.load_supers(hierarchy)

    data = {
        'success': True,
        'pos-items': pos,
        'neg-items': neg,
    }

    if load_desc:
        root_types = ('sign', 'head', 'synsem', 'cat', 'relation', 'predsort')
        data['descendants'] = {
            root: set(t.name for t in hierarchy[root].descendants())
            for root in root_types
        }
    else:
        data['descendants'] = False

    # config.TYPES entries are unpacked as 3-tuples; the first element is
    # the type name and the third is used as its color.
    data['typeOrdering'] = [t for t, tc, wc in config.TYPES]
    data['typeColors'] = {t: wc for t, tc, wc in config.TYPES}
    return json.dumps(data, cls=delphin.JSONEncoder)
Esempio n. 4
0
def update_reading_counts(reading, feature, counts, ancestor):
    """Add one reading's occurrences of the chosen feature to `counts`.

    Args:
        reading: Object exposing `lextypes`, `rules` and `types`; `types`
            is indexed by type name.  `reading.grammar.types_path` is read
            when an ancestor filter is given.
        feature: One of 'lextypes', 'rules' or 'types'.
        counts: Accumulator updated in place; it must support `update` and
            `counts[t] += n` for unseen keys (e.g. collections.Counter).
        ancestor: Only used for the 'types' feature.  None counts every
            type; otherwise only descendants of the named type are counted.

    Raises:
        UnknownFeatureException: If `feature` is not a recognised name.
        SystemExit: With a non-zero status if `ancestor` is not found in
            the type hierarchy.
    """
    if feature == 'lextypes':
        counts.update(reading.lextypes)
    elif feature == 'rules':
        counts.update(reading.rules)
    elif feature == 'types':
        if ancestor is None:
            counts.update(reading.types)
            return

        hierarchy = delphin.load_hierarchy(reading.grammar.types_path)
        try:
            descendant_types = set(
                t.name for t in hierarchy[ancestor].descendants())
        except delphin.TypeNotFoundError as e:
            # sys.exit with a message prints it to stderr and exits with
            # status 1; a bare sys.exit() reported success (status 0).
            sys.exit(str(e))

        for t in reading.types:
            if t in descendant_types:
                counts[t] += reading.types[t]
    else:
        raise UnknownFeatureException(feature)