# Example no. 1
0
def dependencies_to_indices(target_tokens, parses, lookup, space):
    """Build a zero-padded matrix of dependency indices, one row per target.

    For every (target, parse) pair the relations attached to the target are
    extracted, turned into string keys and mapped through ``lookup``. Keys
    that are missing from ``lookup`` are skipped.

    Args:
        target_tokens: iterable of targets, consumed in lockstep with
            ``parses``.
        parses: iterable of parses, one per target.
        lookup: mapping from dependency key to integer index. The key is
            ``relation + "+" + attachment.lemma_pos`` when the module flag
            ``USE_LEMMAPOS`` is set, otherwise
            ``relation + "_" + attachment.word_normed``.
        space: not used for the result; kept so existing callers keep
            working.

    Returns:
        An ``int32`` array of shape ``(n_targets, widest_row)``. Each row
        holds that target's indices in reverse extraction order,
        right-aligned, with zeros in the unused leading columns. With no
        targets the result has shape ``(0, 0)``.
    """
    deps = []
    for target, parse in izip(target_tokens, parses):
        if USE_LEMMAPOS:
            extractor = depextract.extract_relations_for_token(parse, target)
        else:
            extractor = depextract.extract_relations_for_token_melamud(
                parse, target, inverter='I')

        row = []
        for relation, attachment in extractor:
            if USE_LEMMAPOS:
                dep = relation + "+" + attachment.lemma_pos
            else:
                dep = relation + "_" + attachment.word_normed
            # Dependencies absent from the lookup are silently dropped.
            if dep in lookup:
                row.append(lookup[dep])
        deps.append(row)

    # max() raises ValueError on an empty sequence, so guard the case where
    # no targets were supplied.
    numcols = max(len(row) for row in deps) if deps else 0
    depmat = np.zeros((len(deps), numcols), dtype=np.int32)

    for i, row in enumerate(deps):
        depmat[i, :len(row)] = row

    # Reversing the columns moves the zero padding to the left of each row.
    return depmat[:, ::-1]
# Example no. 2
0
def dependencies_to_indicies3(target_tokens, parses, vlookup, rlookup, max_deps=10):
    """Encode up to ``max_deps`` dependency contexts per target as id arrays.

    For every (target, parse) pair the relations attached to the target are
    extracted. A relation/attachment pair is kept only when the relation is
    in ``rlookup`` and ``attachment.word_normed`` is in ``vlookup``; kept
    pairs fill the target's slots from left to right until ``max_deps``
    slots are used.

    Args:
        target_tokens: iterable of targets, consumed in lockstep with
            ``parses``.
        parses: iterable of parses, one per target.
        vlookup: mapping from ``attachment.word_normed`` to a word id.
        rlookup: mapping from relation to a relation id.
        max_deps: number of slots per target. Defaults to 10, the width
            that used to be hard-coded.

    Returns:
        A two-element list ``[deps, rels]`` of arrays with shape
        ``(n_targets, max_deps)`` and numpy's default float dtype.
        ``deps`` holds word ids and ``rels`` holds relation ids shifted by
        one; unused slots are 0. With no targets both arrays have shape
        ``(0, max_deps)``.
    """
    deps = []
    rels = []
    for target, parse in izip(target_tokens, parses):
        d = np.zeros(max_deps)
        r = np.zeros(max_deps)
        i = 0
        extractor = depextract.extract_relations_for_token_melamud(
            parse, target, inverter='I')
        for relation, attachment in extractor:
            if i >= max_deps:
                break
            if relation not in rlookup or attachment.word_normed not in vlookup:
                continue
            # Relation ids are shifted by one so that 0 stays free as the
            # padding value in ``rels``.
            r[i] = rlookup[relation] + 1
            # NOTE(review): word ids are stored unshifted, so a word id of 0
            # cannot be told apart from padding - confirm vlookup reserves 0.
            d[i] = vlookup[attachment.word_normed]
            i += 1
        deps.append(d)
        rels.append(r)

    if not deps:
        # np.array([]) would have shape (0,); keep the column dimension so
        # the result is 2-D whatever the number of targets.
        return [np.zeros((0, max_deps)), np.zeros((0, max_deps))]
    return [np.array(deps), np.array(rels)]