def dependencies_to_indices(target_tokens, parses, lookup, space):
    """Build a left-padded int32 matrix of dependency-context indices.

    For every ``(target, parse)`` pair the dependency relations of the target
    are extracted, each one is turned into a string key, and the keys that
    are present in ``lookup`` are mapped to their indices.

    The key format depends on the module-level ``USE_LEMMAPOS`` flag:

    * truthy: ``relation + "+" + attachment.lemma_pos`` using
      ``depextract.extract_relations_for_token``;
    * falsy: ``relation + "_" + attachment.word_normed`` using
      ``depextract.extract_relations_for_token_melamud`` with
      ``inverter='I'``.

    Args:
        target_tokens: iterable of target tokens, consumed in lockstep with
            ``parses``.
        parses: iterable of parses, one per target token.
        lookup: mapping from dependency key string to integer index.
        space: not read by this function; kept in the signature so existing
            callers keep working.

    Returns:
        A ``numpy`` array of dtype ``int32`` with one row per pair and as
        many columns as the longest row of found indices. Rows are
        zero-padded on the left, and each row's indices appear in reverse
        extraction order (the matrix is filled left-aligned and then its
        columns are reversed). With no input pairs the result has shape
        ``(0, 0)``.

    NOTE(review): 0 is used as the padding value; if ``lookup`` can also map
    a key to 0 the two are indistinguishable in the output -- confirm against
    how ``lookup`` is built.
    """
    deps = []
    for target, parse in izip(target_tokens, parses):
        if USE_LEMMAPOS:
            extractor = depextract.extract_relations_for_token(parse, target)
        else:
            extractor = depextract.extract_relations_for_token_melamud(
                parse, target, inverter='I')

        row = []
        for relation, attachment in extractor:
            if USE_LEMMAPOS:
                dep = relation + "+" + attachment.lemma_pos
            else:
                dep = relation + "_" + attachment.word_normed
            # Dependencies missing from the lookup are silently skipped.
            if dep in lookup:
                row.append(lookup[dep])
        deps.append(row)

    numrows = len(deps)
    # max() raises ValueError on an empty sequence, so guard the no-input case.
    numcols = max(len(d) for d in deps) if deps else 0
    depmat = np.zeros((numrows, numcols), dtype=np.int32)
    for i, d in enumerate(deps):
        depmat[i, :len(d)] = d
    # Reverse the columns so the zero padding ends up on the left.
    return depmat[:, ::-1]
def dependencies_to_indicies3(target_tokens, parses, vlookup, rlookup,
                              max_deps=10):
    """Build fixed-width arrays of attachment-word ids and relation ids.

    For every ``(target, parse)`` pair the relations of the target are
    extracted with ``depextract.extract_relations_for_token_melamud``
    (``inverter='I'``). A relation/attachment pair is kept only when the
    relation is in ``rlookup`` and ``attachment.word_normed`` is in
    ``vlookup``; at most ``max_deps`` pairs are kept per target, in
    extraction order.

    Args:
        target_tokens: iterable of target tokens, consumed in lockstep with
            ``parses``.
        parses: iterable of parses, one per target token.
        vlookup: mapping from normalised attachment word to integer id.
        rlookup: mapping from relation name to integer id.
        max_deps: number of slots per target (row width). Defaults to 10,
            the value that was previously hard-coded.

    Returns:
        A two-element list ``[deps, rels]`` of ``numpy`` arrays, each with
        one row of ``max_deps`` entries per pair. ``deps`` holds the
        ``vlookup`` ids as-is; ``rels`` holds the ``rlookup`` ids shifted by
        +1, so 0 never denotes a real relation. Unused trailing slots are 0.
        The arrays have NumPy's default floating dtype because they are
        created by ``np.zeros`` without a ``dtype``. With no input pairs
        both arrays are empty one-dimensional arrays.

    NOTE(review): word ids are not shifted, so a ``vlookup`` id of 0 cannot
    be told apart from padding in ``deps`` -- confirm whether id 0 is
    reserved in ``vlookup``.
    """
    deps = []
    rels = []
    for target, parse in izip(target_tokens, parses):
        d = np.zeros(max_deps)
        r = np.zeros(max_deps)
        filled = 0
        extractor = depextract.extract_relations_for_token_melamud(
            parse, target, inverter='I')
        for relation, attachment in extractor:
            if filled >= max_deps:
                break
            word = attachment.word_normed
            if relation not in rlookup or word not in vlookup:
                continue
            d[filled] = vlookup[word]
            # +1 keeps 0 free as the padding value for relations.
            r[filled] = rlookup[relation] + 1
            filled += 1
        deps.append(d)
        rels.append(r)
    return [np.array(deps), np.array(rels)]