def test(n):
    ex = [[1, 0, 0, 0],
          [1, 1, 0, 0],
          [1, 1, 1, 0],
          [0, 0, 0, 1],
          [0, 0, 1, 0],
          [0, 0, 1, 0],
          [0, 0, 0, 0]]
    if n == 1:  # standard display
        test = hg.Hypergraph(ex)
        print(test.test_hypertree())
    elif n == 2:  # display with a random matrix
        ex = hg.randomHypergraph()
        print("Incidence matrix: \n", ex, "\n")
        test = hg.Hypergraph(ex)
        print(test.test_hypertree())
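# Minimal usage sketch (illustrative only, assuming the hypergraph module is
# imported as `hg`, as the function body above does): exercise both branches.
if __name__ == '__main__':
    test(1)   # hypertree check on the fixed 7x4 incidence matrix above
    test(2)   # hypertree check on a randomly generated incidence matrix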
def constructModel(params):
    G = hypergraph.Hypergraph()
    # Import every pickled edge set into one large hypergraph.
    for infile in params['infiles']:
        print 'Loading ', infile
        with open(infile, 'rb') as f:  # binary mode so pickle.load works regardless of protocol
            G.importEdge(pickle.load(f)['X'])
    print 'Pruning edges'
    G.pruneEdges(params['m'])
    if params['quadratic']:
        print 'Expanding edges'
        G.quadraticExpansion(MIN_SIZE=params['m'], rho=params['rho'])
    return G
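# Illustrative only: constructModel reads exactly the four keys below from
# params. The file names reuse the pickles mentioned in constructHypergraph;
# the numeric values are placeholders, not recommended settings.
example_params = {
    'infiles': ['feat1map.pickle', 'feat2map.pickle'],  # pickled dicts whose 'X' entry holds an edge set
    'm': 2,             # minimum edge size passed to pruneEdges
    'quadratic': True,  # whether to run quadraticExpansion
    'rho': 0.5,         # expansion parameter forwarded to quadraticExpansion
}
# G = constructModel(example_params)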
def constructHypergraph(params):
    HG = hypergraph.Hypergraph()
    ## Import all separate edge sets from the different features into this one
    ## large hypergraph, one pickled song -> feature map at a time. Each pickle
    ## stores its edge set under key 'X' (the build scripts write it there), so
    ## we import pickle.load(f)['X'] below.
    print("edgelist: "); print(HG.pub_edges)
    for i, feat_file in enumerate(("feat1map.pickle", "feat2map.pickle", "feat3map.pickle"), start=1):
        with open(feat_file, 'rb') as f:
            print("Adding feature %d edges..." % i)
            HG.importEdge(pickle.load(f)['X'])
        print("edgelist: "); print(HG.pub_edges)

    ## Worst-case baseline (all songs in a single edge); disabled by default.
    # with open("featALLmap.pickle", 'rb') as f:
    #     print("Adding all songs to 1 edge (anyEdge)...")
    #     HG.importEdge(pickle.load(f)['X'])
    # print("edgelist: "); print(HG.pub_edges)

    # TODO: prune an edge if it is smaller than the minimum edge size
    # TODO: use quadratic edge expansion?
    return HG
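# Sketch only (not in the original code): one way to address the two TODOs
# above, mirroring constructModel and assuming params reuses that function's
# 'm', 'quadratic', and 'rho' keys. The helper name is hypothetical.
def postprocessHypergraph(HG, params):
    HG.pruneEdges(params['m'])  # drop edges below the minimum edge size
    if params.get('quadratic'):
        HG.quadraticExpansion(MIN_SIZE=params['m'], rho=params['rho'])
    return HG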
log_file('semiring.py')
sr = semiring.ExpectationSemiring()
log_function('sum_op')
test_eq(sr.sum_op, ([sr.zero(), sr.one()],), sr.one())
log_function('prod_op')
test_eq(sr.prod_op, ([sr.zero(), sr.one()],), sr.zero())

# feature

# hypergraph
log_file('hypergraph.py')
a = hypergraph.Hypergraph('a', ())
b = hypergraph.Hypergraph('b', ())
c = hypergraph.Hypergraph('c', ((a, b), (b, a)))
d = hypergraph.Hypergraph('d', ((c, a), (c, b)))
sr = semiring.DebugSemiring()
log_function('inside')
d.inside(feature.identity, sr)
test_eq(lambda hg: hg.alpha, (c,), '((c) AND (a) AND (b)) OR ((c) AND (b) AND (a))')
log_function('outside')
c.inside_outside(feature.identity, sr)
test_eq(lambda hg: hg.beta, (a,), '((c) AND (b)) OR ((c) AND (b))')
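# The harness helpers (log_file, log_function, test_eq) are defined elsewhere.
# Judging only from the calls above, test_eq takes a callable, a tuple of
# positional arguments, and an expected value. A minimal sketch of that
# assumed behavior (hypothetical name, not the project's implementation):
def test_eq_sketch(fn, args, expected):
    result = fn(*args)
    status = 'ok' if str(result) == str(expected) else 'FAIL'
    print('%s: got %r, expected %r' % (status, result, expected))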
def extract_aligned_rules(args):
    tok_seqs = read_toks(args.tok_file)
    lemma_seqs = read_toks(args.lemma_file)
    pos_seqs = read_toks(args.pos_file)

    print 'A total of %d sentences' % len(tok_seqs)
    assert len(tok_seqs) == len(pos_seqs)
    assert len(tok_seqs) == len(lemma_seqs)

    (word2predicate, ent2frag, label2frag) = load_mappings(args.stats_dir)

    result_f = open(args.output, 'w')
    with open(args.input, 'r') as input_f:
        sent_no = 0
        while True:
            sent_map = read_sentence(input_f)
            if not sent_map:
                break

            aligned_toks = set()
            unaligned_toks = set()

            toks = tok_seqs[sent_no]
            lemmas = lemma_seqs[sent_no]
            pos_seq = pos_seqs[sent_no]
            sent_no += 1

            aligned_rules = []
            for align in sent_map:
                frag_label = align[0]
                start = align[1]
                end = align[2]

                is_ent = 'ent+' in frag_label
                is_pred = 'ARG' in frag_label

                if frag_label == 'UNKNOWN':
                    assert end - start == 1
                    unaligned_toks.add(start)
                    continue
                else:
                    new_aligned = set(xrange(start, end))
                    aligned_toks |= new_aligned

                if is_ent:  #An entity is found
                    entity_str = '_'.join(toks[start:end])
                    if entity_str in ent2frag:
                        curr_frag = ent2frag[entity_str]
                    else:
                        frag_str = build_one_entity(toks[start:end], frag_label)
                        #rule_str = '[A1-1] ## %s ## %s' % (' '.join(toks[start:end]), frag_str)
                        rule_str = '%d-%d####[A1-1] ## %s ## %s' % (start, end, ' '.join(lemmas[start:end]), frag_str)
                        aligned_rules.append(rule_str)
                    continue
                elif is_pred:
                    assert end - start == 1
                    curr_tok = toks[start]
                    curr_lem = lemmas[start]
                    curr_pred = None
                    if curr_tok in word2predicate:
                        curr_pred = word2predicate[curr_tok]
                    elif curr_lem in word2predicate:
                        curr_pred = word2predicate[curr_lem]
                    else:
                        #curr_pred = ret_word2predicate[curr_tok]
                        curr_pred = 'UNKNOWN-01'
                    (frag_str, index, suffix) = build_one_predicate(curr_pred, frag_label)
                    #rule_str = '[A%d-%s] ## %s ## %s' % (index, suffix, curr_tok, frag_str)
                    rule_str = '%d-%d####[A%d-%s] ## %s ## %s' % (start, start + 1, index, suffix, curr_lem, frag_str)
                    aligned_rules.append(rule_str)
                    continue
                else:
                    #if frag_label in label2frag:
                    if frag_label not in label2frag:
                        print 'weird here'
                        print frag_label
                        continue
                    curr_frag = label2frag[frag_label]
                    #else:
                    #    curr_frag = ret_word2frag[frag_label]
                    new_node = FragmentHGNode(FRAGMENT_NT, -1, -1, curr_frag)
                    s = Sample(hypergraph.Hypergraph(new_node), 0)
                    new_node.cut = 1
                    new_rule, _ = s.extract_one_rule(new_node, None, curr_frag.ext_list, False)
                    rule_str = filter_vars(str(new_rule)).replace('|||', '##')
                    fields = rule_str.split('##')
                    #fields[1] = ' %s ' % ' '.join(toks[start:end])
                    fields[1] = ' %s ' % ' '.join(lemmas[start:end])
                    rule_str = '##'.join(fields)
                    rule_str = '%d-%d####%s' % (start, end, rule_str)
                    aligned_rules.append(rule_str)

            #print >>result_f, '%s ||| %s ||| %s' % (' '.join(toks), ' '.join([str(k) for k in unaligned_toks]), '++'.join(aligned_rules))
            print >>result_f, '%s ||| %s ||| %s' % (' '.join(lemmas), ' '.join([str(k) for k in unaligned_toks]), '++'.join(aligned_rules))
            assert len(aligned_toks & unaligned_toks) == 0, str(aligned_toks) + str(unaligned_toks)
            assert len(aligned_toks | unaligned_toks) == len(toks)

    result_f.close()
def phrase_decomposition_forest(align):
    # save the index mapping so that we can restore indices after phrase
    # decomposition forest generation
    if not FLAGS.delete_unaligned:
        fmap = get_reversed_index_map(align.faligned)
        emap = get_reversed_index_map(align.ealigned)
    a = align.remove_unaligned()

    phrases = list(extract_phrases(a))
    #print('%s phrases' % len(phrases))
    n = len(a.fwords)
    #print('%s words' % n)
    chart = [[None for j in range(n + 1)] for i in range(n + 1)]
    for i1, j1, i2, j2 in phrases:
        #print('(%s,%s)' % (i1, i2))
        chart[i1][i2] = PhraseHGNode(PHRASE_NT, i1, i2, j1, j2)

    for s in range(1, n + 1):
        for i in range(0, n - s + 1):
            j = i + s
            #print('span (%s %s)' % (i, j))
            node = chart[i][j]
            if node is None:
                continue
            splits = 0
            # test for binary ambiguity
            for k in range(i + 1, j):
                if chart[i][k] is not None and chart[k][j] is not None:
                    edge = PhraseHGEdge()
                    edge.add_tail(chart[i][k])
                    edge.add_tail(chart[k][j])
                    node.add_incoming(edge)
                    #print('split at %s' % k)
                    splits += 1
            # find the maximal cover if no ambiguity found
            if splits == 0:
                edge = PhraseHGEdge()
                l = i
                while l < j:
                    next = l + 1
                    m = j - 1 if l == i else j
                    while m > l:
                        if chart[l][m] is not None:
                            edge.add_tail(chart[l][m])
                            next = m
                            break
                        m -= 1
                    l = next
                node.add_incoming(edge)

    hg = hypergraph.Hypergraph(chart[0][n])
    hg.assert_done('topo_sort')
    assert len(phrases) == len(hg.nodes), \
        '%s phrases, %s nodes' % (len(phrases), len(hg.nodes))
    #if len(phrases) != len(hg.nodes):
    #    print('%s phrases, %s nodes' % (len(phrases), len(hg.nodes)))
    #for node in hg.nodes:
    #    i1,j2,i2,j2 = node.phrase
    #    print('(%s,%s)' % (i1, i2))

    # restore indices on each node
    if FLAGS.delete_unaligned:
        return hg, a
    else:
        for node in hg.nodes:
            node.fi = max(fmap[node.fi])
            node.fj = min(fmap[node.fj])
            node.ei = max(emap[node.ei])
            node.ej = min(emap[node.ej])
        return hg, align
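# Standalone toy version (not part of the original code) of the "maximal
# cover" fallback above: when a span (i, j) has no binary split point, the
# scan greedily covers it with the largest known sub-spans. covered[l][m]
# is True when (l, m) is a known phrase span; the full span itself is only
# excluded on the first step (m starts at j - 1 while l == i).
def maximal_cover(covered, i, j):
    picked = []
    l = i
    while l < j:
        next_l = l + 1
        m = j - 1 if l == i else j
        while m > l:
            if covered[l][m]:
                picked.append((l, m))
                next_l = m
                break
            m -= 1
        l = next_l
    return picked

# Example: phrases (0,1), (1,3), (3,4) cover (0,4) when no split point exists.
_n = 4
_covered = [[False] * (_n + 1) for _ in range(_n + 1)]
for _a, _b in [(0, 1), (1, 3), (3, 4), (0, 4)]:
    _covered[_a][_b] = True
assert maximal_cover(_covered, 0, 4) == [(0, 1), (1, 3), (3, 4)]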
def fragment_decomposition_forest(fragments, amr_graph, unaligned_fragments, edge_alignment, refine=False):
    # save the index mapping so that we can restore indices after phrase
    # decomposition forest generation
    n = len(fragments)  #These fragments are aligned, and have some order based on the strings
    global print_sign
    chart = [[set() for j in range(n + 1)] for i in range(n + 1)]
    start_time = time.time()

    #The leaves of the forest are identified concept fragments
    for i in xrange(n):
        j = i + 1
        frag = fragments[i]
        new_node = build_one_node(frag, i, j, amr_graph, edge_alignment, refine)
        filter_with_maxtype(new_node)
        chart[i][j].add(new_node)

    #These are the unaligned concepts in the graph
    unaligned_nodes = []
    for unaligned_frag in unaligned_fragments:
        unaligned_node = FragmentHGNode(FRAGMENT_NT, -1, -1, unaligned_frag, False, True, True)  #Special here
        unaligned_node.cut = 1
        unaligned_nodes.append(unaligned_node)

    edge_to_node = {}

    for i in xrange(n):
        j = i + 1
        curr_candidate = chart[i][j]
        updated = True
        count = 0
        while updated:
            updated = False
            new_node_set = set()
            curr_time = time.time()
            if curr_time - start_time > 30:
                return None
            for node1 in curr_candidate:
                for unaligned_node in unaligned_nodes:
                    #Before combining two fragments, check if they are disjoint
                    if check_disjoint(node1.frag, unaligned_node.frag):
                        (new_frag, connect_frags) = general_combine_fragments(node1.frag, unaligned_node.frag, edge_alignment, refine)
                        if new_frag is None:
                            continue
                        #new_node = FragmentHGNode(FRAGMENT_NT, i, j, new_frag, False, False, False)
                        new_node = FragmentHGNode(FRAGMENT_NT, i, j, new_frag, False, False, True)
                        edge = FragmentHGEdge()
                        edge.add_tail(node1)
                        edge.add_tail(unaligned_node)
                        if connect_frags and len(connect_frags) > 0:
                            for unaligned_frag in connect_frags:
                                un_edge_index = unique_edge(unaligned_frag)
                                if un_edge_index not in edge_to_node:
                                    tmp_node = FragmentHGNode(FRAGMENT_NT, -1, -1, unaligned_frag, True, False, False)
                                    edge_to_node[un_edge_index] = tmp_node
                                    tmp_node.cut = 0
                                else:
                                    tmp_node = edge_to_node[un_edge_index]
                                edge.add_tail(tmp_node)
                        new_node.add_incoming(edge)
                        if print_sign:
                            print '%d to %d: %s %s' % (i, j, ' '.join(new_frag.str_list()), str(new_frag))
                        updated = True
                        filter_with_maxtype(new_node)
                        add_one_item(new_node_set, new_node)
            if updated:
                enlarge_chart(chart[i][j], new_node_set)
                curr_candidate = new_node_set

    start_time = time.time()
    #logger.writeln('Finished dealing with unary')
    for span in xrange(2, n + 1):
        for i in xrange(0, n):
            j = i + span
            if j > n:
                continue
            curr_time = time.time()
            if curr_time - start_time > 30:
                return None
            for k in xrange(i + 1, j):
                if len(chart[i][k]) == 0 or len(chart[k][j]) == 0:
                    continue
                for node1 in chart[i][k]:
                    for node2 in chart[k][j]:
                        curr_time = time.time()
                        if check_disjoint(node1.frag, node2.frag):
                            #new_frag = combine_fragments(node1.frag, node2.frag)
                            (new_frag, connect_frags) = general_combine_fragments(node1.frag, node2.frag, edge_alignment, refine)
                            if new_frag is None:
                                continue
                            noprint = node1.noprint | node2.noprint
                            new_node = FragmentHGNode(FRAGMENT_NT, i, j, new_frag, False, False, noprint)
                            children = []
                            children.append(node1)
                            children.append(node2)
                            unaligned_node = None
                            if connect_frags and len(connect_frags) > 0:
                                for unaligned_frag in connect_frags:
                                    un_edge_index = unique_edge(unaligned_frag)
                                    if un_edge_index not in edge_to_node:
                                        tmp_node = FragmentHGNode(FRAGMENT_NT, -1, -1, unaligned_frag, True, False, False)
                                        edge_to_node[un_edge_index] = tmp_node
                                        tmp_node.cut = 0
                                    else:
                                        tmp_node = edge_to_node[un_edge_index]
                                    children.append(tmp_node)
                            if not check_consist(new_node, children):
                                print 'inconsistency here'
                                print str(new_node.frag)
                                print str(node1.frag)
                                print str(node2.frag)
                            edge = FragmentHGEdge()
                            edge.add_tail(node1)
                            edge.add_tail(node2)
                            if connect_frags and len(connect_frags) > 0:
                                for unaligned_frag in connect_frags:
                                    un_edge_index = unique_edge(unaligned_frag)
                                    assert un_edge_index in edge_to_node
                                    #unaligned_node.cut = 0
                                    edge.add_tail(edge_to_node[un_edge_index])
                            new_node.add_incoming(edge)
                            if print_sign:
                                print '%d to %d: %s %s' % (i, j, ' '.join(new_frag.str_list()), str(new_frag))
                                print '####Children info####'
                                for node in children:
                                    print '%d to %d: %s %s' % (node.frag.start, node.frag.end, ' '.join(node.frag.str_list()) if node.frag.start != -1 else '###', str(node.frag))
                                print '########'
                            s = Sample(hypergraph.Hypergraph(new_node), 0)
                            new_node.cut = 1
                            new_rule, _ = s.extract_one_rule(new_node, None, list(new_node.frag.ext_set))
                            if not new_node.noprint and len(new_node.frag.str_list()) < 8:
                                rule_str = '%s ||| %s\n' % (filter_vars(new_rule.dumped_format()), context_str(new_node.frag, amr_graph))
                                rule_f.write(rule_str)
                                fields = rule_str.split(' ||| ')
                                fields[1] = ' '.join(amr_graph.lems[new_node.frag.start: new_node.frag.end])
                                lem_rule_str = ' ||| '.join(fields)
                                lemma_rule_f.write(lem_rule_str)
                            filter_with_maxtype(new_node)
                            add_one_item(chart[i][j], new_node)

    if print_sign:
        print 'total length is %d' % n

    if chart[0][n] is None or len(chart[0][n]) == 0:
        rule_f.write('\n')
        print '##################################'
        print 'The goal chart is empty, fail to build a goal item'
        print 'Alignment fragments:'
        for frag in fragments:
            print '%s : %s' % (frag.str_side(), str(frag))
        print 'Unaligned fragments:'
        for frag in unaligned_fragments:
            print str(frag)
        print '#################################'
        return None

    rule_f.write('\n')
    hg = None
    for node in chart[0][n]:
        if is_goal_item(node):
            hg = hypergraph.Hypergraph(node)
            return hg

    #assert hg is not None, 'Failed to build a goal item'
    if hg is None:
        print '##################################'
        print 'No goal item in the final chart'
        print 'Alignment fragments:'
        for frag in fragments:
            print str(frag)
        return None
    return hg
def build_one_node(curr_frag, curr_start, curr_end, amr_graph, edge_alignment, refine=False):
    curr_node_index = curr_frag.root
    curr_graph_node = amr_graph.nodes[curr_node_index]

    if edge_alignment[curr_graph_node.c_edge] == 0:
        new_node = FragmentHGNode(FRAGMENT_NT, curr_start, curr_end, curr_frag)
        return new_node

    #To remember the unaligned relations going out of each entity
    root_arcs = []
    head_arcs = []
    visited = set()

    is_pred = False  #Used for deciding the category of the root node
    is_op = False

    #Dealing with parent edges, ARGx-of
    if len(curr_graph_node.p_edges) > 0:
        for curr_edge_index in curr_graph_node.p_edges:
            curr_edge = amr_graph.edges[curr_edge_index]
            edge_label = curr_edge.label

            if edge_alignment[curr_edge_index] == 1:  #This edge has already been aligned
                if curr_frag.edges[curr_edge_index] == 1 and (edge_label[:3] == 'ARG' and 'of' in edge_label):
                    #logger.writeln("what the hell is this")
                    #logger.writeln(str(curr_frag))
                    is_pred = True
                continue

            #Our intuition: ARGs and ops go with the root
            if (edge_label[:3] == 'ARG' and 'of' in edge_label):
                is_pred = True
                head_arcs.append((curr_edge_index, curr_edge.head))

    if len(curr_graph_node.v_edges) > 0:
        for curr_edge_index in curr_graph_node.v_edges:
            curr_edge = amr_graph.edges[curr_edge_index]
            edge_label = curr_edge.label

            if edge_alignment[curr_edge_index] == 1:  #This edge has already been aligned
                if curr_frag.edges[curr_edge_index] == 1 and is_root_arc(edge_label):
                    #Special case: there are already args attached
                    if 'ARG' in edge_label:
                        is_pred = True
                    else:
                        is_op = True
                continue

            tail_node_index = curr_edge.tail

            #Our intuition: ARGs and ops go with the root
            if is_root_arc(edge_label):
                if 'ARG' in edge_label:
                    is_pred = True
                else:
                    assert 'op' in edge_label
                    is_op = True
                root_arcs.append((curr_edge_index, tail_node_index))

    unaligned_node = None
    if refine:
        init_ext_frag(curr_frag, is_pred, is_op)  #Initialize the current fragment

    if len(root_arcs) > 0 or len(head_arcs) > 0:
        n_nodes = len(amr_graph.nodes)
        n_edges = len(amr_graph.edges)
        frag = AMRFragment(n_edges, n_nodes, amr_graph)
        frag.set_root(curr_node_index)

        for rel_index, tail_index in root_arcs:
            edge_alignment[rel_index] = 1
            frag.set_edge(rel_index)
            frag.set_node(tail_index)

        if head_arcs:
            (rel_index, head_index) = head_arcs[0]
            edge_alignment[rel_index] = 1
            frag.set_edge(rel_index)
            frag.set_root(head_index)

        if refine:
            init_ext_frag(frag, is_pred, is_op)

        frag.build_ext_list()
        frag.build_ext_set()
        new_frag = combine_fragments(curr_frag, frag, refine)
        assert new_frag, 'Weird combination found'
        new_node = FragmentHGNode(FRAGMENT_NT, curr_start, curr_end, new_frag)
    else:  #Should be either an entity or a single concept
        new_node = FragmentHGNode(FRAGMENT_NT, curr_start, curr_end, curr_frag)

    s = Sample(hypergraph.Hypergraph(new_node), 0)
    new_node.cut = 1
    new_rule, _ = s.extract_one_rule(new_node, None, new_node.frag.ext_list, refine)
    rule_str = '%s ||| %s\n' % (filter_vars(new_rule.dumped_format()), context_str(new_node.frag, amr_graph))
    rule_f.write(rule_str)
    fields = rule_str.split(' ||| ')
    fields[1] = ' '.join(amr_graph.lems[new_node.frag.start: new_node.frag.end])
    lem_rule_str = ' ||| '.join(fields)
    lemma_rule_f.write(lem_rule_str)
    return new_node