Exemple #1
0
def test(n):
    """Print the result of ``test_hypertree()`` for a sample hypergraph.

    Args:
        n: selects the input.
            1 -- use the fixed 7x4 incidence matrix below (normal display).
            2 -- use the matrix returned by ``hg.randomHypergraph()`` and
                 print that matrix first (display with a random matrix).
            Any other value does nothing.
    """
    if n == 1:
        # Fixed example incidence matrix.
        incidence = [[1, 0, 0, 0],
                     [1, 1, 0, 0],
                     [1, 1, 1, 0],
                     [0, 0, 0, 1],
                     [0, 0, 1, 0],
                     [0, 0, 1, 0],
                     [0, 0, 0, 0]]
    elif n == 2:
        # Randomly generated incidence matrix, echoed before the check.
        incidence = hg.randomHypergraph()
        print("Matrice d'incidence: \n", incidence, "\n")
    else:
        return

    graph = hg.Hypergraph(incidence)
    print(graph.test_hypertree())
Exemple #2
0
def constructModel(params):
    """Build a hypergraph from pickled edge sets, prune it and optionally expand it.

    Args:
        params: mapping that provides
            'infiles'   -- iterable of paths to pickle files; each unpickled
                           object is indexed with 'X' and the result is
                           handed to ``Hypergraph.importEdge``;
            'm'         -- passed to ``pruneEdges`` and, as ``MIN_SIZE``, to
                           ``quadraticExpansion``;
            'quadratic' -- when truthy, ``quadraticExpansion`` is run;
            'rho'       -- passed as ``rho`` to ``quadraticExpansion`` (only
                           read when 'quadratic' is truthy).

    Returns:
        The populated ``hypergraph.Hypergraph`` instance.
    """
    G = hypergraph.Hypergraph()
    for infile in params['infiles']:
        # Two spaces on purpose: this reproduces the output of the former
        # statement ``print 'Loading ', infile``.
        print('Loading  %s' % (infile,))
        # Pickle data is a byte stream, so the file must be opened in binary
        # mode (text mode breaks on Python 3 and can corrupt data on Windows).
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # this on input files from a trusted source.
        with open(infile, 'rb') as f:
            G.importEdge(pickle.load(f)['X'])
    print('Pruning edges')
    G.pruneEdges(params['m'])
    if params['quadratic']:
        print('Expanding edges')
        G.quadraticExpansion(MIN_SIZE=params['m'], rho=params['rho'])
    return G
def constructHypergraph(params):
    """Build one hypergraph from the pickled song -> feature edge sets.

    The files ``feat1map.pickle``, ``feat2map.pickle`` and ``feat3map.pickle``
    are read from the current working directory, in that order.  For each
    file the value stored under key 'X' is imported with
    ``Hypergraph.importEdge``.  ``HG.pub_edges`` is printed before the first
    import and after every import.

    Args:
        params: currently unused; kept for interface compatibility.

    Returns:
        The populated ``hypergraph.Hypergraph`` instance.
    """
    HG = hypergraph.Hypergraph()

    # All separate edge sets (one pickle per feature) are merged into this
    # single large hypergraph.
    # Worst-case baseline (all songs in one edge): additionally import
    # "featALLmap.pickle" in the same way; it is intentionally disabled.
    feature_maps = ("feat1map.pickle", "feat2map.pickle", "feat3map.pickle")

    print("edgelist: ")
    print(HG.pub_edges)
    for number, path in enumerate(feature_maps, 1):
        # Pickle data is a byte stream, so open the file in binary mode.
        # NOTE(security): pickle.load can execute arbitrary code; these files
        # must come from a trusted build step.
        with open(path, 'rb') as f:
            print("Adding feature %d edges..." % number)
            # Only the value under key 'X' is imported; the build files wrap
            # the edge data in a dict for reasons not visible here.
            HG.importEdge(pickle.load(f)['X'])
        print("edgelist: ")
        print(HG.pub_edges)

    # TODO: prune edges if edge is less than min edge size

    # TODO: Use quadratic edge expansion?

    return HG
Exemple #4
0
# Script-style checks for the semiring and hypergraph modules.
# NOTE(review): log_file, log_function and test_eq are harness helpers defined
# outside this excerpt; test_eq presumably applies its first argument to the
# argument tuple and compares the result with the last argument -- TODO confirm.
log_file('semiring.py')
sr = semiring.ExpectationSemiring()

log_function('sum_op')
# Expected: sum_op over [zero, one] equals one.
test_eq(sr.sum_op, ([sr.zero(), sr.one()],), sr.one())

log_function('prod_op')
# Expected: prod_op over [zero, one] equals zero.
test_eq(sr.prod_op, ([sr.zero(), sr.one()],), sr.zero())

# feature

# hypergraph

log_file('hypergraph.py')
# Small fixture: a and b are built with an empty second argument; c is built
# from the pairs (a, b) and (b, a); d from the pairs (c, a) and (c, b).
# NOTE(review): the second argument looks like a tuple of incoming edges, each
# given as a tuple of tail nodes -- confirm against hypergraph.Hypergraph.
a = hypergraph.Hypergraph('a', ())
b = hypergraph.Hypergraph('b', ())
c = hypergraph.Hypergraph('c', ((a,b),(b,a)))
d = hypergraph.Hypergraph('d', ((c,a),(c,b)))
# sr is rebound here: the hypergraph checks use the debug semiring, whose
# values are compared below as 'AND'/'OR' expression strings.
sr = semiring.DebugSemiring()

log_function('inside')
# inside() is run on d before c.alpha is read below.
d.inside(feature.identity, sr)
test_eq(lambda hg: hg.alpha, (c,), 
        '((c) AND (a) AND (b)) OR ((c) AND (b) AND (a))')

log_function('outside')
# inside_outside() is run on c before a.beta is read below.
c.inside_outside(feature.identity, sr)
test_eq(lambda hg: hg.beta, (a,),
        '((c) AND (b)) OR ((c) AND (b))')
def extract_aligned_rules(args):
    """Write one line of aligned rules per sentence of the alignment file.

    Token, lemma and POS sequences are loaded with ``read_toks`` from
    ``args.tok_file``, ``args.lemma_file`` and ``args.pos_file``; the lookup
    tables come from ``load_mappings(args.stats_dir)``.  Sentences are read
    from ``args.input`` with ``read_sentence`` until it returns a falsy value.
    The i-th sentence read is paired with the i-th token/lemma sequence.

    Each output line written to ``args.output`` has the form::

        <lemmas> ||| <unaligned token indices> ||| <rule>++<rule>++...

    where every rule is prefixed with ``<start>-<end>####``.

    Args:
        args: object with the attributes ``tok_file``, ``lemma_file``,
            ``pos_file``, ``stats_dir``, ``input`` and ``output``.

    Raises:
        AssertionError: if the three sequence files differ in length, if an
            'UNKNOWN' or predicate alignment does not span exactly one token,
            or if the aligned and unaligned token sets of a sentence overlap
            or do not cover all tokens.
    """
    tok_seqs = read_toks(args.tok_file)
    lemma_seqs = read_toks(args.lemma_file)
    pos_seqs = read_toks(args.pos_file)

    print('A total of %d sentences' % len(tok_seqs))

    assert len(tok_seqs) == len(pos_seqs)
    assert len(tok_seqs) == len(lemma_seqs)

    (word2predicate, ent2frag, label2frag) = load_mappings(args.stats_dir)

    # Both files are managed by the with-statement so that they are closed
    # even when an assertion or a helper raises part-way through.
    with open(args.output, 'w') as result_f, open(args.input, 'r') as input_f:
        sent_no = 0

        while True:
            sent_map = read_sentence(input_f)
            if not sent_map:
                break

            aligned_toks = set()
            unaligned_toks = set()

            toks = tok_seqs[sent_no]
            lemmas = lemma_seqs[sent_no]
            sent_no += 1

            aligned_rules = []
            for align in sent_map:
                frag_label, start, end = align[0], align[1], align[2]

                if frag_label == 'UNKNOWN':
                    assert end - start == 1
                    unaligned_toks.add(start)
                    continue

                aligned_toks |= set(range(start, end))

                if 'ent+' in frag_label:  # An entity is found
                    entity_str = '_'.join(toks[start:end])
                    if entity_str not in ent2frag:
                        # Unseen entity: build its rule string directly.
                        frag_str = build_one_entity(toks[start:end], frag_label)
                        rule_str = '%d-%d####[A1-1] ## %s ## %s' % (start, end, ' '.join(lemmas[start:end]), frag_str)
                        aligned_rules.append(rule_str)
                        continue
                    curr_frag = ent2frag[entity_str]

                elif 'ARG' in frag_label:  # A predicate is found
                    assert end - start == 1
                    curr_tok = toks[start]
                    curr_lem = lemmas[start]
                    # Prefer the surface token, then the lemma, then a
                    # placeholder predicate.
                    if curr_tok in word2predicate:
                        curr_pred = word2predicate[curr_tok]
                    elif curr_lem in word2predicate:
                        curr_pred = word2predicate[curr_lem]
                    else:
                        curr_pred = 'UNKNOWN-01'

                    (frag_str, index, suffix) = build_one_predicate(curr_pred, frag_label)
                    rule_str = '%d-%d####[A%d-%s] ## %s ## %s' % (start, start+1, index, suffix, curr_lem, frag_str)
                    aligned_rules.append(rule_str)
                    continue

                else:
                    if frag_label not in label2frag:
                        print('weird here')
                        print(frag_label)
                        continue
                    curr_frag = label2frag[frag_label]

                # Known fragment (entity or label): extract its rule through a
                # one-node hypergraph and replace the string side by the lemmas.
                new_node = FragmentHGNode(FRAGMENT_NT, -1, -1, curr_frag)
                s = Sample(hypergraph.Hypergraph(new_node), 0)
                new_node.cut = 1
                new_rule, _ = s.extract_one_rule(new_node, None, curr_frag.ext_list, False)
                rule_str = filter_vars(str(new_rule)).replace('|||', '##')
                fields = rule_str.split('##')
                fields[1] = ' %s ' % ' '.join(lemmas[start:end])
                rule_str = '##'.join(fields)
                rule_str = '%d-%d####%s' % (start, end, rule_str)
                aligned_rules.append(rule_str)

            result_f.write('%s ||| %s ||| %s\n' % (' '.join(lemmas), ' '.join([str(k) for k in unaligned_toks]), '++'.join(aligned_rules)))
            assert len(aligned_toks & unaligned_toks) == 0, str(aligned_toks)+ str(unaligned_toks)
            assert len(aligned_toks | unaligned_toks) == len(toks)
Exemple #6
0
def phrase_decomposition_forest(align):
    """Build the phrase decomposition forest of an alignment.

    Unaligned words are removed first; every phrase returned by
    ``extract_phrases`` becomes a chart node.  Each node gets one incoming
    edge per binary split into two existing nodes, or, when no such split
    exists, a single edge over a greedily chosen cover of sub-spans.

    Args:
        align: alignment object providing ``faligned``, ``ealigned`` and
            ``remove_unaligned()``.

    Returns:
        ``(hg, a)`` when ``FLAGS.delete_unaligned`` is set, where ``a`` is the
        alignment without unaligned words; otherwise ``(hg, align)`` after
        the node indices have been mapped back to the original positions.

    Raises:
        AssertionError: if the number of hypergraph nodes differs from the
            number of extracted phrases.
    """
    # Keep the index mappings so that the original indices can be restored
    # once the forest has been generated.
    if not FLAGS.delete_unaligned:
        fmap = get_reversed_index_map(align.faligned)
        emap = get_reversed_index_map(align.ealigned)
    compact = align.remove_unaligned()

    phrases = list(extract_phrases(compact))
    size = len(compact.fwords)
    chart = [[None] * (size + 1) for _ in range(size + 1)]
    for i1, j1, i2, j2 in phrases:
        chart[i1][i2] = PhraseHGNode(PHRASE_NT, i1, i2, j1, j2)

    for width in range(1, size + 1):
        for lo in range(size - width + 1):
            hi = lo + width
            node = chart[lo][hi]
            if node is None:
                continue

            # Binary ambiguity: one edge per split point with two sub-phrases.
            ambiguous = False
            for mid in range(lo + 1, hi):
                left = chart[lo][mid]
                right = chart[mid][hi]
                if left is None or right is None:
                    continue
                split_edge = PhraseHGEdge()
                split_edge.add_tail(left)
                split_edge.add_tail(right)
                node.add_incoming(split_edge)
                ambiguous = True
            if ambiguous:
                continue

            # No binary split: cover the span left to right, taking at each
            # position the longest existing sub-phrase (the first piece may
            # not be the whole span itself).
            cover_edge = PhraseHGEdge()
            pos = lo
            while pos < hi:
                advance_to = pos + 1
                farthest = hi - 1 if pos == lo else hi
                for stop in range(farthest, pos, -1):
                    piece = chart[pos][stop]
                    if piece is not None:
                        cover_edge.add_tail(piece)
                        advance_to = stop
                        break
                pos = advance_to
            node.add_incoming(cover_edge)

    forest = hypergraph.Hypergraph(chart[0][size])
    forest.assert_done('topo_sort')
    assert len(phrases) == len(forest.nodes), \
            '%s phrases, %s nodes' % (len(phrases), len(forest.nodes))

    if FLAGS.delete_unaligned:
        return forest, compact

    # Restore the original indices on each node.
    for node in forest.nodes:
        node.fi = max(fmap[node.fi])
        node.fj = min(fmap[node.fj])
        node.ei = max(emap[node.ei])
        node.ej = min(emap[node.ej])
    return forest, align
def _connecting_tail_nodes(connect_frags, edge_to_node):
    """Return one hypergraph node per connecting fragment, creating missing ones.

    Nodes are cached in ``edge_to_node`` under ``unique_edge(fragment)`` so
    that the same connecting fragment always maps to the same node object.
    """
    tail_nodes = []
    if not connect_frags:
        return tail_nodes
    for unaligned_frag in connect_frags:
        un_edge_index = unique_edge(unaligned_frag)
        if un_edge_index not in edge_to_node:
            tmp_node = FragmentHGNode(FRAGMENT_NT, -1, -1, unaligned_frag, True, False, False)
            tmp_node.cut = 0
            edge_to_node[un_edge_index] = tmp_node
        tail_nodes.append(edge_to_node[un_edge_index])
    return tail_nodes


def fragment_decomposition_forest(fragments, amr_graph, unaligned_fragments, edge_alignment, refine=False, time_limit=30):
    """Build the fragment decomposition forest over the aligned fragments.

    The chart cell ``chart[i][j]`` holds the nodes covering fragments
    ``i..j-1``.  The construction has three steps:

    1. every aligned fragment becomes a leaf node via ``build_one_node``;
    2. each leaf cell is repeatedly extended by combining its newest nodes
       with the unaligned fragments until nothing new is produced;
    3. longer spans are filled by combining nodes of adjacent sub-spans; the
       rule extracted for each new node is written to the module-level
       ``rule_f`` / ``lemma_rule_f`` files unless the node is marked
       ``noprint`` or its string side has 8 or more entries.

    Args:
        fragments: aligned fragments, ordered by their string positions.
        amr_graph: graph the fragments belong to.
        unaligned_fragments: fragments without a string alignment.
        edge_alignment: per-edge alignment flags, forwarded to
            ``build_one_node`` and ``general_combine_fragments``.
        refine: forwarded to ``build_one_node`` and
            ``general_combine_fragments``.
        time_limit: seconds allowed for steps 1-2 together and, separately,
            for step 3 (default 30).

    Returns:
        A ``hypergraph.Hypergraph`` rooted at the first node of
        ``chart[0][n]`` accepted by ``is_goal_item``, or ``None`` when a time
        limit is exceeded, the goal cell is empty, or it has no goal item.
    """
    n = len(fragments)  # These fragments are aligned, and have some order based on the strings

    chart = [[set() for _ in range(n + 1)] for _ in range(n + 1)]

    start_time = time.time()

    # The leaves of the forest are identified concept fragments.
    for i in range(n):
        new_node = build_one_node(fragments[i], i, i + 1, amr_graph, edge_alignment, refine)
        filter_with_maxtype(new_node)
        chart[i][i + 1].add(new_node)

    # These are the unaligned concepts in the graph.
    unaligned_nodes = []
    for unaligned_frag in unaligned_fragments:
        unaligned_node = FragmentHGNode(FRAGMENT_NT, -1, -1, unaligned_frag, False, True, True)  # Special here
        unaligned_node.cut = 1
        unaligned_nodes.append(unaligned_node)

    # Unary phase: attach unaligned fragments to each leaf cell until a
    # fixed point is reached.  Only the nodes created in the previous round
    # are used as candidates in the next one.
    edge_to_node = {}
    for i in range(n):
        j = i + 1
        curr_candidate = chart[i][j]
        updated = True
        while updated:
            updated = False
            new_node_set = set()
            if time.time() - start_time > time_limit:
                return None
            for node1 in curr_candidate:
                for unaligned_node in unaligned_nodes:
                    # Before combining two fragments, check if they are disjoint.
                    if not check_disjoint(node1.frag, unaligned_node.frag):
                        continue
                    (new_frag, connect_frags) = general_combine_fragments(node1.frag, unaligned_node.frag, edge_alignment, refine)
                    if new_frag is None:
                        continue

                    new_node = FragmentHGNode(FRAGMENT_NT, i, j, new_frag, False, False, True)
                    edge = FragmentHGEdge()
                    edge.add_tail(node1)
                    edge.add_tail(unaligned_node)
                    for tmp_node in _connecting_tail_nodes(connect_frags, edge_to_node):
                        edge.add_tail(tmp_node)
                    new_node.add_incoming(edge)

                    if print_sign:
                        print('%d to %d: %s  %s' % (i, j, ' '.join(new_frag.str_list()), str(new_frag)))
                    updated = True
                    filter_with_maxtype(new_node)
                    add_one_item(new_node_set, new_node)
            if updated:
                enlarge_chart(chart[i][j], new_node_set)
                curr_candidate = new_node_set

    # Binary phase: the time budget starts afresh here.
    start_time = time.time()
    for span in range(2, n + 1):
        for i in range(0, n - span + 1):
            j = i + span
            if time.time() - start_time > time_limit:
                return None

            for k in range(i + 1, j):
                if not chart[i][k] or not chart[k][j]:
                    continue
                for node1 in chart[i][k]:
                    for node2 in chart[k][j]:
                        if not check_disjoint(node1.frag, node2.frag):
                            continue
                        (new_frag, connect_frags) = general_combine_fragments(node1.frag, node2.frag, edge_alignment, refine)
                        if new_frag is None:
                            continue

                        noprint = node1.noprint | node2.noprint
                        new_node = FragmentHGNode(FRAGMENT_NT, i, j, new_frag, False, False, noprint)

                        # The two sub-span nodes plus any connecting nodes.
                        children = [node1, node2]
                        children.extend(_connecting_tail_nodes(connect_frags, edge_to_node))

                        # An inconsistency is only reported; the node is still added.
                        if not check_consist(new_node, children):
                            print('inconsistency here')
                            print(str(new_node.frag))
                            print(str(node1.frag))
                            print(str(node2.frag))

                        edge = FragmentHGEdge()
                        for child in children:
                            edge.add_tail(child)
                        new_node.add_incoming(edge)

                        if print_sign:
                            print('%d to %d: %s  %s' % (i, j, ' '.join(new_frag.str_list()), str(new_frag)))
                            print('####Children info####')
                            for node in children:
                                print('%d to %d: %s %s' % (node.frag.start, node.frag.end, ' '.join(node.frag.str_list()) if node.frag.start != -1 else '###', str(node.frag)))
                            print('########')

                        s = Sample(hypergraph.Hypergraph(new_node), 0)
                        new_node.cut = 1
                        new_rule, _ = s.extract_one_rule(new_node, None, list(new_node.frag.ext_set))
                        if not new_node.noprint and len(new_node.frag.str_list()) < 8:
                            rule_str = '%s ||| %s\n' % (filter_vars(new_rule.dumped_format()), context_str(new_node.frag, amr_graph))
                            rule_f.write(rule_str)

                            # Same rule with the string side replaced by lemmas.
                            fields = rule_str.split(' ||| ')
                            fields[1] = ' '.join(amr_graph.lems[new_node.frag.start: new_node.frag.end])
                            lemma_rule_f.write(' ||| '.join(fields))

                        filter_with_maxtype(new_node)
                        add_one_item(chart[i][j], new_node)

    if print_sign:
        print('total length is %d' % n)

    # A blank line is written to the rule file once the chart is complete,
    # whether or not a goal item exists.
    rule_f.write('\n')

    if not chart[0][n]:
        print('##################################')
        print('The goal chart is empty, fail to build a goal item')
        print('Alignment fragments:')
        for frag in fragments:
            print('%s :   %s' % (frag.str_side(), str(frag)))
        print('Unaligned fragments:')
        for frag in unaligned_fragments:
            print(str(frag))
        print('#################################')
        return None

    for node in chart[0][n]:
        if is_goal_item(node):
            return hypergraph.Hypergraph(node)

    print('##################################')
    print('No goal item in the final chart')
    print('Alignment fragments:')
    for frag in fragments:
        print(str(frag))
    return None
def build_one_node(curr_frag, curr_start, curr_end, amr_graph, edge_alignment, refine=False):
    """Create the forest leaf node for one aligned fragment.

    If the concept edge of the fragment's root is not aligned
    (``edge_alignment[...] == 0``) the fragment is wrapped as is and returned
    without writing any rule.  Otherwise the still-unaligned ARGx-of parent
    edges and root arcs (ARG/op edges accepted by ``is_root_arc``) of the
    root are collected, marked as aligned in ``edge_alignment`` (which is
    modified in place) and merged into the fragment.  Only the first
    collected ARGx-of parent edge is attached.  The rule extracted for the
    resulting node is written to the module-level ``rule_f``, and its lemma
    variant to ``lemma_rule_f``.

    Args:
        curr_frag: the aligned fragment.
        curr_start, curr_end: span passed on to the new ``FragmentHGNode``.
        amr_graph: graph providing ``nodes``, ``edges`` and ``lems``.
        edge_alignment: per-edge alignment flags, indexed by edge index.
        refine: when true, ``init_ext_frag`` is applied to the fragments and
            the flag is forwarded to ``combine_fragments`` and
            ``extract_one_rule``.

    Returns:
        The new ``FragmentHGNode``.

    Raises:
        AssertionError: if an unaligned root arc is neither an ARG nor an op
            edge, or if combining the fragments yields a falsy result.
    """
    root_index = curr_frag.root
    graph_node = amr_graph.nodes[root_index]

    if edge_alignment[graph_node.c_edge] == 0:
        return FragmentHGNode(FRAGMENT_NT, curr_start, curr_end, curr_frag)

    # Unaligned relations going out of / into the root.
    root_arcs = []
    head_arcs = []

    # Used for deciding the category of the root node.
    is_pred = False
    is_op = False

    # Parent edges: only ARGx-of relations matter.
    for edge_index in graph_node.p_edges:
        edge = amr_graph.edges[edge_index]
        label = edge.label
        is_arg_of = label.startswith('ARG') and 'of' in label

        if edge_alignment[edge_index] == 1:  # This edge has already been aligned
            if curr_frag.edges[edge_index] == 1 and is_arg_of:
                is_pred = True
            continue

        # Our intuition: ARGs and ops go with the root.
        if is_arg_of:
            is_pred = True
            head_arcs.append((edge_index, edge.head))

    # Outgoing edges: ARG and op relations.
    for edge_index in graph_node.v_edges:
        edge = amr_graph.edges[edge_index]
        label = edge.label

        if edge_alignment[edge_index] == 1:  # This edge has already been aligned
            # Special case: the arcs are already attached to the fragment.
            if curr_frag.edges[edge_index] == 1 and is_root_arc(label):
                if 'ARG' in label:
                    is_pred = True
                else:
                    is_op = True
            continue

        tail_index = edge.tail
        if not is_root_arc(label):
            continue
        if 'ARG' in label:
            is_pred = True
        else:
            assert 'op' in label
            is_op = True
        root_arcs.append((edge_index, tail_index))

    if refine:
        init_ext_frag(curr_frag, is_pred, is_op)  # Initialize the current fragment

    node_frag = curr_frag  # Either an entity or a single concept
    if root_arcs or head_arcs:
        extra = AMRFragment(len(amr_graph.edges), len(amr_graph.nodes), amr_graph)
        extra.set_root(root_index)

        for rel_index, tail_index in root_arcs:
            edge_alignment[rel_index] = 1
            extra.set_edge(rel_index)
            extra.set_node(tail_index)

        if head_arcs:
            rel_index, head_index = head_arcs[0]
            edge_alignment[rel_index] = 1
            extra.set_edge(rel_index)
            extra.set_root(head_index)

        if refine:
            init_ext_frag(extra, is_pred, is_op)

        extra.build_ext_list()
        extra.build_ext_set()
        node_frag = combine_fragments(curr_frag, extra, refine)
        assert node_frag, 'Weird combination found'

    new_node = FragmentHGNode(FRAGMENT_NT, curr_start, curr_end, node_frag)

    # Extract the rule of the single-node hypergraph and write it out.
    sample = Sample(hypergraph.Hypergraph(new_node), 0)
    new_node.cut = 1
    new_rule, _ = sample.extract_one_rule(new_node, None, new_node.frag.ext_list, refine)
    rule_str = '%s ||| %s\n' % (filter_vars(new_rule.dumped_format()), context_str(new_node.frag, amr_graph))
    rule_f.write(rule_str)

    # Same rule with the string side replaced by the lemmas of the span.
    fields = rule_str.split(' ||| ')
    fields[1] = ' '.join(amr_graph.lems[new_node.frag.start: new_node.frag.end])
    lemma_rule_f.write(' ||| '.join(fields))
    return new_node