Esempio n. 1
0
def eisner_first_order(n):
    """Build the first-order Eisner chart over ``n`` positions.

    Chart items are indexed as ``[shape, direction, s, t]`` using the
    module-level constants ``Tri``/``Trap`` and ``Left``/``Right``.
    Arc labels come from ``DependencyParsingEncoder(n, 1).encoder`` and are
    looked up as ``encoder[head, modifier]``.

    Parameters
    ----------
    n : int
        Number of positions; spans ``(s, t)`` with ``0 <= s < t < n`` are
        built.

    Returns
    -------
    tuple
        ``(builder.finish(False), part_encoder)``.
    """
    chart_items = np.arange(kShapes * kDir * n * n, dtype=np.int64)
    chart_items = chart_items.reshape([kShapes, kDir, n, n])

    part_encoder = DependencyParsingEncoder(n, 1)
    arc_labels = part_encoder.encoder

    builder = pydecode.ChartBuilder(chart_items,
                                    arc_labels,
                                    unstrict=True,
                                    expected_size=(4 * n**3, 2))

    # Terminal nodes: the diagonal (s == t) triangles.  The left-facing
    # ones skip position 0.
    builder.init(np.diag(chart_items[Tri, Right]).copy())
    builder.init(np.diag(chart_items[Tri, Left, 1:, 1:]).copy())

    for width in range(1, n):
        # Only start positions whose span end stays inside the sentence.
        for start in range(n - width):
            end = start + width
            not_at_root = start != 0

            # One label slot per split point; the buffer is refilled
            # before each rule that uses it.
            edge_labels = np.zeros([width], dtype=np.int64)

            # Both incomplete (Trap) items combine the same pair of
            # complete items; only the label differs.
            right_tris = chart_items[Tri, Right, start, start:end]
            left_tris = chart_items[Tri, Left, start + 1:end + 1, end]

            # Incomplete items first.  Left-facing items are never built
            # from position 0.
            if not_at_root:
                edge_labels.fill(arc_labels[end, start])
                builder.set_t(chart_items[Trap, Left, start, end],
                              right_tris,
                              left_tris,
                              labels=edge_labels)

            edge_labels.fill(arc_labels[start, end])
            builder.set_t(chart_items[Trap, Right, start, end],
                          right_tris,
                          left_tris,
                          labels=edge_labels)

            # Complete items all carry the label -1
            # (presumably "no label" -- TODO confirm against pydecode).
            edge_labels.fill(-1)
            if not_at_root:
                builder.set_t(chart_items[Tri, Left, start, end],
                              chart_items[Tri, Left, start, start:end],
                              chart_items[Trap, Left, start:end, end],
                              labels=edge_labels)

            builder.set_t(chart_items[Tri, Right, start, end],
                          chart_items[Trap, Right, start, start + 1:end + 1],
                          chart_items[Tri, Right, start + 1:end + 1, end],
                          labels=edge_labels)

    return builder.finish(False), part_encoder
Esempio n. 2
0
def simple_hypergraph():
    """
    Build a small, fixed hypergraph over six chart items.

    Items 0-3 are initialised directly.  Item 4 is defined from the tail
    arrays ``[0, 1]`` and ``[1, 2]`` with labels ``[0, 1]``; item 5 is
    defined from the tail arrays ``[4]`` and ``[3]`` with label ``[2]``.

    Returns the object produced by ``ChartBuilder.finish()``.
    """
    node_ids = np.arange(6)
    builder = pydecode.ChartBuilder(node_ids, np.arange(10))

    builder.init(node_ids[:4])

    first_tails = node_ids[0:2]
    second_tails = node_ids[1:3]
    builder.set_t(node_ids[4], first_tails, second_tails,
                  labels=np.arange(2))

    # One-element arrays on both sides: a single edge into item 5.
    builder.set_t(node_ids[5], node_ids[[4]], node_ids[[3]],
                  labels=np.array([2]))

    return builder.finish()
Esempio n. 3
0
def eisner_second_order(n):
    """Build the second-order Eisner chart over ``n`` positions.

    Chart items are indexed as ``[shape, direction, s, t]`` using the
    module-level constants ``Tri``, ``Trap``, ``Box`` and ``Left``/``Right``.
    Labels come from ``DependencyParsingEncoder(n, 2).encoder`` and are
    sliced with three indices.

    Parameters
    ----------
    n : int
        Number of positions; spans ``(s, t)`` with ``0 <= s < t < n`` are
        built.

    Returns
    -------
    tuple
        ``(builder.finish(False), part_encoder)``.
    """
    item_ids = np.arange(kShapes * kDir * n * n, dtype=np.int64)
    item_ids = item_ids.reshape([kShapes, kDir, n, n])

    part_encoder = DependencyParsingEncoder(n, 2)
    part_labels = part_encoder.encoder

    builder = pydecode.ChartBuilder(item_ids, part_labels, unstrict=True)

    # Initialize the chart with the diagonal (s == t) triangles; the
    # left-facing ones skip position 0.
    builder.init(np.diag(item_ids[Tri, Right]).copy())
    builder.init(np.diag(item_ids[Tri, Left, 1:, 1:]).copy())

    final_pos = n - 1
    for width in range(1, n):
        # Only start positions whose span end stays inside the sentence.
        for s in range(n - width):
            t = s + width
            not_at_root = s != 0

            if not_at_root:
                builder.set_t(item_ids[Box, Left, s, t],
                              item_ids[Tri, Right, s, s:t],
                              item_ids[Tri, Left, s + 1:t + 1, t])

                # First edge pairs two triangles; the remaining edges pair
                # a box with a trapezoid, walking the split point from
                # t - 1 down to s + 1.
                trap_left_first = np.append(
                    item_ids[Tri, Right, s, t - 1],
                    item_ids[Box, Left, s, t - 1:s:-1])
                trap_left_second = np.append(
                    item_ids[Tri, Left, t, t],
                    item_ids[Trap, Left, t - 1:s:-1, t])
                builder.set_t(item_ids[Trap, Left, s, t],
                              trap_left_first,
                              trap_left_second,
                              labels=part_labels[t, s, t:s:-1])

            # Mirror image for the right-facing trapezoid, with the split
            # point walking upwards from s.
            trap_right_first = np.append(
                item_ids[Tri, Right, s, s],
                item_ids[Trap, Right, s, s + 1:t])
            trap_right_second = np.append(
                item_ids[Tri, Left, s + 1, t],
                item_ids[Box, Left, s + 1:t, t])
            builder.set_t(item_ids[Trap, Right, s, t],
                          trap_right_first,
                          trap_right_second,
                          labels=part_labels[s, t, s:t])

            if not_at_root:
                builder.set_t(item_ids[Tri, Left, s, t],
                              item_ids[Tri, Left, s, s:t],
                              item_ids[Trap, Left, s:t, t])

            # From position 0, a complete right triangle is only built for
            # the full span (0, n - 1).
            if not_at_root or t == final_pos:
                builder.set_t(item_ids[Tri, Right, s, t],
                              item_ids[Trap, Right, s, s + 1:t + 1],
                              item_ids[Tri, Right, s + 1:t + 1, t])

    return builder.finish(False), part_encoder
Esempio n. 4
0
def make_lattice(width, height, transitions):
    """Build a chart for a ``width`` x ``height`` lattice.

    The chart has ``width + 2`` columns of ``height`` items each: column 0
    holds the start item ``(0, 0)``, columns ``1..width`` hold the lattice
    cells, and column ``width + 1`` holds the final item ``(width + 1, 0)``.

    Parameters
    ----------
    width : int
        Number of lattice columns.
    height : int
        Number of states per column.
    transitions : sequence
        ``transitions[j]`` is used to index the previous column when
        building state ``j``.
        NOTE(review): presumably an index or index array of predecessor
        states -- confirm against the callers.

    Returns
    -------
    The ``ph.ChartBuilder`` instance; this function does not call
    ``finish()`` on it.
    """
    w, h = width, height

    blank = np.array([], dtype=np.int64)

    # Fix: the id grid is reshaped to (w + 2, h), so it needs (w + 2) * h
    # entries.  The previous ``np.arange(w * h)`` could never be reshaped
    # to that shape for h > 0 and always raised ValueError.
    coder = np.arange((w + 2) * h, dtype=np.int64)\
        .reshape([w + 2, h])
    out = np.arange(w * h * h, dtype=np.int64)\
        .reshape([w, h, h])

    c = ph.ChartBuilder(coder.size, unstrict=True, output_size=out.size)

    # Single start item.
    c.init(coder[0, 0])

    # Each cell (i, j) is reached from column i - 1 through transitions[j];
    # the label is indexed by (column, state, previous state).
    for i in range(1, w + 1):
        for j in range(h):
            c.set2(coder[i, j], coder[i - 1, transitions[j]], blank,
                   out[i - 1, j, transitions[j]])

    # The final item collects every state of the last lattice column.
    c.set(coder[w + 1, 0], coder[w, :h], blank, blank)
    return c
Esempio n. 5
0
def random_hypergraph(size=50):
    """
    Generate a random hypergraph.

    ``size`` items are initialised directly.  Then up to ``size`` head
    items are added, each with one edge over two distinct, randomly chosen
    existing items whose sets of reachable initial items do not overlap.
    A final root item ``2 * size`` has a single edge over every item that
    was never used as a tail.

    Parameters
    ----------
    size : integer
        Number of initial items.  ``random.sample`` needs at least two
        candidates, so ``size`` must be >= 2.

    Returns
    -------
    The object produced by ``ChartBuilder.finish()``.
    """
    # For every item, the set of initial items reachable below it.
    reference_sets = defaultdict(set)
    enc = np.arange(2 * size + 1)

    c = pydecode.ChartBuilder(enc, np.arange(10))
    used = set()

    c.init(enc[:size])

    for i in range(size):
        reference_sets[i] = {i}

    # Fix: this must be a real list because head items are appended below.
    # On Python 3 ``range(size)`` has no ``append`` and raised
    # AttributeError.
    nodes = list(range(size))
    for node in range(size):
        head_node = size + node
        node_a, node_b = random.sample(nodes, 2)
        # Skip pairs that share an initial item.
        if reference_sets[node_a] & reference_sets[node_b]:
            continue

        # NOTE(review): labels are drawn from 0..100 while the builder was
        # given np.arange(10) as its second argument -- confirm that
        # out-of-range labels are acceptable here.
        c.set_t(enc[head_node], enc[[node_a]], enc[[node_b]],
                labels=np.array([random.randint(0, 100)]))
        used.update([node_a, node_b])
        reference_sets[head_node] |= \
            reference_sets[node_a] | reference_sets[node_b]
        nodes.append(head_node)

    # Everything that never served as a tail hangs off the root item.
    unused = set(nodes) - used
    c.set_t(enc[2 * size], enc[list(unused)])

    dp = c.finish()
    assert len(dp.nodes) > 0
    assert len(dp.edges) > 0
    return dp
Esempio n. 6
0
def tagger_first_order(sentence_length, tag_sizes):
    """Build a first-order tagging lattice.

    Chart items are indexed as ``[position, tag]``.  Labels come from
    ``TaggingEncoder(tag_sizes, 1).encoder`` and are sliced as
    ``encoder[position, previous tags, tag]``.

    Parameters
    ----------
    sentence_length : int
        Number of positions.
    tag_sizes : sequence of int
        ``tag_sizes[i]`` is the number of tags available at position ``i``.

    Returns
    -------
    tuple
        ``(builder.finish(False), part_encoder)``.
    """
    max_tags = np.max(tag_sizes)

    item_ids = np.arange(sentence_length * max_tags, dtype=np.int64)
    item_ids = item_ids.reshape([sentence_length, max_tags])

    part_encoder = TaggingEncoder(tag_sizes, 1)
    part_labels = part_encoder.encoder

    builder = pydecode.ChartBuilder(item_ids, part_labels,
                                    unstrict=True, lattice=True)

    # Every tag allowed at position 0 is a start item.
    builder.init(item_ids[0, :tag_sizes[0]])

    for pos in range(1, sentence_length):
        # The predecessor set is the same for every tag at this position.
        prev_count = tag_sizes[pos - 1]
        prev_items = item_ids[pos - 1, :prev_count]
        for tag in range(tag_sizes[pos]):
            builder.set_t(item_ids[pos, tag],
                          prev_items,
                          labels=part_labels[pos, :prev_count, tag])

    return builder.finish(False), part_encoder
Esempio n. 7
0
__author__ = 'Superuser'

import pydecode
import numpy as np

# Part 1: plain array-based dynamic program.  Fills a 10-entry table with
# the recurrence chart[i] = chart[i - 1] + chart[i - 2], seeded with
# chart[0] = 0 and chart[1] = 1.
n = 10
chart = np.zeros(10)
chart[0] = 0
chart[1] = chart[0] + 1
for item in range(2, n):
    chart[item] = chart[item - 1] + chart[item - 2]
# Bare expression: has no effect when run as a script (presumably left over
# from a notebook cell, where it displays the table).
chart

# Part 2: the same recurrence expressed as a pydecode chart.  Note that the
# name `chart` is rebound from the NumPy table to a ChartBuilder here.
chart = pydecode.ChartBuilder(np.arange(10))
chart.init(0)
# Item 1 is built from item 0 alone and is the only edge given a label.
chart.set(1, [[0]], labels=[0])

# Every later item gets one entry listing its two predecessors.
for item in range(2, n):
    chart.set(item, [[item - 1, item - 2]])
graph = chart.finish()

# NOTE(review): presumably maps the one-element label weight vector onto
# the graph's edges before running the inside algorithm -- confirm against
# the pydecode documentation.
weights = pydecode.transform(graph, np.array([1.0]))
inside = pydecode.inside(graph, weights)
# Bare expression: no effect when run as a script (see above).
inside

pydecode.draw(graph, np.array(["+"] * 10), graph.node_labeling)
Esempio n. 8
0
def cky(sentence, tags, grammar, dep_matrix):
    """
    Build a pruned CKY chart for ``sentence`` and return it as a graph.

    Chart items are looked up in ``items`` by 5-tuples of the form
    ``(i, k, h, X, stage)``, where ``i..k`` is a span, ``X`` is a
    nonterminal and ``stage`` is one of the module-level constants
    ``START``, ``MID`` or ``DONE``.  ``h`` is presumably the head position
    of the span (TODO confirm).  For each span/nonterminal the stages are
    chained: ``START`` holds binary-rule (or preterminal) derivations,
    ``MID`` holds one unary rule applied on top of ``START``, and ``DONE``
    collects a further unary rule applied on top of ``MID`` plus
    pass-through edges (label ``-1``) from ``MID`` and ``START``.

    Parameters
    ----------
    sentence : sequence
        Only its length is used here; it is also passed to
        ``LexicalizedCFGEncoder``.
    tags : sequence
        One tag per position, looked up in ``grammar.nonterms``.
    grammar : object
        Must provide ``nonterms``, ``rules``, ``unary_rules_by_first`` and
        ``roots``.
    dep_matrix : object
        Passed to ``span_pruning`` together with the sentence length.

    Returns
    -------
    tuple
        ``(graph, encoder)`` where ``graph`` is ``chart.finish(True)`` and
        ``encoder`` is the ``LexicalizedCFGEncoder`` whose ``encoder``
        table supplies the edge labels.
    """
    # Preprocessing.
    n = len(sentence)
    preterms = [grammar.nonterms[tag] for tag in tags]

    span_pruner = span_pruning(n, dep_matrix)
    # print span_pruner
    cell_rules = \
        grammar_pruning(preterms, grammar, span_pruner)
    # (i, k, h) -> set of nonterminals that have an item over that span.
    span_nts = defaultdict(set)
    encoder = LexicalizedCFGEncoder(sentence, tags, grammar)
    # NOTE(review): `auto` is defined elsewhere; presumably its result hands
    # out a fresh item id on first lookup, in which case the order in which
    # keys are first touched below determines the ids -- confirm before
    # reordering anything.
    items = defaultdict(auto())

    # Number of grammar rules; only used in the sanity asserts below.
    G = len(grammar.rules)

    labels = encoder.encoder
    # NOTE(review): hard-coded array of 1,000,000 ids handed to the chart
    # builder -- presumably an upper bound on the number of items.
    temp = np.arange(1000000)
    chart = pydecode.ChartBuilder(temp, unstrict=True)
    # (i, k, h, X) -> 1 once any item for that key exists, else 0.
    has_item = defaultdict(lambda: 0)

    # Initialize the chart.
    for i in range(n):
        # The preterminal itself: START is a leaf, DONE passes it through
        # with label -1.
        chart.init(items[i, i, i, preterms[i], START])
        chart.set(items[i, i, i, preterms[i], DONE],
                  [[items[i, i, i, preterms[i], START]]], [-1])

        has_item[i, i, i, preterms[i]] = 1
        span_nts[i, i, i].add(preterms[i])
        Y = preterms[i]
        # X_up -> {(rule, X)}: second-level unary rules X_up -> X, where X
        # was itself reached by one unary rule from the preterminal.
        unary = defaultdict(set)
        # Nonterminals reached by exactly one unary rule from Y.
        seen = set()
        for r, X in grammar.unary_rules_by_first[Y]:
            chart.set(items[i, i, i, X, MID], [[items[i, i, i, Y, START]]],
                      labels=[labels[i, i, i, i, i, r]])
            has_item[i, i, i, X] = 1
            span_nts[i, i, i].add(X)
            for r_up, X_up in grammar.unary_rules_by_first[X]:
                unary[X_up].add((r_up, X))

            seen.add(X)

        # Build DONE for every nonterminal reachable by one or two unary
        # rules.
        to_finish = set(unary.keys()) | seen
        for X_up in to_finish:
            if X_up in unary:
                edges, labels_ = zip(*[([items[i, i, i, X,
                                               MID]], labels[i, i, i, i, i,
                                                             r_up])
                                       for r_up, X in unary[X_up]])
            else:
                edges, labels_ = (), ()
            if X_up in seen:
                # Pass-through from MID, with label -1.
                labels_ += (-1, )
                edges += ([items[i, i, i, X_up, MID]], )
            chart.set(items[i, i, i, X_up, DONE], edges, labels_)
            has_item[i, i, i, X_up] = 1
            span_nts[i, i, i].add(X_up)

    # Main loop.
    # d is the span width minus one; spans are visited shortest first.
    for d in range(1, n):
        for i in range(n):
            k = i + d
            if k >= n:
                continue

            # (X, h) -> list of [r, Y, Z, j, h, m, dir] binary derivations.
            to_add = defaultdict(list)
            #print i, k, bool(span_pruner[i, k])
            # Each pruner entry is (h, m, j); j is the split point, so the
            # children cover i..j and j + 1..k.
            for h, m, j in span_pruner[i, k]:
                if h <= j:
                    # h lies in the left child: iterate rules by the left
                    # nonterminal Y and require a right child Z at m.
                    for Y in span_nts[i, j, h]:
                        for r, X, Z in cell_rules[i, k, Y, 0]:
                            if has_item[j + 1, k, m, Z]:
                                to_add[X, h].append([r, Y, Z, j, h, m, 0])
                                assert r < G, "%s %s" % (r, G)
                if h > j:
                    # h lies in the right child: iterate rules by the right
                    # nonterminal Z and require a left child Y at m.
                    for Z in span_nts[j + 1, k, h]:
                        for r, X, Y in cell_rules[i, k, Z, 1]:
                            if has_item[i, j, m, Y]:

                                to_add[X, h].append([r, Y, Z, j, h, m, 1])
                                assert r < G, "%s %s" % (r, G)

            # (X_up, h) -> [(rule, X)]: unary rules over START items
            # (unary) and over MID items (unary2).
            unary = defaultdict(list)
            unary2 = defaultdict(list)
            for X, h in to_add:
                # One edge per binary derivation.  dir_ selects which child
                # is keyed by h and which by m.
                labels_, edges = zip(*[(labels[i, j, k, h, m, r], [
                    items[i, j, h if dir_ == 0 else m, Y,
                          DONE], items[j + 1, k, m if dir_ == 0 else h, Z,
                                       DONE]
                ]) for r, Y, Z, j, h, m, dir_ in to_add[X, h]])

                chart.set(items[i, k, h, X, START], edges, labels=labels_)
                assert not has_item[i, k, h, X]
                has_item[i, k, h, X] = 1
                span_nts[i, k, h].add(X)
                for r_unary, X_up in grammar.unary_rules_by_first[X]:
                    unary[X_up, h].append((r_unary, X))

            # First unary layer: MID items built from START items.
            for X, h in unary:
                labels_, edges = zip(*[(labels[i, k, k, h, h,
                                               r], [items[i, k, h, Y, START]])
                                       for r, Y in unary[X, h]])
                has_item[i, k, h, X] = 1
                span_nts[i, k, h].add(X)
                chart.set(items[i, k, h, X, MID], edges, labels=labels_)
                for r_unary, X_up in grammar.unary_rules_by_first[X]:
                    unary2[X_up, h].append((r_unary, X))

            # Unary rules.
            # Every (X, h) that has a START, MID or second-level unary
            # derivation gets a DONE item.
            finish = set()
            finish.update(unary.keys())
            finish.update(unary2.keys())
            finish.update(to_add.keys())
            for X, h in finish:
                if (X, h) in unary2:
                    labels_, edges = zip(*[(labels[i, k, k, h, h, r],
                                            [items[i, k, h, Y, MID]])
                                           for r, Y in unary2[X, h]])
                else:
                    labels_, edges = (), ()

                # Pass-through edges (label -1) from the lower stages.
                if (X, h) in unary:
                    edges += ([items[i, k, h, X, MID]], )
                    labels_ += (-1, )

                if (X, h) in to_add:
                    edges += ([items[i, k, h, X, START]], )
                    labels_ += (-1, )

                has_item[i, k, h, X] = 1
                span_nts[i, k, h].add(X)
                chart.set(items[i, k, h, X, DONE], edges, labels=labels_)

    # One edge per (head position, root nonterminal) that covers the whole
    # sentence.
    children = [[items[0, n - 1, h, root, DONE]] for h in range(n)
                for root in grammar.roots if has_item[0, n - 1, h, root]]

    # NOTE(review): with the assert below disabled, an unparseable sentence
    # reaches chart.set with an empty edge list.
    #assert(children)

    # The root uses the key (n - 1, 0, 0, 0, DONE), which no span above
    # produces when n > 1.
    # NOTE(review): for n == 1 this is (0, 0, 0, 0, DONE) and would coincide
    # with the DONE item of nonterminal 0 at position 0, if that exists.
    chart.set(items[n - 1, 0, 0, 0, DONE], children)
    graph = chart.finish(True)
    # for key in span_nts:
    #     print key
    #     for nt in span_nts[key]:
    #         print grammar.nonterm_name(nt),
    #     print
    # print

    # print "SIZE", n, len(graph.edges), len(items), len(span_nts)
    return graph, encoder