def eisner_first_order(n):
    """Build the first-order Eisner dependency-parsing chart over ``n`` positions.

    Chart items are numbered through an index array of shape
    ``[kShapes, kDir, n, n]``, i.e. one id per (shape, direction, start, end).

    Returns
    -------
    tuple
        ``(c.finish(False), part_encoder)`` where ``part_encoder`` is the
        ``DependencyParsingEncoder(n, 1)`` whose ``encoder`` table provides
        the arc labels.
    """
    edge_estimate = 4 * n ** 3

    item_ids = np.arange(kShapes * kDir * n * n, dtype=np.int64)
    item_ids = item_ids.reshape([kShapes, kDir, n, n])

    part_encoder = DependencyParsingEncoder(n, 1)
    arc_labels = part_encoder.encoder

    builder = pydecode.ChartBuilder(item_ids, arc_labels,
                                    unstrict=True,
                                    expected_size=(edge_estimate, 2))

    # Terminal nodes: the width-zero Tri items on the diagonal.  The Left
    # diagonal skips position 0.
    builder.init(np.diag(item_ids[Tri, Right]).copy())
    builder.init(np.diag(item_ids[Tri, Left, 1:, 1:]).copy())

    for width in range(1, n):
        # Only start positions whose end (s + width) stays inside the sentence.
        for s in range(n - width):
            t = s + width
            has_left_items = s != 0

            # One label slot per split point; the buffer is refilled in place
            # before each group of edges, exactly as the builder is fed.
            edge_labels = np.zeros(width, dtype=np.int64)

            # Incomplete (Trap) items first.
            if has_left_items:
                edge_labels.fill(arc_labels[t, s])
                builder.set_t(item_ids[Trap, Left, s, t],
                              item_ids[Tri, Right, s, s:t],
                              item_ids[Tri, Left, s + 1:t + 1, t],
                              labels=edge_labels)

            edge_labels.fill(arc_labels[s, t])
            builder.set_t(item_ids[Trap, Right, s, t],
                          item_ids[Tri, Right, s, s:t],
                          item_ids[Tri, Left, s + 1:t + 1, t],
                          labels=edge_labels)

            # Tri items are built with label -1 on every edge
            # (presumably "no output label" -- confirm against ChartBuilder).
            edge_labels.fill(-1)
            if has_left_items:
                builder.set_t(item_ids[Tri, Left, s, t],
                              item_ids[Tri, Left, s, s:t],
                              item_ids[Trap, Left, s:t, t],
                              labels=edge_labels)

            builder.set_t(item_ids[Tri, Right, s, t],
                          item_ids[Trap, Right, s, s + 1:t + 1],
                          item_ids[Tri, Right, s + 1:t + 1, t],
                          labels=edge_labels)

    return builder.finish(False), part_encoder
def simple_hypergraph():
    """
    Create a simple hypergraph.

    Six items (0-5) are declared with an output space of ten labels.
    Items 0-3 are initial items, item 4 is built with labels 0 and 1,
    and item 5 is built from item 4 and item 3 with label 2.
    """
    node_ids = np.arange(6)
    builder = pydecode.ChartBuilder(node_ids, np.arange(10))

    # Initial items.
    builder.init(node_ids[:4])

    # Item 4: tails are given as two parallel arrays, (0, 1) and (1, 2)
    # (presumably one edge per array position -- confirm against set_t).
    builder.set_t(node_ids[4],
                  node_ids[0:2],
                  node_ids[1:3],
                  labels=np.arange(2))

    # Item 5: a single entry pairing item 4 with item 3.
    builder.set_t(node_ids[5],
                  node_ids[[4]],
                  node_ids[[3]],
                  labels=np.array([2]))

    return builder.finish()
def eisner_second_order(n):
    """Build the second-order Eisner dependency-parsing chart over ``n`` positions.

    Chart items are numbered through an index array of shape
    ``[kShapes, kDir, n, n]``.  Besides the Tri and Trap shapes this chart
    also uses Box items.

    Returns
    -------
    tuple
        ``(c.finish(False), part_encoder)`` where ``part_encoder`` is the
        ``DependencyParsingEncoder(n, 2)`` whose ``encoder`` table provides
        the labels for the Trap items.
    """
    item_ids = np.arange(kShapes * kDir * n * n, dtype=np.int64)
    item_ids = item_ids.reshape([kShapes, kDir, n, n])

    part_encoder = DependencyParsingEncoder(n, 2)
    part_labels = part_encoder.encoder
    builder = pydecode.ChartBuilder(item_ids, part_labels, unstrict=True)

    # Initialize the chart with the width-zero Tri items; the Left diagonal
    # skips position 0.
    builder.init(np.diag(item_ids[Tri, Right]).copy())
    builder.init(np.diag(item_ids[Tri, Left, 1:, 1:]).copy())

    for width in range(1, n):
        # Only start positions whose end (s + width) stays inside the sentence.
        for s in range(n - width):
            t = s + width
            away_from_zero = s != 0

            if away_from_zero:
                builder.set_t(item_ids[Box, Left, s, t],
                              item_ids[Tri, Right, s, s:t],
                              item_ids[Tri, Left, s + 1:t + 1, t])

                # First tail pair is the Tri/Tri combination; the remaining
                # pairs walk the split point downwards from t - 1 to s + 1.
                left_first = np.append(item_ids[Tri, Right, s, t - 1],
                                       item_ids[Box, Left, s, t - 1:s:-1])
                left_second = np.append(item_ids[Tri, Left, t, t],
                                        item_ids[Trap, Left, t - 1:s:-1, t])
                builder.set_t(item_ids[Trap, Left, s, t],
                              left_first,
                              left_second,
                              labels=part_labels[t, s, t:s:-1])

            # Mirror image for the Right direction, split point walking up.
            right_first = np.append(item_ids[Tri, Right, s, s],
                                    item_ids[Trap, Right, s, s + 1:t])
            right_second = np.append(item_ids[Tri, Left, s + 1, t],
                                     item_ids[Box, Left, s + 1:t, t])
            builder.set_t(item_ids[Trap, Right, s, t],
                          right_first,
                          right_second,
                          labels=part_labels[s, t, s:t])

            if away_from_zero:
                builder.set_t(item_ids[Tri, Left, s, t],
                              item_ids[Tri, Left, s, s:t],
                              item_ids[Trap, Left, s:t, t])

            # From position 0 a Tri-Right item is only built for the span
            # that covers the whole sentence.
            if (s, t) == (0, n - 1) or away_from_zero:
                builder.set_t(item_ids[Tri, Right, s, t],
                              item_ids[Trap, Right, s, s + 1:t + 1],
                              item_ids[Tri, Right, s + 1:t + 1, t])

    return builder.finish(False), part_encoder
def make_lattice(width, height, transitions):
    """Build a lattice-shaped chart of ``width`` columns and ``height`` rows.

    The item table has ``width + 2`` columns: column 0 holds the start item
    ``coder[0, 0]``, columns ``1..width`` hold the lattice proper, and
    ``coder[width + 1, 0]`` is the final item fed by all of column ``width``.

    Parameters
    ----------
    width : int
        Number of lattice columns.
    height : int
        Number of items per column.
    transitions : indexable
        ``transitions[j]`` selects which item(s) of the previous column feed
        item ``j``; it is used directly as an index into the item and output
        tables (presumably an integer array per ``j`` -- confirm with callers).

    Returns
    -------
    The populated ``ph.ChartBuilder`` (not finished).
    """
    w, h = width, height
    blank = np.array([], dtype=np.int64)

    # The table must hold (w + 2) * h ids to be reshaped to [w + 2, h];
    # allocating only w * h made the reshape raise ValueError for any h > 0.
    coder = np.arange((w + 2) * h, dtype=np.int64).reshape([w + 2, h])
    out = np.arange(w * h * h, dtype=np.int64).reshape([w, h, h])

    c = ph.ChartBuilder(coder.size, unstrict=True, output_size=out.size)
    c.init(coder[0, 0])

    for i in range(1, w + 1):
        for j in range(h):
            prev = transitions[j]
            c.set2(coder[i, j],
                   coder[i - 1, prev],
                   blank,
                   out[i - 1, j, prev])

    # Final item: reachable from every item of the last lattice column.
    c.set(coder[w + 1, 0], coder[w, :h], blank, blank)
    return c
def random_hypergraph(size=50):
    """
    Generate a random hypergraph.

    ``size`` initial items are created, then up to ``size`` further items are
    each built from two randomly sampled existing items.  A pair is skipped
    when the two items already share an initial item in their reference sets.
    A final item (id ``2 * size``) is built over every item that was never
    used as a tail.

    Parameters
    ----------
    size : integer
        Number of initial items and of combination attempts.  Must be at
        least 2, otherwise ``random.sample`` raises ``ValueError``.

    Returns
    -------
    The finished chart returned by ``ChartBuilder.finish()``.
    """
    # Maps each item to the set of initial items it was built from.
    reference_sets = defaultdict(set)
    enc = np.arange(2 * size + 1)
    c = pydecode.ChartBuilder(enc, np.arange(10))
    used = set()

    c.init(enc[:size])
    for i in range(size):
        reference_sets[i] = {i}

    # Must be a real list: new head items are appended below, and a
    # Python 3 ``range`` object has no ``append``.
    nodes = list(range(size))
    for node in range(size):
        head_node = size + node
        node_a, node_b = random.sample(nodes, 2)
        if reference_sets[node_a] & reference_sets[node_b]:
            continue

        c.set_t(enc[head_node],
                enc[[node_a]],
                enc[[node_b]],
                labels=np.array([random.randint(0, 100)]))
        used.update((node_a, node_b))
        reference_sets[head_node] |= \
            reference_sets[node_a] | reference_sets[node_b]
        nodes.append(head_node)

    # Tie every item that was never used as a tail into the final item.
    unused = set(nodes) - used
    c.set_t(enc[2 * size], enc[list(unused)])

    dp = c.finish()
    assert len(dp.nodes) > 0
    assert len(dp.edges) > 0
    return dp
def tagger_first_order(sentence_length, tag_sizes):
    """Build a first-order tagging lattice.

    Items are numbered through a ``[sentence_length, max(tag_sizes)]`` index
    array; position ``i`` only uses its first ``tag_sizes[i]`` entries.

    Parameters
    ----------
    sentence_length : int
        Number of positions in the sentence.
    tag_sizes : sequence of int
        ``tag_sizes[i]`` is the number of tags used at position ``i``.

    Returns
    -------
    tuple
        ``(c.finish(False), part_encoder)`` where ``part_encoder`` is the
        ``TaggingEncoder(tag_sizes, 1)`` whose ``encoder`` table provides
        the edge labels.
    """
    widest = np.max(tag_sizes)
    item_ids = np.arange(sentence_length * widest, dtype=np.int64)
    item_ids = item_ids.reshape([sentence_length, widest])

    part_encoder = TaggingEncoder(tag_sizes, 1)
    label_table = part_encoder.encoder

    builder = pydecode.ChartBuilder(item_ids, label_table,
                                    unstrict=True,
                                    lattice=True)

    # Every tag used at the first position is an initial item.
    builder.init(item_ids[0, :tag_sizes[0]])

    for pos in range(1, sentence_length):
        prev_count = tag_sizes[pos - 1]
        prev_items = item_ids[pos - 1, :prev_count]
        for tag in range(tag_sizes[pos]):
            # One entry per tag of the previous position.
            builder.set_t(item_ids[pos, tag],
                          prev_items,
                          labels=label_table[pos, :prev_count, tag])

    return builder.finish(False), part_encoder
__author__ = 'Superuser'

import pydecode
import numpy as np

# --- Fibonacci as a plain table-filling dynamic program -------------------
n = 10
chart = np.zeros(n)
chart[0] = 0
chart[1] = chart[0] + 1
for item in range(2, n):
    chart[item] = chart[item - 1] + chart[item - 2]
# Bare expression kept from the original (notebook-style inspection);
# it has no effect when run as a script.
chart

# --- The same recurrence expressed as a pydecode chart --------------------
chart = pydecode.ChartBuilder(np.arange(n))
chart.init(0)
# Item 1 is built from item 0 and is the only edge carrying a label.
chart.set(1, [[0]], labels=[0])
for item in range(2, n):
    # Each later item has one edge whose tails are its two predecessors.
    chart.set(item, [[item - 1, item - 2]])
graph = chart.finish()

weights = pydecode.transform(graph, np.array([1.0]))
inside = pydecode.inside(graph, weights)
# Bare expression kept from the original (notebook-style inspection).
inside

pydecode.draw(graph, np.array(["+"] * n), graph.node_labeling)
def cky(sentence, tags, grammar, dep_matrix):
    """Build the pruned lexicalized-CKY chart for one tagged sentence.

    Chart items are keyed by ``(i, k, h, X, stage)``.  Judging by the names
    and the way they are combined, ``i``/``k`` are the span ends, ``h`` the
    head position and ``X`` a nonterminal (TODO confirm).  ``stage`` is one
    of ``START``, ``MID`` or ``DONE``:

    * ``START`` -- the item as first created (an initial item for a
      preterminal, or the result of a binary rule);
    * ``MID``   -- reached from ``START`` through one unary rule;
    * ``DONE``  -- reached from ``START`` or ``MID`` of the same nonterminal
      with label ``-1``, or from another nonterminal's ``MID`` through a
      further unary rule.

    Only ``DONE`` items are used as children of binary rules.

    Parameters
    ----------
    sentence : sequence
        Only its length is used here; it is also passed to the encoder.
    tags : sequence
        One tag per position, looked up in ``grammar.nonterms``.
    grammar : object
        Must provide ``nonterms``, ``rules``, ``unary_rules_by_first`` and
        ``roots``.
    dep_matrix : object
        Passed to ``span_pruning`` to decide which (head, modifier, split)
        triples are considered for each span.

    Returns
    -------
    tuple
        ``(graph, encoder)``: the result of ``chart.finish(True)`` and the
        ``LexicalizedCFGEncoder`` whose ``encoder`` table supplied the labels.
    """
    # Preprocessing.
    n = len(sentence)
    preterms = [grammar.nonterms[tag] for tag in tags]
    span_pruner = span_pruning(n, dep_matrix)
    # print span_pruner
    cell_rules = \
        grammar_pruning(preterms, grammar, span_pruner)

    # span_nts[i, k, h]: nonterminals for which an item exists on that span/head.
    span_nts = defaultdict(set)
    encoder = LexicalizedCFGEncoder(sentence, tags, grammar)
    # NOTE(review): ``auto`` is defined elsewhere; presumably it returns a
    # factory handing out fresh consecutive ids for unseen keys -- confirm.
    items = defaultdict(auto())
    G = len(grammar.rules)
    labels = encoder.encoder
    # Hard-coded item-id space of one million entries handed to the builder.
    temp = np.arange(1000000)
    chart = pydecode.ChartBuilder(temp, unstrict=True)
    # has_item[i, k, h, X] is 1 once any item for that key has been created;
    # missing keys read as 0 (and are inserted by the lookup).
    has_item = defaultdict(lambda: 0)

    # Initialize the chart.
    for i in range(n):
        chart.init(items[i, i, i, preterms[i], START])
        # The preterminal itself is DONE without applying any rule (label -1).
        chart.set(items[i, i, i, preterms[i], DONE],
                  [[items[i, i, i, preterms[i], START]]],
                  [-1])
        has_item[i, i, i, preterms[i]] = 1
        span_nts[i, i, i].add(preterms[i])
        Y = preterms[i]

        # unary[X_up]: (rule, source) pairs for a second unary step into X_up.
        unary = defaultdict(set)
        # seen: nonterminals reached by exactly one unary step from Y.
        seen = set()
        for r, X in grammar.unary_rules_by_first[Y]:
            chart.set(items[i, i, i, X, MID],
                      [[items[i, i, i, Y, START]]],
                      labels=[labels[i, i, i, i, i, r]])
            has_item[i, i, i, X] = 1
            span_nts[i, i, i].add(X)
            for r_up, X_up in grammar.unary_rules_by_first[X]:
                unary[X_up].add((r_up, X))
            seen.add(X)

        to_finish = set(unary.keys()) | seen
        for X_up in to_finish:
            if X_up in unary:
                # Second unary step: MID of X -> DONE of X_up.
                edges, labels_ = zip(*[([items[i, i, i, X, MID]],
                                        labels[i, i, i, i, i, r_up])
                                       for r_up, X in unary[X_up]])
            else:
                edges, labels_ = (), ()
            if X_up in seen:
                # Stop after one unary step: MID -> DONE, label -1.
                labels_ += (-1, )
                edges += ([items[i, i, i, X_up, MID]], )
            chart.set(items[i, i, i, X_up, DONE], edges, labels_)
            has_item[i, i, i, X_up] = 1
            span_nts[i, i, i].add(X_up)

    # Main loop.
    # d is the span width, (i, k) the span; spans running past the end are skipped.
    for d in range(1, n):
        for i in range(n):
            k = i + d
            if k >= n:
                continue
            # to_add[X, h]: binary derivations [r, Y, Z, j, h, m, dir] of X
            # over (i, k) with head h.
            to_add = defaultdict(list)
            #print i, k, bool(span_pruner[i, k])
            for h, m, j in span_pruner[i, k]:
                if h <= j:
                    # Head h lies in the left part (i..j), m in the right part.
                    for Y in span_nts[i, j, h]:
                        for r, X, Z in cell_rules[i, k, Y, 0]:
                            if has_item[j + 1, k, m, Z]:
                                to_add[X, h].append([r, Y, Z, j, h, m, 0])
                                assert r < G, "%s %s" % (r, G)
                if h > j:
                    # Head h lies in the right part (j+1..k), m in the left part.
                    for Z in span_nts[j + 1, k, h]:
                        for r, X, Y in cell_rules[i, k, Z, 1]:
                            if has_item[i, j, m, Y]:
                                to_add[X, h].append([r, Y, Z, j, h, m, 1])
                                assert r < G, "%s %s" % (r, G)

            # unary: first unary step (START -> MID);
            # unary2: second unary step (MID -> DONE).
            unary = defaultdict(list)
            unary2 = defaultdict(list)
            for X, h in to_add:
                # Left child carries the head when dir_ == 0, otherwise the
                # right child does.
                labels_, edges = zip(*[(labels[i, j, k, h, m, r], [
                    items[i, j, h if dir_ == 0 else m, Y, DONE],
                    items[j + 1, k, m if dir_ == 0 else h, Z, DONE]
                ]) for r, Y, Z, j, h, m, dir_ in to_add[X, h]])
                chart.set(items[i, k, h, X, START], edges, labels=labels_)
                assert not has_item[i, k, h, X]
                has_item[i, k, h, X] = 1
                span_nts[i, k, h].add(X)
                for r_unary, X_up in grammar.unary_rules_by_first[X]:
                    unary[X_up, h].append((r_unary, X))

            for X, h in unary:
                labels_, edges = zip(*[(labels[i, k, k, h, h, r],
                                        [items[i, k, h, Y, START]])
                                       for r, Y in unary[X, h]])
                has_item[i, k, h, X] = 1
                span_nts[i, k, h].add(X)
                chart.set(items[i, k, h, X, MID], edges, labels=labels_)
                for r_unary, X_up in grammar.unary_rules_by_first[X]:
                    unary2[X_up, h].append((r_unary, X))

            # Unary rules.
            # Every (X, h) touched above gets a DONE item collecting all the
            # ways of finishing it.
            finish = set()
            finish.update(unary.keys())
            finish.update(unary2.keys())
            finish.update(to_add.keys())
            for X, h in finish:
                if (X, h) in unary2:
                    # Two unary steps: MID of Y -> DONE of X.
                    labels_, edges = zip(*[(labels[i, k, k, h, h, r],
                                            [items[i, k, h, Y, MID]])
                                           for r, Y in unary2[X, h]])
                else:
                    labels_, edges = (), ()
                if (X, h) in unary:
                    # One unary step only: MID -> DONE, label -1.
                    edges += ([items[i, k, h, X, MID]], )
                    labels_ += (-1, )
                if (X, h) in to_add:
                    # No unary step: START -> DONE, label -1.
                    edges += ([items[i, k, h, X, START]], )
                    labels_ += (-1, )
                has_item[i, k, h, X] = 1
                span_nts[i, k, h].add(X)
                chart.set(items[i, k, h, X, DONE], edges, labels=labels_)

    # The final item is keyed (n - 1, 0, 0, 0, DONE) and is fed by every
    # root nonterminal that covers the whole sentence, for any head position.
    children = [[items[0, n - 1, h, root, DONE]]
                for h in range(n)
                for root in grammar.roots
                if has_item[0, n - 1, h, root]]
    #assert(children)
    chart.set(items[n - 1, 0, 0, 0, DONE], children)
    graph = chart.finish(True)
    # for key in span_nts:
    #     print key
    #     for nt in span_nts[key]:
    #         print grammar.nonterm_name(nt),
    #     print
    # print
    # print "SIZE", n, len(graph.edges), len(items), len(span_nts)
    return graph, encoder