def compute_crossings(a, r, cumul):
    """Flag the nonterminals of rule ``r`` that are involved in a crossing.

    Every source-side position of the rule is compared with every
    target-side position.  For an aligned terminal pair, or for a pair of
    nonterminals carrying the same index, ``region_aligned`` is queried for
    two sub-regions of the rule span; a positive total is treated as a
    crossing and ends the scan for that source position.

    Args:
        a: alignment object; ``a.aligned[fi][ei]`` is tested for truth.
        r: rule object providing ``f`` and ``e`` (symbol sequences),
            ``fpos`` and ``epos`` (an ``int`` for a terminal, a 2-tuple for
            a nonterminal) and ``span``, a 4-tuple ``(pfi, pfj, pei, pej)``.
        cumul: forwarded unchanged to ``region_aligned``.
            NOTE(review): presumably a cumulative alignment table - confirm.

    Returns:
        A two-element list of 0/1 flags.  Slot ``k`` is 1 when the
        nonterminal with index ``k + 1`` was found to cross.  Only crossing
        nonterminals are reported; crossing terminals do not appear in the
        result.

    Raises:
        IndexError: if a crossing nonterminal has an index greater than 2,
            because the result has exactly two slots.
    """
    nt_flags = [0, 0]
    pfi, pfj, pei, pej = r.span

    for rfi in range(len(r.f)):
        for rei in range(len(r.e)):
            fpos = r.fpos[rfi]
            epos = r.epos[rei]
            if type(fpos) is int and type(epos) is int and a.aligned[fpos][epos]:
                # Aligned terminal pair.  Terminals never set a flag in the
                # result; the test only decides when to stop scanning the
                # target side for this source position.
                crossing = (region_aligned(cumul, pfi, fpos, epos + 1, pej)
                            + region_aligned(cumul, fpos + 1, pfj, pei, epos))
                if crossing > 0:
                    break
            elif (type(fpos) is tuple and type(epos) is tuple
                  and sym.getindex(r.f[rfi]) == sym.getindex(r.e[rei])):
                # Coindexed nonterminals: positions are (start, end) pairs.
                fi, fj = fpos
                ei, ej = epos
                crossing = (region_aligned(cumul, pfi, fi, ej, pej)
                            + region_aligned(cumul, fj, pfj, pei, ei))
                if crossing > 0:
                    nt_flags[sym.getindex(r.f[rfi]) - 1] = 1
                    break
    return nt_flags
def _tree_helper(t, antvalues):
    """Parse ``t`` into a tree and hang antecedent values under variable leaves.

    Args:
        t: string handed to ``tree.str_to_tree``.
        antvalues: sequence of antecedent values; the entry at
            ``sym.getindex(x) - 1`` is attached for a variable ``x``.

    Returns:
        The parsed tree.  Each frontier node whose label converts (via
        ``sym.fromstring``) to a variable has the matching antecedent value
        inserted as its first child.
    """
    root = tree.str_to_tree(t)
    for leaf in root.frontier():
        symbol = sym.fromstring(leaf.label)
        if not sym.isvar(symbol):
            continue
        antecedent = antvalues[sym.getindex(symbol) - 1]
        leaf.insert_child(0, antecedent)
    return root
def _tree_helper(t, antvalues):
    """Parse ``t`` into a tree and hang antecedent values under variable leaves.

    Args:
        t: string handed to ``tree.str_to_tree``.
        antvalues: sequence of antecedent values; the entry at
            ``sym.getindex(x) - 1`` is attached for a variable ``x``.

    Returns:
        The parsed tree.  Each frontier node whose label converts (via
        ``sym.fromstring``) to a variable has the matching antecedent value
        inserted as its first child.
    """
    root = tree.str_to_tree(t)
    for leaf in root.frontier():
        symbol = sym.fromstring(leaf.label)
        if not sym.isvar(symbol):
            continue
        antecedent = antvalues[sym.getindex(symbol) - 1]
        leaf.insert_child(0, antecedent)
    return root
def visit(item):
    """Collect, for each target-side terminal under ``item``, its source positions.

    The deduction recorded for ``item`` in ``self.ded`` (keyed by
    ``id(item)``) is walked recursively.  ``self`` comes from the enclosing
    scope.

    Args:
        item: the item whose chosen deduction is expanded.

    Returns:
        A list with one entry per target-side terminal, in target order.
        Each entry is the list of ``ded.rule.f.stringpos(...)`` results for
        the source indices that the rule's ``'align'`` attribute links to
        that terminal (empty when nothing is linked).

    Side effects:
        Writes one trace line per rule-bearing deduction to standard output.
    """
    ded = self.ded[id(item)]
    if not ded.rule:
        # No rule on this deduction: defer to its first antecedent.
        return visit(ded.ants[0])

    # Group the rule's alignment links by target index: ei -> [fi, ...].
    align = collections.defaultdict(list)
    if 'align' in ded.rule.attrs:
        for fi, ei in ded.rule.attrs['align']:
            align[ei].append(fi)

    result = []
    # j1 is only assigned when a terminal is encountered, so it stays None
    # for a rule whose target side consists solely of variables.
    j1 = None
    for ei, e in enumerate(ded.rule.e):
        if sym.isvar(e):
            result.extend(visit(ded.ants[sym.getindex(e) - 1]))
        else:
            j1 = ded.ants[0].j if len(ded.ants) == 2 else None
            result.append([ded.rule.f.stringpos(fi, item.i, item.j, j1)
                           for fi in align[ei]])

    # Single-argument print: valid as a Python 2 statement and as a
    # Python 3 function call, and emits the same space-separated line.
    print("%s %s %s %s %s" % (ded.rule, item.i, item.j, j1, result))
    return result
def _ded_to_xml(node, result, memo, mode, models, weights):
    """Append the XML rendering of one deduction to ``result``.

    Emits an ``<and>`` element holding a ``<features>`` list (one
    ``<feature>`` per entry of ``node.dcost``) followed by the children.

    Args:
        node: deduction with ``rule``, ``ants`` and ``dcost`` attributes.
        result: list of string fragments, extended in place.
        memo: forwarded unchanged to ``_item_to_xml``.
        mode: ``'french'`` or ``'english'`` walks ``node.rule.f`` or
            ``node.rule.e`` when the node has a rule; in every other case
            the antecedents ``node.ants`` are walked.
        models: forwarded unchanged to ``_item_to_xml``.
        weights: when truthy, ``weights.dot(node.dcost)`` is written as the
            ``cost`` attribute.

    Returns:
        None.
    """
    quote = xml.sax.saxutils.quoteattr

    if weights:
        # NOTE(review): this form labels the element with id(node.rule),
        # the unweighted form below with id(node) - confirm this is intended.
        opening = '<and label=%s cost=%s>' % (
            quote(str(id(node.rule))),
            quote(str(weights.dot(node.dcost))))
    else:
        opening = '<and label=%s>' % quote(str(id(node)))
    result.append(opening)

    result.append('<features>')
    for name, value in node.dcost.iteritems():
        result.append('<feature name=%s value=%s/>' % (quote(name), quote(str(value))))
    result.append('</features>')

    if mode in ('french', 'english') and node.rule:
        children = node.rule.f if mode == 'french' else node.rule.e
    else:
        children = node.ants

    shared = dict(mode=mode, models=models, weights=weights)
    for child in children:
        if isinstance(child, Item):
            _item_to_xml(child, result, memo, **shared)
        elif sym.isvar(child):
            # A variable stands for the antecedent at its index minus one.
            _item_to_xml(node.ants[sym.getindex(child) - 1], result, memo, **shared)
        else:
            result.append('<leaf label=%s/>' % quote(sym.tostring(child)))
    result.append('</and>')
def _ded_to_text(node, result, memo, mode=None, weights=None): # Convert rule and features into single tokens #vstr = ",".join("%s:%s" % (quotefeature(f),node.dcost[f]) for f in node.dcost) vstr = "cost:%s" % weights.dot(node.dcost) #rstr = id(node.rule) rstr = id(node) s = "%s<%s>" % (rstr,vstr) if False and len(node.ants) == 0: # the format allows this but only if we don't tag with an id. but we tag everything with an id result.append(s) else: result.append('(') result.append(s) if mode == 'french': children = node.rule.f if node.rule else node.ants elif mode == 'english': children = node.rule.e if node.rule else node.ants else: children = node.ants for child in children: if isinstance(child, Item): result.append(' ') _item_to_text(child, result, memo, mode=mode, weights=weights) elif sym.isvar(child): result.append(' ') _item_to_text(node.ants[sym.getindex(child)-1], result, memo, mode=mode, weights=weights) else: result.append(' ') result.append(quoteattr(sym.tostring(child))) result.append(')')
def _fake_tree_helper(lhs, rhs, antvalues):
    """Build a single tree node for a rule from its two sides.

    Args:
        lhs: left-hand-side symbol; ``sym.totag(lhs)`` labels the new node.
        rhs: iterable of right-hand-side symbols.
        antvalues: sequence of antecedent values; a variable ``x`` in
            ``rhs`` is replaced by ``antvalues[sym.getindex(x) - 1]``.

    Returns:
        A ``tree.Node`` whose children follow ``rhs`` in order: the
        antecedent value for a variable, otherwise a fresh childless node
        labelled ``sym.tostring(x)``.
    """
    def as_child(symbol):
        if sym.isvar(symbol):
            return antvalues[sym.getindex(symbol) - 1]
        return tree.Node(sym.tostring(symbol), [])

    # Children are built before the label is computed, as in the loop form.
    kids = [as_child(symbol) for symbol in rhs]
    return tree.Node(sym.totag(lhs), kids)
def _fake_tree_helper(lhs, rhs, antvalues):
    """Build a single tree node for a rule from its two sides.

    Args:
        lhs: left-hand-side symbol; ``sym.totag(lhs)`` labels the new node.
        rhs: iterable of right-hand-side symbols.
        antvalues: sequence of antecedent values; a variable ``x`` in
            ``rhs`` is replaced by ``antvalues[sym.getindex(x) - 1]``.

    Returns:
        A ``tree.Node`` whose children follow ``rhs`` in order: the
        antecedent value for a variable, otherwise a fresh childless node
        labelled ``sym.tostring(x)``.
    """
    def as_child(symbol):
        if sym.isvar(symbol):
            return antvalues[sym.getindex(symbol) - 1]
        return tree.Node(sym.tostring(symbol), [])

    # Children are built before the label is computed, as in the loop form.
    kids = [as_child(symbol) for symbol in rhs]
    return tree.Node(sym.totag(lhs), kids)
def forest_to_json(root, fwords=None, mode=None, models=None, weights=None):
    """Render the forest obtained by iterating ``root`` as JSON text.

    The output object has an optional ``"source"`` array, a ``"nodes"``
    array (one entry per item, in iteration order), the ``"root"`` node
    index and an ``"edges"`` array with one entry per deduction.

    Args:
        root: iterable of items; each item has ``x`` and ``deds``.  ``root``
            itself must be among the items it yields, otherwise the root
            index is never assigned and formatting it fails.
        fwords: optional source words; ``int`` entries are converted with
            ``sym.tostring``.  Omitted from the output when falsy.
        mode: ``'french'`` or ``'english'`` takes an edge's tails from
            ``ded.rule.f`` or ``ded.rule.e`` when the deduction has a rule;
            in every other case the tails are ``ded.ants``.
        models: not used by this function.
        weights: not used by this function.

    Returns:
        The JSON text as a single string.
    """
    pieces = ['{\n']

    if fwords:
        words = [(sym.tostring(w) if type(w) is int else w) for w in fwords]
        pieces.append(' "source": [%s],\n' % ",".join(quotejson(w) for w in words))

    items = list(root)
    nodeindex = {}
    node_lines = []
    for position, item in enumerate(items):
        nodeindex[item] = position
        if item is root:
            root_position = position
        if item.x is None:
            node_lines.append(' {}')
        else:
            node_lines.append(' {"label": %s}' % quotejson(sym.totag(item.x)))
    pieces.append(' "nodes": [\n%s\n ],\n' % ",\n".join(node_lines))
    pieces.append(' "root": %d,\n' % root_position)

    edge_lines = []
    for head, item in enumerate(items):
        for ded in item.deds:
            if mode == 'french' and ded.rule:
                children = ded.rule.f
            elif mode == 'english' and ded.rule:
                children = ded.rule.e
            else:
                children = ded.ants

            tails = []
            for child in children:
                if isinstance(child, Item):
                    tails.append(str(nodeindex[child]))
                elif sym.isvar(child):
                    # A variable refers to the antecedent at its index minus one.
                    antecedent = ded.ants[sym.getindex(child) - 1]
                    tails.append(str(nodeindex[antecedent]))
                else:
                    tails.append(quotejson(sym.tostring(child)))

            features = "{%s}" % ",".join(
                "%s:%s" % (quotejson(name), value)
                for (name, value) in ded.dcost.iteritems())
            edge_lines.append(' {"head": %s, "tails": [%s], "features": %s}\n'
                              % (head, ",".join(tails), features))
    pieces.append(' "edges": [\n%s\n ]\n' % ",\n".join(edge_lines))

    pieces.append('}')
    return "".join(pieces)
def forest_to_json(root, fwords=None, mode=None, models=None, weights=None):
    """Render the forest obtained by iterating ``root`` as JSON text.

    The output object has an optional ``"source"`` array, a ``"nodes"``
    array (one entry per item, in iteration order), the ``"root"`` node
    index and an ``"edges"`` array with one entry per deduction.

    Args:
        root: iterable of items; each item has ``x`` and ``deds``.  ``root``
            itself must be among the items it yields, otherwise the root
            index is never assigned and formatting it fails.
        fwords: optional source words; ``int`` entries are converted with
            ``sym.tostring``.  Omitted from the output when falsy.
        mode: ``'french'`` or ``'english'`` takes an edge's tails from
            ``ded.rule.f`` or ``ded.rule.e`` when the deduction has a rule;
            in every other case the tails are ``ded.ants``.
        models: not used by this function.
        weights: not used by this function.

    Returns:
        The JSON text as a single string.
    """
    pieces = ['{\n']

    if fwords:
        words = [(sym.tostring(w) if type(w) is int else w) for w in fwords]
        pieces.append(' "source": [%s],\n' % ",".join(quotejson(w) for w in words))

    items = list(root)
    nodeindex = {}
    node_lines = []
    for position, item in enumerate(items):
        nodeindex[item] = position
        if item is root:
            root_position = position
        if item.x is None:
            node_lines.append(' {}')
        else:
            node_lines.append(' {"label": %s}' % quotejson(sym.totag(item.x)))
    pieces.append(' "nodes": [\n%s\n ],\n' % ",\n".join(node_lines))
    pieces.append(' "root": %d,\n' % root_position)

    edge_lines = []
    for head, item in enumerate(items):
        for ded in item.deds:
            if mode == 'french' and ded.rule:
                children = ded.rule.f
            elif mode == 'english' and ded.rule:
                children = ded.rule.e
            else:
                children = ded.ants

            tails = []
            for child in children:
                if isinstance(child, Item):
                    tails.append(str(nodeindex[child]))
                elif sym.isvar(child):
                    # A variable refers to the antecedent at its index minus one.
                    antecedent = ded.ants[sym.getindex(child) - 1]
                    tails.append(str(nodeindex[antecedent]))
                else:
                    tails.append(quotejson(sym.tostring(child)))

            features = "{%s}" % ",".join(
                "%s:%s" % (quotejson(name), value)
                for (name, value) in ded.dcost.iteritems())
            edge_lines.append(' {"head": %s, "tails": [%s], "features": %s}\n'
                              % (head, ",".join(tails), features))
    pieces.append(' "edges": [\n%s\n ]\n' % ",\n".join(edge_lines))

    pieces.append('}')
    return "".join(pieces)
def compute_crossings(a, r, cumul):
    """Flag the nonterminals of rule ``r`` that are involved in a crossing.

    Every source-side position of the rule is compared with every
    target-side position.  For an aligned terminal pair, or for a pair of
    nonterminals carrying the same index, ``region_aligned`` is queried for
    two sub-regions of the rule span; a positive total is treated as a
    crossing and ends the scan for that source position.

    Args:
        a: alignment object; ``a.aligned[fi][ei]`` is tested for truth.
        r: rule object providing ``f`` and ``e`` (symbol sequences),
            ``fpos`` and ``epos`` (an ``int`` for a terminal, a 2-tuple for
            a nonterminal) and ``span``, a 4-tuple ``(pfi, pfj, pei, pej)``.
        cumul: forwarded unchanged to ``region_aligned``.
            NOTE(review): presumably a cumulative alignment table - confirm.

    Returns:
        A two-element list of 0/1 flags.  Slot ``k`` is 1 when the
        nonterminal with index ``k + 1`` was found to cross.  Only crossing
        nonterminals are reported; crossing terminals do not appear in the
        result.

    Raises:
        IndexError: if a crossing nonterminal has an index greater than 2,
            because the result has exactly two slots.
    """
    nt_flags = [0, 0]
    pfi, pfj, pei, pej = r.span

    for rfi in range(len(r.f)):
        for rei in range(len(r.e)):
            fpos = r.fpos[rfi]
            epos = r.epos[rei]
            if type(fpos) is int and type(epos) is int and a.aligned[fpos][epos]:
                # Aligned terminal pair.  Terminals never set a flag in the
                # result; the test only decides when to stop scanning the
                # target side for this source position.
                crossing = (region_aligned(cumul, pfi, fpos, epos + 1, pej)
                            + region_aligned(cumul, fpos + 1, pfj, pei, epos))
                if crossing > 0:
                    break
            elif (type(fpos) is tuple and type(epos) is tuple
                  and sym.getindex(r.f[rfi]) == sym.getindex(r.e[rei])):
                # Coindexed nonterminals: positions are (start, end) pairs.
                fi, fj = fpos
                ei, ej = epos
                crossing = (region_aligned(cumul, pfi, fi, ej, pej)
                            + region_aligned(cumul, fj, pfj, pei, ei))
                if crossing > 0:
                    nt_flags[sym.getindex(r.f[rfi]) - 1] = 1
                    break
    return nt_flags
def _ded_to_xml(node, result, memo, mode, models, weights):
    """Append the XML rendering of one deduction to ``result``.

    Emits an ``<and>`` element holding a ``<features>`` list (one
    ``<feature>`` per entry of ``node.dcost``) followed by the children.

    Args:
        node: deduction with ``rule``, ``ants`` and ``dcost`` attributes.
        result: list of string fragments, extended in place.
        memo: forwarded unchanged to ``_item_to_xml``.
        mode: ``'french'`` or ``'english'`` walks ``node.rule.f`` or
            ``node.rule.e`` when the node has a rule; in every other case
            the antecedents ``node.ants`` are walked.
        models: forwarded unchanged to ``_item_to_xml``.
        weights: when truthy, ``weights.dot(node.dcost)`` is written as the
            ``cost`` attribute.

    Returns:
        None.
    """
    quote = xml.sax.saxutils.quoteattr

    if weights:
        # NOTE(review): this form labels the element with id(node.rule),
        # the unweighted form below with id(node) - confirm this is intended.
        opening = '<and label=%s cost=%s>' % (
            quote(str(id(node.rule))),
            quote(str(weights.dot(node.dcost))))
    else:
        opening = '<and label=%s>' % quote(str(id(node)))
    result.append(opening)

    result.append('<features>')
    for name, value in node.dcost.iteritems():
        result.append('<feature name=%s value=%s/>' % (quote(name), quote(str(value))))
    result.append('</features>')

    if mode in ('french', 'english') and node.rule:
        children = node.rule.f if mode == 'french' else node.rule.e
    else:
        children = node.ants

    shared = dict(mode=mode, models=models, weights=weights)
    for child in children:
        if isinstance(child, Item):
            _item_to_xml(child, result, memo, **shared)
        elif sym.isvar(child):
            # A variable stands for the antecedent at its index minus one.
            _item_to_xml(node.ants[sym.getindex(child) - 1], result, memo, **shared)
        else:
            result.append('<leaf label=%s/>' % quote(sym.tostring(child)))
    result.append('</and>')
def _ded_to_text(node, result, memo, mode=None, weights=None): # Convert rule and features into single tokens #vstr = ",".join("%s:%s" % (quotefeature(f),node.dcost[f]) for f in node.dcost) # lhuang: in case no weights vstr = "cost:%s" % weights.dot(node.dcost) if weights is not None \ else "_" rstr = id(node.rule) #rstr = id(node) s = "ruleid=%s<value=%s>" % (rstr,vstr) print "\truleid=%s" % rstr, if False and len(node.ants) == 0: # the format allows this but only if we don't tag with an id. but we tag everything with an id result.append(s) else: result.append('(') result.append(s) if mode == 'french': children = node.rule.f if node.rule else node.ants elif mode == 'english': # lhuang: default mode: english side children = node.rule.e if node.rule else node.ants else: children = node.ants for child in children: if isinstance(child, Item): result.append(' it ') _item_to_text(child, result, memo, mode=mode, weights=weights) elif sym.isvar(child): # lhuang: variable, do recursion result.append(' var ') _item_to_text(node.ants[sym.getindex(child)-1], result, memo, mode=mode, weights=weights) else: # lhuang: english word result.append(' word ') w = quoteattr(sym.tostring(child)) result.append(w) print w, result.append(')') print # end of a hyperedge
def visit(item):
    """Collect, for each target-side terminal under ``item``, its source positions.

    The deduction recorded for ``item`` in ``self.ded`` (keyed by
    ``id(item)``) is walked recursively.  ``self`` comes from the enclosing
    scope.

    Args:
        item: the item whose chosen deduction is expanded.

    Returns:
        A list with one entry per target-side terminal, in target order.
        Each entry is the list of ``ded.rule.f.stringpos(...)`` results for
        the source indices that the rule's ``'align'`` attribute links to
        that terminal (empty when nothing is linked).

    Side effects:
        Writes one trace line per rule-bearing deduction to standard output.
    """
    ded = self.ded[id(item)]
    if not ded.rule:
        # No rule on this deduction: defer to its first antecedent.
        return visit(ded.ants[0])

    # Group the rule's alignment links by target index: ei -> [fi, ...].
    align = collections.defaultdict(list)
    if 'align' in ded.rule.attrs:
        for fi, ei in ded.rule.attrs['align']:
            align[ei].append(fi)

    result = []
    # j1 is only assigned when a terminal is encountered, so it stays None
    # for a rule whose target side consists solely of variables.
    j1 = None
    for ei, e in enumerate(ded.rule.e):
        if sym.isvar(e):
            result.extend(visit(ded.ants[sym.getindex(e) - 1]))
        else:
            j1 = ded.ants[0].j if len(ded.ants) == 2 else None
            result.append([ded.rule.f.stringpos(fi, item.i, item.j, j1)
                           for fi in align[ei]])

    # Single-argument print: valid as a Python 2 statement and as a
    # Python 3 function call, and emits the same space-separated line.
    print("%s %s %s %s %s" % (ded.rule, item.i, item.j, j1, result))
    return result
def _ded_to_text(node, result, memo, mode=None, weights=None): # Convert rule and features into single tokens #vstr = ",".join("%s:%s" % (quotefeature(f),node.dcost[f]) for f in node.dcost) vstr = "cost:%s" % weights.dot(node.dcost) #rstr = id(node.rule) rstr = id(node) s = "%s<%s>" % (rstr, vstr) if False and len( node.ants ) == 0: # the format allows this but only if we don't tag with an id. but we tag everything with an id result.append(s) else: result.append('(') result.append(s) if mode == 'french': children = node.rule.f if node.rule else node.ants elif mode == 'english': children = node.rule.e if node.rule else node.ants else: children = node.ants for child in children: if isinstance(child, Item): result.append(' ') _item_to_text(child, result, memo, mode=mode, weights=weights) elif sym.isvar(child): result.append(' ') _item_to_text(node.ants[sym.getindex(child) - 1], result, memo, mode=mode, weights=weights) else: result.append(' ') result.append(quoteattr(sym.tostring(child))) result.append(')')