Ejemplo n.º 1
0
class DTE(object):
    """Thin facade over a grammar object that performs the real work.

    Every operation is forwarded to ``self.G``.  The class itself only adds
    the precondition check and failure handling in :meth:`encode`, and
    defines equality and truthiness in terms of the wrapped grammar.
    """

    def __init__(self, grammar=None):
        """Wrap `grammar`.

        Any falsy `grammar` (including the default ``None``) is replaced by
        a freshly constructed ``SubGrammar()``.
        """
        self.G = grammar
        if not self.G:
            self.G = SubGrammar()
            # self.G.load(hny_config.GRAMMAR_DIR+'/grammar.cfg')

    def encode(self, lhs, rhs):
        """Return ``self.G.encode_rule(lhs, rhs)``.

        Asserts that `lhs` is contained in the grammar.  When the grammar
        returns a falsy encoding, one diagnostic line is printed and the
        process exits via ``SystemExit``.
        """
        assert lhs in self.G
        a = self.G.encode_rule(lhs, rhs)
        if not a:
            # A single pre-formatted argument prints identically under the
            # Python 2 print statement and the Python 3 print function.
            print("NonERROR %s %s %s" % (lhs, rhs, a))
            # NOTE(review): the exit status is 0 although encoding failed;
            # preserved from the original -- confirm this is intended.
            raise SystemExit(0)
        return a

    def decode(self, lhs, pt):
        """Return ``self.G.decode_rule(lhs, pt)``.

        Delegates to the grammar, mirroring :meth:`encode`; this class does
        not define a ``decode_rule`` of its own.
        """
        return self.G.decode_rule(lhs, pt)

    def encode_pw(self, pw):
        """Return ``self.G.encode_pw(pw)``."""
        return self.G.encode_pw(pw)

    def decode_pw(self, P):
        """Return ``self.G.decode_pw(P)``."""
        return self.G.decode_pw(P)

    def __eq__(self, o_dte):
        """Two DTEs are equal when their wrapped grammars compare equal."""
        if not isinstance(o_dte, DTE):
            return NotImplemented
        return self.G == o_dte.G

    def __ne__(self, o_dte):
        """Negation of :meth:`__eq__` (Python 2 does not derive it)."""
        result = self.__eq__(o_dte)
        if result is NotImplemented:
            return result
        return not result

    def __nonzero__(self):
        """Truthiness is ``self.G.is_grammar()``."""
        return self.G.is_grammar()

    # Python 3 looks up __bool__ instead of __nonzero__.
    __bool__ = __nonzero__
Ejemplo n.º 2
0
 def decode_grammar(self, P):
     """Rebuild a ``SubGrammar`` from the encoded sequence `P`.

     Decoding starts at the non-terminal ``'G'`` and proceeds with an
     explicit stack.  For each popped head, one element of `P` is turned
     into a count by ``VaultDistribution.decode_vault_size``; that many
     further elements are then decoded through ``self.decode`` into
     right-hand sides, each of which is added to the result as a rule of
     the head.  Non-terminals reported by
     ``self.G.get_actual_NonTlist(head, rhs)`` that are neither processed
     nor already queued are scheduled for decoding.

     Elements of `P` are consumed strictly in order; ``StopIteration``
     propagates if `P` is exhausted early.

     Returns the finalized ``SubGrammar``.
     """
     g = SubGrammar(self.G)
     vd = VaultDistribution()
     iterp = iter(P)
     stack = ['G']
     done = []
     while stack:
         head = stack.pop()
         assert head not in done
         done.append(head)
         n = vd.decode_vault_size(head, next(iterp))
         # Neither `done` nor `stack` changes while this head's rules are
         # decoded, so the exclusion list is built once per head.
         seen = done + stack
         pending = []
         for _ in range(n):
             rhs = self.decode(head, next(iterp))
             # NOTE(review): the sentinel is spelled '__totoal__'; kept
             # byte-for-byte -- confirm it matches what decode() yields.
             if rhs != '__totoal__':
                 for nt in self.G.get_actual_NonTlist(head, rhs):
                     if nt not in seen and nt not in pending:
                         pending.append(nt)
             g.add_rule(head, rhs)
         # Reversed so that the first non-terminal discovered is the next
         # one popped from the stack.
         pending.reverse()
         stack.extend(pending)
     g.finalize()  # fixes the freq and some other book keepings
     return g
Ejemplo n.º 3
0
def cal_size_subG(base_pcfg, vault_set_file):
    """Measure the sub-grammar induced by each vault in a JSON vault file.

    `vault_set_file` is the path of a JSON object mapping a vault key to a
    list of entries.  Falsy entries are dropped.  A vault is skipped when
    fewer than two entries remain, or when any remaining entry is not
    ASCII text; the keys of vaults skipped for the latter reason are
    written to stderr, space-separated.

    For every remaining vault a ``SubGrammar(base_pcfg)`` is updated with
    the vault's entries.

    Returns a dict mapping each vault key to a dict holding ``'vault'``
    (the kept entries), ``'length'`` (their count) and, for every ``nt`` in
    the module-level ``NT``, ``len(g[nt]) - 1``.
    """
    def _is_ascii(entry):
        # Non-string entries have no .encode and are treated as non-ASCII.
        try:
            entry.encode('ascii')
        except (UnicodeError, AttributeError):
            return False
        return True

    with open(vault_set_file) as fh:
        raw = json.load(fh)

    tdata = []
    for k, v in raw.items():
        entries = [p for p in v if p]
        if len(entries) > 1:
            tdata.append((k, entries))

    # Each offending key is recorded once, however many entries are bad.
    rm = [k for k, v in tdata if not all(_is_ascii(p) for p in v)]
    sys.stderr.write(' '.join([str(x) for x in rm]))

    # Filter on the key: comparing whole (key, entries) pairs against the
    # list of keys never matches, so nothing would be removed.
    dropped = set(rm)
    data = {k: v for k, v in tdata if k not in dropped}

    D = {}
    for k, v in data.items():
        g = SubGrammar(base_pcfg)
        g.update_grammar(*v)
        # NOTE(review): the "- 1" presumably discounts a bookkeeping entry
        # stored with each non-terminal's rules -- confirm.
        res = [(nt, len(g[nt]) - 1) for nt in NT]
        D[k] = {'vault': v, 'length': len(v)}
        D[k].update(dict(res))
    return D
Ejemplo n.º 4
0
def cal_size_subG(base_pcfg, vault_set_file):
    """Measure the sub-grammar induced by each vault in a JSON vault file.

    `vault_set_file` is the path of a JSON object mapping a vault key to a
    list of entries.  Falsy entries are dropped.  A vault is skipped when
    fewer than two entries remain, or when any remaining entry is not
    ASCII text; the keys of vaults skipped for the latter reason are
    written to stderr, space-separated.

    For every remaining vault a ``SubGrammar(base_pcfg)`` is updated with
    the vault's entries.

    Returns a dict mapping each vault key to a dict holding ``'vault'``
    (the kept entries), ``'length'`` (their count) and, for every ``nt`` in
    the module-level ``NT``, ``len(g[nt]) - 1``.
    """
    def _is_ascii(entry):
        # Non-string entries have no .encode and are treated as non-ASCII.
        try:
            entry.encode('ascii')
        except (UnicodeError, AttributeError):
            return False
        return True

    with open(vault_set_file) as fh:
        raw = json.load(fh)

    tdata = []
    for k, v in raw.items():
        entries = [p for p in v if p]
        if len(entries) > 1:
            tdata.append((k, entries))

    # Each offending key is recorded once, however many entries are bad.
    rm = [k for k, v in tdata if not all(_is_ascii(p) for p in v)]
    sys.stderr.write(' '.join([str(x) for x in rm]))

    # Filter on the key: comparing whole (key, entries) pairs against the
    # list of keys never matches, so nothing would be removed.
    dropped = set(rm)
    data = {k: v for k, v in tdata if k not in dropped}

    D = {}
    for k, v in data.items():
        g = SubGrammar(base_pcfg)
        g.update_grammar(*v)
        # NOTE(review): the "- 1" presumably discounts a bookkeeping entry
        # stored with each non-terminal's rules -- confirm.
        res = [(nt, len(g[nt]) - 1) for nt in NT]
        D[k] = {'vault': v, 'length': len(v)}
        D[k].update(dict(res))
    return D
Ejemplo n.º 5
0
 def __init__(self, grammar=None):
     """Store `grammar` on ``self.G``.

     A falsy `grammar` (including the default ``None``) is replaced by a
     newly constructed ``SubGrammar()``.
     """
     self.G = grammar or SubGrammar()