class DTE(object):
    """Thin encoder/decoder facade over a grammar object stored in ``self.G``.

    Every operation is forwarded to the grammar: rule encoding/decoding,
    password encoding/decoding, equality and truthiness.
    """

    def __init__(self, grammar=None):
        """Store ``grammar``, or a fresh ``SubGrammar()`` when it is falsy.

        NOTE(review): the fallback triggers on any falsy grammar, not only
        ``None``.  Other call sites in this file pass a base grammar to
        ``SubGrammar`` -- confirm the no-argument form is valid.
        """
        self.G = grammar
        if not self.G:
            self.G = SubGrammar()
            # self.G.load(hny_config.GRAMMAR_DIR+'/grammar.cfg')

    def encode(self, lhs, rhs):
        """Return the grammar's encoding of the rule ``lhs -> rhs``.

        Raises:
            AssertionError: if ``lhs`` is not contained in the grammar.
            SystemExit: with status 0, after printing a diagnostic line, when
                the grammar returns a falsy encoding.
        """
        assert lhs in self.G, "unknown left-hand side: %r" % (lhs,)
        code = self.G.encode_rule(lhs, rhs)
        if not code:
            # Single-argument print() emits the same line on Python 2 and 3.
            print("NonERROR %s %s %s" % (lhs, rhs, code))
            # Same effect as the site-provided ``exit(0)``, but available even
            # when the ``site`` module is not loaded.
            # NOTE(review): status 0 signals success to the caller of the
            # process -- confirm this is intended for a failed encoding.
            raise SystemExit(0)
        return code

    def decode(self, lhs, pt):
        """Return the right-hand side that ``pt`` selects for ``lhs``.

        This class defines no ``decode_rule`` of its own, so the call is
        delegated to the grammar, mirroring ``encode``.  An object that does
        provide its own ``decode_rule`` (e.g. a subclass) keeps precedence.
        """
        rule_decoder = getattr(self, 'decode_rule', None)
        if rule_decoder is None:
            rule_decoder = self.G.decode_rule
        return rule_decoder(lhs, pt)

    def encode_pw(self, pw):
        """Forward ``pw`` to the grammar's ``encode_pw`` and return its result."""
        return self.G.encode_pw(pw)

    def decode_pw(self, P):
        """Forward ``P`` to the grammar's ``decode_pw`` and return its result."""
        return self.G.decode_pw(P)

    def __eq__(self, o_dte):
        """Two encoders are equal when their grammars compare equal."""
        try:
            other_grammar = o_dte.G
        except AttributeError:
            # Not a DTE-like object: let Python fall back to its default
            # comparison instead of leaking an AttributeError.
            return NotImplemented
        return self.G == other_grammar

    def __ne__(self, o_dte):
        """Inverse of ``__eq__`` (Python 2 does not derive it automatically)."""
        outcome = self.__eq__(o_dte)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __nonzero__(self):
        """Truthiness is whatever the grammar's ``is_grammar()`` reports."""
        return self.G.is_grammar()

    # Python 3 looks up ``__bool__`` instead of ``__nonzero__``.
    __bool__ = __nonzero__
def decode_grammar(self, P):
    """Rebuild a sub-grammar of ``self.G`` from the sequence ``P``.

    Starting from the non-terminal ``'G'``, each visited non-terminal
    consumes one item of ``P`` to decode how many rules it has, then one
    item per rule to decode the rule's right-hand side.  Non-terminals
    referenced by those rules that have not been visited or queued yet are
    queued in turn.

    Args:
        P: iterable of encoded values, consumed strictly in order.

    Returns:
        A ``SubGrammar`` built on ``self.G`` holding the decoded rules,
        after ``finalize()`` has been called on it.

    Raises:
        AssertionError: if a non-terminal is popped a second time.
        StopIteration: if ``P`` runs out before decoding completes.
    """
    g = SubGrammar(self.G)
    vd = VaultDistribution()
    iterp = iter(P)
    stack = ['G']
    done = []
    while stack:
        head = stack.pop()
        assert head not in done
        done.append(head)
        # next() works on Python 2.6+ and 3, unlike the .next() method.
        n = vd.decode_vault_size(head, next(iterp))
        # ``done`` and ``stack`` do not change while the rules of ``head``
        # are decoded, so the exclusion list is built once per non-terminal.
        seen = done + stack
        t_set = []
        for _ in range(n):
            rhs = self.decode(head, next(iterp))
            # NOTE(review): '__totoal__' looks like a typo for '__total__';
            # kept verbatim -- confirm against the grammar's bookkeeping key.
            if rhs != '__totoal__':
                for nt in self.G.get_actual_NonTlist(head, rhs):
                    if nt not in seen and nt not in t_set:
                        t_set.append(nt)
            # NOTE(review): the source layout was flattened; add_rule is
            # assumed to run for every decoded rule -- confirm.
            g.add_rule(head, rhs)
        # Reverse before pushing so the first discovered non-terminal is
        # the next one popped.
        t_set.reverse()
        stack.extend(t_set)
    g.finalize()  # fixes the freq and some other book keepings
    return g
def cal_size_subG(base_pcfg, vault_set_file):
    """Compute sub-grammar sizes for every usable vault in a JSON file.

    The file is expected to hold a JSON object mapping a vault id to a list
    of passwords.  Empty passwords are dropped; vaults left with fewer than
    two passwords are ignored; vaults containing any password that is not
    pure ASCII are skipped and their ids are written to ``sys.stderr``.

    Args:
        base_pcfg: base grammar handed to ``SubGrammar`` for each vault.
        vault_set_file: path of the JSON file to read.

    Returns:
        dict mapping each kept vault id to a dict with the keys ``'vault'``
        (its password list), ``'length'`` (number of passwords) and, for
        every ``nt`` in ``NT``, the value ``len(g[nt]) - 1`` of the
        sub-grammar updated with that vault.
    """
    def _is_ascii(password):
        # encode() rejects non-ASCII text on both Python 2 and 3; entries
        # that are not strings have no such method and count as invalid.
        try:
            password.encode('ascii')
        except (UnicodeError, AttributeError):
            return False
        return True

    # Context manager so the file handle is released deterministically.
    with open(vault_set_file) as vault_fh:
        raw_vaults = json.load(vault_fh)

    vaults = {}
    for vault_id, passwords in raw_vaults.items():
        non_empty = [pw for pw in passwords if pw]
        if len(non_empty) > 1:
            vaults[vault_id] = non_empty

    # Each offending vault is recorded once, however many bad passwords
    # it contains.
    rm = [vault_id for vault_id, passwords in vaults.items()
          if not all(_is_ascii(pw) for pw in passwords)]
    sys.stderr.write(' '.join([str(x) for x in rm]))

    # The removal must match on the vault id; comparing whole (id, passwords)
    # pairs against the id list never matched and let every vault through.
    for vault_id in rm:
        del vaults[vault_id]

    D = {}
    for vault_id, passwords in vaults.items():
        g = SubGrammar(base_pcfg)
        g.update_grammar(*passwords)
        entry = {'vault': passwords, 'length': len(passwords)}
        for nt in NT:
            entry[nt] = len(g[nt]) - 1
        D[vault_id] = entry
    return D
def cal_size_subG(base_pcfg, vault_set_file):
    """Compute sub-grammar sizes for every usable vault in a JSON file.

    The file is expected to hold a JSON object mapping a vault id to a list
    of passwords.  Empty passwords are dropped; vaults left with fewer than
    two passwords are ignored; vaults containing any password that is not
    pure ASCII are skipped and their ids are written to ``sys.stderr``.

    Args:
        base_pcfg: base grammar handed to ``SubGrammar`` for each vault.
        vault_set_file: path of the JSON file to read.

    Returns:
        dict mapping each kept vault id to a dict with the keys ``'vault'``
        (its password list), ``'length'`` (number of passwords) and, for
        every ``nt`` in ``NT``, the value ``len(g[nt]) - 1`` of the
        sub-grammar updated with that vault.
    """
    def _is_ascii(password):
        # encode() rejects non-ASCII text on both Python 2 and 3; entries
        # that are not strings have no such method and count as invalid.
        try:
            password.encode('ascii')
        except (UnicodeError, AttributeError):
            return False
        return True

    # Context manager so the file handle is released deterministically.
    with open(vault_set_file) as vault_fh:
        raw_vaults = json.load(vault_fh)

    vaults = {}
    for vault_id, passwords in raw_vaults.items():
        non_empty = [pw for pw in passwords if pw]
        if len(non_empty) > 1:
            vaults[vault_id] = non_empty

    # Each offending vault is recorded once, however many bad passwords
    # it contains.
    rm = [vault_id for vault_id, passwords in vaults.items()
          if not all(_is_ascii(pw) for pw in passwords)]
    sys.stderr.write(' '.join([str(x) for x in rm]))

    # The removal must match on the vault id; comparing whole (id, passwords)
    # pairs against the id list never matched and let every vault through.
    for vault_id in rm:
        del vaults[vault_id]

    D = {}
    for vault_id, passwords in vaults.items():
        g = SubGrammar(base_pcfg)
        g.update_grammar(*passwords)
        entry = {'vault': passwords, 'length': len(passwords)}
        for nt in NT:
            entry[nt] = len(g[nt]) - 1
        D[vault_id] = entry
    return D
def __init__(self, grammar=None):
    """Keep ``grammar`` in ``self.G``; use a new ``SubGrammar()`` if it is falsy."""
    # Any falsy grammar (not only None) triggers the fallback.
    self.G = grammar if grammar else SubGrammar()