def __init__(self):
    """Load the trie and frequency array for every terminal file.

    For each ``(key, file_stem)`` pair returned by
    ``GrammarStructure().getTermFiles()`` this imports the module named
    ``file_stem`` from ``hny_config.GRAMMAR_DIR``, reads the attribute named
    ``key`` from it, and loads the companion ``<file_stem>.tri`` trie.
    The results are stored in ``self.term_files[key]`` under the keys
    ``'trie'``, ``'arr'`` and ``'trie_fl'``.
    """
    super(TrainedGrammar, self).__init__()
    self.term_files = {}
    self.g_struc = GrammarStructure()

    grammar_dir = hny_config.GRAMMAR_DIR
    term_files = self.g_struc.getTermFiles()
    # Make the grammar directory importable once, instead of appending a
    # duplicate sys.path entry on every loop iteration.
    if term_files and grammar_dir not in sys.path:
        sys.path.append(grammar_dir)

    for k, f in term_files.items():
        module = __import__(f)
        trie_path = grammar_dir + f + '.tri'
        self.term_files[k] = {
            'trie': marisa_trie.Trie().load(trie_path),
            # Plain attribute lookup; no need to eval() a built-up string.
            'arr': getattr(module, k),
            'trie_fl': trie_path,
        }
class TrainedGrammar(Grammar):
    """Grammar whose terminal rules are backed by tries and frequency arrays.

    Non-terminals listed in ``self.term_files`` are served from a
    ``marisa_trie.Trie`` plus a parallel array loaded from a module in
    ``hny_config.GRAMMAR_DIR``; every other lookup is delegated to the
    ``Grammar`` base class.
    """

    def __init__(self):
        """Load the trie and frequency array for every terminal file.

        For each ``(key, file_stem)`` pair returned by
        ``GrammarStructure().getTermFiles()`` this imports the module named
        ``file_stem``, reads the attribute named ``key`` from it, and loads
        the companion ``<file_stem>.tri`` trie.
        """
        super(TrainedGrammar, self).__init__()
        self.term_files = {}
        self.g_struc = GrammarStructure()

        grammar_dir = hny_config.GRAMMAR_DIR
        term_files = self.g_struc.getTermFiles()
        # Make the grammar directory importable once, instead of appending a
        # duplicate sys.path entry on every loop iteration.
        if term_files and grammar_dir not in sys.path:
            sys.path.append(grammar_dir)

        for k, f in term_files.items():
            module = __import__(f)
            trie_path = grammar_dir + f + '.tri'
            self.term_files[k] = {
                'trie': marisa_trie.Trie().load(trie_path),
                # Plain attribute lookup; no need to eval() a built-up string.
                'arr': getattr(module, k),
                'trie_fl': trie_path,
            }

    def __getitem__(self, key):
        """Return the array for a term-file key, else ``self.G[key]``."""
        if key in self.term_files:
            return self.term_files[key]['arr']
        return self.G[key]

    def total_freq(self, key):
        """Return the last element of the key's array, or defer to the base class.

        NOTE(review): the last array slot presumably holds the total
        frequency -- confirm against the code that writes these arrays.
        """
        if key in self.term_files:
            return self.term_files[key]['arr'][-1]
        return super(TrainedGrammar, self).total_freq(key)

    def get_rhs(self, lhs, pt):
        """Return ``(word, freq, TERMINAL)`` for a term-file ``lhs``.

        For any other ``lhs`` the base-class ``get_rhs`` result is returned.
        """
        if lhs in self.term_files:
            entry = self.term_files[lhs]
            w, f = self.freq2key(pt, entry['trie'], entry['arr'])
            return w, f, TERMINAL
        return super(TrainedGrammar, self).get_rhs(lhs, pt)

    @staticmethod
    def key2freq(w, T, A):
        """Return ``(sum(A[:i]), sum(A[:i]) + A[i])`` for the trie id ``i`` of ``w``.

        Returns ``(0.0, 0.0)`` when ``T.key_id`` raises ``KeyError`` or
        yields a negative id (the latter also prints a diagnostic).
        """
        try:
            # ``unicode`` is the Python 2 builtin; this file targets Python 2.
            i = T.key_id(unicode(w))
        except KeyError:
            return 0.0, 0.0
        if i < 0:
            # Parenthesised single-argument print behaves the same as the
            # Python 2 print statement.
            print("Could not find {w} in the trie.".format(w=w))
            return 0.0, 0.0
        S = sum(A[:i])
        return S, S + A[i]

    @staticmethod
    def freq2key(f, T, A):
        """Return ``(key, A[i])`` where ``i = getIndex(f, A)``.

        The key is recovered with ``T.restore_key(i)``. ``getIndex`` is
        defined outside this class.
        """
        i = getIndex(f, A)
        w = T.restore_key(i)
        return w, A[i]

    def get_freq_range(self, lhs, rhs):
        """Return the ``key2freq`` range of ``rhs`` for a term-file ``lhs``.

        For any other ``lhs`` the base-class ``get_freq_range`` is used.
        """
        if lhs in self.term_files:
            entry = self.term_files[lhs]
            return TrainedGrammar.key2freq(rhs, entry['trie'], entry['arr'])
        return super(TrainedGrammar, self).get_freq_range(lhs, rhs)
def __init__(self, config_fl=None, scanner=None, Empty=False):
    """Set up the scanner, the grammar structure and the rule table ``self.G``.

    Args:
        config_fl: Accepted for interface compatibility; not read here.
        scanner: Scanner to use; a new ``Scanner()`` is created when falsy.
        Empty: When true, leave ``self.G`` empty and return early.

    Unless ``Empty`` is set, ``self.G`` is seeded with one rule set per
    character class ('L' lowercase letters, 'D' digits, 'Y' punctuation)
    and with the matching 'G' productions.
    """
    self.scanner = scanner or Scanner()
    self.grammar_structure = GrammarStructure().G
    self.G = defaultdict(OrderedDict)
    if Empty:
        # Returning for empty: the caller wants a grammar with no rules.
        return

    from string import ascii_lowercase, digits, punctuation

    seed_count = MIN_COUNT - 1
    alphabets = (('L', ascii_lowercase), ('D', digits), ('Y', punctuation))
    for symbol, alphabet in alphabets:
        # Each character gets its own fresh [count, type] list.
        self.G[symbol] = OrderedDict(
            (ch, [seed_count, TERMINAL]) for ch in alphabet
        )
        start_rules = self.G['G']
        start_rules['%s,G' % symbol] = [MIN_COUNT / 2 - 1, NONTERMINAL]
        start_rules['%s' % symbol] = [seed_count, NONTERMINAL]