def __init__(self):
    """Create the lookup caches and register the add/delete callbacks.

    ``self.cache`` holds one ``datrie.Trie`` over ``string.printable`` under
    each of the keys ``'resources'`` and ``'collections'``.
    ``self._add_item`` is registered under ``'created'`` and
    ``self._del_item`` under ``'deleted'`` on both ``Resource`` and
    ``Collection``.
    """
    self.cache = {
        kind: datrie.Trie(string.printable)
        for kind in ('resources', 'collections')
    }
    # Resource is registered before Collection; 'created' before 'deleted'.
    for model in (Resource, Collection):
        model.register('created', self._add_item)
        model.register('deleted', self._del_item)
def test_trie_len():
    """len() of a trie equals the number of stored keys, including zero."""
    words = ['foo', 'f', 'faa', 'bar', 'foobar']
    filled = datrie.Trie(string.ascii_lowercase)
    for key in words:
        filled[key] = None
    assert len(filled) == len(words)

    # Calling len on an empty trie caused segfault, see #17 on GitHub.
    empty = datrie.Trie(string.ascii_lowercase)
    assert len(empty) == 0
def createPrefixTree(INVPATH, name):
    """Load an inverted index from JSON into a forward and a reversed trie.

    Args:
        INVPATH: Path prefix. It is concatenated directly with ``name``, so
            it must already end with a path separator if one is needed.
        name: File name of the UTF-8 encoded JSON file. ``.items()`` is
            called on the decoded value, so it must be a JSON object.

    Returns:
        A ``(trie, reverseTrie)`` tuple. ``trie`` maps every key of the
        index to its value; ``reverseTrie`` maps the reversed key to the
        same value.
    """
    alphabet = "абвгдеёжзийклмнопрстуфхцчшщъыьэюя"
    # The context manager closes the file even when JSON decoding fails.
    with codecs.open(INVPATH + name, 'r', 'utf-8') as jsonFile:
        invertIndex = json.load(jsonFile)
    trie = datrie.Trie(alphabet)
    reverseTrie = datrie.Trie(alphabet)
    for key, value in invertIndex.items():
        trie[key] = value
        reverseTrie[key[::-1]] = value
    return trie, reverseTrie
def __init__(self, alphabet=None, ranges=None):
    """
    Wrap a ``datrie.Trie`` built from `alphabet` or `ranges`.

    A string `alphabet` takes precedence; otherwise `ranges` is used when it
    is not None. When neither applies, a notice is printed and the trie is
    built over ``string.printable``.
    """
    if isinstance(alphabet, str):
        backing = datrie.Trie(alphabet)
    elif ranges is not None:
        backing = datrie.Trie(ranges=ranges)
    else:
        print(
            "Either `alphabet` or `ranges` must be applied when initialing. Using english related chars"
        )
        backing = datrie.Trie(string.printable)
    self.trie = backing
def read_primers(args):
    """Read a tab-separated primer file into a trie keyed by primer sequence.

    Reads ``args.primers`` (file path) and ``args.prefix``. Only the first
    seven columns of each row are used: chrom, forward position, reverse
    position, forward name, reverse name, forward sequence, reverse
    sequence. Rows that raise ValueError while being unpacked or while the
    positions are converted to int are reported with print() and skipped.

    Sequences are upper-cased and, when ``args.prefix`` is not None, cut to
    their first ``args.prefix`` characters before being used as trie keys.

    Returns:
        ``(trie, primer_names)``: the trie maps each sequence to a
        ``PrimerInfo``; ``primer_names`` lists the forward and reverse names
        of every accepted row, in file order.
    """
    trie = datrie.Trie(u'ATGCN')
    primer_names = []
    with open(args.primers) as primer_file:
        for row in csv.reader(primer_file, delimiter='\t'):
            try:
                (chrom, pos_fwd, pos_rev,
                 name_fwd, name_rev, seq_fwd, seq_rev) = row[:7]
                pos_fwd = int(pos_fwd)
                pos_rev = int(pos_rev)
            except ValueError:
                print("Can't parse row: {}".format(row))
                continue
            primer_names.extend([name_fwd, name_rev])
            # A prefix of None slices the whole string, so a single
            # expression covers both "prefix only" and "whole primer".
            seq_fwd = unicode(seq_fwd.upper()[:args.prefix])
            seq_rev = unicode(seq_rev.upper()[:args.prefix])
            trie[seq_fwd] = PrimerInfo(chrom, 'fwd', name_fwd, pos_fwd, seq_fwd)
            trie[seq_rev] = PrimerInfo(chrom, 'rev', name_rev, pos_rev, seq_rev)
    return trie, primer_names
def prepare_keyword_trie(keyword_file):
    """Build a trie mapping each keyword to the list of its entries.

    Every line of the UTF-8 file ``keyword_file`` is expected to hold four
    tab-separated fields: key, value, integer weight, category. Lines with
    a different field count or a non-integer weight are skipped.

    Args:
        keyword_file: Path of the keyword file.

    Returns:
        A ``datrie.Trie`` whose alphabet is the set of characters occurring
        in the accepted keys. Each key maps to a list of dicts with the keys
        ``'k'``, ``'v'``, ``'w'`` and ``'c'``, in file order.
    """
    grouped = {}
    chars = set()
    with open(keyword_file, 'r', encoding='utf8') as kw_file:
        for line in kw_file:
            fields = line.strip().split('\t')
            # The raw line must contain exactly three tabs AND still have
            # four fields after stripping: strip() can swallow an empty
            # leading or trailing field, which would otherwise make the
            # unpacking below raise ValueError.
            if line.count('\t') != 3 or len(fields) != 4:
                continue
            key, value, weight_str, category = fields
            try:
                weight = int(weight_str)
            except ValueError:
                # Non-numeric weight: ignore the row.
                continue
            grouped.setdefault(key, []).append({
                'k': key,
                'v': value,
                'w': weight,
                'c': category,
            })
            chars.update(key)
    # Sorted only to make the alphabet string deterministic.
    kw_trie = datrie.Trie(''.join(sorted(chars)))
    for key, entries in grouped.items():
        kw_trie[key] = entries
    return kw_trie
def solve(board, pieces):
    """Place the pieces on ``board`` round by round and return the survivors.

    ``get_ordering(pieces, board)`` must return a list of ``(piece, count)``
    pairs; it is consumed from the end with ``pop()``. In each round every
    candidate whose canonical form has not been expanded yet is extended
    with every combination of ``count`` free cells, and the results are
    filtered with ``filterNode``.

    Args:
        board: Starting board; must provide ``get_canonical()``, ``free``
            and ``add_piece(move)``.
        pieces: Pieces to place, passed to ``get_ordering``.

    Returns:
        A lazy generator of boards. The filtering of the last round only
        runs while the caller iterates the result.
    """
    candidates = [board]
    next_candidates = iter([])
    pieces = get_ordering(pieces, board)
    print("pieces %s" % (pieces, ))
    # Canonical forms that were already expanded. A trie used to be built
    # here and was immediately replaced by a dict, so it was never used;
    # a plain set does the same job.
    seen = set()
    while pieces:
        (piece, count) = pieces.pop()
        print("processing %s %i" % (piece, count))
        for c in candidates:
            cform = unicode(c.get_canonical().__repr__())
            if cform not in seen:
                seen.add(cform)
                moves = ([(piece, ) + t for t in combo]
                         for combo in combinations(c.free, count))
                # The list is built eagerly, so add_piece() runs here.
                next_candidates = chain(next_candidates,
                                        [c.add_piece(move) for move in moves])
        # NOTE: this generator is consumed during the NEXT round (or by the
        # caller), i.e. after the next pieces.pop(); filterNode therefore
        # sees `pieces` as it is at consumption time.
        candidates = (n for n in next_candidates
                      if n is not None and filterNode(n, pieces))
        next_candidates = iter([])
    return candidates
def solve_dfs(board, pieces, ordering):
    """Search depth-first for boards on which every piece group is placed.

    ``ordering(pieces, board)`` must return a sequence of ``(piece, count)``
    pairs. The search keeps an explicit stack of ``(board, remaining)``
    entries. A child board is dropped when ``add_piece`` returns a falsy
    value, when its canonical form was already discovered, or when
    ``filterNode`` rejects it. The ordering and the elapsed time are
    printed.

    Returns:
        The list of boards reached with no pieces remaining.
    """
    started = time.time()
    pieces = ordering(pieces, board)
    pending = [(board, pieces)]
    solutions = []
    discovered = datrie.Trie("%s.\n" % pieces)
    print("order: %s" % pieces)
    while pending:
        current, todo = pending.pop()
        (piece, count) = todo[0]
        remaining = todo[1:]
        for combo in combinations(current.free, count):
            move = [(piece, ) + cell for cell in combo]
            child = current.add_piece(move)
            if not child:
                continue
            canonical = unicode(child.get_canonical().__repr__())
            if canonical in discovered:
                continue
            # Recorded before filtering, so a rejected form is not retried.
            discovered[canonical] = True
            if not filterNode(child, remaining):
                continue
            if remaining:
                pending.append((child, remaining))
            else:
                solutions.append(child)
    print("took %f" % (time.time() - started))
    return solutions
def create_trie():
    """Return a trie over ALPHABET mapping every word of words100k() to 1."""
    vocabulary = words100k()
    result = datrie.Trie(ALPHABET)
    for entry in vocabulary:
        result[entry] = 1
    return result
def _insert_into_trie(items):
    """Insert items into a datrie trie.

    ``items`` must provide ``iteritems()``; every key/value pair is copied
    into a new trie over ``string.printable``, which is returned. Start and
    end of the insertion are logged at INFO level.
    """
    logging.info("Start inserting into trie...")
    result = datrie.Trie(string.printable)  # noqa
    for name, payload in items.iteritems():
        result[name] = payload
    logging.info("Finished inserting into trie.")
    return result
def test_contains(words):
    """Stored keys are found by ``in``, by indexing and by ``get()``."""
    trie = datrie.Trie(string.printable)
    for position, key in enumerate(set(words)):
        trie[key] = position

    # set(words) is rebuilt here; the test relies on both sets iterating in
    # the same order.
    for position, key in enumerate(set(words)):
        assert key in trie
        by_index = trie[key]
        by_get = trie.get(key)
        assert by_index == by_get == position
def __init__(self, filepath):
    """Load app names, one per line of ``filepath``, into ``self.trie``.

    The trie alphabet is ASCII letters, digits and whitespace. Each key is
    the lower-cased name; the value is the name as written in the file
    (trailing whitespace removed).
    """
    alphabet = string.ascii_letters + string.digits + string.whitespace
    self.trie = datrie.Trie(alphabet)
    with open(filepath, 'r') as app_file:
        for raw in app_file:
            # Remove new line and store app names in lower case to allow
            # searching case insensitive
            app_name = raw.rstrip()
            self.trie[unicode(app_name.lower())] = app_name
def test_setdefault():
    """setdefault() stores a value once and returns the stored value after."""
    trie = datrie.Trie(string.ascii_lowercase)
    # (key, default passed in, value expected back), applied in this order.
    steps = [
        ('foo', 5, 5),
        ('foo', 4, 5),
        ('foo', 5, 5),
        ('bar', 'vasia', 'vasia'),
        ('bar', 3, 'vasia'),
        ('bar', 7, 'vasia'),
    ]
    for key, default, expected in steps:
        assert trie.setdefault(key, default) == expected
def test_trie_items():
    """values(), items() and keys() come back ordered by key."""
    trie = datrie.Trie(string.ascii_lowercase)
    for key, value in (('foo', 10), ('bar', 'foo'), ('foobar', 30)):
        trie[key] = value

    expected_items = [('bar', 'foo'), ('foo', 10), ('foobar', 30)]
    assert trie.values() == [value for _, value in expected_items]
    assert trie.items() == expected_items
    assert trie.keys() == [key for key, _ in expected_items]
def _trie(self):
    """Return a lowercase-ASCII trie pre-filled with five 'foo'/'bar' keys."""
    entries = (
        ('foo', 10),
        ('bar', 20),
        ('foobar', 30),
        ('foovar', 40),
        ('foobarzartic', None),
    )
    fixture = datrie.Trie(string.ascii_lowercase)
    for key, value in entries:
        fixture[key] = value
    return fixture
def __init__(self):
    """Create the empty room and bunny indexes."""
    self.rooms_by_idx = SortedSet()  # integer ID only
    self.rooms = SortedDict()  # key: id, value: room
    # key: team id, value: SortedSet(key=bunny.reversed_name) of Bunny objects
    self.bunnies_by_team = {}
    # Trie alphabet: ASCII letters followed by the digits 0-9.
    suffix_alphabet = string.ascii_letters + string.digits
    self.bunnies_by_suffix = datrie.Trie(suffix_alphabet)
    self.bunny_names = {}
def _build_trie(word_len=None, compare_func=_gte):
    """Build a lowercase-ASCII trie from the lines of WORD_FILE.

    Every stripped line is stored with the value 1. When ``word_len`` is
    given, only lines for which ``compare_func(len(line), word_len)`` is
    truthy are kept.
    """
    trie = datrie.Trie(string.ascii_lowercase)
    with open(WORD_FILE, 'r') as fin:
        for raw in fin:
            word = raw.strip()
            if word_len is not None and not compare_func(len(word), word_len):
                continue
            trie[u'{}'.format(word)] = 1
    return trie
def load_freq_dictionary(path):
    """Load dictionary of freq from file.

    Each line of the UTF-8 file at ``path`` must be ``<word> <freq>``,
    separated by exactly one space; ``freq`` is converted with int().
    Returns a lowercase-ASCII ``datrie.Trie`` mapping word to frequency.
    """
    freq_by_word = datrie.Trie(string.ascii_lowercase)
    with open(path, 'r', encoding='utf-8') as infile:
        for record in infile:
            word, freq = record.split(' ')
            freq_by_word[word] = int(freq)
    return freq_by_word
def test_pop(words):
    """pop() returns the stored value once, then falls back to the default."""
    numbered = list(enumerate(set(words)))
    trie = datrie.Trie(string.printable)
    for index, key in numbered:
        trie[key] = index

    for index, key in numbered:
        assert trie.pop(key) == index
        # The key is gone now: pop() and get() both yield the default.
        assert trie.pop(key, 42) == trie.get(key, 42) == 42
def makeDaTrie(self):
    """Build, save and return the tries for the fixed and the stem words.

    Every word of ``self.fixedwords`` / ``self.stemwords`` is converted with
    ``u`` and mapped to ``(score, index)``, where ``score`` is the entry at
    the same index of ``self.fixedscores`` / ``self.stemscores``.

    Both tries are saved in ``self.folders[self.cindex]`` as
    ``<stopVal>-fixed.da`` and ``<stopVal>-stem.da``, with ``self.stopVal``
    formatted to two decimals.

    Returns:
        The tuple ``(fixedtrie, stemtrie)``.
    """
    # Alphabet covering all of the word sets: the earlier labMT-only set
    # plus the characters "FALSEICUB+".
    charset = u"raingwtsyelofud'pcmhbkz1-vxq8j970&2=@3#[]46/_;5*FALSEICUB+"
    fixedtrie = datrie.Trie(charset)
    stemtrie = datrie.Trie(charset)
    for i, word in enumerate(self.fixedwords):
        fixedtrie[u(word)] = (self.fixedscores[i], i)
    for i, word in enumerate(self.stemwords):
        stemtrie[u(word)] = (self.stemscores[i], i)
    prefix = '{0}/{1:.2f}'.format(self.folders[self.cindex], self.stopVal)
    fixedtrie.save(prefix + '-fixed.da')
    stemtrie.save(prefix + '-stem.da')
    return (fixedtrie, stemtrie)
def load_as_datrie(filepath):
    """Return a trie whose keys are the right-stripped lines of ``filepath``.

    The trie alphabet is ``string.printable``; every key maps to 0.
    """
    with open(filepath, 'r') as handle:
        raw_lines = handle.readlines()
    data = datrie.Trie(string.printable)
    for raw in raw_lines:
        data[raw.rstrip()] = 0
    return data
def __init__(self, query_log_path):
    """Count the queries of a log file in ``self.vocabulary``.

    Every line must split on whitespace into exactly three fields; the
    middle one is the query. ``self.vocabulary`` is a lowercase-ASCII trie
    mapping each query to the number of lines it appeared on.
    """
    self.vocabulary = datrie.Trie(string.ascii_lowercase)
    counts = self.vocabulary
    with open(query_log_path) as log_file:
        for record in log_file:
            # First and third field are unpacked but not used.
            _uid, query, _stamp = record.split()
            if query in counts:
                counts[query] += 1
            else:
                counts[query] = 1
def test_pickle_unpickle(words):
    """A trie keeps all keys and values across a pickle round trip."""
    original = datrie.Trie(string.printable)
    for index, key in enumerate(set(words)):
        original[key] = index

    restored = pickle.loads(pickle.dumps(original))

    for index, key in enumerate(set(words)):
        assert key in restored
        assert restored[key] == index
def test_trie_comparison():
    """Tries compare equal by content; ordering comparisons raise TypeError."""
    trie = datrie.Trie(string.ascii_lowercase)
    other = datrie.Trie(string.ascii_lowercase)

    # Empty tries: equal to themselves and to a fresh empty trie.
    assert trie == trie
    assert trie == datrie.Trie(string.ascii_lowercase)

    # Same key, different values.
    trie['foo'] = 42
    other['foo'] = 24
    assert trie != other

    # Same key, same value.
    other['foo'] = trie['foo']
    assert trie == other

    # An extra key on one side.
    other['bar'] = 42
    assert trie != other

    with pytest.raises(TypeError):
        trie < other  # same for other comparisons
def test_trie_ascii():
    """Values stored under ASCII keys are read back unchanged."""
    samples = (('x', 1), ('y', 'foo'), ('xx', 2))
    trie = datrie.Trie(string.ascii_letters)
    for key, value in samples:
        trie[key] = value
    for key, value in samples:
        assert trie[key] == value
def __init__(self, language='english'):
    """A vocabulary is a collection of words in a specific language.

    ``language`` is stored lower-cased. ``self.words`` starts as an empty
    lowercase-ASCII trie; ``_unpickle()`` is then tried and, if it raises
    IOError, ``_load_from_file()`` followed by ``_pickle()`` is run instead.
    """
    self.language = language.lower()
    self.data_folder = 'data'
    self.words = datrie.Trie(string.ascii_lowercase)
    try:
        self._unpickle()
    except IOError:
        # No readable pickle: load from the source file and pickle it.
        self._load_from_file()
        self._pickle()
def _trie():
    """Return a trie over chr(0)..chr(127) pre-filled with seven 'f…' keys."""
    entries = (
        ('f', 1),
        ('fo', 2),
        ('fa', 3),
        ('faur', 4),
        ('fauxiiiip', 5),
        ('fauzox', 10),
        ('fauzoy', 20),
    )
    fixture = datrie.Trie(ranges=[(chr(0), chr(127))])
    for key, value in entries:
        fixture[key] = value
    return fixture
def test_trie_unicode():
    """Values stored under Cyrillic keys are read back unchanged."""
    # trie for lowercase Russian characters
    trie = datrie.Trie(ranges=[('а', 'я')])
    samples = (('а', 1), ('б', 2), ('аб', 'vasia'))
    for key, value in samples:
        trie[key] = value
    for key, value in samples:
        assert trie[key] == value
def __init__(self, to_ascii, config):
    """Store the settings and build the replacement trie, if any.

    Reads ``config['variant_only']``, ``config['replacements']`` and, when
    replacements are present, ``config['chars']`` (the trie alphabet).
    ``self.replacements`` maps each source string to its replacement list,
    or is None when ``config['replacements']`` is falsy.
    """
    self.to_ascii = to_ascii
    self.variant_only = config['variant_only']

    # Set up datrie
    if not config['replacements']:
        self.replacements = None
        return
    self.replacements = datrie.Trie(config['chars'])
    for src, repllist in config['replacements']:
        self.replacements[src] = repllist
def get_data(): """Get data from input stream and create need structure""" n = int(input()) dict_of_freq = datrie.Trie(string.ascii_lowercase) for _ in range(n): word, freq = input().split(' ') dict_of_freq[word] = int(freq) m = int(input()) test_set = tuple(input() for _ in range(m)) return dict_of_freq, test_set