def test_find(self):
    """Exercise triefind's match, match_all, find and find_words on a small trie."""
    from Bio import triefind

    text = "hello world!"
    index = trie.trie()
    entries = (
        ("hello", 5),
        ("he", 7),
        ("hej", 9),
        ("foo", "bar"),
        ("wor", "ld"),
    )
    for key, value in entries:
        index[key] = value

    # match returns a single key; match_all returns every key matched
    # at the start of the text.
    self.assertEqual(triefind.match(text, index), "hello")
    self.assertEqual(sorted(triefind.match_all(text, index)), ["he", "hello"])

    # find reports (key, start, end) tuples, including "wor" further
    # into the text.
    self.assertEqual(
        sorted(triefind.find(text, index)),
        [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9)],
    )
    # find_words is expected to report only "hello" at this point
    # (presumably because "he" and "wor" are not complete words).
    self.assertEqual(
        sorted(triefind.find_words(text, index)),
        [("hello", 0, 5)],
    )

    # Once "world" is a key, both find and find_words pick it up.
    index["world"] = "full"
    self.assertEqual(
        sorted(triefind.find(text, index)),
        [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9), ("world", 6, 11)],
    )
    self.assertEqual(
        sorted(triefind.find_words(text, index)),
        [("hello", 0, 5), ("world", 6, 11)],
    )
def process_exact_substring(counter, t):
    """Fold the counts of embedded matches into the sequences containing them.

    The keys of ``counter`` are visited longest first. For each key,
    ``triefind.find(key, t)`` is consulted; every reported match whose
    length differs from the key's own length has its count added to the
    key and is then reset to zero. Finally an empty ``Counter`` is added
    to ``counter``, which strips entries whose count is not positive.

    ``t`` is passed straight to ``triefind.find`` (presumably a trie built
    from the same sequences -- confirm against the caller).

    Returns the updated counter; callers should use the return value.
    """
    longest_first = sorted(counter, key=len, reverse=True)
    for sequence in longest_first:
        sequence_length = len(sequence)
        for hit, _start, _end in triefind.find(sequence, t):
            # A hit as long as the sequence itself is skipped.
            if len(hit) != sequence_length:
                counter[sequence] += counter[hit]
                counter[hit] = 0
    # Adding an empty Counter discards zero and negative counts.
    counter += Counter()
    return counter
def find(self, string):
    """Return the entries of ``self.index`` that best match ``string``.

    The query is dispatched as follows:

    * Fewer than three characters: nothing is searched, ``[]`` is returned.
    * Longer than three characters and a fragment of ``'InChI=1S'``:
      every ``CompoundStructures`` object is returned.
    * A ``'/'`` somewhere strictly inside the query: every match reported
      by ``triefind.find`` votes for the entries stored under its key, and
      the entries with the highest vote count are returned.
    * A ``'*'`` in the query: ``self.index.get_approximate`` is called with
      the slashes removed and the number of ``'*'`` characters as the
      second argument; element 1 of each hit is concatenated.
    * A trailing ``'/'``: direct lookup of the query with slashes removed.
    * Otherwise: the values stored under the longest matches reported by
      ``triefind.find`` are concatenated.

    ``self.index`` is presumably a ``Bio.trie`` mapping keys to lists of
    structures -- confirm against the code that builds it.

    Branches that find no match return ``[]``, except the trailing-slash
    branch, which returns whatever ``self.index.get`` returns.
    """
    if len(string) < 3:
        return []
    if len(string) > 3 and string in 'InChI=1S':
        return CompoundStructures.objects.all()

    if '/' in string[1:-1]:
        # Count, per stored entry, how many matches reference it.
        score = {}
        for match in triefind.find(string, self.index):
            for mol in self.index.get(match[0]):
                score[mol] = score.get(mol, 0) + 1
        # No match at all: max() would raise ValueError on an empty
        # sequence, so answer with an empty result like the branch below.
        if not score:
            return []
        maxscore = max(score.values())
        return [mol for mol, hits in score.items() if hits == maxscore]

    if '*' in string:
        # str.replace is used instead of the Python-2-only two-argument
        # str.translate; for plain strings the result is the same.
        approximate_hits = self.index.get_approximate(
            string.replace('/', ''), string.count('*'))
        return sum([hit[1] for hit in approximate_hits], [])

    if string.endswith('/'):
        return self.index.get(str(string.replace('/', '')))

    res = triefind.find(string, self.index)
    # Each match is indexed as (key, start, end); end - start is its length.
    lengths = [match[2] - match[1] for match in res]
    if not lengths:
        return []
    maxlen = max(lengths)
    longest = [match for match, length in zip(res, lengths)
               if length == maxlen]
    return sum([self.index[str(match[0])] for match in longest], [])
from Bio import triefind

# Small demonstration of the triefind helpers on a hand-built trie.
trieobj = trie.trie()
trieobj["hello"] = 5
trieobj["he"] = 7
trieobj["hej"] = 9
trieobj["foo"] = "bar"
trieobj["wor"] = "ld"

# Expected output: "hello"
print(triefind.match("hello world!", trieobj))

# Expected output: ["he", "hello"]
k = sorted(triefind.match_all("hello world!", trieobj))
print(k)

# Expected output: [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9)]
k = sorted(triefind.find("hello world!", trieobj))
print(k)

# Expected output: [("hello", 0, 5)]
k = sorted(triefind.find_words("hello world!", trieobj))
print(k)

# Add "world" as a key and repeat the two searches.
trieobj["world"] = "full"

# Expected output: [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9), ("world", 6, 11)]
k = sorted(triefind.find("hello world!", trieobj))
print(k)

# Expected output: [("hello", 0, 5), ("world", 6, 11)]
k = sorted(triefind.find_words("hello world!", trieobj))
print(k)