예제 #1
0
 def test_find(self):
     """Check triefind's match, match_all, find and find_words on a small trie.

     The same text is searched twice: once with the initial keys and once
     after the key "world" has been added, so the second round verifies
     that the new key appears in the find / find_words results.
     """
     from Bio import triefind

     text = "hello world!"
     trieobj = trie.trie()
     for key, value in [("hello", 5), ("he", 7), ("hej", 9),
                        ("foo", "bar"), ("wor", "ld")]:
         trieobj[key] = value

     # Both "he" and "hello" start the text; match is expected to give "hello".
     self.assertEqual(triefind.match(text, trieobj), "hello")
     self.assertEqual(sorted(triefind.match_all(text, trieobj)),
                      ["he", "hello"])

     # find results are compared as (key, start, end) tuples.
     self.assertEqual(sorted(triefind.find(text, trieobj)),
                      [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9)])
     # "he" and "wor" are not expected here -- presumably because
     # find_words only reports whole words; confirm against Bio.triefind.
     self.assertEqual(sorted(triefind.find_words(text, trieobj)),
                      [("hello", 0, 5)])

     # Second round: with "world" stored, it must show up in both searches.
     trieobj["world"] = "full"
     self.assertEqual(sorted(triefind.find(text, trieobj)),
                      [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9),
                       ("world", 6, 11)])
     self.assertEqual(sorted(triefind.find_words(text, trieobj)),
                      [("hello", 0, 5), ("world", 6, 11)])
예제 #2
0
 def test_find(self):
     """Run the triefind search helpers over "hello world!" and check the hits.

     Covers match, match_all, find and find_words, then repeats find and
     find_words after one more key ("world") has been stored in the trie.
     """
     from Bio import triefind

     sentence = "hello world!"
     trieobj = trie.trie()
     trieobj["hello"] = 5
     trieobj["he"] = 7
     trieobj["hej"] = 9
     trieobj["foo"] = "bar"
     trieobj["wor"] = "ld"

     best = triefind.match(sentence, trieobj)
     self.assertEqual(best, "hello")

     prefixes = sorted(triefind.match_all(sentence, trieobj))
     self.assertEqual(prefixes, ["he", "hello"])

     # Results are sorted before comparing, so the order in which the
     # library returns them does not matter.
     hits = sorted(triefind.find(sentence, trieobj))
     self.assertEqual(hits, [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9)])

     words = sorted(triefind.find_words(sentence, trieobj))
     self.assertEqual(words, [("hello", 0, 5)])

     # Store one more key and search again.
     trieobj["world"] = "full"

     hits = sorted(triefind.find(sentence, trieobj))
     expected_hits = [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9), ("world", 6, 11)]
     self.assertEqual(hits, expected_hits)

     words = sorted(triefind.find_words(sentence, trieobj))
     self.assertEqual(words, [("hello", 0, 5), ("world", 6, 11)])
def process_exact_substring(counter, t):
    """Fold the counts of contained sequences into the sequences containing them.

    Keys of ``counter`` are visited longest first.  For each key,
    ``triefind.find(key, t)`` is called and every reported match whose
    length differs from the key's own length has its count added to the
    key's count and is then reset to zero.

    Args:
        counter: a ``Counter`` keyed by sequence; it is modified in place.
        t: passed straight through as the second argument of
            ``triefind.find`` -- presumably a trie holding the same
            sequences; confirm against the caller.

    Returns:
        The same ``counter`` object, with zero-count entries removed.
    """
    # sorted() snapshots the keys, so entries touched inside the loop do
    # not disturb the iteration.
    for seq in sorted(counter, key=len, reverse=True):
        seq_len = len(seq)
        for (hit, _start, _end) in triefind.find(seq, t):
            if len(hit) != seq_len:
                # A shorter match: hand its count over to the longer sequence.
                counter[seq] += counter[hit]
                counter[hit] = 0
    # In-place addition of an empty Counter keeps only positive counts,
    # which drops every entry zeroed above.
    counter += Counter()
    return counter
예제 #4
0
    def find(self, string):
        """Look up ``string`` in ``self.index`` and return the matching entries.

        The strategy depends on the shape of the query:

        * Fewer than 3 characters: nothing is searched and ``[]`` is returned.
        * Longer than 3 characters and a substring of the literal
          ``'InChI=1S'``: every ``CompoundStructures`` object is returned.
        * A ``'/'`` strictly inside the query (first and last characters are
          not considered): every key that ``triefind.find`` locates in the
          query votes once for each entry stored under it, and the entries
          with the highest number of votes are returned.
        * No inner ``'/'`` but at least one ``'*'``: ``get_approximate`` is
          called on the query with slashes removed, using the number of
          ``'*'`` characters as its second argument; the second element of
          every result is concatenated into one list.
        * No inner ``'/'``, no ``'*'`` and a trailing ``'/'``: a direct
          ``self.index.get`` lookup of the query with slashes removed.
        * Otherwise: the values stored under the longest keys that
          ``triefind.find`` locates in the query are concatenated.

        Args:
            string: the query text.

        Returns:
            A list of entries (``[]`` when nothing matches), except for the
            ``'InChI=1S'`` case, which returns
            ``CompoundStructures.objects.all()``, and the trailing-slash
            case, which returns whatever ``self.index.get`` returns.
        """
        if len(string) < 3:
            return []

        if len(string) > 3 and string in 'InChI=1S':
            return CompoundStructures.objects.all()

        if '/' in string[1:-1]:
            # Each key found in the query votes for the entries stored under it.
            score = {}
            for hit in triefind.find(string, self.index):
                for mol in self.index.get(hit[0]):
                    score[mol] = score.get(mol, 0) + 1

            if not score:
                # No key was found; max() on an empty sequence would raise
                # ValueError, so report "no match" like the other branches.
                return []

            maxscore = max(score.values())
            return [mol for mol, votes in score.items() if votes == maxscore]

        # replace() strips the slashes for both byte and unicode strings,
        # unlike the two-argument str.translate form.
        stripped = string.replace('/', '')

        if '*' in string:
            approximate = self.index.get_approximate(stripped, string.count('*'))
            return sum([found[1] for found in approximate], [])

        if string.endswith('/'):
            return self.index.get(str(stripped))

        res = triefind.find(string, self.index)
        # Each hit is (key, start, end); end - start is the matched length.
        lengths = [hit[2] - hit[1] for hit in res]
        if not lengths:
            return []
        maxlen = max(lengths)
        return sum(
            [self.index[str(hit[0])]
             for hit, length in zip(res, lengths) if length == maxlen],
            [])
예제 #5
0
# Demonstrates the Bio.triefind search helpers on a small trie.
# ``trie`` is imported here as well: the script calls trie.trie() and would
# otherwise stop with a NameError.
from Bio import trie, triefind

text = "hello world!"

trieobj = trie.trie()

trieobj["hello"] = 5
trieobj["he"] = 7
trieobj["hej"] = 9
trieobj["foo"] = "bar"
trieobj["wor"] = "ld"

# print(x) with a single argument produces the same output as ``print x``
# on Python 2 and is also valid on Python 3.
print(triefind.match(text, trieobj))    # "hello"
k = sorted(triefind.match_all(text, trieobj))
print(k)     # ["he", "hello"]

# find reports (key, start, end) tuples.
k = sorted(triefind.find(text, trieobj))
print(k)     # [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9)]

k = sorted(triefind.find_words(text, trieobj))
print(k)     # [("hello", 0, 5)]

# After storing "world", both searches report it as well.
trieobj["world"] = "full"
k = sorted(triefind.find(text, trieobj))
print(k)     # [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9), ("world", 6, 11)]

k = sorted(triefind.find_words(text, trieobj))
print(k)     # [("hello", 0, 5), ("world", 6, 11)]
예제 #6
0
# Walk-through of Bio.triefind: match, match_all, find and find_words.
# ``trie`` must be imported too, because the trie itself is built with
# trie.trie(); importing only triefind leaves that name undefined.
from Bio import trie, triefind

trieobj = trie.trie()

trieobj["hello"] = 5
trieobj["he"] = 7
trieobj["hej"] = 9
trieobj["foo"] = "bar"
trieobj["wor"] = "ld"

# Single-argument print(...) behaves identically on Python 2 and Python 3.
print(triefind.match("hello world!", trieobj))  # "hello"
k = triefind.match_all("hello world!", trieobj)
k.sort()
print(k)  # ["he", "hello"]

# Each find result is a (key, start, end) tuple.
k = triefind.find("hello world!", trieobj)
k.sort()
print(k)  # [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9)]

k = triefind.find_words("hello world!", trieobj)
k.sort()
print(k)  # [("hello", 0, 5)]

# Add one more key and repeat the two searches.
trieobj["world"] = "full"
k = triefind.find("hello world!", trieobj)
k.sort()
print(k)  # [("he", 0, 2), ("hello", 0, 5), ("wor", 6, 9), ("world", 6, 11)]

k = triefind.find_words("hello world!", trieobj)
k.sort()
print(k)  # [("hello", 0, 5), ("world", 6, 11)]