def __init__(self, filename):
    """Wire up an analyzer, a SQLite-backed model, and a searcher.

    filename -- path handed to park.SQLiteStore for the on-disk store.
    """
    tokenizer = StandardAnalyzer()
    backing_store = park.SQLiteStore(filename)

    self.analyzer = tokenizer
    self.model = Model(tokenizer, backing_store)
    self.searcher = RandomWalkSearcher(self.model)
def setUp(self):
    """Open the on-disk store and register removal of the db file."""
    self.store = park.SQLiteStore(self.DB)

    def remove_db():
        # Best-effort: the file may never have been flushed to disk.
        if os.path.exists(self.DB):
            os.unlink(self.DB)

    self.addCleanup(remove_db)
def test_conflated_query(self):
    """A normalized query term expands to every surface form seen in training."""
    az = analysis.WhitespaceAnalyzer()
    az.add_token_normalizer(analysis.LowercaseNormalizer())

    mdl = model.Model(az, park.SQLiteStore(":memory:"))
    mdl.train(u"This is a test")
    mdl.train(u"this is a test")

    result = az.query(u"this is a query", mdl)

    # "this" was trained twice with differing case, so both surface
    # forms appear at position 0.
    expected = [
        {"term": "this", "pos": 0},
        {"term": "This", "pos": 0},
        {"term": "is", "pos": 1},
        {"term": "a", "pos": 2},
        {"term": "query", "pos": 3},
    ]
    self.assertListEqual(expected, result.terms)
def run(args): store = park.SQLiteStore("cobe.store") analyzer = analysis.WhitespaceAnalyzer() model = Model(analyzer, store) print "Tokens:" for token, token_id in model.tokens.token_ids.iteritems(): print token, decode_one(token_id) print "Normalized tokens:" for key in model._prefix_keys("n"): print key print "3-gram counts:" get_token = model.tokens.get_token for ngram, count in model._prefix_items("3", strip_prefix=True): # This needs a more efficient way to get the token ids, # maybe a simple varint-aware string split. grams = [get_token(encode_one(i)) for i in decode(ngram)] print grams, decode_one(count)
def __init__(self):
    """Open the daemon's key/value store and ensure a device UUID exists.

    The UUID is generated once on first run and persisted under
    "local/device_uuid" so the device identity is stable across restarts.
    """
    self.kv = park.SQLiteStore("orbitd.db")
    # PEP 8: compare against None with `is`, not `==`.
    if self.kv.get("local/device_uuid") is None:
        self.kv.put("local/device_uuid", str(uuid.uuid4()))
def setUp(self):
    """Build a fresh in-memory model for each test."""
    analyzer = WhitespaceAnalyzer()
    store = park.SQLiteStore(":memory:")

    self.analyzer = analyzer
    self.store = store
    self.model = Model(analyzer, store)