Esempio n. 1
0
 def test_asym_min_constraint(self):
     """knn with the 'asym' metric yields nothing when min_s2_size is 10.

     Fits a recommender configured with ``max_s2_size=1.0`` and
     ``min_s2_size=10`` on the fixture documents and expects an empty
     result for the tag "football".
     """
     # presumably min_s2_size is larger than any tag set in the fixture,
     # which is why no neighbour survives -- confirm against get_docs().
     training_corpus = DefaultJsonCorpus(self.get_docs())
     recommender = TagRecommender(max_s2_size=1.0, min_s2_size=10)
     recommender.fit(training_corpus)
     neighbours = recommender.knn("football", k=1, metric='asym')
     self.assertEqual(len(neighbours), 0)
Esempio n. 2
0
docs = []
for filename in os.listdir("reuters-21578-json/data/full"):
    f = open("reuters-21578-json/data/full/" + filename)
    js = json.load(f)
    for j in js:
        if 'topics' in j and 'body' in j:
            d = {}
            d["id"] = j['id']
            d["text"] = j['body'].replace("\n", "")
            d["title"] = j['title']
            d["tags"] = ",".join(j['topics'])
            docs.append(d)

print "loaded ", len(docs), " documents"

from seldon.text import DocumentSimilarity, DefaultJsonCorpus
import logging
logger = logging.getLogger()
logger.setLevel(logging.INFO)

corpus = DefaultJsonCorpus(docs)
ds = DocumentSimilarity(model_type='gensim_lsi')
ds.fit(corpus)
print "built model"

import seldon
rw = seldon.Recommender_wrapper()
rw.save_recommender(ds, "reuters_recommender")
print "saved recommender"
Esempio n. 3
0
 def test_asym(self):
     """The top 'asym' neighbour of "football" is ("soccer", 1.0).

     Fits a recommender with ``max_s2_size=1.0`` and ``min_s2_size=0`` on
     the fixture documents and checks the first entry returned by knn.
     """
     training_corpus = DefaultJsonCorpus(self.get_docs())
     recommender = TagRecommender(max_s2_size=1.0, min_s2_size=0)
     recommender.fit(training_corpus)
     neighbours = recommender.knn("football", k=1, metric='asym')
     # presumably each entry is a (tag, score) pair -- confirm against knn.
     top_match = neighbours[0]
     self.assertEqual(top_match, ("soccer", 1.0))
 def test_jaccard_max_constraint(self):
     """knn with the 'jaccard' metric yields nothing when max_s2_size is 0.1.

     Fits a recommender configured with ``max_s2_size=0.1`` and
     ``min_s2_size=0`` on the fixture documents and expects an empty
     result for the tag "football".
     """
     training_corpus = DefaultJsonCorpus(self.get_docs())
     # NOTE(review): other tests in this file instantiate `TagRecommender`
     # (no underscore); confirm which class name the module really exports.
     recommender = Tag_Recommender(max_s2_size=0.1, min_s2_size=0)
     recommender.fit(training_corpus)
     neighbours = recommender.knn("football", k=1, metric='jaccard')
     self.assertEqual(len(neighbours), 0)