def test_asym_min_constraint(self):
    """Expect no 'asym' neighbours for "football" when min_s2_size is 10.

    Fits a TagRecommender (max_s2_size=1.0, min_s2_size=10) on the fixture
    documents returned by ``self.get_docs()`` and checks that the k=1
    nearest-neighbour query comes back empty.
    """
    documents = self.get_docs()
    tag_corpus = DefaultJsonCorpus(documents)

    recommender = TagRecommender(max_s2_size=1.0, min_s2_size=10)
    recommender.fit(tag_corpus)

    neighbours = recommender.knn("football", k=1, metric='asym')
    match_count = len(neighbours)
    self.assertEqual(match_count, 0)
# Build a DocumentSimilarity model from the JSON files under
# reuters-21578-json/data/full and save it as "reuters_recommender".
import logging

import seldon
from seldon.text import DocumentSimilarity, DefaultJsonCorpus

data_dir = "reuters-21578-json/data/full"

docs = []
for filename in os.listdir(data_dir):
    # Use a context manager so each data file is closed as soon as it has
    # been parsed instead of leaking one open handle per file.
    with open(os.path.join(data_dir, filename)) as f:
        js = json.load(f)
    for j in js:
        # Only records that carry both topics and a body are kept.
        if 'topics' in j and 'body' in j:
            d = {
                "id": j['id'],
                # Newlines are stripped from the body text.
                "text": j['body'].replace("\n", ""),
                "title": j['title'],
                # Topics are flattened into one comma-separated string.
                "tags": ",".join(j['topics']),
            }
            docs.append(d)
# Single-argument print(...) emits the same bytes as the former Python 2
# print statement (including its doubled spaces) and also parses on Python 3.
print("loaded  %d  documents" % len(docs))

# Raise the root logger to INFO before fitting the model.
logger = logging.getLogger()
logger.setLevel(logging.INFO)

corpus = DefaultJsonCorpus(docs)
ds = DocumentSimilarity(model_type='gensim_lsi')
ds.fit(corpus)
print("built model")

rw = seldon.Recommender_wrapper()
rw.save_recommender(ds, "reuters_recommender")
print("saved recommender")
def test_asym(self):
    """Expect "soccer" with score 1.0 as the top 'asym' neighbour of "football".

    Fits a TagRecommender (max_s2_size=1.0, min_s2_size=0) on the fixture
    documents returned by ``self.get_docs()`` and checks the first result of
    the k=1 nearest-neighbour query.
    """
    documents = self.get_docs()
    tag_corpus = DefaultJsonCorpus(documents)

    recommender = TagRecommender(max_s2_size=1.0, min_s2_size=0)
    recommender.fit(tag_corpus)

    neighbours = recommender.knn("football", k=1, metric='asym')
    best_match = neighbours[0]
    self.assertEqual(best_match, ("soccer", 1.0))
def test_jaccard_max_constraint(self):
    """Expect no 'jaccard' neighbours for "football" when max_s2_size is 0.1.

    Fits a Tag_Recommender (max_s2_size=0.1, min_s2_size=0) on the fixture
    documents returned by ``self.get_docs()`` and checks that the k=1
    nearest-neighbour query comes back empty.
    """
    documents = self.get_docs()
    tag_corpus = DefaultJsonCorpus(documents)

    # NOTE(review): other tests in this file spell the class TagRecommender;
    # confirm which name the package actually exports.
    recommender = Tag_Recommender(max_s2_size=0.1, min_s2_size=0)
    recommender.fit(tag_corpus)

    neighbours = recommender.knn("football", k=1, metric='jaccard')
    match_count = len(neighbours)
    self.assertEqual(match_count, 0)