def receive_feedbacks(self, session, feedbacks):
    """
    Receive feedbacks from user, propagate them and update the session

    The format of feedback is:
    {
        "docs": [[doc_id, feedback_value], ...],
        "kws": [[keyword_id, feedback_value], ...],
        "dockws": [[keyword_id, doc_id, feedback_value], ...]
    }
    Any of the three keys may be absent; an absent key is treated as an
    empty list.

    session: handed unchanged to every propagator call and to the updater
    """
    # Parenthesised single-argument print gives the same output under the
    # Python 2 print statement and the Python 3 print function.
    print("propagation started...")

    # feedback on whole documents
    for doc_id, fb in feedbacks.get("docs", []):
        doc = Document.get(doc_id)
        self.ppgt.fb_from_doc(doc, fb, session)

    # feedback on keywords
    for kw_id, fb in feedbacks.get("kws", []):
        kw = Keyword.get(kw_id)
        self.ppgt.fb_from_kw(kw, fb, session)

    # feedback on a keyword shown inside a document
    for kw_id, doc_id, fb in feedbacks.get("dockws", []):
        doc = Document.get(doc_id)
        kw = Keyword.get(kw_id)
        self.ppgt.fb_from_dockw(kw, doc, fb, session)

    # propagation is done
    # updates the feedback value
    self.upd.update(session)

    print("propagation finished")
def test_similarity(self):
    """
    Similarity between two documents and between two keywords:
    expected values, symmetry and refusal of an unknown metric name
    """
    # documents
    first_doc, second_doc, third_doc = [Document.get(i) for i in (1, 2, 3)]

    self.assertAlmostEqual(0.6300877890447911,
                           first_doc.similarity_to(second_doc))
    # swapping the operands must give the same value
    self.assertAlmostEqual(second_doc.similarity_to(first_doc),
                           first_doc.similarity_to(second_doc))
    self.assertAlmostEqual(0.31713642199844894,
                           first_doc.similarity_to(third_doc))
    self.assertRaises(NotImplementedError, first_doc.similarity_to,
                      third_doc, "not implemented metric")

    # keywords
    redis, database, python = [
        Keyword.get(name) for name in ("redis", "database", "python")
    ]

    self.assertAlmostEqual(0.6698544675330306, redis.similarity_to(database))
    # swapping the operands must give the same value
    self.assertAlmostEqual(database.similarity_to(redis),
                           redis.similarity_to(database))
    self.assertAlmostEqual(0.2613424459663648, redis.similarity_to(python))
    self.assertRaises(NotImplementedError, redis.similarity_to,
                      python, "not implemented metric")
def recommend_keywords(self, fmim, session, top_n, mu, c, sampler=None):
    """
    Rank the keywords of the sub-matrix and return the first `top_n`

    fmim: FeatureMatrixAndIndexMapping, the fmim for the sub-matrix
    session: Session, queried for the current feedback of every keyword
    top_n: how many kws are returned
    mu, c: float, the parameters for LinRel algorithm
    sampler: accepted but not used by this method

    Return a plain list of keywords; each one gets its 'score' set and is
    flagged with 'recommended' = True
    """
    candidates = Keyword.get_many(fmim.kw_ind.keys())
    fbs = {kw.id: kw.fb(session) for kw in candidates}

    # generic_rank returns three mappings; only the first one (id -> score)
    # is needed here
    id_with_scores, _, _ = self.generic_rank(
        fmim.kw2doc_m, fbs, fmim.kw_ind, fmim.kw_ind_r, mu, c)

    # list(...) keeps the slice valid whether items() returns a list or a
    # view.
    # NOTE(review): relies on id_with_scores iterating in rank order --
    # presumably an ordered mapping; confirm in generic_rank
    top_items = list(id_with_scores.items())[:top_n]

    kws = []
    for kw_id, score in top_items:
        kw = Keyword.get(kw_id)
        kw['score'] = score
        kw['recommended'] = True
        kws.append(kw)

    return kws
def setUp(self):
    """
    Prepare the recommender, a session holding some feedback and the
    fmim restricted to the filtered keywords and documents
    """
    kw_filters = [self.my_kw_filter]
    doc_filters = [self.kw_count_filter, self.has_database_filter]

    # the default configuration
    self.r = LinRelRecommender(
        2, 2, 1.0, 0.1, 1.0, 0.1,
        kw_filters=None,
        doc_filters=[self.kw_count_filter, self.has_database_filter],
        **fmim.__dict__)

    # feedback given before the tests run
    self.session = get_session()
    for kw_name, value in (("redis", .7), ("database", .6)):
        self.session.update_kw_feedback(Keyword.get(kw_name), value)
    for doc_id in (1, 2, 8):
        self.session.update_doc_feedback(Document.get(doc_id), .7)

    # make the fmim out of the objects that survive the filters
    kept_kws = self.r._filter_objs(kw_filters, kws=Keyword.all_kws)
    kept_docs = self.r._filter_objs(doc_filters, docs=Document.all_docs)

    kw_side = self.r._submatrix_and_indexing(
        kept_kws, kept_docs, fmim.kw2doc_m, fmim.kw_ind, fmim.doc_ind)
    kw2doc_submat, kw_ind_map, kw_ind_map_r = kw_side

    doc_side = self.r._submatrix_and_indexing(
        kept_docs, kept_kws, fmim.doc2kw_m, fmim.doc_ind, fmim.kw_ind)
    doc2kw_submat, doc_ind_map, doc_ind_map_r = doc_side

    self.fmim = FeatureMatrixAndIndexMapping(
        kw_ind_map, doc_ind_map,
        kw2doc_submat, doc2kw_submat,
        kw_ind_map_r, doc_ind_map_r)
def test_get_many(self):
    """
    get_many gives equal results for literal ids and for the same ids
    held in a variable, for documents and for keywords
    """
    doc_ids = [1, 2]
    kw_ids = ["a", "the"]

    docs_from_literal = Document.get_many([1, 2])
    docs_from_variable = Document.get_many(doc_ids)
    self.assertEqual(docs_from_literal, docs_from_variable)

    kws_from_literal = Keyword.get_many(["a", "the"])
    kws_from_variable = Keyword.get_many(kw_ids)
    self.assertEqual(kws_from_literal, kws_from_variable)
def test_kw_fb_threshold_filter_with_prefiltering(self):
    """
    Threshold filter called with with_fb=True and no explicit keywords:
    with a 0.1 threshold only "python" (feedback .2) is expected,
    "a" (feedback .0999999) is not
    """
    # change the feedback
    for kw_name, value in (("python", .2), ("a", .0999999)):
        self.session.update_kw_feedback(Keyword.get(kw_name), value)

    kept = kw_fb_threshold_filter(0.1, self.session, with_fb=True)

    self.assertEqual(Keyword.get_many(["python"]), kept)
def test_add_kws(self):
    """
    Every keyword list handed to add_kw_recom_list shows up, in the
    order it was added, in session.recom_kws
    """
    first_batch = Keyword.get_many(["redis", "database", "mysql"])
    second_batch = Keyword.get_many(["redis", "database", "python"])

    # after the first addition
    self.session.add_kw_recom_list(first_batch)
    self.assertEqual([first_batch], self.session.recom_kws)

    # after the second addition
    self.session.add_kw_recom_list(second_batch)
    self.assertEqual([first_batch, second_batch], self.session.recom_kws)
def test_one(self):
    """
    Three rounds made of documents 1, 2 (in either order) and keywords
    redis, database are expected to score 1 for documents and keywords
    """
    docs = [Document.get_many(ids) for ids in ([1, 2], [1, 2], [2, 1])]
    kws = [Keyword.get_many(["redis", "database"]) for _ in range(3)]

    scores = self.e.evaluate(docs, kws)

    expected = ([1, 1, 1], [1, 1, 1])
    for position in (0, 1):
        self.assertArrayAlmostEqual(expected[position], scores[position])
def test_get_many(self):
    """
    Fetching many documents / keywords by a list variable must equal
    fetching them by the same ids written inline
    """
    doc_ids = [1, 2]
    kw_ids = ["a", "the"]

    # documents
    self.assertEqual(
        Document.get_many([1, 2]),
        Document.get_many(doc_ids),
    )
    # keywords
    self.assertEqual(
        Keyword.get_many(["a", "the"]),
        Keyword.get_many(kw_ids),
    )
def test_kw_fb_threshold_filter(self):
    """
    Threshold filter applied to all keywords with with_fb=False:
    with a 0.1 threshold only "python" (feedback .2) is expected,
    "a" (feedback .0999999) is not
    """
    # change the feedback
    python_kw = Keyword.get("python")
    self.session.update_kw_feedback(python_kw, .2)
    a_kw = Keyword.get("a")
    self.session.update_kw_feedback(a_kw, .0999999)

    kept = kw_fb_threshold_filter(
        0.1, self.session, kws=Keyword.all_kws, with_fb=False)

    self.assertEqual(Keyword.get_many(["python"]), kept)
def test_kw_fb_filter(self):
    """
    The "kw_fb" filter from FilterRepository is expected to keep "redis"
    once that keyword has its weighted-sum feedback stored in the session
    """
    redis = Keyword.get("redis")

    # feedback arriving from document 1, then the recommended documents
    redis.rec_fb_from_doc(Document.get(1), 1, self.session)
    self.session.add_doc_recom_list(Document.get_many([1, 2, 6]))

    # store the aggregated feedback for the keyword
    aggregated = redis.fb_weighted_sum(self.session)
    self.session.update_kw_feedback(redis, aggregated)

    kw_fb_filter = FilterRepository.filters["kw_fb"]
    kept = kw_fb_filter([redis])

    self.assertEqual(Keyword.get_many(["redis"]), kept)
def test_two(self):
    """
    Evaluate three rounds of different documents and keywords and
    compare the two score sequences against the recorded values
    """
    doc_id_rounds = ([8, 10], [3, 4], [2, 1])
    kw_id_rounds = (["a", "the"],
                    ["python", "database"],
                    ["database", "redis"])

    docs = [Document.get_many(ids) for ids in doc_id_rounds]
    kws = [Keyword.get_many(ids) for ids in kw_id_rounds]

    scores = self.e.evaluate(docs, kws)

    expected_first = [0.34491169135422844, 0.1726882003112921, 1.0]
    expected_second = [0.4834283906452939, 0.759679156743632,
                       0.9999999999999999]
    self.assertArrayAlmostEqual(expected_first, scores[0])
    self.assertArrayAlmostEqual(expected_second, scores[1])
def test_associated_keywords_from_documents(self):
    """
    Keywords associated with documents 1 and 2, first without and then
    with an exclusion list; compared as sets, so order is not checked
    """
    # nothing excluded
    found = self.r.associated_keywords_from_docs(Document.get_many([1, 2]))
    everything = Keyword.get_many(["a", "database", "redis", "the"])
    self.assertEqual(set(everything), set(found))

    # redis excluded
    exclude_kws = [Keyword.get("redis")]
    found = self.r.associated_keywords_from_docs(
        Document.get_many([1, 2]), exclude_kws)
    without_redis = Keyword.get_many(["a", "database", "the"])
    self.assertEqual(set(without_redis), set(found))
def test_kw_hashable(self):
    """
    Keyword lists work as dict keys; two lists with the same keywords in
    a different order are expected to address the same entry
    """
    original = Keyword.get_many(["redis", "a", "the"])
    reordered = Keyword.get_many(["a", "the", "redis"])
    different = Keyword.get_many(["redis", "a", "python"])

    table = {}
    # the second assignment is expected to override the first
    for value, key in enumerate((original, reordered, different), 1):
        table[key] = value

    self.assertEqual({original: 2, different: 3}, table)
def test_sample_documents_associated_with_keywords(self):
    """
    normal case: two documents are sampled and each of them has python
    or redis among its keywords
    """
    query_kws = Keyword.get_many(["python", "redis"])
    sampled = self.r.sample_documents_associated_with_keywords(query_kws, 2)

    self.assertEqual(2, len(sampled))

    for doc in sampled:
        # any() stops at the first keyword found in the document
        self.assertTrue(any(Keyword.get(name) in doc.keywords
                            for name in ("python", "redis")))
def test_kw_hashable(self):
    """
    Keyword lists can be used as dictionary keys, and the order of the
    keywords inside a list is expected not to matter for the lookup
    """
    base_key = Keyword.get_many(["redis", "a", "the"])
    same_kws_other_order = Keyword.get_many(["a", "the", "redis"])
    other_key = Keyword.get_many(["redis", "a", "python"])

    mapping = {}
    mapping[base_key] = 1
    # override: expected to land on the entry created just above
    mapping[same_kws_other_order] = 2
    mapping[other_key] = 3

    expected = {base_key: 2, other_key: 3}
    self.assertEqual(expected, mapping)
def test_associated_keywords_from_documents(self):
    """
    associated_keywords_from_docs for documents 1 and 2: all keywords
    first, then the same call with "redis" excluded (order ignored)
    """
    associated = self.r.associated_keywords_from_docs(
        Document.get_many([1, 2]))
    self.assertEqual(
        set(Keyword.get_many(["a", "database", "redis", "the"])),
        set(associated),
    )

    # the excluded keyword must not come back
    exclude_kws = [Keyword.get("redis")]
    associated = self.r.associated_keywords_from_docs(
        Document.get_many([1, 2]), exclude_kws)
    self.assertEqual(
        set(Keyword.get_many(["a", "database", "the"])),
        set(associated),
    )
def test_affected_kws(self):
    """
    Keywords passed to add_affected_kws are found in
    session.affected_kws, also after a later addition
    """
    tracked = [Keyword.get(name) for name in ("python", "redis")]
    self.session.add_affected_kws(*tracked)
    self.assertEqual(tracked, self.session.affected_kws)

    # one more keyword; from here on only membership is compared
    late_kw = Keyword.get("a")
    tracked.append(late_kw)
    self.session.add_affected_kws(late_kw)
    self.assertEqual(set(tracked), set(self.session.affected_kws))
def test_model2modellist_similarity(self):
    """
    Similarity of a single keyword / document to a list of keywords /
    documents, checked against recorded values
    """
    # for keywords
    single_kw = Keyword.get("redis")
    kw_group = Keyword.get_many(["database", "mysql"])
    kw_similarity = single_kw.similarity_to(kw_group)
    self.assertAlmostEqual(0.3754029265429976, kw_similarity)

    # for documents
    single_doc = Document.get(6)
    doc_group = Document.get_many([1, 2])
    doc_similarity = single_doc.similarity_to(doc_group)
    self.assertAlmostEqual(0.7382455893131392, doc_similarity)
def test_sample_documents_associated_with_keywords(self):
    """
    normal case: sampling 2 documents for python and redis gives 2
    documents, each carrying at least one of the two keywords
    """
    sample_size = 2
    picked = self.r.sample_documents_associated_with_keywords(
        Keyword.get_many(["python", "redis"]), sample_size)

    self.assertEqual(2, len(picked))

    for picked_doc in picked:
        doc_kws = picked_doc.keywords
        # redis is only looked up when python is not in the document
        matches = (Keyword.get("python") in doc_kws
                   or Keyword.get("redis") in doc_kws)
        self.assertTrue(matches)
def test_fb_from_doc(self):
    """
    Propagate a feedback of 0.5 given to document 1, run the updater and
    check the resulting feedback of the document and of three keywords
    """
    target = Document.get(1)

    ppgt.fb_from_doc(target, 0.5, self.session)
    upd.update(self.session)

    # assertions
    self.assertAlmostEqual(.5 * .7, target.fb(self.session))

    expected_kw_fb = (("a", 1/2.), ("redis", 1/4.), ("database", 1/4.))
    for kw_name, expected in expected_kw_fb:
        self.assertAlmostEqual(expected,
                               Keyword.get(kw_name).fb(self.session))
def test_modellist2modellist_similarity(self):
    """
    Similarity between two keyword lists and between two document
    lists, checked against recorded values
    """
    # for keywords
    left_kws = Keyword.get_many(["redis", "a"])
    right_kws = Keyword.get_many(["database", "the"])
    kw_similarity = left_kws.similarity_to(right_kws)
    self.assertAlmostEqual(0.42205423035497763, kw_similarity)

    # for documents
    left_docs = Document.get_many([3, 5])
    right_docs = Document.get_many([4, 6])
    doc_similarity = left_docs.similarity_to(right_docs)
    self.assertAlmostEqual(0.6990609119502719, doc_similarity)
def setUp(self):
    """
    Create the LinRel recommender on the whole fmim and a session that
    already holds feedback for two keywords and three documents
    """
    self.r = LinRelRecommender(
        2, 2,
        1., .1,
        1., .1,
        None, None,
        **fmim.__dict__)

    self.session = get_session()

    # giving the feedbacks
    kw_feedback = (("redis", .7), ("database", .6))
    for kw_name, value in kw_feedback:
        self.session.update_kw_feedback(Keyword.get(kw_name), value)

    for doc_id in (1, 2, 8):
        self.session.update_doc_feedback(Document.get(doc_id), .7)
def test_keyword_centroid(self):
    """
    The centroid of a one-keyword list equals that keyword's vector;
    the centroid of a two-keyword list equals the mean of both vectors
    """
    # list with a single keyword
    only_kw = Keyword.get("a")
    single = KeywordList([only_kw])
    self.assertArrayAlmostEqual(matrix2array(single.centroid),
                                only_kw.vec.toarray()[0])

    # list with two keywords
    kw_a = Keyword.get("a")
    kw_the = Keyword.get("the")
    pair = Keyword.get_many(["a", "the"])
    self.assertArrayAlmostEqual(
        matrix2array(pair.centroid),
        (kw_a.vec.toarray()[0] + kw_the.vec.toarray()[0]) / 2)
def test_rec_fb_from_dockw(self):
    """
    Feedback a keyword receives through in-document keywords is stored
    per document; feedback addressed to another keyword is refused
    """
    redis = Keyword.get("redis")

    # (document id, feedback value), in the order they are given
    for doc_id, value in ((2, .5), (1, 1)):
        redis.rec_fb_from_dockw(redis, Document.get(doc_id), value,
                                self.session)

    stored = redis.fb_from_doc(self.session)
    expected = {Document.get(1): 1, Document.get(2): .5}
    self.assertEqual(stored, expected)

    # is not the right keyword
    other_kw = Keyword.get("the")
    first_doc = Document.get(1)
    with self.assertRaises(AssertionError):
        redis.rec_fb_from_dockw(other_kw, first_doc, 1, self.session)
def test_modellist2modellist_similarity(self):
    """
    List-to-list similarity for keyword lists and for document lists,
    compared with recorded values
    """
    # for keywords
    kw_pair = (Keyword.get_many(["redis", "a"]),
               Keyword.get_many(["database", "the"]))
    self.assertAlmostEqual(0.42205423035497763,
                           kw_pair[0].similarity_to(kw_pair[1]))

    # for documents
    doc_pair = (Document.get_many([3, 5]),
                Document.get_many([4, 6]))
    self.assertAlmostEqual(0.6990609119502719,
                           doc_pair[0].similarity_to(doc_pair[1]))
def test_equality_same_type(self):
    """
    Lists of the same type are equal when they hold the same members in
    any order, and unequal when the members differ
    """
    # keyword lists
    kws = Keyword.get_many(["redis", "a", "the"])
    kws_reordered = Keyword.get_many(["a", "the", "redis"])
    kws_other = Keyword.get_many(["a", "the", "python"])
    self.assertEqual(kws, kws_reordered)
    self.assertNotEqual(kws_other, kws_reordered)

    # document lists
    docs = Document.get_many([1, 2, 3])
    docs_reordered = Document.get_many([2, 3, 1])
    docs_other = Document.get_many([4, 5, 6])
    self.assertEqual(docs, docs_reordered)
    self.assertNotEqual(docs_other, docs_reordered)
def test_equality_same_type(self):
    """
    Equality between keyword lists and between document lists: member
    order is expected not to matter, different members make them differ
    """
    kw_first, kw_shuffled, kw_distinct = [
        Keyword.get_many(names)
        for names in (["redis", "a", "the"],
                      ["a", "the", "redis"],
                      ["a", "the", "python"])
    ]
    self.assertEqual(kw_first, kw_shuffled)
    self.assertNotEqual(kw_distinct, kw_shuffled)

    doc_first, doc_shuffled, doc_distinct = [
        Document.get_many(ids)
        for ids in ([1, 2, 3], [2, 3, 1], [4, 5, 6])
    ]
    self.assertEqual(doc_first, doc_shuffled)
    self.assertNotEqual(doc_distinct, doc_shuffled)
def test_rec_fb_from_kw(self):
    """
    Feedback a keyword receives from itself is stored and replaced by a
    later value; feedback addressed to another keyword is refused
    """
    redis = Keyword.get("redis")

    # each value given is expected to be the one read back
    for value in (1, .5):
        redis.rec_fb_from_kw(redis, value, self.session)
        self.assertEqual(value, redis.fb_from_kw(self.session))

    # is not the right keyword
    other_kw = Keyword.get("the")
    with self.assertRaises(AssertionError):
        redis.rec_fb_from_kw(other_kw, 1, self.session)
def test_keyword_centroid(self):
    """
    Centroid of keyword lists: one keyword gives that keyword's own
    vector, two keywords give the average of the two vectors
    """
    lone_kw = Keyword.get("a")
    lone_list = KeywordList([lone_kw])
    lone_centroid = matrix2array(lone_list.centroid)
    self.assertArrayAlmostEqual(lone_centroid, lone_kw.vec.toarray()[0])

    first_kw = Keyword.get("a")
    second_kw = Keyword.get("the")
    both = Keyword.get_many(["a", "the"])
    both_centroid = matrix2array(both.centroid)
    # plain average of the two keyword vectors
    mean_vec = (first_kw.vec.toarray()[0] + second_kw.vec.toarray()[0]) / 2
    self.assertArrayAlmostEqual(both_centroid, mean_vec)
def test_recommend(self):
    """
    recommend() on the prepared session is expected to give documents
    1, 8, 2, 6 and keywords redis, database, the, mysql, a, python
    """
    recommended_docs, recommended_kws = self.r.recommend(
        self.session, 4, 4, 1, .5, 1., .5)

    expected_docs = Document.get_many([1, 8, 2, 6])
    self.assertEqual(expected_docs, recommended_docs)

    expected_kws = Keyword.get_many(
        ["redis", "database", "the", "mysql", "a", "python"])
    self.assertEqual(expected_kws, recommended_kws)
def test_recommend_keywords(self):
    """
    recommend_keywords on the filtered fmim: asking for up to 8 keywords
    is expected to give the six listed below, as a plain list
    """
    recommended = self.r.recommend_keywords(
        self.fmim, self.session, 8, 1, 0.5)

    expected_names = ["redis", "database", "python",
                      "mysql", "tornado", "web"]
    expected = list(Keyword.get_many(expected_names))

    self.assertEqual(expected, recommended)
def setUp(self):
    """
    Create the evaluator and give it documents 1, 2 and keywords
    redis, database as the goal
    """
    goal = (Document.get_many([1, 2]),
            Keyword.get_many(["redis", "database"]))

    self.e = GoalBasedEvaluator()
    self.e.setGoal(*goal)
def setUp(self):
    """
    Assemble the command line app from its two recommenders, open a
    session with recommended lists, prepare one feedback payload and
    seed the random generator
    """
    shared_kwargs = fmim.__dict__
    unset = (None,) * 4  # four trailing positional arguments left as None

    init_recommender = QueryBasedRecommender(3, 2, 3, 2, **shared_kwargs)
    main_recommender = LinRelRecommender(
        3, 3, 1., .5, 1., .5, *unset, **shared_kwargs)

    self.app = CmdApp(OnePassPropagator, OverrideUpdater,
                      init_recommender, main_recommender)

    self.session = get_session()

    # add recommended list
    self.session.add_doc_recom_list(Document.get_many([1, 2, 3]))
    self.session.add_kw_recom_list(
        Keyword.get_many(["a", "redis", "database"]))

    # one feedback of each kind
    self.fb = {
        "docs": [[1, .5]],
        "kws": [["redis", .5]],
        "dockws": [["redis", 1, .5]],
    }

    random.seed(123456)
def test_loop_done(self):
    """
    After loop_done the weighted feedback sum of the document is 0,
    although three kinds of feedback were received before
    """
    first_doc = Document.get(1)

    # feedback from an in-document keyword, a keyword and the document
    first_doc.rec_fb_from_dockw(Keyword.get("redis"), first_doc, 1,
                                self.session)
    first_doc.rec_fb_from_kw(Keyword.get("database"), .5, self.session)
    first_doc.rec_fb_from_doc(first_doc, .5, self.session)

    # terminate the loop
    first_doc.loop_done(self.session)

    # all the feedback is expected to be gone
    remaining = first_doc.fb_weighted_sum(self.session)
    self.assertEqual(remaining, 0)
def setUp(self):
    """
    Prepare a GoalBasedEvaluator aiming at documents 1, 2 and at the
    keywords redis, database
    """
    target_docs = Document.get_many([1, 2])
    target_kws = Keyword.get_many(["redis", "database"])

    self.e = GoalBasedEvaluator()
    self.e.setGoal(target_docs, target_kws)
def test_recommend_main(self):
    """
    Recommendation with start=False after feedback was received:
    documents 1, 2, 6 and keywords redis, database, a, python, the
    are expected
    """
    # receive the feedback first
    self.app.receive_feedbacks(self.session, self.fb)

    result = self.app.recommend(start=False, session=self.session)
    recommended_docs, recommended_kws = result

    self.assertEqual(Document.get_many([1, 2, 6]), recommended_docs)
    self.assertEqual(
        Keyword.get_many(["redis", "database", "a", "python", "the"]),
        recommended_kws)
def test_update_kw_fb(self):
    """
    A keyword feedback written to the session is visible both in
    session.kw_feedbacks and through the keyword's fb()
    """
    redis = Keyword.get('redis')

    self.session.update_kw_feedback(redis, 1)

    stored = self.session.kw_feedbacks
    self.assertEqual(stored, {redis: 1})
    self.assertEqual(redis.fb(self.session), 1)
def test_rec_fb_from_dockw(self):
    """
    getter/setter of the feedback a keyword receives from in-document
    keywords: values are kept per document, a foreign keyword raises
    """
    target_kw = Keyword.get("redis")

    second_doc = Document.get(2)
    target_kw.rec_fb_from_dockw(target_kw, second_doc, .5, self.session)
    first_doc = Document.get(1)
    target_kw.rec_fb_from_dockw(target_kw, first_doc, 1, self.session)

    received = target_kw.fb_from_doc(self.session)
    self.assertEqual(received, {
        Document.get(1): 1,
        Document.get(2): .5,
    })

    # is not the right keyword
    self.assertRaises(
        AssertionError,
        target_kw.rec_fb_from_dockw,
        Keyword.get("the"), Document.get(1), 1, self.session)
def test_one(self):
    """
    Rounds that consist of documents 1, 2 and keywords redis, database
    only are expected to get a score of 1 everywhere
    """
    doc_rounds = []
    for ids in ([1, 2], [1, 2], [2, 1]):
        doc_rounds.append(Document.get_many(ids))

    kw_rounds = []
    for _ in range(3):
        kw_rounds.append(Keyword.get_many(["redis", "database"]))

    scores = self.e.evaluate(doc_rounds, kw_rounds)

    all_ones = ([1, 1, 1], [1, 1, 1])
    self.assertArrayAlmostEqual(all_ones[0], scores[0])
    self.assertArrayAlmostEqual(all_ones[1], scores[1])
def test_sample_documents_associated_with_keywords_sample_size_too_large(
        self):
    """
    in case the sample size is too large: asking for 999 documents for
    python is expected to give documents 3, 4, 5, 6 and 8
    """
    python_only = Keyword.get_many(["python"])
    sampled = self.r.sample_documents_associated_with_keywords(
        python_only, 999)

    expected = Document.get_many([3, 4, 5, 6, 8])
    self.assertEqual(expected, sampled)
def test_recommend_keywords(self):
    """
    Five keywords are expected: the first three flagged as recommended
    (starting with python and redis), the remaining ones not flagged
    """
    query_kws = Keyword.get_many(["python", "redis", "non-existing"])
    kws = self.r.recommend_keywords(
        Document.get_many([6, 1]), 5, 3, query_keywords=query_kws)

    kw_from_recom_docs = kws[:3]
    kw_from_assoc_docs = kws[3:]

    self.assertEqual(5, len(kws))

    # the first two should be python and redis
    leading = list(Keyword.get_many(["python", "redis"]))
    self.assertEqual(leading, kw_from_recom_docs[:2])

    for recommended_kw in kw_from_recom_docs:
        self.assertTrue(recommended_kw["recommended"])

    for associated_kw in kw_from_assoc_docs:
        self.assertFalse(associated_kw["recommended"])

    # no easy way to further test the elements of the kws
def test_sample_documents_associated_with_keywords_not_existing_keywords_case(
        self):
    """
    in case keywords are non-existent in the corpus: no document is
    expected for foo, bar and baz
    """
    unknown_kws = Keyword.get_many(["foo", "bar", "baz"])
    sampled = self.r.sample_documents_associated_with_keywords(
        unknown_kws, 999)

    self.assertEqual(0, len(sampled))
def test_recommend(self):
    """
    Check the documents and the keywords returned by recommend() for
    the session prepared in setUp
    """
    result = self.r.recommend(self.session, 4, 4, 1, .5, 1., .5)
    docs, kws = result

    # documents first, then keywords
    self.assertEqual(Document.get_many([1, 8, 2, 6]), docs)
    self.assertEqual(
        Keyword.get_many(
            ["redis", "database", "the", "mysql", "a", "python"]),
        kws)
def test_fb_from_dockw(self):
    """
    Propagate a feedback of .5 on redis inside document 1, run the
    updater and check the feedback of the document and of the keyword
    """
    redis = Keyword.get("redis")
    first_doc = Document.get(1)

    ppgt.fb_from_dockw(redis, first_doc, .5, self.session)
    upd.update(self.session)

    doc_feedback = first_doc.fb(self.session)
    self.assertAlmostEqual(0.183701573217, doc_feedback)

    kw_feedback = redis.fb(self.session)
    self.assertAlmostEqual(1/4., kw_feedback)
def test_recommend_main(self):
    """
    Feed the prepared feedback to the app, then ask for a non-initial
    recommendation (start=False) and check documents and keywords
    """
    app = self.app

    # receive the feedback first
    app.receive_feedbacks(self.session, self.fb)

    docs, kws = app.recommend(start=False, session=self.session)

    expected_docs = Document.get_many([1, 2, 6])
    self.assertEqual(expected_docs, docs)

    expected_kws = Keyword.get_many(
        ["redis", "database", "a", "python", "the"])
    self.assertEqual(expected_kws, kws)
def test_recommend_documents_sensible_query(self):
    """
    query that has keywords existing in the documents' keyword list:
    four documents and the three query keywords are expected back
    """
    query = "database, python, redis"

    result = self.r.recommend_documents(query, 4)
    matched_docs, query_keywords = result

    expected_docs = Document.get_many([6, 1, 2, 5])
    self.assertEqual(expected_docs, matched_docs)

    expected_kws = Keyword.get_many(["database", "python", "redis"])
    self.assertEqual(expected_kws, query_keywords)
def test_rec_fb_from_kw(self):
    """
    getter/setter for the feedback a document receives from keywords,
    plus the weighted sum computed from that feedback
    """
    doc = Document.get(1)
    doc.rec_fb_from_kw(Keyword.get("redis"), 1, self.session)
    doc.rec_fb_from_kw(Keyword.get("database"), .5, self.session)

    self.assertEqual(doc.fb_from_kw(self.session),
                     {Keyword.get("redis"): 1, Keyword.get("database"): .5})

    # feedback from python must be refused with an AssertionError
    # (presumably because document 1 does not contain python -- confirm
    # against Document.rec_fb_from_kw)
    self.assertRaises(AssertionError, doc.rec_fb_from_kw,
                      Keyword.get("python"), 1, self.session)

    # test the weighted sum
    # NOTE(review): three weights but only two feedback values; the third
    # weight presumably belongs to a keyword without feedback -- confirm
    weights = [0.62981539329519109, 0.45460437826405437, 0.62981539329519109]
    expected = (weights[0] * 1 + weights[1] * .5) / sum(weights)

    # the sum is the result of floating point arithmetic, so it is
    # compared approximately, as the other float checks in these tests are
    self.assertAlmostEqual(expected, doc.fb_weighted_sum(self.session))
def test_two(self):
    """
    Three rounds with different documents and keywords; both score
    sequences returned by evaluate() are compared with recorded values
    """
    rounds = (
        ([8, 10], ["a", "the"]),
        ([3, 4], ["python", "database"]),
        ([2, 1], ["database", "redis"]),
    )
    # all document lists are built first, then all keyword lists
    docs = [Document.get_many(doc_ids) for doc_ids, _ in rounds]
    kws = [Keyword.get_many(kw_ids) for _, kw_ids in rounds]

    scores = self.e.evaluate(docs, kws)

    expected = (
        [0.34491169135422844, 0.1726882003112921, 1.0],
        [0.4834283906452939, 0.759679156743632, 0.9999999999999999],
    )
    for position in range(2):
        self.assertArrayAlmostEqual(expected[position], scores[position])
def test_recommend(self):
    """
    recommend() with explicit keyword and document filters is expected
    to give documents 2, 1, 6, 7 and keywords redis, database, python,
    mysql, a, the
    """
    docs, kws = self.r.recommend(
        self.session, 4, 4, 1, .5, 1., .5,
        kw_filters=[self.my_kw_filter],
        doc_filters=[self.kw_count_filter, self.has_database_filter])

    # Parenthesised single-argument print gives the same output under the
    # Python 2 print statement and the Python 3 print function.
    print(self.fmim.doc2kw_m.shape)

    self.assertEqual(Document.get_many([2, 1, 6, 7]), docs)
    self.assertEqual(
        Keyword.get_many(
            ["redis", "database", "python", "mysql", "a", "the"]),
        kws)