def receive_feedbacks(self, session, feedbacks):
    """
    Receive feedbacks from user and propagate them.

    The format of feedback is:
    {
      "docs": [[doc_id, feedback_value], ...],
      "kws": [[keyword_id, feedback_value], ...],
      "dockws": [[keyword_id, doc_id, feedback_value], ...]
    }
    A missing key is treated as an empty list.

    Every entry is resolved with `Document.get` / `Keyword.get` and handed
    to the propagator (`self.ppgt`); afterwards `self.upd.update(session)`
    is called once.
    """
    # A parenthesised single-argument print gives the same output as the
    # print statement on Python 2 and is also valid syntax on Python 3.
    print("propagation started...")

    for doc_id, fb in feedbacks.get("docs", []):
        self.ppgt.fb_from_doc(Document.get(doc_id), fb, session)

    for kw_id, fb in feedbacks.get("kws", []):
        self.ppgt.fb_from_kw(Keyword.get(kw_id), fb, session)

    for kw_id, doc_id, fb in feedbacks.get("dockws", []):
        doc = Document.get(doc_id)
        kw = Keyword.get(kw_id)
        self.ppgt.fb_from_dockw(kw, doc, fb, session)

    # propagation is done
    # updates the feedback value
    self.upd.update(session)

    print("propagation finished")
def test_rec_fb_from_dockw(self):
    """
    getter/setter for receiving feedback from in-document keyword
    """
    doc = Document.get(1)
    doc.rec_fb_from_dockw(Keyword.get("redis"), doc, 1, self.session)
    doc.rec_fb_from_dockw(Keyword.get("database"), doc, .5, self.session)

    self.assertEqual(doc.fb_from_kw(self.session),
                     {Keyword.get("redis"): 1,
                      Keyword.get("database"): .5})

    # not the right document
    # NOTE: arguments follow the (keyword, document, feedback, session)
    # order used by the successful calls above, so the AssertionError is
    # triggered by the mismatching document and not by swapped arguments.
    self.assertRaises(AssertionError, doc.rec_fb_from_dockw,
                      Keyword.get("redis"), Document.get(2), 1, self.session)

    # python is not a keyword for document#1, error should be raised
    self.assertRaises(AssertionError, doc.rec_fb_from_dockw,
                      Keyword.get("python"), doc, 1, self.session)

    # test the weighted sum
    weights = [
        0.62981539329519109,
        0.45460437826405437,
        0.62981539329519109,
    ]
    self.assertEqual((weights[0] * 1 + weights[1] * .5) / sum(weights),
                     doc.fb_weighted_sum(self.session))
def __init__(self, **kwargs):
    """
    Store every keyword argument as an attribute of the instance.

    After the attributes are set, `Document.load_all_from_db()` is called
    if `Document.all_docs_loaded` is falsy.
    """
    for name, value in kwargs.items():
        setattr(self, name, value)

    if not Document.all_docs_loaded:
        # Parenthesised single-argument print: same output on Python 2,
        # valid syntax on Python 3.
        print("loading docs from db...")
        Document.load_all_from_db()
def test_similarity(self):
    """Check `similarity_to` between single documents and single keywords."""
    # for doc
    doc_a, doc_b, doc_c = [Document.get(doc_id) for doc_id in (1, 2, 3)]

    self.assertAlmostEqual(0.6300877890447911, doc_a.similarity_to(doc_b))
    # similarity is expected to be symmetric
    self.assertAlmostEqual(doc_b.similarity_to(doc_a),
                           doc_a.similarity_to(doc_b))
    self.assertAlmostEqual(0.31713642199844894, doc_a.similarity_to(doc_c))
    self.assertRaises(NotImplementedError,
                      doc_a.similarity_to, doc_c, "not implemented metric")

    # for kw
    kw_a, kw_b, kw_c = [Keyword.get(kw_id)
                        for kw_id in ("redis", "database", "python")]

    self.assertAlmostEqual(0.6698544675330306, kw_a.similarity_to(kw_b))
    # similarity is expected to be symmetric
    self.assertAlmostEqual(kw_b.similarity_to(kw_a),
                           kw_a.similarity_to(kw_b))
    self.assertAlmostEqual(0.2613424459663648, kw_a.similarity_to(kw_c))
    self.assertRaises(NotImplementedError,
                      kw_a.similarity_to, kw_c, "not implemented metric")
def setUp(self):
    """
    Build the recommender, a session with some feedback, and a
    FeatureMatrixAndIndexMapping restricted to the filtered keywords and
    documents (stored in `self.fmim`).
    """
    # make the fmim
    keyword_filters = [self.my_kw_filter]
    document_filters = [self.kw_count_filter, self.has_database_filter]

    self.r = LinRelRecommender(
        2, 2,
        1.0, 0.1, 1.0, 0.1,  # the default configuration
        kw_filters=None,
        doc_filters=[self.kw_count_filter, self.has_database_filter],
        **fmim.__dict__)

    self.session = get_session()

    for kw_id, value in (("redis", .7), ("database", .6)):
        self.session.update_kw_feedback(Keyword.get(kw_id), value)

    for doc_id in (1, 2, 8):
        self.session.update_doc_feedback(Document.get(doc_id), .7)

    kept_kws = self.r._filter_objs(keyword_filters, kws=Keyword.all_kws)
    kept_docs = self.r._filter_objs(document_filters, docs=Document.all_docs)

    kw_result = self.r._submatrix_and_indexing(
        kept_kws, kept_docs, fmim.kw2doc_m, fmim.kw_ind, fmim.doc_ind)
    kw2doc_submat, kw_ind_map, kw_ind_map_r = kw_result

    doc_result = self.r._submatrix_and_indexing(
        kept_docs, kept_kws, fmim.doc2kw_m, fmim.doc_ind, fmim.kw_ind)
    doc2kw_submat, doc_ind_map, doc_ind_map_r = doc_result

    self.fmim = FeatureMatrixAndIndexMapping(
        kw_ind_map, doc_ind_map,
        kw2doc_submat, doc2kw_submat,
        kw_ind_map_r, doc_ind_map_r)
def test_get_many(self):
    """`get_many` with a literal id list equals `get_many` with a variable."""
    cases = (
        (Document, [1, 2]),
        (Keyword, ["a", "the"]),
    )
    for model, ids in cases:
        # a fresh, equal list on the left; the shared list on the right
        self.assertEqual(model.get_many(list(ids)), model.get_many(ids))
def test_get_many(self):
    """Fetching by an inline id list and by a named id list must agree."""
    document_ids = [1, 2]
    keyword_ids = ["a", "the"]

    docs_from_literal = Document.get_many([1, 2])
    docs_from_variable = Document.get_many(document_ids)
    self.assertEqual(docs_from_literal, docs_from_variable)

    kws_from_literal = Keyword.get_many(["a", "the"])
    kws_from_variable = Keyword.get_many(keyword_ids)
    self.assertEqual(kws_from_literal, kws_from_variable)
def test_doc_fb_threshold_filter_with_prefiltering(self):
    """With `with_fb=True` only document 1 (feedback .2) passes the 0.1 threshold."""
    # change the feedback
    for doc_id, value in ((1, .2), (2, .0999999)):
        self.session.update_doc_feedback(Document.get(doc_id), value)

    actual = doc_fb_threshold_filter(0.1, self.session, with_fb=True)
    expected = Document.get_many([1])

    self.assertEqual(expected, actual)
def test_one(self):
    """Three rounds that all match the goal are expected to score 1 everywhere."""
    docs = [Document.get_many(ids) for ids in ([1, 2], [1, 2], [2, 1])]
    kws = [Keyword.get_many(["redis", "database"]) for _ in range(3)]

    scores = self.e.evaluate(docs, kws)

    expected_first = [1, 1, 1]
    expected_second = [1, 1, 1]
    self.assertArrayAlmostEqual(expected_first, scores[0])
    self.assertArrayAlmostEqual(expected_second, scores[1])
def test_doc_fb_threshold_filter(self):
    """Filtering all documents at threshold 0.1 keeps only document 1."""
    # change the feedback
    new_feedback = ((1, .2), (2, .0999999))
    for doc_id, value in new_feedback:
        self.session.update_doc_feedback(Document.get(doc_id), value)

    actual = doc_fb_threshold_filter(0.1, self.session,
                                     docs=Document.all_docs,
                                     with_fb=False)
    expected = Document.get_many([1])

    self.assertEqual(expected, actual)
def test_kw_fb_filter(self):
    """The "kw_fb" filter keeps "redis" after it received feedback."""
    redis = Keyword.get("redis")
    redis.rec_fb_from_doc(Document.get(1), 1, self.session)

    self.session.add_doc_recom_list(Document.get_many([1, 2, 6]))

    weighted_fb = redis.fb_weighted_sum(self.session)
    self.session.update_kw_feedback(redis, weighted_fb)

    kw_fb_filter = FilterRepository.filters["kw_fb"]
    actual = kw_fb_filter([redis])
    expected = Keyword.get_many(["redis"])

    self.assertEqual(expected, actual)
def test_two(self):
    """Evaluate three differing rounds against fixed expected scores."""
    doc_id_rounds = ([8, 10], [3, 4], [2, 1])
    kw_id_rounds = (["a", "the"],
                    ["python", "database"],
                    ["database", "redis"])

    docs = [Document.get_many(ids) for ids in doc_id_rounds]
    kws = [Keyword.get_many(ids) for ids in kw_id_rounds]

    scores = self.e.evaluate(docs, kws)

    expected_first = [0.34491169135422844, 0.1726882003112921, 1.0]
    expected_second = [0.4834283906452939, 0.759679156743632,
                       0.9999999999999999]

    self.assertArrayAlmostEqual(expected_first, scores[0])
    self.assertArrayAlmostEqual(expected_second, scores[1])
def test_associated_keywords_from_documents(self):
    """Keywords of documents 1 and 2, without and with an exclusion list."""
    found = self.r.associated_keywords_from_docs(Document.get_many([1, 2]))
    everything = Keyword.get_many(["a", "database", "redis", "the"])
    self.assertEqual(set(everything), set(found))

    # same query, but "redis" must be left out
    exclude_kws = [Keyword.get("redis")]
    found = self.r.associated_keywords_from_docs(Document.get_many([1, 2]),
                                                 exclude_kws)
    without_redis = Keyword.get_many(["a", "database", "the"])
    self.assertEqual(set(without_redis), set(found))
def test_filter_objs(self):
    """Chaining two keyword-presence filters keeps documents 1 and 7 out of 1, 7, 8."""
    def has_redis_filter(objs=None):
        def mentions_redis(obj):
            return Keyword.get("redis") in obj.keywords
        return filter(mentions_redis, objs)

    def has_database_filter(objs=None):
        def mentions_database(obj):
            return Keyword.get("database") in obj.keywords
        return filter(mentions_database, objs)

    filter_chain = [has_redis_filter, has_database_filter]

    self.assertEqual(
        list(Document.get_many([1, 7])),
        self.r._filter_objs(filter_chain,
                            objs=Document.get_many([1, 7, 8])))
def test_associated_keywords_from_documents(self):
    """Associated keywords of documents 1 and 2; "redis" can be excluded."""
    associated = self.r.associated_keywords_from_docs(
        Document.get_many([1, 2]))
    self.assertEqual(
        set(Keyword.get_many(["a", "database", "redis", "the"])),
        set(associated))

    exclude_kws = [Keyword.get("redis")]
    associated = self.r.associated_keywords_from_docs(
        Document.get_many([1, 2]), exclude_kws)
    self.assertEqual(
        set(Keyword.get_many(["a", "database", "the"])),
        set(associated))
def test_doc_hashable(self):
    """Document lists with the same members in a different order share one dict key."""
    mapping = {}
    first = Document.get_many([1, 2, 3])
    reordered = Document.get_many([2, 1, 3])
    other = Document.get_many([4, 5, 6])

    mapping[first] = 1
    mapping[reordered] = 2  # override
    mapping[other] = 3

    self.assertEqual({first: 2, other: 3}, mapping)
def test_doc_fb_filter(self):
    """
    The "doc_fb" filter is expected to return an empty document list for
    document 1 after it received keyword feedback.
    """
    doc = Document.get(1)
    doc.rec_fb_from_kw(Keyword.get("redis"), 1, self.session)

    self.session.update_doc_feedback(doc, doc.fb_weighted_sum(self.session))

    # Debug output, written so that it produces the same text as the old
    # print statements on Python 2 and is also valid syntax on Python 3.
    print("doc.fb(self.session)= %s" % (doc.fb(self.session),))

    actual = FilterRepository.filters["doc_fb"]([doc])
    expected = Document.get_many([])

    print(doc.fb(self.session))

    self.assertEqual(expected, actual)
def test_doc_hashable(self):
    """Document lists are usable as dict keys; member order does not matter."""
    list_123 = Document.get_many([1, 2, 3])
    list_213 = Document.get_many([2, 1, 3])
    list_456 = Document.get_many([4, 5, 6])

    by_doclist = {}
    by_doclist[list_123] = 1
    by_doclist[list_213] = 2  # override
    by_doclist[list_456] = 3

    expected = {list_123: 2, list_456: 3}
    self.assertEqual(expected, by_doclist)
def test_affected_docs(self):
    """Affected documents accumulate on the session across calls."""
    tracked = [Document.get(1), Document.get(2)]
    self.session.add_affected_docs(*tracked)
    self.assertEqual(tracked, self.session.affected_docs)

    # adding one more document later
    extra = Document.get(3)
    tracked.append(extra)
    self.session.add_affected_docs(extra)

    self.assertEqual(set(tracked), set(self.session.affected_docs))
def test_fb_weighted_sum_dockw_only(self):
    """
    test if the weighted sum is correct
    only feedback from dockw/doc
    """
    redis = Keyword.get("redis")

    redis.rec_fb_from_dockw(redis, Document.get(1), 1, self.session)
    redis.rec_fb_from_doc(Document.get(2), .5, self.session)

    expected = (1 + .5) / 2
    self.assertEqual(expected, redis.fb_weighted_sum(self.session))
def test_model2modellist_similarity(self):
    """Similarity between a single model and a list of models."""
    # for keywords
    single_kw = Keyword.get("redis")
    kw_group = Keyword.get_many(["database", "mysql"])
    self.assertAlmostEqual(0.3754029265429976,
                           single_kw.similarity_to(kw_group))

    # for documents
    single_doc = Document.get(6)
    doc_group = Document.get_many([1, 2])
    self.assertAlmostEqual(0.7382455893131392,
                           single_doc.similarity_to(doc_group))
def test_modellist2modellist_similarity(self):
    """Similarity between two model lists of the same kind."""
    # for keywords
    left_kws = Keyword.get_many(["redis", "a"])
    right_kws = Keyword.get_many(["database", "the"])
    self.assertAlmostEqual(0.42205423035497763,
                           left_kws.similarity_to(right_kws))

    # for documents
    left_docs = Document.get_many([3, 5])
    right_docs = Document.get_many([4, 6])
    self.assertAlmostEqual(0.6990609119502719,
                           left_docs.similarity_to(right_docs))
def setUp(self):
    """Create the recommender and a session pre-populated with feedback."""
    self.r = LinRelRecommender(2, 2,
                               1., .1,
                               1., .1,
                               None, None,
                               **fmim.__dict__)

    self.session = get_session()

    # giving the feedbacks
    for kw_id, value in (("redis", .7), ("database", .6)):
        self.session.update_kw_feedback(Keyword.get(kw_id), value)

    for doc_id in (1, 2, 8):
        self.session.update_doc_feedback(Document.get(doc_id), .7)
def test_filter_objs(self):
    """`_filter_objs` applies both filters; only documents 1 and 7 remain."""
    def has_redis_filter(objs=None):
        def keeps(obj):
            return Keyword.get("redis") in obj.keywords
        return filter(keeps, objs)

    def has_database_filter(objs=None):
        def keeps(obj):
            return Keyword.get("database") in obj.keywords
        return filter(keeps, objs)

    self.assertEqual(
        list(Document.get_many([1, 7])),
        self.r._filter_objs(
            [has_redis_filter, has_database_filter],
            objs=Document.get_many([1, 7, 8])))
def test_document_centroid(self):
    """The centroid of a document list is the mean of its document vectors."""
    # a one-element list: the centroid is the document vector itself
    only_doc = Document.get(1)
    singleton = DocumentList([only_doc])
    self.assertArrayAlmostEqual(matrix2array(singleton.centroid),
                                only_doc.vec.toarray()[0])

    # two documents: the centroid is the average of both vectors
    first = Document.get(1)
    second = Document.get(2)
    pair = Document.get_many([1, 2])
    self.assertArrayAlmostEqual(
        matrix2array(pair.centroid),
        (first.vec.toarray()[0] + second.vec.toarray()[0]) / 2)
def test_rec_fb_from_doc(self):
    """
    getter/setting for receiving feedback from document
    """
    redis = Keyword.get("redis")

    for doc_id, value in ((1, 1), (2, .5)):
        redis.rec_fb_from_doc(Document.get(doc_id), value, self.session)

    self.assertEqual(redis.fb_from_doc(self.session),
                     {Document.get(1): 1, Document.get(2): .5})

    # does not contain redis, error should be raised
    self.assertRaises(AssertionError, redis.rec_fb_from_doc,
                      Document.get(3), 1, self.session)
def test_rec_fb_from_dockw(self):
    """
    getter/setting for receiving feedback from in-document keyword
    """
    redis = Keyword.get("redis")

    for doc_id, value in ((2, .5), (1, 1)):
        redis.rec_fb_from_dockw(redis, Document.get(doc_id), value,
                                self.session)

    self.assertEqual(redis.fb_from_doc(self.session),
                     {Document.get(1): 1, Document.get(2): .5})

    # is not the right keyword
    self.assertRaises(AssertionError, redis.rec_fb_from_dockw,
                      Keyword.get("the"), Document.get(1), 1, self.session)
def test_modellist2modellist_similarity(self):
    """List-to-list similarity for keyword lists and for document lists."""
    # for keywords
    kws_a = Keyword.get_many(["redis", "a"])
    kws_b = Keyword.get_many(["database", "the"])
    kw_similarity = kws_a.similarity_to(kws_b)
    self.assertAlmostEqual(0.42205423035497763, kw_similarity)

    # for documents
    docs_a = Document.get_many([3, 5])
    docs_b = Document.get_many([4, 6])
    doc_similarity = docs_a.similarity_to(docs_b)
    self.assertAlmostEqual(0.6990609119502719, doc_similarity)
def test_equality_same_type(self):
    """Lists of the same type are equal iff they hold the same members, in any order."""
    # keyword lists
    kws = Keyword.get_many(["redis", "a", "the"])
    kws_reordered = Keyword.get_many(["a", "the", "redis"])
    kws_different = Keyword.get_many(["a", "the", "python"])

    self.assertEqual(kws, kws_reordered)
    self.assertNotEqual(kws_different, kws_reordered)

    # document lists
    docs = Document.get_many([1, 2, 3])
    docs_reordered = Document.get_many([2, 3, 1])
    docs_different = Document.get_many([4, 5, 6])

    self.assertEqual(docs, docs_reordered)
    self.assertNotEqual(docs_different, docs_reordered)
def sample_documents_associated_with_keywords(self, keywords, n):
    """
    sample n documents from all documents that contain any of the keywords

    keywords: a KeywordList or a plain list; every element must expose an
        `id` attribute.
    n: the number of documents to sample.

    Returns `Document.get_many(...)` of the sampled document ids. When
    `random.sample` raises ValueError (sample size larger than the
    population), all matching documents are returned instead.

    Raises AssertionError when `keywords` is neither a KeywordList nor a
    list.
    """
    # isinstance also accepts subclasses, and the 1-tuple keeps the message
    # formatting from failing when `keywords` happens to be a tuple.
    assert isinstance(keywords, (KeywordList, ListType)), \
        "keywords should be KeywordList, but is %r" % (keywords,)

    # get all doc ids of which the document contains any of the keywords
    doc_ids = self._doc_ids_that_contain_keywords([kw.id for kw in keywords])

    # sample it
    # Only the sampling itself is guarded, so that a ValueError raised
    # inside Document.get_many is not mistaken for "sample too large".
    try:
        chosen_ids = random.sample(doc_ids, n)
    except ValueError:  # sample size larger than population
        chosen_ids = doc_ids

    return Document.get_many(chosen_ids)
def test_equality_same_type(self):
    """Equality of model lists ignores member order but not membership."""
    kw_base, kw_same, kw_other = [
        Keyword.get_many(ids)
        for ids in (["redis", "a", "the"],
                    ["a", "the", "redis"],
                    ["a", "the", "python"])
    ]
    self.assertEqual(kw_base, kw_same)
    self.assertNotEqual(kw_other, kw_same)

    doc_base, doc_same, doc_other = [
        Document.get_many(ids)
        for ids in ([1, 2, 3], [2, 3, 1], [4, 5, 6])
    ]
    self.assertEqual(doc_base, doc_same)
    self.assertNotEqual(doc_other, doc_same)
def test_add_docs(self):
    """
    as well as last_recom_docs
    """
    first_round = Document.get_many([1, 2, 3])
    second_round = Document.get_many([2, 3, 4])

    self.session.add_doc_recom_list(first_round)
    self.assertEqual([first_round], self.session.recom_docs)

    self.session.add_doc_recom_list(second_round)
    self.assertEqual([first_round, second_round], self.session.recom_docs)

    # the most recently added list is reported as the last one
    self.assertEqual(second_round, self.session.last_recom_docs)
def test_fb_weighted_sum_mixed_source(self):
    """
    test if the weighted sum is correct
    feedback include all three sources
    """
    redis = Keyword.get("redis")

    redis.rec_fb_from_dockw(redis, Document.get(1), 1, self.session)
    redis.rec_fb_from_doc(Document.get(2), .5, self.session)
    redis.rec_fb_from_kw(redis, .5, self.session)

    expected = .3 * (1 / 2. + 1 / 4.) + .7 * .5
    self.assertEqual(expected, redis.fb_weighted_sum(self.session))
def test_document_centroid(self):
    """Centroid of one document equals its vector; of two, their average."""
    doc = Document.get(1)
    one_doc_list = DocumentList([doc])
    actual_single = matrix2array(one_doc_list.centroid)
    expected_single = doc.vec.toarray()[0]
    self.assertArrayAlmostEqual(actual_single, expected_single)

    doc_one = Document.get(1)
    doc_two = Document.get(2)
    two_doc_list = Document.get_many([1, 2])
    actual_pair = matrix2array(two_doc_list.centroid)
    expected_pair = (doc_one.vec.toarray()[0] + doc_two.vec.toarray()[0]) / 2
    self.assertArrayAlmostEqual(actual_pair, expected_pair)
def setUp(self):
    """Create a GoalBasedEvaluator whose goal is documents 1, 2 and keywords redis, database."""
    target_docs = Document.get_many([1, 2])
    target_kws = Keyword.get_many(["redis", "database"])

    self.e = GoalBasedEvaluator()
    self.e.setGoal(target_docs, target_kws)
def test_loop_done(self):
    """
    test if things are cleaned when the loop is done
    """
    redis = Keyword.get("redis")

    redis.rec_fb_from_dockw(redis, Document.get(1), 1, self.session)
    redis.rec_fb_from_doc(Document.get(2), .5, self.session)
    redis.rec_fb_from_kw(redis, .5, self.session)

    # terminate the loop
    # everything feedback stuff cleaned
    redis.loop_done(self.session)

    self.assertEqual(redis.fb_weighted_sum(self.session), 0)
def setUp(self):
    """Prepare `self.e`, an evaluator with a fixed document and keyword goal."""
    goal_documents = Document.get_many([1, 2])
    goal_keywords = Keyword.get_many(["redis", "database"])

    self.e = GoalBasedEvaluator()
    self.e.setGoal(goal_documents, goal_keywords)
def test_recommend(self):
    """`recommend` returns the expected ordered documents and keywords."""
    recommended_docs, recommended_kws = self.r.recommend(
        self.session, 4, 4, 1, .5, 1., .5)

    self.assertEqual(Document.get_many([1, 8, 2, 6]), recommended_docs)

    expected_kw_ids = ["redis", "database", "the", "mysql", "a", "python"]
    self.assertEqual(Keyword.get_many(expected_kw_ids), recommended_kws)
def setUp(self):
    """Build the CmdApp, a session with recommendation lists, and a sample feedback dict."""
    query_recommender = QueryBasedRecommender(3, 2, 3, 2, **fmim.__dict__)
    linrel_recommender = LinRelRecommender(3, 3,
                                           1., .5,
                                           1., .5,
                                           None, None,
                                           None, None,
                                           **fmim.__dict__)

    self.app = CmdApp(OnePassPropagator, OverrideUpdater,
                      query_recommender, linrel_recommender)

    self.session = get_session()

    # add recommended list
    self.session.add_doc_recom_list(Document.get_many([1, 2, 3]))
    self.session.add_kw_recom_list(
        Keyword.get_many(["a", "redis", "database"]))

    doc_feedback = [[1, .5]]
    kw_feedback = [["redis", .5]]
    dockw_feedback = [["redis", 1, .5]]
    self.fb = {
        "docs": doc_feedback,
        "kws": kw_feedback,
        "dockws": dockw_feedback,
    }

    # fixed seed
    random.seed(123456)
def test_rec_fb_from_dockw(self):
    """
    getter/setting for receiving feedback from in-document keyword
    """
    keyword = Keyword.get("redis")

    keyword.rec_fb_from_dockw(keyword, Document.get(2), .5, self.session)
    keyword.rec_fb_from_dockw(keyword, Document.get(1), 1, self.session)

    recorded = keyword.fb_from_doc(self.session)
    self.assertEqual(recorded, {Document.get(1): 1, Document.get(2): .5})

    # is not the right keyword
    wrong_keyword = Keyword.get("the")
    self.assertRaises(AssertionError, keyword.rec_fb_from_dockw,
                      wrong_keyword, Document.get(1), 1, self.session)
def test_rec_fb_from_doc(self):
    """
    getter/setting for receiving feedback from document
    """
    keyword = Keyword.get("redis")

    keyword.rec_fb_from_doc(Document.get(1), 1, self.session)
    keyword.rec_fb_from_doc(Document.get(2), .5, self.session)

    recorded = keyword.fb_from_doc(self.session)
    self.assertEqual(recorded, {Document.get(1): 1, Document.get(2): .5})

    # does not contain redis, error should be raised
    unrelated_doc = Document.get(3)
    self.assertRaises(AssertionError, keyword.rec_fb_from_doc,
                      unrelated_doc, 1, self.session)
def test_one(self):
    """Rounds identical to the goal (up to order) score 1 for docs and keywords."""
    doc_rounds = []
    for ids in ([1, 2], [1, 2], [2, 1]):
        doc_rounds.append(Document.get_many(ids))

    kw_rounds = []
    for _ in range(3):
        kw_rounds.append(Keyword.get_many(["redis", "database"]))

    scores = self.e.evaluate(doc_rounds, kw_rounds)

    all_ones = ([1, 1, 1], [1, 1, 1])
    self.assertArrayAlmostEqual(all_ones[0], scores[0])
    self.assertArrayAlmostEqual(all_ones[1], scores[1])
def test_recommend(self):
    """Check the documents and keywords returned by `recommend` for the prepared session."""
    result = self.r.recommend(self.session, 4, 4, 1, .5, 1., .5)
    docs, kws = result

    expected_docs = Document.get_many([1, 8, 2, 6])
    self.assertEqual(expected_docs, docs)

    expected_kws = Keyword.get_many(
        ["redis", "database", "the", "mysql", "a", "python"])
    self.assertEqual(expected_kws, kws)
def test_sample_documents_associated_with_keywords_sample_size_too_large(
        self):
    """
    in case the sample size is too large
    """
    python_only = Keyword.get_many(["python"])
    sampled = self.r.sample_documents_associated_with_keywords(
        python_only, 999)

    # requesting 999 documents is expected to return every matching one
    self.assertEqual(Document.get_many([3, 4, 5, 6, 8]), sampled)
def test_rec_from_doc(self):
    """
    getter/setting for receiving feedback from document
    """
    document = Document.get(1)

    # the latest feedback value replaces the previous one
    for value in (1, .5):
        document.rec_fb_from_doc(document, value, self.session)
        self.assertEqual(value, document.fb_from_doc(self.session))

    # is not the right document
    self.assertRaises(AssertionError, document.rec_fb_from_doc,
                      Document.get(2), 1, self.session)

    # test the weighted sum
    self.assertEqual(.5 * .7, document.fb_weighted_sum(self.session))
def test_recommend_main(self):
    """After feedback is received, a non-start `recommend` returns the expected lists."""
    # receive the feedback first
    self.app.receive_feedbacks(self.session, self.fb)

    recommended_docs, recommended_kws = self.app.recommend(
        start=False, session=self.session)

    self.assertEqual(Document.get_many([1, 2, 6]), recommended_docs)

    expected_kw_ids = ["redis", "database", "a", "python", "the"]
    self.assertEqual(Keyword.get_many(expected_kw_ids), recommended_kws)
def test_update_doc_fb(self):
    """update document feedback"""
    document = Document.get(1)
    self.session.update_doc_feedback(document, 1)

    # visible both on the session and through the document itself
    self.assertEqual(self.session.doc_feedbacks, {document: 1})
    self.assertEqual(document.fb(self.session), 1)