Example #1
0
    def receive_feedbacks(self, session, feedbacks):
        """
        Propagate a batch of user feedbacks, then run the updater once.

        The format of feedback is:
        {
        "docs": [[doc_id, feedback_value], ...],
        "kws": [[keyword_id, feedback_value], ...],
        "dockws": [[keyword_id, doc_id, feedback_value], ...]
        }

        Every key is optional; a missing key is handled like an empty list.

        Param:
        session: handed through unchanged to the propagator (self.ppgt)
                 and to the updater (self.upd)
        feedbacks: dict in the format above

        Return:
        None
        """
        # A parenthesised single string prints the same text whether print
        # is the Python 2 statement or the Python 3 function.
        print("propagation started...")

        for doc_id, fb in feedbacks.get("docs", []):
            self.ppgt.fb_from_doc(Document.get(doc_id), fb, session)

        for kw_id, fb in feedbacks.get("kws", []):
            self.ppgt.fb_from_kw(Keyword.get(kw_id), fb, session)

        for kw_id, doc_id, fb in feedbacks.get("dockws", []):
            doc = Document.get(doc_id)
            kw = Keyword.get(kw_id)
            self.ppgt.fb_from_dockw(kw, doc, fb, session)

        # propagation is done
        # updates the feedback value
        self.upd.update(session)
        print("propagation finished")
Example #2
0
    def receive_feedbacks(self, session, feedbacks):
        """
        Feed every piece of user feedback to the propagator and finish
        with a single updater run.

        The format of feedback is:
        {
        "docs": [[doc_id, feedback_value], ...],
        "kws": [[keyword_id, feedback_value], ...],
        "dockws": [[keyword_id, doc_id, feedback_value], ...]
        }

        Any of the three keys may be absent; it then counts as empty.

        Param:
        session: passed unchanged to self.ppgt and self.upd
        feedbacks: dict in the format above

        Return:
        None
        """
        # print(...) with one string argument is valid, and prints the same
        # line, on both Python 2 and Python 3.
        print("propagation started...")

        for doc_id, fb in feedbacks.get("docs", []):
            doc = Document.get(doc_id)
            self.ppgt.fb_from_doc(doc, fb, session)

        for kw_id, fb in feedbacks.get("kws", []):
            kw = Keyword.get(kw_id)
            self.ppgt.fb_from_kw(kw, fb, session)

        for kw_id, doc_id, fb in feedbacks.get("dockws", []):
            doc = Document.get(doc_id)
            kw = Keyword.get(kw_id)
            self.ppgt.fb_from_dockw(kw, doc, fb, session)

        # propagation is done
        # updates the feedback value
        self.upd.update(session)
        print("propagation finished")
Example #3
0
    def test_rec_fb_from_dockw(self):
        """
        getter/setter for receiving feedback from in-document keyword
        """
        doc = Document.get(1)

        doc.rec_fb_from_dockw(Keyword.get("redis"), doc, 1, self.session)
        doc.rec_fb_from_dockw(Keyword.get("database"), doc, .5, self.session)

        self.assertEqual(doc.fb_from_kw(self.session), {
            Keyword.get("redis"): 1,
            Keyword.get("database"): .5
        })

        # The negative cases below use the same (keyword, document, feedback,
        # session) argument order as the successful calls above. With the
        # first two arguments swapped, the AssertionError could be raised
        # for a reason other than the one each case is meant to exercise.

        #not the right document
        self.assertRaises(AssertionError, doc.rec_fb_from_dockw,
                          Keyword.get("redis"), Document.get(2), 1,
                          self.session)

        #python is not a keyword for document#1, error should be raised
        self.assertRaises(AssertionError, doc.rec_fb_from_dockw,
                          Keyword.get("python"), doc, 1, self.session)

        #test the weighted sum
        weights = [
            0.62981539329519109, 0.45460437826405437, 0.62981539329519109
        ]
        self.assertEqual((weights[0] * 1 + weights[1] * .5) / sum(weights),
                         doc.fb_weighted_sum(self.session))
Example #4
0
    def test_similarity(self):
        """
        similarity_to between single documents and between single keywords
        """
        # each case: getter, three ids, expected similarity of first/second,
        # expected similarity of first/third
        cases = [
            # for doc
            (Document.get, (1, 2, 3),
             0.6300877890447911, 0.31713642199844894),
            # for kw
            (Keyword.get, ("redis", "database", "python"),
             0.6698544675330306, 0.2613424459663648),
        ]

        for getter, (id1, id2, id3), sim_12, sim_13 in cases:
            first = getter(id1)
            second = getter(id2)
            third = getter(id3)

            self.assertAlmostEqual(sim_12, first.similarity_to(second))
            # same value regardless of direction
            self.assertAlmostEqual(second.similarity_to(first),
                                   first.similarity_to(second))

            self.assertAlmostEqual(sim_13, first.similarity_to(third))

            # unknown metric name
            self.assertRaises(NotImplementedError, first.similarity_to,
                              third, "not implemented metric")
    def setUp(self):
        """
        Create the recommender and a session seeded with feedback, then
        build self.fmim from the filtered keywords and documents.
        """
        #make the fmim
        keyword_filters = [self.my_kw_filter]
        document_filters = [self.kw_count_filter, self.has_database_filter]

        #the default configuration
        self.r = LinRelRecommender(
            2, 2,
            1.0, 0.1, 1.0, 0.1,
            kw_filters=None,
            doc_filters=[self.kw_count_filter, self.has_database_filter],
            **fmim.__dict__)

        self.session = get_session()

        for kw_str, fb in (("redis", .7), ("database", .6)):
            self.session.update_kw_feedback(Keyword.get(kw_str), fb)

        for doc_id in (1, 2, 8):
            self.session.update_doc_feedback(Document.get(doc_id), .7)

        kept_kws = self.r._filter_objs(keyword_filters,
                                       kws=Keyword.all_kws)
        kept_docs = self.r._filter_objs(document_filters,
                                        docs=Document.all_docs)

        kw_side = self.r._submatrix_and_indexing(
            kept_kws, kept_docs,
            fmim.kw2doc_m, fmim.kw_ind, fmim.doc_ind)
        kw2doc_submat, kw_ind_map, kw_ind_map_r = kw_side

        doc_side = self.r._submatrix_and_indexing(
            kept_docs, kept_kws,
            fmim.doc2kw_m, fmim.doc_ind, fmim.kw_ind)
        doc2kw_submat, doc_ind_map, doc_ind_map_r = doc_side

        self.fmim = FeatureMatrixAndIndexMapping(
            kw_ind_map, doc_ind_map,
            kw2doc_submat, doc2kw_submat,
            kw_ind_map_r, doc_ind_map_r)
Example #6
0
    def test_doc_fb_threshold_filter_with_prefiltering(self):
        """
        doc_fb_threshold_filter(0.1, ..., with_fb=True) should yield
        document 1 only
        """
        #change the feedback
        for doc_id, fb in ((1, .2), (2, .0999999)):
            self.session.update_doc_feedback(Document.get(doc_id), fb)

        filtered = doc_fb_threshold_filter(0.1, self.session, with_fb=True)
        wanted = Document.get_many([1])

        self.assertEqual(wanted, filtered)
Example #7
0
    def test_doc_fb_threshold_filter(self):
        """
        doc_fb_threshold_filter over all documents with with_fb=False
        should yield document 1 only
        """
        #change the feedback
        for doc_id, fb in ((1, .2), (2, .0999999)):
            self.session.update_doc_feedback(Document.get(doc_id), fb)

        filtered = doc_fb_threshold_filter(0.1, self.session,
                                           docs=Document.all_docs,
                                           with_fb=False)
        wanted = Document.get_many([1])

        self.assertEqual(wanted, filtered)
Example #8
0
    def test_affected_docs(self):
        """
        documents added through add_affected_docs show up in affected_docs
        """
        first_two = [Document.get(1), Document.get(2)]

        self.session.add_affected_docs(*first_two)
        self.assertEqual(first_two, self.session.affected_docs)

        # adding one more document; order is not checked this time
        third = Document.get(3)
        all_three = first_two + [third]
        self.session.add_affected_docs(third)
        self.assertEqual(set(all_three),
                         set(self.session.affected_docs))
Example #9
0
    def test_fb_weighted_sum_dockw_only(self):
        """
        weighted sum when the keyword only received feedback through
        rec_fb_from_dockw and rec_fb_from_doc
        """
        session = self.session
        redis = Keyword.get("redis")

        redis.rec_fb_from_dockw(redis, Document.get(1), 1, session)
        redis.rec_fb_from_doc(Document.get(2), .5, session)

        expected = (1 + .5) / 2
        self.assertEqual(expected, redis.fb_weighted_sum(session))
    def setUp(self):
        """
        Create the recommender and a fresh session seeded with keyword and
        document feedback.
        """
        recommender_args = (2, 2, 1., .1, 1., .1, None, None)
        self.r = LinRelRecommender(*recommender_args, **fmim.__dict__)

        self.session = get_session()

        #giving the feedbacks
        for kw_str, fb in (("redis", .7), ("database", .6)):
            self.session.update_kw_feedback(Keyword.get(kw_str), fb)

        for doc_id in (1, 2, 8):
            self.session.update_doc_feedback(Document.get(doc_id), .7)
Example #11
0
    def test_document_centroid(self):
        """
        centroid of a one-document list and of a two-document list
        """
        # one document: the centroid is compared with its own vector
        only_doc = Document.get(1)
        single = DocumentList([only_doc])

        self.assertArrayAlmostEqual(matrix2array(single.centroid),
                                    only_doc.vec.toarray()[0])

        # two documents: the centroid is compared with the mean vector
        doc1 = Document.get(1)
        doc2 = Document.get(2)
        pair = Document.get_many([1, 2])

        centroid = matrix2array(pair.centroid)
        mean_vec = (doc1.vec.toarray()[0] + doc2.vec.toarray()[0]) / 2
        self.assertArrayAlmostEqual(centroid, mean_vec)
Example #12
0
    def test_fb_weighted_sum_dockw_only(self):
        """
        weighted sum when the keyword only received feedback through
        rec_fb_from_dockw and rec_fb_from_doc
        """
        session = self.session
        redis = Keyword.get("redis")

        redis.rec_fb_from_dockw(redis, Document.get(1), 1, session)
        redis.rec_fb_from_doc(Document.get(2), .5, session)

        expected = (1 + .5) / 2
        self.assertEqual(expected, redis.fb_weighted_sum(session))
Example #13
0
    def test_rec_fb_from_dockw(self):
        """
        keyword side: feedback recorded via rec_fb_from_dockw is readable
        through fb_from_doc
        """
        session = self.session
        redis = Keyword.get("redis")

        for doc_id, fb in ((2, .5), (1, 1)):
            redis.rec_fb_from_dockw(redis, Document.get(doc_id), fb, session)

        received = redis.fb_from_doc(session)
        expected = {Document.get(1): 1, Document.get(2): .5}
        self.assertEqual(received, expected)

        #is not the right keyword
        self.assertRaises(AssertionError, redis.rec_fb_from_dockw,
                          Keyword.get("the"), Document.get(1), 1, session)
Example #14
0
    def test_rec_fb_from_doc(self):
        """
        keyword side: feedback recorded via rec_fb_from_doc is readable
        through fb_from_doc
        """
        session = self.session
        redis = Keyword.get("redis")

        for doc_id, fb in ((1, 1), (2, .5)):
            redis.rec_fb_from_doc(Document.get(doc_id), fb, session)

        received = redis.fb_from_doc(session)
        expected = {Document.get(1): 1, Document.get(2): .5}
        self.assertEqual(received, expected)

        #does not contain redis, error should be raised
        self.assertRaises(AssertionError, redis.rec_fb_from_doc,
                          Document.get(3), 1, session)
Example #15
0
    def test_fb_weighted_sum_mixed_source(self):
        """
        weighted sum when feedback arrives through rec_fb_from_dockw,
        rec_fb_from_doc and rec_fb_from_kw
        """
        session = self.session
        redis = Keyword.get("redis")

        redis.rec_fb_from_dockw(redis, Document.get(1), 1, session)
        redis.rec_fb_from_doc(Document.get(2), .5, session)
        redis.rec_fb_from_kw(redis, .5, session)

        expected = .3 * (1 / 2. + 1 / 4.) + .7 * .5
        self.assertEqual(expected, redis.fb_weighted_sum(session))
Example #16
0
    def test_document_centroid(self):
        """
        centroid of a one-document list and of a two-document list
        """
        # one document: the centroid is compared with its own vector
        only_doc = Document.get(1)
        single = DocumentList([only_doc])

        self.assertArrayAlmostEqual(matrix2array(single.centroid),
                                    only_doc.vec.toarray()[0])

        # two documents: the centroid is compared with the mean vector
        doc1 = Document.get(1)
        doc2 = Document.get(2)
        pair = Document.get_many([1, 2])

        centroid = matrix2array(pair.centroid)
        mean_vec = (doc1.vec.toarray()[0] + doc2.vec.toarray()[0]) / 2
        self.assertArrayAlmostEqual(centroid, mean_vec)
Example #17
0
    def test_fb_weighted_sum_mixed_source(self):
        """
        weighted sum when feedback arrives through rec_fb_from_dockw,
        rec_fb_from_doc and rec_fb_from_kw
        """
        session = self.session
        redis = Keyword.get("redis")

        redis.rec_fb_from_dockw(redis, Document.get(1), 1, session)
        redis.rec_fb_from_doc(Document.get(2), .5, session)
        redis.rec_fb_from_kw(redis, .5, session)

        expected = .3 * (1 / 2. + 1 / 4.) + .7 * .5
        self.assertEqual(expected, redis.fb_weighted_sum(session))
Example #18
0
    def test_loop_done(self):
        """
        after loop_done the keyword's weighted feedback sum is 0
        """
        session = self.session
        redis = Keyword.get("redis")

        redis.rec_fb_from_dockw(redis, Document.get(1), 1, session)
        redis.rec_fb_from_doc(Document.get(2), .5, session)
        redis.rec_fb_from_kw(redis, .5, session)

        # terminate the loop
        redis.loop_done(session)

        leftover = redis.fb_weighted_sum(session)
        self.assertEqual(leftover, 0)
Example #19
0
    def test_loop_done(self):
        """
        after loop_done the keyword's weighted feedback sum is 0
        """
        session = self.session
        redis = Keyword.get("redis")

        redis.rec_fb_from_dockw(redis, Document.get(1), 1, session)
        redis.rec_fb_from_doc(Document.get(2), .5, session)
        redis.rec_fb_from_kw(redis, .5, session)

        # terminate the loop
        redis.loop_done(session)

        leftover = redis.fb_weighted_sum(session)
        self.assertEqual(leftover, 0)
Example #20
0
 def test_update_doc_fb(self):
     """
     update_doc_feedback is reflected by session.doc_feedbacks and doc.fb
     """
     session = self.session
     first_doc = Document.get(1)
     session.update_doc_feedback(first_doc, 1)

     stored = session.doc_feedbacks
     self.assertEqual(stored, {first_doc: 1})

     self.assertEqual(first_doc.fb(session), 1)
    def setUp(self):
        """
        Create the recommender and a fresh session seeded with keyword and
        document feedback.
        """
        recommender_args = (2, 2, 1., .1, 1., .1, None, None)
        self.r = LinRelRecommender(*recommender_args, **fmim.__dict__)

        self.session = get_session()

        #giving the feedbacks
        for kw_str, fb in (("redis", .7), ("database", .6)):
            self.session.update_kw_feedback(Keyword.get(kw_str), fb)

        for doc_id in (1, 2, 8):
            self.session.update_doc_feedback(Document.get(doc_id), .7)
Example #22
0
    def test_rec_fb_from_doc(self):
        """
        keyword side: feedback recorded via rec_fb_from_doc is readable
        through fb_from_doc
        """
        session = self.session
        redis = Keyword.get("redis")

        for doc_id, fb in ((1, 1), (2, .5)):
            redis.rec_fb_from_doc(Document.get(doc_id), fb, session)

        received = redis.fb_from_doc(session)
        expected = {Document.get(1): 1, Document.get(2): .5}
        self.assertEqual(received, expected)

        #does not contain redis, error should be raised
        self.assertRaises(AssertionError, redis.rec_fb_from_doc,
                          Document.get(3), 1, session)
Example #23
0
    def test_rec_from_doc(self):
        """
        document side: rec_fb_from_doc stores the latest value, readable
        through fb_from_doc
        """
        session = self.session
        doc = Document.get(1)

        # the second value replaces the first one
        for fb in (1, .5):
            doc.rec_fb_from_doc(doc, fb, session)
            self.assertEqual(fb, doc.fb_from_doc(session))

        #is not the right document
        self.assertRaises(AssertionError, doc.rec_fb_from_doc,
                          Document.get(2), 1, session)

        #test the weighted sum
        expected_sum = .5 * .7
        self.assertEqual(expected_sum, doc.fb_weighted_sum(session))
Example #24
0
    def test_rec_fb_from_dockw(self):
        """
        keyword side: feedback recorded via rec_fb_from_dockw is readable
        through fb_from_doc
        """
        session = self.session
        redis = Keyword.get("redis")

        for doc_id, fb in ((2, .5), (1, 1)):
            redis.rec_fb_from_dockw(redis, Document.get(doc_id), fb, session)

        received = redis.fb_from_doc(session)
        expected = {Document.get(1): 1, Document.get(2): .5}
        self.assertEqual(received, expected)

        #is not the right keyword
        self.assertRaises(AssertionError, redis.rec_fb_from_dockw,
                          Keyword.get("the"), Document.get(1), 1, session)
 def test_fb_from_dockw(self):
     """
     feedback on an in-document keyword reaches both the document and
     the keyword after the update
     """
     session = self.session
     redis = Keyword.get("redis")
     first_doc = Document.get(1)

     ppgt.fb_from_dockw(redis, first_doc, .5, session)
     upd.update(session)

     doc_fb = first_doc.fb(session)
     self.assertAlmostEqual(0.183701573217, doc_fb)

     kw_fb = redis.fb(session)
     self.assertAlmostEqual(1/4., kw_fb)
Example #26
0
    def test_rec_from_doc(self):
        """
        document side: rec_fb_from_doc stores the latest value, readable
        through fb_from_doc
        """
        session = self.session
        doc = Document.get(1)

        # the second value replaces the first one
        for fb in (1, .5):
            doc.rec_fb_from_doc(doc, fb, session)
            self.assertEqual(fb, doc.fb_from_doc(session))

        #is not the right document
        self.assertRaises(AssertionError, doc.rec_fb_from_doc,
                          Document.get(2), 1, session)

        #test the weighted sum
        expected_sum = .5 * .7
        self.assertEqual(expected_sum, doc.fb_weighted_sum(session))
Example #27
0
    def test_kw_fb_filter(self):
        """
        the "kw_fb" filter should keep "redis" once it carries feedback
        """
        session = self.session
        redis = Keyword.get("redis")

        redis.rec_fb_from_doc(Document.get(1), 1, session)
        session.add_doc_recom_list(Document.get_many([1, 2, 6]))
        session.update_kw_feedback(redis, redis.fb_weighted_sum(session))

        kw_fb_filter = FilterRepository.filters["kw_fb"]
        kept = kw_fb_filter([redis])
        wanted = Keyword.get_many(["redis"])

        self.assertEqual(wanted, kept)
Example #28
0
    def test_kw_fb_filter(self):
        """
        the "kw_fb" filter should keep "redis" once it carries feedback
        """
        session = self.session
        redis = Keyword.get("redis")

        redis.rec_fb_from_doc(Document.get(1), 1, session)
        session.add_doc_recom_list(Document.get_many([1, 2, 6]))
        session.update_kw_feedback(redis, redis.fb_weighted_sum(session))

        kw_fb_filter = FilterRepository.filters["kw_fb"]
        kept = kw_fb_filter([redis])
        wanted = Keyword.get_many(["redis"])

        self.assertEqual(wanted, kept)
Example #29
0
 def test_doc_fb_filter(self):
     """
     the "doc_fb" filter should return an empty list for document 1
     after it received keyword feedback
     """
     doc = Document.get(1)
     doc.rec_fb_from_kw(Keyword.get("redis"), 1, self.session)
     self.session.update_doc_feedback(doc, doc.fb_weighted_sum(self.session))

     # Debug output. Written as single-argument print(...) calls so the
     # same text is emitted by the Python 2 statement and the Python 3
     # function (the label and value stay separated by one space).
     print("doc.fb(self.session)= %s" % (doc.fb(self.session),))
     actual = FilterRepository.filters["doc_fb"]([doc])
     expected = Document.get_many([])

     print(doc.fb(self.session))
     self.assertEqual(expected, actual)
Example #30
0
    def test_rec_fb_from_dockw(self):
        """
        getter/setter for receiving feedback from in-document keyword
        """
        doc = Document.get(1)

        doc.rec_fb_from_dockw(Keyword.get("redis"), doc, 1, self.session)
        doc.rec_fb_from_dockw(Keyword.get("database"), doc, .5, self.session)

        self.assertEqual(doc.fb_from_kw(self.session),
                         {Keyword.get("redis"): 1,
                          Keyword.get("database"): .5})

        # The negative cases below use the same (keyword, document, feedback,
        # session) argument order as the successful calls above. With the
        # first two arguments swapped, the AssertionError could be raised
        # for a reason other than the one each case is meant to exercise.

        #not the right document
        self.assertRaises(AssertionError, doc.rec_fb_from_dockw,
                          Keyword.get("redis"), Document.get(2), 1,
                          self.session)

        #python is not a keyword for document#1, error should be raised
        self.assertRaises(AssertionError, doc.rec_fb_from_dockw,
                          Keyword.get("python"), doc, 1, self.session)

        #test the weighted sum
        weights = [0.62981539329519109, 0.45460437826405437,
                   0.62981539329519109]
        self.assertEqual((weights[0] * 1 + weights[1] * .5) / sum(weights),
                         doc.fb_weighted_sum(self.session))
Example #31
0
    def test_model2modellist_similarity(self):
        """
        similarity_to between a single model and a list of the same kind
        """
        #for keywords
        redis = Keyword.get("redis")
        other_kws = Keyword.get_many(["database", "mysql"])

        kw_similarity = redis.similarity_to(other_kws)
        self.assertAlmostEqual(0.3754029265429976, kw_similarity)

        #for documents
        sixth_doc = Document.get(6)
        other_docs = Document.get_many([1, 2])

        doc_similarity = sixth_doc.similarity_to(other_docs)
        self.assertAlmostEqual(0.7382455893131392, doc_similarity)
Example #32
0
    def test_model2modellist_similarity(self):
        """
        similarity_to between a single model and a list of the same kind
        """
        #for keywords
        redis = Keyword.get("redis")
        other_kws = Keyword.get_many(["database", "mysql"])

        kw_similarity = redis.similarity_to(other_kws)
        self.assertAlmostEqual(0.3754029265429976, kw_similarity)

        #for documents
        sixth_doc = Document.get(6)
        other_docs = Document.get_many([1, 2])

        doc_similarity = sixth_doc.similarity_to(other_docs)
        self.assertAlmostEqual(0.7382455893131392, doc_similarity)
    def test_fb_from_doc(self):
        """
        feedback on a document reaches the document and its keywords
        after the update
        """
        session = self.session
        first_doc = Document.get(1)

        ppgt.fb_from_doc(first_doc, 0.5, session)
        upd.update(session)

        # assertions
        self.assertAlmostEqual(.5 * .7, first_doc.fb(session))

        expected_kw_fb = (("a", 1/2.), ("redis", 1/4.), ("database", 1/4.))
        for kw_str, expected in expected_kw_fb:
            self.assertAlmostEqual(expected, Keyword.get(kw_str).fb(session))
Example #34
0
    def test_doc_fb_filter(self):
        """
        the "doc_fb" filter should return an empty list for document 1
        after it received keyword feedback
        """
        doc = Document.get(1)
        doc.rec_fb_from_kw(Keyword.get("redis"), 1, self.session)
        self.session.update_doc_feedback(doc,
                                         doc.fb_weighted_sum(self.session))

        # Debug output. Written as single-argument print(...) calls so the
        # same text is emitted by the Python 2 statement and the Python 3
        # function (the label and value stay separated by one space).
        print("doc.fb(self.session)= %s" % (doc.fb(self.session),))
        actual = FilterRepository.filters["doc_fb"]([doc])
        expected = Document.get_many([])

        print(doc.fb(self.session))
        self.assertEqual(expected, actual)
    def setUp(self):
        """
        Create the recommender and a session seeded with feedback, then
        build self.fmim from the filtered keywords and documents.
        """
        #make the fmim
        keyword_filters = [self.my_kw_filter]
        document_filters = [self.kw_count_filter, self.has_database_filter]

        #the default configuration
        self.r = LinRelRecommender(
            2, 2,
            1.0, 0.1, 1.0, 0.1,
            kw_filters=None,
            doc_filters=[self.kw_count_filter, self.has_database_filter],
            **fmim.__dict__)

        self.session = get_session()

        for kw_str, fb in (("redis", .7), ("database", .6)):
            self.session.update_kw_feedback(Keyword.get(kw_str), fb)

        for doc_id in (1, 2, 8):
            self.session.update_doc_feedback(Document.get(doc_id), .7)

        kept_kws = self.r._filter_objs(keyword_filters,
                                       kws=Keyword.all_kws)
        kept_docs = self.r._filter_objs(document_filters,
                                        docs=Document.all_docs)

        kw_side = self.r._submatrix_and_indexing(
            kept_kws, kept_docs,
            fmim.kw2doc_m, fmim.kw_ind, fmim.doc_ind)
        kw2doc_submat, kw_ind_map, kw_ind_map_r = kw_side

        doc_side = self.r._submatrix_and_indexing(
            kept_docs, kept_kws,
            fmim.doc2kw_m, fmim.doc_ind, fmim.kw_ind)
        doc2kw_submat, doc_ind_map, doc_ind_map_r = doc_side

        self.fmim = FeatureMatrixAndIndexMapping(
            kw_ind_map, doc_ind_map,
            kw2doc_submat, doc2kw_submat,
            kw_ind_map_r, doc_ind_map_r)
    def test_query_that_produces_match(self):
        """
        Query that matches something in the corpus
        """
        docs, kws = self.r.recommend("python, redis")

        self.assertEqual(Document.get(6), docs[0])
        self.assertEqual(4, len(docs))

        #kws should be superset of assoc_kws
        assoc_kws = set()
        for doc in docs:
            assoc_kws.update(doc.keywords)

        self.assertTrue(assoc_kws.issubset(set(kws)))
    def test_all_together(self):
        """
        Document, in-document keyword and keyword feedback are all
        propagated before a single update
        """
        session = self.session
        doc = Document.get(1)
        kw = Keyword.get("redis")

        recom_docs = []
        for _id in [1, 2, 3]:
            recom_docs.append(Document.get(_id))
        session.add_doc_recom_list(recom_docs)

        ppgt.fb_from_doc(doc, 0.5, session)
        ppgt.fb_from_dockw(kw, doc, .5, session)
        ppgt.fb_from_kw(kw, 0.5, session)

        upd.update(session)

        # expected keyword feedback, checked in this order
        self.assertAlmostEqual(
            0.56689342264886755 * .5 / (0.56689342264886755 + 0.49704058656839417),
            Keyword.get("a").fb(session))
        self.assertAlmostEqual(1 / 4. * .3 + .5 * .7,
                               Keyword.get("redis").fb(session))
        self.assertAlmostEqual(1 / 4.,
                               Keyword.get("database").fb(session))

        # expected feedback of the recommended documents, in list order
        first_doc, second_doc, third_doc = recom_docs
        self.assertAlmostEqual(0.183701573217 * .3 + .7 * .5,
                               first_doc.fb(session))
        self.assertAlmostEqual(0.191506501383, second_doc.fb(session))
        self.assertAlmostEqual(0, third_doc.fb(session))
 def test_fb_from_kw(self):
     """
     feedback on a keyword reaches the keyword and the recommended
     documents after the update
     """
     session = self.session
     redis = Keyword.get("redis")

     recom_docs = []
     for _id in [1, 2, 3]:
         recom_docs.append(Document.get(_id))
     session.add_doc_recom_list(recom_docs)

     ppgt.fb_from_kw(redis, 0.5, session)
     upd.update(session)

     # assertions
     self.assertAlmostEqual(.5 * .7, redis.fb(session))

     first_doc, second_doc, third_doc = recom_docs
     self.assertAlmostEqual(0.183701573217, first_doc.fb(session))
     self.assertAlmostEqual(0.191506501383, second_doc.fb(session))
     self.assertAlmostEqual(0, third_doc.fb(session))
 def test_query_that_produces_match(self):
     """
     Query that matches something in the corpus
     """
     docs, kws = self.r.recommend("python, redis")

     self.assertEqual(Document.get(6), docs[0])
     self.assertEqual(4, len(docs))

     #kws should be superset of assoc_kws
     assoc_kws = set()
     for doc in docs:
         assoc_kws.update(doc.keywords)

     self.assertTrue(assoc_kws.issubset(set(kws)))
Example #40
0
    def test_similarity(self):
        """
        similarity_to between single documents and between single keywords
        """
        # each case: getter, three ids, expected similarity of first/second,
        # expected similarity of first/third
        cases = [
            # for doc
            (Document.get, (1, 2, 3),
             0.6300877890447911, 0.31713642199844894),
            # for kw
            (Keyword.get, ("redis", "database", "python"),
             0.6698544675330306, 0.2613424459663648),
        ]

        for getter, (id1, id2, id3), sim_12, sim_13 in cases:
            first = getter(id1)
            second = getter(id2)
            third = getter(id3)

            self.assertAlmostEqual(sim_12, first.similarity_to(second))
            # same value regardless of direction
            self.assertAlmostEqual(second.similarity_to(first),
                                   first.similarity_to(second))

            self.assertAlmostEqual(sim_13, first.similarity_to(third))

            # unknown metric name
            self.assertRaises(NotImplementedError, first.similarity_to,
                              third, "not implemented metric")
Example #41
0
    def test_document(self):
        """
        id, title and keywords of document 1, plus the type of _kw_weight
        """
        doc = Document.get(1)

        self.assertEqual(doc.id, 1)
        self.assertEqual(doc.title, "redis: key-value-storage database (ONE)")

        actual_kws = set(doc.keywords)
        expected_kws = set(Keyword.get_many(["redis", "database", "a"]))
        self.assertEqual(actual_kws, expected_kws)

        #that is as far as we can test
        #no numerical testing
        weight_type = type(doc._kw_weight)
        self.assertTrue(weight_type is DictType)
Example #42
0
    def test_loop_done(self):
        """
        after loop_done the document's weighted feedback sum is 0
        """
        session = self.session
        doc = Document.get(1)

        doc.rec_fb_from_dockw(Keyword.get("redis"), doc, 1, session)
        doc.rec_fb_from_kw(Keyword.get("database"), .5, session)
        doc.rec_fb_from_doc(doc, .5, session)

        # terminate the loop
        doc.loop_done(session)

        leftover = doc.fb_weighted_sum(session)
        self.assertEqual(leftover, 0)
Example #43
0
    def test_document(self):
        """
        id, title and keywords of document 1, plus the type of _kw_weight
        """
        doc = Document.get(1)

        self.assertEqual(doc.id, 1)
        self.assertEqual(doc.title, "redis: key-value-storage database (ONE)")

        actual_kws = set(doc.keywords)
        expected_kws = set(Keyword.get_many(["redis", "database", "a"]))
        self.assertEqual(actual_kws, expected_kws)

        #that is as far as we can test
        #no numerical testing
        weight_type = type(doc._kw_weight)
        self.assertTrue(weight_type is DictType)
Example #44
0
    def test_loop_done(self):
        """
        after loop_done the document's weighted feedback sum is 0
        """
        session = self.session
        doc = Document.get(1)

        doc.rec_fb_from_dockw(Keyword.get("redis"), doc, 1, session)
        doc.rec_fb_from_kw(Keyword.get("database"), .5, session)
        doc.rec_fb_from_doc(doc, .5, session)

        # terminate the loop
        doc.loop_done(session)

        leftover = doc.fb_weighted_sum(session)
        self.assertEqual(leftover, 0)
Example #45
0
    def test_all_together(self):
        """
        weighted sum of a document that received feedback through
        rec_fb_from_dockw, rec_fb_from_kw and rec_fb_from_doc
        """
        session = self.session
        doc = Document.get(1)

        doc.rec_fb_from_dockw(Keyword.get("redis"), doc, 1, session)
        doc.rec_fb_from_kw(Keyword.get("database"), .5, session)
        doc.rec_fb_from_doc(doc, .5, session)

        redis = Keyword.get("redis")
        db = Keyword.get("database")
        weights = {}
        weights[redis] = 0.62981539329519109
        weights[db] = 0.45460437826405437
        weights[Keyword.get("a")] = 0.62981539329519109

        kw_numerator = weights[redis] * 1 + weights[db] * .5
        expected = .5 * .7 + .3 * kw_numerator / sum(weights.values())

        self.assertAlmostEqual(expected, doc.fb_weighted_sum(session))
Example #46
0
    def recommend_documents(self, fmim,
                            session, top_n, mu, c,
                            sampler = None):
        """
        Rank the documents indexed by `fmim` and return the leading ones.

        Param:
        fmim: object providing doc_ind, doc_ind_r and doc2kw_m
        session: passed to Document.fb to read each document's feedback
        top_n: how many leading entries of the ranking to return
        mu, c: forwarded unchanged to self.generic_rank
        sampler: accepted but not used by this method

        Return:
        list of Document; each one has "score" and "recommended" set
        """
        candidates = Document.get_many(fmim.doc_ind.keys())
        fbs = dict((doc.id, doc.fb(session)) for doc in candidates)

        # generic_rank returns three values; only the first is needed here.
        # NOTE(review): presumably an ordered mapping sorted by score --
        # confirm, the slice below relies on its iteration order.
        id_with_scores, _, _ = self.generic_rank(fmim.doc2kw_m, fbs,
                                                 fmim.doc_ind, fmim.doc_ind_r,
                                                 mu, c)

        # items() is a list on Python 2 but a non-sliceable view on
        # Python 3, so materialise it before slicing.
        top_items = list(id_with_scores.items())[:top_n]

        docs = []
        for doc_id, score in top_items:
            doc = Document.get(doc_id)
            doc["score"] = score
            doc['recommended'] = True
            docs.append(doc)

        return docs
Example #47
0
    def recommend_documents(self, query, top_n):
        """
        Param:
        query: string, the query string, phrases separated by comma, for example: machine learning, natural language processing
        top_n: integer, the number of documents to be returned

        Return:
        DocumentList, the recommended documents (highest score first)
        KeywordList, the query keywords(that exist in the corpus)
        """
        query_keywords = [kw_str.strip()
                          for kw_str in query.strip().split(",")]
        #prepare the query word binary column vector
        word_vec = self._word_vec(query_keywords)

        existing_keywords = Keyword.get_many([word
                                              for word in query_keywords
                                              if word in self.kw_ind])

        #get the scores for documents and score it
        scores = matrix2array((self.doc2kw_m * word_vec).T)

        #get none zero scores
        # Each score is paired with its position in `scores` BEFORE the
        # zeros are dropped. Enumerating the already-filtered list would
        # renumber the entries, and `ind` would no longer be the row index
        # that self.doc_ind_r expects.
        scored_rows = [(ind, score)
                       for ind, score in enumerate(scores)
                       if score]
        scored_rows.sort(key=lambda pair: pair[1], reverse=True)

        #get the top_n documents
        docs = DocumentList([])
        for ind, score in scored_rows[:top_n]:
            doc_id = self.doc_ind_r[ind]
            doc = Document.get(doc_id)
            doc['score'] = score
            doc["recommended"] = True
            docs.append(doc)

        return docs, existing_keywords
Example #48
0
    def test_all_together(self):
        """
        weighted sum of a document that received feedback through
        rec_fb_from_dockw, rec_fb_from_kw and rec_fb_from_doc
        """
        session = self.session
        doc = Document.get(1)

        doc.rec_fb_from_dockw(Keyword.get("redis"), doc, 1, session)
        doc.rec_fb_from_kw(Keyword.get("database"), .5, session)
        doc.rec_fb_from_doc(doc, .5, session)

        redis = Keyword.get("redis")
        db = Keyword.get("database")
        weights = {}
        weights[redis] = 0.62981539329519109
        weights[db] = 0.45460437826405437
        weights[Keyword.get("a")] = 0.62981539329519109

        kw_numerator = weights[redis] * 1 + weights[db] * .5
        expected = .5 * .7 + .3 * kw_numerator / sum(weights.values())

        self.assertAlmostEqual(expected, doc.fb_weighted_sum(session))
Example #49
0
    def test_type_mismatch(self):
        """
        similarity_to between keyword-kind and document-kind objects
        raises AssertionError in every combination
        """
        kw = Keyword.get("redis")
        kwlist = Keyword.get_many(["database", "mysql"])

        doc = Document.get(6)
        doclist = Document.get_many([1, 2])

        # (caller, argument) pairs, checked in this order
        mismatched_pairs = [
            (kw, doc),
            (doc, kw),
            (kw, doclist),
            (doclist, kw),
            (doc, kwlist),
            (kwlist, doc),
        ]

        for caller, other in mismatched_pairs:
            self.assertRaises(AssertionError, caller.similarity_to, other)
Example #50
0
    def test_type_mismatch(self):
        """
        similarity_to between keyword-kind and document-kind objects
        raises AssertionError in every combination
        """
        kw = Keyword.get("redis")
        kwlist = Keyword.get_many(["database", "mysql"])

        doc = Document.get(6)
        doclist = Document.get_many([1, 2])

        # (caller, argument) pairs, checked in this order
        mismatched_pairs = [
            (kw, doc),
            (doc, kw),
            (kw, doclist),
            (doclist, kw),
            (doc, kwlist),
            (kwlist, doc),
        ]

        for caller, other in mismatched_pairs:
            self.assertRaises(AssertionError, caller.similarity_to, other)
Example #51
0
from util import config_doc_kw_model

# NOTE(review): this runs before the scinet3.model import below -- presumably
# the model classes depend on this configuration; confirm before reordering.
config_doc_kw_model()

from scinet3.model import Document, Keyword

# Called once before any Document.get(...) lookup further down the script.
Document.load_all_from_db()

def doc_compute(d):
    """
    Return half of the "redis" keyword's share of the total weight in `d`.

    `d` maps keywords to weights; a missing "redis" entry counts as 0.
    """
    redis_weight = d.get(Keyword.get("redis"), 0)
    total_weight = sum(d.values())
    return redis_weight * .5 / total_weight

# For documents 1, 2 and 3: print the keyword weights, then doc_compute of
# those weights.
# NOTE(review): the bare dict literals below are expression statements with no
# effect -- presumably the output of the preceding print pasted in for
# reference; confirm before removing them.
doc = Document.get(1)
print doc._kw_weight
{"a": 0.62981539329519109, "redis": 0.62981539329519109, "database": 0.45460437826405437}

print doc_compute(doc._kw_weight)

doc = Document.get(2)
print doc._kw_weight
{"the": 0.58478244910295341, "redis": 0.6577450118852588, "database": 0.47476413782131072}
print doc_compute(doc._kw_weight)


doc = Document.get(3)
print doc._kw_weight
{"tornado": 0.57555052264377027, "web": 0.50353869621889158, "a": 0.50353869621889158, "python": 0.40204372735417787}
print doc_compute(doc._kw_weight)


def kw_compute(d):
Example #52
0
 def doc_feedbacks(self):
     """
     Document feedback stored for this session.

     Return:
     dict mapping Document to its feedback value as float
     """
     redis_key = "session:%s:%s" % (self.session_id, "doc_feedbacks")

     feedbacks = {}
     for raw_id, raw_fb in self.redis.hgetall(redis_key).items():
         # resolve the document before converting the value, keeping the
         # order in which a bad entry would raise
         doc = Document.get(int(raw_id))
         feedbacks[doc] = float(raw_fb)
     return feedbacks
Example #53
0
 def affected_docs(self):
     """
     Documents for the ids stored under "affected_docs" (empty if unset).
     """
     stored_ids = self.get("affected_docs", set())

     docs = []
     for doc_id in stored_ids:
         docs.append(Document.get(doc_id))
     return docs