Example #1
    def run_als_eval(self, k, mcf_settings):
        precisions = []    
        recalls = []    
        aps = []
        miufs = []
        diversities = {'structural-1':[], 'structural-2':[], 'semantic':[]}
        recs = {}
        
        mcf = MCF(mcf_settings)
        for pid in self.target_users:
            mcf.fit(self.eval_inters_per_person[pid])

            eval_list, read_list, gtp = self.filter_out_evaluation_list_for_pid(pid)
            eval_list = eval_list - read_list

            # With only k slots, recall is capped at k/|gtp| when positives outnumber slots
            max_possible_recall = k/len(gtp) if len(gtp) > k else 1.0

            rec_set = mcf.run_user(pid, k)

            precision = self.precision_at_k(rec_set, gtp)
            precisions.append(precision)
            recall = self.recall_at_k(rec_set, gtp)
            recalls.append(recall)
            ap = utils.apk(list(gtp), list(rec_set), k)
            aps.append(ap)            
            miuf = self.mean_inverse_user_frequency(rec_set, self.eval_inters_per_person[pid])
            miufs.append(miuf)
            sd = self.structural_diversity(rec_set)
            semd = self.semantic_diversity(rec_set)
            diversities['semantic'].append(semd)
            recs[pid] = [rec_set, precision, recall, miuf, semd, sd, max_possible_recall]

        return recs
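
The metric helpers called throughout these pipelines are not shown. A minimal sketch of set-based precision@k and recall@k, assuming rec_set and gtp are collections of note IDs (the class methods are presumed to behave like these standalone versions):

def precision_at_k(rec_set, gtp):
    # hits among the recommended items, divided by the number recommended
    return len(set(rec_set) & set(gtp)) / len(rec_set) if rec_set else 0.0

def recall_at_k(rec_set, gtp):
    # hits among the recommended items, divided by the number of ground-truth positives
    return len(set(rec_set) & set(gtp)) / len(gtp) if gtp else 0.0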
Example #2
    def run_pure_ppr_eval(self, k, damp_factor):
        ppr = PurePPR(self.cutoff_weight, damp_factor)

        precisions = []
        recalls = []
        aps = []
        miufs = []
        diversities = {'structural-1':[], 'structural-2':[], 'semantic':[]}
        recs = {}
        
        for active_user in self.target_users:
            # Restrict note contents to the posts visible to this user
            user_specific_note_contents = self.all_note_contents.loc[
                self.all_note_contents['NoteID'].isin(self.eval_nids_per_person[active_user])]

            ppr.fit_user(active_user, self.users, self.eval_nids_per_person[active_user],
                         self.eval_inters_per_person[active_user], user_specific_note_contents,
                         self.week_num)

            eval_list, already_read_list, gtp = self.filter_out_evaluation_list_for_pid(active_user)
            eval_list = eval_list - already_read_list

            # Run PPR, restrict scores to the candidate notes, and take the top k
            ppr_values = ppr.run_user(active_user, eval_list)
            note_ppr_values = {nid: value for nid, value in ppr_values.items() if nid in eval_list}
            rec_set = set(item[0] for item in utils.topn_from_dict(note_ppr_values, k))
            max_possible_recall = k/len(gtp) if len(gtp)>k else 1.0

            precision = self.precision_at_k(rec_set, gtp)
            precisions.append(precision)

            recall = self.recall_at_k(rec_set, gtp)
            recalls.append(recall)

            ap = utils.apk(list(gtp), list(rec_set), k)
            aps.append(ap)

            miuf = self.mean_inverse_user_frequency(rec_set, self.eval_inters_per_person[active_user])
            miufs.append(miuf)

            sd = self.structural_diversity(rec_set)
            semd = self.semantic_diversity(rec_set)
            diversities['semantic'].append(semd)
            logging.info('Semantic diversity@%d: %.3f', k, semd)
            recs[active_user] = [rec_set, precision, recall, miuf, semd, sd, max_possible_recall]

        return recs
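
utils.topn_from_dict is not shown here; given how its output is indexed (item[0] for the note ID), a plausible sketch returns the n highest-valued (key, value) pairs:

import heapq

def topn_from_dict(d, n):
    # the n (key, value) pairs with the largest values, e.g. top-n PPR scores
    return heapq.nlargest(n, d.items(), key=lambda kv: kv[1])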
Example #3
    def run_popularity_eval(self, k):

        precisions = []     
        recalls = []
        aps = []
        miufs = []
        diversities = {'structural-1':[], 'structural-2':[], 'semantic':[]}
        recs = {}
        
        for active_user in self.target_users:
            # Count interactions per note to build this user's popularity table
            popularities = {nid: 0 for nid in self.eval_nids_per_person[active_user]}
            for pid, inters in self.eval_inters_per_person[active_user].items():
                inter_counts = inters['NoteID'].value_counts().to_dict()
                popularities = {nid: count + inter_counts.get(nid, 0) for nid, count in popularities.items()}
            
            eval_list, already_read_list, gtp = self.filter_out_evaluation_list_for_pid(active_user)
            eval_list = eval_list - already_read_list

            rec_set = set(utils.get_popularity_rank(eval_list, popularities, k))
            max_possible_recall = k/len(gtp) if len(gtp) > k else 1.0

            precision = self.precision_at_k(rec_set, gtp)
            precisions.append(precision)

            recall = self.recall_at_k(rec_set, gtp)
            recalls.append(recall)

            ap = utils.apk(list(gtp), list(rec_set), k)
            aps.append(ap)

            miuf = self.mean_inverse_user_frequency(rec_set, self.eval_inters_per_person[active_user])
            miufs.append(miuf)

            sd = self.structural_diversity(rec_set)
            semd = self.semantic_diversity(rec_set)
            diversities['semantic'].append(semd)
            recs[active_user] = [rec_set, precision, recall, miuf, semd, sd, max_possible_recall]

        return recs
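
utils.get_popularity_rank is likewise assumed to sort the candidate list by the popularity counts built above and truncate to k; a minimal sketch:

def get_popularity_rank(eval_list, popularities, k):
    # rank candidate notes by interaction count, most popular first
    ranked = sorted(eval_list, key=lambda nid: popularities.get(nid, 0), reverse=True)
    return ranked[:k]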
Example #4
    def run_ucf_eval(self, k, cf_settings):
        precisions = []    
        recalls = []    
        aps = []
        miufs = []
        diversities = {'structural-1':[], 'structural-2':[], 'semantic':[]}
        recs = {}
        
        user_cf = UserCF(self.cutoff_weight, cf_settings)
        for pid in self.target_users:
            user_cf.fit(self.eval_inters_per_person[pid])

            eval_list, read_list, gtp = self.filter_out_evaluation_list_for_pid(pid)
            eval_list = eval_list - read_list

            max_possible_recall = k/len(gtp) if len(gtp) > k else 1.0

            rec_set = user_cf.run_user(pid, eval_list, k)
            precision = self.precision_at_k(rec_set, gtp)
            precisions.append(precision)
            recall = self.recall_at_k(rec_set, gtp)
            recalls.append(recall)
            ap = utils.apk(list(gtp), list(rec_set), k)
            aps.append(ap)

            miuf = self.mean_inverse_user_frequency(rec_set, self.eval_inters_per_person[pid])
            miufs.append(miuf)

            sd = self.structural_diversity(rec_set)
            semd = self.semantic_diversity(rec_set)
            diversities['semantic'].append(semd)
            logging.info('Semantic diversity@%d: %.3f', k, semd)
            recs[pid] = [rec_set, precision, recall, miuf, semd, sd, max_possible_recall]

        return recs
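
mean_inverse_user_frequency measures novelty: recommending notes few users have touched scores higher. A sketch under the assumption that eval_inters_per_person[pid] maps each user to an interactions DataFrame with a 'NoteID' column, as in run_popularity_eval (the class method is presumed to behave like this standalone version):

import numpy as np

def mean_inverse_user_frequency(rec_set, inters_per_person):
    # log2(|U| / |users who interacted with the note|), averaged over the
    # recommendations; untouched notes default to a count of 1 (maximum novelty)
    num_users = len(inters_per_person)
    note_user_counts = {}
    for pid, inters in inters_per_person.items():
        for nid in set(inters['NoteID']):
            note_user_counts[nid] = note_user_counts.get(nid, 0) + 1
    return np.mean([np.log2(num_users / note_user_counts.get(nid, 1)) for nid in rec_set])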
Example #5
    def run_random_eval(self, k):
        
        precisions = []
        recalls = []
        aps = []
        miufs = []
        diversities = {'structural-1':[], 'structural-2':[], 'semantic':[]}
        recs = {}
        
        for active_user in self.target_users:

            eval_list, already_read_list, gtp = self.filter_out_evaluation_list_for_pid(active_user)
            eval_list = eval_list - already_read_list
            max_possible_recall = k/len(gtp) if len(gtp) > k else 1.0

            # Uniform-random baseline: pick k candidates at random
            rec_set = set(utils.get_random_list(eval_list, k))

            precision = self.precision_at_k(rec_set, gtp)
            precisions.append(precision)
            
            recall = self.recall_at_k(rec_set, gtp)
            recalls.append(recall)

            ap = utils.apk(list(gtp), list(rec_set), k)
            aps.append(ap)
            
            miuf = self.mean_inverse_user_frequency(rec_set, self.eval_inters_per_person[active_user])
            miufs.append(miuf)

            sd = self.structural_diversity(rec_set)
            semd = self.semantic_diversity(rec_set)
            diversities['semantic'].append(semd)
            logging.info('Semantic diversity@%d: %.3f',k, semd)
            recs[active_user] = [rec_set, precision, recall, miuf, semd, sd, max_possible_recall]

        return recs
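
utils.get_random_list presumably draws up to k candidates uniformly without replacement; a one-line sketch:

import random

def get_random_list(eval_list, k):
    # uniform random sample of up to k candidate notes
    return random.sample(list(eval_list), min(k, len(eval_list)))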
Example #6
    def run_cbf_eval(self, k, cbf_settings):
        """
        ['tfidf+lsi', 'tokens_phrases', 15, 'averaging','explicit_implicit','cosine']
        ['sentence_emb_precomputed', PRE_EMBS, PRE_SIMS, 'averaging','explicit_implicit','cosine']
        """
        precisions = []
        recalls = []
        aps = []
        miufs = []
        diversities = {'structural-1':[], 'structural-2':[], 'semantic':[]}
        recs = {}

        cbf = CBF(self.cutoff_weight, cbf_settings[0])

        for pid in self.target_users:
            logging.info('Prediction for user %d', pid)
            if cbf_settings[0]=='sentence_emb_precomputed':
                cbf.load_precomputed_emb(cbf_settings[1], self.eval_inters_per_person[pid])
            else:
                cbf.fit(self.eval_inters_per_person[pid], 5, ['NOUN', 'VERB'], STOP_WORDS, 
                        cbf_settings[1], cbf_settings[2], cbf_settings[6])
            cbf.construct_user_profile(pid, cbf_settings[3], cbf_settings[4])

            eval_list, read_list, gtp = self.filter_out_evaluation_list_for_pid(pid) 
            eval_list = eval_list - read_list
            max_possible_recall = k/len(gtp) if len(gtp)>k else 1.0

            # Use the fitted user profile to construct the recommendation set for evaluation
            rec_set = cbf.run_user(pid, eval_list, cbf_settings[5], k)

            precision = self.precision_at_k(rec_set, gtp)
            precisions.append(precision)

            recall = self.recall_at_k(rec_set, gtp)
            recalls.append(recall)

            ap = utils.apk(list(gtp), list(rec_set), k)
            aps.append(ap)

            miuf = self.mean_inverse_user_frequency(rec_set, self.eval_inters_per_person[pid])
            miufs.append(miuf)

            sd = self.structural_diversity(rec_set)
            semd = self.semantic_diversity(rec_set)
            diversities['semantic'].append(semd)
            recs[pid] = [rec_set, precision, recall, miuf, semd, sd, max_possible_recall]

        return recs
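
A hypothetical call matching the docstring's second configuration (evaluator stands in for the hosting class instance; PRE_EMBS and PRE_SIMS are the precomputed embedding and similarity stores):

recs = evaluator.run_cbf_eval(
    k=10,
    cbf_settings=['sentence_emb_precomputed', PRE_EMBS, PRE_SIMS,
                  'averaging', 'explicit_implicit', 'cosine'])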
Example #7
    def run_csclrec_eval(self, k, ppr_settings, pfilter, post_create_dates, temporal_start_week, ratio_read):
        """
        A pipeline to run CSCLRec recommender offline each week
        """
        
        # Initialize the CSCLRec recommender
        ppr = CSCLRec(self.cutoff_weight, ppr_settings)

        # UserProfiler keeps analytical data for each user: it analyzes each user's
        # behavior for the current window and precomputes nearest neighbours per user
        # (the default applied to the PPR graph for all users)
        up = UserProfiler(self.train_user_inter, self.train_post_inter, self.users,
                          self.weight_user_lookup, self.weight_post_lookup, temporal_start_week)
        up.init_user_profiles(post_create_dates, self.week_num, slide_window_size=2)
        
        
        # Content analyzer: the content graph of wordnet hypernyms is precomputed
        # upstream (previously built inline via ContentAnalyzer(self.all_note_contents)
        # .construct_hypernym_graph() when ppr_settings['cbf'] was set)
        content_graph = self.precomputed_content_graph
        
        precisions = []
        recalls = []
        aps = []
        miufs = []
        diversities = {'structural-1':[], 'structural-2':[], 'semantic':[]}
        recs = {}
        
        for active_user in self.target_users:
            # Fit the CSCLRec engine with settings, training interactions, and the
            # contents of the training posts visible to this user
            user_specific_note_contents = self.all_note_contents.loc[
                self.all_note_contents['NoteID'].isin(self.eval_nids_per_person[active_user])]
            
            # Remove posts invisible to this user from the content graph, then prune
            # hypernym nodes left with degree 0 or 1
            user_specific_content_graph = content_graph.copy()
            removed_nodes = [x for x, y in content_graph.nodes(data=True)
                             if y['node_type'] == 'post' and x not in self.eval_nids_per_person[active_user]]
            user_specific_content_graph.remove_nodes_from(removed_nodes)
            out_degree = user_specific_content_graph.degree()
            node_types = nx.get_node_attributes(user_specific_content_graph, 'node_type')
            to_remove = [n for n in user_specific_content_graph
                         if out_degree[n] <= 1 and node_types[n] == 'hypernym']
            user_specific_content_graph.remove_nodes_from(to_remove)
        
            ppr.fit_user(active_user, self.users, self.eval_nids_per_person[active_user], 
                         self.eval_inters_per_person[active_user], user_specific_note_contents, 
                         self.week_num, up, user_specific_content_graph) 

            eval_list, already_read_list, gtp = self.filter_out_evaluation_list_for_pid(active_user)
            eval_list = eval_list - already_read_list

            # Build the graph and run personalized PageRank for this user
            ppr_values = ppr.run_user(active_user, eval_list)
            # Contextualized post-filtering: filter and rerank the recommendations
            rec_set = pfilter.rerank(ppr_values, eval_list, already_read_list, None, k, ratio_read, verbose=1)

            # Evaluation
            precision = self.precision_at_k(rec_set, gtp)
            precisions.append(precision)
            max_possible_recall = k/len(gtp) if len(gtp) > k else 1.0

            recall = self.recall_at_k(rec_set, gtp)
            recalls.append(recall)

            ap = utils.apk(list(gtp), list(rec_set), k)
            aps.append(ap)

            miuf = self.mean_inverse_user_frequency(rec_set, self.eval_inters_per_person[active_user])
            miufs.append(miuf)

            sd = self.structural_diversity(rec_set)
            semd = self.semantic_diversity(rec_set)
            diversities['semantic'].append(semd)
            recs[active_user] = [rec_set, precision, recall, miuf, semd, sd, max_possible_recall]

        return recs, up
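
Each recs row holds [rec_set, precision, recall, miuf, semd, sd, max_possible_recall], so week-level averages can be taken directly from the returned dict; a hypothetical consumer (argument names follow the signature above):

import numpy as np

recs, up = evaluator.run_csclrec_eval(k, ppr_settings, pfilter,
                                      post_create_dates, temporal_start_week, ratio_read)
mean_precision = np.mean([row[1] for row in recs.values()])   # mean p@k over target users
mean_recall = np.mean([row[2] for row in recs.values()])      # mean r@k over target users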
Example #8
    def run_coppr_eval(self, k, ppr_settings, pfilter, post_create_dates, temporal_start_week, ratio_read):
        """
        A pipeline to run CoPPR recommender offline each week
        """
        
        # Initialize the CoPPR recommender
        ppr = CoPPR(self.cutoff_weight, ppr_settings)

        # UserProfiler keeps analytical data for each user
        up = UserProfiler(self.train_user_inter, self.train_post_inter, self.users,
                          self.weight_user_lookup, self.weight_post_lookup, temporal_start_week)
        up.init_user_profiles(post_create_dates, self.week_num, slide_window_size=2)

        
        precisions = []
        recalls = []
        aps = []
        miufs = []
        diversities = {'structural-1':[], 'structural-2':[], 'semantic':[]}
        recs = {}
        
        for active_user in self.target_users:
            # Fit the PPR engine with settings, training interactions, and the
            # contents of the training posts visible to this user
            user_specific_note_contents = self.all_note_contents.loc[
                self.all_note_contents['NoteID'].isin(self.eval_nids_per_person[active_user])]

            ppr.fit_user(active_user, self.users, self.eval_nids_per_person[active_user], 
                         self.eval_inters_per_person[active_user], user_specific_note_contents, 
                         self.week_num, up) 

            eval_list, already_read_list, gtp = self.filter_out_evaluation_list_for_pid(active_user)
            eval_list = eval_list - already_read_list

            # Build the graph and run personalized PageRank for this user
            ppr_values = ppr.run_user(active_user, eval_list, self.precomputed_keywords)
            # Contextualized post-filtering: filter and rerank the recommendations
            rec_set = pfilter.rerank(ppr_values, eval_list, already_read_list, None, k, ratio_read, verbose=1)

            # Evaluation
            precision = self.precision_at_k(rec_set, gtp)
            precisions.append(precision)
            max_possible_recall = k/len(gtp) if len(gtp) > k else 1.0

            recall = self.recall_at_k(rec_set, gtp)
            recalls.append(recall)

            ap = utils.apk(list(gtp), list(rec_set), k)
            aps.append(ap)

            miuf = self.mean_inverse_user_frequency(rec_set, self.eval_inters_per_person[active_user])
            miufs.append(miuf)

            sd = self.structural_diversity(rec_set)
            semd = self.semantic_diversity(rec_set)
            diversities['semantic'].append(semd)
            recs[active_user] = [rec_set, precision, recall, miuf, semd, sd, max_possible_recall]

        return recs, up
Example #9
# Imports and parametrization added to make the test self-contained; `apk` is assumed
# to come from the same utils module used as utils.apk in the evaluation code above.
import numpy as np
import pytest

from utils import apk


@pytest.mark.parametrize('predicted, expected_score',
                         [([1, 2, 3], 1.0),    # all hits, in order: perfect AP@3
                          ([1, 2, 4], 2/3)])   # hits at ranks 1 and 2 only: (1 + 1)/3
def test_apk(predicted, expected_score):
    actual = np.array([1, 2, 3])
    np.testing.assert_almost_equal(apk(actual, predicted, k=3), expected_score)
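
For reference, a sketch of apk in the common ml_metrics style, consistent with the parametrized expectations above (an assumption, not necessarily the project's exact implementation):

def apk(actual, predicted, k=10):
    # average precision at k: precision is accumulated at each rank that hits
    # a relevant item, then normalized by min(|actual|, k)
    if len(predicted) > k:
        predicted = predicted[:k]
    score, num_hits = 0.0, 0.0
    for i, p in enumerate(predicted):
        if p in actual and p not in predicted[:i]:
            num_hits += 1.0
            score += num_hits / (i + 1.0)
    if len(actual) == 0:
        return 0.0
    return score / min(len(actual), k)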