def co_review_rating_dev_dist(self, user_buz_rating): ''' @summary: The distribution of the rating deviation of co-reviews for positive pairs and negative pairs ''' score_pos, score_neg = [], [] for i in xrange(len(self.user_base) - 1): print str(i)+'/'+str(len(self.user_base)), 'th', 'user' u_i = self.user_base[i] label_i = self.user_label[u_i] for j in xrange(i+1, len(self.user_base)): u_j = self.user_base[j] label_j = self.user_label[u_j] rating_dev_co_review = Yelp.pf_rating_deviation(u_i, u_j, (user_buz_rating,)) if rating_dev_co_review > 0: if label_i * label_j == 1: score_pos.append(rating_dev_co_review) else: score_neg.append(rating_dev_co_review) print 'num of pos pairs:', len(score_pos) print 'num of neg pairs:', len(score_neg) print mquantiles(score_pos) print mquantiles(score_neg) fig = plt.figure(figsize=(12, 6)) ax1 = fig.add_subplot(121) ax2 = fig.add_subplot(122) ax1.boxplot([score_pos, score_neg], labels=['pos', 'neg']) ax2.hist([score_pos, score_neg], color = ['r', 'b'], cumulative=False, normed=True, label=['pos', 'neg']) ax2.legend() plt.show()
if __name__ == '__main__':
    # Script entry point: read the data-set suffix from the config file, load
    # the user base and the user labels for that suffix, then run one of the
    # statistics below.
    config = ConfigParser.ConfigParser()
    config.read(CONFIG)
    # NOTE(review): the option name is spelled 'yep_data_class' -- confirm it
    # matches the key actually present in the config file.
    suffix = config.get('Path', 'yep_data_class')

    filtered_users = Utility.load_user_filtered(L_USER_F + suffix)   # specify user base
    labels_by_user = Utility.load_user_label(R_USER_LABEL + suffix)  # specify user label
    stat = Stat(user_base=filtered_users, user_label=labels_by_user)

    # Alternative experiments, kept for reference; enable one at a time.
    # stat.co_review_num_dist(Utility.load_user_buz_rating(R_USER_BUZ_RATING + suffix))
    # stat.co_review_rating_dev_dist(Utility.load_user_buz_rating(R_USER_BUZ_RATING + suffix))
    # stat.knn_sim_distribution(np.iinfo(np.int64).max, Yelp.load_sim_mat(SM_COMMON_FRIEND + suffix))
    # stat.knn_sim_distribution(10, Yelp.load_sim_mat(SM_COMMON_FRIEND + suffix))
    # stat.knn_sim_distribution(5, Yelp.load_sim_mat(SM_COMMON_FRIEND + suffix))
    # stat.knn_sim_distribution(2, Yelp.load_sim_mat(SM_COMMON_FRIEND + suffix))
    # stat.knn_sim_distribution(1, Yelp.load_sim_mat(SM_COMMON_FRIEND + suffix))
    # stat.knn_sim_distribution(np.iinfo(np.int64).max, Yelp.load_sim_mat(SM_RATING_DEVIATION + suffix))
    # stat.knn_sim_distribution(np.iinfo(np.int64).max, Yelp.load_sim_mat(SM_COREVIEW_RATIO + suffix))
    # stat.knn_sim_distribution(1, Yelp.load_sim_mat(SM_COREVIEW_RATIO + suffix))

    # Currently active experiment.
    coreview_ratio_sim = Yelp.load_sim_mat(SM_COREVIEW_RATIO + suffix)
    stat.knn_sim_distribution(2, coreview_ratio_sim)

    # stat.knn_sim_distribution(5, Yelp.load_sim_mat(SM_COREVIEW_RATIO + suffix))
    # stat.knn_sim_distribution(10, Yelp.load_sim_mat(SM_COREVIEW_RATIO + suffix))