def check_buy():
    """Print the stored rating of every purchase whose user and item are known.

    Reads the 12-18 buy file line by line (comma separated, user in column 0,
    item in column 1).  For each pair found in both id dictionaries returned
    by ``bcbf.compute_user_item_list`` the line ``user,item,rating`` is
    printed, where the rating is looked up in the matrix loaded from the
    Matrix Market file ``"data"``.  Pairs with an unknown user or item are
    only tallied locally; the tally is not reported.
    """
    window_start = datetime.datetime(2014, 11, 18)
    window_end = datetime.datetime(2014, 12, 17)
    purchase_day = datetime.datetime(2014, 12, 18)

    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    cf_dir = utils.get_data_dir(utils.FLAG_CF)

    frate_str = "%s/rate_%s_%s" % (
        cf_dir,
        window_start.strftime("%m%d"),
        window_end.strftime("%m%d"),
    )
    # Only the two id -> index dictionaries are needed here.
    _, _, user_index, item_index = bcbf.compute_user_item_list(frate_str)

    # NOTE(review): `io` is presumably scipy.io; the path "data" is hard-coded
    # and unrelated to frate_str -- confirm this is intended.
    rate_matrix = io.mmread("data").tolil()

    fbuy_str = "%s/data_buy_%s" % (data_dir, purchase_day.strftime("%m%d"))

    unmatched = 0
    with open(fbuy_str) as buy_file:
        for record in buy_file:
            fields = record.strip().split(",")
            user, item = fields[0], fields[1]
            if item not in item_index or user not in user_index:
                unmatched += 1
                continue
            row = user_index[user]
            col = item_index[item]
            print("%s,%s,%d" % (user, item, rate_matrix[(row, col)]))
def main():
    """Run the collaborative-filtering pipeline for 11-18 .. 12-17.

    Steps, in order: build the user/item id lists from the buy-rate file,
    load the rate matrix, call ``model_and_predict`` (which is handed the
    result path ``frs_str``), then pass the result file, the 12-18 buy file
    and the raw data file to ``utils.evaluate_res_except_history``.
    Progress messages are printed to standard output.
    """
    begin_date = datetime.datetime(2014, 11, 18)
    end_date = datetime.datetime(2014, 12, 17)
    begin_tag = begin_date.strftime('%m%d')
    end_tag = end_date.strftime('%m%d')

    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    fraw_str = '%s/data_%s_%s' % (data_dir, begin_tag, end_tag)

    cf_dir = utils.get_data_dir(utils.FLAG_CF)
    frate_str = '%s/rate_buy_%s_%s' % (cf_dir, begin_tag, end_tag)

    theta = 0.0
    top_n = 10
    frs_str = '%s/cf_%s_%s_%.1f_%d' % (cf_dir, begin_tag, end_tag, theta, top_n)

    # Disabled step (kept for reference): regenerate the rate file with
    #   build_item_rate_data(fraw_str, frate_str, only_buy=True)
    # bracketed by '[build_item_rate_data] doing...' / 'done' messages.

    id_lists = compute_user_item_list(frate_str)
    user_ids_list, item_ids_list, user_ids_dict, item_ids_dict = id_lists
    print('user num %d' % (len(user_ids_list)))
    print('item num %d' % (len(item_ids_list)))
    print('[compute_user_item_list] done ')

    print('[load_rate_data] doing...')
    rate_matrix = load_rate_data(frate_str, user_ids_dict, item_ids_dict, theta)
    print('[load_rate_data] done...')

    print('[model_and_predict] doing...')
    # The returned matrix is not used afterwards; evaluation below works
    # from the file path frs_str.
    predict_matrix = model_and_predict(
        rate_matrix, user_ids_list, item_ids_list, top_n, frs_str)
    print('[model_and_predict] done...')

    buy_date = datetime.datetime(2014, 12, 18)
    fbuy_str = '%s/data_buy_%s' % (data_dir, buy_date.strftime('%m%d'))
    utils.evaluate_res_except_history(frs_str, fbuy_str, True, fraw_str)
def main():
    """Generate neighbour/category candidates with ``CN`` and evaluate them.

    Builds a ``CN`` from the raw behaviour file, ``utils.DATE_SPLIT`` and the
    item file, asks it for candidates (passing the result path), then hands
    the result file, the next-day buy file and the raw file to
    ``utils.evaluate_res_except_history``.
    """
    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    rule_dir = utils.get_data_dir(utils.FLAG_RULE)

    # All file names are keyed by month-day tags of the configured dates.
    begin_tag = utils.DATE_BEGIN.strftime('%m%d')
    end_tag = utils.DATE_END.strftime('%m%d')
    split_tag = utils.DATE_SPLIT.strftime('%m%d')
    next_tag = utils.DATE_NEXT.strftime('%m%d')

    fraw_str = '%s/data_%s_%s' % (data_dir, begin_tag, end_tag)
    fitem_str = '%s/item' % (data_dir)
    fres_cate_str = '%s/test_candidate_nbr_cate_%s_%s' % (
        rule_dir, split_tag, end_tag)
    fbuy_str = '%s/data_buy_%s' % (data_dir, next_tag)

    generator = CN(fraw_str, utils.DATE_SPLIT, fitem_str)
    # Method name spelling ("candiate") is the one exposed by CN.
    generator.candiate_with_user_nbr_cate(fres_cate_str)

    utils.evaluate_res_except_history(fres_cate_str, fbuy_str, True, fraw_str)
def main():
    """Generate recent-category candidates with ``CR`` and evaluate them.

    Builds a ``CR`` from the raw behaviour file, ``utils.DATE_SPLIT`` and the
    item file, calls ``candidate_items_by_recent_cate`` with the result
    path, then hands the result file, the next-day buy file and the raw file
    to ``utils.evaluate_res_except_history``.
    """
    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    # The CF directory is looked up but not used below; the call is kept
    # because get_data_dir may do more than return a path.
    cf_dir = utils.get_data_dir(utils.FLAG_CF)
    rule_dir = utils.get_data_dir(utils.FLAG_RULE)

    begin_tag = utils.DATE_BEGIN.strftime('%m%d')
    end_tag = utils.DATE_END.strftime('%m%d')
    split_tag = utils.DATE_SPLIT.strftime('%m%d')
    next_tag = utils.DATE_NEXT.strftime('%m%d')

    fraw_str = '%s/data_%s_%s' % (data_dir, begin_tag, end_tag)
    fitem_str = '%s/item' % (data_dir)
    fres_cate_str = '%s/test_candidate_rule_cate_%s_%s' % (
        rule_dir, split_tag, end_tag)
    fbuy_str = '%s/data_buy_%s' % (data_dir, next_tag)

    recommender = CR(fraw_str, utils.DATE_SPLIT, fitem_str)
    recommender.candidate_items_by_recent_cate(fres_cate_str)

    utils.evaluate_res_except_history(fres_cate_str, fbuy_str, True, fraw_str)
def main():
    """Build user candidates via ``candiate_with_user_nbr_cate`` and evaluate.

    The candidate file path is passed to ``candiate_with_user_nbr_cate``
    together with the raw file, ``utils.DATE_SPLIT`` and the item file; the
    same path, the next-day buy file and the raw file are then given to
    ``utils.evaluate_res_except_history``.
    """
    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    rule_dir = utils.get_data_dir(utils.FLAG_RULE)

    begin_tag = utils.DATE_BEGIN.strftime('%m%d')
    end_tag = utils.DATE_END.strftime('%m%d')
    split_tag = utils.DATE_SPLIT.strftime('%m%d')
    next_tag = utils.DATE_NEXT.strftime('%m%d')

    fraw_str = '%s/data_%s_%s' % (data_dir, begin_tag, end_tag)
    fitem_str = '%s/item' % (data_dir)
    fbuy_str = '%s/data_buy_%s' % (data_dir, next_tag)
    fcandiadate_str = '%s/candidate_user_%s_%s' % (rule_dir, split_tag, end_tag)
    # Only referenced by the disabled cluster_user step listed below.
    fuser_label_str = '%s/user_label_%s_%s' % (rule_dir, begin_tag, end_tag)

    # Alternative steps that are currently switched off:
    #   cluster_user(fraw_str, begin_date, fuser_label_str)
    #   candidate_with_user_cluster(fraw_str, utils.DATE_BEGIN, fitem_str, fcandiadate_str)
    #   candidate_with_user_nbr_history(fraw_str, utils.DATE_SPLIT, fitem_str, fcandiadate_str)
    candiate_with_user_nbr_cate(
        fraw_str, utils.DATE_SPLIT, fitem_str, fcandiadate_str)

    utils.evaluate_res_except_history(fcandiadate_str, fbuy_str, True, fraw_str)
def test_icic():
    """Return item labels for category '3064', from cache or by clustering.

    If the label file for the category already exists it is parsed
    (comma separated; column 0 is used as key, column 1 as value) and the
    resulting dict is returned.  Otherwise the ``ICIC`` instance is given
    its base data models when ``user_score_dict`` is still ``None``, and
    the result of ``icic.cluster_item(cate, fitem_label_str)`` is returned.

    Returns:
        A dict read from the cached label file, or whatever
        ``ICIC.cluster_item`` returns when no cache file exists.
    """
    icic = ICIC()
    cate = '3064'
    rule_dir = utils.get_data_dir(utils.FLAG_RULE)
    fitem_label_str = '%s/item_label/test_item_label_%s_%s_%s' % (
        rule_dir,
        cate,
        utils.DATE_BEGIN.strftime('%m%d'),
        utils.DATE_END.strftime('%m%d'),
    )

    if os.path.exists(fitem_label_str):
        print('cate %s exists' % (cate))
        item_label_dict = dict()
        with open(fitem_label_str) as fin:
            for line in fin:
                cols = line.strip().split(',')
                item_label_dict[cols[0]] = cols[1]
        return item_label_dict

    # Identity test: `== None` would go through __eq__ and is not the
    # idiomatic way to detect an unset attribute.
    if icic.user_score_dict is None:
        data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
        fraw_str = '%s/data_%s_%s' % (
            data_dir,
            utils.DATE_BEGIN.strftime('%m%d'),
            utils.DATE_END.strftime('%m%d'),
        )
        fitem_str = '%s/item' % (data_dir)
        user_data_model = dm.UserHistoryDataModel(fraw_str, utils.DATE_BEGIN)
        ci_data_model = dm.CateItemDataModel(fitem_str)
        icic.init_base_data(user_data_model, ci_data_model)
    return icic.cluster_item(cate, fitem_label_str)
def buy_item_in_recent_record():
    """Report how many next-day purchased items appear in user histories.

    Collects every item returned by ``get_items_by_cate`` for every category
    of every entry in ``UserHistoryDataModel(...).user_score_dict`` (model
    built from the raw file and ``utils.DATE_SPLIT``), collects column 1 of
    each comma-separated line of the next-day buy file, and prints:

    1. the number of items present in both sets, and
    2. that number divided by the number of distinct purchased items
       (``0.0`` when the buy file yields no items, instead of raising
       ``ZeroDivisionError``).
    """
    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    fraw_str = "%s/data_%s_%s" % (
        data_dir,
        utils.DATE_BEGIN.strftime("%m%d"),
        utils.DATE_END.strftime("%m%d"),
    )
    fbuy_str = "%s/data_buy_%s" % (data_dir, utils.DATE_NEXT.strftime("%m%d"))

    user_data_model = dm.UserHistoryDataModel(fraw_str, utils.DATE_SPLIT)
    history_item_set = set()
    for user in user_data_model.user_score_dict:
        attr = user_data_model.user_score_dict[user]
        for cate in attr.get_all_cates():
            history_item_set |= attr.get_items_by_cate(cate)

    buy_item_set = set()
    with open(fbuy_str) as fin:
        for line in fin:
            cols = line.strip().split(",")
            buy_item_set.add(cols[1])

    overlap_item_set = history_item_set & buy_item_set
    print(len(overlap_item_set))
    if not buy_item_set:
        # Nothing was bought: the ratio is undefined, report it as zero
        # rather than crashing on a division by zero.
        print(0.0)
        return
    print(float(len(overlap_item_set)) / len(buy_item_set))
print >> fout,line.strip() fout.close() def merge_user_purchase_cate(fin_str,fout_str): user_dict = dict() with open(fin_str) as fin: for line in fin: cols = line.strip().split(',') user_dict.setdefault(cols[0],set()) user_dict[cols[0]].add(cols[-2]) fout = open(fout_str,'w') for user in user_dict: print >> fout,'%s,%s' %(user,'#'.join(user_dict[user])) fout.close() if __name__ == '__main__': parent_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST) begin_date = dt(2014,12,18) split_date = dt(2014,12,16) td = timedelta(1) next_date = split_date+td fout1 = '%s/user_behavior_filter'%(parent_dir) fraw_str = '/home/kliner/shitaixiaoniu/ali_rs/resource/user' fitem_str = '/home/kliner/shitaixiaoniu/ali_rs/resource/item' fout_data_delta = '%s/data_%s_%s' %(parent_dir,begin_date.strftime('%m%d'),split_date.strftime('%m%d')) fout_data_by_day= '%s/data_%s' %(parent_dir,next_date.strftime('%m%d')) fout_data_buy_by_day= '%s/data_buy_%s' %(parent_dir,next_date.strftime('%m%d')) #filter_records_by_selected_item(fraw_str,fitem_str,fout1) #compute_items_users_num(fout1) split_records_by_delta(fout1,'%s 00'%(begin_date.strftime('%Y-%m-%d')),'%s 23'%(split_date.strftime('%Y-%m-%d')),fout_data_delta) #split_records_by_date(fout1,next_date.strftime('%Y-%m-%d'),fout_data_by_day)
print len(data_neg_list) print len(data_pos_list) data_pos_array = np.array(data_pos_list) data_neg_array = np.array(data_neg_list) plt.plot(data_neg_array[:,0],data_neg_array[:,1],'o') plt.plot(data_pos_array[:,0],data_pos_array[:,1],'ro') plt.show() if __name__ == '__main__': begin_date = datetime.datetime(2014,11,18) end_date = datetime.datetime(2014,12,17) td = datetime.timedelta(1) split_td = datetime.timedelta(6) splite_date = end_date - split_td data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST) rule_dir = utils.get_data_dir(utils.FLAG_RULE) fraw_str = '%s/data_%s_%s' %(data_dir,begin_date.strftime('%m%d'),end_date.strftime('%m%d')) fitem_str = '%s/item' %(data_dir) buy_date = end_date+td fbuy_str = '%s/data_buy_%s'%(data_dir,buy_date.strftime('%m%d')) fout_str = '%s/rule_stat_cate_%s_%s' %(rule_dir,splite_date.strftime('%m%d'),end_date.strftime('%m%d')) fout_item_str = '%s/rule_stat_cate_item_%s_%s' %(rule_dir,splite_date.strftime('%m%d'),end_date.strftime('%m%d')) fout_mean_str ='%s/rule_stat_user_cate_mean_std_%s_%s' %(rule_dir,splite_date.strftime('%m%d'),end_date.strftime('%m%d')) fout_prob_str ='%s/rule_stat_user_cate_prob_%s_%s' %(rule_dir,splite_date.strftime('%m%d'),end_date.strftime('%m%d')) #count_cate_bahavior_by_recent(fraw_str,fbuy_str,fitem_str,fout_str,splite_date) compute_user_cate_mean_var(fout_str,fout_mean_str) compute_user_cate_prob(fout_str,fout_mean_str,fout_prob_str) #plot_user_mean_std(fout_mean_str) #count_cate_item_by_recent(fraw_str,fitem_str,fout_item_str,splite_date)
def get_user_history(fbefore_str): user_dict=dict() with open(fbefore_str) as fin: for line in fin: cols = line.strip().split(',') user_dict.setdefault(cols[0],[]) user_dict[cols[0]].append('\001'.join(cols[1:])) return user_dict if __name__=='__main__': begin_date = datetime.datetime(2014,11,18) split_date = datetime.datetime(2014,12,17) td = datetime.timedelta(1) next_date = split_date+td data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST) cf_dir = utils.get_data_dir(utils.FLAG_CF) frate_str = '%s/rate_%s_%s' %(cf_dir,begin_date.strftime('%m%d'),split_date.strftime('%m%d')) fbefore_str = '%s/data_%s_%s' %(data_dir,begin_date.strftime('%m%d'),split_date.strftime('%m%d')) fbuy_str = '%s/data_buy_%s' %(data_dir,next_date.strftime('%m%d')) candidate_dir = utils.get_data_dir(utils.FLAG_STAT) fout_str = '%s/user_no_behavior_in_buy_%s' %(candidate_dir,next_date.strftime('%m%d')) fpurchase_str = '%s/purchase_compose_%s' %(candidate_dir,next_date.strftime('%m%d')) #get_new_items_in_purchase(frate_str,fbuy_str) compute_purchase_compose(fbuy_str,fbefore_str,fpurchase_str) #user_no_history_in_purchase(fbuy_str,fbefore_str,fout_str) #compute_user_bhr_dis(fbefore_str) """ is_filter = False if is_filter: