예제 #1
0
파일: stat.py 프로젝트: shitaixiaoniu/RS_GP
def check_buy():
    """Print the stored rating of every 12-18 purchase found in the rate data.

    Reads the ``data_buy_1218`` file line by line (``user,item,...``).  For
    each row whose user and item are both present in the id dictionaries
    built from the ``rate_1118_1217`` file, ``user,item,rating`` is written
    to stdout, with the rating formatted as an integer.

    Returns:
        int: how many purchase rows were skipped because the user or the
        item was missing from the id dictionaries.  The tally used to be
        computed and then thrown away; callers that ignore the return value
        are unaffected.
    """
    begin_date = datetime.datetime(2014, 11, 18)
    end_date = datetime.datetime(2014, 12, 17)
    buy_date = datetime.datetime(2014, 12, 18)

    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    cf_dir = utils.get_data_dir(utils.FLAG_CF)
    frate_str = "%s/rate_%s_%s" % (cf_dir, begin_date.strftime("%m%d"), end_date.strftime("%m%d"))
    fbuy_str = "%s/data_buy_%s" % (data_dir, buy_date.strftime("%m%d"))

    # Only the two id -> index dictionaries are needed here.
    _, _, user_ids_dict, item_ids_dict = bcbf.compute_user_item_list(frate_str)

    # NOTE(review): the matrix is read from the literal path "data", not from
    # a path derived from frate_str - confirm this is the intended file.
    # The result is converted with tolil() before it is indexed per element.
    rate_matrix = io.mmread("data").tolil()

    missing = 0
    with open(fbuy_str) as fin:
        for line in fin:
            cols = line.strip().split(",")
            user, item = cols[0], cols[1]
            if user in user_ids_dict and item in item_ids_dict:
                u_ix = user_ids_dict[user]
                i_ix = item_ids_dict[item]
                sys.stdout.write("%s,%s,%d\n" % (user, item, rate_matrix[u_ix, i_ix]))
            else:
                missing += 1
    return missing
예제 #2
0
def main():
    """Run the id-list, rate-loading, predict and evaluate steps in order.

    File names are built from the fixed 11-18..12-17 window, the 12-18
    purchase date, ``theta`` = 0.0 and ``top_n`` = 10.  Progress messages
    go to stdout.
    """
    def _say(text):
        # One line per message on stdout, newline-terminated.
        sys.stdout.write(text + '\n')

    theta = 0.0
    top_n = 10
    begin_tag = datetime.datetime(2014, 11, 18).strftime('%m%d')
    end_tag = datetime.datetime(2014, 12, 17).strftime('%m%d')
    buy_tag = datetime.datetime(2014, 12, 18).strftime('%m%d')

    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    cf_dir = utils.get_data_dir(utils.FLAG_CF)
    fraw_str = '%s/data_%s_%s' % (data_dir, begin_tag, end_tag)
    frate_str = '%s/rate_buy_%s_%s' % (cf_dir, begin_tag, end_tag)
    frs_str = '%s/cf_%s_%s_%.1f_%d' % (cf_dir, begin_tag, end_tag, theta, top_n)
    fbuy_str = '%s/data_buy_%s' % (data_dir, buy_tag)

    # Disabled step (the rate file is expected to exist already):
    #   print >> sys.stdout,'[build_item_rate_data] doing...'
    #   build_item_rate_data(fraw_str,frate_str,only_buy = True)
    #   print >> sys.stdout,'[build_item_rate_data] done'

    id_info = compute_user_item_list(frate_str)
    user_ids_list, item_ids_list, user_ids_dict, item_ids_dict = id_info
    _say('user num %d' % len(user_ids_list))
    _say('item num %d' % len(item_ids_list))
    _say('[compute_user_item_list] done ')

    _say('[load_rate_data] doing...')
    rate_matrix = load_rate_data(frate_str, user_ids_dict, item_ids_dict, theta)
    _say('[load_rate_data] done...')

    _say('[model_and_predict] doing...')
    predict_matrix = model_and_predict(
        rate_matrix, user_ids_list, item_ids_list, top_n, frs_str)
    _say('[model_and_predict] done...')

    utils.evaluate_res_except_history(frs_str, fbuy_str, True, fraw_str)
예제 #3
0
def main():
    """Write CN's neighbour-category candidates and evaluate them.

    Builds the raw-data, item, result and purchase file paths from the
    ``utils`` date constants, asks a ``CN`` instance to write candidates to
    the result file, then passes that file to
    ``utils.evaluate_res_except_history``.
    """
    begin_tag = utils.DATE_BEGIN.strftime('%m%d')
    split_tag = utils.DATE_SPLIT.strftime('%m%d')
    end_tag = utils.DATE_END.strftime('%m%d')
    next_tag = utils.DATE_NEXT.strftime('%m%d')

    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    rule_dir = utils.get_data_dir(utils.FLAG_RULE)

    fraw_str = '%s/data_%s_%s' % (data_dir, begin_tag, end_tag)
    fitem_str = '%s/item' % data_dir
    fbuy_str = '%s/data_buy_%s' % (data_dir, next_tag)
    fres_cate_str = '%s/test_candidate_nbr_cate_%s_%s' % (rule_dir, split_tag, end_tag)

    generator = CN(fraw_str, utils.DATE_SPLIT, fitem_str)
    generator.candiate_with_user_nbr_cate(fres_cate_str)

    utils.evaluate_res_except_history(fres_cate_str, fbuy_str, True, fraw_str)
예제 #4
0
def main():
    """Write CR's recent-category candidates and evaluate them.

    Builds the raw-data, item, result and purchase file paths from the
    ``utils`` date constants, asks a ``CR`` instance to write candidates to
    the result file, then passes that file to
    ``utils.evaluate_res_except_history``.
    """
    begin_tag = utils.DATE_BEGIN.strftime('%m%d')
    split_tag = utils.DATE_SPLIT.strftime('%m%d')
    end_tag = utils.DATE_END.strftime('%m%d')
    next_tag = utils.DATE_NEXT.strftime('%m%d')

    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    # The CF directory is looked up but not used below; the call is kept in
    # case get_data_dir has side effects - TODO confirm and drop if not.
    cf_dir = utils.get_data_dir(utils.FLAG_CF)
    rule_dir = utils.get_data_dir(utils.FLAG_RULE)

    fraw_str = '%s/data_%s_%s' % (data_dir, begin_tag, end_tag)
    fitem_str = '%s/item' % data_dir
    fbuy_str = '%s/data_buy_%s' % (data_dir, next_tag)
    fres_cate_str = '%s/test_candidate_rule_cate_%s_%s' % (rule_dir, split_tag, end_tag)

    bcr = CR(fraw_str, utils.DATE_SPLIT, fitem_str)
    bcr.candidate_items_by_recent_cate(fres_cate_str)

    utils.evaluate_res_except_history(fres_cate_str, fbuy_str, True, fraw_str)
예제 #5
0
def main():
    """Write neighbour-category candidates with the module function and evaluate.

    Calls ``candiate_with_user_nbr_cate`` with the raw-data file, the split
    date, the item file and the candidate output path, then passes the
    candidate file to ``utils.evaluate_res_except_history``.
    """
    begin_tag = utils.DATE_BEGIN.strftime('%m%d')
    split_tag = utils.DATE_SPLIT.strftime('%m%d')
    end_tag = utils.DATE_END.strftime('%m%d')
    next_tag = utils.DATE_NEXT.strftime('%m%d')

    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    rule_dir = utils.get_data_dir(utils.FLAG_RULE)

    fraw_str = '%s/data_%s_%s' % (data_dir, begin_tag, end_tag)
    fitem_str = '%s/item' % data_dir
    fbuy_str = '%s/data_buy_%s' % (data_dir, next_tag)
    fcandiadate_str = '%s/candidate_user_%s_%s' % (rule_dir, split_tag, end_tag)
    # Only referenced by the disabled cluster_user step below.
    fuser_label_str = '%s/user_label_%s_%s' % (rule_dir, begin_tag, end_tag)

    # Alternative candidate generators, currently disabled:
    #   cluster_user(fraw_str,begin_date,fuser_label_str)
    #   candidate_with_user_cluster(fraw_str,utils.DATE_BEGIN,fitem_str,fcandiadate_str)
    #   candidate_with_user_nbr_history(fraw_str,utils.DATE_SPLIT,fitem_str,fcandiadate_str)
    candiate_with_user_nbr_cate(fraw_str, utils.DATE_SPLIT, fitem_str, fcandiadate_str)

    utils.evaluate_res_except_history(fcandiadate_str, fbuy_str, True, fraw_str)
예제 #6
0
def test_icic():
    """Return item labels for category '3064', clustering if no file exists.

    If the label file for the category already exists it is parsed as
    ``item,label`` lines and returned as a dict mapping the first column to
    the second.  Otherwise the ``ICIC`` instance is given its base data
    models (when ``user_score_dict`` is still ``None``) and the result of
    ``icic.cluster_item(cate, label_path)`` is returned.
    """
    icic = ICIC()
    cate = '3064'
    rule_dir = utils.get_data_dir(utils.FLAG_RULE)
    fitem_label_str = '%s/item_label/test_item_label_%s_%s_%s' % (
        rule_dir, cate,
        utils.DATE_BEGIN.strftime('%m%d'), utils.DATE_END.strftime('%m%d'))

    if os.path.exists(fitem_label_str):
        print('cate %s exists' % (cate))
        item_label_dict = dict()
        with open(fitem_label_str) as fin:
            for line in fin:
                cols = line.strip().split(',')
                item_label_dict[cols[0]] = cols[1]
        return item_label_dict

    # Identity comparison: "== None" can be hijacked by a custom __eq__.
    if icic.user_score_dict is None:
        data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
        fraw_str = '%s/data_%s_%s' % (
            data_dir,
            utils.DATE_BEGIN.strftime('%m%d'), utils.DATE_END.strftime('%m%d'))
        fitem_str = '%s/item' % (data_dir)
        user_data_model = dm.UserHistoryDataModel(fraw_str, utils.DATE_BEGIN)
        ci_data_model = dm.CateItemDataModel(fitem_str)
        icic.init_base_data(user_data_model, ci_data_model)

    return icic.cluster_item(cate, fitem_label_str)
예제 #7
0
파일: stat.py 프로젝트: shitaixiaoniu/RS_GP
def buy_item_in_recent_record():
    """Print how many next-day purchased items also occur in the history model.

    The history set is the union of ``get_items_by_cate(cate)`` over every
    category of every entry in ``UserHistoryDataModel.user_score_dict``.
    The purchase set is the second column of the ``data_buy_<DATE_NEXT>``
    file.  Two lines are printed: the size of the intersection, and that
    size divided by the number of distinct purchased items.

    When the purchase file yields no items the ratio is printed as ``0.0``
    instead of raising ``ZeroDivisionError``.
    """
    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    fraw_str = "%s/data_%s_%s" % (data_dir, utils.DATE_BEGIN.strftime("%m%d"), utils.DATE_END.strftime("%m%d"))
    fbuy_str = "%s/data_buy_%s" % (data_dir, utils.DATE_NEXT.strftime("%m%d"))
    user_data_model = dm.UserHistoryDataModel(fraw_str, utils.DATE_SPLIT)

    history_item_set = set()
    for user in user_data_model.user_score_dict:
        attr = user_data_model.user_score_dict[user]
        for cate in attr.get_all_cates():
            history_item_set |= attr.get_items_by_cate(cate)

    buy_item_set = set()
    with open(fbuy_str) as fin:
        for line in fin:
            cols = line.strip().split(",")
            buy_item_set.add(cols[1])

    overlap_item_set = history_item_set & buy_item_set
    print(len(overlap_item_set))
    if buy_item_set:
        print(float(len(overlap_item_set)) / len(buy_item_set))
    else:
        # No purchases at all: report a zero share rather than dividing by 0.
        print(0.0)
예제 #8
0
                    print >> fout,line.strip()
    fout.close()
def merge_user_purchase_cate(fin_str, fout_str):
    """Collapse a comma-separated log into one line per user.

    For every non-blank input line the first column is taken as the user
    and the second-to-last column as the value to collect.  The output file
    gets one ``user,v1#v2#...`` line per user, holding that user's distinct
    values joined by ``#`` (in set iteration order, i.e. unspecified).

    Args:
        fin_str: path of the input file to read.
        fout_str: path of the output file; it is created or truncated.
    """
    user_dict = dict()
    with open(fin_str) as fin:
        for line in fin:
            stripped = line.strip()
            if not stripped:
                # A blank line has no second-to-last column; skip it
                # instead of failing with IndexError.
                continue
            cols = stripped.split(',')
            user_dict.setdefault(cols[0], set()).add(cols[-2])

    # "with" guarantees the handle is closed even if a write fails.
    with open(fout_str, 'w') as fout:
        for user in user_dict:
            fout.write('%s,%s\n' % (user, '#'.join(user_dict[user])))

if __name__ == '__main__':
    # Script entry point: derive the output paths for the chosen dates and
    # call split_records_by_delta on the "user_behavior_filter" file with
    # hour-stamped lower and upper bounds.
    parent_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)

    # NOTE(review): begin_date (12-18) is later than split_date (12-16), so
    # the requested window runs backwards - confirm these dates are intended.
    begin_date = dt(2014, 12, 18)
    split_date = dt(2014, 12, 16)
    td = timedelta(1)
    next_date = split_date + td

    # Hard-coded source locations; only used by the disabled filter step.
    fraw_str = '/home/kliner/shitaixiaoniu/ali_rs/resource/user'
    fitem_str = '/home/kliner/shitaixiaoniu/ali_rs/resource/item'

    fout1 = '%s/user_behavior_filter' % parent_dir
    fout_data_delta = '%s/data_%s_%s' % (
        parent_dir, begin_date.strftime('%m%d'), split_date.strftime('%m%d'))
    fout_data_by_day = '%s/data_%s' % (parent_dir, next_date.strftime('%m%d'))
    fout_data_buy_by_day = '%s/data_buy_%s' % (parent_dir, next_date.strftime('%m%d'))

    # Disabled earlier steps:
    #   filter_records_by_selected_item(fraw_str,fitem_str,fout1)
    #   compute_items_users_num(fout1)
    split_records_by_delta(
        fout1,
        begin_date.strftime('%Y-%m-%d') + ' 00',
        split_date.strftime('%Y-%m-%d') + ' 23',
        fout_data_delta)
    # Disabled later step:
    #   split_records_by_date(fout1,next_date.strftime('%Y-%m-%d'),fout_data_by_day)
    
예제 #9
0
    print len(data_neg_list)
    print len(data_pos_list)
    data_pos_array = np.array(data_pos_list)
    data_neg_array = np.array(data_neg_list)
    plt.plot(data_neg_array[:,0],data_neg_array[:,1],'o')
    plt.plot(data_pos_array[:,0],data_pos_array[:,1],'ro')

    plt.show()

if __name__ == '__main__':
    # Script entry point: build the rule-statistics file names for the
    # 11-18..12-17 window (split six days before the end) and run the
    # mean/variance step followed by the probability step.
    begin_date = datetime.datetime(2014, 11, 18)
    end_date = datetime.datetime(2014, 12, 17)
    td = datetime.timedelta(1)
    split_td = datetime.timedelta(6)
    splite_date = end_date - split_td
    buy_date = end_date + td

    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    rule_dir = utils.get_data_dir(utils.FLAG_RULE)

    fraw_str = '%s/data_%s_%s' % (
        data_dir, begin_date.strftime('%m%d'), end_date.strftime('%m%d'))
    fitem_str = '%s/item' % data_dir
    fbuy_str = '%s/data_buy_%s' % (data_dir, buy_date.strftime('%m%d'))

    # Every rule-statistics file shares the "<split>_<end>" suffix.
    fout_str = '%s/rule_stat_cate_%s_%s' % (
        rule_dir, splite_date.strftime('%m%d'), end_date.strftime('%m%d'))
    fout_item_str = '%s/rule_stat_cate_item_%s_%s' % (
        rule_dir, splite_date.strftime('%m%d'), end_date.strftime('%m%d'))
    fout_mean_str = '%s/rule_stat_user_cate_mean_std_%s_%s' % (
        rule_dir, splite_date.strftime('%m%d'), end_date.strftime('%m%d'))
    fout_prob_str = '%s/rule_stat_user_cate_prob_%s_%s' % (
        rule_dir, splite_date.strftime('%m%d'), end_date.strftime('%m%d'))

    # Disabled: count_cate_bahavior_by_recent(fraw_str,fbuy_str,fitem_str,fout_str,splite_date)
    compute_user_cate_mean_var(fout_str, fout_mean_str)
    compute_user_cate_prob(fout_str, fout_mean_str, fout_prob_str)
    # Disabled: plot_user_mean_std(fout_mean_str)
    # Disabled: count_cate_item_by_recent(fraw_str,fitem_str,fout_item_str,splite_date)
예제 #10
0
            
def get_user_history(fbefore_str):
    """Group the lines of a comma-separated file by their first column.

    Returns a dict that maps each distinct first field to a list with one
    entry per matching line, in file order.  Each entry is the line's
    remaining fields joined with the SOH (0x01) character; a line that has
    only the first field contributes an empty string.
    """
    history = {}
    with open(fbefore_str) as fin:
        for raw in fin:
            fields = raw.strip().split(',')
            key, rest = fields[0], fields[1:]
            history.setdefault(key, []).append('\001'.join(rest))
    return history

if __name__=='__main__':
    begin_date = datetime.datetime(2014,11,18)
    split_date = datetime.datetime(2014,12,17)
    td = datetime.timedelta(1)
    next_date = split_date+td
    data_dir = utils.get_data_dir(utils.FLAG_TRAIN_TEST)
    cf_dir = utils.get_data_dir(utils.FLAG_CF)
    frate_str = '%s/rate_%s_%s' %(cf_dir,begin_date.strftime('%m%d'),split_date.strftime('%m%d'))  
    fbefore_str = '%s/data_%s_%s' %(data_dir,begin_date.strftime('%m%d'),split_date.strftime('%m%d'))
    fbuy_str = '%s/data_buy_%s' %(data_dir,next_date.strftime('%m%d'))
    candidate_dir = utils.get_data_dir(utils.FLAG_STAT)
    fout_str = '%s/user_no_behavior_in_buy_%s' %(candidate_dir,next_date.strftime('%m%d'))
    fpurchase_str = '%s/purchase_compose_%s' %(candidate_dir,next_date.strftime('%m%d'))
    #get_new_items_in_purchase(frate_str,fbuy_str)
    compute_purchase_compose(fbuy_str,fbefore_str,fpurchase_str)

    #user_no_history_in_purchase(fbuy_str,fbefore_str,fout_str)
    #compute_user_bhr_dis(fbefore_str)
    """
    is_filter = False
    if is_filter: