Code example #1
def exp():
    '''
    Run the experiment: read the Netflix dataset, run mass diffusion for
    every user, aggregate the per-item scores, and correlate the predicted
    trend against the test-set item-degree distribution.
    '''
    filepath = r'./data/netflix5k_result.txt'
    train_data, test_data = readData(filepath, split=',', train_ratio=0.7)
    column_names = {0: 'uid', 1: 'iid'}
    train_data = train_data.rename(columns=column_names)
    test_data = test_data.rename(columns=column_names)
    train, _, udegree, idegree = process_data(train_data, test_data)

    # Accumulate per-item diffusion scores over all users.
    # NOTE: user ids start at 1 here; for 0-based ids iterate
    # range(train.shape[0]) instead.
    n_users, n_items = train.shape
    total_item_score = np.zeros(n_items, dtype=np.float64)
    for uid in tqdm(range(1, n_users), ascii=True):
        total_item_score += massDiffisionForOne(train,
                                                uid,
                                                udegree,
                                                idegree,
                                                K=1000)

    # degree -> item-set distribution (top-N degrees only)
    Ndegree_items = getNdegree_items(idegree, N=20)
    # item-degree distribution of the test set
    test_item_degree = test_data.iid.value_counts()
    corr_score = trend_predict(total_item_score,
                               Ndegree_items,
                               test_item_degree,
                               method='pearson')
    print(corr_score)
Code example #2
def exp(mylambda):
    '''
    Run the mass-diffusion experiment with a degree-weighting exponent.

    Per-user diffusion scores are cached on disk (pickle) so repeated runs
    with different `mylambda` values skip the expensive diffusion step.

    Parameters
    ----------
    mylambda : float
        Exponent passed to massDiffisionForOne and applied to each user's
        degree when weighting that user's item-score vector.

    Returns
    -------
    The Pearson correlation score from trend_predict (also printed).
    '''
    # Alternative datasets kept for reference:
    # score_filepath = 'temp/md_delicious111.pkl'
    # filepath = r'./data/delicious/delicious_subset2.txt'
    # filepath = r'./data/Amazon/amazon_gpu.csv'
    # score_filepath = 'temp/md_amazon_nok1.pkl'
    # filepath = r'./data/netflix5k_result.txt'
    # score_filepath = 'temp/md_nf_noknn1.pkl'
    filepath = r'./data/movielen5000_7533_link864581_day0_1096.txt'
    score_filepath = 'temp/md_ml_noknn.pkl'
    # train_data, test_data = readData(filepath, split='\t', train_ratio=0.7)
    train_data, test_data = readData(filepath, split=',', train_ratio=0.7)
    train_data = train_data.rename(columns={0: 'uid', 1: 'iid'})
    test_data = test_data.rename(columns={0: 'uid', 1: 'iid'})
    train, _, udegree, idegree = process_data(train_data, test_data)

    # degree -> item-set distribution (top-N degrees only)
    degreedistrev = degree_item_map(idegree)
    Ndegree_items = getNdegree_items(degreedistrev, N=10)
    # item-degree distribution of the test set
    test_item_degree = test_data.iid.value_counts()

    # User ids start at 1 here; for 0-based ids iterate
    # range(train.shape[0]) instead.
    total_item_score = np.zeros(train.shape[1], dtype=np.float64)
    if os.path.exists(score_filepath):
        # Reuse cached per-user diffusion scores.
        # FIX: close the pickle file via a context manager (was left open).
        with open(score_filepath, 'rb') as f:
            item_scores = pickle.load(f)
        for user in tqdm(range(1, train.shape[0]), ascii=True):
            degree = udegree.get(user, 0.0)
            if degree == 0.0:
                continue  # skip users with no training interactions
            total_item_score += item_scores[user] * pow(degree, mylambda)
    else:
        item_scores = {}
        for user in tqdm(range(1, train.shape[0]), ascii=True):
            degree = udegree.get(user, 0.0)
            if degree == 0.0:
                continue  # skip users with no training interactions
            one_item_score = massDiffisionForOne(train, user, udegree,
                                                 idegree, mylambda)
            total_item_score += one_item_score * pow(degree, mylambda)
            item_scores[user] = one_item_score
        # FIX: close the pickle file via a context manager (was left open,
        # risking an unflushed/truncated cache file).
        with open(score_filepath, 'wb') as f:
            pickle.dump(item_scores, f)

    corr_score = trend_predict(total_item_score,
                               Ndegree_items,
                               test_item_degree,
                               method='pearson')
    print(corr_score)
    return corr_score