def exp():
    """Experiment driver: run mass diffusion on the Netflix 5k subset and
    report how well the aggregated item scores predict test-set item
    popularity (Pearson correlation of the degree-binned trend).

    NOTE(review): a second `exp(mylambda)` defined later in this file
    shadows this zero-argument version at import time.
    """
    data_path = r'./data/netflix5k_result.txt'
    train_df, test_df = readData(data_path, split=',', train_ratio=0.7)
    train_df = train_df.rename(columns={0: 'uid', 1: 'iid'})
    test_df = test_df.rename(columns={0: 'uid', 1: 'iid'})
    train, _, udegree, idegree = process_data(train_df, test_df)

    # Accumulate per-user diffusion scores over all items.
    # User ids start at 1 here (for 0-based ids the loop would start at 0).
    total_item_score = np.zeros(train.shape[1], dtype=np.float64)
    for uid in tqdm(range(1, train.shape[0]), ascii=True):
        total_item_score += massDiffisionForOne(train, uid, udegree, idegree, K=1000)

    # Degree -> item-set distribution from the training side.
    Ndegree_items = getNdegree_items(idegree, N=20)
    # Item-degree (popularity) distribution observed in the test set.
    test_item_degree = test_df.iid.value_counts()

    corr_score = trend_predict(total_item_score, Ndegree_items,
                               test_item_degree, method='pearson')
    print(corr_score)
def exp(mylambda):
    """Experiment driver: degree-weighted mass diffusion on the MovieLens
    subset, with per-user diffusion scores cached to disk.

    Each user's diffusion score vector is weighted by
    ``udegree[user] ** mylambda`` before being accumulated, then the
    aggregate is correlated (Pearson) against the test-set item-degree
    distribution via ``trend_predict``.

    Args:
        mylambda: exponent applied to each user's degree when weighting
            that user's contribution (also forwarded to
            ``massDiffisionForOne`` — presumably the same weighting knob;
            verify against that helper).

    Returns:
        The correlation score produced by ``trend_predict``.

    NOTE(review): this redefines the zero-argument ``exp()`` declared
    earlier in the file; only this version survives at import time.
    """
    filepath = r'./data/movielen5000_7533_link864581_day0_1096.txt'
    score_filepath = 'temp/md_ml_noknn.pkl'

    train_data, test_data = readData(filepath, split=',', train_ratio=0.7)
    train_data = train_data.rename(columns={0: 'uid', 1: 'iid'})
    test_data = test_data.rename(columns={0: 'uid', 1: 'iid'})
    train, _, udegree, idegree = process_data(train_data, test_data)

    # Degree -> item-set distribution info (training side).
    degreedistrev = degree_item_map(idegree)
    Ndegree_items = getNdegree_items(degreedistrev, N=10)
    # Item-degree (popularity) distribution observed in the test set.
    test_item_degree = test_data.iid.value_counts()

    # User ids start at 1 here (for 0-based ids the loop would start at 0).
    total_item_score = np.zeros(train.shape[1], dtype=np.float64)
    if os.path.exists(score_filepath):
        # Reuse cached per-user score vectors (dict keyed by user id).
        # Fix: close the file deterministically with a context manager.
        with open(score_filepath, 'rb') as f:
            item_scores = pickle.load(f)
        for user in tqdm(range(1, train.shape[0]), ascii=True):
            deg = udegree.get(user, 0.0)
            if deg == 0.0:
                continue  # user absent from the training graph
            total_item_score += item_scores[user] * pow(deg, mylambda)
    else:
        item_scores = {}
        for user in tqdm(range(1, train.shape[0]), ascii=True):
            deg = udegree.get(user, 0.0)  # fix: single lookup, safe default
            if deg == 0.0:
                continue  # user absent from the training graph
            one_item_score = massDiffisionForOne(train, user, udegree,
                                                 idegree, mylambda)
            total_item_score += one_item_score * pow(deg, mylambda)
            item_scores[user] = one_item_score
        # Persist the per-user scores for future runs; close deterministically.
        with open(score_filepath, 'wb') as f:
            pickle.dump(item_scores, f)

    corr_score = trend_predict(total_item_score, Ndegree_items,
                               test_item_degree, method='pearson')
    print(corr_score)
    return corr_score