def get_ml_100K_dataset(file_path='/Users/fpena/tmp/bpmf/ml-100k.csv'):
    """Load the MovieLens 100K ratings from a tab-separated CSV file.

    :param file_path: path to the tab-separated ratings file. Defaults to
     the previously hard-coded location so existing callers are unaffected.
    :return: a list of record dicts with 'overall_rating' cast to float
    """
    records = ETLUtils.load_csv_file(file_path, '\t')
    for record in records:
        # The CSV loader returns every field as a string; cast ratings once
        # here so downstream numeric code can use them directly.
        record['overall_rating'] = float(record['overall_rating'])
    return records
def parse_dafevara_file(folder='/Users/fpena/tmp/dafevara/'):
    """Convert a '|'-separated artists-by-user CSV into a plain-text corpus.

    Each line of the output file contains the artists listened to by one
    user: multi-word artist names are joined with underscores and the
    artists themselves are separated by single spaces.

    :param folder: directory containing 'artists-names-by-userId.csv'.
     The result is written to 'user_artists.txt' in the same directory.
     Defaults to the previously hard-coded location.
    """
    ARTISTS_NAMES_FIELD = 'artists_names'
    file_path = folder + 'artists-names-by-userId.csv'
    records = ETLUtils.load_csv_file(file_path, '|')

    for record in records:
        # 'Pink Floyd;Queen' -> 'Pink_Floyd Queen': underscores keep each
        # artist name a single token, spaces separate artists.
        artists = record[ARTISTS_NAMES_FIELD].replace(' ', '_')
        record[ARTISTS_NAMES_FIELD] = artists.replace(';', ' ')

    output_file = folder + 'user_artists.txt'
    with open(output_file, 'w') as of:
        # One writelines call instead of a write per record.
        of.writelines(
            '%s\n' % record[ARTISTS_NAMES_FIELD] for record in records)
def add_extra_column_to_csv(column_value=10,
                            output_file='/tmp/my_csv_file.csv'):
    """Add a constant-valued column to the fold-results CSV file.

    Inserts a Constants.FM_NUM_FACTORS_FIELD column immediately after the
    'Evaluation_Set' header, assigns ``column_value`` to it on every
    record, and saves the augmented table to ``output_file``.

    :param column_value: value stored in the new column for every row
     (default 10, matching the previous hard-coded behavior)
    :param output_file: destination path for the augmented CSV
     (default '/tmp/my_csv_file.csv', as before)
    """
    csv_file_name = '/tmp/results/rival_yelp_restaurant_results_folds_4.csv'
    records = ETLUtils.load_csv_file(csv_file_name)

    # Read the header row directly so the new column can be positioned
    # right after 'Evaluation_Set'; only the read needs the open file.
    with open(csv_file_name, 'r') as csvinput:
        reader = csv.reader(csvinput)
        headers = next(reader)
    index = headers.index('Evaluation_Set') + 1
    headers.insert(index, Constants.FM_NUM_FACTORS_FIELD)
    print(headers)

    for record in records:
        record[Constants.FM_NUM_FACTORS_FIELD] = column_value

    ETLUtils.save_csv_file(output_file, records, headers)
def export_results(fold):
    """Dump the CARSKit recommender's raw rating predictions for one fold.

    Reads the recommender's tab-separated predictions file for ``fold``
    and writes the 'prediction' column to a plain-text results file, one
    value per line.
    """
    recommender = Constants.CARSKIT_RECOMMENDERS
    ratings_fold_folder = Constants.RIVAL_RATINGS_FOLD_FOLDER % fold

    # Each RiVal evaluation strategy maps to the prediction-type tag used
    # in the results file name.
    prediction_type_map = {
        'user_test': 'rating',
        'test_items': 'rating',
        'rel_plus_n': 'ranking',
    }
    prediction_type = prediction_type_map[Constants.RIVAL_EVALUATION_STRATEGY]

    ratings_file = (
        ratings_fold_folder + recommender + '-rating-predictions.txt')
    results_file = (
        ratings_fold_folder + 'carskit_' + recommender + '_results_' +
        prediction_type + '.txt')

    rating_records = ETLUtils.load_csv_file(ratings_file, '\t')
    with open(results_file, 'w') as output:
        for rating_record in rating_records:
            output.write("%s\n" % rating_record['prediction'])
def get_ml_1m_dataset():
    """Load the MovieLens 1M ratings from a '|'-separated CSV file.

    :return: a list of record dicts with 'overall_rating' cast to float
    """
    file_path = '/Users/fpena/UCC/Thesis/datasets/uncompressed/ml-1m.csv'
    records = ETLUtils.load_csv_file(file_path, '|')
    for entry in records:
        # CSV fields arrive as strings; convert the rating to a number.
        entry['overall_rating'] = float(entry['overall_rating'])
    return records
'ids': str })  # NOTE(review): tail of an svd.load_data(...) call whose opening is outside this excerpt — confirm against the full file

# Number of latent factors kept in the decomposition.
k = 100
svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True,
            post_normalize=True)
# predicted_rating = svd.predict(int(5), 'A1', 1, 10)
# predicted_rating2 = svd.predict(int(1), 'A1', 1, 10)
# print('Predicted rating', predicted_rating)
# print('Predicted rating', predicted_rating2)

# Reload the ratings (file variant with a header row) and accumulate the
# absolute error between each SVD prediction and the stored matrix value.
records = ETLUtils.load_csv_file(file_name_header, '|')
errors = []
for record in records:
    try:
        # print(record['user'], record['item'], record['rating'])
        user = record['user']
        item = int(record['item'])
        # Predict on a 1-5 rating scale; presumably the dataset's scale —
        # TODO confirm.
        predicted_rating = svd.predict(item, user, 1, 5)
        print(record['user'], record['item'], predicted_rating)
        # predicted_rating = round(predicted_rating)
        actual_rating = svd.get_matrix().value(item, user)
        error = abs(predicted_rating - actual_rating)
        errors.append(error)
    except KeyError:
        # Skip users/items the fitted model has no entry for.
        continue
# svd.load_data(filename=file_name, sep='::', format={'col':0, 'row':1, 'value':2, 'ids': int}) file_name = '/Users/fpena/tmp/reviews.csv' file_name_header = '/Users/fpena/tmp/reviews-header.csv' # file_name = '/Users/fpena/tmp/small-reviews-matrix.csv' # file_name_header = '/Users/fpena/tmp/small-reviews-header.csv' svd.load_data(filename=file_name, sep='|', format={'col':0, 'row':1, 'value':2, 'ids': str}) k = 100 svd.compute(k=k, min_values=10, pre_normalize=None, mean_center=True, post_normalize=True) # predicted_rating = svd.predict(int(5), 'A1', 1, 10) # predicted_rating2 = svd.predict(int(1), 'A1', 1, 10) # print('Predicted rating', predicted_rating) # print('Predicted rating', predicted_rating2) records = ETLUtils.load_csv_file(file_name_header, '|') errors = [] for record in records: try: # print(record['user'], record['item'], record['rating']) user = record['user'] item = int(record['item']) predicted_rating = svd.predict(item, user, 1, 5) print(record['user'], record['item'], predicted_rating) # predicted_rating = round(predicted_rating) actual_rating = svd.get_matrix().value(item, user) error = abs(predicted_rating - actual_rating) errors.append(error) except KeyError: continue