Beispiel #1
0
    # The three working directories, all rooted at `path`.
    data_path, cache_path, prediction_path = (
        os.path.join(path, subdir)
        for subdir in ('data', 'cache', 'prediction'))

    # Input files are looked up in the data directory.
    file_clicks, file_buys, file_test = (
        os.path.join(data_path, basename)
        for basename in (file_clicks_basename,
                         file_buys_basename,
                         file_test_basename))

    # Feature caches point into the cache directory.
    file_what_to_buy_features, file_buy_or_not_features = (
        os.path.join(cache_path, basename)
        for basename in (file_what_to_buy_features_basename,
                         file_buy_or_not_features_basename))

    # Score and result paths point into the prediction directory.
    file_scores, file_result = (
        os.path.join(prediction_path, basename)
        for basename in (file_scores_basename, file_result_basename))

    # Request only the columns used below, then order the rows by
    # 'Timestamp' and renumber the index from zero.
    # (assumes read_clicks returns a pandas DataFrame -- TODO confirm)
    effective_columns_names = ['Session ID', 'Timestamp', 'Item ID']
    clicks = read_clicks(file_clicks, usecols=effective_columns_names)
    clicks = clicks.sort_values('Timestamp')
    clicks = clicks.reset_index(drop=True)

    (clicks_grouped_by_session_id,
     clicks_grouped_by_session_id_keys) = df_group_by_session_id(clicks)

    # Compute the "what to buy" features only when no cached file exists;
    # otherwise load them back from the cache.
    if not os.path.isfile(file_what_to_buy_features):
        what_to_buy = extract_what_to_buy(clicks_grouped_by_session_id)
        features_to_csv(file_what_to_buy_features, what_to_buy.values())
    else:
        key_columns = ['Session ID', 'Item ID']
        # presumably feature id -> CSV column name; verify against
        # features_from_csv
        feature_columns = {
            'F3': 'Counts',
            'F6': 'Sequent Clicks',
            'F7': 'Time Difference',
        }
        what_to_buy = features_from_csv(file_what_to_buy_features,
                                        key_columns, feature_columns)
Beispiel #2
0
    file_result_basename = sys.argv[8]

    # Directory layout below the root `path`.
    data_path = os.path.join(path, 'data')
    cache_path = os.path.join(path, 'cache')
    prediction_path = os.path.join(path, 'prediction')

    # Inputs are resolved against data/, feature caches against cache/,
    # score and result files against prediction/.
    file_clicks = os.path.join(data_path, file_clicks_basename)
    file_buys = os.path.join(data_path, file_buys_basename)
    file_test = os.path.join(data_path, file_test_basename)
    file_what_to_buy_features = os.path.join(cache_path, file_what_to_buy_features_basename)
    file_buy_or_not_features = os.path.join(cache_path, file_buy_or_not_features_basename)
    file_scores = os.path.join(prediction_path, file_scores_basename)
    file_result = os.path.join(prediction_path, file_result_basename)

    # Request only the columns used below, ordered by 'Timestamp' with a
    # fresh 0..n-1 index.
    # (assumes read_clicks returns a pandas DataFrame -- TODO confirm)
    effective_columns_names = ['Session ID', 'Timestamp', 'Item ID']
    clicks = read_clicks(file_clicks, usecols=effective_columns_names)\
        .sort_values('Timestamp')\
        .reset_index(drop=True)
    clicks_grouped_by_session_id, clicks_grouped_by_session_id_keys = df_group_by_session_id(clicks)

    # "What to buy" features: reuse the cached CSV when present, otherwise
    # extract them from the grouped clicks and write the cache.
    if os.path.isfile(file_what_to_buy_features):
        what_to_buy = features_from_csv(file_what_to_buy_features, ['Session ID', 'Item ID'],
                                        {'F3': 'Counts', 'F6': 'Sequent Clicks', 'F7': 'Time Difference'})
    else:
        what_to_buy = extract_what_to_buy(clicks_grouped_by_session_id)
        features_to_csv(file_what_to_buy_features, what_to_buy.values())

    # "Buy or not" features are cached as a NumPy array file.
    #
    # np.save() appends '.npy' to any file name that does not already end
    # with it, so the file written to disk can differ from the requested
    # name.  Checking only the requested name would then never find the
    # cache and recompute the features on every run.  An existing file with
    # the exact requested name is still honoured first.
    buy_or_not_cache = file_buy_or_not_features
    if not os.path.isfile(buy_or_not_cache) and not buy_or_not_cache.endswith('.npy'):
        buy_or_not_cache += '.npy'

    if os.path.isfile(buy_or_not_cache):
        buy_or_not = np.load(buy_or_not_cache)
    else:
        buy_or_not = extract_buy_or_not(clicks_grouped_by_session_id, what_to_buy)
        np.save(buy_or_not_cache, buy_or_not)
if __name__ == "__main__":
    # Command-line entry point: read the clicks and buys files from `path`,
    # pass them through slice_data() with the requested `frac`, and hand the
    # results to write_df() under "<name>-<frac><ext>" in the same directory.
    #
    # Arguments are checked explicitly instead of with `assert`: assert
    # statements are removed when Python runs with -O, which would let bad
    # input through unchecked.
    usage = r"Incorrect argument list, names also shouldn't contain spaces" + \
            "\nusage: argv[0] /dir/with/data clicks_file buys_file frac"
    if len(sys.argv) != 5:
        sys.exit(usage)

    path = sys.argv[1]
    file_clicks_basename = sys.argv[2]
    file_buys_basename = sys.argv[3]
    char_frac = sys.argv[4]

    try:
        float_frac = float(char_frac)
    except ValueError:
        # Non-numeric text gets the same message as a non-positive value
        # rather than a raw traceback.
        sys.exit('Frac must be a positive number')

    # Written as `not (x > 0)` so that NaN is rejected as well.
    if not float_frac > 0:
        sys.exit('Frac must be a positive number')

    # Values up to 1 are forwarded as a float, larger ones are truncated to
    # an int.  (presumably fraction vs. absolute count -- confirm against
    # slice_data)
    frac = float_frac if float_frac <= 1 else int(float_frac)
    if frac == 1:
        # Nothing is written in this case.
        # NOTE(review): inputs in (1, 2) also truncate to 1 and end up
        # here -- confirm that is intended.
        # sys.exit is used because the bare `exit` helper is injected by the
        # `site` module and is not guaranteed to exist.
        sys.exit(0)

    # Output names insert "-<frac as typed>" between stem and extension.
    file_clicks_parts = os.path.splitext(file_clicks_basename)
    file_buys_parts = os.path.splitext(file_buys_basename)
    file_clicks_sliced_basename = file_clicks_parts[0] + '-' + char_frac + file_clicks_parts[1]
    file_buys_sliced_basename = file_buys_parts[0] + '-' + char_frac + file_buys_parts[1]
    file_clicks = os.path.join(path, file_clicks_basename)
    file_buys = os.path.join(path, file_buys_basename)
    file_clicks_sliced = os.path.join(path, file_clicks_sliced_basename)
    file_buys_sliced = os.path.join(path, file_buys_sliced_basename)

    clicks = read_clicks(file_clicks)
    buys = read_buys(file_buys)
    clicks, buys = slice_data(clicks, buys, frac=frac)

    write_df(file_clicks_sliced, clicks)
    write_df(file_buys_sliced, buys)
Beispiel #4
0
                               "\nusage: argv[0] /dir/with/data clicks_file buys_file frac"

    path = sys.argv[1]
    file_clicks_basename = sys.argv[2]
    file_buys_basename = sys.argv[3]
    char_frac = sys.argv[4]

    # Validate `frac` with explicit checks: an `assert` would be removed
    # when Python runs with -O, and a non-numeric value would otherwise
    # surface as a raw ValueError traceback.
    try:
        float_frac = float(char_frac)
    except ValueError:
        sys.exit('Frac must be a positive number')

    # Written as `not (x > 0)` so that NaN is rejected as well.
    if not float_frac > 0:
        sys.exit('Frac must be a positive number')

    # Values up to 1 are forwarded as a float, larger ones are truncated to
    # an int.  (presumably fraction vs. absolute count -- confirm against
    # slice_data)
    frac = float_frac if float_frac <= 1 else int(float_frac)
    if frac == 1:
        # Nothing is written in this case.
        # NOTE(review): inputs in (1, 2) also truncate to 1 and end up
        # here -- confirm that is intended.
        # sys.exit is used because the bare `exit` helper is injected by the
        # `site` module and is not guaranteed to exist.
        sys.exit(0)

    # Output names insert "-<frac as typed>" between stem and extension.
    file_clicks_parts = os.path.splitext(file_clicks_basename)
    file_buys_parts = os.path.splitext(file_buys_basename)
    file_clicks_sliced_basename = file_clicks_parts[
        0] + '-' + char_frac + file_clicks_parts[1]
    file_buys_sliced_basename = file_buys_parts[
        0] + '-' + char_frac + file_buys_parts[1]
    file_clicks = os.path.join(path, file_clicks_basename)
    file_buys = os.path.join(path, file_buys_basename)
    file_clicks_sliced = os.path.join(path, file_clicks_sliced_basename)
    file_buys_sliced = os.path.join(path, file_buys_sliced_basename)

    clicks = read_clicks(file_clicks)
    buys = read_buys(file_buys)
    clicks, buys = slice_data(clicks, buys, frac=frac)

    write_df(file_clicks_sliced, clicks)
    write_df(file_buys_sliced, buys)