Esempio n. 1
0
def get_avg_maps(train_data):
    """Build all average lookups derived from ``train_data``.

    Each lookup is produced by the matching ``utilities.get_*`` helper,
    called with ``train_data`` as its only argument. The structure of the
    returned lookups is defined by those helpers and is not visible here.

    Returns:
        A 5-tuple ordered as ``(chunk_avg, hour_avg_by_chunk,
        weekday_avg_by_chunk, hour_avg, weekday_avg)``. Note that this
        order differs from the order in which the helpers are invoked.
    """
    # The helpers are invoked in this exact sequence; only the packing
    # order of the result tuple below differs.
    per_chunk = utilities.get_chunk_avg(train_data)
    per_hour = utilities.get_hour_avg(train_data)
    per_hour_in_chunk = utilities.get_hour_avg_by_chunk(train_data)
    per_weekday = utilities.get_weekday_avg(train_data)
    per_weekday_in_chunk = utilities.get_weekday_avg_by_chunk(train_data)

    # Chunk-specific lookups come first, global ones last.
    avg_maps = (
        per_chunk,
        per_hour_in_chunk,
        per_weekday_in_chunk,
        per_hour,
        per_weekday,
    )
    return avg_maps
Esempio n. 2
0
def baseline(training_file, submission_file, output_file):
    """Fill the '0' cells of a submission file with hourly averages.

    The training file is read through ``utilities.read_file`` and used to
    compute two lookups: hour averages per chunk and global hour averages.
    Every submission row except the first is then scanned; each target cell
    whose value is the string ``'0'`` is replaced by the chunk-specific
    hourly average when the row's chunk id is present in the per-chunk
    lookup, and by the global hourly average otherwise. The result is
    written with ``utilities.write_file``.

    Args:
        training_file: Path passed to ``utilities.read_file`` for the
            training data.
        submission_file: Path passed to ``utilities.read_file`` for the
            submission template (read with a second argument of ``True``;
            NOTE(review): the meaning of that flag is defined by
            ``utilities.read_file`` -- confirm).
        output_file: Path passed to ``utilities.write_file`` for the filled
            submission.
    """
    # Column layout of a submission row, as used by the indexing below.
    chunk_col = 1
    hour_col = 3
    first_target_col = 5
    # Cells holding exactly this string are the ones that get filled.
    placeholder = '0'

    data = utilities.read_file(training_file)
    sub_data = utilities.read_file(submission_file, True)

    # Single-argument parenthesised print emits the same text on Python 2
    # and Python 3, whereas the bare print statement is Python 2 only.
    print('Calculating hour averages...')
    hour_avg_by_chunk = utilities.get_hour_avg_by_chunk(data)
    hour_avg = utilities.get_hour_avg(data)

    print('Filling submission file...')
    # Row 0 is skipped (presumably a header row -- TODO confirm).
    for i in range(1, len(sub_data)):
        row = sub_data[i]
        chunk_id = row[chunk_col]
        hour = row[hour_col]
        for j in range(first_target_col, len(row)):
            if row[j] != placeholder:
                continue
            # Lookups stay lazy (only for cells that need filling) so rows
            # without placeholders never touch the average tables.
            target = j - first_target_col
            if chunk_id in hour_avg_by_chunk:
                row[j] = hour_avg_by_chunk[chunk_id][hour][target]
            else:
                row[j] = hour_avg[hour][target]

    utilities.write_file(output_file, sub_data)
Esempio n. 3
0
def baseline(training_file, submission_file, output_file):
    """Fill the '0' cells of a submission file with hourly averages.

    The training file is read through ``utilities.read_file`` and used to
    compute two lookups: hour averages per chunk and global hour averages.
    Every submission row except the first is then scanned; each target cell
    whose value is the string ``'0'`` is replaced by the chunk-specific
    hourly average when the row's chunk id is present in the per-chunk
    lookup, and by the global hourly average otherwise. The result is
    written with ``utilities.write_file``.

    Args:
        training_file: Path passed to ``utilities.read_file`` for the
            training data.
        submission_file: Path passed to ``utilities.read_file`` for the
            submission template (read with a second argument of ``True``;
            NOTE(review): the meaning of that flag is defined by
            ``utilities.read_file`` -- confirm).
        output_file: Path passed to ``utilities.write_file`` for the filled
            submission.
    """
    # Column layout of a submission row, as used by the indexing below.
    chunk_col = 1
    hour_col = 3
    first_target_col = 5
    # Cells holding exactly this string are the ones that get filled.
    placeholder = '0'

    data = utilities.read_file(training_file)
    sub_data = utilities.read_file(submission_file, True)

    # Single-argument parenthesised print emits the same text on Python 2
    # and Python 3, whereas the bare print statement is Python 2 only.
    print('Calculating hour averages...')
    hour_avg_by_chunk = utilities.get_hour_avg_by_chunk(data)
    hour_avg = utilities.get_hour_avg(data)

    print('Filling submission file...')
    # Row 0 is skipped (presumably a header row -- TODO confirm).
    for i in range(1, len(sub_data)):
        row = sub_data[i]
        chunk_id = row[chunk_col]
        hour = row[hour_col]
        for j in range(first_target_col, len(row)):
            if row[j] != placeholder:
                continue
            # Lookups stay lazy (only for cells that need filling) so rows
            # without placeholders never touch the average tables.
            target = j - first_target_col
            if chunk_id in hour_avg_by_chunk:
                row[j] = hour_avg_by_chunk[chunk_id][hour][target]
            else:
                row[j] = hour_avg[hour][target]

    utilities.write_file(output_file, sub_data)