def get_avg_maps(train_data):
    """Collect every average lookup that `utilities` derives from `train_data`.

    Returns a 5-tuple, in this order:
        (chunk_avg, hour_avg_by_chunk, weekday_avg_by_chunk,
         hour_avg, weekday_avg)

    Note that the return order puts the per-chunk maps ahead of the global
    hour/weekday maps, which is not the order in which they are computed.
    """
    # The helpers are invoked in the same sequence as before; only the
    # packing order of the result differs from the call order.
    per_chunk = utilities.get_chunk_avg(train_data)
    per_hour = utilities.get_hour_avg(train_data)
    per_hour_in_chunk = utilities.get_hour_avg_by_chunk(train_data)
    per_weekday = utilities.get_weekday_avg(train_data)
    per_weekday_in_chunk = utilities.get_weekday_avg_by_chunk(train_data)

    return (
        per_chunk,
        per_hour_in_chunk,
        per_weekday_in_chunk,
        per_hour,
        per_weekday,
    )
def baseline(training_file, submission_file, output_file):
    """Fill '0' cells of a submission file with hour-of-day averages.

    Reads `training_file` and `submission_file` through `utilities.read_file`,
    computes hour averages from the training data, replaces every cell equal
    to the string '0' (from column 5 onward, rows 1 onward) with an average,
    and writes the result with `utilities.write_file` to `output_file`.

    For each row, the chunk id is taken from column 1 and the hour from
    column 3. If the chunk id is present in the per-chunk hour averages,
    that chunk's average for the hour is used; otherwise the global hour
    average is used. The value picked is the one at position
    `column - 5` of the selected averages.
    """
    first_value_col = 5  # averages are indexed relative to this column

    data = utilities.read_file(training_file)
    sub_data = utilities.read_file(submission_file, True)

    # Parenthesised single-string form prints the same text under the
    # Python 2 print statement this file uses.
    print('Calculating hour averages...')
    hour_avg_by_chunk = utilities.get_hour_avg_by_chunk(data)
    hour_avg = utilities.get_hour_avg(data)

    print('Filling submission file...')
    # Row 0 is skipped -- presumably a header row; confirm against the
    # submission file format.
    for row_idx in range(1, len(sub_data)):
        row = sub_data[row_idx]
        chunk_id, hour = row[1], row[3]
        for col in range(first_value_col, len(row)):
            if row[col] == '0':
                # Look up lazily, only when a cell actually needs filling.
                averages = (
                    hour_avg_by_chunk[chunk_id][hour]
                    if chunk_id in hour_avg_by_chunk
                    else hour_avg[hour]
                )
                row[col] = averages[col - first_value_col]

    utilities.write_file(output_file, sub_data)