def file_join(file_from, file_to, key_word): if os.path.isdir(file_to): file_to = my_filter.walk_dir(file_to, 'csv') fp_from = open(file_from, 'r') lable_from = fp_from.readline().strip().split(',') index_from = lable_from.index(key_word) dict_from = {} for line in fp_from: line = line.strip().split(',') temp = line[:index_from] + line[index_from+1:] value = '' for i in temp: value += i + ',' value = value[:-1] dict_from[line[index_from]] = value if type(file_to) == types.ListType: for f in file_to: fp_to = open(f, 'r') lable_to = fp_to.readline().strip().split(',') index_to = lable_to.index(key_word) file_new = f[:f.rfind('.')] + '-joined.csv' fp_write = open(file_new, 'w') lable_new = lable_to for word in lable_from: if key_word != word: lable_new.append(word) value = '' for i in lable_new: value += i + ',' value = value[:-1] fp_write.write(value+'\n') for line in fp_to: temp = line.strip().split(',') line_new = line.strip() + ',' + dict_from[temp[index_to]] fp_write.write(line_new+'\n') else: fp_to = open(file_to, 'r') lable_to = fp_to.readline().strip().split(',') index_to = lable_to.index(key_word) file_new = file_to[:file_to.rfind('.')] + '-joined.csv' fp_write = open(file_new, 'w') lable_new = lable_to for word in lable_from: if key_word != word: lable_new.append(word) value = '' for i in lable_new: value += i + ',' value = value[:-1] fp_write.write(value+'\n') for line in fp_to: temp = line.strip().split(',') line_new = line.strip() + ',' + dict_from[temp[index_to]] fp_write.write(line_new+'\n')
def analyse_data(file_path_read, task_type=1):
    """Expand raw '<date><hour>[,num]' records into feature rows in CSV files.

    Every input line is comma-separated; its first field is a date followed
    by a two-character hour.  For each input file ``<stem>.<ext>`` a file
    ``<stem>-analysed.csv`` is written with the columns
    date, what_day, hour, holiday, weekend, weather, temp_high, temp_low,
    temp_mean, temp_predict and, for training data only, num.

    Relies on names defined elsewhere in this module: ``data_dict``,
    ``my_filter``, ``holiday_date`` and ``work_date``.

    Args:
        file_path_read: path of one input file, or a directory (the inputs
            are then whatever ``my_filter.walk_dir(dir, 'txt')`` returns).
        task_type: 1 for training data (the second field of each line is
            copied to the 'num' column), 2 for test data (no 'num' column).

    Returns:
        -1 if ``task_type`` is neither 1 nor 2, otherwise None.
    """
    # Validate first so a bad call fails fast, before any file is read.
    if task_type not in (1, 2):
        print('wrong task_type, please input 1 for train, 2 for test.\n')
        return -1

    labels = ['date', 'what_day', 'hour', 'holiday', 'weekend', 'weather',
              'temp_high', 'temp_low', 'temp_mean', 'temp_predict']
    if task_type == 1:
        labels.append('num')

    # NOTE(review): data_dict is not visible here; it is indexed below as
    # weather_dict[date][column] -- confirm against its definition.
    weather_dict = data_dict('./weather-analysed.csv', 'date')

    if os.path.isdir(file_path_read):
        paths_read = my_filter.walk_dir(file_path_read, 'txt')
    else:
        paths_read = [file_path_read]

    for path_read in paths_read:
        # splitext only strips a real extension; slicing at rfind('.') breaks
        # on extension-less names and on dotted directories.
        path_write = os.path.splitext(path_read)[0] + '-analysed.csv'
        # 'with' guarantees both handles are closed even if a row raises.
        with open(path_read, 'r') as fp_read:
            with open(path_write, 'w') as fp_write:
                fp_write.write(','.join(labels) + '\n')
                for line in fp_read:
                    line = line.strip()
                    if not line:
                        continue  # tolerate blank / trailing empty lines
                    row = _analyse_record(line.split(','), task_type,
                                          weather_dict)
                    fp_write.write(','.join(row[l] for l in labels) + '\n')


def _analyse_record(fields, task_type, weather_dict):
    """Build the column-name -> string mapping for one split input line."""
    date_hour = fields[0]
    date = date_hour[:-2]       # everything except the trailing two characters
    hour_text = date_hour[-2:]  # trailing two characters are the hour

    row = {'date': date, 'hour': hour_text}
    if task_type == 1:
        row['num'] = fields[1]

    row['what_day'] = my_filter.what_day(date)
    is_holiday = date in holiday_date
    row['holiday'] = '1' if is_holiday else '0'
    # A day counts as weekend when its day number is 6 or more and it is not
    # listed in work_date, or when it is a listed holiday.
    is_day_off = int(row['what_day']) >= 6 and date not in work_date
    row['weekend'] = '1' if (is_day_off or is_holiday) else '0'

    weather = weather_dict[date]
    for column in ('weather', 'temp_high', 'temp_low', 'temp_mean'):
        row[column] = weather[column]

    row['temp_predict'] = str(_predict_temp(int(hour_text),
                                            float(row['temp_low']),
                                            float(row['temp_high'])))
    return row


def _predict_temp(hour, temp_low, temp_high):
    """Estimate the temperature at ``hour`` from the day's low and high.

    Piecewise-linear model: the value rises from temp_low at 06:00 to
    temp_high at 14:00, then falls back to temp_low over the following
    16 hours (14:00 through 06:00 the next morning).
    """
    spread = temp_high - temp_low
    if hour < 6:
        # Early morning: still on the falling leg that began at 14:00 the
        # previous day, hence the +24 when measuring hours elapsed.
        return temp_high - (hour + 24 - 14) / 16.0 * spread
    if hour < 14:
        return temp_low + (hour - 6) / 8.0 * spread
    return temp_high - (hour - 14) / 16.0 * spread
if __name__ == '__main__':
    # Interactive driver: ask for the training input, a line number and the
    # algorithm to use, then run the chosen model against the fixed test set.
    file_path_read = raw_input(prompt['file_path_read'])
    line_num = int(raw_input('what the line number: '))
    # NOTE(review): the '\ ' sequences in the two prompts below look like
    # backslash line-continuations damaged by whitespace collapsing --
    # confirm against the original file before changing them.
    algorithm_type = int(raw_input('what kind of algorithm you want to use:\ \n1: linear regression\ \n2: random forest\n'))

    df_test = pd.read_csv('./date-7-analysed.csv')
    # X_test = df_test[feature_cols]
    # X_test = preprocessing.normalize(X_test)
    # print X_test.head()

    # Accept either a single CSV file or a directory of CSV files.
    if not os.path.isdir(file_path_read):
        file_path_read = [file_path_read]
    else:
        file_path_read = my_filter.walk_dir(file_path_read, 'csv')

    # Keep asking until the answer is one of the known algorithm ids.
    while algorithm_type not in algorithm:
        print('\n\nwrong algorithm type!\n')
        algorithm_type = int(raw_input('what kind of algorithm you want to use:\ \n1: linear regression\ \n2: random forests\n'))

    # The output name encodes the algorithm and the requested line number.
    name_suffix = '-line' + str(line_num) + '.txt'
    if algorithm_type != 1:
        result = my_random_forests(file_path_read, df_test)
        file_path_write = 'predict-randomforest' + name_suffix
    else:
        result = my_linear_regression(file_path_read, df_test)
        file_path_write = 'predict-linear' + name_suffix