def makeV9(date_begin, folder):
    """Write ``v9.csv``: mean ``avgprice`` over two earlier, non-overlapping windows.

    Reads ``data/farming.csv`` and ``<folder>/y.csv``. For each window the
    rows whose ``time`` satisfies ``start <= time < end`` are grouped by
    (province, market, type, name) and the mean of ``avgprice`` is
    left-merged onto the rows of ``y.csv``. The ``y`` column is dropped
    before the result is written to ``<folder>/v9.csv``.

    Window bounds come from ``tools.move_day(date_begin, offset)``; that
    helper is not visible here (presumably it shifts the date by ``offset``
    days -- confirm).

    Args:
        date_begin: Reference date the window offsets are applied to.
        folder: Directory holding ``y.csv``; ``v9.csv`` is written there.
    """
    group_keys = ['province', 'market', 'type', 'name']
    # (start offset, end offset, output column); windows are half-open.
    windows = [
        # Average over the earlier half month.
        (-30, -15, '_half_month_ago_day_avg'),
        # Average over the month before that.
        (-60, -30, '_one_month_ago_day_avg'),
    ]

    frame = pd.read_csv('data/farming.csv')
    frame_final = pd.read_csv(folder + '/' + 'y.csv')

    for start_offset, end_offset, column in windows:
        date1 = tools.move_day(date_begin, start_offset)
        date2 = tools.move_day(date_begin, end_offset)
        window = frame[(frame.time >= date1) & (frame.time < date2)]
        # Passing a {new_name: func} dict to SeriesGroupBy.agg raises
        # SpecificationError on pandas >= 1.0, so aggregate first and name
        # the resulting column while resetting the index.
        averages = (
            window.groupby(group_keys)['avgprice']
            .mean()
            .reset_index(name=column)
        )
        frame_final = pd.merge(frame_final, averages, how='left', on=group_keys)

    frame_final.drop(['y'], axis=1, inplace=True)
    frame_final.to_csv(folder + '/' + 'v9.csv', index=False)
def makeY(date_begin, folder):
    """Write the label file ``<folder>/y.csv`` for ``date_begin``.

    When ``date_begin`` equals ``'2016-07-01'`` the rows of
    ``data/product_market.csv`` are written with a constant ``y`` of 0.
    Otherwise the rows of ``data/farming.csv`` whose ``time`` lies between
    ``date_begin`` and ``tools.move_day(date_begin, 30)`` (both bounds
    inclusive) are written, with ``avgprice`` renamed to ``y``.

    NOTE(review): another ``makeY`` is defined immediately after this one in
    the file; if both live in the same scope, the later definition replaces
    this one -- confirm which is intended.

    Args:
        date_begin: First date of the label window.
        folder: Directory the ``y.csv`` file is written to.
    """
    target_csv = folder + '/' + 'y.csv'

    if date_begin == '2016-07-01':
        # NOTE(review): looks like a placeholder for a period without known
        # prices -- confirm.
        placeholder = pd.read_csv('data/product_market.csv')
        placeholder['y'] = 0
        placeholder.to_csv(target_csv, index=False)
        return

    date_end = tools.move_day(date_begin, 30)
    prices = pd.read_csv('data/farming.csv')
    in_window = (prices.time >= date_begin) & (prices.time <= date_end)
    wanted = ['province', 'market', 'type', 'name', 'time', 'avgprice']
    labels = prices[in_window][wanted].rename(columns={'avgprice': 'y'})
    labels.to_csv(target_csv, index=False)
def makeY(date_begin, folder):
    """Write the label file ``<folder>/y.csv`` for ``date_begin``.

    When ``date_begin`` equals ``'2020-04-02'`` the rows of
    ``data/product_market.csv`` are written with a constant ``y`` of 0.
    Otherwise ``data/farming.csv`` is indexed by its parsed ``time`` column
    and the rows between ``date_begin`` and
    ``tools.move_day(date_begin, 30)`` (both bounds inclusive) are written,
    with ``avgprice`` renamed to ``y``. A preview of the result is printed.

    Args:
        date_begin: First date of the label window.
        folder: Directory the ``y.csv`` file is written to.
    """
    if date_begin == '2020-04-02':
        # NOTE(review): looks like a placeholder for a period without known
        # prices -- confirm.
        frame = pd.read_csv('data/product_market.csv')
        frame['y'] = 0
        frame.to_csv(folder + '/' + 'y.csv', index=False)
        return

    date_end = tools.move_day(date_begin, 30)
    frame = pd.read_csv('data/farming.csv')
    # Index by parsed timestamps so the bounds are compared as dates rather
    # than as raw strings.
    frame.index = pd.DatetimeIndex(frame['time'])
    frame = frame[(frame.index >= date_begin) & (frame.index <= date_end)]
    newdf = frame[
        ['province', 'market', 'type', 'name', 'time', 'avgprice']
    ].rename(columns={'avgprice': 'y'})
    # head must be called: printing the bare attribute shows the bound-method
    # repr instead of the first rows.
    print(newdf.head())
    newdf.to_csv(folder + '/' + 'y.csv', index=False)
def makeV8(date_begin, folder):
    """Write ``v8.csv``: trailing mean ``avgprice`` per product across all markets.

    Reads ``data/farming.csv`` and ``<folder>/y.csv``. For each look-back
    length in (1, 2, 3, 4, 7, 14, 21, 30, 60) the rows with
    ``tools.move_day(date_begin, -day) <= time < date_begin`` are grouped by
    (name, type) and the mean of ``avgprice`` is left-merged onto the rows of
    ``y.csv`` as column ``_<day>day_avg``. The ``y`` column is dropped before
    the result is written to ``<folder>/v8.csv``.

    Args:
        date_begin: Exclusive upper bound of every look-back window.
        folder: Directory holding ``y.csv``; ``v8.csv`` is written there.
    """
    frame = pd.read_csv('data/farming.csv')
    frame_final = pd.read_csv(folder + '/' + 'y.csv')

    # The upper bound is the same for every window, so build its mask once.
    before_begin = frame.time < date_begin

    for day in [1, 2, 3, 4, 7, 14, 21, 30, 60]:
        date1 = tools.move_day(date_begin, -day)
        window = frame[(frame.time >= date1) & before_begin]
        # Passing a {new_name: func} dict to SeriesGroupBy.agg raises
        # SpecificationError on pandas >= 1.0, so aggregate first and name
        # the resulting column while resetting the index.
        averages = (
            window.groupby(['name', 'type'])['avgprice']
            .mean()
            .reset_index(name='_' + str(day) + 'day_avg')
        )
        frame_final = pd.merge(frame_final, averages, how='left', on=['type', 'name'])

    frame_final.drop(['y'], axis=1, inplace=True)
    frame_final.to_csv(folder + '/' + 'v8.csv', index=False)
def makeV7(date_begin, folder):
    """Write ``v7.csv``: trailing price-spread statistics per market and product.

    Reads ``data/farming.csv`` and ``<folder>/y.csv``. For each look-back
    length in (1, 2, 3, 4, 7, 14, 21, 30, 60) the rows with
    ``tools.move_day(date_begin, -day) <= time < date_begin`` are grouped by
    (province, market, type, name) and three aggregates of ``avgprice`` are
    left-merged onto the rows of ``y.csv``:

    * ``_<day>day_offset`` -- maximum minus minimum;
    * ``_<day>day_min2`` -- result of ``_second_min``;
    * ``_<day>day_min3`` -- result of ``_third_min``.

    ``_second_min`` and ``_third_min`` are defined elsewhere in this file.
    The ``y`` column is dropped before the result is written to
    ``<folder>/v7.csv``.

    Args:
        date_begin: Exclusive upper bound of every look-back window.
        folder: Directory holding ``y.csv``; ``v7.csv`` is written there.
    """
    group_keys = ['province', 'market', 'type', 'name']

    frame = pd.read_csv('data/farming.csv')
    frame_final = pd.read_csv(folder + '/' + 'y.csv')

    # The upper bound is the same for every window, so build its mask once.
    before_begin = frame.time < date_begin

    for day in [1, 2, 3, 4, 7, 14, 21, 30, 60]:
        date1 = tools.move_day(date_begin, -day)
        window = frame[(frame.time >= date1) & before_begin]
        prefix = '_' + str(day) + 'day_'
        # Passing a {new_name: func} dict to SeriesGroupBy.agg raises
        # SpecificationError on pandas >= 1.0; keyword-based named
        # aggregation is the supported way to name the output columns.
        stats = (
            window.groupby(group_keys)['avgprice']
            .agg(**{
                prefix + 'offset': lambda x: np.max(x) - np.min(x),
                prefix + 'min2': _second_min,
                prefix + 'min3': _third_min,
            })
            .reset_index()
        )
        frame_final = pd.merge(frame_final, stats, how='left', on=group_keys)

    frame_final.drop(['y'], axis=1, inplace=True)
    frame_final.to_csv(folder + '/' + 'v7.csv', index=False)