def get_candles_for_feature(candle_types, start_dt, end_dt):
    """Fetch candles for each candle type and return them keyed by type.

    Note that ``pair`` is not a parameter of this function; it is resolved
    from the enclosing (module) scope at call time.

    Args:
        candle_types: Iterable of candle type identifiers passed to the API.
        start_dt: Start of the wanted period; must support subtraction of a
            ``timedelta`` and ``strftime``. The request starts one day
            earlier than this value.
        end_dt: End of the wanted period; must support ``strftime``.

    Returns:
        dict mapping each candle type to a DataFrame with the columns
        open/high/low/close/volume/timestamp, indexed by the ``timestamp``
        column converted with ``datetime.fromtimestamp(x / 1000)``.
    """
    time_format = '%Y-%m-%d %H:%M:%S'
    column_names = ['open', 'high', 'low', 'close', 'volume', 'timestamp']

    def fetch_frame(candle_type):
        # Dates are formatted per request, mirroring the one-call-per-type flow.
        raw = API().get_candles(
            pair,
            candle_type=candle_type,
            start_dt=(start_dt - timedelta(1)).strftime(time_format),
            end_dt=end_dt.strftime(time_format),
        )
        frame = pd.DataFrame(raw, columns=column_names)
        # The timestamp is divided by 1000 before conversion, i.e. it is
        # treated as epoch milliseconds; the result is a naive local datetime.
        frame.index = frame.timestamp.map(
            lambda ms: datetime.fromtimestamp(ms / 1000)
        )
        return frame

    return {candle_type: fetch_frame(candle_type) for candle_type in candle_types}
def summary_messages(self, start_dt, end_dt, *, pair='xrp_jpy',
                     candle_types=('1min', '5min', '15min', '30min', '1hour')):
    """Fetch candles for several candle types and return them keyed by type.

    Args:
        start_dt: Start of the period, forwarded unchanged to the API.
        end_dt: End of the period, forwarded unchanged to the API.
        pair: Trading pair to query. Defaults to ``'xrp_jpy'``, the value
            that used to be hard-coded.
        candle_types: Candle types to fetch. Defaults to the previously
            hard-coded set (1min, 5min, 15min, 30min, 1hour).

    Returns:
        dict mapping each candle type to a DataFrame with the columns
        open/high/low/close/volume/timestamp, indexed by the raw
        ``timestamp`` column (no datetime conversion is applied here).
    """
    columns = ['open', 'high', 'low', 'close', 'volume', 'timestamp']
    messages_summary = {}
    for candle_type in candle_types:
        # Arguments are passed positionally, exactly as before.
        candles = API().get_candles(pair, candle_type, start_dt, end_dt)
        candles = pd.DataFrame(candles, columns=columns)
        candles.index = candles['timestamp']
        messages_summary[candle_type] = candles
    return messages_summary
# Minutes of candle history included before each target time.
_LOOKBACK_MINUTES = 130


def _features_from_preloaded(candles, params, candle_type, start_dt, end_dt):
    """Build features from already-fetched candles, one slice per target time."""
    # Re-index a shallow copy so the caller's frame keeps its own index.
    candles = candles.copy(deep=False)
    # The timestamp is divided by 1000 before conversion (epoch milliseconds).
    candles.index = candles.timestamp.map(
        lambda x: datetime.fromtimestamp(x / 1000)
    )

    lookback = timedelta(minutes=_LOOKBACK_MINUTES)
    # Target times are built once and reused for both the worker inputs and
    # the row selection below.
    dts = list(datetimerange(str2dt(start_dt), str2dt(end_dt) + timedelta(minutes=1)))

    # Compute features for each time point (parallel processing, per the
    # original author's note on multi_process).
    args = [
        (candles[(d - lookback <= candles.index) & (candles.index <= d)],
         params, candle_type, d)
        for d in dts
    ]
    tmp_features = multi_process(args)

    # Keep only the row that belongs to each target time.
    rows = [tmp[tmp.index == dt] for dt, tmp in zip(dts, tmp_features)]
    del tmp_features
    gc.collect()

    if not rows:
        raise ValueError(
            'no target times between {} and {}'.format(start_dt, end_dt)
        )
    # A single concat avoids re-copying the accumulated frame on every step.
    return pd.concat(rows)


def _features_from_api(pair, candle_type, params, start_dt, end_dt):
    """Fetch candles from the API and build features, dumping both to CSV."""
    time_format = '%Y-%m-%d %H:%M:%S'
    start_dt_ext = (
        datetime.strptime(start_dt, time_format)
        - timedelta(minutes=_LOOKBACK_MINUTES)
    ).strftime(time_format)
    candles = API().get_candles(pair, candle_type=candle_type,
                                start_dt=start_dt_ext, end_dt=end_dt)
    candles = pd.DataFrame(
        candles, columns=['open', 'high', 'low', 'close', 'volume', 'timestamp']
    )
    candles.index = candles.timestamp.map(
        lambda x: datetime.fromtimestamp(x / 1000)
    )
    # Relative paths: both dumps land in the current working directory.
    candles.to_csv('candles_{}_{}.csv'.format(end_dt, candle_type))
    features = _extract_feature(candles, params, candle_type, end_dt)
    features.to_csv('features_{}_{}.csv'.format(end_dt, candle_type))
    # NOTE: unlike the preloaded path, rows are not trimmed to
    # [start_dt, end_dt] here; an earlier trimming step had been disabled.
    return features


def extract_feature(start_dt, end_dt, pair, candle_types, df_all=None):
    """Compute features for every candle type and join them column-wise.

    Args:
        start_dt: Start time as a ``'%Y-%m-%d %H:%M:%S'`` string.
        end_dt: End time as a string in the same format.
        pair: Trading pair; only used when candles are fetched from the API.
        candle_types: Iterable of candle type identifiers.
        df_all: Optional mapping from candle type to a candle DataFrame that
            has a ``timestamp`` column. When given, no API call is made and
            the frames in it are left unmodified.

    Returns:
        DataFrame whose columns are the per-type feature columns suffixed
        with ``_<candle_type>``, forward-filled along the index, or ``None``
        when ``candle_types`` is empty.

    Raises:
        ValueError: If ``df_all`` is given and the time range yields no
            target times.
    """
    features_all = None
    for candle_type in candle_types:
        params = get_params(candle_type)
        if df_all is not None:
            logger.debug('candle type: {}'.format(candle_type))
            features = _features_from_preloaded(
                df_all[candle_type], params, candle_type, start_dt, end_dt
            )
        else:
            features = _features_from_api(
                pair, candle_type, params, start_dt, end_dt
            )

        features.columns = [c + '_' + candle_type for c in features.columns]
        if features_all is None:
            features_all = features
        else:
            features_all = pd.concat([features_all, features], axis=1)
        # Joining candle types on the index can leave gaps in some columns;
        # carry the last known value forward. `.ffill()` replaces the
        # deprecated `fillna(method='ffill')`.
        features_all = features_all.ffill()

        del features
        gc.collect()
    return features_all