def download_day_k_data(self, response):
    """Merge a downloaded daily k-data CSV into the CSV stored on disk.

    The response body is parsed as a GB2312-encoded CSV, tagged with the
    security's code/id/name taken from ``response.meta['item']``, reduced
    to the index or stock column set (chosen by ``item['type']``) and
    renamed to ``KDATA_INDEX_COL`` / ``KDATA_STOCK_COL``. The result is
    appended to whatever already exists at ``response.meta['path']``;
    rows with missing required values are dropped, duplicates on
    ``timestamp`` keep the last occurrence (i.e. the freshly downloaded
    row), and the file is rewritten sorted by timestamp.

    Any exception is logged with its traceback and not re-raised.

    Args:
        response: object exposing ``body`` (bytes), ``url`` and a ``meta``
            dict holding ``path`` (target CSV path) and ``item`` (mapping
            with ``code``, ``id``, ``name`` and ``type``).
    """
    path = response.meta['path']
    item = response.meta['item']

    try:
        # Data already saved in the csv file (kept as strings).
        if os.path.exists(path):
            saved_df = pd.read_csv(path, dtype=str)
        else:
            saved_df = pd.DataFrame()

        df = utils.read_csv(io.BytesIO(response.body), encoding='GB2312',
                            na_values='None')
        df['code'] = item['code']
        df['securityId'] = item['id']
        df['name'] = item['name']

        is_index = item['type'] == 'index'

        if is_index:
            # Index data: the source has no turnover / market-cap / pe
            # columns, so they are added as empty placeholders.
            df = df.loc[:, [
                '日期', 'code', 'name', '最低价', '开盘价', '收盘价',
                '最高价', '成交量', '成交金额', 'securityId', '前收盘',
                '涨跌额', '涨跌幅'
            ]]
            df['turnoverRate'] = None
            df['tCap'] = None
            df['mCap'] = None
            df['pe'] = None
            df.columns = KDATA_INDEX_COL
        else:
            # Stock data.
            df = df.loc[:, [
                '日期', 'code', 'name', '最低价', '开盘价', '收盘价',
                '最高价', '成交量', '成交金额', 'securityId', '前收盘',
                '涨跌额', '涨跌幅', '换手率', '总市值', '流通市值'
            ]]
            df['factor'] = None
            df.columns = KDATA_STOCK_COL

        # Merge into the current csv. DataFrame.append was removed in
        # pandas 2.0; pd.concat is the equivalent that works on every
        # pandas version.
        saved_df = pd.concat([saved_df, df], ignore_index=True)

        if is_index:
            saved_df = saved_df.dropna(subset=KDATA_INDEX_COLUMN_163)
            # Guarantee the column order.
            saved_df = saved_df.loc[:, KDATA_INDEX_COL]
        else:
            saved_df = saved_df.dropna(subset=KDATA_COLUMN_163)
            # Guarantee the column order.
            saved_df = saved_df.loc[:, KDATA_STOCK_COL]

        # Downloaded rows come last in the frame, so keep='last' lets them
        # override previously saved rows with the same timestamp.
        saved_df = saved_df.drop_duplicates(subset='timestamp', keep='last')
        saved_df = saved_df.set_index(saved_df['timestamp'], drop=False)
        saved_df.index = pd.to_datetime(saved_df.index)
        saved_df = saved_df.sort_index()
        saved_df.to_csv(path, index=False)
    except Exception as e:
        self.logger.exception(
            'error when getting k data url={} error={}'.format(
                response.url, e))
def download_day_k_data(self, response):
    """Merge a downloaded daily stock k-data CSV into the CSV on disk.

    The response body is parsed as a GB2312-encoded CSV, tagged with the
    security's code and id from ``response.meta['item']``, reduced to the
    stock column set and renamed to ``KDATA_COLUMN_STOCK``. It is then
    appended to the data already stored at ``response.meta['path']``;
    rows missing any ``KDATA_COLUMN_163`` value are dropped, duplicates on
    ``timestamp`` keep the last occurrence (the freshly downloaded row),
    and the file is rewritten sorted by timestamp.

    Any exception is logged with its traceback and not re-raised.

    Args:
        response: object exposing ``body`` (bytes), ``url`` and a ``meta``
            dict holding ``path`` (target CSV path) and ``item`` (mapping
            with ``code`` and ``id``).
    """
    path = response.meta['path']
    item = response.meta['item']

    try:
        # Data already saved in the csv file (kept as strings).
        if os.path.exists(path):
            df_current = pd.read_csv(path, dtype=str)
        else:
            df_current = pd.DataFrame()

        df = utils.read_csv(io.BytesIO(response.body), encoding='GB2312',
                            na_values='None')
        df['code'] = item['code']
        df['securityId'] = item['id']

        df = df.loc[:, ['日期', 'code', '最低价', '开盘价', '收盘价',
                        '最高价', '成交量', '成交金额', 'securityId',
                        '前收盘', '涨跌额', '涨跌幅', '换手率', '总市值',
                        '流通市值']]
        df['factor'] = None
        df.columns = KDATA_COLUMN_STOCK

        # Merge into the current csv. DataFrame.append was removed in
        # pandas 2.0; pd.concat is the equivalent that works on every
        # pandas version.
        df_current = pd.concat([df_current, df], ignore_index=True)

        df_current = df_current.dropna(subset=KDATA_COLUMN_163)
        # Downloaded rows come last, so keep='last' lets them override
        # previously saved rows with the same timestamp.
        df_current = df_current.drop_duplicates(subset='timestamp',
                                                keep='last')
        df_current = df_current.set_index(df_current['timestamp'])
        df_current.index = pd.to_datetime(df_current.index)
        df_current = df_current.sort_index()
        df_current.to_csv(path, index=False)
    except Exception as e:
        # logger.exception records the traceback in addition to the message.
        self.logger.exception(
            'error when getting k data url={} error={}'.format(
                response.url, e))
def download_day_k_data(self, response):
    """Merge a downloaded daily stock k-data CSV into the CSV on disk.

    Parses the GB2312-encoded CSV in ``response.body``, adds the
    security's ``code`` and ``securityId`` from ``response.meta['item']``,
    selects the stock columns and renames them to ``KDATA_COLUMN_STOCK``.
    The rows are combined with those already stored at
    ``response.meta['path']``: rows missing any ``KDATA_COLUMN_163`` value
    are dropped, duplicates on ``timestamp`` keep the last occurrence (the
    freshly downloaded row), and the result is written back sorted by
    timestamp.

    Any exception is logged with its traceback and not re-raised.

    Args:
        response: object exposing ``body`` (bytes), ``url`` and a ``meta``
            dict holding ``path`` (target CSV path) and ``item`` (mapping
            with ``code`` and ``id``).
    """
    path = response.meta['path']
    item = response.meta['item']

    try:
        # Data already saved in the csv file (kept as strings).
        if os.path.exists(path):
            df_current = pd.read_csv(path, dtype=str)
        else:
            df_current = pd.DataFrame()

        df = utils.read_csv(io.BytesIO(response.body), encoding='GB2312',
                            na_values='None')
        df['code'] = item['code']
        df['securityId'] = item['id']

        source_columns = [
            '日期', 'code', '最低价', '开盘价', '收盘价', '最高价',
            '成交量', '成交金额', 'securityId', '前收盘', '涨跌额',
            '涨跌幅', '换手率', '总市值', '流通市值'
        ]
        df = df.loc[:, source_columns]
        df['factor'] = None
        df.columns = KDATA_COLUMN_STOCK

        # Merge into the current csv. DataFrame.append was removed in
        # pandas 2.0; pd.concat is the equivalent that works on every
        # pandas version.
        df_current = pd.concat([df_current, df], ignore_index=True)

        df_current = df_current.dropna(subset=KDATA_COLUMN_163)
        # Downloaded rows come last, so keep='last' lets them override
        # previously saved rows with the same timestamp.
        df_current = df_current.drop_duplicates(subset='timestamp',
                                                keep='last')
        df_current = df_current.set_index(df_current['timestamp'])
        df_current.index = pd.to_datetime(df_current.index)
        df_current = df_current.sort_index()
        df_current.to_csv(path, index=False)
    except Exception as e:
        # logger.exception records the traceback in addition to the message.
        self.logger.exception(
            'error when getting k data url={} error={}'.format(
                response.url, e))