Exemple #1
0
    def download_day_k_data(self, response):
        """Merge a downloaded daily k-data CSV into the CSV stored on disk.

        The response body is parsed as a GB2312-encoded CSV, tagged with the
        security's code, id and name, reduced to the columns of interest and
        renamed to ``KDATA_INDEX_COL`` (when ``item['type'] == 'index'``) or
        ``KDATA_STOCK_COL`` (any other type). The rows are then appended to
        whatever is already saved at ``path``, incomplete and duplicate rows
        are removed, the result is sorted by timestamp and the file is
        rewritten.

        Errors raised inside the processing step are logged with a traceback
        and swallowed; a missing ``'path'`` or ``'item'`` key in
        ``response.meta`` propagates to the caller.

        Args:
            response: Response whose ``meta`` carries ``'path'`` (target CSV
                file) and ``'item'`` (mapping with ``'code'``, ``'id'``,
                ``'name'`` and ``'type'``), and whose ``body`` holds the raw
                CSV bytes.
        """
        path = response.meta['path']
        item = response.meta['item']

        try:
            # Rows already persisted for this security, if any.
            if os.path.exists(path):
                saved_df = pd.read_csv(path, dtype=str)
            else:
                saved_df = pd.DataFrame()

            df = utils.read_csv(io.BytesIO(response.body),
                                encoding='GB2312',
                                na_values='None')
            df['code'] = item['code']
            df['securityId'] = item['id']
            df['name'] = item['name']

            if item['type'] == 'index':
                # Index data: the source has no turnover / market-cap columns,
                # so they are added empty to fill out the target layout.
                df = df.loc[:, [
                    '日期', 'code', 'name', '最低价', '开盘价', '收盘价', '最高价', '成交量',
                    '成交金额', 'securityId', '前收盘', '涨跌额', '涨跌幅'
                ]]
                df['turnoverRate'] = None
                df['tCap'] = None
                df['mCap'] = None
                df['pe'] = None
                ordered_cols = KDATA_INDEX_COL
                required_cols = KDATA_INDEX_COLUMN_163
            else:
                # Stock data.
                df = df.loc[:, [
                    '日期', 'code', 'name', '最低价', '开盘价', '收盘价', '最高价', '成交量',
                    '成交金额', 'securityId', '前收盘', '涨跌额', '涨跌幅', '换手率', '总市值',
                    '流通市值'
                ]]
                df['factor'] = None
                ordered_cols = KDATA_STOCK_COL
                required_cols = KDATA_COLUMN_163
            df.columns = ordered_cols

            # Merge into the saved data. DataFrame.append was removed in
            # pandas 2.0; pd.concat is the equivalent on every version.
            saved_df = pd.concat([saved_df, df], ignore_index=True)

            # Drop rows missing any required value, then enforce column order.
            saved_df = saved_df.dropna(subset=required_cols)
            saved_df = saved_df.loc[:, ordered_cols]

            # On a duplicated timestamp the most recently appended row wins.
            saved_df = saved_df.drop_duplicates(subset='timestamp',
                                                keep='last')
            # The datetime index exists only to sort chronologically; it is
            # not written out (index=False).
            saved_df = saved_df.set_index(saved_df['timestamp'], drop=False)
            saved_df.index = pd.to_datetime(saved_df.index)
            saved_df = saved_df.sort_index()
            saved_df.to_csv(path, index=False)
        except Exception as e:
            self.logger.exception(
                'error when getting k data url={} error={}'.format(
                    response.url, e))
    def download_day_k_data(self, response):
        """Merge a downloaded daily stock k-data CSV into the CSV on disk.

        The response body is parsed as a GB2312-encoded CSV, tagged with the
        security's code and id, reduced to the columns of interest plus an
        empty ``factor`` column, and renamed to ``KDATA_COLUMN_STOCK``. The
        rows are appended to whatever is already saved at ``path``; rows
        missing any ``KDATA_COLUMN_163`` value and rows with a duplicated
        timestamp are dropped, the result is sorted by timestamp and the file
        is rewritten.

        Errors raised inside the processing step are logged with a traceback
        and swallowed; a missing ``'path'`` or ``'item'`` key in
        ``response.meta`` propagates to the caller.

        Args:
            response: Response whose ``meta`` carries ``'path'`` (target CSV
                file) and ``'item'`` (mapping with ``'code'`` and ``'id'``),
                and whose ``body`` holds the raw CSV bytes.
        """
        path = response.meta['path']
        item = response.meta['item']

        try:
            # Rows already persisted for this security, if any.
            if os.path.exists(path):
                df_current = pd.read_csv(path, dtype=str)
            else:
                df_current = pd.DataFrame()

            df = utils.read_csv(io.BytesIO(response.body), encoding='GB2312', na_values='None')
            df['code'] = item['code']
            df['securityId'] = item['id']
            df = df.loc[:, [
                '日期', 'code', '最低价', '开盘价', '收盘价', '最高价', '成交量', '成交金额',
                'securityId', '前收盘', '涨跌额', '涨跌幅', '换手率', '总市值', '流通市值',
            ]]
            df['factor'] = None
            df.columns = KDATA_COLUMN_STOCK

            # Merge into the saved data. DataFrame.append was removed in
            # pandas 2.0; pd.concat is the equivalent on every version.
            df_current = pd.concat([df_current, df], ignore_index=True)

            df_current = df_current.dropna(subset=KDATA_COLUMN_163)
            # On a duplicated timestamp the most recently appended row wins.
            df_current = df_current.drop_duplicates(subset='timestamp', keep='last')
            # The datetime index exists only to sort chronologically; it is
            # not written out (index=False).
            df_current = df_current.set_index(df_current['timestamp'])
            df_current.index = pd.to_datetime(df_current.index)
            df_current = df_current.sort_index()
            df_current.to_csv(path, index=False)
        except Exception as e:
            # logger.exception keeps the traceback, which logger.error drops.
            self.logger.exception('error when getting k data url={} error={}'.format(response.url, e))
Exemple #3
0
    def download_day_k_data(self, response):
        """Append freshly downloaded daily stock k-data to the saved CSV.

        Steps, in order:

        1. Load the CSV at ``path`` (all columns as strings) or start from an
           empty frame when the file does not exist.
        2. Parse ``response.body`` as a GB2312-encoded CSV, add ``code`` and
           ``securityId`` from ``item``, keep the columns of interest, add an
           empty ``factor`` column and rename everything to
           ``KDATA_COLUMN_STOCK``.
        3. Concatenate saved and new rows, drop rows missing any
           ``KDATA_COLUMN_163`` value, de-duplicate on ``timestamp`` keeping
           the newest row, sort chronologically and rewrite ``path``.

        Errors raised in steps 1-3 are logged with a traceback and swallowed;
        a missing ``'path'`` or ``'item'`` key in ``response.meta``
        propagates to the caller.

        Args:
            response: Response whose ``meta`` carries ``'path'`` (target CSV
                file) and ``'item'`` (mapping with ``'code'`` and ``'id'``),
                and whose ``body`` holds the raw CSV bytes.
        """
        path = response.meta['path']
        item = response.meta['item']

        try:
            # Rows already persisted for this security, if any.
            if os.path.exists(path):
                df_current = pd.read_csv(path, dtype=str)
            else:
                df_current = pd.DataFrame()

            df = utils.read_csv(io.BytesIO(response.body),
                                encoding='GB2312',
                                na_values='None')
            df['code'] = item['code']
            df['securityId'] = item['id']
            df = df.loc[:, [
                '日期', 'code', '最低价', '开盘价', '收盘价', '最高价', '成交量', '成交金额',
                'securityId', '前收盘', '涨跌额', '涨跌幅', '换手率', '总市值', '流通市值'
            ]]
            df['factor'] = None
            df.columns = KDATA_COLUMN_STOCK

            # Merge into the saved data. DataFrame.append was removed in
            # pandas 2.0; pd.concat is the equivalent on every version.
            df_current = pd.concat([df_current, df], ignore_index=True)

            df_current = df_current.dropna(subset=KDATA_COLUMN_163)
            # On a duplicated timestamp the most recently appended row wins.
            df_current = df_current.drop_duplicates(subset='timestamp',
                                                    keep='last')
            # The datetime index exists only to sort chronologically; it is
            # not written out (index=False).
            df_current = df_current.set_index(df_current['timestamp'])
            df_current.index = pd.to_datetime(df_current.index)
            df_current = df_current.sort_index()
            df_current.to_csv(path, index=False)
        except Exception as e:
            # logger.exception keeps the traceback, which logger.error drops.
            self.logger.exception(
                'error when getting k data url={} error={}'.format(
                    response.url, e))