Python sync_getの例、findy.utils.request.sync_get Pythonの例

コード例 #1

0

ファイルを表示

ファイル: china_etf_list_spider.py プロジェクト: doncat99/FinanceCenter

    def populate_sh_etf_type(self, df: pd.DataFrame, http_session):
        """
        Attach the ETF TYPE of each Shanghai-listed ETF code to the list data.

        The type lists for ETF classes 1 and 2 are fetched with ``sync_get``
        and matched to ``df`` by fund code (``fundid1`` -> ``FUND_ID``)
        rather than by row position, so a failed request, an empty response
        or lists of different length cannot shift a type onto the wrong fund.

        :param df: ETF list data; must contain a ``FUND_ID`` column
        :param http_session: session object handed to ``sync_get``
        :return: copy of ``df`` sorted by ``FUND_ID`` (index reset) with an
                 added ``ETF_TYPE`` column; funds for which no type was
                 retrieved get a missing value
        """
        query_url = 'http://query.sse.com.cn/infodisplay/queryETFNewAllInfo.do?' \
                    'isPagination=false&type={}&pageHelp.pageSize=25'

        type_frames = []
        for etf_class in [1, 2]:
            url = query_url.format(etf_class)
            text = sync_get(http_session,
                            url,
                            headers=DEFAULT_SH_ETF_LIST_HEADER,
                            return_type='text')
            if text is None:
                continue
            try:
                response_dict = demjson.decode(text)
            except Exception as e:
                # Same policy as the constituent download: log and move on
                # instead of aborting the whole crawl on one bad payload.
                self.logger.error(
                    f'decode {url} failed with text: {text}, error as: {e}')
                continue

            response_df = pd.DataFrame(response_dict.get('result', []))
            if response_df.empty:
                # An empty frame has no columns; selecting them would raise.
                continue
            type_frames.append(response_df[['fundid1', 'etftype']])

        # sort_values returns a new frame, so the caller's df is not modified.
        result_df = df.sort_values(by='FUND_ID').reset_index(drop=True)

        type_by_fund = {}
        if type_frames:
            type_df = pd.concat(type_frames, ignore_index=True)
            type_by_fund = dict(
                zip(type_df['fundid1'].astype(str), type_df['etftype']))

        # Keyed lookup: fund codes absent from the mapping become NaN.
        result_df['ETF_TYPE'] = result_df['FUND_ID'].astype(str).map(
            type_by_fund)

        return result_df

コード例 #2

0

ファイルを表示

ファイル: china_etf_list_spider.py プロジェクト: doncat99/FinanceCenter

    async def download_sh_etf_component(self, df: pd.DataFrame, http_session,
                                        db_session):
        """
        Download and persist the constituent stocks of Shanghai-listed ETFs.

        Only rows whose ``ETF_CLASS`` is ``'1'`` or ``'2'`` are processed.

        ETF_CLASS => 1. single-market ETF 2. cross-market ETF
                     3. cross-border ETF 5. bond ETF 6. gold ETF

        For every selected ETF the constituent list is fetched with
        ``sync_get``, reshaped and written through ``df_to_db``. An ETF whose
        request fails, whose payload cannot be decoded or whose result list
        is empty is skipped so the remaining ETFs are still processed.

        :param df: ETF list data
        :param http_session: session object handed to ``sync_get``
        :param db_session: session object handed to ``df_to_db``
        :return: None
        """
        query_url = 'http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?' \
                    'isPagination=false&type={}&etfClass={}'

        etf_df = df[df['ETF_CLASS'].isin(['1', '2'])]
        etf_df = self.populate_sh_etf_type(etf_df, http_session)

        for _, etf in etf_df.iterrows():
            etf_code = etf['FUND_ID']
            url = query_url.format(etf['ETF_TYPE'], etf['ETF_CLASS'])
            text = sync_get(http_session,
                            url,
                            headers=DEFAULT_SH_ETF_LIST_HEADER,
                            return_type='text')
            if text is None:
                continue
            try:
                response_dict = demjson.decode(text)
            except Exception as e:
                self.logger.error(
                    f'decode {url} failed with text: {text}, error as: {e}')
                continue

            response_df = pd.DataFrame(response_dict.get('result', []))
            if response_df.empty:
                # An empty frame has no columns; selecting them below would
                # raise KeyError and abort the crawl for all remaining ETFs.
                self.logger.info(
                    f'{etf["FUND_NAME"]} - {etf_code} has no constituent data, skipped')
                continue

            etf_id = f'etf_sh_{etf_code}'
            # rename returns a new frame, so the assignments below never
            # write into a view of the raw response.
            response_df = response_df[['instrumentId',
                                       'instrumentName']].rename(columns={
                'instrumentId': 'stock_code',
                'instrumentName': 'stock_name'
            })

            response_df['entity_id'] = etf_id
            response_df['entity_type'] = EntityType.ETF.value
            response_df['exchange'] = ChnExchange.SSE.value
            response_df['code'] = etf_code
            response_df['name'] = etf['FUND_NAME']
            response_df['timestamp'] = now_pd_timestamp(self.region)

            response_df['stock_id'] = response_df['stock_code'].apply(
                china_stock_code_to_id)
            # Row id is the ETF id joined with the constituent's stock id.
            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{etf_id}_{x}')

            await df_to_db(region=self.region,
                           provider=self.provider,
                           data_schema=self.data_schema,
                           db_session=db_session,
                           df=response_df)
            self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

コード例 #3

0

ファイルを表示

ファイル: china_etf_list_spider.py プロジェクト: doncat99/FinanceCenter

    async def run(self):
        """
        Crawl the ETF lists and ETF constituents of the SSE and the SZSE.

        Steps, in order: fetch and persist the Shanghai ETF list, download
        the Shanghai constituents, fetch and persist the Shenzhen ETF list
        (an xlsx report), download the Shenzhen constituents. If either list
        request yields ``None`` the method returns at that point and the
        later steps are not executed.

        :return: None
        """
        sse_list_url = 'http://query.sse.com.cn/commonQuery.do?sqlId=COMMON_SSE_ZQPZ_ETFLB_L_NEW'
        szse_list_url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1945'

        session = get_sync_http_session()
        db = get_db_session(self.region, self.provider, self.data_schema)

        # Fetch the Shanghai ETF list
        sse_text = sync_get(session,
                            sse_list_url,
                            headers=DEFAULT_SH_ETF_LIST_HEADER,
                            return_type='text')
        if sse_text is None:
            return

        sse_payload = demjson.decode(sse_text)
        sse_rows = sse_payload.get('result', [])
        sse_df = pd.DataFrame(sse_rows)
        await self.persist_etf_list(sse_df, ChnExchange.SSE.value, db)
        self.logger.info('沪市 ETF 列表抓取完成...')

        # Fetch the Shanghai ETF constituents
        await self.download_sh_etf_component(sse_df, session, db)
        self.logger.info('沪市 ETF 成分股抓取完成...')

        # Fetch the Shenzhen ETF list
        szse_bytes = sync_get(session, szse_list_url, return_type='content')
        if szse_bytes is None:
            return

        workbook = io.BytesIO(szse_bytes)
        szse_df = pd.read_excel(workbook, dtype=str)
        await self.persist_etf_list(szse_df, ChnExchange.SZSE.value, db)
        self.logger.info('深市 ETF 列表抓取完成...')

        # Fetch the Shenzhen ETF constituents
        await self.download_sz_etf_component(szse_df, session, db)
        self.logger.info('深市 ETF 成分股抓取完成...')

コード例 #4

0

ファイルを表示