Ejemplo n.º 1
0
    def run(self):
        """Fetch the SSE and SZSE ETF lists, then their constituent stocks."""
        session = get_http_session()

        # SSE (Shanghai) ETF list — JSON endpoint.
        sh_list_url = 'http://query.sse.com.cn/commonQuery.do?sqlId=COMMON_SSE_ZQPZ_ETFLB_L_NEW'
        sh_response = request_get(session,
                                  sh_list_url,
                                  headers=DEFAULT_SH_ETF_LIST_HEADER)
        sh_df = pd.DataFrame(demjson.decode(sh_response.text).get('result', []))
        self.persist_etf_list(sh_df, exchange='sh')
        self.logger.info('沪市 ETF 列表抓取完成...')

        # SSE ETF constituents.
        self.download_sh_etf_component(sh_df, session)
        self.logger.info('沪市 ETF 成分股抓取完成...')

        # SZSE (Shenzhen) ETF list — delivered as an xlsx report.
        sz_list_url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1945'
        sz_response = request_get(session, sz_list_url)
        sz_df = pd.read_excel(io.BytesIO(sz_response.content), dtype=str)
        self.persist_etf_list(sz_df, exchange='sz')
        self.logger.info('深市 ETF 列表抓取完成...')

        # SZSE ETF constituents.
        self.download_sz_etf_component(sz_df, session)
        self.logger.info('深市 ETF 成分股抓取完成...')
    def run(self):
        """Download the stock list files for the SSE and SZSE exchanges."""
        session = get_http_session()

        # Shanghai: stock-list file download endpoint.
        sh_url = ('http://query.sse.com.cn/security/stock/downloadStockListFile.do'
                  '?csrcCode=&stockCode=&areaName=&stockType=1')
        sh_resp = request_get(session, sh_url, headers=DEFAULT_SH_HEADER)
        self.download_stock_list(response=sh_resp, exchange='sh')

        # Shenzhen: xlsx report endpoint.
        sz_url = ('http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1110'
                  '&TABKEY=tab1&random=0.20932135244582617')
        sz_resp = request_get(session, sz_url, headers=DEFAULT_SZ_HEADER)
        self.download_stock_list(response=sz_resp, exchange='sz')
    def record(self, entity, start, end, size, timestamps, http_session):
        """Fetch daily ETF kdata from sina and return it as a list of records."""
        # The endpoint is not paginated: when more rows are wanted than the
        # default, just request the maximum number of rows.
        if start is None or size > self.default_size:
            size = 8000

        url = ChinaETFDayKdataRecorder.url.format(entity.exchange, entity.code,
                                                  size)

        response_json = demjson.decode(request_get(http_session, url).text)
        if response_json is None or len(response_json) == 0:
            return []

        kdata_df = pd.DataFrame(response_json)
        kdata_df.rename(columns={'day': 'timestamp'}, inplace=True)
        kdata_df['timestamp'] = pd.to_datetime(kdata_df['timestamp'])
        kdata_df['name'] = entity.name
        kdata_df['provider'] = 'sina'
        kdata_df['level'] = self.level.value

        return kdata_df.to_dict(orient='records')
    def run(self):
        """Record sina stock-block (sector) definitions, one category at a time."""
        session = get_http_session()
        for category, url in self.category_map_url.items():
            resp = request_get(session, url)
            resp.encoding = 'GBK'

            # The payload embeds a single JSON object; slice it out of the text.
            body = resp.text
            block_map = json.loads(body[body.index('{'):body.index('}') + 1])

            records = [{
                'id': f'block_cn_{code}',
                'entity_id': f'block_cn_{code}',
                'entity_type': EntityType.Block.value,
                'exchange': 'cn',
                'code': code,
                'name': info.split(',')[1],
                'category': category.value
            } for code, info in block_map.items()]

            if records:
                df_to_db(df=pd.DataFrame.from_records(records),
                         region=Region.CHN,
                         data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=True)

            self.logger.info(f"finish record sina blocks:{category.value}")
Ejemplo n.º 5
0
    def populate_sh_etf_type(df: pd.DataFrame, http_session):
        """
        Populate the SSE ETF TYPE column onto the ETF list data.

        Queries the SSE ETF info endpoint for type 1 and type 2 ETFs and joins
        the returned ``etftype`` onto the input list by fund code.

        :param df: ETF list data; must contain a ``FUND_ID`` column
        :param http_session: HTTP session used to perform the requests
        :return: copy of ``df`` with an additional ``ETF_TYPE`` column
        """
        query_url = 'http://query.sse.com.cn/infodisplay/queryETFNewAllInfo.do?' \
                    'isPagination=false&type={}&pageHelp.pageSize=25'

        type_df = pd.DataFrame()
        for etf_class in [1, 2]:
            url = query_url.format(etf_class)
            response = request_get(http_session,
                                   url,
                                   headers=DEFAULT_SH_ETF_LIST_HEADER)
            response_dict = demjson.decode(response.text)
            response_df = pd.DataFrame(response_dict.get('result', []))
            response_df = response_df[['fundid1', 'etftype']]

            type_df = pd.concat([type_df, response_df])

        # BUGFIX: join on the fund code instead of positionally assigning one
        # independently sorted frame onto another — the previous
        # `result_df['ETF_TYPE'] = type_df['etftype']` silently mis-assigned
        # types whenever the two fund lists did not match one-to-one.
        type_df = type_df.rename(columns={'fundid1': 'FUND_ID',
                                          'etftype': 'ETF_TYPE'})
        type_df = type_df.drop_duplicates(subset='FUND_ID')

        result_df = df.merge(type_df, on='FUND_ID', how='left')
        result_df = result_df.sort_values(by='FUND_ID').reset_index(drop=True)

        return result_df
Ejemplo n.º 6
0
    def run(self):
        """Record block (sector) definitions fetched from each category url."""
        session = get_http_session()

        for category, url in self.category_map_url.items():
            resp = request_get(session, url)
            rows = json_callback_param(resp.text)

            records = []
            for row in rows:
                fields = row.split(',')
                block_code, block_name = fields[1], fields[2]
                entity_id = f'block_cn_{block_code}'
                records.append({
                    'id': entity_id,
                    'entity_id': entity_id,
                    'entity_type': EntityType.Block.value,
                    'exchange': 'cn',
                    'code': block_code,
                    'name': block_name,
                    'category': category.value
                })

            if records:
                df_to_db(df=pd.DataFrame.from_records(records),
                         region=Region.CHN,
                         data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=True)
            self.logger.info(f"finish record sina blocks:{category.value}")
Ejemplo n.º 7
0
    def record(self, entity, start, end, size, timestamps, http_session):
        """Fetch the daily SSE market summary for each requested timestamp.

        Returns a list of dicts (one per trading day) holding valuation and
        turnover figures for the stock product (productType == '1').
        """
        json_results = []
        for timestamp in timestamps:
            timestamp_str = to_time_str(timestamp)
            url = self.url.format(timestamp_str)
            response = request_get(http_session, url, headers=DEFAULT_SH_SUMMARY_HEADER)

            # The endpoint returns JSONP; strip the callback wrapper before decoding.
            results = demjson.decode(response.text[response.text.index("(") + 1:response.text.index(")")])['result']
            result = [result for result in results if result['productType'] == '1']
            if result and len(result) == 1:
                result_json = result[0]
                # Some older rows lack these figures; to_float defaults them to 0.0.
                # NOTE(review): the numeric fields appear to be strings with an
                # implied unit — 亿/万 is appended so to_float can scale them; confirm.
                json_results.append({
                    'provider': Provider.Exchange.value,
                    'timestamp': timestamp,
                    'name': '上证指数',
                    'pe': to_float(result_json['profitRate'], 0.0),
                    'total_value': to_float(result_json['marketValue1'] + '亿', 0.0),
                    'total_tradable_vaule': to_float(result_json['negotiableValue1'] + '亿', 0.0),
                    'volume': to_float(result_json['trdVol1'] + '万', 0.0),
                    'turnover': to_float(result_json['trdAmt1'] + '亿', 0.0),
                    'turnover_rate': to_float(result_json['exchangeRate'], 0.0),
                })

                # Stop early once more than one batch has been collected.
                if len(json_results) > self.batch_size:
                    return json_results

        return json_results
Ejemplo n.º 8
0
    def download_sz_etf_component(self, df: pd.DataFrame, http_session):
        """Fetch and persist the constituent stocks of each SZSE ETF.

        For every ETF in *df*, looks up its underlying index on sina's
        constituent page, extracts the component table and writes one row per
        (ETF, stock) to the database.

        :param df: SZSE ETF list data (columns 证券代码/证券简称; 拟合指数 is
                   filled in by parse_sz_etf_underlying_index below)
        :param http_session: HTTP session used for the requests
        """
        query_url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vII_NewestComponent/indexid/{}.phtml'

        # Derive each ETF's underlying ("拟合") index code in place.
        self.parse_sz_etf_underlying_index(df)
        for _, etf in df.iterrows():
            underlying_index = etf['拟合指数']
            etf_code = etf['证券代码']

            # An empty underlying index marks a non-A-share market index; skip.
            if len(underlying_index) == 0:
                self.logger.info(f'{etf["证券简称"]} - {etf_code} 非 A 股市场指数,跳过...')
                continue

            url = query_url.format(underlying_index)
            response = request_get(http_session, url)
            response.encoding = 'gbk'

            try:
                dfs = pd.read_html(response.text, header=1)
            except ValueError as error:
                self.logger.error(
                    f'HTML parse error: {error}, response: {response.text}')
                continue

            # The constituent table is the 4th table on the page; fewer tables
            # means the page carries no component data.
            if len(dfs) < 4:
                continue

            response_df = dfs[3].copy()
            response_df = response_df.dropna(axis=1, how='any')
            # Zero-pad the stock code back to 6 digits (the :06d format
            # implies read_html parsed the column as integers).
            response_df['品种代码'] = response_df['品种代码'].apply(
                lambda x: f'{x:06d}')

            etf_id = f'etf_sz_{etf_code}'
            # Keep only constituent code/name and map them onto schema names.
            response_df = response_df[['品种代码', '品种名称']].copy()
            response_df.rename(columns={
                '品种代码': 'stock_code',
                '品种名称': 'stock_name'
            },
                               inplace=True)

            response_df['entity_id'] = etf_id
            response_df['entity_type'] = EntityType.ETF.value
            response_df['exchange'] = 'sz'
            response_df['code'] = etf_code
            response_df['name'] = etf['证券简称']
            response_df['timestamp'] = now_pd_timestamp(Region.CHN)

            response_df['stock_id'] = response_df['stock_code'].apply(
                lambda code: china_stock_code_to_id(code))
            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{etf_id}_{x}')

            df_to_db(df=response_df,
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider)
            self.logger.info(f'{etf["证券简称"]} - {etf_code} 成分股抓取完成...')

            self.sleep()
    def run(self):
        """Download US stock lists from the (legacy) nasdaq screener."""
        session = get_http_session()

        for exchange in ('NYSE', 'NASDAQ', 'AMEX'):
            url = ('https://old.nasdaq.com/screening/companies-by-name.aspx'
                   '?letter=0&render=download&exchange={}').format(exchange)
            resp = request_get(session, url, headers=YAHOO_STOCK_LIST_HEADER)
            self.download_stock_list(response=resp, exchange=exchange)
    def record(self, entity, start, end, size, timestamps, http_session):
        """Fetch quarterly day-kdata pages from sina and return kdata records.

        Walks every (year, quarter) between *start* and now, scrapes the
        history table from each page and concatenates the rows.
        """
        the_quarters = get_year_quarters(start, now_pd_timestamp(Region.CHN))
        # If recording did not start exactly at the entity's first timestamp,
        # the first quarter is already (partially) recorded — drop it.
        if not is_same_date(entity.timestamp, start) and len(the_quarters) > 1:
            the_quarters = the_quarters[1:]

        param = {
            'security_item': entity,
            'quarters': the_quarters,
            'level': self.level.value
        }

        security_item = param['security_item']
        quarters = param['quarters']
        level = param['level']

        result_df = pd.DataFrame()
        for year, quarter in quarters:
            query_url = self.url.format(security_item.code, year, quarter)
            response = request_get(http_session, query_url)
            response.encoding = 'gbk'

            try:
                dfs = pd.read_html(response.text)
            except ValueError as error:
                self.logger.error(
                    f'skip ({year}-{quarter:02d}){security_item.code}{security_item.name}({error})'
                )
                self.sleep()
                continue

            # The kdata table is the 5th table on the page.
            if len(dfs) < 5:
                self.sleep()
                continue

            df = dfs[4].copy()
            # Drop the first row (presumably a header row — confirm against
            # the actual page layout).
            df = df.iloc[1:]
            df.columns = [
                'timestamp', 'open', 'high', 'close', 'low', 'volume',
                'turnover'
            ]
            df['name'] = security_item.name
            df['level'] = level
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            # NOTE(review): other sina recorders in this file store provider as
            # the plain string 'sina'; confirm the schema accepts this enum.
            df['provider'] = Provider.Sina

            result_df = pd.concat([result_df, df])

            self.logger.info(
                f'({security_item.code}{security_item.name})({year}-{quarter:02d})'
            )
            self.sleep()

        result_df = result_df.sort_values(by='timestamp')

        return result_df.to_dict(orient='records')
Ejemplo n.º 11
0
    def record(self, entity, start, end, size, timestamps, http_session):
        """Fetch block money-flow history from sina and map it to schema rows.

        :return: list of dicts, one per trading day; empty on parse failure
        """
        url = self.generate_url(category=entity.category,
                                code=entity.code,
                                number=size)

        resp = request_get(http_session, url)

        json_list = []
        try:
            # SECURITY: the payload is untrusted network data — parse it with
            # demjson (as the other recorders in this file do) instead of
            # eval()-ing the response body.
            decoded = demjson.decode(resp.text)
            # Guard against non-list payloads (e.g. a bare `null`), which the
            # loop below cannot iterate.
            json_list = decoded if isinstance(decoded, list) else []
        except Exception as e:
            resp.encoding = 'GBK'
            self.logger.error("text:{}, error:{}".format(resp.text, e))
            self.sleep()

        result_list = []
        for item in json_list:
            result_list.append({
                'name': entity.name,
                'timestamp': to_pd_timestamp(item['opendate']),
                'close': to_float(item['avg_price']),
                'change_pct': to_float(item['avg_changeratio']),
                # turnover arrives scaled by 1e4.
                'turnover_rate': to_float(item['turnover']) / 10000,
                'net_inflows': to_float(item['netamount']),
                'net_inflow_rate': to_float(item['ratioamount']),
                'net_main_inflows': to_float(item['r0_net']),
                'net_main_inflow_rate': to_float(item['r0_ratio'])
            })

        return result_list
Ejemplo n.º 12
0
    def download_sh_etf_component(self, df: pd.DataFrame, http_session):
        """
        Fetch and persist the constituent stocks of SSE ETFs.

        ETF_CLASS => 1. single-market ETF  2. cross-market ETF  3. cross-border ETF
                        5. bond ETF  6. gold ETF
        Only classes 1 and 2 are processed here.

        :param df: ETF list data
        :return: None
        """
        query_url = 'http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?' \
                    'isPagination=false&type={}&etfClass={}'

        # Restrict to single-market and cross-market ETFs, then look up the
        # ETF TYPE required by the constituent query.
        etf_df = df[(df['ETF_CLASS'] == '1') | (df['ETF_CLASS'] == '2')]
        etf_df = self.populate_sh_etf_type(etf_df, http_session)

        for _, etf in etf_df.iterrows():
            url = query_url.format(etf['ETF_TYPE'], etf['ETF_CLASS'])
            response = request_get(http_session,
                                   url,
                                   headers=DEFAULT_SH_ETF_LIST_HEADER)
            response_dict = demjson.decode(response.text)
            response_df = pd.DataFrame(response_dict.get('result', []))

            etf_code = etf['FUND_ID']
            etf_id = f'etf_sh_{etf_code}'
            # Keep only constituent code/name and map them onto schema names.
            response_df = response_df[['instrumentId',
                                       'instrumentName']].copy()
            response_df.rename(columns={
                'instrumentId': 'stock_code',
                'instrumentName': 'stock_name'
            },
                               inplace=True)

            response_df['entity_id'] = etf_id
            response_df['entity_type'] = EntityType.ETF.value
            response_df['exchange'] = 'sh'
            response_df['code'] = etf_code
            response_df['name'] = etf['FUND_NAME']
            response_df['timestamp'] = now_pd_timestamp(Region.CHN)

            response_df['stock_id'] = response_df['stock_code'].apply(
                lambda code: china_stock_code_to_id(code))
            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{etf_id}_{x}')

            df_to_db(df=response_df,
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider)
            self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

            self.sleep()
    def fetch_csi_index_component(self, df: pd.DataFrame, http_session):
        """Fetch and persist the constituent stocks of each SSE/CSI index.

        Downloads the per-index constituent xls file and writes one row per
        (index, stock) to the database.

        :param df: index list data with 'code' and 'name' columns
        :param http_session: HTTP session used for the requests
        """
        query_url = 'http://www.csindex.com.cn/uploads/file/autofile/cons/{}cons.xls'

        for _, index in df.iterrows():
            index_code = index['code']

            url = query_url.format(index_code)

            # Some indices have no downloadable file; log and move on.
            try:
                response = request_get(http_session, url)
                response.raise_for_status()
            except requests.HTTPError as error:
                self.logger.error(
                    f'{index["name"]} - {index_code} 成分股抓取错误 ({error})')
                continue

            response_df = pd.read_excel(io.BytesIO(response.content))

            # Keep only constituent code/name and map them onto schema names.
            response_df = response_df[[
                '成分券代码Constituent Code', '成分券名称Constituent Name'
            ]].rename(
                columns={
                    '成分券代码Constituent Code': 'stock_code',
                    '成分券名称Constituent Name': 'stock_name'
                })

            index_id = f'index_cn_{index_code}'
            response_df['entity_id'] = index_id
            response_df['entity_type'] = EntityType.Index.value
            response_df['exchange'] = 'cn'
            response_df['code'] = index_code
            response_df['name'] = index['name']
            response_df['timestamp'] = now_pd_timestamp(Region.CHN)

            # str(x): excel may parse the code column as integers.
            response_df['stock_id'] = response_df['stock_code'].apply(
                lambda x: china_stock_code_to_id(str(x)))
            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{index_id}_{x}')

            df_to_db(df=response_df,
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=True)
            self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')

            self.sleep()
    def fetch_cni_index_component(self, df: pd.DataFrame, http_session):
        """Fetch and persist the constituent stocks of each CNI (国证) index.

        Downloads the per-index constituent xls file, extracts the stock code
        column and writes one row per (index, stock) to the database.

        :param df: index list data with 'code' and 'name' columns
        :param http_session: HTTP session used for the requests
        """
        query_url = 'http://www.cnindex.com.cn/docs/yb_{}.xls'

        for _, index in df.iterrows():
            index_code = index['code']

            url = query_url.format(index_code)

            # Some indices have no downloadable file; log and move on.
            try:
                response = request_get(http_session, url)
                response.raise_for_status()
            except requests.HTTPError as error:
                self.logger.error(
                    f'{index["name"]} - {index_code} 成分股抓取错误 ({error})')
                continue

            response_df = pd.read_excel(io.BytesIO(response.content),
                                        dtype='str')

            index_id = f'index_cn_{index_code}'

            # The code column name differs between files.
            try:
                response_df = response_df[['样本股代码']]
            except KeyError:
                response_df = response_df[['证券代码']]

            # BUGFIX: normalize the single code column to 'stock_code' BEFORE
            # appending the metadata columns. The original assigned
            # `response_df.columns = ['stock_code']` after six extra columns
            # were added, which raises a length-mismatch ValueError.
            response_df.columns = ['stock_code']

            response_df['entity_id'] = index_id
            response_df['entity_type'] = EntityType.Index.value
            response_df['exchange'] = 'cn'
            response_df['code'] = index_code
            response_df['name'] = index['name']
            response_df['timestamp'] = now_pd_timestamp(Region.CHN)

            response_df['stock_id'] = response_df['stock_code'].apply(
                lambda x: china_stock_code_to_id(str(x)))
            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{index_id}_{x}')

            df_to_db(df=response_df,
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider)
            self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')

            self.sleep()
    def fetch_csi_index(self, http_session) -> None:
        """Fetch the SSE/CSI index list page by page, persist it, then fetch
        the constituents of every index."""
        url = 'http://www.csindex.com.cn/zh-CN/indices/index' \
              '?page={}&page_size={}&data_type=json&class_1=1&class_2=2&class_7=7&class_10=10'

        index_list = []
        page = 1
        page_size = 50

        # Page through the JSON endpoint until an empty page is returned.
        while True:
            query_url = url.format(page, page_size)
            response = request_get(http_session, query_url)
            response_dict = demjson.decode(response.text)
            response_index_list = response_dict.get('list', [])

            if len(response_index_list) == 0:
                break

            index_list.extend(response_index_list)

            self.logger.info(f'上证、中证指数第 {page} 页抓取完成...')
            page += 1
            self.sleep()

        df = pd.DataFrame(index_list)
        df = df[[
            'base_date', 'base_point', 'index_code', 'indx_sname',
            'online_date', 'class_eseries'
        ]].copy()
        df.columns = [
            'timestamp', 'base_point', 'code', 'name', 'list_date',
            'class_eseries'
        ]
        # Derive the category from the first word of the class-series label.
        df['category'] = df['class_eseries'].apply(
            lambda x: x.split(' ')[0].lower())
        df = df.drop('class_eseries', axis=1)
        # Keep only real 6-digit index codes.
        df = df.loc[df['code'].str.contains(r'^\d{6}$')]

        self.persist_index(df)
        self.logger.info('上证、中证指数列表抓取完成...')

        # Fetch constituents for the persisted indices.
        self.fetch_csi_index_component(df, http_session)
        self.logger.info('上证、中证指数成分股抓取完成...')
    def fetch_szse_index(self, http_session) -> None:
        """Fetch the SZSE index list, persist it, then fetch its constituents."""
        url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1812_zs&TABKEY=tab1'
        raw = request_get(http_session, url)

        index_df = pd.read_excel(io.BytesIO(raw.content), dtype='str')
        index_df.columns = ['code', 'name', 'timestamp', 'base_point', 'list_date']
        index_df['category'] = 'szse'
        # Keep only real 6-digit index codes.
        index_df = index_df.loc[index_df['code'].str.contains(r'^\d{6}$')]

        self.persist_index(index_df)
        self.logger.info('深证指数列表抓取完成...')

        # Fetch constituents for the persisted indices.
        self.fetch_szse_index_component(index_df, http_session)
        self.logger.info('深证指数成分股抓取完成...')
    def fetch_szse_index_component(self, df: pd.DataFrame, http_session):
        """Fetch and persist the constituent stocks of each SZSE index.

        Downloads the per-index constituent xlsx report and writes one row
        per (index, stock) to the database.

        :param df: index list data with 'code' and 'name' columns
        :param http_session: HTTP session used for the requests
        """
        query_url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1747_zs&TABKEY=tab1&ZSDM={}'

        for _, index in df.iterrows():
            index_code = index['code']

            url = query_url.format(index_code)
            response = request_get(http_session, url)

            response_df = pd.read_excel(io.BytesIO(response.content),
                                        dtype='str')

            index_id = f'index_cn_{index_code}'
            response_df['entity_id'] = index_id
            response_df['entity_type'] = EntityType.Index.value
            response_df['exchange'] = 'cn'
            response_df['code'] = index_code
            response_df['name'] = index['name']
            response_df['timestamp'] = now_pd_timestamp(Region.CHN)

            # Map the Chinese report headers onto schema column names.
            response_df.rename(columns={
                '证券代码': 'stock_code',
                '证券简称': 'stock_name'
            },
                               inplace=True)
            response_df['stock_id'] = response_df['stock_code'].apply(
                lambda x: china_stock_code_to_id(str(x)))

            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{index_id}_{x}')

            df_to_db(df=response_df,
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider)
            self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')

            self.sleep()
    def record(self, entity, start, end, size, timestamps, http_session):
        """Record the member stocks of one block, paging through the endpoint."""
        for page in range(1, 5):
            resp = request_get(
                http_session,
                self.category_stocks_url.format(page, entity.code))
            try:
                # An empty page ends the pagination.
                if resp.text is None or resp.text == 'null':
                    break
                members = demjson.decode(resp.text)

                block_id = entity.id
                rows = []
                for member in members:
                    member_code = member['code']
                    member_id = china_stock_code_to_id(member_code)
                    rows.append({
                        'id': '{}_{}'.format(block_id, member_id),
                        'entity_id': block_id,
                        'entity_type': EntityType.Block.value,
                        'exchange': entity.exchange,
                        'code': entity.code,
                        'name': entity.name,
                        'timestamp': now_pd_timestamp(Region.CHN),
                        'stock_id': member_id,
                        'stock_code': member_code,
                        'stock_name': member['name'],
                    })
                if rows:
                    df_to_db(df=pd.DataFrame.from_records(rows),
                             region=Region.CHN,
                             data_schema=self.data_schema,
                             provider=self.provider,
                             force_update=True)

                self.logger.info('finish recording BlockStock:{},{}'.format(
                    entity.category, entity.name))

            except Exception as e:
                self.logger.error("error:{},resp.text:{}".format(e, resp.text))
            self.sleep()
    def fetch_cni_index(self, http_session) -> None:
        """Fetch the CNI (国证) index list, persist it, then fetch the
        constituents of every index."""
        url = 'http://www.cnindex.com.cn/zstx/jcxl/'

        response = request_get(http_session, url)
        response.encoding = 'utf-8'
        dfs = pd.read_html(response.text)

        # Tables after the 9th contain non-stock indices; keep tables 2-9.
        dfs = dfs[1:9]

        result_df = pd.DataFrame()
        for df in dfs:
            # Each table carries its header in the first row.
            header = df.iloc[0]
            df = df[1:]
            df.columns = header
            # NOTE(review): astype('str') does not mutate in place and its
            # result is discarded here — confirm whether the cast was intended.
            df.astype('str')

            result_df = pd.concat([result_df, df])

        result_df = result_df.drop('样本股数量', axis=1)
        result_df.columns = [
            'name', 'code', 'timestamp', 'base_point', 'list_date'
        ]
        # Normalize dashed dates (e.g. '2010-01-01') to compact '20100101' form.
        result_df['timestamp'] = result_df['timestamp'].apply(
            lambda x: x.replace('-', ''))
        result_df['list_date'] = result_df['list_date'].apply(
            lambda x: x.replace('-', ''))
        # NOTE(review): category is set to 'csi' although these are 国证
        # (cnindex) indices — confirm this matches the schema's intent.
        result_df['category'] = 'csi'
        # Keep only real 6-digit index codes.
        result_df = result_df.loc[result_df['code'].str.contains(r'^\d{6}$')]

        self.persist_index(result_df)
        self.logger.info('国证指数列表抓取完成...')

        # Fetch constituents for the persisted indices.
        self.fetch_cni_index_component(result_df, http_session)
        self.logger.info('国证指数成分股抓取完成...')
Ejemplo n.º 20
0
    def record(self, entity, start, end, size, timestamps, http_session):
        """Record the member stocks of one block from the callback endpoint."""
        resp = request_get(http_session,
                           self.category_stocks_url.format(entity.code, '1'))
        try:
            block_id = entity.id
            rows = []
            for raw in json_callback_param(resp.text):
                fields = raw.split(',')
                member_code = fields[1]
                member_id = china_stock_code_to_id(member_code)

                rows.append({
                    'id': '{}_{}'.format(block_id, member_id),
                    'entity_id': block_id,
                    'entity_type': EntityType.Block.value,
                    'exchange': entity.exchange,
                    'code': entity.code,
                    'name': entity.name,
                    'timestamp': now_pd_timestamp(Region.CHN),
                    'stock_id': member_id,
                    'stock_code': member_code,
                    'stock_name': fields[2],
                })
            if rows:
                df_to_db(df=pd.DataFrame.from_records(rows),
                         region=Region.CHN,
                         data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=True)

            self.logger.info('finish recording block:{},{}'.format(
                entity.category, entity.name))

        except Exception as e:
            self.logger.error("error:{}, resp.text:{}".format(e, resp.text))

        self.sleep()
    def record(self, entity, start, end, size, timestamps, http_session):
        """Fetch kline data for the entity and convert it to kdata dicts."""
        the_url = self.url.format(
            "{}".format(entity.code), level_flag(self.level), size,
            now_time_str(region=Region.CHN, fmt=TIME_FORMAT_DAY1))

        resp = request_get(http_session, the_url)
        results = json_callback_param(resp.text)

        if not results:
            return []

        klines = results['data']['klines']
        kdatas = []

        # Each kline row: "time,open,close,high,low,volume,turnover", e.g.
        # "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00".
        # TODO: the trailing, still-forming bar is skipped for now; could be
        # handled more precisely if needed.
        for line in klines[:-1]:
            values = line.split(',')
            bar_time = to_pd_timestamp(values[0])

            kdatas.append(
                dict(id=generate_kdata_id(entity_id=entity.id,
                                          timestamp=bar_time,
                                          level=self.level),
                     timestamp=bar_time,
                     entity_id=entity.id,
                     code=entity.code,
                     name=entity.name,
                     level=self.level.value,
                     open=to_float(values[1]),
                     close=to_float(values[2]),
                     high=to_float(values[3]),
                     low=to_float(values[4]),
                     volume=to_float(values[5]),
                     turnover=to_float(values[6])))
        return kdatas
    def fetch_cumulative_net_value(self, security_item, start, end,
                                   http_session) -> pd.DataFrame:
        """Fetch a fund's cumulative net value history from eastmoney.

        Pages through the LSJZ endpoint between *start* and *end* and returns
        a DataFrame indexed by the FSRQ date column.

        :param security_item: fund entity whose ``code`` is queried
        :param start: first date to fetch
        :param end: last date to fetch
        :param http_session: HTTP session used for the requests
        :return: DataFrame indexed by FSRQ with JZZZL/LJJZ as numeric columns
        """
        query_url = 'http://api.fund.eastmoney.com/f10/lsjz?' \
                    'fundCode={}&pageIndex={}&pageSize=200&startDate={}&endDate={}'

        page = 1
        df = pd.DataFrame()

        while True:
            url = query_url.format(security_item.code, page,
                                   to_time_str(start), to_time_str(end))

            response = request_get(http_session,
                                   url,
                                   headers=EASTMONEY_ETF_NET_VALUE_HEADER)
            response_json = demjson.decode(response.text)
            response_df = pd.DataFrame(response_json['Data']['LSJZList'])

            # An empty page marks the end of the data.
            if not pd_is_not_null(response_df):
                break

            # FSRQ is parsed as a date; JZZZL/LJJZ are coerced to numeric
            # (presumably daily growth rate / cumulative NAV — confirm against
            # the eastmoney field docs).
            response_df['FSRQ'] = pd.to_datetime(response_df['FSRQ'])
            response_df['JZZZL'] = pd.to_numeric(response_df['JZZZL'],
                                                 errors='coerce')
            response_df['LJJZ'] = pd.to_numeric(response_df['LJJZ'],
                                                errors='coerce')
            response_df = response_df.fillna(0)
            response_df.set_index('FSRQ', inplace=True, drop=True)

            df = pd.concat([df, response_df])
            page += 1

            self.sleep()

        return df
    def record(self, entity, start, end, size, timestamps, http_session):
        """Fetch per-stock money-flow history from sina and map it to rows.

        Builds one dict per trading day with close/turnover figures and the
        net inflow breakdown by order size (r0=huge .. r3=small).

        :return: list of result dicts; empty if the response fails to parse
        """
        url = self.generate_url(code='{}{}'.format(entity.exchange, entity.code),
                                number=size)

        resp = request_get(http_session, url)
        # Sample payload item:
        # {opendate:"2019-04-29",trade:"10.8700",changeratio:"-0.0431338",turnover:"74.924",netamount:"-2903349.8500",
        # ratioamount:"-0.155177",r0:"0.0000",r1:"2064153.0000",r2:"6485031.0000",r3:"10622169.2100",r0_net:"0.0000",
        # r1_net:"2064153.0000",r2_net:"-1463770.0000",r3_net:"-3503732.8500"}

        json_list = []
        try:
            # SECURITY: the payload is untrusted network data — parse it with
            # demjson (as the other recorders in this file do) instead of
            # eval()-ing the response body.
            decoded = demjson.decode(resp.text)
            # Guard against non-list payloads (e.g. a bare `null`), which the
            # loop below cannot iterate.
            json_list = decoded if isinstance(decoded, list) else []
        except Exception as e:
            resp.encoding = 'GBK'
            self.logger.error("text:{}, error:{}".format(resp.text, e))
            self.sleep()

        result_list = []
        for item in json_list:
            # Total traded amount across all order sizes — the denominator for
            # the inflow-rate ratios below.
            amount = (to_float(item['r0']) + to_float(item['r1']) +
                      to_float(item['r2']) + to_float(item['r3']))

            result = {
                'timestamp': to_pd_timestamp(item['opendate']),
                'name': entity.name,
                'close': to_float(item['trade']),
                'change_pct': to_float(item['changeratio']),
                # turnover arrives scaled by 1e4.
                'turnover_rate': to_float(item['turnover']) / 10000,
                'net_inflows': to_float(item['netamount']),
                'net_inflow_rate': to_float(item['ratioamount']),
                # main = huge (r0) + big (r1) orders.
                'net_main_inflows':
                to_float(item['r0_net']) + to_float(item['r1_net']),
                'net_huge_inflows': to_float(item['r0_net']),
                'net_big_inflows': to_float(item['r1_net']),
                'net_medium_inflows': to_float(item['r2_net']),
                'net_small_inflows': to_float(item['r3_net']),
            }

            # Rates are only computable when anything was traded at all.
            if amount != 0:
                result['net_main_inflow_rate'] = (to_float(
                    item['r0_net']) + to_float(item['r1_net'])) / amount
                result['net_huge_inflow_rate'] = to_float(
                    item['r0_net']) / amount
                result['net_big_inflow_rate'] = to_float(
                    item['r1_net']) / amount
                result['net_medium_inflow_rate'] = to_float(
                    item['r2_net']) / amount
                result['net_small_inflow_rate'] = to_float(
                    item['r3_net']) / amount

            result_list.append(result)

        return result_list