Python df_to_db Examples, zvdata.api.df_to_db Python Examples

Example #1

0

Show file

File: sina_china_stock_category_recorder.py Project: zilinly/zvt

    def record(self, entity, start, end, size, timestamps):
        for page in range(1, 5):
            resp = requests.get(self.category_stocks_url.format(page, entity.code))
            try:
                if resp.text == 'null' or resp.text is None:
                    break
                category_jsons = demjson.decode(resp.text)
                the_list = []
                for category in category_jsons:
                    stock_code = category['code']
                    stock_id = china_stock_code_to_id(stock_code)
                    block_id = entity.id
                    the_list.append({
                        'id': '{}_{}'.format(block_id, stock_id),
                        'entity_id': block_id,
                        'entity_type': 'block',
                        'exchange': entity.exchange,
                        'code': entity.code,
                        'name': entity.name,
                        'timestamp': now_pd_timestamp(),
                        'stock_id': stock_id,
                        'stock_code': stock_code,
                        'stock_name': category['name'],

                    })
                if the_list:
                    df = pd.DataFrame.from_records(the_list)
                    df_to_db(data_schema=self.data_schema, df=df, provider=self.provider,
                             force_update=True)

                self.logger.info('finish recording BlockStock:{},{}'.format(entity.category, entity.name))

            except Exception as e:
                self.logger.error("error:,resp.text:", e, resp.text)
            self.sleep()

Example #2

0

Show file

File: china_index_list_spider.py Project: yezj0010/zvt

    def fetch_szse_index_component(self, df: pd.DataFrame):
        """
        抓取深证指数成分股
        """
        query_url = 'http://www.szse.cn/api/report/ShowReport?SHOWTYPE=xlsx&CATALOGID=1747_zs&TABKEY=tab1&ZSDM={}'

        for _, index in df.iterrows():
            index_code = index['code']

            url = query_url.format(index_code)
            response = requests.get(url)

            response_df = pd.read_excel(io.BytesIO(response.content),
                                        dtype='str')

            index_id = f'index_cn_{index_code}'
            response_df = response_df[['证券代码']]
            response_df['id'] = response_df['证券代码'].apply(
                lambda x: f'{index_id}_{china_stock_code_to_id(str(x))}')
            response_df['entity_id'] = response_df['id']
            response_df['stock_id'] = response_df['证券代码'].apply(
                lambda x: china_stock_code_to_id(str(x)))
            response_df['index_id'] = index_id
            response_df.drop('证券代码', axis=1, inplace=True)

            df_to_db(data_schema=self.data_schema,
                     df=response_df,
                     provider=self.provider)
            self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')

            self.sleep()

Example #3

0

Show file

 def run(self):
     for category, url in self.category_map_url.items():
         resp = requests.get(url)
         results = json_callback_param(resp.text)
         the_list = []
         for result in results:
             items = result.split(',')
             code = items[1]
             name = items[2]
             entity_id = f'block_cn_{code}'
             the_list.append({
                 'id': entity_id,
                 'entity_id': entity_id,
                 'entity_type': 'block',
                 'exchange': 'cn',
                 'code': code,
                 'name': name,
                 'category': category.value
             })
         if the_list:
             df = pd.DataFrame.from_records(the_list)
             df_to_db(data_schema=self.data_schema,
                      df=df,
                      provider=self.provider,
                      force_update=True)
         self.logger.info(f"finish record sina blocks:{category.value}")

Example #4

0

Show file

    def record(self, entity, start, end, size, timestamps):
        q = query(valuation).filter(valuation.code == to_jq_entity_id(entity))
        count: pd.Timedelta = now_pd_timestamp() - start
        df = get_fundamentals_continuously(q,
                                           end_date=now_time_str(),
                                           count=count.days + 1,
                                           panel=False)
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['day'])
        df['code'] = entity.code
        df['name'] = entity.name
        df['id'] = df['timestamp'].apply(
            lambda x: "{}_{}".format(entity.id, to_time_str(x)))
        df = df.rename(
            {
                'pe_ratio_lyr': 'pe',
                'pe_ratio': 'pe_ttm',
                'pb_ratio': 'pb',
                'ps_ratio': 'ps',
                'pcf_ratio': 'pcf'
            },
            axis='columns')

        df['market_cap'] = df['market_cap'] * 100000000
        df['circulating_cap'] = df['circulating_cap'] * 100000000
        df['capitalization'] = df['capitalization'] * 10000
        df['circulating_cap'] = df['circulating_cap'] * 10000
        df_to_db(df=df,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)

        return None

Example #5

0

Show file

File: sina_china_stock_category_recorder.py Project: Vipamp/zvt

    def run(self):
        # get stock blocks from sina
        for category, url in self.category_map_url.items():
            resp = requests.get(url)
            resp.encoding = 'GBK'

            tmp_str = resp.text
            json_str = tmp_str[tmp_str.index('{'):tmp_str.index('}') + 1]
            tmp_json = json.loads(json_str)

            the_list = []

            for code in tmp_json:
                name = tmp_json[code].split(',')[1]
                entity_id = 'index_cn_{}'.format(code)
                the_list.append({
                    'id': entity_id,
                    'entity_id': entity_id,
                    'entity_type': 'block',
                    'exchange': 'cn',
                    'code': code,
                    'name': name,
                    'category': category.value
                })
            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema,
                         df=df,
                         provider=self.provider,
                         force_update=True)

            self.logger.info(f"finish record sina block:{category.value}")

Example #6

0

Show file

File: china_stock_meta_recorder.py Project: cz9874308/zvt

    def record(self, entity, start, end, size, timestamps):
        q = query(finance.FUND_PORTFOLIO_STOCK).filter(finance.FUND_PORTFOLIO_STOCK.pub_date >= start).filter(
            finance.FUND_PORTFOLIO_STOCK.code == entity.code)
        df = finance.run_query(q)
        if pd_is_not_null(df):
            #          id    code period_start  period_end    pub_date  report_type_id report_type  rank  symbol  name      shares    market_cap  proportion
            # 0   8640569  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     1  601318  中国平安  19869239.0  1.361043e+09        7.09
            # 1   8640570  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     2  600519  贵州茅台    921670.0  6.728191e+08        3.50
            # 2   8640571  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     3  600036  招商银行  18918815.0  5.806184e+08        3.02
            # 3   8640572  159919   2018-07-01  2018-09-30  2018-10-26          403003        第三季度     4  601166  兴业银行  22862332.0  3.646542e+08        1.90
            df['timestamp'] = pd.to_datetime(df['pub_date'])

            df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True)
            df['proportion'] = df['proportion'] * 0.01

            df = portfolio_relate_stock(df, entity)

            df['stock_id'] = df['stock_code'].apply(lambda x: china_stock_code_to_id(x))
            df['id'] = df[['entity_id', 'stock_id', 'pub_date', 'id']].apply(lambda x: '_'.join(x.astype(str)), axis=1)
            df['report_date'] = pd.to_datetime(df['period_end'])
            df['report_period'] = df['report_type'].apply(lambda x: jq_to_report_period(x))

            df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

            # self.logger.info(df.tail())
            self.logger.info(f"persist etf {entity.code} portfolio success")

        return None

Example #7

0

Show file

File: github_user_list_recorder.py Project: zvtvz/play-github

    def record(self, entity_item, start, end, size, timestamps):
        self.seed += 1

        timestamp = timestamps[0]

        the_url = self.url.format(to_time_str(timestamp),
                                  to_time_str(timestamp))

        items = get_all_results(url=the_url,
                                token=GithubAccount.get_token(seed=self.seed))

        current_time = now_pd_timestamp()

        results = [{
            'id': f'user_github_{item["login"]}',
            'entity_id': f'user_github_{item["login"]}',
            'timestamp': timestamp,
            'exchange': 'github',
            'entity_type': 'user',
            'code': item['login'],
            'node_id': item['node_id'],
            'created_timestamp': current_time,
            'updated_timestamp': None
        } for item in items]

        # for save faster
        df = pd.DataFrame(data=results[:-1])
        df_to_db(df=df,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force=True)

        return results[-1:]

Example #8

0

Show file

File: china_stock_category_recorder.py Project: mmerchant/zvt

    def run(self):
        for category, url in self.category_map_url.items():
            resp = requests.get(url)
            results = json_callback_param(resp.text)
            for result in results:
                items = result.split(',')
                code = items[1]
                name = items[2]
                id = 'index_cn_{}'.format(code)
                if id in self.index_ids:
                    continue
                self.session.add(
                    Index(id=id,
                          entity_id=id,
                          entity_type='index',
                          exchange='cn',
                          code=code,
                          name=name,
                          category=category.value))
            self.session.commit()

        indices = get_entities(session=self.session,
                               entity_type='index',
                               return_type='domain',
                               filters=[
                                   Index.category.in_([
                                       StockCategory.concept.value,
                                       StockCategory.industry.value
                                   ])
                               ],
                               provider=self.provider)

        for index_item in indices:
            resp = requests.get(
                self.category_stocks_url.format(index_item.code, '1'))
            try:
                results = json_callback_param(resp.text)
                the_list = []
                for result in results:
                    items = result.split(',')
                    stock_code = items[1]
                    stock_id = china_stock_code_to_id(stock_code)
                    index_id = index_item.id
                    the_list.append({
                        'id': '{}_{}'.format(index_id, stock_id),
                        'index_id': index_id,
                        'stock_id': stock_id
                    })
                if the_list:
                    df = pd.DataFrame.from_records(the_list)
                    df_to_db(data_schema=self.data_schema,
                             df=df,
                             provider=self.provider)

                self.logger.info('finish recording index:{},{}'.format(
                    index_item.category, index_item.name))

            except Exception as e:
                self.logger.error("error:,resp.text:", e, resp.text)
            self.sleep()

Example #9

0

Show file

File: china_stock_meta_recorder.py Project: cz9874308/zvt

    def run(self):
        # 抓取etf列表
        df_index = self.to_zvt_entity(get_all_securities(['etf']), entity_type='etf', category='etf')
        df_to_db(df_index, data_schema=Etf, provider=self.provider)

        # self.logger.info(df_index)
        self.logger.info("persist etf list success")
        logout()

Example #10

0

Show file

File: stock_trade_day_recorder.py Project: zhoujunjie221/zvt

    def record(self, entity, start, end, size, timestamps):
        df = pd.DataFrame()
        dates = get_trade_days(start_date=start)
        df['timestamp'] = pd.to_datetime(dates)
        df['id'] = [to_time_str(date) for date in dates]
        df['entity_id'] = 'stock_sz_000001'

        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

Example #11

0

Show file

File: china_etf_list_spider.py Project: zilinly/zvt

    def download_sz_etf_component(self, df: pd.DataFrame):
        query_url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vII_NewestComponent/indexid/{}.phtml'

        self.parse_sz_etf_underlying_index(df)
        for _, etf in df.iterrows():
            underlying_index = etf['拟合指数']
            etf_code = etf['证券代码']

            if len(underlying_index) == 0:
                self.logger.info(f'{etf["证券简称"]} - {etf_code} 非 A 股市场指数，跳过...')
                continue

            url = query_url.format(underlying_index)
            response = requests.get(url)
            response.encoding = 'gbk'

            try:
                dfs = pd.read_html(response.text, header=1)
            except ValueError as error:
                self.logger.error(
                    f'HTML parse error: {error}, response: {response.text}')
                continue

            if len(dfs) < 4:
                continue

            response_df = dfs[3].copy()
            response_df = response_df.dropna(axis=1, how='any')
            response_df['品种代码'] = response_df['品种代码'].apply(
                lambda x: f'{x:06d}')

            etf_id = f'etf_sz_{etf_code}'
            response_df = response_df[['品种代码', '品种名称']].copy()
            response_df.rename(columns={
                '品种代码': 'stock_code',
                '品种名称': 'stock_name'
            },
                               inplace=True)

            response_df['entity_id'] = etf_id
            response_df['entity_type'] = 'etf'
            response_df['exchange'] = 'sz'
            response_df['code'] = etf_code
            response_df['name'] = etf['证券简称']
            response_df['timestamp'] = now_pd_timestamp()

            response_df['stock_id'] = response_df['stock_code'].apply(
                lambda code: china_stock_code_to_id(code))
            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{etf_id}_{x}')

            df_to_db(data_schema=self.data_schema,
                     df=response_df,
                     provider=self.provider)
            self.logger.info(f'{etf["证券简称"]} - {etf_code} 成分股抓取完成...')

            self.sleep()

Example #12

0

Show file

File: china_stock_meta_recorder.py Project: cz9874308/zvt

    def run(self):
        # 抓取股票列表
        df_stock = self.to_zvt_entity(get_all_securities(['stock']), entity_type='stock')
        df_to_db(df_stock, data_schema=Stock, provider=self.provider)
        # persist StockDetail too
        df_to_db(df=df_stock, data_schema=StockDetail, provider=self.provider, force_update=False)

        # self.logger.info(df_stock)
        self.logger.info("persist stock list success")

        logout()

Example #13

0

Show file

    def record(self, entity, start, end, size, timestamps):
        if self.start_timestamp:
            start = max(self.start_timestamp, to_pd_timestamp(start))

        # if self.level < IntervalLevel.LEVEL_1HOUR:
        #     start = '2019-01-01'

        end = now_pd_timestamp()

        start_timestamp = to_time_str(start)

        # 聚宽get_price函数必须指定结束时间，否则会有未来数据
        end_timestamp = to_time_str(end, fmt=TIME_FORMAT_MINUTE2)
        # 不复权
        df = get_price(
            to_jq_entity_id(entity),
            start_date=to_time_str(start_timestamp),
            end_date=end_timestamp,
            frequency=self.jq_trading_level,
            fields=['open', 'close', 'low', 'high', 'volume', 'money'],
            skip_paused=True,
            fq=None)
        if df_is_not_null(df):
            df.index.name = 'timestamp'
            df.reset_index(inplace=True)
            df['name'] = entity.name
            df.rename(columns={'money': 'turnover'}, inplace=True)

            df['entity_id'] = entity.id
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df['provider'] = 'joinquant'
            df['level'] = self.level.value
            df['code'] = entity.code

            def generate_kdata_id(se):
                if self.level >= IntervalLevel.LEVEL_1DAY:
                    return "{}_{}".format(
                        se['entity_id'],
                        to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
                else:
                    return "{}_{}".format(
                        se['entity_id'],
                        to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

            df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id,
                                                            axis=1)

            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force=self.force_update)

        return None

Example #14

0

Show file

File: jq_stock_kdata_recorder.py Project: jjandnn/zvt

    def record(self, entity, start, end, size, timestamps):
        # 只要前复权数据
        if not self.end_timestamp:
            df = get_bars(to_jq_entity_id(entity),
                          count=size,
                          unit=self.jq_trading_level,
                          fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                          fq_ref_date=to_time_str(now_pd_timestamp()),
                          include_now=True)
        else:
            end_timestamp = to_time_str(self.end_timestamp)
            df = get_bars(to_jq_entity_id(entity),
                          count=size,
                          unit=self.jq_trading_level,
                          fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                          end_dt=end_timestamp,
                          fq_ref_date=to_time_str(now_pd_timestamp()),
                          include_now=False)

        if pd_is_not_null(df):
            df['name'] = entity.name
            df.rename(columns={'money': 'turnover', 'date': 'timestamp'}, inplace=True)

            df['entity_id'] = entity.id
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df['provider'] = 'joinquant'
            df['level'] = self.level.value
            df['code'] = entity.code

            # 判断是否需要重新计算之前保存的前复权数据
            check_df = df.head(1)
            check_date = check_df['timestamp'][0]
            current_df = get_kdata(entity_id=entity.id, provider=self.provider, start_timestamp=check_date,
                                   end_timestamp=check_date, limit=1, level=self.level)
            if pd_is_not_null(current_df):
                old = current_df.iloc[0, :]['close']
                new = check_df['close'][0]
                # 相同时间的close不同，表明前复权需要重新计算
                if round(old, 2) != round(new, 2):
                    self.factor = new / old
                    self.last_timestamp = pd.Timestamp(check_date)

            def generate_kdata_id(se):
                if self.level >= IntervalLevel.LEVEL_1DAY:
                    return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
                else:
                    return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

            df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)

            df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

        return None

Example #15

0

Show file

File: coin_meta_recorder.py Project: zilinly/zvt

    def run(self):
        for exchange_str in self.exchanges:
            exchange = CCXTAccount.get_ccxt_exchange(exchange_str)
            try:
                markets = exchange.fetch_markets()
                df = pd.DataFrame()

                # markets有些为key=symbol的dict,有些为list
                markets_type = type(markets)
                if markets_type != dict and markets_type != list:
                    self.logger.exception(
                        "unknown return markets type {}".format(markets_type))
                    return

                aa = []
                for market in markets:
                    if markets_type == dict:
                        name = market
                        code = market

                    if markets_type == list:
                        code = market['symbol']
                        name = market['symbol']

                    if name not in COIN_PAIRS:
                        continue
                    aa.append(market)

                    security_item = {
                        'id': '{}_{}_{}'.format('coin', exchange_str, code),
                        'entity_id': '{}_{}_{}'.format('coin', exchange_str,
                                                       code),
                        'exchange': exchange_str,
                        'entity_type': 'coin',
                        'code': code,
                        'name': name
                    }

                    df = df.append(security_item, ignore_index=True)

                # 存储该交易所的数字货币列表
                if not df.empty:
                    df_to_db(df=df,
                             data_schema=self.data_schema,
                             provider=self.provider,
                             force_update=True)
                self.logger.info(
                    "init_markets for {} success".format(exchange_str))
            except Exception as e:
                self.logger.exception(
                    f"init_markets for {exchange_str} failed", e)

Example #16

0

Show file

File: china_stock_list_spider.py Project: zilinly/zvt

    def download_stock_list(self, response, exchange):
        df = None
        if exchange == 'sh':
            df = pd.read_csv(io.BytesIO(response.content),
                             sep='\s+',
                             encoding='GB2312',
                             dtype=str,
                             parse_dates=['上市日期'])
            if df is not None:
                df = df.loc[:, ['公司代码', '公司简称', '上市日期']]

        elif exchange == 'sz':
            df = pd.read_excel(io.BytesIO(response.content),
                               sheet_name='A股列表',
                               dtype=str,
                               parse_dates=['A股上市日期'])
            if df is not None:
                df = df.loc[:, ['A股代码', 'A股简称', 'A股上市日期']]

        if df is not None:
            df.columns = ['code', 'name', 'list_date']

            df = df.dropna(subset=['code'])

            # handle the dirty data
            # 600996,贵广网络,2016-12-26,2016-12-26,sh,stock,stock_sh_600996,,次新股,贵州,,
            df.loc[df['code'] == '600996', 'list_date'] = '2016-12-26'
            print(df[df['list_date'] == '-'])
            df['list_date'] = df['list_date'].apply(
                lambda x: to_pd_timestamp(x))
            df['exchange'] = exchange
            df['entity_type'] = 'stock'
            df['id'] = df[['entity_type', 'exchange',
                           'code']].apply(lambda x: '_'.join(x.astype(str)),
                                          axis=1)
            df['entity_id'] = df['id']
            df['timestamp'] = df['list_date']
            df = df.dropna(axis=0, how='any')
            df = df.drop_duplicates(subset=('id'), keep='last')
            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=False)
            # persist StockDetail too
            df_to_db(df=df,
                     data_schema=StockDetail,
                     provider=self.provider,
                     force_update=False)
            self.logger.info(df.tail())
            self.logger.info("persist stock list successs")

Example #17

0

Show file

    def run(self):
        # get stock category from sina
        for category, url in self.category_map_url.items():
            resp = requests.get(url)
            resp.encoding = 'GBK'

            tmp_str = resp.text
            json_str = tmp_str[tmp_str.index('{'):tmp_str.index('}') + 1]
            tmp_json = json.loads(json_str)
            for code in tmp_json:
                name = tmp_json[code].split(',')[1]
                id = 'index_cn_{}'.format(code)
                if id in self.index_ids:
                    continue
                self.session.add(Index(id=id, entity_type='index', exchange='cn', code=code, name=name,
                                       category=category.value))
            self.session.commit()

        indices = get_entities(session=self.session, entity_type='index',
                               return_type='domain', filters=[
                Index.category.in_([StockCategory.industry.value, StockCategory.concept.value])],
                               provider=self.provider)

        for index_item in indices:
            for page in range(1, 5):
                resp = requests.get(self.category_stocks_url.format(page, index_item.code))
                try:
                    if resp.text == 'null' or resp.text is None:
                        break
                    category_jsons = demjson.decode(resp.text)
                    the_list = []
                    for category in category_jsons:
                        stock_code = category['code']
                        stock_id = china_stock_code_to_id(stock_code)
                        index_id = index_item.id
                        the_list.append({
                            'id': '{}_{}'.format(index_id, stock_id),
                            'index_id': index_id,
                            'stock_id': stock_id
                        })
                    if the_list:
                        df = pd.DataFrame.from_records(the_list)
                        df_to_db(data_schema=self.data_schema, df=df, provider=self.provider)

                    self.logger.info('finish recording index:{},{}'.format(index_item.category, index_item.name))

                except Exception as e:
                    self.logger.error("error:,resp.text:", e, resp.text)
                self.sleep()

Example #18

0

Show file

File: china_index_list_spider.py Project: zilinly/zvt

    def fetch_csi_index_component(self, df: pd.DataFrame):
        """
        抓取上证、中证指数成分股
        """
        query_url = 'http://www.csindex.com.cn/uploads/file/autofile/cons/{}cons.xls'

        for _, index in df.iterrows():
            index_code = index['code']

            url = query_url.format(index_code)

            try:
                response = requests.get(url)
                response.raise_for_status()
            except requests.HTTPError as error:
                self.logger.error(
                    f'{index["name"]} - {index_code} 成分股抓取错误 ({error})')
                continue

            response_df = pd.read_excel(io.BytesIO(response.content))

            response_df = response_df[[
                '成分券代码Constituent Code', '成分券名称Constituent Name'
            ]].rename(
                columns={
                    '成分券代码Constituent Code': 'stock_code',
                    '成分券名称Constituent Name': 'stock_name'
                })

            index_id = f'index_cn_{index_code}'
            response_df['entity_id'] = index_id
            response_df['entity_type'] = 'index'
            response_df['exchange'] = 'cn'
            response_df['code'] = index_code
            response_df['name'] = index['name']
            response_df['timestamp'] = now_pd_timestamp()

            response_df['stock_id'] = response_df['stock_code'].apply(
                lambda x: china_stock_code_to_id(str(x)))
            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{index_id}_{x}')

            df_to_db(data_schema=self.data_schema,
                     df=response_df,
                     provider=self.provider,
                     force_update=True)
            self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')

            self.sleep()

Example #19

0

Show file

File: china_index_list_spider.py Project: zilinly/zvt

    def persist_index(self, df) -> None:
        df['timestamp'] = df['timestamp'].apply(lambda x: to_pd_timestamp(x))
        df['list_date'] = df['list_date'].apply(lambda x: to_pd_timestamp(x))
        df['id'] = df['code'].apply(lambda code: f'index_cn_{code}')
        df['entity_id'] = df['id']
        df['exchange'] = 'cn'
        df['entity_type'] = 'index'

        df = df.dropna(axis=0, how='any')
        df = df.drop_duplicates(subset='id', keep='last')

        df_to_db(df=df,
                 data_schema=Index,
                 provider=self.provider,
                 force_update=False)

Example #20

0

Show file

File: china_index_list_spider.py Project: zilinly/zvt

    def fetch_cni_index_component(self, df: pd.DataFrame):
        """
        抓取国证指数成分股
        """
        query_url = 'http://www.cnindex.com.cn/docs/yb_{}.xls'

        for _, index in df.iterrows():
            index_code = index['code']

            url = query_url.format(index_code)

            try:
                response = requests.get(url)
                response.raise_for_status()
            except requests.HTTPError as error:
                self.logger.error(
                    f'{index["name"]} - {index_code} 成分股抓取错误 ({error})')
                continue

            response_df = pd.read_excel(io.BytesIO(response.content),
                                        dtype='str')

            index_id = f'index_cn_{index_code}'

            try:
                response_df = response_df[['样本股代码']]
            except KeyError:
                response_df = response_df[['证券代码']]

            response_df['entity_id'] = index_id
            response_df['entity_type'] = 'index'
            response_df['exchange'] = 'cn'
            response_df['code'] = index_code
            response_df['name'] = index['name']
            response_df['timestamp'] = now_pd_timestamp()

            response_df.columns = ['stock_code']
            response_df['stock_id'] = response_df['stock_code'].apply(
                lambda x: china_stock_code_to_id(str(x)))
            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{index_id}_{x}')

            df_to_db(data_schema=self.data_schema,
                     df=response_df,
                     provider=self.provider)
            self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')

            self.sleep()

Example #21

0

Show file

File: china_etf_list_spider.py Project: zilinly/zvt

    def download_sh_etf_component(self, df: pd.DataFrame):
        """
        ETF_CLASS => 1. 单市场 ETF 2.跨市场 ETF 3. 跨境 ETF
                        5. 债券 ETF 6. 黄金 ETF
        :param df: ETF 列表数据
        :return: None
        """
        query_url = 'http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?' \
                    'isPagination=false&type={}&etfClass={}'

        etf_df = df[(df['ETF_CLASS'] == '1') | (df['ETF_CLASS'] == '2')]
        etf_df = self.populate_sh_etf_type(etf_df)

        for _, etf in etf_df.iterrows():
            url = query_url.format(etf['ETF_TYPE'], etf['ETF_CLASS'])
            response = requests.get(url, headers=DEFAULT_SH_ETF_LIST_HEADER)
            response_dict = demjson.decode(response.text)
            response_df = pd.DataFrame(response_dict.get('result', []))

            etf_code = etf['FUND_ID']
            etf_id = f'etf_sh_{etf_code}'
            response_df = response_df[['instrumentId',
                                       'instrumentName']].copy()
            response_df.rename(columns={
                'instrumentId': 'stock_code',
                'instrumentName': 'stock_name'
            },
                               inplace=True)

            response_df['entity_id'] = etf_id
            response_df['entity_type'] = 'etf'
            response_df['exchange'] = 'sh'
            response_df['code'] = etf_code
            response_df['name'] = etf['FUND_NAME']
            response_df['timestamp'] = now_pd_timestamp()

            response_df['stock_id'] = response_df['stock_code'].apply(
                lambda code: china_stock_code_to_id(code))
            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{etf_id}_{x}')

            df_to_db(data_schema=self.data_schema,
                     df=response_df,
                     provider=self.provider)
            self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

            self.sleep()

Example #22

0

Show file

File: china_etf_list_spider.py Project: zilinly/zvt

    def persist_etf_list(self, df: pd.DataFrame, exchange: str):
        if df is None:
            return

        df = df.copy()
        if exchange == 'sh':
            df = df[['FUND_ID', 'FUND_NAME']]
        elif exchange == 'sz':
            df = df[['证券代码', '证券简称']]

        df.columns = ['code', 'name']
        df['id'] = df['code'].apply(lambda code: f'etf_{exchange}_{code}')
        df['entity_id'] = df['id']
        df['exchange'] = exchange
        df['entity_type'] = 'etf'
        df['category'] = BlockCategory.etf.value

        df = df.dropna(axis=0, how='any')
        df = df.drop_duplicates(subset='id', keep='last')

        df_to_db(df=df,
                 data_schema=Etf,
                 provider=self.provider,
                 force_update=False)

Example #23

0

Show file

    def record(self, entity, start, end, size, timestamps):
        resp = requests.get(self.category_stocks_url.format(entity.code, '1'))
        try:
            results = json_callback_param(resp.text)
            the_list = []
            for result in results:
                items = result.split(',')
                stock_code = items[1]
                stock_id = china_stock_code_to_id(stock_code)
                block_id = entity.id

                the_list.append({
                    'id': '{}_{}'.format(block_id, stock_id),
                    'entity_id': block_id,
                    'entity_type': 'block',
                    'exchange': entity.exchange,
                    'code': entity.code,
                    'name': entity.name,
                    'timestamp': now_pd_timestamp(),
                    'stock_id': stock_id,
                    'stock_code': stock_code,
                    'stock_name': items[2],
                })
            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema,
                         df=df,
                         provider=self.provider,
                         force_update=True)

            self.logger.info('finish recording block:{},{}'.format(
                entity.category, entity.name))

        except Exception as e:
            self.logger.error("error:,resp.text:", e, resp.text)
        self.sleep()

Example #24

0

Show file

File: factor.py Project: godsonhyl/zvt

 def persist_result(self):
     df_to_db(df=self.factor_df,
              data_schema=self.factor_schema,
              provider='zvt')

Example #25

0

Show file

File: etf_valuation_recorder.py Project: cz9874308/zvt

    def record(self, entity, start, end, size, timestamps):
        if not end:
            end = now_pd_timestamp()

        date_range = pd.date_range(start=start, end=end, freq='1D').tolist()
        for date in date_range:
            # etf包含的个股和比例
            etf_stock_df = get_etf_stocks(code=entity.code,
                                          timestamp=date,
                                          provider=self.provider)

            if pd_is_not_null(etf_stock_df):
                all_pct = etf_stock_df['proportion'].sum()

                if all_pct >= 1.2 or all_pct <= 0.8:
                    self.logger.error(
                        f'ignore etf:{entity.id}  date:{date} proportion sum:{all_pct}'
                    )
                    break

                etf_stock_df.set_index('stock_id', inplace=True)

                # 个股的估值数据
                stock_valuation_df = StockValuation.query_data(
                    entity_ids=etf_stock_df.index.to_list(),
                    filters=[StockValuation.timestamp == date],
                    index='entity_id')

                if pd_is_not_null(stock_valuation_df):
                    stock_count = len(etf_stock_df)
                    valuation_count = len(stock_valuation_df)

                    self.logger.info(
                        f'etf:{entity.id} date:{date} stock count: {stock_count},'
                        f'valuation count:{valuation_count}')

                    pct = abs(stock_count - valuation_count) / stock_count

                    if pct >= 0.2:
                        self.logger.error(
                            f'ignore etf:{entity.id}  date:{date} pct:{pct}')
                        break

                    se = pd.Series({
                        'id': "{}_{}".format(entity.id, date),
                        'entity_id': entity.id,
                        'timestamp': date,
                        'code': entity.code,
                        'name': entity.name
                    })
                    for col in ['pe', 'pe_ttm', 'pb', 'ps', 'pcf']:
                        # PE=P/E
                        # 这里的算法为：将其价格都设为PE,那么Earning为1(亏钱为-1)，结果为 总价格(PE)/总Earning

                        value = 0
                        price = 0

                        # 权重估值
                        positive_df = stock_valuation_df[[
                            col
                        ]][stock_valuation_df[col] > 0]
                        positive_df['count'] = 1
                        positive_df = positive_df.multiply(
                            etf_stock_df["proportion"], axis="index")
                        if pd_is_not_null(positive_df):
                            value = positive_df['count'].sum()
                            price = positive_df[col].sum()

                        negative_df = stock_valuation_df[[
                            col
                        ]][stock_valuation_df[col] < 0]
                        if pd_is_not_null(negative_df):
                            negative_df['count'] = 1
                            negative_df = negative_df.multiply(
                                etf_stock_df["proportion"], axis="index")
                            value = value - negative_df['count'].sum()
                            price = price + negative_df[col].sum()

                        se[f'{col}1'] = price / value

                        # 简单算术平均估值
                        positive_df = stock_valuation_df[col][
                            stock_valuation_df[col] > 0]
                        positive_count = len(positive_df)

                        negative_df = stock_valuation_df[col][
                            stock_valuation_df[col] < 0]
                        negative_count = len(negative_df)

                        value = positive_count - negative_count
                        price = positive_df.sum() + abs(negative_df.sum())

                        se[col] = price / value
                    df = se.to_frame().T

                    self.logger.info(df)

                    df_to_db(df=df,
                             data_schema=self.data_schema,
                             provider=self.provider,
                             force_update=self.force_update)

        return None

Example #26

0

Show file

    def record(self, entity, start, end, size, timestamps):
        if not end:
            end = now_pd_timestamp()

        date_range = pd.date_range(start=start, end=end, freq='1D').tolist()
        for date in date_range:
            # etf包含的个股和比例
            etf_stock_df = get_etf_stocks(code=entity.code,
                                          timestamp=date,
                                          provider=self.provider)

            all_pct = etf_stock_df['proportion'].sum()

            if all_pct >= 1.1 or all_pct <= 0.9:
                self.logger.info(
                    f'etf:{entity.id}  date:{date} proportion sum:{all_pct}')

            if pd_is_not_null(etf_stock_df):
                etf_stock_df.set_index('stock_id', inplace=True)

                # 个股的估值数据
                stock_valuation_df = StockValuation.query_data(
                    entity_ids=etf_stock_df.index.to_list(),
                    filters=[StockValuation.timestamp == date],
                    index='entity_id')

                if pd_is_not_null(stock_valuation_df):
                    # 暂时只支持 简单算术平均估值，理由：模糊的正确比精确的错误有用
                    # A股个股的市值往往相差很大，按市值权重的话，这样的估值很难反映整体
                    self.logger.info(
                        f'etf:{entity.id} date:{date} stock count: {len(etf_stock_df)},valuation count:{len(stock_valuation_df)}'
                    )

                    #     # 静态pe
                    #     pe = Column(Float)
                    #     # 动态pe
                    #     pe_ttm = Column(Float)
                    #     # 市净率
                    #     pb = Column(Float)
                    #     # 市销率
                    #     ps = Column(Float)
                    #     # 市现率
                    #     pcf = Column(Float)

                    se = pd.Series({
                        'id': "{}_{}".format(entity.id, date),
                        'entity_id': entity.id,
                        'timestamp': date,
                        'code': entity.code,
                        'name': entity.name
                    })
                    for col in ['pe', 'pe_ttm', 'pb', 'ps', 'pcf']:
                        # PE=P/E
                        # 这里的算法为：将其价格都设为1，算出总earning,再相除
                        positive_df = stock_valuation_df[col][
                            stock_valuation_df[col] > 0]
                        positive_count = len(positive_df)

                        negative_df = stock_valuation_df[col][
                            stock_valuation_df[col] < 0]
                        negative_count = len(negative_df)

                        result = (positive_count + negative_count) / (
                            positive_count / positive_df.mean() +
                            negative_count / negative_df.mean())

                        se[col] = result
                    df = se.to_frame().T

                    self.logger.info(df)

                    df_to_db(df=df,
                             data_schema=self.data_schema,
                             provider=self.provider,
                             force_update=self.force_update)

        return None

Example #27

0

Show file

File: __init__.py Project: zilinly/zvt

def init_main_index(provider='exchange'):
    for item in CHINA_STOCK_MAIN_INDEX:
        item['timestamp'] = to_pd_timestamp(item['timestamp'])
    df = pd.DataFrame(CHINA_STOCK_MAIN_INDEX)
    # print(df)
    df_to_db(df=df, data_schema=Index, provider=provider, force_update=False)

Example #28

0

Show file

 def persist_result(self):
     df_to_db(df=self.factor_df, data_schema=self.factor_schema, provider='zvt', force_update=False)