Esempio n. 1
0
    def fetch_csi_index_component(self):
        """
        Fetch and persist the constituent stocks of every index in ``self.all_index``.

        For each row the constituents come from ``get_index_stocks``; one record
        per (index, stock) pair is written through ``df_to_db`` with
        ``force_update=True``, then ``self.sleep()`` is called before the next
        index is processed.
        """
        for _, index_row in self.all_index.iterrows():
            # The row label is split on "." and only the leading part is used as the code.
            full_code = index_row.name
            index_code = full_code.split(".")[0]
            index_id = f'index_cn_{index_code}'

            component_df = pd.DataFrame()
            component_df['stock_code'] = [
                stock.split(".")[0] for stock in get_index_stocks(full_code)
            ]

            # Columns describing the index itself are identical on every row.
            component_df['entity_id'] = index_id
            component_df['entity_type'] = 'index'
            component_df['exchange'] = 'cn'
            component_df['code'] = index_code
            component_df['name'] = index_row.display_name
            component_df['timestamp'] = now_pd_timestamp()

            # The record id is "<index id>_<stock id>".
            component_df['stock_id'] = component_df['stock_code'].apply(
                lambda stock_code: china_stock_code_to_id(str(stock_code)))
            component_df['id'] = component_df['stock_id'].apply(
                lambda stock_id: f'{index_id}_{stock_id}')

            df_to_db(df=component_df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=True)
            self.logger.info(f'{index_row["name"]} - {index_code} 成分股抓取完成...')

            self.sleep()
Esempio n. 2
0
 def record_cs_index(self, index_type):
     """
     Fetch the index list of ``index_type`` from ``cs_index_api`` and persist it.

     The frame is written through ``df_to_db`` with ``force_update=True`` and a
     completion message is logged afterwards.
     """
     index_df = cs_index_api.get_cs_index(index_type=index_type)
     df_to_db(df=index_df,
              data_schema=self.data_schema,
              provider=self.provider,
              force_update=True)
     self.logger.info("finish record {} index".format(index_type))
Esempio n. 3
0
    def run(self):
        """
        Download and persist the block list for every entry of ``self.category_map_url``.

        Each response body is parsed with ``json_callback_param`` into
        comma-separated strings; the second and third fields of every string
        are taken as the block code and name. Categories that yield no records
        are not written, but a completion message is logged for every category.
        """
        session = get_http_session()

        for category, url in self.category_map_url.items():
            response = request_get(session, url)

            records = []
            for raw_entry in json_callback_param(response.text):
                fields = raw_entry.split(',')
                block_code, block_name = fields[1], fields[2]
                block_id = f'block_cn_{block_code}'
                records.append(dict(
                    id=block_id,
                    entity_id=block_id,
                    entity_type=EntityType.Block.value,
                    exchange='cn',
                    code=block_code,
                    name=block_name,
                    category=category.value,
                ))

            if records:
                df_to_db(df=pd.DataFrame.from_records(records),
                         region=Region.CHN,
                         data_schema=self.data_schema,
                         provider=self.provider,
                         force_update=True)
            self.logger.info(f"finish record sina blocks:{category.value}")
    def record(self, entity, start, end, size, timestamps):
        """
        Record the stocks that belong to the block ``entity``.

        Pages 1 to 4 of ``self.category_stocks_url`` are requested in turn and
        every returned stock becomes one row persisted through ``df_to_db``.
        Paging stops as soon as a page body is ``None`` or the literal
        ``'null'``. ``start``, ``end``, ``size`` and ``timestamps`` are not used.

        Any exception raised while decoding or persisting a page is logged and
        the loop continues with the next page.
        """
        for page in range(1, 5):
            resp = requests.get(self.category_stocks_url.format(page, entity.code))
            try:
                if resp.text is None or resp.text == 'null':
                    break
                category_jsons = demjson.decode(resp.text)
                block_id = entity.id
                the_list = []
                for category in category_jsons:
                    stock_code = category['code']
                    stock_id = china_stock_code_to_id(stock_code)
                    the_list.append({
                        'id': '{}_{}'.format(block_id, stock_id),
                        'entity_id': block_id,
                        'entity_type': 'block',
                        'exchange': entity.exchange,
                        'code': entity.code,
                        'name': entity.name,
                        'timestamp': now_pd_timestamp(),
                        'stock_id': stock_id,
                        'stock_code': stock_code,
                        'stock_name': category['name'],
                    })
                if the_list:
                    df = pd.DataFrame.from_records(the_list)
                    df_to_db(data_schema=self.data_schema, df=df, provider=self.provider,
                             force_update=True)

                self.logger.info('finish recording BlockStock:{},{}'.format(entity.category, entity.name))

            except Exception as e:
                # logging %-formats the message with its args, so every arg needs a
                # placeholder; without them the record fails to format and the
                # details are lost.
                self.logger.error("error:%s,resp.text:%s", e, resp.text)
            self.sleep()
Esempio n. 5
0
    def persist_etf_list(self, df: pd.DataFrame, exchange: str):
        """
        Normalise a raw ETF listing and persist it under the ``Etf`` schema.

        :param df: raw listing; nothing is done when it is ``None``
        :param exchange: ``'sh'`` or ``'sz'``; selects the two source columns
            that hold the fund code and name
        """
        if df is None:
            return

        # Source columns (code, name) per exchange; other exchanges keep the frame as is.
        source_columns = {
            'sh': ['FUND_ID', 'FUND_NAME'],
            'sz': ['证券代码', '证券简称'],
        }.get(exchange)

        etf_df = df.copy()
        if source_columns is not None:
            etf_df = etf_df[source_columns]

        etf_df.columns = ['code', 'name']
        etf_df['id'] = etf_df['code'].apply(
            lambda fund_code: 'etf_{}_{}'.format(exchange, fund_code))
        etf_df['entity_id'] = etf_df['id']
        etf_df['exchange'] = exchange
        etf_df['entity_type'] = EntityType.ETF.value
        etf_df['category'] = BlockCategory.etf.value

        # Drop incomplete rows first, then keep the last occurrence of each id.
        etf_df = etf_df.dropna(axis=0, how='any').drop_duplicates(subset='id', keep='last')

        df_to_db(df=etf_df,
                 ref_df=None,
                 region=Region.CHN,
                 data_schema=Etf,
                 provider=self.provider)
Esempio n. 6
0
    def record(self, entity, start, end, size, timestamps, http_session):
        """
        Record trading days from ``self.nyse.schedule`` up to the current US time.

        The newest stored ``StockTradeDay`` row for ``self.region`` (if any)
        replaces ``start``. The schedule dates are then saved with the fixed
        ``entity_id`` ``'nyse'``. ``entity``, ``end``, ``size``, ``timestamps``
        and ``http_session`` are not used.
        """
        try:
            trade_day = StockTradeDay.query_data(
                region=self.region,
                limit=1,
                order=StockTradeDay.timestamp.desc(),
                return_type='domain')
            if len(trade_day) > 0:
                start = trade_day[0].timestamp
        except Exception as e:
            # Best effort: keep the caller-supplied ``start``, but leave a trace
            # instead of hiding the failure completely.
            self.logger.warning(
                "failed to query latest trade day, keep start=%s: %s", start, e)

        df = pd.DataFrame()
        dates = self.nyse.schedule(start_date=to_time_str(start),
                                   end_date=to_time_str(
                                       now_pd_timestamp(Region.US)))
        dates = dates.index.to_list()
        self.logger.info(f'add dates:{dates}')
        df['timestamp'] = pd.to_datetime(dates)
        # The day string doubles as the primary key of each row.
        df['id'] = [to_time_str(date) for date in dates]
        df['entity_id'] = 'nyse'

        df_to_db(df=df,
                 region=self.region,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)
Esempio n. 7
0
    def record(self, entity, start, end, size, timestamps):
        """
        Record the locked-share data returned by ``get_locked_shares`` for ``entity``.

        The query window runs from ``start`` to 150 days after the current time.
        When data is returned, the rate columns are scaled by 100, the ``day``
        column becomes ``end_date``/``timestamp``, and each row gets the id
        ``"<entity_id>_<day>"`` before being persisted. Always returns ``None``.
        """
        jq_code = to_jq_entity_id(entity)
        window_start = to_time_str(start)
        window_end = to_time_str(now_pd_timestamp() + timedelta(days=150))
        df = get_locked_shares([jq_code], start_date=window_start, end_date=window_end)

        if not pd_is_not_null(df):
            return None

        for target, source in (('locked_rate1', 'rate1'), ('locked_rate2', 'rate2')):
            df[target] = df[source] * 100
        df['locked_num'] = df['num']

        df['entity_id'] = entity.id
        df['end_date'] = pd.to_datetime(df['day'])
        df['timestamp'] = df['end_date']
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        df['id'] = df[['entity_id', 'timestamp']].apply(
            lambda row: "{}_{}".format(
                row['entity_id'],
                to_time_str(row['timestamp'], fmt=TIME_FORMAT_DAY)),
            axis=1)

        df_to_db(df=df,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)
        return None
Esempio n. 8
0
    def run(self):
        """
        Download and persist the block list for every entry of ``self.category_map_url``.

        Each response is decoded as GBK. The text between the first ``{`` and
        the first ``}`` is parsed as a JSON object mapping a block code to a
        comma-separated string whose second field is used as the block name.
        """
        for category, url in self.category_map_url.items():
            response = requests.get(url)
            response.encoding = "GBK"

            body = response.text
            # Cut the JSON object out of the surrounding text.
            blocks = json.loads(body[body.index("{") : body.index("}") + 1])

            records = []
            for block_code, raw_value in blocks.items():
                block_id = f"block_cn_{block_code}"
                records.append(
                    dict(
                        id=block_id,
                        entity_id=block_id,
                        entity_type="block",
                        exchange="cn",
                        code=block_code,
                        name=raw_value.split(",")[1],
                        category=category.value,
                    )
                )

            if records:
                block_df = pd.DataFrame.from_records(records)
                df_to_db(data_schema=self.data_schema, df=block_df, provider=self.provider, force_update=True)

            self.logger.info(f"finish record sina blocks:{category.value}")
Esempio n. 9
0
    def record(self, entity, start, end, size, timestamps):
        """
        Record the stocks that belong to the block ``entity``.

        Pages 1 to 4 of ``self.category_stocks_url`` are requested in turn and
        every returned stock becomes one row persisted through ``df_to_db``.
        Paging stops as soon as a page body is ``None`` or the literal
        ``"null"``. ``start``, ``end``, ``size`` and ``timestamps`` are not used.

        Any exception raised while decoding or persisting a page is logged and
        the loop continues with the next page.
        """
        for page in range(1, 5):
            resp = requests.get(self.category_stocks_url.format(page, entity.code))
            try:
                if resp.text is None or resp.text == "null":
                    break
                category_jsons = demjson3.decode(resp.text)
                block_id = entity.id
                the_list = []
                for category in category_jsons:
                    stock_code = category["code"]
                    stock_id = china_stock_code_to_id(stock_code)
                    the_list.append(
                        {
                            "id": "{}_{}".format(block_id, stock_id),
                            "entity_id": block_id,
                            "entity_type": "block",
                            "exchange": entity.exchange,
                            "code": entity.code,
                            "name": entity.name,
                            "timestamp": now_pd_timestamp(),
                            "stock_id": stock_id,
                            "stock_code": stock_code,
                            "stock_name": category["name"],
                        }
                    )
                if the_list:
                    df = pd.DataFrame.from_records(the_list)
                    df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)

                self.logger.info("finish recording BlockStock:{},{}".format(entity.category, entity.name))

            except Exception as e:
                # logging %-formats the message with its args, so every arg needs a
                # placeholder; without them the record fails to format and the
                # details are lost.
                self.logger.error("error:%s,resp.text:%s", e, resp.text)
            self.sleep()
Esempio n. 10
0
 def run(self):
     """
     Download and persist the block list for every entry of ``self.category_map_url``.

     Each response body is parsed with ``json_callback_param`` into
     comma-separated strings; the second and third fields of every string
     are taken as the block code and name. A completion message is logged
     for every category, whether or not anything was written.
     """
     for category, url in self.category_map_url.items():
         response = requests.get(url, headers=DEFAULT_HEADER)

         records = []
         for raw_entry in json_callback_param(response.text):
             fields = raw_entry.split(",")
             block_code, block_name = fields[1], fields[2]
             block_id = f"block_cn_{block_code}"
             records.append(
                 dict(
                     id=block_id,
                     entity_id=block_id,
                     entity_type="block",
                     exchange="cn",
                     code=block_code,
                     name=block_name,
                     category=category.value,
                 )
             )

         if records:
             block_df = pd.DataFrame.from_records(records)
             df_to_db(data_schema=self.data_schema, df=block_df, provider=self.provider, force_update=self.force_update)
         self.logger.info(f"finish record eastmoney blocks:{category.value}")
Esempio n. 11
0
    def record(self, entity, start, end, size, timestamps):
        """
        Record the stocks that belong to the block ``entity``.

        A single request is made to ``self.category_stocks_url`` (formatted
        with the block code and ``"1"``). The body is parsed with
        ``json_callback_param`` into comma-separated strings whose second and
        third fields are used as stock code and stock name. ``start``, ``end``,
        ``size`` and ``timestamps`` are not used.

        Any exception raised while parsing or persisting is logged; the method
        always finishes with ``self.sleep()``.
        """
        resp = requests.get(self.category_stocks_url.format(entity.code, "1"), headers=DEFAULT_HEADER)
        try:
            results = json_callback_param(resp.text)
            block_id = entity.id
            the_list = []
            for result in results:
                items = result.split(",")
                stock_code = items[1]
                stock_id = china_stock_code_to_id(stock_code)

                the_list.append(
                    {
                        "id": "{}_{}".format(block_id, stock_id),
                        "entity_id": block_id,
                        "entity_type": "block",
                        "exchange": entity.exchange,
                        "code": entity.code,
                        "name": entity.name,
                        "timestamp": now_pd_timestamp(),
                        "stock_id": stock_id,
                        "stock_code": stock_code,
                        "stock_name": items[2],
                    }
                )
            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)

            self.logger.info("finish recording block:{},{}".format(entity.category, entity.name))

        except Exception as e:
            # logging %-formats the message with its args, so every arg needs a
            # placeholder; without them the record fails to format and the
            # details are lost.
            self.logger.error("error:%s,resp.text:%s", e, resp.text)
        self.sleep()
    def download_stock_list(self, response, exchange):
        """
        Parse a downloaded exchange stock list and persist it.

        :param response: HTTP response whose ``content`` holds the listing:
            whitespace-separated GB2312 text for ``'sh'``, an Excel workbook
            with an ``A股列表`` sheet for ``'sz'``
        :param exchange: ``'sh'`` or ``'sz'``; any other value persists nothing

        The resulting rows are written to both ``self.data_schema`` and
        ``StockDetail`` with ``force_update=False``.
        """
        df = None
        if exchange == 'sh':
            # Raw string: '\s' in a plain literal is an invalid escape sequence.
            df = pd.read_csv(io.BytesIO(response.content), sep=r'\s+', encoding='GB2312', dtype=str,
                             parse_dates=['上市日期'])
            df = df.loc[:, ['公司代码', '公司简称', '上市日期']]

        elif exchange == 'sz':
            df = pd.read_excel(io.BytesIO(response.content), sheet_name='A股列表', dtype=str, parse_dates=['A股上市日期'])
            df = df.loc[:, ['A股代码', 'A股简称', 'A股上市日期']]

        if df is not None:
            df.columns = ['code', 'name', 'list_date']

            df = df.dropna(subset=['code'])

            # Handle dirty source data: the listing date of 600996 is pinned by hand.
            # 600996,贵广网络,2016-12-26,2016-12-26,sh,stock,stock_sh_600996,,次新股,贵州,,
            df.loc[df['code'] == '600996', 'list_date'] = '2016-12-26'
            df['list_date'] = df['list_date'].apply(to_pd_timestamp)
            df['exchange'] = exchange
            df['entity_type'] = EntityType.Stock.value
            # id is "<entity_type>_<exchange>_<code>"
            df['id'] = df[['entity_type', 'exchange', 'code']].apply(lambda x: '_'.join(x.astype(str)), axis=1)
            df['entity_id'] = df['id']
            df['timestamp'] = df['list_date']
            df = df.dropna(axis=0, how='any')
            df = df.drop_duplicates(subset='id', keep='last')
            df_to_db(df=df, region=Region.CHN, data_schema=self.data_schema, provider=self.provider, force_update=False)
            # persist StockDetail too
            df_to_db(df=df, region=Region.CHN, data_schema=StockDetail, provider=self.provider, force_update=False)
            self.logger.info("persist stock list successs")
Esempio n. 13
0
    def run(self):
        """
        Fetch the ``EMM00166466`` series from ``c.edb`` and persist it as daily data.

        The query starts at the latest timestamp already stored for
        ``bond_cn_EMM00166466`` (or ``2007-01-01`` when nothing is stored) and
        ends today. Each row gets the id ``"<entity_id>_<day>"``.
        """
        from zvt.api import get_kdata

        stored = get_kdata(entity_id='bond_cn_EMM00166466')
        today = to_time_str(now_pd_timestamp())
        # Start in 2007 on the first run, otherwise resume from the newest stored row.
        start = '2007-01-01' if stored.empty else to_time_str(stored.timestamp.max())

        # EMM00166466: ChinaBond government bond yield to maturity, 10 years
        df = c.edb("EMM00166466", f"IsLatest=0,StartDate={start},EndDate={today},ispandas=1")
        if not pd_is_not_null(df):
            return

        df['name'] = "中债国债到期收益率:10年"
        df.rename(columns={'RESULT': 'data_value', 'DATES': 'timestamp'}, inplace=True)

        df['entity_id'] = 'bond_cn_EMM00166466'
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'emquantapi'
        df['exchange'] = 'cn'
        df['level'] = '1d'
        df['code'] = "EMM00166466"

        df['id'] = df[['entity_id', 'timestamp']].apply(
            lambda row: "{}_{}".format(row['entity_id'], to_time_str(row['timestamp'], fmt=TIME_FORMAT_DAY)),
            axis=1)

        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)
    def record(self, entity, start, end, size, timestamps, http_session):
        """
        Record trading days returned by ``bao_get_trade_days`` starting at ``start``.

        ``start`` is replaced by the timestamp of the newest stored
        ``StockTradeDay`` row for ``self.region``, or by ``"1990-12-19"`` when
        no row exists. Only calendar rows flagged ``is_trading_day == '1'`` are
        saved, with the fixed ``entity_id`` ``'chn'``. ``entity``, ``end``,
        ``size``, ``timestamps`` and ``http_session`` are not used.
        """
        try:
            trade_day = StockTradeDay.query_data(
                region=self.region,
                limit=1,
                order=StockTradeDay.timestamp.desc(),
                return_type='domain')
            if len(trade_day) > 0:
                start = trade_day[0].timestamp
            else:
                start = "1990-12-19"
        except Exception as e:
            # Best effort: keep the caller-supplied ``start``, but leave a trace
            # instead of hiding the failure completely.
            self.logger.warning(
                "failed to query latest trade day, keep start=%s: %s", start, e)

        df = pd.DataFrame()
        dates_df = bao_get_trade_days(start_date=start)
        dates = dates_df[dates_df['is_trading_day'] ==
                         '1']['calendar_date'].to_list()
        self.logger.info(f'add dates:{dates}')
        df['timestamp'] = pd.to_datetime(dates)
        # The day string doubles as the primary key of each row.
        df['id'] = [to_time_str(date) for date in dates]
        df['entity_id'] = 'chn'

        df_to_db(df=df,
                 region=self.region,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)
Esempio n. 15
0
 def run(self):
     """
     Fetch the tradable list for entity type ``"stockus"``, log it and persist it.

     The frame from ``em_api.get_tradable_list`` is passed unchanged to
     ``df_to_db`` together with this recorder's schema, provider and
     ``force_update`` flag.
     """
     tradable_df = em_api.get_tradable_list(entity_type="stockus")
     self.logger.info(tradable_df)

     persist_options = dict(data_schema=self.data_schema,
                            provider=self.provider,
                            force_update=self.force_update)
     df_to_db(df=tradable_df, **persist_options)
Esempio n. 16
0
 def fetch_szse_index(self, sz_data) -> None:
     """
     Fetch the Shenzhen index list and persist it under the ``Index`` schema.

     :param sz_data: iterable of index codes; duplicates are removed and
         codes longer than 9 characters are skipped

     For every remaining code the fields named by ``self.colums_map`` are
     requested from ``c.css`` for ``self.now_date``. The first six characters
     of the code become the ``code`` column. Nothing is written when no code
     produced data.
     """
     frames = []
     for em_code in set(sz_data):
         if len(em_code) > 9:
             continue
         data = c.css(em_code, list(self.colums_map),
                      "TradeDate=" + self.now_date + ",ispandas=1")
         data['code'] = em_code[:6]
         frames.append(data)

     if not frames:
         # Nothing to rename or persist; avoids failing on the missing columns below.
         self.logger.warning('no szse index data fetched')
         return

     # DataFrame.append was removed in pandas 2.0; concatenate once instead.
     df = pd.concat(frames)
     df = df.rename(columns=self.colums_map)
     df['timestamp'] = pd.to_datetime(df.list_date)
     df['exchange'] = 'sz'
     df['category'] = 'main'
     df['entity_type'] = 'index'
     df['entity_id'] = df.apply(
         lambda x: 'index' + '_' + 'sz' + '_' + x.code, axis=1)
     df['id'] = df['entity_id']
     df_to_db(df=df,
              data_schema=Index,
              provider=self.provider,
              force_update=False)
     self.logger.info('深证指数列表写入完成...')
Esempio n. 17
0
    def record(self, entity, start, end, size, timestamps):
        """
        Record share-pledge rows from ``finance.STK_SHARES_PLEDGE`` for ``entity``.

        Rows published on or after ``start`` are fetched. Within each
        publication date the rows are numbered from 1, and that number is the
        last part of the id ``"<entity_id>_<day>_<n>"``. Always returns ``None``.
        """
        pledge = finance.STK_SHARES_PLEDGE
        pledge_query = query(pledge).filter(
            pledge.code == to_jq_entity_id(entity)
        ).filter(
            pledge.pub_date >= to_time_str(start)
        )
        df = finance.run_query(pledge_query)

        if not pd_is_not_null(df):
            return None

        df['name'] = entity.name
        df['entity_id'] = entity.id
        df['pub_date'] = pd.to_datetime(df['pub_date'])
        df['timestamp'] = df['pub_date']
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        # Restart the row index inside every publication date, then make it 1-based.
        per_day_frames = [
            day_df.reset_index(drop=True)
            for _, day_df in df.groupby('timestamp')
        ]
        df = pd.concat(per_day_frames)
        df.index += 1

        # ``row.name`` is the 1-based index label assigned above.
        df['id'] = df[['entity_id', 'timestamp']].apply(
            lambda row: "{}_{}_{}".format(
                row['entity_id'],
                to_time_str(row['timestamp'], fmt=TIME_FORMAT_DAY),
                row.name),
            axis=1)

        df_to_db(df=df,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)
        return None
Esempio n. 18
0
    def fetch_csi_index(self, sh_data) -> None:
        """
        Fetch the Shanghai / CSI index list and persist it under the ``Index`` schema.

        :param sh_data: iterable of index codes; duplicates are removed and
            codes longer than 10 characters are skipped

        For every remaining code the fields named by ``self.colums_map`` are
        requested from ``c.css`` for ``self.now_date``. Codes whose result does
        not accept a ``code`` column (``TypeError``) are printed and skipped.
        Nothing is written when no code produced data.
        """
        frames = []
        for em_code in set(sh_data):
            if len(em_code) > 10:
                continue
            data = c.css(em_code, list(self.colums_map),
                         "TradeDate=" + self.now_date + ",ispandas=1")
            try:
                data['code'] = em_code[:6]
            except TypeError:
                print(em_code)
                continue
            frames.append(data)

        if not frames:
            # Nothing to rename or persist; avoids failing on the missing columns below.
            self.logger.warning('no csi index data fetched')
            return

        # DataFrame.append was removed in pandas 2.0; concatenate once instead.
        df = pd.concat(frames)
        df = df.rename(columns=self.colums_map)
        df['timestamp'] = pd.to_datetime(df.list_date)
        df['exchange'] = 'sh'
        df['category'] = 'main'
        df['entity_type'] = 'index'
        df['entity_id'] = df.apply(
            lambda x: 'index' + '_' + 'sh' + '_' + x.code, axis=1)
        df['id'] = df['entity_id']

        df_to_db(df=df,
                 data_schema=Index,
                 provider=self.provider,
                 force_update=False)

        self.logger.info('上证、中证指数列表写入完成...')
Esempio n. 19
0
    def run(self):
        """
        Fetch the fund list from ``finance.FUND_MAIN_INFO`` and persist it as ``FundDetail``.

        ``start_date`` is used as both ``timestamp`` and ``list_date``. The
        entity id is derived from ``main_code``, and exchange and code are
        derived from the entity id. ``logout()`` is called at the end.
        """
        # Fetch the fund list.
        df = finance.run_query(query(finance.FUND_MAIN_INFO))

        # Turn the row index into a leading ``entity_id`` column; its values are replaced below.
        df.index.name = 'entity_id'
        df = df.reset_index()

        # Listing date
        df = df.rename(columns={'start_date': 'timestamp'})
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['list_date'] = df['timestamp']
        df['end_date'] = pd.to_datetime(df['end_date'])

        df['entity_id'] = df['main_code'].apply(
            lambda main_code: to_entity_id(entity_type='fund',
                                           jq_code=normalize_code(main_code)))
        df['id'] = df['entity_id']
        df['entity_type'] = 'fund'
        df['exchange'] = df['entity_id'].apply(get_entity_exchange)
        df['code'] = df['entity_id'].apply(get_entity_code)
        df['category'] = 'fund'

        df_to_db(df,
                 data_schema=FundDetail,
                 provider=self.provider,
                 force_update=self.force_update)

        self.logger.info("persist etf list success")
        logout()
Esempio n. 20
0
    def persist_factor(self):
        """
        Persist the per-entity factor states and the factor DataFrame.

        Every non-empty entry of ``self.states`` is JSON-encoded with
        ``FactorStateEncoder`` and stored as a ``FactorState`` row keyed by
        ``"<factor_name>_<entity_id>"``: updated when the row exists, added
        otherwise, followed by a single commit. Afterwards a copy of
        ``self.factor_df`` with its ``zhongshu`` column JSON-encoded is written
        through ``df_to_db``.
        """
        if self.states:
            session = get_db_session(provider='zvt', data_schema=FactorState)
            for entity_id, state in self.states.items():
                if not state:
                    continue

                state_key = f'{self.factor_name}_{entity_id}'
                stored = session.query(FactorState).get(state_key)
                encoded_state = json.dumps(state, cls=FactorStateEncoder)

                if stored:
                    stored.state = encoded_state
                    continue

                session.add(FactorState(id=state_key,
                                        entity_id=entity_id,
                                        factor_name=self.factor_name,
                                        state=encoded_state))
            session.commit()

        def encode(value):
            return json.dumps(value, cls=FactorStateEncoder)

        # Work on a copy so ``self.factor_df`` keeps its un-encoded values.
        factor_df = self.factor_df.copy()
        factor_df['zhongshu'] = factor_df['zhongshu'].apply(encode)

        df_to_db(df=factor_df,
                 data_schema=self.factor_schema,
                 provider='zvt',
                 force_update=False)
    def run(self):
        """
        Download and persist the block list for every entry of ``self.category_map_url``.

        Each response is decoded as GBK. The text between the first ``{`` and
        the first ``}`` is parsed as a JSON object mapping a block code to a
        comma-separated string whose second field is used as the block name.
        """
        for category, url in self.category_map_url.items():
            response = requests.get(url)
            response.encoding = 'GBK'

            body = response.text
            first_brace = body.index('{')
            closing_brace = body.index('}')
            blocks = json.loads(body[first_brace:closing_brace + 1])

            records = []
            for block_code, raw_value in blocks.items():
                block_id = f'block_cn_{block_code}'
                records.append(dict(
                    id=block_id,
                    entity_id=block_id,
                    entity_type='block',
                    exchange='cn',
                    code=block_code,
                    name=raw_value.split(',')[1],
                    category=category.value,
                ))

            if records:
                df_to_db(data_schema=self.data_schema,
                         df=pd.DataFrame.from_records(records),
                         provider=self.provider,
                         force_update=True)

            self.logger.info(f"finish record sina blocks:{category.value}")
Esempio n. 22
0
    def run(self):
        """
        Fetch the fund list from ``finance.FUND_MAIN_INFO`` and persist it as ``FundDetail``.

        ``start_date`` is used as both ``timestamp`` and ``list_date``. The
        entity id is derived from ``main_code``, and exchange and code are
        derived from the entity id. ``logout()`` is called at the end.
        """
        # Fetch the fund list.
        fund_df = finance.run_query(query(finance.FUND_MAIN_INFO))

        # Turn the row index into a leading ``entity_id`` column; its values are replaced below.
        fund_df.index.name = 'entity_id'
        fund_df = fund_df.reset_index()

        # Listing date
        fund_df = fund_df.rename(columns={'start_date': 'timestamp'})
        for date_column in ('timestamp', 'end_date'):
            fund_df[date_column] = pd.to_datetime(fund_df[date_column])
        # ``list_date`` sits directly after the existing columns; ``end_date`` was only
        # converted in place, so the column order is unchanged.
        fund_df['list_date'] = fund_df['timestamp']

        normalized_codes = fund_df['main_code'].apply(normalize_code)
        fund_df['entity_id'] = normalized_codes.apply(
            lambda jq_code: to_entity_id(entity_type='fund', jq_code=jq_code))

        fund_df['id'] = fund_df['entity_id']
        fund_df['entity_type'] = 'fund'
        fund_df['exchange'] = fund_df['entity_id'].apply(get_entity_exchange)
        fund_df['code'] = fund_df['entity_id'].apply(get_entity_code)
        fund_df['category'] = 'fund'

        df_to_db(fund_df,
                 data_schema=FundDetail,
                 provider=self.provider,
                 force_update=self.force_update)

        self.logger.info("persist etf list success")
        logout()
    def record(self, entity, start, end, size, timestamps):
        """
        Record up to 20 dividend rows from ``finance.FUND_DIVIDEND`` for ``entity``.

        Rows published on or after ``start`` are fetched, renamed with
        ``FundDividendDetail.get_data_map`` and dropped when ``dividend_date``
        is missing. Each remaining row gets the id
        ``"<entity_id>_<announce day>_<n>"`` where ``n`` is its 1-based
        position. ``end``, ``size`` and ``timestamps`` are not used. Always
        returns ``None``.
        """
        start = to_time_str(start)

        df = finance.run_query(
            query(finance.FUND_DIVIDEND).filter(
                finance.FUND_DIVIDEND.code == entity.code,
                finance.FUND_DIVIDEND.pub_date >= start).limit(20))
        df.rename(columns=FundDividendDetail.get_data_map(self), inplace=True)
        df.dropna(subset=['dividend_date'], inplace=True)
        if pd_is_not_null(df):
            # 1-based positional index; it becomes the trailing sequence number of each id.
            df.reset_index(drop=True, inplace=True)
            df.index += 1

            df['entity_id'] = entity.id
            df['timestamp'] = pd.to_datetime(df.announce_date)
            df['provider'] = 'joinquant'
            df['code'] = entity.code

            def generate_id(se):
                # ``se.name`` is the row's index label set above.
                return "{}_{}_{}".format(
                    se['entity_id'],
                    to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY), se.name)

            df['id'] = df[['entity_id', 'timestamp']].apply(generate_id,
                                                            axis=1)
            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
        return None
Esempio n. 24
0
    def record(self, entity, start, end, size, timestamps):
        """
        Record stock holdings from ``finance.FUND_PORTFOLIO_STOCK`` for the fund ``entity``.

        Rows published on or after ``start`` are fetched. ``symbol``/``name``
        are renamed to ``stock_code``/``stock_name``, ``proportion`` is scaled
        by 0.01, and the frame is passed through ``portfolio_relate_stock``
        before ids and report fields are added. Always returns ``None``.
        """
        portfolio = finance.FUND_PORTFOLIO_STOCK
        portfolio_query = query(portfolio).filter(portfolio.pub_date >= start).filter(
            portfolio.code == entity.code)
        df = finance.run_query(portfolio_query)

        if not pd_is_not_null(df):
            return None

        # Columns read below from the query result: id, period_end, pub_date,
        # report_type, symbol, name, proportion.
        df['timestamp'] = pd.to_datetime(df['pub_date'])

        df = df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'})
        df['proportion'] = df['proportion'] * 0.01

        # NOTE(review): ``entity_id`` used below is presumably added by this helper - confirm.
        df = portfolio_relate_stock(df, entity)

        df['stock_id'] = df['stock_code'].apply(china_stock_code_to_id)
        # New id joins entity id, stock id, publication date and the source row id.
        id_parts = df[['entity_id', 'stock_id', 'pub_date', 'id']]
        df['id'] = id_parts.apply(lambda row: '_'.join(row.astype(str)), axis=1)
        df['report_date'] = pd.to_datetime(df['period_end'])
        df['report_period'] = df['report_type'].apply(jq_to_report_period)

        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

        self.logger.info(f"persist etf {entity.code} portfolio success")

        return None
Esempio n. 25
0
    def record(self, entity, start, end, size, timestamps):
        """
        Record rows of ``finance.STK_HK_HOLD_INFO`` for ``entity`` on each given day.

        One query is issued per element of ``timestamps``, filtered by
        ``link_id == entity.code`` and that day. The result is printed. When it
        holds data, the columns are renamed and each row gets the id
        ``"<holder_name>_<entity_id>_<day>"`` before being persisted.
        ``start``, ``end`` and ``size`` are not used.
        """
        renamed_columns = {
            'day': 'timestamp',
            'link_id': 'holder_code',
            'link_name': 'holder_name'
        }

        def build_id(row):
            # id format: {holder_name}_{entity_id}_{timestamp}
            return "{}_{}_{}".format(
                row['holder_name'], row['entity_id'],
                to_time_str(row['timestamp'], fmt=TIME_FORMAT_DAY))

        for timestamp in timestamps:
            conditions = f'link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}'
            df = run_query(table='finance.STK_HK_HOLD_INFO',
                           conditions=conditions)
            print(df)

            if not pd_is_not_null(df):
                continue

            df.rename(columns=renamed_columns, inplace=True)
            df['timestamp'] = pd.to_datetime(df['timestamp'])

            # Derive the entity id from the full code before the suffix is stripped.
            df['entity_id'] = df['code'].apply(
                lambda jq_code: to_entity_id(entity_type='stock', jq_code=jq_code))
            df['code'] = df['code'].apply(lambda jq_code: jq_code.split('.')[0])

            df['id'] = df[['holder_name', 'entity_id', 'timestamp']].apply(
                build_id, axis=1)

            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
Esempio n. 26
0
    def record(self, entity, start, end, size, timestamps):
        """
        Record the stocks that belong to the block ``entity``.

        A single request is made to ``self.category_stocks_url`` (formatted
        with the block code and ``'1'``). The body is parsed with
        ``json_callback_param`` into comma-separated strings whose second and
        third fields are used as stock code and stock name. ``start``, ``end``,
        ``size`` and ``timestamps`` are not used.

        Any exception raised while parsing or persisting is logged; the method
        always finishes with ``self.sleep()``.
        """
        resp = requests.get(self.category_stocks_url.format(entity.code, '1'))
        try:
            results = json_callback_param(resp.text)
            block_id = entity.id
            the_list = []
            for result in results:
                items = result.split(',')
                stock_code = items[1]
                stock_id = china_stock_code_to_id(stock_code)

                the_list.append({
                    'id': '{}_{}'.format(block_id, stock_id),
                    'entity_id': block_id,
                    'entity_type': 'block',
                    'exchange': entity.exchange,
                    'code': entity.code,
                    'name': entity.name,
                    'timestamp': now_pd_timestamp(),
                    'stock_id': stock_id,
                    'stock_code': stock_code,
                    'stock_name': items[2],
                })
            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)

            self.logger.info('finish recording block:{},{}'.format(entity.category, entity.name))

        except Exception as e:
            # logging %-formats the message with its args, so every arg needs a
            # placeholder; without them the record fails to format and the
            # details are lost.
            self.logger.error("error:%s,resp.text:%s", e, resp.text)
        self.sleep()
Esempio n. 27
0
    def record(self, entity, start, end, size, timestamps):
        """Query and persist holding records for every requested timestamp.

        For each entry of ``timestamps`` the table
        ``finance.STK_HK_HOLD_INFO`` is queried via ``run_query`` with
        ``link_id`` equal to ``entity.code`` and ``day`` equal to the
        formatted timestamp. Non-null results are renamed to the schema's
        column names, given ``entity_id`` / ``code`` / ``id`` columns and
        written with ``df_to_db``.

        ``start``, ``end`` and ``size`` are not used here.
        """
        for timestamp in timestamps:
            df = run_query(
                table="finance.STK_HK_HOLD_INFO",
                conditions=
                f"link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}")

            if not pd_is_not_null(df):
                continue

            df = df.rename(columns={
                "day": "timestamp",
                "link_id": "holder_code",
                "link_name": "holder_name",
            })
            df["timestamp"] = pd.to_datetime(df["timestamp"])

            # entity_id must be derived from the full provider code before
            # the suffix after "." is stripped from ``code``.
            df["entity_id"] = df["code"].apply(
                lambda x: to_entity_id(entity_type="stock", jq_code=x))
            df["code"] = df["code"].apply(lambda x: x.split(".")[0])

            # id format: {holder_name}_{entity_id}_{timestamp}
            df["id"] = df[["holder_name", "entity_id", "timestamp"]].apply(
                lambda se: "{}_{}_{}".format(
                    se["holder_name"], se["entity_id"],
                    to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY)),
                axis=1,
            )

            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
    def download_stock_list(self, response, exchange):
        """Parse a downloaded stock-list CSV and persist it.

        ``response.content`` is read as a UTF-8 CSV with every column kept as
        a string. The ``Symbol`` / ``Name`` / ``IPOyear`` / ``Sector`` columns
        are renamed, a missing listing year defaults to ``'1980'``, and
        ``id`` / ``entity_id`` are built as
        ``{entity_type}_{exchange}_{code}``. Rows with any missing value are
        dropped and duplicate ids keep their last occurrence.

        The result is written twice with ``df_to_db``: first to
        ``StockDetail`` (including ``industry`` and ``sector``), then to
        ``self.data_schema`` without those two columns.

        :param response: object exposing the raw CSV bytes as ``content``
        :param exchange: exchange value stored on every row and used in ids
        """
        df = pd.read_csv(io.BytesIO(response.content), encoding='UTF8', dtype=str)

        # read_csv never returns None; a frame without rows is the real
        # "nothing to do" case, so stop before touching the schema helpers.
        if df is None or df.empty:
            self.logger.warning("stock list is empty, nothing to persist")
            return

        df = df.rename(columns={'Symbol': 'code', 'Name': 'name', 'IPOyear': 'list_date', 'Sector': 'sector'})
        # Take an explicit copy: assigning columns on a bare column subset
        # triggers pandas' SettingWithCopyWarning.
        df = df[['code', 'name', 'list_date', 'industry', 'sector']].copy()

        df['list_date'] = df['list_date'].fillna('1980')

        df['list_date'] = df['list_date'].apply(lambda x: to_pd_timestamp(x))
        df['exchange'] = exchange
        df['entity_type'] = EntityType.Stock.value
        df['id'] = df[['entity_type', 'exchange', 'code']].apply(lambda x: '_'.join(x.astype(str)), axis=1)
        df['entity_id'] = df['id'].str.strip()
        df['timestamp'] = df['list_date']
        df = df.dropna(axis=0, how='any')
        df = df.drop_duplicates(subset=('id'), keep='last')

        # persist StockDetail
        df_to_db(df=df, region=Region.US, data_schema=StockDetail, provider=self.provider, force_update=True)

        # the plain stock schema is written without the detail-only columns
        df = df.drop(['industry', 'sector'], axis=1)
        df_to_db(df=df, region=Region.US, data_schema=self.data_schema, provider=self.provider, force_update=True)

        self.logger.info("persist stock list successs")
Esempio n. 29
0
    def persist_factor(self):
        """Write the factor frame to the database under provider ``zvt``.

        Works on a copy of ``self.factor_df``. Columns named by
        ``self.factor_col_map_object_hook()`` are serialized to JSON strings
        with ``self.state_encoder()`` first. When ``self.states`` is empty the
        whole frame is saved in one call; otherwise each entity's state is
        persisted (if truthy) and its rows are saved separately. A failure for
        one entity is logged and that entity's state data is cleared, without
        stopping the remaining entities.
        """
        factor_data = self.factor_df.copy()
        #: serialize the hooked columns to json strings
        if pd_is_not_null(factor_data) and self.factor_col_map_object_hook():
            for column in self.factor_col_map_object_hook():
                if column not in factor_data.columns:
                    continue
                factor_data[column] = factor_data[column].apply(
                    lambda value: json.dumps(value, cls=self.state_encoder()))

        if not self.states:
            #: nothing is tracked per entity, so save the frame in one go
            df_to_db(df=factor_data,
                     data_schema=self.factor_schema,
                     provider="zvt",
                     force_update=False)
            return

        grouped = factor_data.groupby(level=0)
        for entity_id in self.states:
            state = self.states[entity_id]
            try:
                if state:
                    self.persist_state(entity_id=entity_id)
                if entity_id in grouped.groups:
                    entity_rows = factor_data.loc[(entity_id, )]
                    df_to_db(df=entity_rows,
                             data_schema=self.factor_schema,
                             provider="zvt",
                             force_update=False)
            except Exception as e:
                self.logger.error(
                    f"{self.name} {entity_id} save state error")
                self.logger.exception(e)
                #: drop the stored state for this entity after a failure
                self.clear_state_data(entity_id)
Esempio n. 30
0
    def record(self, entity, start, end, size, timestamps):
        """Record the member stocks of a ``gics`` block.

        Blocks whose ``block_type`` is not ``'gics'`` are skipped. Otherwise
        the sector members are requested via ``c.sector`` for ``entity.code``
        and today's date, split into codes (items containing ``.SH`` or
        ``.SZ``) and names (all other items), and written with ``df_to_db``
        using ``force_update=True``.

        ``start``, ``end``, ``size`` and ``timestamps`` are not used here.

        :return: always ``None``
        """
        if entity.block_type != 'gics':
            return None
        industry_stocks = c.sector(entity.code,
                                   to_time_str(now_pd_timestamp()))
        if len(industry_stocks.Data) == 0:
            return None

        # NOTE(review): presumably ``Data`` is a flat list mixing stock codes
        # and stock names in matching order - confirm against the provider.
        codes = [i for i in industry_stocks.Data if '.SH' in i or '.SZ' in i]
        names = [
            i for i in industry_stocks.Data
            if '.SH' not in i and '.SZ' not in i
        ]
        # Stop before building the frame: with no matching codes the two
        # lists differ in length and the DataFrame constructor would raise,
        # so an emptiness check after construction can never take effect.
        if not codes:
            return None

        df = pd.DataFrame({"stock": codes, "stock_name": names})
        df["stock_id"] = df.stock.apply(
            lambda x: to_entity_id(x, "stock").lower())
        # third "_"-separated part of the stock id is taken as the code
        df["stock_code"] = df.stock_id.str.split("_", expand=True)[2]
        df["code"] = entity.code
        df["exchange"] = entity.exchange
        df["name"] = entity.name
        df["timestamp"] = now_pd_timestamp()
        df["entity_id"] = entity.id
        df["block_type"] = entity.block_type
        df["entity_type"] = "block"
        df["id"] = df["entity_id"] + "_" + df["stock_id"]

        df_to_db(data_schema=self.data_schema,
                 df=df,
                 provider=self.provider,
                 force_update=True)

        self.logger.info('finish recording BlockStock:{},{}'.format(
            entity.category, entity.name))