def fetch_csi_index_component(self):
    """ Fetch the constituent stocks of each SSE/CSI index in ``self.all_index``. """
    for _, index in self.all_index.iterrows():
        response_df = pd.DataFrame()
        # index.name is the full code like '000300.XSHG'; keep the numeric part
        index_code = index.name.split(".")[0]
        stocks = get_index_stocks(index.name)
        response_df['stock_code'] = stocks
        # strip the exchange suffix from each constituent code as well
        response_df['stock_code'] = response_df['stock_code'].apply(
            lambda x: x.split(".")[0])
        index_id = f'index_cn_{index_code}'
        response_df['entity_id'] = index_id
        response_df['entity_type'] = 'index'
        response_df['exchange'] = 'cn'
        response_df['code'] = index_code
        response_df['name'] = index.display_name
        response_df['timestamp'] = now_pd_timestamp()
        response_df['stock_id'] = response_df['stock_code'].apply(
            lambda x: china_stock_code_to_id(str(x)))
        # one row per (index, stock) pair
        response_df['id'] = response_df['stock_id'].apply(
            lambda x: f'{index_id}_{x}')
        df_to_db(data_schema=self.data_schema, df=response_df,
                 provider=self.provider, force_update=True)
        self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')
        self.sleep()
def record_cs_index(self, index_type):
    """Fetch the CS index list for *index_type* and persist it."""
    index_df = cs_index_api.get_cs_index(index_type=index_type)
    df_to_db(
        data_schema=self.data_schema,
        df=index_df,
        provider=self.provider,
        force_update=True,
    )
    self.logger.info(f"finish record {index_type} index")
def run(self):
    """Fetch block definitions for every configured category and persist them."""
    http_session = get_http_session()
    for category, url in self.category_map_url.items():
        resp = request_get(http_session, url)
        raw_items = json_callback_param(resp.text)
        records = []
        for raw in raw_items:
            fields = raw.split(',')
            block_code, block_name = fields[1], fields[2]
            eid = f'block_cn_{block_code}'
            records.append({
                'id': eid,
                'entity_id': eid,
                'entity_type': EntityType.Block.value,
                'exchange': 'cn',
                'code': block_code,
                'name': block_name,
                'category': category.value,
            })
        if records:
            df_to_db(df=pd.DataFrame.from_records(records),
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=True)
        self.logger.info(f"finish record sina blocks:{category.value}")
def record(self, entity, start, end, size, timestamps):
    """Fetch the stocks belonging to a sina block, page by page, and persist them.

    The endpoint is paged; pages 1-4 are fetched until the server returns 'null'.
    """
    for page in range(1, 5):
        resp = requests.get(self.category_stocks_url.format(page, entity.code))
        try:
            # Check None first, then the 'null' sentinel body.
            if resp.text is None or resp.text == 'null':
                break
            category_jsons = demjson.decode(resp.text)
            the_list = []
            for category in category_jsons:
                stock_code = category['code']
                stock_id = china_stock_code_to_id(stock_code)
                block_id = entity.id
                the_list.append({
                    'id': '{}_{}'.format(block_id, stock_id),
                    'entity_id': block_id,
                    'entity_type': 'block',
                    'exchange': entity.exchange,
                    'code': entity.code,
                    'name': entity.name,
                    'timestamp': now_pd_timestamp(),
                    'stock_id': stock_id,
                    'stock_code': stock_code,
                    'stock_name': category['name'],
                })
            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema, df=df,
                         provider=self.provider, force_update=True)
            self.logger.info('finish recording BlockStock:{},{}'.format(entity.category, entity.name))
        except Exception as e:
            # BUG FIX: logger.error takes a %-format string; the original passed
            # extra args with no placeholders, corrupting the log record.
            self.logger.error("error: %s, resp.text: %s", e, resp.text)
        self.sleep()
def persist_etf_list(self, df: pd.DataFrame, exchange: str):
    """Normalize an exchange-specific ETF listing frame and persist it as Etf."""
    if df is None:
        return
    df = df.copy()
    # each exchange publishes different column headers for code/name
    column_picks = {'sh': ['FUND_ID', 'FUND_NAME'], 'sz': ['证券代码', '证券简称']}
    if exchange in column_picks:
        df = df[column_picks[exchange]]
    df.columns = ['code', 'name']
    df['id'] = df['code'].apply(lambda c: f'etf_{exchange}_{c}')
    df['entity_id'] = df['id']
    df['exchange'] = exchange
    df['entity_type'] = EntityType.ETF.value
    df['category'] = BlockCategory.etf.value
    df = df.dropna(axis=0, how='any').drop_duplicates(subset='id', keep='last')
    df_to_db(df=df, ref_df=None, region=Region.CHN, data_schema=Etf,
             provider=self.provider)
def record(self, entity, start, end, size, timestamps, http_session):
    """Record NYSE trade days, resuming from the last day already stored."""
    try:
        latest = StockTradeDay.query_data(
            region=self.region, limit=1,
            order=StockTradeDay.timestamp.desc(), return_type='domain')
        if latest:
            start = latest[0].timestamp
    except Exception as _:
        # best-effort: fall back to the caller-supplied start
        pass
    schedule = self.nyse.schedule(
        start_date=to_time_str(start),
        end_date=to_time_str(now_pd_timestamp(Region.US)))
    trade_dates = schedule.index.to_list()
    self.logger.info(f'add dates:{trade_dates}')
    df = pd.DataFrame()
    df['timestamp'] = pd.to_datetime(trade_dates)
    df['id'] = [to_time_str(d) for d in trade_dates]
    df['entity_id'] = 'nyse'
    df_to_db(df=df, region=self.region, data_schema=self.data_schema,
             provider=self.provider, force_update=self.force_update)
def record(self, entity, start, end, size, timestamps):
    """Record the locked-share release schedule for *entity*, looking ~150 days ahead."""
    horizon = to_time_str(now_pd_timestamp() + timedelta(days=150))
    df = get_locked_shares([to_jq_entity_id(entity)],
                           start_date=to_time_str(start),
                           end_date=horizon)
    if not pd_is_not_null(df):
        return None
    # rates arrive as fractions; store as percentages
    df['locked_rate1'] = df['rate1'] * 100
    df['locked_rate2'] = df['rate2'] * 100
    df['locked_num'] = df['num']
    df['entity_id'] = entity.id
    df['end_date'] = pd.to_datetime(df.day)
    df['timestamp'] = df['end_date']
    df['provider'] = 'joinquant'
    df['code'] = entity.code
    df['id'] = df[['entity_id', 'timestamp']].apply(
        lambda row: "{}_{}".format(
            row['entity_id'],
            to_time_str(row['timestamp'], fmt=TIME_FORMAT_DAY)),
        axis=1)
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
             force_update=self.force_update)
    return None
def run(self):
    """Fetch sina stock-block definitions (GBK-encoded JSONP) and persist them."""
    for category, url in self.category_map_url.items():
        resp = requests.get(url)
        resp.encoding = "GBK"
        body = resp.text
        # the payload is a JS object embedded in the response; cut out the braces
        payload = json.loads(body[body.index("{"): body.index("}") + 1])
        records = []
        for block_code in payload:
            block_name = payload[block_code].split(",")[1]
            eid = f"block_cn_{block_code}"
            records.append(
                {
                    "id": eid,
                    "entity_id": eid,
                    "entity_type": "block",
                    "exchange": "cn",
                    "code": block_code,
                    "name": block_name,
                    "category": category.value,
                }
            )
        if records:
            df_to_db(data_schema=self.data_schema,
                     df=pd.DataFrame.from_records(records),
                     provider=self.provider, force_update=True)
        self.logger.info(f"finish record sina blocks:{category.value}")
def record(self, entity, start, end, size, timestamps):
    """Fetch the stocks belonging to a sina block, page by page, and persist them.

    The endpoint is paged; pages 1-4 are fetched until the server returns "null".
    """
    for page in range(1, 5):
        resp = requests.get(self.category_stocks_url.format(page, entity.code))
        try:
            # Check None first, then the "null" sentinel body.
            if resp.text is None or resp.text == "null":
                break
            category_jsons = demjson3.decode(resp.text)
            the_list = []
            for category in category_jsons:
                stock_code = category["code"]
                stock_id = china_stock_code_to_id(stock_code)
                block_id = entity.id
                the_list.append(
                    {
                        "id": "{}_{}".format(block_id, stock_id),
                        "entity_id": block_id,
                        "entity_type": "block",
                        "exchange": entity.exchange,
                        "code": entity.code,
                        "name": entity.name,
                        "timestamp": now_pd_timestamp(),
                        "stock_id": stock_id,
                        "stock_code": stock_code,
                        "stock_name": category["name"],
                    }
                )
            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema, df=df,
                         provider=self.provider, force_update=True)
            self.logger.info("finish recording BlockStock:{},{}".format(entity.category, entity.name))
        except Exception as e:
            # BUG FIX: logger.error takes a %-format string; the original passed
            # extra args with no placeholders, corrupting the log record.
            self.logger.error("error: %s, resp.text: %s", e, resp.text)
        self.sleep()
def run(self):
    """Fetch eastmoney block definitions for every category and persist them."""
    for category, url in self.category_map_url.items():
        resp = requests.get(url, headers=DEFAULT_HEADER)
        rows = []
        for raw in json_callback_param(resp.text):
            fields = raw.split(",")
            block_code, block_name = fields[1], fields[2]
            eid = f"block_cn_{block_code}"
            rows.append(
                {
                    "id": eid,
                    "entity_id": eid,
                    "entity_type": "block",
                    "exchange": "cn",
                    "code": block_code,
                    "name": block_name,
                    "category": category.value,
                }
            )
        if rows:
            df_to_db(data_schema=self.data_schema,
                     df=pd.DataFrame.from_records(rows),
                     provider=self.provider,
                     force_update=self.force_update)
        self.logger.info(f"finish record eastmoney blocks:{category.value}")
def record(self, entity, start, end, size, timestamps):
    """Record the constituent stocks of one eastmoney block."""
    resp = requests.get(self.category_stocks_url.format(entity.code, "1"),
                        headers=DEFAULT_HEADER)
    try:
        results = json_callback_param(resp.text)
        the_list = []
        for result in results:
            items = result.split(",")
            stock_code = items[1]
            stock_id = china_stock_code_to_id(stock_code)
            block_id = entity.id
            the_list.append(
                {
                    "id": "{}_{}".format(block_id, stock_id),
                    "entity_id": block_id,
                    "entity_type": "block",
                    "exchange": entity.exchange,
                    "code": entity.code,
                    "name": entity.name,
                    "timestamp": now_pd_timestamp(),
                    "stock_id": stock_id,
                    "stock_code": stock_code,
                    "stock_name": items[2],
                }
            )
        if the_list:
            df = pd.DataFrame.from_records(the_list)
            df_to_db(data_schema=self.data_schema, df=df,
                     provider=self.provider, force_update=True)
        self.logger.info("finish recording block:{},{}".format(entity.category, entity.name))
    except Exception as e:
        # BUG FIX: logger.error takes a %-format string; the original passed
        # extra args with no placeholders, corrupting the log record.
        self.logger.error("error: %s, resp.text: %s", e, resp.text)
    self.sleep()
def download_stock_list(self, response, exchange):
    """Parse the exchange-published stock list (SH text table / SZ xlsx) and persist it.

    :param response: HTTP response whose body is the raw listing file
    :param exchange: 'sh' or 'sz'; any other value is ignored
    """
    df = None
    if exchange == 'sh':
        # BUG FIX: use a raw string for the regex separator ('\s' is an
        # invalid escape in a plain string literal)
        df = pd.read_csv(io.BytesIO(response.content), sep=r'\s+',
                         encoding='GB2312', dtype=str, parse_dates=['上市日期'])
        if df is not None:
            df = df.loc[:, ['公司代码', '公司简称', '上市日期']]
    elif exchange == 'sz':
        df = pd.read_excel(io.BytesIO(response.content), sheet_name='A股列表',
                           dtype=str, parse_dates=['A股上市日期'])
        if df is not None:
            df = df.loc[:, ['A股代码', 'A股简称', 'A股上市日期']]

    if df is not None:
        df.columns = ['code', 'name', 'list_date']
        df = df.dropna(subset=['code'])

        # handle the dirty data
        # 600996,贵广网络,2016-12-26,2016-12-26,sh,stock,stock_sh_600996,,次新股,贵州,,
        df.loc[df['code'] == '600996', 'list_date'] = '2016-12-26'
        df['list_date'] = df['list_date'].apply(lambda x: to_pd_timestamp(x))
        df['exchange'] = exchange
        df['entity_type'] = EntityType.Stock.value
        df['id'] = df[['entity_type', 'exchange', 'code']].apply(
            lambda x: '_'.join(x.astype(str)), axis=1)
        df['entity_id'] = df['id']
        df['timestamp'] = df['list_date']
        df = df.dropna(axis=0, how='any')
        # BUG FIX: subset=('id') is just the string 'id'; pass a list explicitly
        df = df.drop_duplicates(subset=['id'], keep='last')
        df_to_db(df=df, region=Region.CHN, data_schema=self.data_schema,
                 provider=self.provider, force_update=False)
        # persist StockDetail too
        df_to_db(df=df, region=Region.CHN, data_schema=StockDetail,
                 provider=self.provider, force_update=False)
        # BUG FIX: typo 'successs' -> 'success'
        self.logger.info("persist stock list success")
def run(self):
    """Fetch the 10-year CGB yield series (EMM00166466) from Choice and persist it."""
    from zvt.api import get_kdata
    bond_data = get_kdata(entity_id='bond_cn_EMM00166466')
    now_date = to_time_str(now_pd_timestamp())
    if bond_data.empty:
        # first run: start from the beginning of 2007
        start = '2007-01-01'
    else:
        # incremental run: resume from the newest stored timestamp
        start = to_time_str(bond_data.timestamp.max())
    # EMM00166466 = China government bond yield to maturity: 10 years
    df = c.edb("EMM00166466", f"IsLatest=0,StartDate={start},EndDate={now_date},ispandas=1")
    if pd_is_not_null(df):
        df['name'] = "中债国债到期收益率:10年"
        df.rename(columns={'RESULT': 'data_value', 'DATES': 'timestamp'}, inplace=True)
        df['entity_id'] = 'bond_cn_EMM00166466'
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'emquantapi'
        df['exchange'] = 'cn'
        df['level'] = '1d'
        df['code'] = "EMM00166466"

        def generate_kdata_id(se):
            # id = entity_id + day, one record per trading day
            return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
def record(self, entity, start, end, size, timestamps, http_session):
    """Record CHN trading days from baostock, resuming after the last stored day."""
    try:
        saved = StockTradeDay.query_data(
            region=self.region, limit=1,
            order=StockTradeDay.timestamp.desc(), return_type='domain')
        start = saved[0].timestamp if len(saved) > 0 else "1990-12-19"
    except Exception as _:
        # best-effort: keep the caller-supplied start on any query failure
        pass
    calendar = bao_get_trade_days(start_date=start)
    trading = calendar[calendar['is_trading_day'] == '1']['calendar_date'].to_list()
    self.logger.info(f'add dates:{trading}')
    df = pd.DataFrame()
    df['timestamp'] = pd.to_datetime(trading)
    df['id'] = [to_time_str(d) for d in trading]
    df['entity_id'] = 'chn'
    df_to_db(df=df, region=self.region, data_schema=self.data_schema,
             provider=self.provider, force_update=self.force_update)
def run(self):
    """Fetch the tradable US stock list from eastmoney and persist it."""
    listing_df = em_api.get_tradable_list(entity_type="stockus")
    self.logger.info(listing_df)
    df_to_db(
        df=listing_df,
        data_schema=self.data_schema,
        provider=self.provider,
        force_update=self.force_update,
    )
def fetch_szse_index(self, sz_data) -> None:
    """ Fetch the SZSE index list via the Choice API and persist it.

    :param sz_data: iterable of EM index codes; codes longer than 9 chars are skipped
    """
    frames = []
    for em_code in set(sz_data):
        if len(em_code) > 9:
            continue
        data = c.css(em_code, [i for i in self.colums_map.keys()],
                     "TradeDate=" + self.now_date + ",ispandas=1")
        data['code'] = em_code[:6]
        frames.append(data)
    # BUG FIX: DataFrame.append was removed in pandas 2.0; collect the pieces
    # and concatenate once (also avoids quadratic copying).
    df = pd.concat(frames) if frames else pd.DataFrame()
    df = df.rename(columns=self.colums_map)
    df['timestamp'] = pd.to_datetime(df.list_date)
    df['exchange'] = 'sz'
    df['category'] = 'main'
    df['entity_type'] = 'index'
    df['entity_id'] = df.apply(
        lambda x: 'index' + '_' + 'sz' + '_' + x.code, axis=1)
    df['id'] = df['entity_id']
    df_to_db(df=df, data_schema=Index, provider=self.provider,
             force_update=False)
    self.logger.info('深证指数列表写入完成...')
def record(self, entity, start, end, size, timestamps):
    """Record share-pledge filings (STK_SHARES_PLEDGE) for *entity* since *start*."""
    df = finance.run_query(
        query(finance.STK_SHARES_PLEDGE).filter(
            finance.STK_SHARES_PLEDGE.code == to_jq_entity_id(entity)).
        filter(finance.STK_SHARES_PLEDGE.pub_date >= to_time_str(start)))
    if pd_is_not_null(df):
        df['name'] = entity.name
        df['entity_id'] = entity.id
        df['pub_date'] = pd.to_datetime(df.pub_date)
        df['timestamp'] = df['pub_date']
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        def generate_id(se):
            # id = entity_day_seq; se.name is the row's index label, used as a
            # sequence number to disambiguate multiple filings on the same day
            return "{}_{}_{}".format(
                se['entity_id'],
                to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY),
                se.name)

        # regroup rows by timestamp and reset each group's index to 0..k, so
        # after the +1 below the sequence part restarts at 1 per publication day
        df = pd.concat([
            i.reset_index(drop=True) for i in dict(list(df.groupby('timestamp'))).values()
        ])
        df.index += 1
        df['id'] = df[['entity_id', 'timestamp']].apply(generate_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def fetch_csi_index(self, sh_data) -> None:
    """ Fetch the SSE/CSI index list via the Choice API and persist it.

    :param sh_data: iterable of EM index codes; codes longer than 10 chars are skipped
    """
    frames = []
    for em_code in set(sh_data):
        if len(em_code) > 10:
            continue
        data = c.css(em_code, [i for i in self.colums_map.keys()],
                     "TradeDate=" + self.now_date + ",ispandas=1")
        try:
            data['code'] = em_code[:6]
        except TypeError:
            # c.css returned an error payload rather than a DataFrame; skip it
            print(em_code)
            continue
        frames.append(data)
    # BUG FIX: DataFrame.append was removed in pandas 2.0; collect the pieces
    # and concatenate once (also avoids quadratic copying).
    df = pd.concat(frames) if frames else pd.DataFrame()
    df = df.rename(columns=self.colums_map)
    df['timestamp'] = pd.to_datetime(df.list_date)
    df['exchange'] = 'sh'
    df['category'] = 'main'
    df['entity_type'] = 'index'
    df['entity_id'] = df.apply(
        lambda x: 'index' + '_' + 'sh' + '_' + x.code, axis=1)
    df['id'] = df['entity_id']
    df_to_db(df=df, data_schema=Index, provider=self.provider,
             force_update=False)
    self.logger.info('上证、中证指数列表写入完成...')
def run(self):
    """Fetch the joinquant fund master list and persist it as FundDetail."""
    df = finance.run_query(query(finance.FUND_MAIN_INFO))
    df.index.name = 'entity_id'
    df = df.reset_index()

    # start_date is the fund's listing date
    df.rename(columns={'start_date': 'timestamp'}, inplace=True)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['list_date'] = df['timestamp']
    df['end_date'] = pd.to_datetime(df['end_date'])

    jq_codes = df.main_code.apply(lambda code: normalize_code(code))
    df['entity_id'] = jq_codes.apply(
        lambda code: to_entity_id(entity_type='fund', jq_code=code))
    df['id'] = df['entity_id']
    df['entity_type'] = 'fund'
    df['exchange'] = df['entity_id'].apply(lambda eid: get_entity_exchange(eid))
    df['code'] = df['entity_id'].apply(lambda eid: get_entity_code(eid))
    df['category'] = 'fund'

    df_to_db(df, data_schema=FundDetail, provider=self.provider,
             force_update=self.force_update)
    self.logger.info("persist etf list success")
    logout()
def persist_factor(self):
    """Persist per-entity factor state to FactorState, then write the factor frame."""
    if self.states:
        session = get_db_session(provider='zvt', data_schema=FactorState)
        for entity_id in self.states:
            state = self.states[entity_id]
            if state:
                domain_id = f'{self.factor_name}_{entity_id}'
                # upsert: update the existing row if present, else insert
                factor_state: FactorState = session.query(FactorState).get(
                    domain_id)
                state_str = json.dumps(state, cls=FactorStateEncoder)
                if factor_state:
                    factor_state.state = state_str
                else:
                    factor_state = FactorState(
                        id=domain_id,
                        entity_id=entity_id,
                        factor_name=self.factor_name,
                        state=state_str)
                    session.add(factor_state)
        session.commit()
    df = self.factor_df.copy()
    # 'zhongshu' holds objects; serialize to JSON text before writing to db
    df['zhongshu'] = df['zhongshu'].apply(
        lambda x: json.dumps(x, cls=FactorStateEncoder))
    df_to_db(df=df, data_schema=self.factor_schema, provider='zvt',
             force_update=False)
def run(self):
    """Fetch sina stock-block definitions (GBK-encoded JSONP) and persist them."""
    for category, url in self.category_map_url.items():
        resp = requests.get(url)
        resp.encoding = 'GBK'
        body = resp.text
        # the payload is a JS object embedded in the response; cut out the braces
        block_map = json.loads(body[body.index('{'):body.index('}') + 1])
        rows = []
        for block_code in block_map:
            block_name = block_map[block_code].split(',')[1]
            eid = f'block_cn_{block_code}'
            rows.append({
                'id': eid,
                'entity_id': eid,
                'entity_type': 'block',
                'exchange': 'cn',
                'code': block_code,
                'name': block_name,
                'category': category.value,
            })
        if rows:
            df_to_db(data_schema=self.data_schema,
                     df=pd.DataFrame.from_records(rows),
                     provider=self.provider, force_update=True)
        self.logger.info(f"finish record sina blocks:{category.value}")
def run(self):
    """Fetch the joinquant fund master list and persist it as FundDetail."""
    df = finance.run_query(query(finance.FUND_MAIN_INFO))
    df.index.name = 'entity_id'
    df = df.reset_index()

    # the listing date becomes both timestamp and list_date
    df.rename(columns={'start_date': 'timestamp'}, inplace=True)
    for date_col in ('timestamp', 'end_date'):
        df[date_col] = pd.to_datetime(df[date_col])
    df['list_date'] = df['timestamp']

    df['entity_id'] = df.main_code.apply(lambda code: normalize_code(code))
    df['entity_id'] = df['entity_id'].apply(
        lambda code: to_entity_id(entity_type='fund', jq_code=code))
    df['id'] = df['entity_id']
    df['entity_type'] = 'fund'
    df['exchange'] = df['entity_id'].apply(lambda eid: get_entity_exchange(eid))
    df['code'] = df['entity_id'].apply(lambda eid: get_entity_code(eid))
    df['category'] = 'fund'

    df_to_db(df, data_schema=FundDetail, provider=self.provider,
             force_update=self.force_update)
    self.logger.info("persist etf list success")
    logout()
def record(self, entity, start, end, size, timestamps):
    """Record fund dividend announcements for *entity* published since *start*."""
    if not end:
        end = to_time_str(now_pd_timestamp())
    start = to_time_str(start)
    df = finance.run_query(
        query(finance.FUND_DIVIDEND).filter(
            finance.FUND_DIVIDEND.code == entity.code,
            finance.FUND_DIVIDEND.pub_date >= start).limit(20))
    df.rename(columns=FundDividendDetail.get_data_map(self), inplace=True)
    df.dropna(subset=['dividend_date'], inplace=True)
    if pd_is_not_null(df):
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df.announce_date)
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        def generate_id(se):
            # id = entity_day_seq; se.name (the 1-based index) keeps ids unique
            # when several records share one announce date
            return "{}_{}_{}".format(
                se['entity_id'],
                to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY),
                se.name)

        # BUG FIX: the original reset the index twice and computed df['id']
        # twice; one reset and one assignment are sufficient.
        df.reset_index(drop=True, inplace=True)
        df.index += 1
        df['id'] = df[['entity_id', 'timestamp']].apply(generate_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Record the stock portfolio of fund *entity* from reports published since *start*."""
    q = query(finance.FUND_PORTFOLIO_STOCK).filter(finance.FUND_PORTFOLIO_STOCK.pub_date >= start).filter(
        finance.FUND_PORTFOLIO_STOCK.code == entity.code)
    df = finance.run_query(q)
    if pd_is_not_null(df):
        # sample rows returned by the query:
        # id code period_start period_end pub_date report_type_id report_type rank symbol name shares market_cap proportion
        # 0 8640569 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 1 601318 中国平安 19869239.0 1.361043e+09 7.09
        # 1 8640570 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 2 600519 贵州茅台 921670.0 6.728191e+08 3.50
        # 2 8640571 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 3 600036 招商银行 18918815.0 5.806184e+08 3.02
        # 3 8640572 159919 2018-07-01 2018-09-30 2018-10-26 403003 第三季度 4 601166 兴业银行 22862332.0 3.646542e+08 1.90
        df['timestamp'] = pd.to_datetime(df['pub_date'])

        df.rename(columns={'symbol': 'stock_code', 'name': 'stock_name'}, inplace=True)
        # source reports proportion in percent; store as a fraction
        df['proportion'] = df['proportion'] * 0.01

        df = portfolio_relate_stock(df, entity)
        df['stock_id'] = df['stock_code'].apply(lambda x: china_stock_code_to_id(x))
        # include the upstream row id so repeated holdings stay distinct
        df['id'] = df[['entity_id', 'stock_id', 'pub_date', 'id']].apply(lambda x: '_'.join(x.astype(str)), axis=1)
        df['report_date'] = pd.to_datetime(df['period_end'])
        df['report_period'] = df['report_type'].apply(lambda x: jq_to_report_period(x))

        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
        self.logger.info(f"persist etf {entity.code} portfolio success")
    return None
def record(self, entity, start, end, size, timestamps):
    """Record HK-connect holding info (STK_HK_HOLD_INFO) for each given timestamp."""
    for timestamp in timestamps:
        df = run_query(
            table='finance.STK_HK_HOLD_INFO',
            conditions=
            f'link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}')
        # NOTE(review): leftover debug print — consider removing
        print(df)

        if pd_is_not_null(df):
            df.rename(columns={
                'day': 'timestamp',
                'link_id': 'holder_code',
                'link_name': 'holder_name'
            }, inplace=True)
            df['timestamp'] = pd.to_datetime(df['timestamp'])

            df['entity_id'] = df['code'].apply(
                lambda x: to_entity_id(entity_type='stock', jq_code=x))
            # strip the exchange suffix from the jq code
            df['code'] = df['code'].apply(lambda x: x.split('.')[0])

            # id format: {holder_name}_{entity_id}_{timestamp}
            df['id'] = df[['holder_name', 'entity_id', 'timestamp']].apply(
                lambda se: "{}_{}_{}".format(
                    se['holder_name'], se['entity_id'],
                    to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY)),
                axis=1)

            df_to_db(df=df, data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
def record(self, entity, start, end, size, timestamps):
    """Record the constituent stocks of one sina block."""
    resp = requests.get(self.category_stocks_url.format(entity.code, '1'))
    try:
        results = json_callback_param(resp.text)
        the_list = []
        for result in results:
            items = result.split(',')
            stock_code = items[1]
            stock_id = china_stock_code_to_id(stock_code)
            block_id = entity.id
            the_list.append({
                'id': '{}_{}'.format(block_id, stock_id),
                'entity_id': block_id,
                'entity_type': 'block',
                'exchange': entity.exchange,
                'code': entity.code,
                'name': entity.name,
                'timestamp': now_pd_timestamp(),
                'stock_id': stock_id,
                'stock_code': stock_code,
                'stock_name': items[2],
            })
        if the_list:
            df = pd.DataFrame.from_records(the_list)
            df_to_db(data_schema=self.data_schema, df=df,
                     provider=self.provider, force_update=True)
        self.logger.info('finish recording block:{},{}'.format(entity.category, entity.name))
    except Exception as e:
        # BUG FIX: logger.error takes a %-format string; the original passed
        # extra args with no placeholders, corrupting the log record.
        self.logger.error("error: %s, resp.text: %s", e, resp.text)
    self.sleep()
def record(self, entity, start, end, size, timestamps):
    """Record HK-connect holding info (STK_HK_HOLD_INFO) for each given timestamp."""
    for timestamp in timestamps:
        df = run_query(
            table="finance.STK_HK_HOLD_INFO",
            conditions=
            f"link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}")
        # NOTE(review): leftover debug print — consider removing
        print(df)

        if pd_is_not_null(df):
            df.rename(columns={
                "day": "timestamp",
                "link_id": "holder_code",
                "link_name": "holder_name"
            }, inplace=True)
            df["timestamp"] = pd.to_datetime(df["timestamp"])

            df["entity_id"] = df["code"].apply(
                lambda x: to_entity_id(entity_type="stock", jq_code=x))
            # strip the exchange suffix from the jq code
            df["code"] = df["code"].apply(lambda x: x.split(".")[0])

            # id format: {holder_name}_{entity_id}_{timestamp}
            df["id"] = df[["holder_name", "entity_id", "timestamp"]].apply(
                lambda se: "{}_{}_{}".format(
                    se["holder_name"], se["entity_id"],
                    to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY)),
                axis=1,
            )

            df_to_db(df=df, data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
def download_stock_list(self, response, exchange):
    """Parse the exchange-published company CSV and persist the US stock list.

    :param response: HTTP response whose body is the company-list CSV
    :param exchange: exchange code used for ids ('nyse', 'nasdaq', ...)
    """
    df = pd.read_csv(io.BytesIO(response.content), encoding='UTF8', dtype=str)
    if df is not None:
        df.rename(columns={'Symbol': 'code', 'Name': 'name',
                           'IPOyear': 'list_date', 'industry': 'industry',
                           'Sector': 'sector'}, inplace=True)
        df = df[['code', 'name', 'list_date', 'industry', 'sector']]
        # unknown IPO years default to 1980 so to_pd_timestamp always succeeds
        df.fillna({'list_date': '1980'}, inplace=True)
        df['list_date'] = df['list_date'].apply(lambda x: to_pd_timestamp(x))
        df['exchange'] = exchange
        df['entity_type'] = EntityType.Stock.value
        df['id'] = df[['entity_type', 'exchange', 'code']].apply(
            lambda x: '_'.join(x.astype(str)), axis=1)
        df['entity_id'] = df['id'].str.strip()
        df['timestamp'] = df['list_date']
        df = df.dropna(axis=0, how='any')
        # BUG FIX: subset=('id') is just the string 'id'; pass a list explicitly
        df = df.drop_duplicates(subset=['id'], keep='last')

        # persist StockDetail first (with industry/sector), then the slim list
        df_to_db(df=df, region=Region.US, data_schema=StockDetail,
                 provider=self.provider, force_update=True)
        df.drop(['industry', 'sector'], axis=1, inplace=True)
        df_to_db(df=df, region=Region.US, data_schema=self.data_schema,
                 provider=self.provider, force_update=True)
        # BUG FIX: typo 'successs' -> 'success'
        self.logger.info("persist stock list success")
def persist_factor(self): df = self.factor_df.copy() #: encode json columns if pd_is_not_null(df) and self.factor_col_map_object_hook(): for col in self.factor_col_map_object_hook(): if col in df.columns: df[col] = df[col].apply( lambda x: json.dumps(x, cls=self.state_encoder())) if self.states: g = df.groupby(level=0) for entity_id in self.states: state = self.states[entity_id] try: if state: self.persist_state(entity_id=entity_id) if entity_id in g.groups: df_to_db(df=df.loc[(entity_id, )], data_schema=self.factor_schema, provider="zvt", force_update=False) except Exception as e: self.logger.error( f"{self.name} {entity_id} save state error") self.logger.exception(e) #: clear them if error happen self.clear_state_data(entity_id) else: df_to_db(df=df, data_schema=self.factor_schema, provider="zvt", force_update=False)
def record(self, entity, start, end, size, timestamps):
    """Record the constituent stocks of a GICS block via the Choice sector API."""
    if entity.block_type != 'gics':
        return None
    sector_data = c.sector(entity.code, to_time_str(now_pd_timestamp()))
    if len(sector_data.Data) == 0:
        return None
    # the API interleaves stock codes ('.SH'/'.SZ' suffixed) and display names
    codes, names = [], []
    for item in sector_data.Data:
        if '.SH' in item or '.SZ' in item:
            codes.append(item)
        else:
            names.append(item)
    df = pd.DataFrame({"stock": codes, "stock_name": names})
    df["stock_id"] = df.stock.apply(lambda s: to_entity_id(s, "stock").lower())
    df["stock_code"] = df.stock_id.str.split("_", expand=True)[2]
    df["code"] = entity.code
    df["exchange"] = entity.exchange
    df["name"] = entity.name
    df["timestamp"] = now_pd_timestamp()
    df["entity_id"] = entity.id
    df["block_type"] = entity.block_type
    df["entity_type"] = "block"
    df["id"] = df.apply(lambda row: row.entity_id + "_" + row.stock_id, axis=1)
    if df.empty:
        return None
    df_to_db(data_schema=self.data_schema, df=df,
             provider=self.provider, force_update=True)
    self.logger.info('finish recording BlockStock:{},{}'.format(
        entity.category, entity.name))