def record(self, entity, start, end, size, timestamps):
    """Fetch ``finance.STK_HK_HOLD_INFO`` rows for ``entity`` and persist them.

    One query is issued per element of ``timestamps``, filtered on
    ``link_id == entity.code`` and ``day == to_time_str(timestamp)``. Results
    accepted by ``pd_is_not_null`` are renamed, given ``entity_id`` and ``id``
    columns and written with ``df_to_db``.

    ``start``, ``end`` and ``size`` are not used by this implementation.
    """
    for timestamp in timestamps:
        df = run_query(
            table="finance.STK_HK_HOLD_INFO",
            conditions=f"link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}",
        )
        # Skip results rejected by pd_is_not_null and go on to the next timestamp.
        if not pd_is_not_null(df):
            continue

        df.rename(
            columns={
                "day": "timestamp",
                "link_id": "holder_code",
                "link_name": "holder_name",
            },
            inplace=True,
        )
        df["timestamp"] = pd.to_datetime(df["timestamp"])

        # entity_id must be derived from the full code, before the part after
        # the first "." is dropped from the "code" column below.
        df["entity_id"] = df["code"].apply(
            lambda x: to_entity_id(entity_type="stock", jq_code=x))
        df["code"] = df["code"].apply(lambda x: x.split(".")[0])

        # id format: {holder_name}_{entity_id}_{timestamp}
        df["id"] = df[["holder_name", "entity_id", "timestamp"]].apply(
            lambda se: "{}_{}_{}".format(
                se["holder_name"],
                se["entity_id"],
                to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY),
            ),
            axis=1,
        )

        df_to_db(
            df=df,
            data_schema=self.data_schema,
            provider=self.provider,
            force_update=self.force_update,
        )
def run(self):
    """Fetch ``finance.FUND_MAIN_INFO``, derive entity columns and persist it.

    The frame returned by ``finance.run_query`` gets ``timestamp`` /
    ``list_date`` / ``end_date`` as datetimes, an ``entity_id`` built from
    ``main_code`` via ``normalize_code`` and ``to_entity_id``, plus ``id``,
    ``entity_type``, ``exchange``, ``code`` and ``category`` columns, and is
    written with ``df_to_db`` using the ``FundDetail`` schema.

    ``logout()`` is always called before returning, including when a step
    raises, so a failure does not skip the logout.
    """
    try:
        # Fetch the fund list
        df = finance.run_query(query(finance.FUND_MAIN_INFO))
        df.index.name = 'entity_id'
        df = df.reset_index()

        # Listing date
        df.rename(columns={'start_date': 'timestamp'}, inplace=True)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['list_date'] = df['timestamp']
        df['end_date'] = pd.to_datetime(df['end_date'])

        # The entity_id column created by reset_index() is overwritten here
        # with an id derived from main_code.
        df['entity_id'] = df.main_code.apply(
            lambda x: to_entity_id(entity_type='fund', jq_code=normalize_code(x)))
        df['id'] = df['entity_id']
        df['entity_type'] = 'fund'
        df['exchange'] = df['entity_id'].apply(lambda x: get_entity_exchange(x))
        df['code'] = df['entity_id'].apply(lambda x: get_entity_code(x))
        df['category'] = 'fund'

        df_to_db(df,
                 data_schema=FundDetail,
                 provider=self.provider,
                 force_update=self.force_update)

        # NOTE(review): the message says "etf" although FUND_MAIN_INFO is
        # persisted as FundDetail -- confirm before changing the text.
        self.logger.info("persist etf list success")
    finally:
        logout()
def record(self, entity, start, end, size, timestamps):
    """Query ``finance.STK_HK_HOLD_INFO`` per timestamp and store the rows.

    For each element of ``timestamps`` the table is queried with
    ``link_id == entity.code`` and ``day == to_time_str(timestamp)``. A result
    accepted by ``pd_is_not_null`` has its columns renamed, receives
    ``entity_id`` and ``id`` columns, and is handed to ``df_to_db``.

    ``start``, ``end`` and ``size`` are not used by this implementation.
    """
    column_names = {
        'day': 'timestamp',
        'link_id': 'holder_code',
        'link_name': 'holder_name',
    }

    for timestamp in timestamps:
        df = run_query(
            table='finance.STK_HK_HOLD_INFO',
            conditions=f'link_id#=#{entity.code}&day#=#{to_time_str(timestamp)}')
        # Nothing usable for this timestamp according to pd_is_not_null.
        if not pd_is_not_null(df):
            continue

        df.rename(columns=column_names, inplace=True)
        df['timestamp'] = pd.to_datetime(df['timestamp'])

        # Build entity_id from the untouched code first; the "code" column
        # is then cut down to the part before the first ".".
        df['entity_id'] = df['code'].apply(
            lambda x: to_entity_id(entity_type='stock', jq_code=x))
        df['code'] = df['code'].apply(lambda x: x.split('.')[0])

        # id format: {holder_name}_{entity_id}_{timestamp}
        df['id'] = df[['holder_name', 'entity_id', 'timestamp']].apply(
            lambda se: "{}_{}_{}".format(
                se['holder_name'],
                se['entity_id'],
                to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY)),
            axis=1)

        df_to_db(df=df,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)
def run(self):
    """Load ``finance.FUND_MAIN_INFO`` and persist it with the ``FundDetail`` schema.

    Steps: run the query, turn ``start_date`` into ``timestamp`` /
    ``list_date``, convert ``end_date`` to datetimes, build ``entity_id`` from
    ``main_code`` (``normalize_code`` then ``to_entity_id``), fill the
    ``id`` / ``entity_type`` / ``exchange`` / ``code`` / ``category`` columns
    and call ``df_to_db``.

    ``logout()`` runs in a ``finally`` clause, so it is also called when one
    of the steps raises.
    """
    try:
        # Fetch the fund list
        df = finance.run_query(query(finance.FUND_MAIN_INFO))
        df.index.name = 'entity_id'
        df = df.reset_index()

        # Listing date
        df.rename(columns={'start_date': 'timestamp'}, inplace=True)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['list_date'] = df['timestamp']
        df['end_date'] = pd.to_datetime(df['end_date'])

        # Replace the index-derived entity_id with one built from main_code.
        df['entity_id'] = df.main_code.apply(lambda x: normalize_code(x))
        df['entity_id'] = df['entity_id'].apply(
            lambda x: to_entity_id(entity_type='fund', jq_code=x))
        df['id'] = df['entity_id']
        df['entity_type'] = 'fund'
        df['exchange'] = df['entity_id'].apply(lambda x: get_entity_exchange(x))
        df['code'] = df['entity_id'].apply(lambda x: get_entity_code(x))
        df['category'] = 'fund'

        df_to_db(df,
                 data_schema=FundDetail,
                 provider=self.provider,
                 force_update=self.force_update)

        # NOTE(review): wording mentions "etf" while fund data is persisted --
        # confirm whether the message should be updated.
        self.logger.info("persist etf list success")
    finally:
        logout()
def record(self, entity, start, end, size, timestamps):
    """Persist the stocks returned by ``c.sector`` for a ``'gics'`` block entity.

    Returns ``None`` without writing anything when ``entity.block_type`` is
    not ``'gics'``, when ``c.sector(...).Data`` is empty, or when no entry of
    ``Data`` contains ``'.SH'`` or ``'.SZ'``. Otherwise one row per stock code
    is written with ``df_to_db`` (``force_update=True``) and a log line is
    emitted.

    ``start``, ``end``, ``size`` and ``timestamps`` are not used by this
    implementation; the query date is always ``now_pd_timestamp()``.
    """
    if entity.block_type != 'gics':
        return None

    industry_stocks = c.sector(entity.code, to_time_str(now_pd_timestamp()))
    members = industry_stocks.Data
    if len(members) == 0:
        return None

    # Entries containing '.SH' / '.SZ' are treated as stock codes, everything
    # else as names.
    # NOTE(review): presumably Data holds one name per code -- confirm against
    # the c.sector response format; unequal lengths make DataFrame() raise.
    codes = [i for i in members if '.SH' in i or '.SZ' in i]
    # Bail out before building the frame: the column derivations below cannot
    # run on an empty frame, so an emptiness check after them never fires.
    if not codes:
        return None
    names = [i for i in members if '.SH' not in i and '.SZ' not in i]

    df = pd.DataFrame({"stock": codes, "stock_name": names})
    df["stock_id"] = df.stock.apply(lambda x: to_entity_id(x, "stock").lower())
    # Third "_"-separated piece of the stock id.
    df["stock_code"] = df.stock_id.str.split("_", expand=True)[2]
    df["code"] = entity.code
    df["exchange"] = entity.exchange
    df["name"] = entity.name
    df["timestamp"] = now_pd_timestamp()
    df["entity_id"] = entity.id
    df["block_type"] = entity.block_type
    df["entity_type"] = "block"
    # id is "<block entity id>_<stock id>"
    df["id"] = entity.id + "_" + df["stock_id"]

    df_to_db(data_schema=self.data_schema,
             df=df,
             provider=self.provider,
             force_update=True)

    self.logger.info('finish recording BlockStock:{},{}'.format(
        entity.category, entity.name))
def record(self, entity, start, end, size, timestamps):
    """Query ``finance.STK_HK_HOLD_INFO`` for each timestamp and persist the rows.

    Each query filters on ``link_id == entity.code`` and
    ``day == to_time_str(timestamp)``. Frames accepted by ``pd_is_not_null``
    are renamed, enriched with ``entity_id`` / ``id`` and written through
    ``df_to_db`` with ``region=Region.CHN``.

    ``start``, ``end`` and ``size`` are not used by this implementation.
    """
    renamed_columns = {
        'day': 'timestamp',
        'link_id': 'holder_code',
        'link_name': 'holder_name',
    }

    def build_id(row):
        # id format: {holder_name}_{entity_id}_{timestamp}
        return "{}_{}_{}".format(
            row['holder_name'],
            row['entity_id'],
            to_time_str(row['timestamp'], fmt=TIME_FORMAT_DAY))

    for ts in timestamps:
        table = finance.STK_HK_HOLD_INFO
        statement = jq_query(table).filter(
            table.link_id == entity.code,
            table.day == to_time_str(ts))
        holdings = finance.run_query(statement)

        if not pd_is_not_null(holdings):
            continue

        holdings = holdings.rename(columns=renamed_columns)
        holdings['timestamp'] = pd.to_datetime(holdings['timestamp'])

        # entity_id is computed from the full code; afterwards only the part
        # before the first "." is kept in the "code" column.
        holdings['entity_id'] = holdings['code'].apply(
            lambda jq_code: to_entity_id(entity_type=EntityType.Stock,
                                         jq_code=jq_code))
        holdings['code'] = holdings['code'].apply(
            lambda jq_code: jq_code.split('.')[0])

        id_inputs = holdings[['holder_name', 'entity_id', 'timestamp']]
        holdings['id'] = id_inputs.apply(build_id, axis=1)

        df_to_db(df=holdings,
                 region=Region.CHN,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)
def record(self, entity, start, end, size, timestamps):
    """Persist the member stocks of a block entity as of ``now_pd_timestamp()``.

    ``get_industry_stocks`` is tried first; if it raises, the same code is
    looked up with ``get_concept_stocks``. Returns ``None`` without writing
    when the lookup yields nothing. Otherwise one row per stock is written
    with ``df_to_db`` (``force_update=True``) and a log line is emitted.

    ``start``, ``end``, ``size`` and ``timestamps`` are not used by this
    implementation.
    """
    try:
        industry_stocks = get_industry_stocks(entity.code, date=now_pd_timestamp())
    except Exception:
        # Fall back to the concept lookup when the industry lookup fails.
        # Only Exception is caught so KeyboardInterrupt / SystemExit are not
        # swallowed by the fallback.
        industry_stocks = get_concept_stocks(entity.code, date=now_pd_timestamp())

    if len(industry_stocks) == 0:
        return None

    df = pd.DataFrame({"stock": industry_stocks})
    df["stock_id"] = df.stock.apply(lambda x: to_entity_id(x, "stock"))
    # Third "_"-separated piece of the stock id.
    df["stock_code"] = df.stock_id.str.split("_", expand=True)[2]
    # NOTE(review): this issues one get_data() call per stock -- confirm this
    # is acceptable for large blocks.
    df["stock_name"] = df.stock_id.apply(
        lambda x: get_data(data_schema=Stock, entity_id=x, provider='joinquant').name)
    df["block_type"] = entity.block_type
    df["code"] = entity.code
    df["name"] = entity.name
    df["exchange"] = entity.exchange
    df["timestamp"] = now_pd_timestamp()
    df["entity_id"] = entity.id
    df["entity_type"] = "block"
    # id is "<block entity id>_<stock id>"
    df["id"] = df.apply(lambda x: x.entity_id + "_" + x.stock_id, axis=1)

    if df.empty:
        return None

    df_to_db(data_schema=self.data_schema, df=df, provider=self.provider,
             force_update=True)

    self.logger.info('finish recording BlockStock:{},{}'.format(
        entity.category, entity.name))
def to_zvt_entity(self, df, entity_type, category=None):
    """Turn a raw listing frame into entity rows and return the new frame.

    The index values of ``df`` are passed to ``to_entity_id`` as ``jq_code``.
    ``start_date`` becomes ``timestamp`` / ``list_date``, ``end_date`` is
    converted to datetimes and ``display_name`` is copied to ``name``.

    When ``entity_type == 'etf'`` each row is additionally looked up through
    ``c.css(..., "BMINDEXCODE", "Rank=1")`` and the columns ``choice_code``,
    ``underlying_index_code``, ``index_codes``, ``index_exchange`` and
    ``index_code`` are added. The ``c`` session is stopped even when a lookup
    raises.

    ``category`` is written to a ``category`` column only when truthy.

    Note: the index name of the frame passed in is set to ``'entity_id'``.
    """
    df.index.name = 'entity_id'
    df = df.reset_index()

    # Listing date
    df.rename(columns={'start_date': 'timestamp'}, inplace=True)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['list_date'] = df['timestamp']
    df['end_date'] = pd.to_datetime(df['end_date'])

    df['entity_id'] = df['entity_id'].apply(
        lambda x: to_entity_id(entity_type=entity_type, jq_code=x))
    df['id'] = df['entity_id']
    df['entity_type'] = entity_type
    df['exchange'] = df['entity_id'].apply(lambda x: get_entity_exchange(x))
    df['code'] = df['entity_id'].apply(lambda x: get_entity_code(x))
    df['name'] = df['display_name']

    if entity_type == 'etf':
        # For ETFs, look up the underlying index
        df['choice_code'] = df.apply(
            lambda x: x.code + '.' + x.exchange.upper(), axis=1)

        c.start("ForceLogin=1", '')
        try:
            df['underlying_index_code'] = df.apply(
                lambda x: c.css(x.choice_code, "BMINDEXCODE", "Rank=1").Data,
                axis=1)
        finally:
            # Always end the session, including when a lookup raises.
            c.stop()

        def exchange_part(index_code):
            # Text after the first "." of the index code, or None if absent.
            parts = str(index_code).split('.')
            return parts[1] if len(parts) > 1 else None

        # First element of the first value in each returned mapping.
        df['index_codes'] = df['underlying_index_code'].apply(
            lambda x: list(x.values())[0][0])
        df['index_exchange'] = df['index_codes'].apply(exchange_part)
        df['index_code'] = df['index_codes'].apply(
            lambda x: str(x).split('.')[0])
        # Final form "index_<exchange>_<code>", or None without an exchange.
        df['underlying_index_code'] = df.apply(
            lambda x: 'index_' + x.index_exchange.lower() + '_' + x.index_code
            if x.index_exchange else None,
            axis=1)

    if category:
        df['category'] = category

    return df
def run(self):
    """Fetch ``finance.FUND_MAIN_INFO`` in date windows and persist it as ``Fund``.

    For each operate mode id the newest stored ``Fund`` row provides the
    window start (``'2000-01-01'`` when there is none). The window ends
    ``365 * year_count`` days later, capped at ``now_pd_timestamp``. The
    window grows by one year whenever the query returns nothing usable or the
    newest ``start_date`` falls in an earlier year than the window end. The
    loop for a mode stops once the window end is the current date.
    """
    # Fetch per category
    # code    fund operate mode
    # 401001  open-ended fund
    # 401002  closed-end fund
    # 401003  QDII
    # 401004  FOF
    # 401005  ETF
    # 401006  LOF
    for operate_mode_id in (401001, 401002, 401005):
        year_count = 2
        while True:
            newest = Fund.query_data(
                region=self.region,
                filters=[Fund.operate_mode_id == operate_mode_id],
                order=Fund.timestamp.desc(),
                limit=1,
                return_type='domain')

            start_timestamp = newest[0].timestamp if newest else '2000-01-01'
            end_timestamp = min(
                next_date(start_timestamp, 365 * year_count),
                now_pd_timestamp(self.region))

            conditions = f'operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}'
            df = jq_run_query(
                table='finance.FUND_MAIN_INFO',
                conditions=conditions,
                parse_dates=['start_date', 'end_date'],
                dtype={'main_code': str})

            has_rows = pd_is_not_null(df)

            # Widen the next window when this one produced nothing or did not
            # reach the year of the window end.
            if not has_rows or df['start_date'].max().year < end_timestamp.year:
                year_count += 1

            if has_rows:
                df = df.rename(columns={'start_date': 'timestamp'})
                df['timestamp'] = pd.to_datetime(df['timestamp'])
                df['list_date'] = df['timestamp']
                df['end_date'] = pd.to_datetime(df['end_date'])
                df['code'] = df['main_code']
                df['entity_id'] = df['code'].apply(
                    lambda code: to_entity_id(entity_type='fund', jq_code=code))
                df['id'] = df['entity_id']
                df['entity_type'] = 'fund'
                df['exchange'] = 'sz'

                df_to_db(df,
                         ref_df=None,
                         region=self.region,
                         data_schema=Fund,
                         provider=self.provider)
                self.logger.info(
                    f'persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}'
                )

            if is_same_date(end_timestamp, now_pd_timestamp(self.region)):
                break
def run(self):
    """Pull ``finance.FUND_MAIN_INFO`` window by window and store it as ``Fund``.

    Per operate mode id: start from the timestamp of the newest stored
    ``Fund`` row (or ``"2000-01-01"``), query up to ``365 * year_count`` days
    ahead but never past ``now_pd_timestamp()``, persist what comes back, and
    repeat until the window end is the current date. ``year_count`` is
    incremented when a window returns nothing usable or its newest
    ``start_date`` lies in a year before the window end.
    """
    # Fetch per category
    # code    fund operate mode
    # 401001  open-ended fund
    # 401002  closed-end fund
    # 401003  QDII
    # 401004  FOF
    # 401005  ETF
    # 401006  LOF
    for operate_mode_id in (401001, 401002, 401005):
        year_count = 2
        while True:
            latest_rows = Fund.query_data(
                filters=[Fund.operate_mode_id == operate_mode_id],
                order=Fund.timestamp.desc(),
                limit=1,
                return_type="domain",
            )

            if latest_rows:
                start_timestamp = latest_rows[0].timestamp
            else:
                start_timestamp = "2000-01-01"

            window_end = next_date(start_timestamp, 365 * year_count)
            end_timestamp = min(window_end, now_pd_timestamp())

            query_conditions = f"operate_mode_id#=#{operate_mode_id}&start_date#>=#{to_time_str(start_timestamp)}&start_date#<=#{to_time_str(end_timestamp)}"
            df = run_query(
                table="finance.FUND_MAIN_INFO",
                conditions=query_conditions,
                parse_dates=["start_date", "end_date"],
                dtype={"main_code": str},
            )

            usable = pd_is_not_null(df)

            # Grow the window for the next pass when this one was empty or
            # stopped short of the window end's year.
            if not usable or df["start_date"].max().year < end_timestamp.year:
                year_count += 1

            if usable:
                df = df.rename(columns={"start_date": "timestamp"})
                df["timestamp"] = pd.to_datetime(df["timestamp"])
                df["list_date"] = df["timestamp"]
                df["end_date"] = pd.to_datetime(df["end_date"])
                df["code"] = df["main_code"]
                df["entity_id"] = df["code"].apply(
                    lambda code: to_entity_id(entity_type="fund", jq_code=code))
                df["id"] = df["entity_id"]
                df["entity_type"] = "fund"
                df["exchange"] = "sz"

                df_to_db(
                    df,
                    data_schema=Fund,
                    provider=self.provider,
                    force_update=self.force_update,
                )
                self.logger.info(
                    f"persist fund {operate_mode_id} list success {start_timestamp} to {end_timestamp}"
                )

            if is_same_date(end_timestamp, now_pd_timestamp()):
                break
def format(self, entity, df):
    """Normalize ``df`` in place and return the same frame.

    Renames ``day`` / ``link_id`` / ``link_name``, replaces missing values in
    numeric columns with 0, converts ``timestamp`` to datetimes, and fills
    ``entity_id``, ``provider``, ``code`` and ``id`` (the latter via
    ``self.generate_domain_id``).
    """
    column_names = {
        'day': 'timestamp',
        'link_id': 'holder_code',
        'link_name': 'holder_name',
    }
    # The caller's frame is modified directly, so the rename stays in place.
    df.rename(columns=column_names, inplace=True)

    numeric_columns = df.select_dtypes(include=[np.number])
    df.update(numeric_columns.fillna(0))

    df['timestamp'] = pd.to_datetime(df['timestamp'])

    def as_entity_id(jq_code):
        return to_entity_id(entity_type=EntityType.Stock, jq_code=jq_code)

    def code_prefix(jq_code):
        # Part of the code before the first ".".
        return jq_code.split('.')[0]

    # entity_id is derived from the full code before "code" is shortened.
    df['entity_id'] = df['code'].apply(as_entity_id)
    df['provider'] = self.provider.value
    df['code'] = df['code'].apply(code_prefix)
    df['id'] = self.generate_domain_id(entity, df)
    return df
def to_zvt_entity(self, df, entity_type, category=None):
    """Turn a raw listing frame into entity rows and return the new frame.

    The index values of ``df`` are lower-cased and passed to ``to_entity_id``
    as ``jq_code``. ``start_date`` becomes ``timestamp`` / ``list_date``.
    ``end_date`` is converted to datetimes, with missing values replaced by
    ``pd.to_datetime("22000101")``. ``category`` is written to a ``category``
    column only when truthy.

    Note: the index name of the frame passed in is set to ``'entity_id'``.
    """
    df.index.name = 'entity_id'
    df = df.reset_index()

    # Listing date
    df.rename(columns={'start_date': 'timestamp'}, inplace=True)
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['list_date'] = df['timestamp']

    # Assign the filled column back instead of calling fillna(inplace=True)
    # on df['end_date']: the chained in-place form works on a temporary object
    # and is not guaranteed to update the frame.
    df['end_date'] = pd.to_datetime(df['end_date']).fillna(
        pd.to_datetime("22000101"))

    df['entity_id'] = df['entity_id'].apply(lambda x: x.lower())
    df['entity_id'] = df['entity_id'].apply(
        lambda x: to_entity_id(entity_type=entity_type, jq_code=x))
    df['id'] = df['entity_id']
    df['entity_type'] = entity_type
    df['exchange'] = df['entity_id'].apply(lambda x: get_entity_exchange(x))
    df['code'] = df['entity_id'].apply(lambda x: get_entity_code(x))

    if category:
        df['category'] = category

    return df
def to_zvt_entity(self, df, entity_type: EntityType, category=None):
    """Build entity rows from a raw listing frame and return the new frame.

    The index values of ``df`` are passed to ``to_entity_id`` as ``jq_code``.
    ``start_date`` becomes ``timestamp`` / ``list_date``, ``end_date`` is
    converted to datetimes, ``entity_type.value`` is stored in
    ``entity_type`` and ``display_name`` is copied to ``name``. ``category``
    is written to a ``category`` column only when truthy.

    Note: the index name of the frame passed in is set to ``'entity_id'``.
    """
    df.index.name = 'entity_id'
    entities = df.reset_index()

    # Listing date
    entities = entities.rename(columns={'start_date': 'timestamp'})
    listed_on = pd.to_datetime(entities['timestamp'])
    entities['timestamp'] = listed_on
    entities['list_date'] = listed_on
    entities['end_date'] = pd.to_datetime(entities['end_date'])

    def as_entity_id(jq_code):
        return to_entity_id(entity_type=entity_type, jq_code=jq_code)

    entity_ids = entities['entity_id'].apply(as_entity_id)
    entities['entity_id'] = entity_ids
    entities['id'] = entity_ids
    entities['entity_type'] = entity_type.value
    entities['exchange'] = entity_ids.apply(lambda eid: get_entity_exchange(eid))
    entities['code'] = entity_ids.apply(lambda eid: get_entity_code(eid))
    entities['name'] = entities['display_name']

    if category:
        entities['category'] = category

    return entities
def to_zvt_entity(self, df, entity_type, category=None):
    """Build entity rows from a frame that carries a ``code`` column.

    The ``code`` column is moved to the index, renamed ``entity_id`` and
    turned back into a column, then mapped through ``to_entity_id`` as
    ``jq_code``. ``start_date`` becomes ``timestamp`` / ``list_date``,
    ``end_date`` is converted to datetimes and ``display_name`` is copied to
    ``name``. ``category`` is written to a ``category`` column only when
    truthy. Returns the new frame.
    """
    entities = df.set_index("code")
    entities.index.name = "entity_id"
    entities = entities.reset_index()

    # Listing date
    entities = entities.rename(columns={"start_date": "timestamp"})
    listed_on = pd.to_datetime(entities["timestamp"])
    entities["timestamp"] = listed_on
    entities["list_date"] = listed_on
    entities["end_date"] = pd.to_datetime(entities["end_date"])

    def as_entity_id(jq_code):
        return to_entity_id(entity_type=entity_type, jq_code=jq_code)

    entity_ids = entities["entity_id"].apply(as_entity_id)
    entities["entity_id"] = entity_ids
    entities["id"] = entity_ids
    entities["entity_type"] = entity_type
    entities["exchange"] = entity_ids.apply(lambda eid: get_entity_exchange(eid))
    entities["code"] = entity_ids.apply(lambda eid: get_entity_code(eid))
    entities["name"] = entities["display_name"]

    if category:
        entities["category"] = category

    return entities