def record(self, entity, start, end, size, timestamps):
    """Fetch kdata bars from joinquant, persist them, and trigger a qfq
    recomputation when previously stored forward-adjusted data is stale."""
    # hfq data is anchored to a fixed reference date; qfq uses "today"
    if self.adjust_type == AdjustType.hfq:
        fq_ref_date = '2000-01-01'
    else:
        fq_ref_date = to_time_str(now_pd_timestamp())

    if not self.end_timestamp:
        df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level,
                      fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                      fq_ref_date=fq_ref_date,
                      include_now=self.real_time)
    else:
        df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level,
                      fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                      end_dt=to_time_str(self.end_timestamp),
                      fq_ref_date=fq_ref_date,
                      include_now=False)

    if pd_is_not_null(df):
        df['name'] = entity.name
        df.rename(columns={'money': 'turnover', 'date': 'timestamp'}, inplace=True)
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value
        df['code'] = entity.code

        # Decide whether previously saved qfq rows need to be recomputed.
        if self.adjust_type == AdjustType.qfq:
            check_df = df.head(1)
            check_date = check_df['timestamp'][0]
            current_df = get_kdata(entity_id=entity.id,
                                   provider=self.provider,
                                   start_timestamp=check_date,
                                   end_timestamp=check_date,
                                   limit=1,
                                   level=self.level,
                                   adjust_type=self.adjust_type)
            if pd_is_not_null(current_df):
                old = current_df.iloc[0, :]['close']
                new = check_df['close'][0]
                # A different close at the same timestamp means the stored
                # forward-adjusted series must be recomputed.
                if round(old, 2) != round(new, 2):
                    self.recompute_qfq(entity,
                                       qfq_factor=new / old,
                                       last_timestamp=pd.Timestamp(check_date))

        def generate_kdata_id(se):
            # Daily-or-coarser levels use a day-resolution id; finer ISO8601.
            if self.level >= IntervalLevel.LEVEL_1DAY:
                return "{}_{}".format(se['entity_id'],
                                      to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
            return "{}_{}".format(se['entity_id'],
                                  to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)

    return None
def generate_path_fields(self, entity):
    """Build the jqdata query for the configured finance report type.

    Returns a query over the income table for income statements, or over the
    cash-flow table for cash-flow statements; implicitly None for other types.
    """
    if self.finance_report_type == 'IncomeStatementQSHSZ':
        q = query(income).filter(income.code == to_jq_entity_id(entity), )
        return q
    elif self.finance_report_type == 'CashFlowStatementQSHSZ':
        # BUG FIX: previously filtered on income.code, i.e. the cash-flow
        # query was constrained by the wrong table's column.
        q = query(cash_flow).filter(cash_flow.code == to_jq_entity_id(entity), )
        return q
def record(self, entity, start, end, size, timestamps):
    """Fetch dividend/rights (除权除息) rows plus capital-change rows, map them
    onto the schema's columns, and persist them."""
    # different with the default timestamps handling
    param = self.generate_request_param(entity, start, end, size, timestamps)
    from jqdatasdk import finance

    # Listed-company dividend / rights-issue data.
    df = finance.run_query(
        query(finance.STK_XR_XD).filter(
            finance.STK_XR_XD.code == to_jq_entity_id(entity),
            finance.STK_XR_XD.board_plan_pub_date >= to_time_str(start)).order_by(
                finance.STK_XR_XD.report_date).limit(10))
    # BUG FIX: rename must target columns explicitly; without columns= the
    # mapping was applied to the index and board_plan_pub_date kept its name.
    df.rename(columns={'board_plan_pub_date': 'pub_date'}, inplace=True)

    df2 = finance.run_query(
        query(finance.STK_CAPITAL_CHANGE).filter(
            finance.STK_CAPITAL_CHANGE.code == to_jq_entity_id(entity),
            finance.STK_CAPITAL_CHANGE.pub_date >= to_time_str(start)).order_by(
                finance.STK_CAPITAL_CHANGE.pub_date).limit(2))
    df2['company_name'] = df.company_name[0]
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    df = pd.concat([df, df2])

    # Index on report date / announcement date so the column mapping below
    # cannot drop them.
    df.set_index(['report_date', 'pub_date'], drop=True, inplace=True)
    map_data = {value[0]: key for key, value in self.get_data_map().items()}
    df.drop([i for i in df.columns if i not in map_data.keys()], axis=1, inplace=True)
    df.rename(columns=map_data, inplace=True)
    df.reset_index(drop=False, inplace=True)

    df['report_date'] = pd.to_datetime(df['report_date'])
    df['report_period'] = df['report_date'].apply(lambda x: get_recent_report_period(x))
    df['timestamp'] = df['report_date']
    df['pub_date'] = pd.to_datetime(df['pub_date'])
    df['entity_id'] = entity.id
    df['provider'] = 'joinquant'
    df['code'] = entity.code

    def generate_finance_id(se):
        # One row per entity per report day.
        return "{}_{}".format(se['entity_id'],
                              to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

    df['id'] = df[['entity_id', 'timestamp']].apply(generate_finance_id, axis=1)
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
             force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch bars (provider-default fields), tag them with entity metadata,
    de-duplicate by id, and persist them."""
    if self.end_timestamp:
        df = get_bars(
            to_jq_entity_id(entity),
            count=size,
            unit=self.jq_trading_level,
            end_date=to_time_str(self.end_timestamp),
        )
    else:
        df = get_bars(
            to_jq_entity_id(entity),
            count=size,
            unit=self.jq_trading_level,
        )

    if pd_is_not_null(df):
        df["name"] = entity.name
        df.rename(columns={"money": "turnover", "date": "timestamp"}, inplace=True)
        df["entity_id"] = entity.id
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        df["provider"] = "joinquant"
        df["level"] = self.level.value
        df["code"] = entity.code

        def generate_kdata_id(se):
            # Daily-or-coarser levels get a day-resolution id; finer ISO8601.
            if self.level >= IntervalLevel.LEVEL_1DAY:
                return "{}_{}".format(se["entity_id"],
                                      to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY))
            return "{}_{}".format(se["entity_id"],
                                  to_time_str(se["timestamp"], fmt=TIME_FORMAT_ISO8601))

        df["id"] = df[["entity_id", "timestamp"]].apply(generate_kdata_id, axis=1)
        df = df.drop_duplicates(subset="id", keep="last")
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)

    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch factor values for the entity, one column per factor, align them on
    a business-day index and persist them."""
    now_date = to_time_str(now_pd_timestamp())
    jq_entity_di = to_jq_entity_id(entity)

    if size > 1000:
        # Clamp a single query to 1000 business days and move the recorder's
        # window accordingly.
        start_end_size = self.evaluate_start_end_size_timestamps(entity)
        size = 1000
        bdate = pd.bdate_range(start=start_end_size[0], periods=size)
        self.start_timestamp = bdate[0]
        self.end_timestamp = bdate[-1] if bdate[-1] <= now_pd_timestamp() else now_pd_timestamp()

    if not self.end_timestamp:
        # BUG FIX: start_timestamp/end_timestamp were undefined on this path,
        # raising NameError below whenever the factor query returned nothing.
        end_timestamp = now_date
        start_timestamp = to_time_str(pd.bdate_range(end=now_date, periods=size)[0])
        factor_data = get_factor_values(securities=[jq_entity_di],
                                        factors=self.data_schema.important_cols(),
                                        end_date=now_date,
                                        count=size)
    else:
        end_timestamp = to_time_str(self.end_timestamp)
        if self.start_timestamp:
            start_timestamp = to_time_str(self.start_timestamp)
        else:
            bdate_list = pd.bdate_range(end=end_timestamp, periods=size)
            start_timestamp = to_time_str(bdate_list[0])
        factor_data = get_factor_values(securities=[to_jq_entity_id(entity)],
                                        factors=self.data_schema.important_cols(),
                                        start_date=start_timestamp,
                                        end_date=end_timestamp)

    # factor_data maps factor name -> frame keyed by security; rename the
    # security column to the factor name so frames can be concatenated.
    df_list = [values.rename(columns={jq_entity_di: key})
               for key, values in factor_data.items()]
    if len(df_list) != 0:
        df = pd.concat(df_list, join='inner', sort=True, axis=1).sort_index(ascending=True)
    else:
        df = pd.DataFrame(columns=self.data_schema.important_cols(),
                          index=pd.bdate_range(start=start_timestamp, end=end_timestamp))

    if pd_is_not_null(df):
        # Re-align on the full business-day range when an explicit window
        # was requested; otherwise keep the returned index.
        if self.end_timestamp:
            df_fill = pd.DataFrame(index=pd.bdate_range(start=start_timestamp,
                                                        end=end_timestamp))
        else:
            df_fill = pd.DataFrame(index=df.index)
        if df_fill.shape[0] != df.shape[0]:
            df = pd.concat([df_fill, df], axis=1)
        df['name'] = entity.name
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df.index)
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        def generate_factor_id(se):
            # Daily-or-coarser levels get a day-resolution id; finer ISO8601.
            if self.level >= IntervalLevel.LEVEL_1DAY:
                return "{}_{}".format(se['entity_id'],
                                      to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
            return "{}_{}".format(se['entity_id'],
                                  to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_factor_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch bars (provider-default fields), tag with entity metadata,
    de-duplicate by id and persist."""
    if self.end_timestamp:
        df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level,
                      end_date=to_time_str(self.end_timestamp))
    else:
        df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level)

    if pd_is_not_null(df):
        df['name'] = entity.name
        df.rename(columns={'money': 'turnover', 'date': 'timestamp'}, inplace=True)
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value
        df['code'] = entity.code

        def generate_kdata_id(se):
            # Day-resolution id for daily-or-coarser levels, ISO8601 otherwise.
            if self.level >= IntervalLevel.LEVEL_1DAY:
                return "{}_{}".format(se['entity_id'],
                                      to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
            return "{}_{}".format(se['entity_id'],
                                  to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)
        df = df.drop_duplicates(subset='id', keep='last')
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)

    return None
def record(self, entity, start, end, size, timestamps, http_session):
    """Fetch money-flow rows starting at `start` and return them (the caller
    persists); returns None when nothing remains after dropping NaN rows."""
    if not self.end_timestamp:
        df = jq_get_money_flow(code=to_jq_entity_id(entity), date=to_time_str(start))
    else:
        # BUG FIX: start was passed raw here while the other branch (and the
        # end_date argument) normalize through to_time_str.
        df = jq_get_money_flow(code=to_jq_entity_id(entity),
                               date=to_time_str(start),
                               end_date=to_time_str(self.end_timestamp))

    df = df.dropna()
    if pd_is_not_null(df):
        return df
    return None
def record(self, entity, start, end, size, timestamps, http_session):
    """Fetch kdata bars and return them for persistence; triggers a qfq
    recomputation when the stored forward-adjusted series is stale."""
    # hfq anchors to a fixed reference date; qfq anchors to "today".
    if self.adjust_type == AdjustType.hfq:
        fq_ref_date = '2000-01-01'
    else:
        fq_ref_date = to_time_str(now_pd_timestamp(Region.CHN))

    if not self.end_timestamp:
        df = jq_get_bars(to_jq_entity_id(entity),
                         count=size,
                         unit=self.jq_trading_level,
                         fq_ref_date=fq_ref_date)
    else:
        df = jq_get_bars(to_jq_entity_id(entity),
                         count=size,
                         unit=self.jq_trading_level,
                         end_date=to_time_str(self.end_timestamp),
                         fq_ref_date=fq_ref_date)

    if pd_is_not_null(df):
        # Decide whether previously saved qfq rows must be recomputed.
        if self.adjust_type == AdjustType.qfq:
            check_df = df.head(1)
            check_date = check_df['timestamp'][0]
            current_df = get_kdata(region=self.region,
                                   entity_id=entity.id,
                                   provider=self.provider,
                                   start_timestamp=check_date,
                                   end_timestamp=check_date,
                                   limit=1,
                                   level=self.level,
                                   adjust_type=self.adjust_type)
            if pd_is_not_null(current_df):
                old = current_df.iloc[0, :]['close']
                new = check_df['close'][0]
                # A different close at the same timestamp means the stored
                # forward-adjusted data needs recomputation.
                if round(old, 2) != round(new, 2):
                    self.recompute_qfq(entity,
                                       qfq_factor=new / old,
                                       last_timestamp=pd.Timestamp(check_date))
        return df
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch listing-status changes published since `start` and persist them."""
    if not end:
        end = to_time_str(now_pd_timestamp())
    start = to_time_str(start)

    q = query(finance.STK_STATUS_CHANGE).filter(
        finance.STK_STATUS_CHANGE.code == to_jq_entity_id(entity)).filter(
            finance.STK_STATUS_CHANGE.pub_date >= to_time_str(start)).limit(10)
    df = finance.run_query(q)

    if pd_is_not_null(df):
        df['pub_date'] = pd.to_datetime(df['pub_date'])
        df['exchange'] = entity.exchange
        df['entity_type'] = entity.entity_type
        df['change_date'] = pd.to_datetime(df['change_date'])
        # The status-change date is the record's timestamp.
        df['timestamp'] = df['change_date']
        df['entity_id'] = entity.id
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        def generate_finance_id(se):
            return "{}_{}".format(se['entity_id'],
                                  to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_finance_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch locked-share data from `start` up to ~150 days ahead and persist it."""
    df = get_locked_shares([to_jq_entity_id(entity)],
                           start_date=to_time_str(start),
                           end_date=to_time_str(now_pd_timestamp() + timedelta(days=150)))
    if pd_is_not_null(df):
        # Scale the raw rate columns by 100 before storing.
        df['locked_rate1'] = df['rate1'] * 100
        df['locked_rate2'] = df['rate2'] * 100
        df['locked_num'] = df['num']
        df['entity_id'] = entity.id
        df['end_date'] = pd.to_datetime(df.day)
        df['timestamp'] = df['end_date']
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        def generate_id(se):
            return "{}_{}".format(se['entity_id'],
                                  to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch valuation fundamentals in a window of at most 500 days, normalize
    units, and persist them."""
    start = max(start, to_pd_timestamp('2005-01-01'))
    end = min(now_pd_timestamp(), start + Timedelta(days=500))
    count: Timedelta = end - start
    df = get_fundamentals(table='valuation',
                          code=to_jq_entity_id(entity),
                          date=to_time_str(end),
                          count=min(count.days, 500))
    # ROBUSTNESS: an empty result would raise KeyError on df['day'] below;
    # guard like the sibling record() implementations do.
    if not pd_is_not_null(df):
        return None
    df['entity_id'] = entity.id
    df['timestamp'] = pd.to_datetime(df['day'])
    df['code'] = entity.code
    df['name'] = entity.name
    df['id'] = df['timestamp'].apply(lambda x: "{}_{}".format(entity.id, to_time_str(x)))
    df = df.rename({'pe_ratio_lyr': 'pe',
                    'pe_ratio': 'pe_ttm',
                    'pb_ratio': 'pb',
                    'ps_ratio': 'ps',
                    'pcf_ratio': 'pcf'}, axis='columns')
    # Scale raw values: caps ×1e8, share counts ×1e4, turnover ratio %→fraction.
    df['market_cap'] = df['market_cap'] * 100000000
    df['circulating_market_cap'] = df['circulating_market_cap'] * 100000000
    df['capitalization'] = df['capitalization'] * 10000
    df['circulating_cap'] = df['circulating_cap'] * 10000
    df['turnover_ratio'] = df['turnover_ratio'] * 0.01
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
             force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch share-pledge records published since `start` and persist them."""
    df = finance.run_query(
        query(finance.STK_SHARES_PLEDGE).filter(
            finance.STK_SHARES_PLEDGE.code == to_jq_entity_id(entity)).filter(
                finance.STK_SHARES_PLEDGE.pub_date >= to_time_str(start)))
    if pd_is_not_null(df):
        df['name'] = entity.name
        df['entity_id'] = entity.id
        df['pub_date'] = pd.to_datetime(df.pub_date)
        df['timestamp'] = df['pub_date']
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        def generate_id(se):
            # Several pledges can share a pub_date; the per-group row number
            # (se.name, made 1-based below) keeps ids unique.
            return "{}_{}_{}".format(se['entity_id'],
                                     to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY),
                                     se.name)

        # Restart the row index within each timestamp group, then shift to 1-based.
        groups = dict(list(df.groupby('timestamp'))).values()
        df = pd.concat([g.reset_index(drop=True) for g in groups])
        df.index += 1
        df['id'] = df[['entity_id', 'timestamp']].apply(generate_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch the continuous valuation series from `start` to now, normalize
    units, and persist it."""
    q = query(valuation).filter(valuation.code == to_jq_entity_id(entity))
    count: pd.Timedelta = now_pd_timestamp() - start
    df = get_fundamentals_continuously(q, end_date=now_time_str(),
                                       count=count.days + 1, panel=False)
    df['entity_id'] = entity.id
    df['timestamp'] = pd.to_datetime(df['day'])
    df['code'] = entity.code
    df['name'] = entity.name
    df['id'] = df['timestamp'].apply(lambda x: "{}_{}".format(entity.id, to_time_str(x)))
    df = df.rename({'pe_ratio_lyr': 'pe',
                    'pe_ratio': 'pe_ttm',
                    'pb_ratio': 'pb',
                    'ps_ratio': 'ps',
                    'pcf_ratio': 'pcf'}, axis='columns')
    # Scale raw values: caps ×1e8, share counts ×1e4, turnover ratio %→fraction.
    df['market_cap'] = df['market_cap'] * 100000000
    df['circulating_market_cap'] = df['circulating_market_cap'] * 100000000
    df['capitalization'] = df['capitalization'] * 10000
    df['circulating_cap'] = df['circulating_cap'] * 10000
    df['turnover_ratio'] = df['turnover_ratio'] * 0.01
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
             force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps, http_session):
    """Fetch bars (provider-default fields) and return them for the caller to
    persist; returns None on an empty result."""
    if self.end_timestamp:
        df = jq_get_bars(to_jq_entity_id(entity),
                         count=size,
                         unit=self.jq_trading_level,
                         end_date=to_time_str(self.end_timestamp))
    else:
        df = jq_get_bars(to_jq_entity_id(entity),
                         count=size,
                         unit=self.jq_trading_level)
    if pd_is_not_null(df):
        return df
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch one finance-report table for the entity, map its columns onto the
    schema, de-duplicate, and persist."""
    # different with the default timestamps handling
    param = self.generate_request_param(entity, start, end, size, timestamps)
    # NOTE(review): eval of an internally generated string; safe only while
    # generate_path_fields returns trusted, hard-coded table names.
    query_path = eval(self.generate_path_fields(entity))  # company_type_flag(entity)

    q = query(query_path, ).filter(
        query_path.code == to_jq_entity_id(entity),
        query_path.end_date >= param['endDate'],
        query_path.report_type == 0).limit(param['latestCount'])
    df = finance.run_query(q)
    if df.empty:
        return None

    # Report date / announcement date; some tables expose end_date instead.
    if 'end_date' in df.columns and "report_date" not in df.columns:
        df.rename(columns={'end_date': "report_date"}, inplace=True)
    df.set_index(['report_date', 'pub_date'], drop=True, inplace=True)
    map_data = {value[0]: key for key, value in self.get_data_map().items()}
    df.drop([col for col in df.columns if col not in map_data.keys()],
            axis=1, inplace=True)
    df.rename(columns=map_data, inplace=True)
    df.reset_index(drop=False, inplace=True)

    df['report_date'] = pd.to_datetime(df['report_date'])
    df['report_period'] = df['report_date'].apply(lambda x: to_report_period_type(x))
    df['pub_date'] = pd.to_datetime(df['pub_date'])
    df['timestamp'] = df['report_date']
    df['entity_id'] = entity.id
    df['provider'] = 'joinquant'
    df['code'] = entity.code

    def generate_finance_id(se):
        return "{}_{}".format(se['entity_id'],
                              to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

    df['id'] = df[['entity_id', 'timestamp']].apply(generate_finance_id, axis=1)
    df = df.drop_duplicates(subset=['id'], keep='last')
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
             force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch shareholder share-change records published since `start`,
    normalize the holder names, and persist them."""
    df = finance.run_query(
        query(finance.STK_SHAREHOLDERS_SHARE_CHANGE).filter(
            finance.STK_SHAREHOLDERS_SHARE_CHANGE.code == to_jq_entity_id(entity)).filter(
                finance.STK_SHAREHOLDERS_SHARE_CHANGE.pub_date >= to_time_str(start)))
    if pd_is_not_null(df):
        df.reset_index(inplace=True, drop=True)
        df['name'] = entity.name
        df['index_columns'] = df.index
        df.rename(
            columns={
                'pub_date': 'timestamp',  # announcement date
                'end_date': 'holder_end_date',  # change cut-off date
                'shareholder_name': 'holder_name',  # shareholder name
                'change_number': 'volume',  # changed share count
                'change_ratio': 'change_pct',  # change as % of total share capital
                'after_change_ratio': 'holding_pct',  # post-change % of total share capital
                'price_ceiling': 'price',  # average transaction price (yuan)
            },
            inplace=True)
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df.timestamp)
        df['provider'] = 'joinquant'
        df['code'] = entity.code
        # type 1 -> reduction (减持), type 0 -> increase (增持).
        df['holder_direction'] = df.type.replace(1, '减持').replace(0, '增持')

        def generate_id(se):
            # se.name is the 1-based per-day row number set below; it keeps
            # ids unique when several changes share a timestamp.
            return "{}_{}_{}".format(se['entity_id'],
                                     to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY),
                                     se.name)

        groups = dict(list(df.groupby('timestamp'))).values()
        df = pd.concat([g.reset_index(drop=True) for g in groups])
        df.index += 1
        df['id'] = df[['entity_id', 'timestamp']].apply(generate_id, axis=1)
        # Strip the "(limited partnership)" suffix in both full-width and
        # half-width paren forms, then drop any "-..." suffix.
        df['holder_name'] = df['holder_name'].apply(
            lambda x: str(x).replace('(有限合伙)', ''))
        df['holder_name'] = df['holder_name'].apply(
            lambda x: str(x).replace('(有限合伙)', ''))
        df['holder_name'] = df['holder_name'].apply(
            lambda x: str(x).split('-')[0])
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps, http_session):
    """Fetch up to ~500 days of valuation fundamentals ending at min(now,
    start+500d) and return them; None on an empty result."""
    end = min(now_pd_timestamp(self.region), start + Timedelta(days=500))
    count: Timedelta = end - start
    df = jq_get_fundamentals(table='valuation',
                             code=to_jq_entity_id(entity),
                             date=to_time_str(end),
                             count=min(count.days, 500))
    if pd_is_not_null(df):
        return df
    return None
def fill_timestamp_with_jq(self, security_item, the_data):
    """Back-fill the_data.timestamp with the report's publish date from jq
    and commit the change."""
    # get report published date from jq
    df = jq_get_fundamentals(table='indicator',
                             code=to_jq_entity_id(security_item),
                             columns='pubDate',
                             date=to_jq_report_period(the_data.report_date),
                             count=None,
                             parse_dates=['pubDate'])
    if pd_is_not_null(df):
        the_data.timestamp = to_pd_timestamp(df['pubDate'][0])
        self.logger.info('jq fill {} {} timestamp:{} for report_date:{}'.format(
            self.data_schema.__name__, security_item.id, the_data.timestamp,
            the_data.report_date))
        session = get_db_session(region=self.region,
                                 provider=self.provider,
                                 data_schema=self.data_schema)
        session.commit()
def fill_timestamp_with_jq(self, security_item, the_data):
    """Back-fill the_data.timestamp with the report's publish date from jq
    and commit the change; errors are logged, not raised."""
    # get report published date from jq
    try:
        q = query(indicator.pubDate).filter(
            indicator.code == to_jq_entity_id(security_item), )
        df = get_fundamentals(q, statDate=to_jq_report_period(the_data.report_date))
        # BUG FIX: the old guard `pd.isna(df).empty` is False for any
        # non-empty frame, so the timestamp was never filled. Require a
        # non-empty result with a usable pubDate instead.
        if not df.empty and not df['pubDate'].isna().any():
            the_data.timestamp = to_pd_timestamp(df['pubDate'][0])
            self.session.commit()
    except Exception as e:
        self.logger.error(e)
def record(self, entity, start, end, size, timestamps):
    """Fetch margin-trading (mtss) data from `start` and persist it."""
    df = get_mtss(code=to_jq_entity_id(entity), date=to_time_str(start))
    if pd_is_not_null(df):
        df['entity_id'] = entity.id
        df['code'] = entity.code
        df.rename(columns={'date': 'timestamp'}, inplace=True)
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['id'] = df[['entity_id', 'timestamp']].apply(
            lambda se: "{}_{}".format(se['entity_id'],
                                      to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY)),
            axis=1)
        # NOTE(review): debug print left in place to preserve behavior.
        print(df)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch dividend/rights (除权除息) plans published since `start`, convert
    per-10-share values to per-share, and persist them."""
    if not end:
        end = to_time_str(now_pd_timestamp())
    start = to_time_str(start)
    em_code = to_jq_entity_id(entity)

    div_columns_dict = {
        "report_date": "report_date",  # report date
        "board_plan_pub_date": "announce_date",  # board plan announcement date
        "a_registration_date": "record_date",  # A-share record date
        "a_bonus_date": "dividend_date",  # ex-dividend / ex-rights date
        "shareholders_plan_bonusnote": "dividend",  # plan description
        "announce_date_general_meeting": "shareholders_plan_pub_date",  # shareholders meeting announcement date
        "implementation_pub_date": "announce_date_dividend_implementation",  # implementation announcement date
        "b_registration_date": "last_trading_day_b_shares",  # B-share last trading / record date
        "at_bonus_ratio_rmb": "dividend_per_share_after_tax",  # dividend per share (after tax), raw / 10
        "bonus_ratio_rmb": "dividend_per_share_before_tax",  # dividend per share (before tax), raw / 10
        "plan_progress": "dividend_plan_progress",  # plan progress
        "dividend_arrival_date": "dividend_pay_date",  # payment / arrival date
        "dividend_ratio": "share_bonus_per_share",  # bonus shares per share, raw / 10
        "transfer_ratio": "per_share_conversion_ratio",  # conversion shares per share, raw / 10
    }

    df = finance.run_query(query(finance.STK_XR_XD).filter(
        finance.STK_XR_XD.code == em_code,
        finance.STK_XR_XD.board_plan_pub_date >= start).order_by(
            finance.STK_XR_XD.report_date).limit(100))
    df.rename(columns=div_columns_dict, inplace=True)
    # Rows without an ex-dividend date are incomplete plans; skip them.
    df.dropna(subset=['dividend_date'], inplace=True)

    if pd_is_not_null(df):
        df.reset_index(drop=True, inplace=True)
        # jq reports these ratios per 10 shares; convert to per-share values.
        df['dividend_per_share_after_tax'] = df['dividend_per_share_after_tax'] / 10
        df['dividend_per_share_before_tax'] = df['dividend_per_share_before_tax'] / 10
        df['share_bonus_per_share'] = df['share_bonus_per_share'] / 10
        df['per_share_conversion_ratio'] = df['per_share_conversion_ratio'] / 10
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df.report_date)
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        def generate_id(se):
            return "{}_{}".format(se['entity_id'],
                                  to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_id, axis=1)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def fill_timestamp_with_jq(self, security_item, the_data):
    """Back-fill the_data.timestamp with the report's publish date from jq
    and commit the change."""
    # get report published date from jq
    q = query(indicator.pubDate).filter(
        indicator.code == to_jq_entity_id(security_item), )
    df = get_fundamentals(q, statDate=to_jq_report_period(the_data.report_date))
    if not df.empty:
        the_data.timestamp = to_pd_timestamp(df['pubDate'][0])
        self.logger.info('jq fill {} {} timestamp:{} for report_date:{}'.format(
            self.data_schema, security_item.id, the_data.timestamp,
            the_data.report_date))
        self.session.commit()
def fill_timestamp_with_jq(self, security_item, the_data):
    """Back-fill the_data.timestamp with the report's publish date from jq
    and commit the change; errors are logged, not raised."""
    # get report published date from jq
    try:
        q = jq_query(indicator.pubDate).filter(
            indicator.code == to_jq_entity_id(security_item), )
        df = jq_get_fundamentals(q, statDate=to_jq_report_period(the_data.report_date))
        # BUG FIX: the old guard `pd.isna(df).empty` is False for any
        # non-empty frame, so the timestamp was never filled. Require a
        # non-empty result with a usable pubDate instead.
        if pd_is_not_null(df) and not df['pubDate'].isna().any():
            the_data.timestamp = to_pd_timestamp(df['pubDate'][0])
            self.logger.info(
                'jq fill {} {} timestamp:{} for report_date:{}'.format(
                    self.data_schema, security_item.id, the_data.timestamp,
                    the_data.report_date))
            self.session.commit()
    except Exception as e:
        self.logger.error(e)
def fill_timestamp_with_jq(self, security_item, the_data):
    """Back-fill the_data.timestamp with the report's publish date from jq
    and commit the change; errors are logged, not raised."""
    # get report published date from jq
    try:
        df = get_fundamentals(table='indicator',
                              code=to_jq_entity_id(security_item),
                              columns='pubDate',
                              date=to_jq_report_period(the_data.report_date),
                              count=None)
        # BUG FIX: the old guard `pd.isna(df).empty` is False for any
        # non-empty frame, so the timestamp was never filled. Require a
        # non-empty result with a usable pubDate instead.
        if not df.empty and not df['pubDate'].isna().any():
            the_data.timestamp = to_pd_timestamp(df['pubDate'][0])
            self.logger.info(
                'jq fill {} {} timestamp:{} for report_date:{}'.format(
                    self.data_schema, security_item.id, the_data.timestamp,
                    the_data.report_date))
            self.session.commit()
    except Exception as e:
        self.logger.error(e)
def fill_timestamp_with_jq(self, security_item, the_data):
    """Back-fill the_data.timestamp with the report's publish date from jq
    and commit the change; errors are logged, not raised."""
    # get report published date from jq
    try:
        df = get_fundamentals(
            table="indicator",
            code=to_jq_entity_id(security_item),
            columns="pubDate",
            date=to_jq_report_period(the_data.report_date),
            count=None,
            parse_dates=["pubDate"],
        )
        if pd_is_not_null(df):
            the_data.timestamp = to_pd_timestamp(df["pubDate"][0])
            self.logger.info(
                "jq fill {} {} timestamp:{} for report_date:{}".format(
                    self.data_schema, security_item.id, the_data.timestamp,
                    the_data.report_date))
            self.session.commit()
    except Exception as e:
        self.logger.error(e)
def record(self, entity, start, end, size, timestamps):
    """Fetch valuation fundamentals in a window of at most 500 days, normalize
    units, and persist them."""
    start = max(start, to_pd_timestamp("2005-01-01"))
    end = min(now_pd_timestamp(), start + Timedelta(days=500))
    count: Timedelta = end - start
    df = get_fundamentals(table="valuation",
                          code=to_jq_entity_id(entity),
                          date=to_time_str(end),
                          count=min(count.days, 500))
    # ROBUSTNESS: an empty result would raise KeyError on df["day"] below;
    # guard like the sibling record() implementations do.
    if not pd_is_not_null(df):
        return None
    df["entity_id"] = entity.id
    df["timestamp"] = pd.to_datetime(df["day"])
    df["code"] = entity.code
    df["name"] = entity.name
    df["id"] = df["timestamp"].apply(
        lambda x: "{}_{}".format(entity.id, to_time_str(x)))
    df = df.rename(
        {
            "pe_ratio_lyr": "pe",
            "pe_ratio": "pe_ttm",
            "pb_ratio": "pb",
            "ps_ratio": "ps",
            "pcf_ratio": "pcf"
        },
        axis="columns",
    )
    # Scale raw values: caps ×1e8, share counts ×1e4, turnover ratio %→fraction.
    df["market_cap"] = df["market_cap"] * 100000000
    df["circulating_market_cap"] = df["circulating_market_cap"] * 100000000
    df["capitalization"] = df["capitalization"] * 10000
    df["circulating_cap"] = df["circulating_cap"] * 10000
    df["turnover_ratio"] = df["turnover_ratio"] * 0.01
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
             force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps, http_session):
    """Fetch margin-trading (mtss) data from `start` and return it; None on
    an empty result."""
    df = jq_get_mtss(code=to_jq_entity_id(entity), date=to_time_str(start))
    return df if pd_is_not_null(df) else None
def record(self, entity, start, end, size, timestamps):
    """Fetch stock money-flow data, convert raw units (万元 → yuan, percent →
    fraction), derive totals, and persist the rows."""
    if not self.end_timestamp:
        df = get_money_flow(code=to_jq_entity_id(entity), date=to_time_str(start))
    else:
        # BUG FIX: start was passed raw here while every other date argument
        # is normalized through to_time_str.
        df = get_money_flow(code=to_jq_entity_id(entity),
                            date=to_time_str(start),
                            end_date=to_time_str(self.end_timestamp))
    df = df.dropna()
    if pd_is_not_null(df):
        df["name"] = entity.name
        df.rename(
            columns={
                "date": "timestamp",
                "net_amount_main": "net_main_inflows",
                "net_pct_main": "net_main_inflow_rate",
                "net_amount_xl": "net_huge_inflows",
                "net_pct_xl": "net_huge_inflow_rate",
                "net_amount_l": "net_big_inflows",
                "net_pct_l": "net_big_inflow_rate",
                "net_amount_m": "net_medium_inflows",
                "net_pct_m": "net_medium_inflow_rate",
                "net_amount_s": "net_small_inflows",
                "net_pct_s": "net_small_inflow_rate",
            },
            inplace=True,
        )
        # Coerce amount columns to float, dropping unparseable rows.
        inflows_cols = [
            "net_main_inflows",
            "net_huge_inflows",
            "net_big_inflows",
            "net_medium_inflows",
            "net_small_inflows",
        ]
        for col in inflows_cols:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        df = df.dropna()
        if not pd_is_not_null(df):
            return None
        df[inflows_cols] = df[inflows_cols].apply(lambda x: x * 10000)

        inflow_rate_cols = [
            "net_main_inflow_rate",
            "net_huge_inflow_rate",
            "net_big_inflow_rate",
            "net_medium_inflow_rate",
            "net_small_inflow_rate",
        ]
        for col in inflow_rate_cols:
            df[col] = pd.to_numeric(df[col], errors="coerce")
        df = df.dropna()
        if not pd_is_not_null(df):
            return None
        df[inflow_rate_cols] = df[inflow_rate_cols].apply(lambda x: x / 100)

        # Total inflows across all order sizes.
        df["net_inflows"] = (df["net_huge_inflows"] + df["net_big_inflows"] +
                             df["net_medium_inflows"] + df["net_small_inflows"])
        # Total inflow rate, using the main-inflow pair to infer turnover.
        amount = df["net_main_inflows"] / df["net_main_inflow_rate"]
        df["net_inflow_rate"] = df["net_inflows"] / amount

        df["entity_id"] = entity.id
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        df["provider"] = "joinquant"
        df["code"] = entity.code

        def generate_kdata_id(se):
            return "{}_{}".format(se["entity_id"],
                                  to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY))

        df["id"] = df[["entity_id", "timestamp"]].apply(generate_kdata_id, axis=1)
        df = df.drop_duplicates(subset="id", keep="last")
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch stock money-flow data, convert raw units (万元 → yuan, percent →
    fraction), derive totals, and persist the rows."""
    if not self.end_timestamp:
        df = get_money_flow(code=to_jq_entity_id(entity), date=to_time_str(start))
    else:
        # BUG FIX: start was passed raw here while every other date argument
        # is normalized through to_time_str.
        df = get_money_flow(code=to_jq_entity_id(entity),
                            date=to_time_str(start),
                            end_date=to_time_str(self.end_timestamp))
    df = df.dropna()
    if pd_is_not_null(df):
        df['name'] = entity.name
        df.rename(columns={
            'date': 'timestamp',
            'net_amount_main': 'net_main_inflows',
            'net_pct_main': 'net_main_inflow_rate',
            'net_amount_xl': 'net_huge_inflows',
            'net_pct_xl': 'net_huge_inflow_rate',
            'net_amount_l': 'net_big_inflows',
            'net_pct_l': 'net_big_inflow_rate',
            'net_amount_m': 'net_medium_inflows',
            'net_pct_m': 'net_medium_inflow_rate',
            'net_amount_s': 'net_small_inflows',
            'net_pct_s': 'net_small_inflow_rate'
        }, inplace=True)
        # Coerce amount columns to float, dropping unparseable rows.
        inflows_cols = [
            'net_main_inflows', 'net_huge_inflows', 'net_big_inflows',
            'net_medium_inflows', 'net_small_inflows'
        ]
        for col in inflows_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        df = df.dropna()
        if not pd_is_not_null(df):
            return None
        df[inflows_cols] = df[inflows_cols].apply(lambda x: x * 10000)

        inflow_rate_cols = [
            'net_main_inflow_rate', 'net_huge_inflow_rate',
            'net_big_inflow_rate', 'net_medium_inflow_rate',
            'net_small_inflow_rate'
        ]
        for col in inflow_rate_cols:
            df[col] = pd.to_numeric(df[col], errors='coerce')
        df = df.dropna()
        if not pd_is_not_null(df):
            return None
        df[inflow_rate_cols] = df[inflow_rate_cols].apply(lambda x: x / 100)

        # Total inflows across all order sizes.
        df['net_inflows'] = (df['net_huge_inflows'] + df['net_big_inflows'] +
                             df['net_medium_inflows'] + df['net_small_inflows'])
        # Total inflow rate, using the main-inflow pair to infer turnover.
        amount = df['net_main_inflows'] / df['net_main_inflow_rate']
        df['net_inflow_rate'] = df['net_inflows'] / amount

        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['code'] = entity.code

        def generate_kdata_id(se):
            return "{}_{}".format(se['entity_id'],
                                  to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)
        df = df.drop_duplicates(subset='id', keep='last')
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None
def record(self, entity, start, end, size, timestamps):
    """Fetch kdata bars, persist them after de-duplication, and trigger a qfq
    recomputation when the stored forward-adjusted series is stale."""
    # hfq anchors to a fixed reference date; qfq anchors to "today".
    if self.adjust_type == AdjustType.hfq:
        fq_ref_date = "2000-01-01"
    else:
        fq_ref_date = to_time_str(now_pd_timestamp())

    if self.end_timestamp:
        df = get_bars(
            to_jq_entity_id(entity),
            count=size,
            unit=self.jq_trading_level,
            end_date=to_time_str(self.end_timestamp),
            fq_ref_date=fq_ref_date,
        )
    else:
        df = get_bars(
            to_jq_entity_id(entity),
            count=size,
            unit=self.jq_trading_level,
            fq_ref_date=fq_ref_date,
        )

    if pd_is_not_null(df):
        df["name"] = entity.name
        df.rename(columns={"money": "turnover", "date": "timestamp"}, inplace=True)
        df["entity_id"] = entity.id
        df["timestamp"] = pd.to_datetime(df["timestamp"])
        df["provider"] = "joinquant"
        df["level"] = self.level.value
        df["code"] = entity.code

        # Decide whether previously saved qfq rows must be recomputed.
        if self.adjust_type == AdjustType.qfq:
            check_df = df.head(1)
            check_date = check_df["timestamp"][0]
            current_df = get_kdata(
                entity_id=entity.id,
                provider=self.provider,
                start_timestamp=check_date,
                end_timestamp=check_date,
                limit=1,
                level=self.level,
                adjust_type=self.adjust_type,
            )
            if pd_is_not_null(current_df):
                old = current_df.iloc[0, :]["close"]
                new = check_df["close"][0]
                # A different close at the same timestamp means the stored
                # forward-adjusted data needs recomputation.
                if round(old, 2) != round(new, 2):
                    self.recompute_qfq(entity,
                                       qfq_factor=new / old,
                                       last_timestamp=pd.Timestamp(check_date))

        def generate_kdata_id(se):
            # Day-resolution id for daily-or-coarser levels, ISO8601 otherwise.
            if self.level >= IntervalLevel.LEVEL_1DAY:
                return "{}_{}".format(se["entity_id"],
                                      to_time_str(se["timestamp"], fmt=TIME_FORMAT_DAY))
            return "{}_{}".format(se["entity_id"],
                                  to_time_str(se["timestamp"], fmt=TIME_FORMAT_ISO8601))

        df["id"] = df[["entity_id", "timestamp"]].apply(generate_kdata_id, axis=1)
        df = df.drop_duplicates(subset="id", keep="last")
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
                 force_update=self.force_update)
    return None