def cal_ma_states(start='000001', end='002000'):
    """Build daily and weekly MA state/factor objects for stocks with codes in [start, end)."""
    logger.info(f'start cal day ma stats {start}:{end}')
    stock_df = get_entities(provider='eastmoney',
                            entity_type='stock',
                            columns=[Stock.entity_id, Stock.code],
                            filters=[Stock.code >= start, Stock.code < end])
    code_list = stock_df.index.to_list()

    # daily-level MA stats and factor
    day_stats = MaStateStas(codes=code_list,
                            start_timestamp='2005-01-01',
                            end_timestamp=now_pd_timestamp(),
                            level=IntervalLevel.LEVEL_1DAY)
    day_factor = MaFactor(codes=code_list,
                          start_timestamp='2005-01-01',
                          end_timestamp=now_pd_timestamp(),
                          level=IntervalLevel.LEVEL_1DAY)
    logger.info(f'finish cal day ma stats {start}:{end}')

    # weekly-level MA stats
    week_stats = MaStateStas(codes=code_list,
                             start_timestamp='2005-01-01',
                             end_timestamp=now_pd_timestamp(),
                             level=IntervalLevel.LEVEL_1WEEK)
    logger.info(f'finish cal week ma stats {start}:{end}')
def record(self, entity, start, end, size, timestamps):
    """Fetch pre-adjusted (qfq) bars for *entity* from joinquant and persist them.

    Also compares the oldest fetched close with the stored close for the same
    day to detect whether the previously saved qfq history must be recomputed.
    """
    # only want pre-adjusted (qfq) data
    if not self.end_timestamp:
        df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level,
                      fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                      fq_ref_date=to_time_str(now_pd_timestamp()),
                      include_now=True)
    else:
        end_timestamp = to_time_str(self.end_timestamp)
        df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level,
                      fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                      end_dt=end_timestamp,
                      fq_ref_date=to_time_str(now_pd_timestamp()),
                      include_now=False)
    if pd_is_not_null(df):
        df['name'] = entity.name
        df.rename(columns={'money': 'turnover', 'date': 'timestamp'}, inplace=True)

        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value
        df['code'] = entity.code

        # decide whether the previously saved qfq data needs to be recomputed
        check_df = df.head(1)
        check_date = check_df['timestamp'][0]
        current_df = get_kdata(entity_id=entity.id, provider=self.provider, start_timestamp=check_date,
                               end_timestamp=check_date, limit=1, level=self.level)
        if pd_is_not_null(current_df):
            old = current_df.iloc[0, :]['close']
            new = check_df['close'][0]
            # a different close at the same timestamp means the stored qfq
            # series is stale and must be recomputed
            if round(old, 2) != round(new, 2):
                self.factor = new / old
                self.last_timestamp = pd.Timestamp(check_date)

        def generate_kdata_id(se):
            # daily-and-above levels key records by day; intraday levels key
            # by full ISO timestamp
            if self.level >= IntervalLevel.LEVEL_1DAY:
                return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
            else:
                return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)

        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

    return None
def every_day_report():
    """Run the fundamentals-based selection for today and email the result, retrying on failure."""
    while True:
        try:
            now = now_pd_timestamp()
            targets = select_by_finance(now)
            logger.info(f'selected:{len(targets)}')

            if not targets:
                msg = 'no targets'
            else:
                # dedupe before looking up entity info
                targets = list(set(targets))
                entity_df = get_entities(provider='eastmoney',
                                         entity_schema=Stock,
                                         entity_ids=targets,
                                         columns=['code', 'name'])
                parts = []
                for idx in entity_df.index:
                    parts.append(entity_df.loc[idx, 'code'] + ' ' + entity_df.loc[idx, 'name'])
                msg = ' '.join(parts)

            logger.info(msg)
            informer = EmailInformer()
            informer.send_message("*****@*****.**", f'{now} 基本面选股结果', msg)
            break
        except Exception as e:
            logger.exception('report2 sched error:{}'.format(e))
            time.sleep(60 * 3)
def on_finish(self):
    """Back-fill rights_raising_fund on DividendFinancing rows that are still missing it."""
    current_year = str(now_pd_timestamp().year)
    entity_codes = [entity.code for entity in self.entities]

    # summary rows up to this year whose rights_raising_fund is still NULL
    unfilled = get_dividend_financing(provider=self.provider,
                                      codes=entity_codes,
                                      return_type='domain',
                                      session=self.session,
                                      filters=[DividendFinancing.rights_raising_fund.is_(None)],
                                      end_timestamp=current_year)

    for record in unfilled:
        year_end = "{}-12-31".format(record.timestamp.year)
        detail_df = get_rights_issue_detail(provider=self.provider,
                                            entity_id=record.entity_id,
                                            columns=[RightsIssueDetail.timestamp,
                                                     RightsIssueDetail.rights_raising_fund],
                                            start_timestamp=record.timestamp,
                                            end_timestamp=year_end)
        if df_is_not_null(detail_df):
            # fold that year's detail rows into the summary row
            record.rights_raising_fund = detail_df['rights_raising_fund'].sum()
            self.session.commit()

    super().on_finish()
def record(self, entity, start, end, size, timestamps):
    """Crawl the member stocks of a block (category) page by page and persist them.

    Fetches pages 1..4 of the category stock list, stopping early when the
    endpoint returns an empty ('null') payload.
    """
    for page in range(1, 5):
        resp = requests.get(self.category_stocks_url.format(page, entity.code))
        try:
            # an empty payload marks the end of the paging
            if resp.text is None or resp.text == 'null':
                break
            category_jsons = demjson.decode(resp.text)
            the_list = []

            for category in category_jsons:
                stock_code = category['code']
                stock_id = china_stock_code_to_id(stock_code)
                block_id = entity.id
                the_list.append({
                    'id': '{}_{}'.format(block_id, stock_id),
                    'entity_id': block_id,
                    'entity_type': 'block',
                    'exchange': entity.exchange,
                    'code': entity.code,
                    'name': entity.name,
                    'timestamp': now_pd_timestamp(),
                    'stock_id': stock_id,
                    'stock_code': stock_code,
                    'stock_name': category['name'],
                })
            if the_list:
                df = pd.DataFrame.from_records(the_list)
                df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)

            self.logger.info('finish recording BlockStock:{},{}'.format(entity.category, entity.name))
        except Exception as e:
            # FIX: the original passed e and resp.text as extra logging args to a
            # message with no % placeholders, so both were silently dropped
            self.logger.error('error:%s, resp.text:%s', e, resp.text)
        self.sleep()
def select_by_finance(timestamp=None, entity_ids=None):
    """Select long targets by fundamentals up to *timestamp* (defaults to "now").

    FIX: the default used to be ``now_pd_timestamp()`` evaluated once at module
    import time, so a long-running process kept selecting against a stale date.
    It is now resolved on every call; explicit arguments behave as before.
    """
    if timestamp is None:
        timestamp = now_pd_timestamp()
    if timestamp.dayofweek in (5, 6):
        # NOTE(review): this only logs on weekends but does not return — confirm
        # whether the selection should be skipped entirely here
        logger.info(f'today:{timestamp} is {timestamp.day_name()},just ignore')

    today = to_time_str(timestamp)

    my_selector = TargetSelector(start_timestamp='2015-01-01', end_timestamp=today, entity_ids=entity_ids)
    # add the factors
    good_factor1 = GoodCompanyFactor(start_timestamp='2015-01-01', end_timestamp=today, entity_ids=entity_ids)
    good_factor2 = GoodCompanyFactor(
        start_timestamp='2015-01-01',
        end_timestamp=today,
        entity_ids=entity_ids,
        data_schema=CashFlowStatement,
        columns=[CashFlowStatement.report_period,
                 CashFlowStatement.net_op_cash_flows],
        filters=[CashFlowStatement.net_op_cash_flows > 0],
        col_threshold={'net_op_cash_flows': 100000000})

    my_selector.add_filter_factor(good_factor1)
    my_selector.add_filter_factor(good_factor2)
    my_selector.run()

    long_targets = my_selector.get_open_long_targets(today)
    logger.info(f'selected:{len(long_targets)}')
    return long_targets
def record(self, entity, start, end, size, timestamps):
    """Fetch daily valuation data (pe/pb/ps/pcf, caps) for *entity* from joinquant and persist it."""
    q = query(valuation).filter(valuation.code == to_jq_entity_id(entity))
    # number of calendar days between start and now decides how many rows to pull
    count: pd.Timedelta = now_pd_timestamp() - start
    df = get_fundamentals_continuously(q, end_date=now_time_str(), count=count.days + 1, panel=False)
    df['entity_id'] = entity.id
    df['timestamp'] = pd.to_datetime(df['day'])
    df['code'] = entity.code
    df['name'] = entity.name
    df['id'] = df['timestamp'].apply(lambda x: "{}_{}".format(entity.id, to_time_str(x)))
    df = df.rename({'pe_ratio_lyr': 'pe',
                    'pe_ratio': 'pe_ttm',
                    'pb_ratio': 'pb',
                    'ps_ratio': 'ps',
                    'pcf_ratio': 'pcf'}, axis='columns')

    # joinquant reports caps in units of 1e8 (亿) and share counts in 1e4 (万)
    df['market_cap'] = df['market_cap'] * 100000000
    df['circulating_cap'] = df['circulating_cap'] * 100000000
    df['capitalization'] = df['capitalization'] * 10000
    # NOTE(review): 'circulating_cap' is scaled a second time here (1e8 then
    # 1e4, i.e. 1e12 total) — this line likely should target a different column
    # (e.g. a circulating share count); confirm against the data schema
    df['circulating_cap'] = df['circulating_cap'] * 10000

    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)
    return None
def record(self, entity_item, start, end, size, timestamps):
    """Fetch the github users created on timestamps[0] and bulk-persist all but the last one.

    The final item is returned instead of written directly — presumably so the
    framework persists it through the normal per-record path.
    """
    self.seed += 1
    timestamp = timestamps[0]
    the_url = self.url.format(to_time_str(timestamp), to_time_str(timestamp))

    items = get_all_results(url=the_url, token=GithubAccount.get_token(seed=self.seed))
    current_time = now_pd_timestamp()

    def to_record(item):
        return {
            'id': f'user_github_{item["login"]}',
            'entity_id': f'user_github_{item["login"]}',
            'timestamp': timestamp,
            'exchange': 'github',
            'entity_type': 'user',
            'code': item['login'],
            'node_id': item['node_id'],
            'created_timestamp': current_time,
            'updated_timestamp': None
        }

    results = [to_record(item) for item in items]

    # for save faster
    df = pd.DataFrame(data=results[:-1])
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force=True)

    return results[-1:]
def persist(self, entity, domain_list):
    """Persist fetched domain objects, optionally dropping today's still-unfinished bar.

    :param entity: the entity the records belong to
    :param domain_list: domain objects, ordered ascending or descending by timestamp
    """
    if domain_list:
        # domain_list may arrive in either order; normalize the interval bounds
        if domain_list[0].timestamp >= domain_list[-1].timestamp:
            first_timestamp = domain_list[-1].timestamp
            last_timestamp = domain_list[0].timestamp
        else:
            first_timestamp = domain_list[0].timestamp
            last_timestamp = domain_list[-1].timestamp
        self.logger.info(
            "persist {} for entity_id:{},time interval:[{},{}]".format(
                self.data_schema, entity.id, first_timestamp, last_timestamp))

        current_timestamp = now_pd_timestamp()
        saving_datas = domain_list

        # FIXME:remove this logic
        # FIXME:should remove unfinished data when recording,always set it to False now
        if is_same_date(current_timestamp, last_timestamp) and self.contain_unfinished_data:
            # FIX: the original compared hour and minute independently
            # (hour >= close_hour AND minute >= close_minute + 2), which wrongly
            # treated e.g. 16:00 as before-close when the close is 15:00
            after_close = current_timestamp.hour > self.close_hour or (
                current_timestamp.hour == self.close_hour
                and current_timestamp.minute >= self.close_minute + 2)
            if after_close:
                # after the closing time of the day,we think the last data is finished
                saving_datas = domain_list
            else:
                # ignore unfinished kdata
                saving_datas = domain_list[:-1]
                self.logger.info(
                    "ignore kdata for entity_id:{},level:{},timestamp:{},current_timestamp"
                    .format(entity.id, self.level, last_timestamp, current_timestamp))

        self.session.add_all(saving_datas)
        self.session.commit()
def record(self, entity, start, end, size, timestamps):
    """Fetch unadjusted bars for *entity* from joinquant and return them as records.

    The last row is dropped while the market is still trading, because that bar
    is not finished yet.
    """
    if self.start_timestamp:
        start = max(self.start_timestamp, to_pd_timestamp(start))
    end = now_pd_timestamp() + timedelta(days=1)

    start_timestamp = to_time_str(start)
    end_timestamp = to_time_str(end)
    # unadjusted data
    df = get_price(to_jq_entity_id(entity), start_date=to_time_str(start_timestamp),
                   end_date=end_timestamp,
                   frequency=self.jq_trading_level,
                   fields=['open', 'close', 'low', 'high', 'volume', 'money'],
                   skip_paused=True, fq=None)
    # FIX: guard against an empty result — the original indexed df.iloc[-1]
    # unconditionally and raised IndexError for entities with no data in range
    if df is None or df.empty:
        return []

    df.index.name = 'timestamp'
    df.reset_index(inplace=True)
    df['name'] = entity.name
    df.rename(columns={'money': 'turnover'}, inplace=True)

    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['provider'] = 'joinquant'
    df['level'] = self.level.value

    # remove the unfinished kdata
    if is_in_trading(entity_type='stock', exchange='sh', timestamp=df.iloc[-1, :]['timestamp']):
        df = df.iloc[:-1, :]

    return df.to_dict(orient='records')
def init_entities(self):
    """Initialize entities, then drop the delisted ones (those with a past end_date)."""
    super().init_entities()
    # filter out delisted entities
    active = []
    for entity in self.entities:
        if entity.end_date is None or entity.end_date > now_pd_timestamp():
            active.append(entity)
    self.entities = active
def is_in_trading(entity_type, exchange, timestamp):
    """Return True if *timestamp* is today and the current time falls strictly inside a trading interval."""
    now = now_pd_timestamp()
    the_timestamp = to_pd_timestamp(timestamp)
    if not is_same_date(now, the_timestamp):
        return False
    # check every trading session of the exchange
    return any(
        date_and_time(now, open_time) < now < date_and_time(now, close_time)
        for open_time, close_time in get_trading_intervals(entity_type=entity_type,
                                                           exchange=exchange))
def __init__(
        self,
        data_schema=FinanceFactor,
        entity_ids: List[str] = None,
        entity_type: str = 'stock',
        exchanges: List[str] = None,
        codes: List[str] = None,
        the_timestamp: Union[str, pd.Timestamp] = None,
        start_timestamp: Union[str, pd.Timestamp] = '2005-01-01',
        end_timestamp: Union[str, pd.Timestamp] = None,
        columns: List = None,
        filters: List = None,
        order: object = None,
        limit: int = None,
        provider: str = 'eastmoney',
        level: Union[str, IntervalLevel] = IntervalLevel.LEVEL_1DAY,
        category_field: str = 'entity_id',
        time_field: str = 'timestamp',
        computing_window: int = None,
        keep_all_timestamp: bool = True,
        fill_method: str = 'ffill',
        effective_number: int = None,
        transformer: Transformer = None,
        accumulator: Accumulator = None,
        persist_factor: bool = False,
        dry_run: bool = False,
        # 3 years
        window='1095d',
        count=8,
        col_threshold=None,
        handling_on_period=('roe', )) -> None:
    """Factor selecting "good companies": high roe, strong cash flow, low leverage, growth.

    FIX: ``end_timestamp=now_pd_timestamp()`` and the mutable list/dict defaults
    were evaluated once at import time; they are now resolved per call (passing
    None selects these defaults; explicit arguments behave exactly as before).

    :param window: lookback window for the factor (default 3 years)
    :param count: number of report periods required inside the window
    :param col_threshold: per-column minimum thresholds (default {'roe': 0.02})
    :param handling_on_period: columns that only make sense on a yearly basis
        (e.g. roe) and therefore get quarter-aware handling
    """
    if exchanges is None:
        exchanges = ['sh', 'sz']
    if end_timestamp is None:
        end_timestamp = now_pd_timestamp()
    if columns is None:
        # high roe, strong cash flow, low financial leverage, with growth
        columns = [
            FinanceFactor.roe,
            FinanceFactor.op_income_growth_yoy,
            FinanceFactor.net_profit_growth_yoy,
            FinanceFactor.report_period,
            FinanceFactor.op_net_cash_flow_per_op_income,
            FinanceFactor.sales_net_cash_flow_per_op_income,
            FinanceFactor.current_ratio,
            FinanceFactor.debt_asset_ratio
        ]
    if filters is None:
        filters = [
            FinanceFactor.roe >= 0.02,
            FinanceFactor.op_income_growth_yoy >= 0.05,
            FinanceFactor.net_profit_growth_yoy >= 0.05,
            FinanceFactor.op_net_cash_flow_per_op_income >= 0.1,
            FinanceFactor.sales_net_cash_flow_per_op_income >= 0.3,
            FinanceFactor.current_ratio >= 1,
            FinanceFactor.debt_asset_ratio <= 0.5
        ]
    if col_threshold is None:
        col_threshold = {'roe': 0.02}

    self.window = window
    self.count = count
    self.col_threshold = col_threshold
    # indicators that are only meaningful per year (e.g. roe) are treated
    # differently depending on the quarter of the report
    self.handling_on_period = handling_on_period

    super().__init__(data_schema, entity_ids, entity_type, exchanges, codes,
                     the_timestamp, start_timestamp, end_timestamp, columns,
                     filters, order, limit, provider, level, category_field,
                     time_field, computing_window, keep_all_timestamp,
                     fill_method, effective_number, transformer, accumulator,
                     persist_factor, dry_run)
def report_core_company():
    """Select core companies by fundamentals, sync them into an eastmoney 'core' group and email subscribers.

    Retries every 3 minutes on error; after 10 consecutive failures an alert
    email is sent.
    """
    # FIX: error_count used to be reset to 0 at the top of every loop iteration,
    # so the "10 consecutive errors" alert below could never trigger
    error_count = 0
    while True:
        email_action = EmailInformer()
        try:
            # StockTradeDay.record_data(provider='joinquant')
            # Stock.record_data(provider='joinquant')
            # FinanceFactor.record_data(provider='eastmoney')
            # BalanceSheet.record_data(provider='eastmoney')
            target_date = to_time_str(now_pd_timestamp())

            my_selector: TargetSelector = FundamentalSelector(
                start_timestamp='2015-01-01', end_timestamp=target_date)
            my_selector.run()
            long_targets = my_selector.get_open_long_targets(timestamp=target_date)
            if long_targets:
                stocks = get_entities(provider='joinquant',
                                      entity_schema=Stock,
                                      entity_ids=long_targets,
                                      return_type='domain')
                # add them to eastmoney
                try:
                    try:
                        eastmoneypy.del_group('core')
                    except:
                        pass
                    eastmoneypy.create_group('core')
                    for stock in stocks:
                        eastmoneypy.add_to_group(stock.code, group_name='core')
                except Exception as e:
                    email_action.send_message(
                        "*****@*****.**", f'report_core_company error',
                        'report_core_company error:{}'.format(e))

                info = [f'{stock.name}({stock.code})' for stock in stocks]
                msg = ' '.join(info)
            else:
                msg = 'no targets'
            logger.info(msg)
            email_action.send_message(get_subscriber_emails(),
                                      f'{to_time_str(target_date)} 核心资产选股结果', msg)
            break
        except Exception as e:
            logger.exception('report_core_company error:{}'.format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            if error_count == 10:
                email_action.send_message(
                    "*****@*****.**", f'report_core_company error',
                    'report_core_company error:{}'.format(e))
def get_report_period(the_date=None):
    """Return the end date ('YYYY-MM-DD') of the most recent finished report period for *the_date*.

    Month >= 10 -> Q3, >= 7 -> half-year, >= 4 -> Q1, otherwise the previous
    year's annual report.

    FIX: the default used to be ``now_pd_timestamp()`` evaluated once at module
    import time; it is now resolved on each call.

    :param the_date: any object with .year and .month (defaults to "now")
    """
    if the_date is None:
        the_date = now_pd_timestamp()
    if the_date.month >= 10:
        return "{}{}".format(the_date.year, '-09-30')
    elif the_date.month >= 7:
        return "{}{}".format(the_date.year, '-06-30')
    elif the_date.month >= 4:
        return "{}{}".format(the_date.year, '-03-31')
    else:
        return "{}{}".format(the_date.year - 1, '-12-31')
def download_sz_etf_component(self, df: pd.DataFrame):
    """Download the component stocks of SZ ETFs via their underlying index pages on sina.

    :param df: SZ ETF list; must carry 拟合指数 (underlying index) and 证券代码 (ETF code)
    """
    query_url = 'http://vip.stock.finance.sina.com.cn/corp/go.php/vII_NewestComponent/indexid/{}.phtml'

    self.parse_sz_etf_underlying_index(df)
    for _, etf in df.iterrows():
        underlying_index = etf['拟合指数']
        etf_code = etf['证券代码']

        if len(underlying_index) == 0:
            # no underlying A-share index -> nothing to fetch for this ETF
            self.logger.info(f'{etf["证券简称"]} - {etf_code} 非 A 股市场指数,跳过...')
            continue

        url = query_url.format(underlying_index)
        response = requests.get(url)
        response.encoding = 'gbk'

        try:
            dfs = pd.read_html(response.text, header=1)
        except ValueError as error:
            self.logger.error(f'HTML parse error: {error}, response: {response.text}')
            continue

        if len(dfs) < 4:
            continue

        # the 4th table on the page holds the component list
        response_df = dfs[3].copy()
        response_df = response_df.dropna(axis=1, how='any')
        # zero-pad stock codes to 6 digits
        response_df['品种代码'] = response_df['品种代码'].apply(lambda x: f'{x:06d}')

        etf_id = f'etf_sz_{etf_code}'
        response_df = response_df[['品种代码', '品种名称']].copy()
        response_df.rename(columns={'品种代码': 'stock_code',
                                    '品种名称': 'stock_name'}, inplace=True)

        response_df['entity_id'] = etf_id
        response_df['entity_type'] = 'etf'
        response_df['exchange'] = 'sz'
        response_df['code'] = etf_code
        response_df['name'] = etf['证券简称']
        response_df['timestamp'] = now_pd_timestamp()

        response_df['stock_id'] = response_df['stock_code'].apply(lambda code: china_stock_code_to_id(code))
        response_df['id'] = response_df['stock_id'].apply(lambda x: f'{etf_id}_{x}')

        df_to_db(data_schema=self.data_schema, df=response_df, provider=self.provider)
        self.logger.info(f'{etf["证券简称"]} - {etf_code} 成分股抓取完成...')

        self.sleep()
def report_cross_ma():
    """Select stocks via the MA-cross factor for the latest trading day and email the result.

    Retries every 3 minutes on error; after 10 consecutive failures an alert
    email is sent.
    """
    # FIX: error_count used to be reset to 0 at the top of every loop iteration,
    # so the "10 consecutive errors" alert below could never trigger
    error_count = 0
    while True:
        email_action = EmailInformer()
        try:
            # fetch k-line data
            # StockTradeDay.record_data(provider='joinquant')
            # Stock1dKdata.record_data(provider='joinquant')
            latest_day: StockTradeDay = StockTradeDay.query_data(
                order=StockTradeDay.timestamp.desc(), limit=1, return_type='domain')
            if latest_day:
                target_date = latest_day[0].timestamp
            else:
                target_date = now_pd_timestamp()

            # compute moving averages
            my_selector = TargetSelector(start_timestamp='2018-01-01',
                                         end_timestamp=target_date)
            # add the factors
            ma_factor = CrossMaFactor(start_timestamp='2018-01-01',
                                      end_timestamp=target_date)
            my_selector.add_filter_factor(ma_factor)
            my_selector.run()
            long_targets = my_selector.get_open_long_targets(timestamp=target_date)
            if long_targets:
                stocks = get_entities(provider='joinquant',
                                      entity_schema=Stock,
                                      entity_ids=long_targets,
                                      return_type='domain')
                info = [f'{stock.name}({stock.code})' for stock in stocks]
                msg = ' '.join(info)
            else:
                msg = 'no targets'
            logger.info(msg)
            email_action.send_message("*****@*****.**", f'{target_date} 均线选股结果', msg)
            break
        except Exception as e:
            logger.exception('report_cross_ma error:{}'.format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            if error_count == 10:
                email_action.send_message("*****@*****.**",
                                          f'report_cross_ma error',
                                          'report_cross_ma error:{}'.format(e))
def get_etf_stocks(code=None, codes=None, ids=None, timestamp=None, provider=None):
    """Return the component stocks held by ETF(s) as of *timestamp*.

    Annual/half-year reports carry the full portfolio; a quarterly report is
    merged with earlier reports (walking back up to 20 periods) until a
    half-year or annual report completes the holdings.

    FIX: the default used to be ``now_pd_timestamp()`` evaluated once at module
    import time; it is now resolved on each call.

    :return: a DataFrame of holdings, or None if no suitable report is found
    """
    if timestamp is None:
        timestamp = now_pd_timestamp()
    latests: List[EtfStock] = EtfStock.query_data(
        provider=provider,
        code=code,
        end_timestamp=timestamp,
        order=EtfStock.timestamp.desc(),
        limit=1,
        return_type='domain')
    if latests:
        latest_record = latests[0]
        # fetch the latest report
        df = EtfStock.query_data(
            provider=provider,
            code=code,
            codes=codes,
            ids=ids,
            end_timestamp=timestamp,
            filters=[EtfStock.report_date == latest_record.report_date])
        # the latest report is an annual or half-year report: complete as-is
        if latest_record.report_period == ReportPeriod.year or latest_record.report_period == ReportPeriod.half_year:
            return df
        # quarterly report: combine with annual/half-year reports to rebuild the holdings
        else:
            step = 0
            while True:
                report_date = get_recent_report_date(latest_record.report_date, step=step)
                pre_df = EtfStock.query_data(
                    provider=provider,
                    code=code,
                    codes=codes,
                    ids=ids,
                    end_timestamp=timestamp,
                    filters=[EtfStock.report_date == to_pd_timestamp(report_date)])
                df = df.append(pre_df)

                # stop at the first half-year or annual report
                if (ReportPeriod.half_year.value in pre_df['report_period'].tolist()) or (
                        ReportPeriod.year.value in pre_df['report_period'].tolist()):
                    # keep only the newest holding per stock
                    df = df.drop_duplicates(subset=['stock_code'], keep='first')
                    return df

                step = step + 1
                if step >= 20:
                    break
def record(self, entity_item, start, end, size, timestamps):
    """Refresh the github user info for *entity_item*, stamping the update time."""
    self.seed += 1
    the_url = self.url.format(entity_item.code)
    user_info = get_result(url=the_url, token=GithubAccount.get_token(seed=self.seed))
    if not user_info:
        return []
    user_info['updated_timestamp'] = now_pd_timestamp()
    return [user_info]
def record(self, entity, start, end, size, timestamps):
    """Fetch unadjusted bars plus a parallel pre-adjusted (qfq) series for *entity*.

    The qfq columns are attached to the unadjusted frame, and the oldest qfq
    close is compared with the stored one to detect when the saved qfq history
    must be recomputed.
    """
    # unadjusted data
    try:
        df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level,
                      fields=['date', 'open', 'close', 'low', 'high', 'volume', 'money'],
                      include_now=False)
    except Exception as e:
        # just ignore the error,for some new stocks not in the index
        self.logger.exception(e)
        return None
    df['name'] = entity.name
    df.rename(columns={'money': 'turnover'}, inplace=True)
    df['timestamp'] = pd.to_datetime(df['date'])
    df['provider'] = 'joinquant'
    df['level'] = self.level.value

    # pre-adjusted (qfq) data
    end_timestamp = to_time_str(now_pd_timestamp())
    qfq_df = get_bars(to_jq_entity_id(entity),
                      count=size,
                      unit=self.jq_trading_level,
                      fields=['date', 'open', 'close', 'low', 'high'],
                      fq_ref_date=end_timestamp,
                      include_now=False)
    # not need to update past
    # NOTE(review): this relies on df and qfq_df sharing the same index/length —
    # confirm get_bars guarantees aligned results for both calls
    df['qfq_close'] = qfq_df['close']
    df['qfq_open'] = qfq_df['open']
    df['qfq_high'] = qfq_df['high']
    df['qfq_low'] = qfq_df['low']

    check_df = qfq_df.head(1)
    check_date = check_df['date'][0]
    current_df = get_kdata(entity_id=entity.id, provider=self.provider, start_timestamp=check_date,
                           end_timestamp=check_date, limit=1, level=self.level)
    if df_is_not_null(current_df):
        old = current_df.iloc[0, :]['qfq_close']
        new = check_df['close'][0]
        # a differing close at the same timestamp means the stored qfq series
        # must be recomputed
        # NOTE(review): exact float equality here, while a sibling recorder
        # rounds to 2 decimals before comparing — confirm which is intended
        if old != new:
            self.factor = new / old
            self.last_timestamp = pd.Timestamp(check_date)
    return df.to_dict(orient='records')
def __init__(self,
             entity_ids=None,
             entity_schema=Stock,
             exchanges=None,
             codes=None,
             the_timestamp=None,
             start_timestamp=None,
             end_timestamp=None,
             long_threshold=0.8,
             short_threshold=0.2,
             level=IntervalLevel.LEVEL_1DAY,
             provider='eastmoney',
             portfolio_selector=None) -> None:
    """Selector that filters/scores entities over a time range and yields long/short targets.

    :param the_timestamp: when given, collapses the range to this single timestamp
    :param long_threshold: score at/above which a target is opened long
    :param short_threshold: score at/below which a target is opened short
    :param portfolio_selector: optional nested selector; must select portfolio
        entities (Etf/Block/Index), not plain stocks
    """
    self.entity_ids = entity_ids
    self.entity_schema = entity_schema
    self.exchanges = exchanges
    self.codes = codes
    self.provider = provider
    self.portfolio_selector: TargetSelector = portfolio_selector

    if self.portfolio_selector:
        # a portfolio selector must operate on portfolio schemas
        assert self.portfolio_selector.entity_schema in [Etf, Block, Index]

    if the_timestamp:
        self.the_timestamp = to_pd_timestamp(the_timestamp)
        self.start_timestamp = self.the_timestamp
        self.end_timestamp = self.the_timestamp
    else:
        # NOTE(review): if neither the_timestamp nor start_timestamp is given,
        # self.start_timestamp is never assigned — confirm callers always
        # provide one of them
        if start_timestamp:
            self.start_timestamp = to_pd_timestamp(start_timestamp)
        if end_timestamp:
            self.end_timestamp = to_pd_timestamp(end_timestamp)
        else:
            self.end_timestamp = now_pd_timestamp()

    self.long_threshold = long_threshold
    self.short_threshold = short_threshold
    self.level = level

    self.filter_factors: List[FilterFactor] = []
    self.score_factors: List[ScoreFactor] = []
    self.filter_result = None
    self.score_result = None

    self.open_long_df: DataFrame = None
    self.open_short_df: DataFrame = None

    self.init_factors(entity_ids=entity_ids, entity_schema=entity_schema,
                      exchanges=exchanges, codes=codes,
                      the_timestamp=the_timestamp,
                      start_timestamp=start_timestamp,
                      end_timestamp=end_timestamp, level=self.level)
def evaluate_start_end_size_timestamps(self, entity):
    """Decide how much to fetch for *entity*; skip entities refreshed within the last 10 days.

    :return: (start, end, size, timestamps) — size 0 means "nothing to do"
    """
    saved = self.get_latest_saved_record(entity=entity)
    if not saved or saved.updated_timestamp is None:
        return None, None, self.default_size, None

    age = now_pd_timestamp() - saved.updated_timestamp
    if age.days < 10:
        # recently refreshed -> skip this entity
        self.logger.info('entity_item:{},updated_timestamp:{},ignored'.format(
            entity.id, saved.updated_timestamp))
        return None, None, 0, None

    return None, None, self.default_size, None
def get_stocks(cls, code=None, codes=None, ids=None, timestamp=None, provider=None):
    """Return the component stocks of the ETF(s) as of *timestamp* (defaults to "now").

    FIX: the default used to be ``now_pd_timestamp()`` evaluated once at module
    import time; it is now resolved on each call.
    """
    from zvt.api.common import get_etf_stocks
    if timestamp is None:
        timestamp = now_pd_timestamp()
    return get_etf_stocks(code=code, codes=codes, ids=ids, timestamp=timestamp, provider=provider)
def report_core_company():
    """Record fundamentals, select core companies for the latest trading day and email the recipient list.

    Retries every 3 minutes on error; after 10 consecutive failures an alert
    email is sent.
    """
    # FIX: error_count used to be reset to 0 at the top of every loop iteration,
    # so the "10 consecutive errors" alert below could never trigger
    error_count = 0
    while True:
        email_action = EmailInformer()
        try:
            StockTradeDay.record_data(provider='joinquant')
            Stock.record_data(provider='joinquant')
            FinanceFactor.record_data(provider='eastmoney')
            BalanceSheet.record_data(provider='eastmoney')

            latest_day: StockTradeDay = StockTradeDay.query_data(
                order=StockTradeDay.timestamp.desc(), limit=1, return_type='domain')
            if latest_day:
                target_date = latest_day[0].timestamp
            else:
                target_date = now_pd_timestamp()

            my_selector: TargetSelector = FundamentalSelector(
                start_timestamp='2015-01-01', end_timestamp=target_date)
            my_selector.run()
            long_targets = my_selector.get_open_long_targets(timestamp=target_date)
            if long_targets:
                stocks = get_entities(provider='joinquant',
                                      entity_schema=Stock,
                                      entity_ids=long_targets,
                                      return_type='domain')
                info = [f'{stock.name}({stock.code})' for stock in stocks]
                msg = ' '.join(info)
            else:
                msg = 'no targets'
            logger.info(msg)
            email_action.send_message([
                '*****@*****.**', '*****@*****.**', '*****@*****.**',
                '*****@*****.**', '*****@*****.**'
            ], f'{to_time_str(target_date)} 核心资产选股结果', msg)
            break
        except Exception as e:
            logger.exception('report_core_company error:{}'.format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            if error_count == 10:
                email_action.send_message(
                    "*****@*****.**", f'report_core_company error',
                    'report_core_company error:{}'.format(e))
def record(self, entity, start, end, size, timestamps):
    """Fetch unadjusted bars for *entity* from joinquant and persist them directly."""
    if self.start_timestamp:
        start = max(self.start_timestamp, to_pd_timestamp(start))
    # if self.level < IntervalLevel.LEVEL_1HOUR:
    #     start = '2019-01-01'
    end = now_pd_timestamp()

    start_timestamp = to_time_str(start)
    # joinquant's get_price must be given an end time, otherwise future data leaks in
    end_timestamp = to_time_str(end, fmt=TIME_FORMAT_MINUTE2)
    # unadjusted data
    df = get_price(
        to_jq_entity_id(entity),
        start_date=to_time_str(start_timestamp),
        end_date=end_timestamp,
        frequency=self.jq_trading_level,
        fields=['open', 'close', 'low', 'high', 'volume', 'money'],
        skip_paused=True,
        fq=None)
    if df_is_not_null(df):
        df.index.name = 'timestamp'
        df.reset_index(inplace=True)
        df['name'] = entity.name
        df.rename(columns={'money': 'turnover'}, inplace=True)

        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value
        df['code'] = entity.code

        def generate_kdata_id(se):
            # daily-and-above levels key records by day; intraday levels key
            # by full ISO timestamp
            if self.level >= IntervalLevel.LEVEL_1DAY:
                return "{}_{}".format(
                    se['entity_id'],
                    to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
            else:
                return "{}_{}".format(
                    se['entity_id'],
                    to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)

        # NOTE(review): keyword is `force=` here while sibling recorders pass
        # `force_update=` — confirm which signature df_to_db expects
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force=self.force_update)

    return None
def every_day_report():
    """Record daily/weekly/monthly kdata, run the MA-cross selection for today and email the picks.

    Retries every 3 minutes on error.
    """
    while True:
        try:
            t = now_pd_timestamp()
            if t.dayofweek in (5, 6):
                # NOTE(review): this only logs on weekends but does not skip the
                # run — confirm whether the whole report should be ignored here
                logger.info(f'today:{t} is {t.day_name()},just ignore')
            today = to_time_str(t)
            # fetch k-line data
            JqChinaStockKdataRecorder(level=IntervalLevel.LEVEL_1DAY).run()
            JqChinaStockKdataRecorder(level=IntervalLevel.LEVEL_1WEEK).run()
            JqChinaStockKdataRecorder(level=IntervalLevel.LEVEL_1MON).run()

            # compute moving averages
            my_selector = TargetSelector(start_timestamp='2016-01-01', end_timestamp=today)
            # add the factors
            # dry_run=True because we only need the most recent data — no need
            # to load the full history as for a backtest
            ma_factor = CrossMaFactor(start_timestamp='2016-01-01', end_timestamp=today, dry_run=True)
            my_selector.add_filter_factor(ma_factor)
            my_selector.run()
            long_targets = my_selector.get_open_long_targets(timestamp=today)
            if long_targets:
                df = get_entities(provider='eastmoney', entity_schema=Stock, entity_ids=long_targets,
                                  columns=['code', 'name'])
                info = [
                    df.loc[i, 'code'] + ' ' + df.loc[i, 'name'] for i in df.index
                ]
                msg = ' '.join(info)
            else:
                msg = 'no targets'
            logger.info(msg)
            email_action = EmailInformer()
            email_action.send_message("*****@*****.**", f'{today} 均线选股结果', msg)
            break
        except Exception as e:
            logger.exception('report1 sched error:{}'.format(e))
            time.sleep(60 * 3)
def every_day_report():
    """Combine fundamental selection with an MA-cross filter and email the final picks.

    Retries every 3 minutes on error.
    """
    while True:
        try:
            today = now_pd_timestamp()
            long_targets = select_by_finance(today)
            logger.info(f'selected:{len(long_targets)}')
            # FIX: msg was only assigned inside the branches, so an empty
            # fundamental selection raised UnboundLocalError at logger.info(msg)
            msg = 'no targets'
            if long_targets:
                ma_factor = CrossMaFactor(start_timestamp='2015-01-01',
                                          end_timestamp=today,
                                          dry_run=True,
                                          persist_factor=False,
                                          entity_ids=long_targets,
                                          windows=[5, 30, 120])
                my_selector = TargetSelector(start_timestamp='2015-01-01',
                                             end_timestamp=today,
                                             entity_ids=long_targets)
                my_selector.add_filter_factor(ma_factor)
                my_selector.run()
                final_targets = my_selector.get_open_long_targets(today)
                final_targets = list(set(final_targets))
                logger.info(f'final selected:{len(final_targets)}')
                if final_targets:
                    df = get_entities(provider='eastmoney',
                                      entity_schema=Stock,
                                      entity_ids=final_targets,
                                      columns=['code', 'name'])
                    info = [
                        df.loc[i, 'code'] + ' ' + df.loc[i, 'name'] for i in df.index
                    ]
                    msg = ' '.join(info)
            logger.info(msg)
            email_action = EmailInformer()
            email_action.send_message("*****@*****.**", f'{today} 基本面 + 技术面选股结果', msg)
            break
        except Exception as e:
            logger.exception('report3 sched error:{}'.format(e))
            time.sleep(60 * 3)
def fetch_csi_index_component(self, df: pd.DataFrame):
    """Fetch the component stocks of SSE/CSI indices.

    :param df: index list; must contain 'code' and 'name' columns
    """
    query_url = 'http://www.csindex.com.cn/uploads/file/autofile/cons/{}cons.xls'

    for _, index in df.iterrows():
        index_code = index['code']

        url = query_url.format(index_code)

        try:
            response = requests.get(url)
            response.raise_for_status()
        except requests.HTTPError as error:
            self.logger.error(
                f'{index["name"]} - {index_code} 成分股抓取错误 ({error})')
            continue

        response_df = pd.read_excel(io.BytesIO(response.content))

        # keep only constituent code/name and normalize the column names
        response_df = response_df[[
            '成分券代码Constituent Code', '成分券名称Constituent Name'
        ]].rename(
            columns={
                '成分券代码Constituent Code': 'stock_code',
                '成分券名称Constituent Name': 'stock_name'
            })

        index_id = f'index_cn_{index_code}'
        response_df['entity_id'] = index_id
        response_df['entity_type'] = 'index'
        response_df['exchange'] = 'cn'
        response_df['code'] = index_code
        response_df['name'] = index['name']
        response_df['timestamp'] = now_pd_timestamp()

        response_df['stock_id'] = response_df['stock_code'].apply(
            lambda x: china_stock_code_to_id(str(x)))
        response_df['id'] = response_df['stock_id'].apply(
            lambda x: f'{index_id}_{x}')

        df_to_db(data_schema=self.data_schema, df=response_df, provider=self.provider, force_update=True)
        self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')

        self.sleep()
def fetch_cni_index_component(self, df: pd.DataFrame):
    """Fetch the component stocks of CNI (国证) indices.

    :param df: index list; must contain 'code' and 'name' columns
    """
    query_url = 'http://www.cnindex.com.cn/docs/yb_{}.xls'

    for _, index in df.iterrows():
        index_code = index['code']

        url = query_url.format(index_code)

        try:
            response = requests.get(url)
            response.raise_for_status()
        except requests.HTTPError as error:
            self.logger.error(
                f'{index["name"]} - {index_code} 成分股抓取错误 ({error})')
            continue

        response_df = pd.read_excel(io.BytesIO(response.content), dtype='str')

        index_id = f'index_cn_{index_code}'

        # the code column name varies between downloads
        try:
            response_df = response_df[['样本股代码']]
        except KeyError:
            response_df = response_df[['证券代码']]

        # FIX: rename the single code column *before* adding the other columns;
        # the original assigned `.columns = ['stock_code']` only after six extra
        # columns existed, which raises a length-mismatch ValueError
        response_df.columns = ['stock_code']

        response_df['entity_id'] = index_id
        response_df['entity_type'] = 'index'
        response_df['exchange'] = 'cn'
        response_df['code'] = index_code
        response_df['name'] = index['name']
        response_df['timestamp'] = now_pd_timestamp()

        response_df['stock_id'] = response_df['stock_code'].apply(
            lambda x: china_stock_code_to_id(str(x)))
        response_df['id'] = response_df['stock_id'].apply(
            lambda x: f'{index_id}_{x}')

        df_to_db(data_schema=self.data_schema, df=response_df, provider=self.provider)
        self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')

        self.sleep()
def download_sh_etf_component(self, df: pd.DataFrame):
    """Download the component stocks of SH ETFs from the SSE query endpoint.

    ETF_CLASS codes: 1. single-market ETF 2. cross-market ETF 3. cross-border ETF
    5. bond ETF 6. gold ETF — only classes 1 and 2 are processed here.

    :param df: ETF list data
    :return: None
    """
    query_url = 'http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?' \
                'isPagination=false&type={}&etfClass={}'

    etf_df = df[(df['ETF_CLASS'] == '1') | (df['ETF_CLASS'] == '2')]
    etf_df = self.populate_sh_etf_type(etf_df)

    for _, etf in etf_df.iterrows():
        url = query_url.format(etf['ETF_TYPE'], etf['ETF_CLASS'])
        response = requests.get(url, headers=DEFAULT_SH_ETF_LIST_HEADER)
        response_dict = demjson.decode(response.text)
        response_df = pd.DataFrame(response_dict.get('result', []))

        etf_code = etf['FUND_ID']
        etf_id = f'etf_sh_{etf_code}'
        response_df = response_df[['instrumentId', 'instrumentName']].copy()
        response_df.rename(columns={
            'instrumentId': 'stock_code',
            'instrumentName': 'stock_name'
        }, inplace=True)

        response_df['entity_id'] = etf_id
        response_df['entity_type'] = 'etf'
        response_df['exchange'] = 'sh'
        response_df['code'] = etf_code
        response_df['name'] = etf['FUND_NAME']
        response_df['timestamp'] = now_pd_timestamp()

        response_df['stock_id'] = response_df['stock_code'].apply(
            lambda code: china_stock_code_to_id(code))
        response_df['id'] = response_df['stock_id'].apply(
            lambda x: f'{etf_id}_{x}')

        df_to_db(data_schema=self.data_schema, df=response_df, provider=self.provider)
        self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

        self.sleep()