def record(self, entity, start, end, size, timestamps):
    """
    Fetch valuation factors (PE/PB/PCF/PS) for one entity from the tonglian
    ``equ_factor_VS`` table and persist them via ``df_to_db``.

    :param entity: the entity (stock) domain object whose data is recorded
    :param start: start timestamp of the query window
    :param end: unused; the query always runs up to "now"
    :param size: unused
    :param timestamps: unused
    :return: None (data is written to the store as a side effect)
    """
    # NOTE(review): the original version had three defects fixed here:
    #   1. columns were selected as `a.TRADE_DATE, ...` but the FROM clause
    #      never declared the alias `a` -> invalid SQL;
    #   2. a second, debug read_sql hard-coded to code "000006" overwrote
    #      the real per-entity result;
    #   3. it rescaled market_cap / circulating_market_cap / capitalization /
    #      circulating_cap / turnover_ratio, columns this query never
    #      returns -> KeyError at runtime (copy-paste from the jq recorder).
    sql_valuation = ('SELECT TRADE_DATE,TICKER_SYMBOL,PE,PB,PCF,PS '
                     'FROM equ_factor_VS '
                     'WHERE TRADE_DATE>=%s AND TRADE_DATE<=%s AND TICKER_SYMBOL=%s')
    df = pd.read_sql(sql_valuation, self.tonglian_conn,
                     params=(to_time_str(start, fmt="YYYYMMDD"),
                             now_time_str(fmt="YYYYMMDD"),
                             entity.code))
    # Normalize provider column names to the zvt schema.
    df = df.rename({'TRADE_DATE': 'day', 'TICKER_SYMBOL': 'code',
                    'PE': 'pe', 'PB': 'pb', 'PS': 'ps', 'PCF': 'pcf'},
                   axis='columns')
    df['entity_id'] = entity.id
    df['timestamp'] = pd.to_datetime(df['day'])
    df['name'] = entity.name
    df['id'] = df['timestamp'].apply(lambda x: "{}_{}".format(entity.id, to_time_str(x)))
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
             force_update=self.force_update)
    return None
def init_entities(self):
    """
    init the entities which we would record data for
    """
    # Reuse the main session when the entity store and the data store coincide;
    # otherwise open a dedicated session for the entity schema.
    same_store = (self.entity_provider == self.provider
                  and self.entity_schema == self.data_schema)
    if same_store:
        self.entity_session = self.session
    else:
        self.entity_session = get_db_session(provider=self.entity_provider,
                                             data_schema=self.entity_schema)

    filters = None
    if self.day_data:
        # For daily data, entities that already have a record today are skipped.
        recorded = self.data_schema.query_data(start_timestamp=now_time_str(),
                                               columns=['entity_id', 'timestamp'],
                                               provider=self.provider)
        if pd_is_not_null(recorded):
            done_ids = recorded['entity_id'].tolist()
            self.logger.info(f'ignore entity_ids:{done_ids}')
            filters = [self.entity_schema.entity_id.notin_(done_ids)]

    # init the entity list
    self.entities = get_entities(session=self.entity_session,
                                 entity_schema=self.entity_schema,
                                 entity_type=self.entity_type,
                                 exchanges=self.exchanges,
                                 entity_ids=self.entity_ids,
                                 codes=self.codes,
                                 return_type='domain',
                                 provider=self.entity_provider,
                                 filters=filters)
def generate_request_param(self, security_item, start, end, size, timestamp):
    """Build the request parameters for one fetch: window [start, now] at self.level."""
    param = {'security_item': security_item}
    param['start'] = to_time_str(start, fmt=TIME_FORMAT_DAY1)
    # The end of the window is always "now", regardless of `end`.
    param['end'] = now_time_str(fmt=TIME_FORMAT_DAY1)
    param['level'] = self.level.value
    return param
def record(self, entity, start, end, size, timestamps):
    """
    Pull daily valuation data from joinquant for one entity, normalize the
    column names and units to the zvt schema, and persist it.
    """
    q = query(valuation).filter(valuation.code == to_jq_entity_id(entity))
    # Number of days to request: from `start` up to now, inclusive.
    span: pd.Timedelta = now_pd_timestamp() - start
    df = get_fundamentals_continuously(q, end_date=now_time_str(),
                                       count=span.days + 1, panel=False)
    df['entity_id'] = entity.id
    df['timestamp'] = pd.to_datetime(df['day'])
    df['code'] = entity.code
    df['name'] = entity.name
    df['id'] = df['timestamp'].apply(lambda ts: "{}_{}".format(entity.id, to_time_str(ts)))
    column_map = {
        'pe_ratio_lyr': 'pe',
        'pe_ratio': 'pe_ttm',
        'pb_ratio': 'pb',
        'ps_ratio': 'ps',
        'pcf_ratio': 'pcf',
    }
    df = df.rename(column_map, axis='columns')
    # Convert provider units (1e8 CNY / 1e4 shares / percent) to plain values.
    df['market_cap'] *= 100000000
    df['circulating_market_cap'] *= 100000000
    df['capitalization'] *= 10000
    df['circulating_cap'] *= 10000
    df['turnover_ratio'] *= 0.01
    df_to_db(df=df, data_schema=self.data_schema, provider=self.provider,
             force_update=self.force_update)
    return None
def risky_company(region: Region, the_date=None, income_yoy=-0.1, profit_yoy=-0.1, entity_ids=None):
    """
    Screen for financially risky companies and return their (deduplicated) codes.

    A company is flagged when any of these holds over the ~130-day lookback:
    declining revenue/profit or weak liquidity ratios; receivables + inventories
    + goodwill exceeding total equity; or receivables above half of net profit.
    """
    if not the_date:
        the_date = to_pd_timestamp(now_time_str(region))
    risky_codes = []
    start_timestamp = to_pd_timestamp(the_date) - datetime.timedelta(130)

    # Declining revenue, declining profit, low current ratio, low quick ratio.
    finance_filter = or_(FinanceFactor.op_income_growth_yoy < income_yoy,
                         FinanceFactor.net_profit_growth_yoy <= profit_yoy,
                         FinanceFactor.current_ratio < 0.7,
                         FinanceFactor.quick_ratio < 0.5)
    finance_df = FinanceFactor.query_data(region=region, entity_ids=entity_ids,
                                          start_timestamp=start_timestamp,
                                          filters=[finance_filter], columns=['code'])
    if pd_is_not_null(finance_df):
        risky_codes.extend(finance_df.code.tolist())

    # High receivables, high inventories, high goodwill vs. total equity.
    balance_filter = (BalanceSheet.accounts_receivable + BalanceSheet.inventories + BalanceSheet.goodwill) \
                     > BalanceSheet.total_equity
    balance_df = BalanceSheet.query_data(region=region, entity_ids=entity_ids,
                                         start_timestamp=start_timestamp,
                                         filters=[balance_filter], columns=['code'])
    if pd_is_not_null(balance_df):
        risky_codes.extend(balance_df.code.tolist())

    # Receivables greater than half of net profit (latest figure per code).
    receivable_df = BalanceSheet.query_data(
        region=region,
        entity_ids=entity_ids,
        start_timestamp=start_timestamp,
        columns=[BalanceSheet.code, BalanceSheet.accounts_receivable])
    if pd_is_not_null(receivable_df):
        receivable_df.drop_duplicates(subset='code', keep='last', inplace=True)
        receivable_df = receivable_df.set_index('code', drop=True).sort_index()

    profit_df = IncomeStatement.query_data(
        region=region,
        entity_ids=entity_ids,
        start_timestamp=start_timestamp,
        columns=[IncomeStatement.code, IncomeStatement.net_profit])
    if pd_is_not_null(profit_df):
        profit_df.drop_duplicates(subset='code', keep='last', inplace=True)
        profit_df = profit_df.set_index('code', drop=True).sort_index()

    if pd_is_not_null(receivable_df) and pd_is_not_null(profit_df):
        # NOTE: relies on both frames sharing the same sorted code index.
        risky_codes.extend(
            receivable_df[receivable_df.accounts_receivable > profit_df.net_profit / 2].index.tolist())

    return list(set(risky_codes))
def on_finish(self, security_item):
    """
    Post-recording hook for one security: back-fill post-adjusted (hfq)
    prices from joinquant for rows still missing them, then recompute the
    pre-adjusted (qfq) columns in-place with a raw SQL UPDATE.
    """
    # Rows since 2005-01-01 whose hfq_close has not been filled yet,
    # oldest first.
    kdatas = get_kdata(
        provider=self.provider,
        security_id=security_item.id,
        level=self.level.value,
        order=self.data_schema.timestamp.asc(),
        return_type='domain',
        session=self.session,
        filters=[
            self.data_schema.hfq_close.is_(None),
            self.data_schema.timestamp >= to_pd_timestamp('2005-01-01')
        ])
    if kdatas:
        start = kdatas[0].timestamp
        end = kdatas[-1].timestamp

        # get hfq from joinquant
        df = get_price(to_jq_security_id(security_item),
                       start_date=to_time_str(start),
                       end_date=now_time_str(),
                       frequency='daily',
                       fields=['factor', 'open', 'close', 'low', 'high'],
                       skip_paused=True,
                       fq='post')
        if df is not None and not df.empty:
            # fill hfq data
            for kdata in kdatas:
                time_str = to_time_str(kdata.timestamp)
                if time_str in df.index:
                    kdata.hfq_open = df.loc[time_str, 'open']
                    kdata.hfq_close = df.loc[time_str, 'close']
                    kdata.hfq_high = df.loc[time_str, 'high']
                    kdata.hfq_low = df.loc[time_str, 'low']
                    kdata.factor = df.loc[time_str, 'factor']
            self.session.commit()

            # Latest adjust factor (last row of the returned frame).
            latest_factor = df.factor[-1]
            # factor not change yet, no need to reset the qfq past:
            # only fill rows whose qfq columns are still NULL.
            if latest_factor == self.current_factors.get(security_item.id):
                sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                      'security_id=\'{}\' and level=\'{}\' and (qfq_close isnull or qfq_high isnull or qfq_low isnull or qfq_open isnull)'.format(
                    self.data_schema.__table__, latest_factor, latest_factor,
                    latest_factor, latest_factor, security_item.id, self.level.value)
            else:
                # Factor changed: recompute qfq for every row of this security.
                sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                      'security_id=\'{}\' and level=\'{}\''.format(self.data_schema.__table__, latest_factor,
                                                                   latest_factor, latest_factor, latest_factor,
                                                                   security_item.id, self.level.value)
            self.logger.info(sql)
            self.session.execute(sql)
            self.session.commit()

    # TODO:use netease provider to get turnover_rate
    self.logger.info('use netease provider to get turnover_rate')
def record(self, entity, start, end, size, time_array):
    """
    Fetch minute-level kdata from eastmoney for one entity and return the
    kdata domain objects that are not already stored.

    :return: list of ``self.kdata_schema`` instances (possibly empty)
    :raises ValueError: for an entity type/exchange this recorder cannot map
    """
    # Build eastmoney's internal id: suffix 1 for index/SH, 2 for SZ.
    if entity.type == 'index':
        id_flag = "{}1".format(entity.code)
    elif entity.type == 'stock':
        if entity.exchange == 'sh':
            id_flag = "{}1".format(entity.code)
        elif entity.exchange == 'sz':
            id_flag = "{}2".format(entity.code)
        else:
            # NOTE(review): the original left id_flag unbound here, crashing
            # later with NameError; fail fast with a clear message instead.
            raise ValueError('unsupported exchange:{}'.format(entity.exchange))
    else:
        raise ValueError('unsupported entity type:{}'.format(entity.type))

    the_url = self.url.format("{}".format(id_flag),
                              eastmoney_map_zvt_trading_level(self.level),
                              now_time_str(fmt=TIME_FORMAT_MINUTE),
                              size)
    resp = requests.get(the_url)
    results = json_callback_param(resp.text)

    kdatas = []
    for result in results:
        the_timestamp = to_pd_timestamp(result['time'])
        the_id = generate_kdata_id(entity_id=entity.id,
                                   timestamp=the_timestamp,
                                   level=self.level)
        # Skip rows that already exist in the store.
        if not data_exist(self.session, self.kdata_schema, the_id):
            kdatas.append(
                self.kdata_schema(id=the_id,
                                  timestamp=the_timestamp,
                                  entity_id=entity.id,
                                  code=entity.code,
                                  name=entity.name,
                                  level=self.level,
                                  open=to_float(result['open']),
                                  close=to_float(result['close']),
                                  high=to_float(result['high']),
                                  low=to_float(result['low']),
                                  volume=to_float(result['volume']),
                                  turnover=to_float(result['amount']),
                                  turnover_rate=to_float(result['turnoverrate'])))
    return kdatas
def request(self, url=None, method='get', param=None, path_fields=None):
    """Fetch unadjusted (fq=None) daily bars from joinquant and return them as record dicts."""
    security_item = param['security_item']
    start_timestamp = param['start_timestamp']
    # fq=None -> no price adjustment (不复权)
    df = get_price(to_jq_security_id(security_item),
                   start_date=to_time_str(start_timestamp),
                   end_date=now_time_str(),
                   frequency='daily',
                   fields=['open', 'close', 'low', 'high', 'volume', 'money'],
                   skip_paused=True,
                   fq=None)
    # Promote the date index to a regular 'timestamp' column.
    df.index.name = 'timestamp'
    df.reset_index(inplace=True)
    df['name'] = security_item.name
    df = df.rename(columns={'money': 'turnover'})
    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['provider'] = Provider.JOINQUANT.value
    df['level'] = param['level']
    return df.to_dict(orient='records')
def init_entities(self):
    """
    init the entities which we would record data for
    """
    # Share the main session when entity storage and data storage coincide.
    same_backend = (self.entity_provider == self.provider
                    and self.entity_schema == self.data_schema)
    self.entity_session = (self.session if same_backend
                           else get_db_session(provider=self.entity_provider,
                                               data_schema=self.entity_schema))

    if self.day_data:
        # Entities that already have a record for today can be skipped.
        today_df = self.data_schema.query_data(start_timestamp=now_time_str(),
                                               columns=["entity_id", "timestamp"],
                                               provider=self.provider)
        if pd_is_not_null(today_df):
            recorded_ids = today_df["entity_id"].tolist()
            self.logger.info(f"ignore entity_ids:{recorded_ids}")
            skip_filter = self.entity_schema.entity_id.notin_(recorded_ids)
            if self.entity_filters:
                self.entity_filters.append(skip_filter)
            else:
                self.entity_filters = [skip_filter]

    #: init the entity list
    self.entities = get_entities(
        session=self.entity_session,
        entity_schema=self.entity_schema,
        exchanges=self.exchanges,
        entity_ids=self.entity_ids,
        codes=self.codes,
        return_type="domain",
        provider=self.entity_provider,
        filters=self.entity_filters,
    )
def record(self, entity, start, end, size, timestamps):
    """
    Fetch daily kdata from eastmoney for one entity and return it as a list
    of plain dicts keyed for the kdata schema.
    """
    url = self.url.format("{}".format(entity.code), level_flag(self.level),
                          size, now_time_str(fmt=TIME_FORMAT_DAY1))
    payload = json_callback_param(requests.get(url).text)

    kdatas = []
    if payload:
        klines = payload['data']['klines']
        # TODO: ignore the last unfinished kdata now,could control it better if need
        for line in klines[:-1]:
            # "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"
            # time,open,close,high,low,volume,turnover
            parts = line.split(',')
            ts = to_pd_timestamp(parts[0])
            row_id = generate_kdata_id(entity_id=entity.id, timestamp=ts,
                                       level=self.level)
            kdatas.append(dict(id=row_id,
                               timestamp=ts,
                               entity_id=entity.id,
                               code=entity.code,
                               name=entity.name,
                               level=self.level.value,
                               open=to_float(parts[1]),
                               close=to_float(parts[2]),
                               high=to_float(parts[3]),
                               low=to_float(parts[4]),
                               volume=to_float(parts[5]),
                               turnover=to_float(parts[6])))
    return kdatas
def record(self, entity, start, end, size, timestamps, http_session):
    """
    Fetch daily kdata from eastmoney for one entity.

    :return: a DataFrame of parsed bars, or None when the request failed or
             produced no data
    """
    url = self.url.format(
        "{}".format(entity.code), level_flag(self.level), size,
        now_time_str(region=Region.CHN, fmt=TIME_FORMAT_DAY1))

    text = sync_get(http_session, url, return_type='text')
    if text is None:
        return None

    results = json_callback_param(text)
    if results:
        klines = results['data']['klines']
        # NOTE(review): the previous @njit(nopython=True) wrapper could not
        # compile this loop — nopython mode supports neither building Python
        # dicts this way nor calling the plain-Python helper to_float — so it
        # raised a TypingError on first call. Plain Python is correct here.
        kdatas = []
        # ignore the last unfinished kdata now, could control it better if need
        for result in klines[:-1]:
            # "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"
            # time,open,close,high,low,volume,turnover
            fields = result.split(',')
            kdatas.append(
                dict(timestamp=fields[0],
                     open=to_float(fields[1]),
                     close=to_float(fields[2]),
                     high=to_float(fields[3]),
                     low=to_float(fields[4]),
                     volume=to_float(fields[5]),
                     turnover=to_float(fields[6])))

        if len(kdatas) > 0:
            return pd.DataFrame.from_records(kdatas)
    return None
def get_ui_path(region: Region, name):
    """Resolve the html output path for *name*; auto-name with a timestamp when None."""
    if name is None:
        stamp = now_time_str(region=region, fmt=TIME_FORMAT_ISO8601)
        return os.path.join(zvt_env['ui_path'], '{}.html'.format(stamp))
    return os.path.join(zvt_env['ui_path'], f'{name}.html')
def get_ui_path(name):
    """
    Resolve the html file path under UI_PATH for *name*; when name is None,
    use the current ISO-8601 timestamp as the file name.

    Fix: the original assigned '<timestamp>.html' to name and then formatted
    '{}.html' around it again, yielding a double '.html.html' extension.
    """
    if name is None:
        name = now_time_str(fmt=TIME_FORMAT_ISO8601)
    return os.path.join(UI_PATH, '{}.html'.format(name))
def on_finish_entity(self, entity):
    """
    Post-recording hook for one entity: back-fill post-adjusted (hfq) prices
    from joinquant for rows still missing them, recompute the pre-adjusted
    (qfq) columns via raw SQL, then fill turnover_rate / change_pct from the
    netease daily-history CSV service.
    """
    # Rows since 2005-01-01 whose hfq_close has not been filled yet,
    # oldest first.
    kdatas = get_kdata(
        provider=self.provider,
        entity_id=entity.id,
        level=self.level.value,
        order=self.data_schema.timestamp.asc(),
        return_type='domain',
        session=self.session,
        filters=[
            self.data_schema.hfq_close.is_(None),
            self.data_schema.timestamp >= to_pd_timestamp('2005-01-01')
        ])
    if kdatas:
        start = kdatas[0].timestamp
        end = kdatas[-1].timestamp

        # get hfq from joinquant
        df = get_price(to_jq_entity_id(entity),
                       start_date=to_time_str(start),
                       end_date=now_time_str(),
                       frequency='daily',
                       fields=['factor', 'open', 'close', 'low', 'high'],
                       skip_paused=True,
                       fq='post')
        if df is not None and not df.empty:
            # fill hfq data
            for kdata in kdatas:
                time_str = to_time_str(kdata.timestamp)
                if time_str in df.index:
                    kdata.hfq_open = df.loc[time_str, 'open']
                    kdata.hfq_close = df.loc[time_str, 'close']
                    kdata.hfq_high = df.loc[time_str, 'high']
                    kdata.hfq_low = df.loc[time_str, 'low']
                    kdata.factor = df.loc[time_str, 'factor']
            self.session.commit()

            # Latest adjust factor (last row of the returned frame).
            latest_factor = df.factor[-1]
            # factor not change yet, no need to reset the qfq past:
            # only fill rows whose qfq columns are still NULL.
            if latest_factor == self.current_factors.get(entity.id):
                sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                      'entity_id=\'{}\' and level=\'{}\' and (qfq_close isnull or qfq_high isnull or qfq_low isnull or qfq_open isnull)'.format(
                    self.data_schema.__table__, latest_factor, latest_factor,
                    latest_factor, latest_factor, entity.id, self.level.value)
            else:
                # Factor changed: recompute qfq for every row of this entity.
                sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                      'entity_id=\'{}\' and level=\'{}\''.format(self.data_schema.__table__, latest_factor,
                                                                 latest_factor, latest_factor, latest_factor,
                                                                 entity.id, self.level.value)
            self.logger.info(sql)
            self.session.execute(sql)
            self.session.commit()

        # use netease provider to get turnover_rate
        query_url = 'http://quotes.money.163.com/service/chddata.html?code={}{}&start={}&end={}&fields=PCHG;TURNOVER'

        # netease market prefix: 0 for Shanghai, 1 for Shenzhen.
        if entity.exchange == 'sh':
            exchange_flag = 0
        else:
            exchange_flag = 1
        url = query_url.format(exchange_flag, entity.code, to_time_str(start), to_time_str(end))
        response = requests.get(url=url)

        # The CSV comes back GB2312-encoded with 'None' marking missing values;
        # index by the date column (日期) for aligned lookups below.
        df = read_csv(io.BytesIO(response.content), encoding='GB2312', na_values='None')
        df['日期'] = pd.to_datetime(df['日期'])
        df.set_index('日期', drop=True, inplace=True)

        if df is not None and not df.empty:
            # fill turnover_rate, pct_change
            for kdata in kdatas:
                if kdata.timestamp in df.index:
                    # 换手率 = turnover rate, 涨跌幅 = percent change
                    kdata.turnover_rate = df.loc[kdata.timestamp, '换手率']
                    kdata.change_pct = df.loc[kdata.timestamp, '涨跌幅']
            self.session.commit()