def on_finish_entity(self, entity):
    super().on_finish_entity(entity)

    if not self.fetch_jq_timestamp:
        return

    # fill the timestamp for report published date
    the_data_list = get_data(data_schema=self.data_schema,
                             provider=self.provider,
                             entity_id=entity.id,
                             order=self.data_schema.timestamp.asc(),
                             return_type='domain',
                             session=self.session,
                             filters=[self.data_schema.timestamp == self.data_schema.report_date,
                                      self.data_schema.timestamp >= to_pd_timestamp('2005-01-01')])
    if the_data_list:
        if self.data_schema == FinanceFactor:
            for the_data in the_data_list:
                self.fill_timestamp_with_jq(entity, the_data)
        else:
            df = get_finance_factor(entity_id=entity.id,
                                    columns=[FinanceFactor.timestamp, FinanceFactor.report_date, FinanceFactor.id],
                                    filters=[FinanceFactor.timestamp != FinanceFactor.report_date,
                                             FinanceFactor.timestamp >= to_pd_timestamp('2005-01-01'),
                                             FinanceFactor.report_date >= the_data_list[0].report_date,
                                             FinanceFactor.report_date <= the_data_list[-1].report_date])

            if pd_is_not_null(df):
                index_df(df, index='report_date', time_field='report_date')

            for the_data in the_data_list:
                if pd_is_not_null(df) and the_data.report_date in df.index:
                    the_data.timestamp = df.at[the_data.report_date, 'timestamp']
                    self.logger.info('db fill {} {} timestamp:{} for report_date:{}'.format(
                        self.data_schema, entity.id, the_data.timestamp, the_data.report_date))
                    self.session.commit()
                else:
                    # no FinanceFactor record for this report_date yet, wait for jq to fill the timestamp
                    self.fill_timestamp_with_jq(entity, the_data)
def __init__(self,
             entity_type='stock',
             exchanges=['sh', 'sz'],
             entity_ids=None,
             codes=None,
             batch_size=10,
             force_update=False,
             sleeping_time=5,
             default_size=2000,
             real_time=False,
             fix_duplicate_way='add',
             start_timestamp=None,
             end_timestamp=None,
             close_hour=0,
             close_minute=0) -> None:
    self.default_size = default_size
    self.real_time = real_time
    self.close_hour = close_hour
    self.close_minute = close_minute
    self.fix_duplicate_way = fix_duplicate_way
    self.start_timestamp = to_pd_timestamp(start_timestamp)
    self.end_timestamp = to_pd_timestamp(end_timestamp)

    super().__init__(entity_type, exchanges, entity_ids, codes, batch_size, force_update, sleeping_time)
def common_filter(query: Query,
                  data_schema,
                  start_timestamp=None,
                  end_timestamp=None,
                  filters=None,
                  order=None,
                  limit=None,
                  time_field='timestamp'):
    assert data_schema is not None
    # look the time column up by name instead of eval-ing a string
    time_col = getattr(data_schema, time_field)

    if start_timestamp:
        query = query.filter(time_col >= to_pd_timestamp(start_timestamp))
    if end_timestamp:
        query = query.filter(time_col <= to_pd_timestamp(end_timestamp))
    if filters:
        for query_filter in filters:
            query = query.filter(query_filter)
    if order is not None:
        query = query.order_by(order)
    else:
        query = query.order_by(time_col.asc())
    if limit:
        query = query.limit(limit)

    return query
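# A minimal usage sketch for common_filter, not part of the original module; the
# `session` and `Stock1dKdata` names below are assumptions standing in for a real
# SQLAlchemy session and schema class from the surrounding project.
def _example_common_filter_usage(session, Stock1dKdata):
    # bounded, ordered (by timestamp asc, the default) and limited in one call
    query = common_filter(session.query(Stock1dKdata),
                          data_schema=Stock1dKdata,
                          start_timestamp='2019-01-01',
                          end_timestamp='2019-06-30',
                          limit=100)
    return query.all()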
def __init__(self,
             entity_ids=None,
             entity_schema=Stock,
             exchanges=None,
             codes=None,
             the_timestamp=None,
             start_timestamp=None,
             end_timestamp=None,
             long_threshold=0.8,
             short_threshold=0.2,
             level=IntervalLevel.LEVEL_1DAY,
             provider='eastmoney',
             portfolio_selector=None) -> None:
    self.entity_ids = entity_ids
    self.entity_schema = entity_schema
    self.exchanges = exchanges
    self.codes = codes
    self.provider = provider
    self.portfolio_selector: TargetSelector = portfolio_selector

    if self.portfolio_selector:
        assert self.portfolio_selector.entity_schema in [Etf, Block, Index]

    if the_timestamp:
        self.the_timestamp = to_pd_timestamp(the_timestamp)
        self.start_timestamp = self.the_timestamp
        self.end_timestamp = self.the_timestamp
    else:
        if start_timestamp:
            self.start_timestamp = to_pd_timestamp(start_timestamp)
        if end_timestamp:
            self.end_timestamp = to_pd_timestamp(end_timestamp)
        else:
            self.end_timestamp = now_pd_timestamp()

    self.long_threshold = long_threshold
    self.short_threshold = short_threshold
    self.level = level

    self.filter_factors: List[FilterFactor] = []
    self.score_factors: List[ScoreFactor] = []
    self.filter_result = None
    self.score_result = None

    self.open_long_df: DataFrame = None
    self.open_short_df: DataFrame = None

    self.init_factors(entity_ids=entity_ids, entity_schema=entity_schema, exchanges=exchanges,
                      codes=codes, the_timestamp=the_timestamp, start_timestamp=start_timestamp,
                      end_timestamp=end_timestamp, level=self.level)
def persist_index(self, df) -> None:
    df['timestamp'] = df['timestamp'].apply(lambda x: to_pd_timestamp(x))
    df['list_date'] = df['list_date'].apply(lambda x: to_pd_timestamp(x))
    df['id'] = df['code'].apply(lambda code: f'index_cn_{code}')
    df['entity_id'] = df['id']
    df['exchange'] = 'cn'
    df['entity_type'] = 'index'
    df['is_delisted'] = False

    df = df.dropna(axis=0, how='any')
    df = df.drop_duplicates(subset='id', keep='last')

    init_entities(df, entity_type='index', provider=self.provider)
def __init__(self,
             entity_ids=None,
             entity_type='stock',
             exchanges=['sh', 'sz'],
             codes=None,
             the_timestamp=None,
             start_timestamp=None,
             end_timestamp=None,
             long_threshold=0.8,
             short_threshold=0.2,
             level=IntervalLevel.LEVEL_1DAY,
             provider='eastmoney',
             block_selector=None) -> None:
    self.entity_ids = entity_ids
    self.entity_type = entity_type
    self.exchanges = exchanges
    self.codes = codes
    self.provider = provider
    self.block_selector: TargetSelector = block_selector

    if the_timestamp:
        self.the_timestamp = to_pd_timestamp(the_timestamp)
        self.start_timestamp = self.the_timestamp
        self.end_timestamp = self.the_timestamp
    elif start_timestamp and end_timestamp:
        self.start_timestamp = to_pd_timestamp(start_timestamp)
        self.end_timestamp = to_pd_timestamp(end_timestamp)
    else:
        # either the_timestamp or both start_timestamp and end_timestamp must be given
        assert False

    self.long_threshold = long_threshold
    self.short_threshold = short_threshold
    self.level = level

    self.filter_factors: List[FilterFactor] = []
    self.score_factors: List[ScoreFactor] = []
    self.filter_result = None
    self.score_result = None

    self.open_long_df: DataFrame = None
    self.open_short_df: DataFrame = None

    self.init_factors(entity_ids=entity_ids, entity_type=entity_type, exchanges=exchanges,
                      codes=codes, the_timestamp=the_timestamp, start_timestamp=start_timestamp,
                      end_timestamp=end_timestamp, level=self.level)
def record(self, entity, start, end, size, timestamps):
    if self.start_timestamp:
        start = max(self.start_timestamp, to_pd_timestamp(start))

    start_timestamp = to_time_str(start)

    ccxt_exchange = CCXTAccount.get_ccxt_exchange(entity.exchange)

    if ccxt_exchange.has['fetchOHLCV']:
        limit = CCXTAccount.get_kdata_limit(entity.exchange)
        limit = min(size, limit)

        kdata_list = []

        if CCXTAccount.exchange_conf[entity.exchange]['support_since']:
            kdatas = ccxt_exchange.fetch_ohlcv(entity.code,
                                               timeframe=self.ccxt_trading_level,
                                               since=start_timestamp)
        else:
            kdatas = ccxt_exchange.fetch_ohlcv(entity.code,
                                               timeframe=self.ccxt_trading_level,
                                               limit=limit)

        # always ignore the latest one, because it's not finished
        for kdata in kdatas[0:-1]:
            current_timestamp = kdata[0]
            if self.level == IntervalLevel.LEVEL_1DAY:
                current_timestamp = to_time_str(current_timestamp)

            kdata_json = {
                'timestamp': to_pd_timestamp(current_timestamp),
                'open': kdata[1],
                'high': kdata[2],
                'low': kdata[3],
                'close': kdata[4],
                'volume': kdata[5],
                'name': entity.name,
                'provider': 'ccxt',
                'level': self.level.value
            }
            kdata_list.append(kdata_json)

        return kdata_list
    else:
        self.logger.warning("exchange:{} does not support fetchOHLCV".format(entity.exchange))
def persist_index(self, df) -> None:
    df['timestamp'] = df['timestamp'].apply(lambda x: to_pd_timestamp(x))
    df['list_date'] = df['list_date'].apply(lambda x: to_pd_timestamp(x))
    df['id'] = df['code'].apply(lambda code: f'index_cn_{code}')
    df['entity_id'] = df['id']
    df['exchange'] = 'cn'
    df['entity_type'] = 'index'

    df = df.dropna(axis=0, how='any')
    df = df.drop_duplicates(subset='id', keep='last')

    df_to_db(df=df, data_schema=Index, provider=self.provider, force_update=False)
def record(self, entity, start, end, size, timestamps):
    if size < 20:
        size = 20

    ccxt_exchange = CCXTAccount.get_ccxt_exchange(entity.exchange)

    if ccxt_exchange.has['fetchTrades']:
        limit = CCXTAccount.get_tick_limit(entity.exchange)
        limit = min(size, limit)

        kdata_list = []

        trades = ccxt_exchange.fetch_trades(entity.code, limit=limit)

        for trade in trades:
            kdata_json = {
                'name': entity.name,
                'provider': 'ccxt',
                # 'id': trade['id'],
                'level': 'tick',
                'order': trade['order'],
                'timestamp': to_pd_timestamp(trade['timestamp']),
                'price': trade['price'],
                'volume': trade['amount'],
                'direction': trade['side'],
                'order_type': trade['type'],
                'turnover': trade['price'] * trade['amount']
            }
            kdata_list.append(kdata_json)

        return kdata_list
    else:
        self.logger.warning("exchange:{} does not support fetchTrades".format(entity.exchange))
def record(self, entity, start, end, size, timestamps):
    if self.start_timestamp:
        start = max(self.start_timestamp, to_pd_timestamp(start))

    end = now_pd_timestamp() + timedelta(days=1)

    start_timestamp = to_time_str(start)
    end_timestamp = to_time_str(end)

    # unadjusted prices (no fq)
    df = get_price(to_jq_entity_id(entity),
                   start_date=to_time_str(start_timestamp),
                   end_date=end_timestamp,
                   frequency=self.jq_trading_level,
                   fields=['open', 'close', 'low', 'high', 'volume', 'money'],
                   skip_paused=True,
                   fq=None)
    df.index.name = 'timestamp'
    df.reset_index(inplace=True)
    df['name'] = entity.name
    df.rename(columns={'money': 'turnover'}, inplace=True)

    df['timestamp'] = pd.to_datetime(df['timestamp'])
    df['provider'] = 'joinquant'
    df['level'] = self.level.value

    # remove the unfinished kdata
    if is_in_trading(entity_type='stock', exchange='sh', timestamp=df.iloc[-1, :]['timestamp']):
        df = df.iloc[:-1, :]

    return df.to_dict(orient='records')
def coin_finished_timestamp(timestamp: pd.Timestamp, level: IntervalLevel):
    timestamp = to_pd_timestamp(timestamp)

    if timestamp.microsecond != 0:
        return False

    return timestamp.minute % level.to_minute() == 0
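# A quick sanity check of the boundary logic above (assumes IntervalLevel.LEVEL_5MIN
# exists and its to_minute() returns 5; seconds within a minute are not inspected):
def _demo_coin_finished_timestamp():
    assert coin_finished_timestamp('2020-01-01 00:05:00', IntervalLevel.LEVEL_5MIN) is True
    assert coin_finished_timestamp('2020-01-01 00:07:00', IntervalLevel.LEVEL_5MIN) is False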
def record(self, entity, start, end, size, timestamps):
    the_url = self.url.format(entity.code, level_flag(self.level), size,
                              now_time_str(fmt=TIME_FORMAT_DAY1))

    resp = requests.get(the_url)
    results = json_callback_param(resp.text)

    kdatas = []

    if results:
        klines = results['data']['klines']

        # TODO: the last (unfinished) kdata is simply ignored for now; this could be handled more precisely if needed
        for result in klines[:-1]:
            # e.g. "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"
            # time,open,close,high,low,volume,turnover
            fields = result.split(',')
            the_timestamp = to_pd_timestamp(fields[0])

            the_id = generate_kdata_id(entity_id=entity.id, timestamp=the_timestamp, level=self.level)

            kdatas.append(dict(id=the_id,
                               timestamp=the_timestamp,
                               entity_id=entity.id,
                               code=entity.code,
                               name=entity.name,
                               level=self.level.value,
                               open=to_float(fields[1]),
                               close=to_float(fields[2]),
                               high=to_float(fields[3]),
                               low=to_float(fields[4]),
                               volume=to_float(fields[5]),
                               turnover=to_float(fields[6])))
    return kdatas
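# Illustration of the kline row parsing above, using the sample row from the
# comment; plain float() stands in for the project's to_float helper here:
def _demo_parse_kline_row():
    fields = "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00".split(',')
    assert fields[0] == '2000-01-28'         # time
    assert float(fields[1]) == 1005.26       # open
    assert float(fields[6]) == 3075552000.0  # turnover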
def load_data(self):
    if self.entity_ids:
        self.data_df = get_data(data_schema=self.data_schema, entity_ids=self.entity_ids,
                                provider=self.provider, columns=self.columns,
                                start_timestamp=self.start_timestamp,
                                end_timestamp=self.end_timestamp, filters=self.filters,
                                order=self.order, limit=self.limit, level=self.level,
                                time_field=self.time_field, index=self.time_field)
    else:
        self.data_df = get_data(data_schema=self.data_schema, codes=self.codes,
                                provider=self.provider, columns=self.columns,
                                start_timestamp=self.start_timestamp,
                                end_timestamp=self.end_timestamp, filters=self.filters,
                                order=self.order, limit=self.limit, level=self.level,
                                time_field=self.time_field, index=self.time_field)

    if self.trip_timestamp:
        if self.level == IntervalLevel.LEVEL_1DAY:
            self.data_df[self.time_field] = self.data_df[self.time_field].apply(
                lambda x: to_pd_timestamp(to_time_str(x)))

    if df_is_not_null(self.data_df):
        self.normal_data = NormalData(df=self.data_df, category_field=self.category_field,
                                      index_field=self.time_field, is_timeseries=True)
        self.data_df = self.normal_data.data_df

    for listener in self.data_listeners:
        listener.on_data_loaded(self.data_df)
def is_in_trading(entity_type, exchange, timestamp):
    current = now_pd_timestamp()
    timestamp = to_pd_timestamp(timestamp)

    if is_same_date(current, timestamp):
        for start, end in get_trading_intervals(entity_type=entity_type, exchange=exchange):
            if date_and_time(current, start) < current < date_and_time(current, end):
                return True
    return False
def china_stock_finished_timestamp(timestamp: pd.Timestamp, level: IntervalLevel):
    timestamp = to_pd_timestamp(timestamp)

    if timestamp.microsecond != 0:
        return False

    return to_time_str(timestamp, fmt=TIME_FORMAT_MINUTE1) in china_stock_level_map_finished_timestamps.get(level.value)
def generate_domain(self, entity, original_data):
    """
    generate the data_schema instance from entity and original_data; original_data comes from the record result

    :param entity:
    :param original_data:
    """
    got_new_data = False

    # if the domain is generated directly in the record method, just return it
    if isinstance(original_data, self.data_schema):
        got_new_data = True
        return got_new_data, original_data

    the_id = self.generate_domain_id(entity, original_data)

    # optional way
    # item = self.session.query(self.data_schema).get(the_id)

    items = get_data(data_schema=self.data_schema,
                     session=self.session,
                     provider=self.provider,
                     entity_id=entity.id,
                     filters=[self.data_schema.id == the_id],
                     return_type='domain')

    if items and not self.force_update:
        self.logger.info('ignore the data {}:{} saved before'.format(self.data_schema, the_id))
        return got_new_data, None

    if not items:
        timestamp_str = original_data[self.get_original_time_field()]
        timestamp = None
        try:
            timestamp = to_pd_timestamp(timestamp_str)
        except Exception as e:
            self.logger.exception(e)

        if 'name' in get_schema_columns(self.data_schema):
            domain_item = self.data_schema(id=the_id,
                                           code=entity.code,
                                           name=entity.name,
                                           entity_id=entity.id,
                                           timestamp=timestamp)
        else:
            domain_item = self.data_schema(id=the_id,
                                           code=entity.code,
                                           entity_id=entity.id,
                                           timestamp=timestamp)
        got_new_data = True
    else:
        domain_item = items[0]

    fill_domain_from_dict(domain_item, original_data, self.get_data_map())
    return got_new_data, domain_item
def run(self):
    for security_item in self.entities:
        assert isinstance(security_item, Stock)

        if security_item.exchange == 'sh':
            fc = "{}01".format(security_item.code)
        if security_item.exchange == 'sz':
            fc = "{}02".format(security_item.code)

        # basic profile
        # NB: the hardcoded SecurityCode looks like a leftover sample value; the endpoint appears to key off fc
        param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
        resp = requests.post('https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao', json=param)
        resp.encoding = 'utf8'

        resp_json = resp.json()['Result']['JiBenZiLiao']

        security_item.profile = resp_json['CompRofile']
        security_item.main_business = resp_json['MainBusiness']
        security_item.date_of_establishment = to_pd_timestamp(resp_json['FoundDate'])

        # related industries
        industries = ','.join(resp_json['Industry'].split('-'))
        security_item.industries = industries
        # related concepts
        security_item.concept_indices = resp_json['Block']
        # related areas
        security_item.area_indices = resp_json['Provice']

        self.sleep()

        # issue-related information
        param = {"color": "w", "fc": fc}
        resp = requests.post('https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan', json=param)
        resp.encoding = 'utf8'

        resp_json = resp.json()['Result']['FaXingXiangGuan']

        security_item.issue_pe = to_float(resp_json['PEIssued'])
        security_item.price = to_float(resp_json['IssuePrice'])
        security_item.issues = to_float(resp_json['ShareIssued'])
        security_item.raising_fund = to_float(resp_json['NetCollection'])
        security_item.net_winning_rate = pct_to_float(resp_json['LotRateOn'])

        self.session.commit()
        self.logger.info('finish recording stock meta for:{}'.format(security_item.code))
        self.sleep()
def on_trading_close(self, timestamp):
    self.logger.info('on_trading_close:{}'.format(timestamp))

    self.latest_account['value'] = 0
    self.latest_account['all_value'] = 0

    for position in self.latest_account['positions']:
        entity_type, _, _ = decode_entity_id(position['entity_id'])
        data_schema = get_kdata_schema(entity_type, level=self.level)

        kdata = get_kdata(provider=self.provider,
                          level=self.level,
                          entity_id=position['entity_id'],
                          order=data_schema.timestamp.desc(),
                          end_timestamp=timestamp,
                          limit=1)

        # use qfq (forward-adjusted) close for stock
        if entity_type == 'stock':
            closing_price = kdata['qfq_close'][0]
        else:
            closing_price = kdata['close'][0]

        position['available_long'] = position['long_amount']
        position['available_short'] = position['short_amount']

        if closing_price:
            if (position['long_amount'] is not None) and position['long_amount'] > 0:
                position['value'] = position['long_amount'] * closing_price
                self.latest_account['value'] += position['value']
            elif (position['short_amount'] is not None) and position['short_amount'] > 0:
                position['value'] = 2 * (position['short_amount'] * position['average_short_price'])
                position['value'] -= position['short_amount'] * closing_price
                self.latest_account['value'] += position['value']
        else:
            self.logger.warning('could not refresh close value for position:{},timestamp:{}'.format(
                position['entity_id'], timestamp))

    # remove the empty positions
    self.latest_account['positions'] = [position for position in self.latest_account['positions']
                                        if position['long_amount'] > 0 or position['short_amount'] > 0]

    self.latest_account['all_value'] = self.latest_account['value'] + self.latest_account['cash']
    self.latest_account['closing'] = True
    self.latest_account['timestamp'] = to_pd_timestamp(timestamp)

    self.logger.info('on_trading_close:{},latest_account:{}'.format(timestamp, self.latest_account))
    self.persist_account(timestamp)
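# A worked example (illustrative numbers) of the short-position valuation above:
# value = 2 * short_amount * average_short_price - short_amount * closing_price,
# i.e. the entry notional plus the unrealized PnL of the short.
def _demo_short_position_value():
    short_amount, average_short_price = 100, 10.0
    value = 2 * (short_amount * average_short_price) - short_amount * 8.0
    assert value == 1200.0  # 1000 at entry plus a 200 gain after the price fell to 8
    value = 2 * (short_amount * average_short_price) - short_amount * 12.0
    assert value == 800.0   # a 200 loss after the price rose to 12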
def get_etf_stocks(code=None, codes=None, ids=None, timestamp=None, provider=None):
    # NB: avoid a now_pd_timestamp() default argument, which would be evaluated only once at import time
    if timestamp is None:
        timestamp = now_pd_timestamp()

    latests: List[EtfStock] = EtfStock.query_data(provider=provider,
                                                  code=code,
                                                  end_timestamp=timestamp,
                                                  order=EtfStock.timestamp.desc(),
                                                  limit=1,
                                                  return_type='domain')
    if latests:
        latest_record = latests[0]
        # get the latest report
        df = EtfStock.query_data(provider=provider, code=code, codes=codes, ids=ids,
                                 end_timestamp=timestamp,
                                 filters=[EtfStock.report_date == latest_record.report_date])

        # the latest report is an annual or semi-annual report
        if latest_record.report_period == ReportPeriod.year or latest_record.report_period == ReportPeriod.half_year:
            return df
        # for a quarterly report, combine it with the annual/semi-annual report to compute the holdings
        else:
            step = 0
            while True:
                report_date = get_recent_report_date(latest_record.report_date, step=step)

                pre_df = EtfStock.query_data(provider=provider, code=code, codes=codes, ids=ids,
                                             end_timestamp=timestamp,
                                             filters=[EtfStock.report_date == to_pd_timestamp(report_date)])

                df = df.append(pre_df)

                # stop once an annual or semi-annual report is reached
                if (ReportPeriod.half_year.value in pre_df['report_period'].tolist()) or (
                        ReportPeriod.year.value in pre_df['report_period'].tolist()):
                    # keep the latest holdings
                    df = df.drop_duplicates(subset=['stock_code'], keep='first')
                    return df

                step = step + 1
                if step >= 20:
                    break
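# Usage sketch (illustrative): fetch an ETF's constituent stocks as seen at a date.
# The ETF code below is only an example and the call assumes EtfStock data has
# already been recorded locally:
#
#   df = get_etf_stocks(code='510050', timestamp=to_pd_timestamp('2019-06-30'))
#   print(df['stock_code'].tolist()[:10])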
def test_000778_rights_issue_detail():
    result = get_rights_issue_detail(session=session,
                                     provider='eastmoney',
                                     return_type='domain',
                                     codes=['000778'],
                                     end_timestamp='2018-09-30',
                                     order=RightsIssueDetail.timestamp.desc())
    assert len(result) == 2
    latest: RightsIssueDetail = result[0]
    assert latest.timestamp == to_pd_timestamp('2001-09-10')
    assert latest.rights_issues == 43570000
    assert latest.rights_raising_fund == 492300000
    assert latest.rights_issue_price == 11.3
def test_000778_spo_detail():
    result = get_spo_detail(session=session,
                            provider='eastmoney',
                            return_type='domain',
                            codes=['000778'],
                            end_timestamp='2018-09-30',
                            order=SpoDetail.timestamp.desc())
    assert len(result) == 4
    latest: SpoDetail = result[0]
    assert latest.timestamp == to_pd_timestamp('2017-04-01')
    assert latest.spo_issues == 347600000
    assert latest.spo_price == 5.15
    assert latest.spo_raising_fund == 1766000000
def is_trading_date(entity_type, exchange, timestamp: pd.Timestamp):
    if isinstance(timestamp, str):
        timestamp = to_pd_timestamp(timestamp)

    # just ignore 00:00
    # the_date = date_and_time(timestamp, '09:00')

    if entity_type == 'stock':
        # weekday() 5 and 6 are Saturday and Sunday
        return (timestamp.weekday() != 5) and (timestamp.weekday() != 6)
    return True
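# A quick sanity check of the weekend rule above (note that holidays are not considered):
def _demo_is_trading_date():
    assert is_trading_date('stock', 'sh', '2019-06-21') is True   # a Friday
    assert is_trading_date('stock', 'sh', '2019-06-22') is False  # a Saturday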
def get_targets(self, timestamp, target_type: TargetType = TargetType.open_long) -> List[str]:
    df = None
    if target_type == TargetType.open_long:
        df = self.open_long_df
    if target_type == TargetType.open_short:
        df = self.open_short_df

    if pd_is_not_null(df):
        # normalize once so the membership check and the lookup agree
        timestamp = to_pd_timestamp(timestamp)
        if timestamp in df.index:
            target_df = df.loc[[timestamp], :]
            return target_df['entity_id'].tolist()
    return []
def test_000778_dividend_financing():
    result = get_dividend_financing(session=session,
                                    provider='eastmoney',
                                    return_type='domain',
                                    codes=['000778'],
                                    end_timestamp='2018-09-30',
                                    order=DividendFinancing.timestamp.desc())
    assert len(result) == 22
    latest: DividendFinancing = result[1]
    assert latest.timestamp == to_pd_timestamp('2017')
    assert latest.dividend_money == 598632026.4
    assert latest.spo_issues == 347572815.0
    assert latest.rights_issues == 0
    assert latest.ipo_issues == 0
def record(self, entity, start, end, size, timestamps):
    if self.start_timestamp:
        start = max(self.start_timestamp, to_pd_timestamp(start))

    # if self.level < IntervalLevel.LEVEL_1HOUR:
    #     start = '2019-01-01'

    end = now_pd_timestamp()

    start_timestamp = to_time_str(start)
    # joinquant's get_price needs an explicit end time, otherwise it may return future data
    end_timestamp = to_time_str(end, fmt=TIME_FORMAT_MINUTE2)

    # unadjusted prices (no fq)
    df = get_price(to_jq_entity_id(entity),
                   start_date=to_time_str(start_timestamp),
                   end_date=end_timestamp,
                   frequency=self.jq_trading_level,
                   fields=['open', 'close', 'low', 'high', 'volume', 'money'],
                   skip_paused=True,
                   fq=None)
    if df_is_not_null(df):
        df.index.name = 'timestamp'
        df.reset_index(inplace=True)
        df['name'] = entity.name
        df.rename(columns={'money': 'turnover'}, inplace=True)

        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value
        df['code'] = entity.code

        def generate_kdata_id(se):
            if self.level >= IntervalLevel.LEVEL_1DAY:
                return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
            else:
                return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

        df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)

        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

    return None
def on_finish_entity(self, entity):
    kdatas = get_kdata(provider=self.provider,
                       entity_id=entity.id,
                       level=self.level.value,
                       order=self.data_schema.timestamp.asc(),
                       return_type='domain',
                       session=self.session,
                       filters=[self.data_schema.hfq_close.is_(None),
                                self.data_schema.timestamp >= to_pd_timestamp('2005-01-01')])
    if kdatas:
        start = kdatas[0].timestamp
        end = kdatas[-1].timestamp

        # get hfq from joinquant
        df = get_price(to_jq_entity_id(entity),
                       start_date=to_time_str(start),
                       end_date=now_time_str(),
                       frequency='daily',
                       fields=['factor', 'open', 'close', 'low', 'high'],
                       skip_paused=True,
                       fq='post')
        if df_is_not_null(df):
            # fill hfq data
            for kdata in kdatas:
                time_str = to_time_str(kdata.timestamp)
                if time_str in df.index:
                    kdata.hfq_open = df.loc[time_str, 'open']
                    kdata.hfq_close = df.loc[time_str, 'close']
                    kdata.hfq_high = df.loc[time_str, 'high']
                    kdata.hfq_low = df.loc[time_str, 'low']
                    kdata.factor = df.loc[time_str, 'factor']

            self.session.add_all(kdatas)
            self.session.commit()

            latest_factor = df.factor[-1]

            if latest_factor == self.current_factors.get(entity.id):
                # the factor has not changed yet, so only fill the rows with missing qfq values
                sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                      'entity_id=\'{}\' and level=\'{}\' and (qfq_close isnull or qfq_high isnull or qfq_low isnull or qfq_open isnull)'.format(
                    self.data_schema.__table__, latest_factor, latest_factor, latest_factor, latest_factor,
                    entity.id, self.level.value)
            else:
                # the factor changed, so recompute qfq for the whole history
                sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                      'entity_id=\'{}\' and level=\'{}\''.format(self.data_schema.__table__, latest_factor,
                                                                 latest_factor, latest_factor, latest_factor,
                                                                 entity.id, self.level.value)
            self.logger.info(sql)
            self.session.execute(sql)
            self.session.commit()
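# A worked example (illustrative numbers) of the qfq/hfq relation in the SQL above:
# forward-adjusted (qfq) prices are backward-adjusted (hfq) prices divided by the
# latest adjustment factor.
def _demo_qfq_from_hfq():
    latest_factor = 2.0
    hfq_close = 20.0
    assert hfq_close / latest_factor == 10.0  # qfq_close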
def to_report_period_type(report_period):
    the_date = to_pd_timestamp(report_period)
    if the_date.month == 3 and the_date.day == 31:
        return ReportPeriod.season1.value
    if the_date.month == 6 and the_date.day == 30:
        return ReportPeriod.half_year.value
    if the_date.month == 9 and the_date.day == 30:
        return ReportPeriod.season3.value
    if the_date.month == 12 and the_date.day == 31:
        return ReportPeriod.year.value

    return None
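# Sanity checks for the report-date mapping above; these follow directly from the branches:
def _demo_report_period_mapping():
    assert to_report_period_type('2019-03-31') == ReportPeriod.season1.value
    assert to_report_period_type('2019-06-30') == ReportPeriod.half_year.value
    assert to_report_period_type('2019-09-30') == ReportPeriod.season3.value
    assert to_report_period_type('2019-12-31') == ReportPeriod.year.value
    assert to_report_period_type('2019-12-30') is None  # not a standard report date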
def risky_company(the_date=None, income_yoy=-0.1, profit_yoy=-0.1, entity_ids=None):
    # NB: avoid a now-based default argument, which would be evaluated only once at import time
    if the_date is None:
        the_date = to_pd_timestamp(now_time_str())

    codes = []
    start_timestamp = to_pd_timestamp(the_date) - datetime.timedelta(130)

    # falling revenue, falling profit, low current ratio, low quick ratio
    finance_filter = or_(FinanceFactor.op_income_growth_yoy < income_yoy,
                         FinanceFactor.net_profit_growth_yoy <= profit_yoy,
                         FinanceFactor.current_ratio < 0.7,
                         FinanceFactor.quick_ratio < 0.5)
    df = FinanceFactor.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp,
                                  filters=[finance_filter], columns=['code'])
    if pd_is_not_null(df):
        codes = codes + df.code.tolist()

    # high receivables, high inventories, high goodwill
    balance_filter = (BalanceSheet.accounts_receivable + BalanceSheet.inventories + BalanceSheet.goodwill) \
                     > BalanceSheet.total_equity / 2
    df = BalanceSheet.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp,
                                 filters=[balance_filter], columns=['code'])
    if pd_is_not_null(df):
        codes = codes + df.code.tolist()

    # receivables > profit / 2
    df1 = BalanceSheet.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp,
                                  columns=[BalanceSheet.code, BalanceSheet.accounts_receivable])
    if pd_is_not_null(df1):
        df1.drop_duplicates(subset='code', keep='last', inplace=True)
        df1 = df1.set_index('code', drop=True).sort_index()

    df2 = IncomeStatement.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp,
                                     columns=[IncomeStatement.code, IncomeStatement.net_profit])
    if pd_is_not_null(df2):
        df2.drop_duplicates(subset='code', keep='last', inplace=True)
        df2 = df2.set_index('code', drop=True).sort_index()

    if pd_is_not_null(df1) and pd_is_not_null(df2):
        codes = codes + df1[df1.accounts_receivable > df2.net_profit / 2].index.tolist()

    return list(set(codes))
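# Usage sketch (illustrative): screen the whole market as of a given day for
# companies flagged by any of the three risk rules above; the date and thresholds
# are examples, and the call assumes the financial data is already recorded locally:
#
#   risky = risky_company(the_date=to_pd_timestamp('2019-06-30'))
#   print(len(risky), sorted(risky)[:10])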
def init_entities(self):
    items = get_data(data_schema=self.data_schema,
                     session=self.session,
                     provider=self.provider,
                     entity_id='user_github_mojombo',
                     filters=[self.data_schema.id == 'user_github_mojombo'],
                     return_type='domain')

    first_user = GithubUser(id='user_github_mojombo',
                            entity_id='user_github_mojombo',
                            node_id='MDQ6VXNlcjE=',
                            avatar_url='https://avatars0.githubusercontent.com/u/1?v=4',
                            gravatar_id=None,
                            site_admin=False,
                            code='mojombo',
                            name='Tom Preston-Werner',
                            company=None,
                            blog='http://tom.preston-werner.com',
                            location='San Francisco',
                            email=None,
                            hireable=False,
                            bio=None,
                            public_repos=61,
                            public_gists=62,
                            followers=21529,
                            following=11,
                            timestamp=to_pd_timestamp(to_time_str("2007-10-20T05:24:19Z")),
                            created_timestamp=to_pd_timestamp(to_time_str("2007-10-20T05:24:19Z")),
                            updated_timestamp=to_pd_timestamp(to_time_str("2019-06-25T17:22:10Z")))
    if not items:
        self.session.add(first_user)
        self.session.commit()
    self.entities = [first_user]
def to_jq_report_period(timestamp):
    the_date = to_pd_timestamp(timestamp)
    report_period = to_report_period_type(timestamp)
    if report_period == ReportPeriod.year.value:
        return '{}'.format(the_date.year)
    if report_period == ReportPeriod.season1.value:
        return '{}q1'.format(the_date.year)
    if report_period == ReportPeriod.half_year.value:
        return '{}q2'.format(the_date.year)
    if report_period == ReportPeriod.season3.value:
        return '{}q3'.format(the_date.year)

    assert False
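# Sanity checks for the joinquant period strings above, derived from the mapping
# in to_report_period_type:
def _demo_jq_report_period():
    assert to_jq_report_period('2019-03-31') == '2019q1'
    assert to_jq_report_period('2019-06-30') == '2019q2'
    assert to_jq_report_period('2019-09-30') == '2019q3'
    assert to_jq_report_period('2019-12-31') == '2019'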