def __init__(self, entity_ids=None, predict_range=20, level: Union[IntervalLevel, str] = IntervalLevel.LEVEL_1DAY, adjust_type: Union[AdjustType, str] = None, relative_performance: bool = False) -> None:
    """
    Build training and testing feature/label frames for the configured time windows.

    :param entity_ids: entities to train/test on (None presumably means all — TODO confirm)
    :param predict_range: prediction horizon passed to subclasses via self.predict_range
    :param level: kdata interval level
    :param adjust_type: price adjust type; defaults to hfq for stocks
    :param relative_performance: whether labels are relative to peers (semantics defined by subclass)
    """
    super().__init__()
    self.entity_ids = entity_ids
    self.predict_range = predict_range
    self.level = level
    # stocks default to hfq (post-adjusted) prices when no adjust type is given
    if not adjust_type and self.entity_schema == Stock:
        self.adjust_type = AdjustType.hfq
    else:
        self.adjust_type = adjust_type
    self.relative_performance = relative_performance
    # NOTE(review): these three are read before being assigned here, so they must be
    # class-level attributes declared on the class (or a subclass) — confirm.
    self.training_start_timestamp = to_pd_timestamp(self.training_start_timestamp)
    self.testing_start_timestamp = to_pd_timestamp(self.testing_start_timestamp)
    self.testing_end_timestamp = to_pd_timestamp(self.testing_end_timestamp)
    # init training data: x/y timestamp pairs, then features and labels over them
    self.training_x_timestamps, self.training_y_timestamps = self.get_x_y_timestamps(
        start_timestamp=self.training_start_timestamp, end_timestamp=self.testing_start_timestamp)
    self.training_x_df = self.get_features(self.entity_ids, self.training_x_timestamps)
    self.training_y_df = self.get_labels(self.entity_ids, x_timestamps=self.training_x_timestamps,
                                         y_timestamps=self.training_y_timestamps)
    # init test data over the testing window
    self.testing_x_timestamps, self.testing_y_timestamps = self.get_x_y_timestamps(
        start_timestamp=self.testing_start_timestamp, end_timestamp=self.testing_end_timestamp)
    self.testing_x_df = self.get_features(self.entity_ids, self.testing_x_timestamps)
    self.testing_y_df = self.get_labels(self.entity_ids, x_timestamps=self.testing_x_timestamps,
                                        y_timestamps=self.testing_y_timestamps)
def test_composite():
    """Draw composite charts of 000338's 2016 annual cash-flow and balance-sheet items."""
    annual_report_date = to_pd_timestamp("2016-12-31")
    composite(
        entity_id="stock_sz_000338",
        data_schema=CashFlowStatement,
        columns=[
            CashFlowStatement.net_op_cash_flows,
            CashFlowStatement.net_investing_cash_flows,
            CashFlowStatement.net_financing_cash_flows,
        ],
        filters=[
            CashFlowStatement.report_period == "year",
            CashFlowStatement.report_date == annual_report_date,
        ],
    )
    composite(
        entity_id="stock_sz_000338",
        data_schema=BalanceSheet,
        columns=[
            BalanceSheet.total_current_assets,
            BalanceSheet.total_non_current_assets,
            BalanceSheet.total_current_liabilities,
            BalanceSheet.total_non_current_liabilities,
        ],
        filters=[
            BalanceSheet.report_period == "year",
            BalanceSheet.report_date == annual_report_date,
        ],
    )
def run(self):
    """
    Fetch paginated report data walking backwards in time from the most recent
    report date until self.start, persisting resumable progress in self.state.
    """
    current_date = get_recent_report_date()
    pn = 1
    ps = 2000
    while to_pd_timestamp(current_date) >= to_pd_timestamp(self.start):
        if not self.state:
            # fresh run: start at the most recent report date, page 1
            current_date = get_recent_report_date()
        else:
            if self.state["pn"] >= self.state["pages"]:
                # all pages of the current report date consumed: step to the
                # previous report date. BUG FIX: the original kept the stale
                # page number (`pn = pn`), which skipped leading pages of the
                # new date on resumed runs — restart from page 1 instead.
                current_date = get_recent_report_date(the_date=self.state["end_date"], step=1)
                pn = 1
                ps = self.state["ps"]
            else:
                # continue with the next page of the same report date
                pn = self.state["pn"] + 1
                ps = self.state["ps"]
                current_date = self.state["end_date"]
        result = self.get_data(end_date=current_date, pn=pn, ps=ps)
        print(result)
        self.state = {"end_date": current_date, "pages": result["result"]["pages"], "pn": pn, "ps": ps}
        self.persist_state("stock_sz_000001", self.state)
def get_cs_index(index_type='sh'):
    """
    Fetch index metadata from csindex.com.cn.

    :param index_type: 'sh' (scope indices only) or 'csi' (scope/industry/style)
    :return: pd.DataFrame of index metadata, or None when nothing was fetched
    :raises ValueError: for an unsupported index_type
    """
    if index_type == 'csi':
        category_list = [IndexCategory.scope, IndexCategory.industry, IndexCategory.style]
    elif index_type == 'sh':
        category_list = [IndexCategory.scope]
    else:
        # raise instead of `assert False`: assertions are stripped under `python -O`
        logger.warning(f'not support index type: {index_type}')
        raise ValueError(f'not support index type: {index_type}')
    requests_session = requests.Session()
    for category in category_list:
        data = _get_params(index_type=index_type, category=category)
        # demoted from print(): debug leftovers should not hit stdout
        logger.debug(data)
        resp = requests_session.post(url, headers=DEFAULT_HEADER, json=data)
        logger.debug(resp)
        results = _get_resp_data(resp)
        the_list = []
        logger.info(f'category: {category} ')
        logger.info(f'results: {results} ')
        for i, result in enumerate(results):
            logger.info(f'to {i}/{len(results)}')
            code = result['indexCode']
            # per-index detail endpoint provides base date/point
            info_url = f'https://www.csindex.com.cn/csindex-home/indexInfo/index-basic-info/{code}'
            info = _get_resp_data(requests_session.get(info_url))
            name = result['indexName']
            entity_id = f'index_sh_{code}'
            index_item = {
                'id': entity_id,
                'entity_id': entity_id,
                'timestamp': to_pd_timestamp(info['basicDate']),
                'entity_type': 'index',
                'exchange': 'sh',
                'code': code,
                'name': name,
                'category': category.value,
                'list_date': to_pd_timestamp(result['publishDate']),
                'base_point': info['basicIndex'],
                'publisher': 'csindex'
            }
            logger.info(index_item)
            the_list.append(index_item)
        # NOTE(review): returns after the first category that yields rows — confirm
        # whether 'csi' should aggregate all three categories instead.
        if the_list:
            return pd.DataFrame.from_records(the_list)
def get_cs_index(index_type="sh"):
    """
    Fetch index metadata from csindex.com.cn.

    :param index_type: "sh" (scope indices only) or "csi" (scope/industry/style)
    :return: pd.DataFrame of index metadata, or None when nothing was fetched
    :raises ValueError: for an unsupported index_type
    """
    if index_type == "csi":
        category_list = [IndexCategory.scope, IndexCategory.industry, IndexCategory.style]
    elif index_type == "sh":
        category_list = [IndexCategory.scope]
    else:
        # raise instead of `assert False`: assertions are stripped under `python -O`
        logger.warning(f"not support index type: {index_type}")
        raise ValueError(f"not support index type: {index_type}")
    requests_session = requests.Session()
    for category in category_list:
        data = _get_params(index_type=index_type, category=category)
        # demoted from print(): debug leftovers should not hit stdout
        logger.debug(data)
        resp = requests_session.post(url, headers=DEFAULT_HEADER, json=data)
        logger.debug(resp)
        results = _get_resp_data(resp)
        the_list = []
        logger.info(f"category: {category} ")
        logger.info(f"results: {results} ")
        for i, result in enumerate(results):
            logger.info(f"to {i}/{len(results)}")
            code = result["indexCode"]
            # per-index detail endpoint provides base date/point
            info_url = f"https://www.csindex.com.cn/csindex-home/indexInfo/index-basic-info/{code}"
            info = _get_resp_data(requests_session.get(info_url))
            name = result["indexName"]
            entity_id = f"index_sh_{code}"
            index_item = {
                "id": entity_id,
                "entity_id": entity_id,
                "timestamp": to_pd_timestamp(info["basicDate"]),
                "entity_type": "index",
                "exchange": "sh",
                "code": code,
                "name": name,
                "category": category.value,
                "list_date": to_pd_timestamp(result["publishDate"]),
                "base_point": info["basicIndex"],
                "publisher": "csindex",
            }
            logger.info(index_item)
            the_list.append(index_item)
        # NOTE(review): returns after the first category that yields rows — confirm
        # whether "csi" should aggregate all three categories instead.
        if the_list:
            return pd.DataFrame.from_records(the_list)
def composite_all(data_schema, column, timestamp, entity_ids=None, filters=None):
    """
    Draw a pie chart of `column` across all (or selected) entities at one timestamp.

    :param data_schema: schema to query
    :param column: a column object or its name
    :param timestamp: the single timestamp to slice on
    :param entity_ids: optional entity filter
    :param filters: optional extra query filters (not mutated)
    """
    if not isinstance(column, str):
        column = column.name
    # BUG FIX: the original did filters.append([cond]) — appending a *list* into
    # the filters list (nested list the query layer does not expect) and mutating
    # the caller's argument. Build a fresh flat list instead.
    ts_filter = data_schema.timestamp == to_pd_timestamp(timestamp)
    if filters:
        filters = filters + [ts_filter]
    else:
        filters = [ts_filter]
    df = data_schema.query_data(entity_ids=entity_ids, columns=['entity_id', 'timestamp', column],
                                filters=filters, index='entity_id')
    entity_type, exchange, _ = decode_entity_id(df['entity_id'].iloc[0])
    # one-row frame: entity ids as columns, the metric values as the single row
    pie_df = pd.DataFrame(columns=df.index, data=[df[column].tolist()])
    pie_df['entity_id'] = f'{entity_type}_{exchange}_{column}'
    pie_df['timestamp'] = timestamp
    drawer = Drawer(main_df=pie_df)
    drawer.draw_pie(show=True)
def get_news(entity_id, ps=200, index=1):
    """
    Fetch the news list for an entity from eastmoney, recursively following pages.

    :param entity_id: zvt entity id, converted to an em sec id for the API
    :param ps: page size
    :param index: 1-based page index (used for recursion)
    :return: list of {id, entity_id, timestamp, news_title} dicts, or None when
             the request fails or the first page is empty
    """
    sec_id = to_em_sec_id(entity_id=entity_id)
    url = f"https://np-listapi.eastmoney.com/comm/wap/getListInfo?cb=callback&client=wap&type=1&mTypeAndCode={sec_id}&pageSize={ps}&pageIndex={index}&callback=jQuery1830017478247906740352_{now_timestamp() - 1}&_={now_timestamp()}"
    resp = requests.get(url)
    # sample item returned by the API:
    # {
    #     "Art_ShowTime": "2022-02-11 14:29:25",
    #     "Art_Image": "",
    #     "Art_MediaName": "每日经济新闻",
    #     "Art_Code": "202202112274017262",
    #     "Art_Title": "潍柴动力:巴拉德和锡里斯不纳入合并财务报表范围",
    #     "Art_SortStart": "1644560965017262",
    #     "Art_VideoCount": 0,
    #     "Art_OriginUrl": "http://finance.eastmoney.com/news/1354,202202112274017262.html",
    #     "Art_Url": "http://finance.eastmoney.com/a/202202112274017262.html",
    # }
    if resp.status_code == 200:
        # response is JSONP: strip the callback(...) wrapper before decoding
        json_text = resp.text[resp.text.index("(") + 1 : resp.text.rindex(")")]
        json_result = demjson3.decode(json_text)["data"]["list"]
        if json_result:
            json_result = [
                {
                    "id": f'{entity_id}_{item["Art_ShowTime"]}',
                    "entity_id": entity_id,
                    "timestamp": to_pd_timestamp(item["Art_ShowTime"]),
                    "news_title": item["Art_Title"],
                }
                for item in json_result
            ]
            # recurse into the next page; recursion stops when a page comes back empty
            next_data = get_news(entity_id=entity_id, ps=ps, index=index + 1)
            if next_data:
                return json_result + next_data
            else:
                return json_result
    # NOTE(review): non-200 responses and empty pages fall through and return None
def on_time(self, timestamp: pd.Timestamp):
    """
    On each tick, trade targets derived from raised-fund actor summaries for the
    most recent report date; processes each report period at most once.
    """
    recent_report_date = to_pd_timestamp(get_recent_report_date(timestamp))
    # already handled this report period — nothing to do
    if self.finish_date and is_same_date(recent_report_date, self.finish_date):
        return
    filters = [
        StockActorSummary.actor_type == ActorType.raised_fund.value,
        StockActorSummary.report_date == recent_report_date
    ]
    if self.entity_ids:
        filters = filters + [
            StockActorSummary.entity_id.in_(self.entity_ids)
        ]
    df = StockActorSummary.query_data(filters=filters)
    if pd_is_not_null(df):
        self.logger.info(f'{df}')
        self.finish_date = recent_report_date
        # long when raised-fund holdings increased more than 5%,
        # short when they decreased more than 50%.
        # NOTE(review): thresholds are asymmetric (0.05 vs -0.5) — confirm -0.5
        # is intentional and not a typo for -0.05.
        long_df = df[df['change_ratio'] > 0.05]
        short_df = df[df['change_ratio'] < -0.5]
        try:
            self.trade_the_targets(
                due_timestamp=timestamp,
                happen_timestamp=timestamp,
                long_selected=set(long_df['entity_id'].to_list()),
                short_selected=set(short_df['entity_id'].to_list()))
        except Exception as e:
            # best-effort: trading errors are logged, not propagated to the scheduler
            self.logger.error(e)
def record(self, entity, start, end, size, timestamps):
    """
    Record dragon-and-tiger (龙虎榜) list data for one entity into the database.

    Flattens each trade's department list into dep{n}/dep_{n} columns
    (buy side vs sell side) with in/out amounts and a net rate.
    """
    if start:
        # resume from the day after the last recorded timestamp
        start_date = to_time_str(next_date(start))
    else:
        start_date = None
    datas = em_api.get_dragon_and_tiger(code=entity.code, start_date=start_date)
    if datas:
        records = []
        for data in datas:
            timestamp = to_pd_timestamp(data["TRADE_DATE"])
            record = {
                "id": "{}_{}_{}".format(
                    entity.id, data["TRADE_ID"], to_time_str(timestamp, fmt=TIME_FORMAT_DAY)),
                "entity_id": entity.id,
                "timestamp": timestamp,
                "code": entity.code,
                "name": entity.name,
                "reason": data["EXPLANATION"],
                "turnover": data["ACCUM_AMOUNT"],
                "change_pct": data["CHANGE_RATE"],
                "net_in": data["NET_BUY"],
            }
            # trading department (营业部) list
            deps = data["LIST"]
            for dep in deps:
                # direction "0" = buy side (dep{rank}); anything else = sell side (dep_{rank})
                flag = "" if dep["TRADE_DIRECTION"] == "0" else "_"
                rank = dep["RANK"]
                dep_name = f"dep{flag}{rank}"
                dep_in = f"{dep_name}_in"
                dep_out = f"{dep_name}_out"
                dep_rate = f"{dep_name}_rate"
                record[dep_name] = dep["OPERATEDEPT_NAME"]
                record[dep_in] = dep["BUY_AMT_REAL"]
                record[dep_out] = dep["SELL_AMT_REAL"]
                # net rate = buy ratio - sell ratio, treating missing ratios as 0
                record[dep_rate] = (dep["BUY_RATIO"] if dep["BUY_RATIO"] else 0) - (dep["SELL_RATIO"] if dep["SELL_RATIO"] else 0)
            records.append(record)
        df = pd.DataFrame.from_records(records)
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)
    else:
        self.logger.info(f"no data for {entity.id}")
def get_treasury_yield(pn=1, ps=2000, fetch_all=True):
    """
    Fetch CN and US treasury yields (2/5/10/30 year) from the em API.

    :return: list of dicts, one per country per solar date
    """
    results = get_em_data(
        request_type="RPTA_WEB_TREASURYYIELD",
        fields="ALL",
        sort_by="SOLAR_DATE",
        sort="desc",
        pn=pn,
        ps=ps,
        fetch_all=fetch_all,
    )
    # (code, entity_id, {target field -> em field id})
    country_specs = [
        (
            "CN",
            "country_galaxy_CN",
            {"yield_2": "EMM00588704", "yield_5": "EMM00166462", "yield_10": "EMM00166466", "yield_30": "EMM00166469"},
        ),
        (
            "US",
            "country_galaxy_US",
            {"yield_2": "EMG00001306", "yield_5": "EMG00001308", "yield_10": "EMG00001310", "yield_30": "EMG00001312"},
        ),
    ]
    yields = []
    for item in results:
        date = item["SOLAR_DATE"]
        for code, entity_id, field_map in country_specs:
            row = {
                "id": f"{entity_id}_{to_time_str(date)}",
                "entity_id": entity_id,
                "timestamp": to_pd_timestamp(date),
                "code": code,
            }
            for target, em_field in field_map.items():
                row[target] = item.get(em_field)
            yields.append(row)
    return yields
def get_tag_domain(self, entity_id, timestamp, **fill_kv):
    """
    Get (or create) the tag domain object for an entity on a given day,
    filling/overwriting its attributes with fill_kv.

    :param entity_id: entity the tag belongs to
    :param timestamp: any timestamp within the target day
    :param fill_kv: attribute name -> value pairs to set on the domain
    :return: an existing (updated) or freshly constructed domain object
    """
    the_date = to_time_str(timestamp, fmt=TIME_FORMAT_DAY)
    the_id = f"{entity_id}_{the_date}"
    the_domain = self.data_schema.get_one(id=the_id)
    if the_domain:
        # setattr instead of exec(f"the_domain.{k}=v"): equivalent behavior,
        # but safe against arbitrary code in key names and much faster
        for k, v in fill_kv.items():
            setattr(the_domain, k, v)
    else:
        return self.data_schema(id=the_id, entity_id=entity_id, timestamp=to_pd_timestamp(the_date), **fill_kv)
    return the_domain
def run(self):
    """
    Fetch paginated report data walking backwards in time from the most recent
    report date until self.start, persisting resumable progress in self.state.
    """
    current_date = get_recent_report_date()
    pn = 1
    ps = 2000
    while to_pd_timestamp(current_date) >= to_pd_timestamp(self.start):
        if not self.state:
            # fresh run: start at the most recent report date, page 1
            current_date = get_recent_report_date()
        else:
            if self.state['pn'] >= self.state['pages']:
                # all pages of the current report date consumed: step to the
                # previous report date. BUG FIX: the original kept the stale
                # page number (`pn = pn`), which skipped leading pages of the
                # new date on resumed runs — restart from page 1 instead.
                current_date = get_recent_report_date(the_date=self.state['end_date'], step=1)
                pn = 1
                ps = self.state['ps']
            else:
                # continue with the next page of the same report date
                pn = self.state['pn'] + 1
                ps = self.state['ps']
                current_date = self.state['end_date']
        result = self.get_data(end_date=current_date, pn=pn, ps=ps)
        print(result)
        self.state = {
            'end_date': current_date,
            'pages': result['result']['pages'],
            'pn': pn,
            'ps': ps
        }
        self.persist_state('stock_sz_000001', self.state)
def get_indicator_data(indicator, indicator_name=None, country=None, date=None):
    """
    Query one World Bank indicator and return it as a (code, timestamp)-indexed frame.

    :param indicator: WB indicator id
    :param indicator_name: optional column name override (defaults to the indicator id)
    :param country: optional country filter passed to the API path
    :param date: optional date filter
    :return: pd.DataFrame indexed by (code, timestamp) with a single value column
    """
    raw_items = _wb_get(paths={"country": country, "indicator": indicator}, date=date)
    rows = []
    for item in raw_items:
        value_column = indicator_name if indicator_name else item["indicator"]["id"]
        rows.append(
            {
                "code": item["country"]["id"],
                "timestamp": to_pd_timestamp(item["date"]),
                value_column: item["value"],
            }
        )
    return pd.DataFrame.from_records(data=rows).set_index(["code", "timestamp"])
def get_tag_domains(self, entity_ids, timestamp, **fill_kv):
    """
    Get (or create) the tag domain objects for several entities on a given day,
    filling/overwriting their attributes with fill_kv.

    :param entity_ids: entities the tags belong to
    :param timestamp: any timestamp within the target day
    :param fill_kv: attribute name -> value pairs to set on each domain
    :return: list of existing (updated) plus freshly constructed domain objects
    """
    the_date = to_time_str(timestamp, fmt=TIME_FORMAT_DAY)
    ids = [f"{entity_id}_{the_date}" for entity_id in entity_ids]
    the_domains = self.data_schema.query_data(ids=ids, return_type="domain")
    # robustness: normalize a None result to an empty list so the list ops below hold
    if not the_domains:
        the_domains = []
    # setattr instead of exec(f"the_domain.{k}=v"): equivalent behavior,
    # but safe against arbitrary code in key names and much faster
    for the_domain in the_domains:
        for k, v in fill_kv.items():
            setattr(the_domain, k, v)
    current_ids = [item.id for item in the_domains]
    # build fresh domain objects for ids that do not exist yet
    need_new_ids = set(ids) - set(current_ids)
    new_domains = [
        self.data_schema(id=f"{entity_id}_{the_date}", entity_id=entity_id,
                         timestamp=to_pd_timestamp(the_date), **fill_kv)
        for entity_id in need_new_ids
    ]
    return the_domains + new_domains
def get_entity_list_by_cap(timestamp, cap_start, cap_end, entity_type="stock", provider=None, adjust_type=None, retry_times=20):
    """
    Return entity ids whose market cap on `timestamp` falls in [cap_start, cap_end].

    Cap is derived as turnover / turnover_rate from the daily kdata. When no
    kdata exists for the day (holiday etc.), retries the next day up to
    retry_times before giving up with an empty list.
    """
    adjust_type = adjust_type or default_adjust_type(entity_type=entity_type)
    kdata_schema = get_kdata_schema(entity_type, level=IntervalLevel.LEVEL_1DAY, adjust_type=adjust_type)
    df = kdata_schema.query_data(
        provider=provider,
        filters=[kdata_schema.timestamp == to_pd_timestamp(timestamp)],
        index="entity_id",
    )
    if not pd_is_not_null(df):
        # no data for this day — try the following day, bounded by retry_times
        if retry_times == 0:
            return []
        return get_entity_list_by_cap(
            timestamp=next_date(timestamp, 1),
            cap_start=cap_start,
            cap_end=cap_end,
            entity_type=entity_type,
            provider=provider,
            adjust_type=adjust_type,
            retry_times=retry_times - 1,
        )
    # cap = turnover / turnover_rate (both from the same day's kdata)
    df["cap"] = df["turnover"] / df["turnover_rate"]
    selected = df.copy()
    if cap_start:
        selected = selected.loc[(df["cap"] >= cap_start)]
    if cap_end:
        selected = selected.loc[(df["cap"] <= cap_end)]
    return selected.index.tolist()
def record(self, entity, start, end, size, timestamps):
    """
    Fetch OHLCV kdata for a coin entity via ccxt and persist it to the db.

    Uses `since` when the exchange supports it and a start is given,
    otherwise fetches the most recent `limit` bars.
    """
    ccxt_exchange = get_coin_exchange(entity.exchange)
    if not ccxt_exchange.has['fetchOHLCV']:
        self.logger.warning("exchange:{} not support fetchOHLCV".format(entity.exchange))
        return
    config = get_exchange_config(entity.exchange)
    limit = min(size, config['kdata_limit'])
    kdata_list = []
    if config['support_since'] and start:
        # ccxt expects `since` in milliseconds
        kdatas = ccxt_exchange.fetch_ohlcv(
            entity.code,
            timeframe=self.ccxt_trading_level,
            since=int(start.timestamp() * 1000))
    else:
        kdatas = ccxt_exchange.fetch_ohlcv(
            entity.code,
            timeframe=self.ccxt_trading_level,
            limit=limit)
    for kdata in kdatas:
        # kdata = [ms-epoch, open, high, low, close, volume]
        current_timestamp = kdata[0]
        if self.level == IntervalLevel.LEVEL_1DAY:
            current_timestamp = to_time_str(current_timestamp)
        # BUG FIX: the original passed fmt=... to str.format, where an unused
        # keyword is silently ignored, so the id was never date-formatted.
        # Route the timestamp through to_time_str as intended.
        # NOTE(review): assumes to_time_str accepts ms-epoch ints — confirm.
        if self.level >= IntervalLevel.LEVEL_1DAY:
            kdata_id = "{}_{}".format(entity.id, to_time_str(current_timestamp, fmt=TIME_FORMAT_DAY))
        else:
            kdata_id = "{}_{}".format(entity.id, to_time_str(current_timestamp, fmt=TIME_FORMAT_ISO8601))
        kdata_json = {
            'id': kdata_id,
            'entity_id': entity.id,
            'code': entity.code,
            'name': entity.name,
            'timestamp': to_pd_timestamp(current_timestamp),
            'open': kdata[1],
            'high': kdata[2],
            'low': kdata[3],
            'close': kdata[4],
            'volume': kdata[5],
            'provider': 'ccxt',
            'level': self.level.value
        }
        kdata_list.append(kdata_json)
    if kdata_list:
        df = pd.DataFrame.from_records(kdata_list)
        df_to_db(data_schema=self.data_schema, df=df, provider=self.provider, force_update=True)
# NOTE(review): stray statement — looks like the tail of a function defined
# before this chunk (probably composite_all); confirm its scope in the full file.
drawer.draw_pie(show=True)


def _group_entity_ids(entity_ids):
    """Group entity ids by entity type: {entity_type: [entity_id, ...]}."""
    entity_type_map_ids = {}
    for entity_id in entity_ids:
        entity_type, _, _ = decode_entity_id(entity_id)
        ids: List = entity_type_map_ids.setdefault(entity_type, [])
        ids.append(entity_id)
    return entity_type_map_ids


if __name__ == "__main__":
    from zvt.domain import CashFlowStatement

    # demo: composite chart of 000338's 2015 annual cash-flow structure
    composite(
        entity_id="stock_sz_000338",
        data_schema=CashFlowStatement,
        columns=[
            CashFlowStatement.net_op_cash_flows,
            CashFlowStatement.net_investing_cash_flows,
            CashFlowStatement.net_financing_cash_flows,
        ],
        filters=[
            CashFlowStatement.report_period == "year",
            CashFlowStatement.report_date == to_pd_timestamp("2015-12-31"),
        ],
    )


# the __all__ is generated
__all__ = ["compare", "distribute", "composite", "composite_all"]
def test_composite_all():
    """Smoke test: composite chart of turnover across all stocks on 2016-12-02."""
    target_day = to_pd_timestamp("2016-12-02")
    composite_all(
        data_schema=Stock1dKdata,
        column=Stock1dKdata.turnover,
        timestamp=target_day,
        entity_ids=None,
    )
def record_dragon_tiger(data_provider="em", entity_provider="em", sleeping_time=2):
    """
    Record dragon-and-tiger (龙虎榜) list data, then email a report of stocks that
    strong trading departments recently bought with meaningful turnover.
    """
    # dragon-and-tiger list data
    run_data_recorder(
        domain=DragonAndTiger,
        data_provider=data_provider,
        entity_provider=entity_provider,
        day_data=True,
        sleeping_time=sleeping_time,
    )
    email_action = EmailInformer()
    # recent year (~400 days back)
    start_timestamp = next_date(current_date(), -400)
    # strong trading departments over the recent year
    players = get_big_players(start_timestamp=start_timestamp)
    # stocks those departments appeared on within the last 30 days
    recent_date = next_date(current_date(), -30)
    selected = []
    for player in players:
        # match the player in any of the top-5 department slots with rate >= 5
        filters = [
            or_(
                and_(DragonAndTiger.dep1 == player, DragonAndTiger.dep1_rate >= 5),
                and_(DragonAndTiger.dep2 == player, DragonAndTiger.dep2_rate >= 5),
                and_(DragonAndTiger.dep3 == player, DragonAndTiger.dep3_rate >= 5),
                and_(DragonAndTiger.dep4 == player, DragonAndTiger.dep4_rate >= 5),
                and_(DragonAndTiger.dep5 == player, DragonAndTiger.dep5_rate >= 5),
            )
        ]
        df = DragonAndTiger.query_data(
            start_timestamp=recent_date,
            filters=filters,
            columns=[
                DragonAndTiger.timestamp,
                DragonAndTiger.entity_id,
                DragonAndTiger.code,
                DragonAndTiger.name
            ],
            index="entity_id",
        )
        selected = selected + df.index.tolist()
    if selected:
        selected = list(set(selected))
        target_date = get_latest_kdata_date(provider=data_provider, entity_type="stock", adjust_type="hfq")
        # keep only liquid candidates: turnover rate > 2% and turnover > 3e8 on the latest day
        df = Stock1dHfqKdata.query_data(
            provider=data_provider,
            entity_ids=selected,
            filters=[
                Stock1dHfqKdata.turnover_rate > 0.02,
                Stock1dHfqKdata.timestamp == to_pd_timestamp(target_date),
                Stock1dHfqKdata.turnover > 300000000,
            ],
            index=["entity_id"],
        )
        inform(
            action=email_action,
            entity_ids=df.index.tolist(),
            target_date=current_date(),
            title="report 龙虎榜",
            entity_provider=entity_provider,
            entity_type="stock",
            em_group="重要指数",
            em_group_over_write=False,
        )
def get_kdata(entity_id, level=IntervalLevel.LEVEL_1DAY, adjust_type=AdjustType.qfq, limit=10000):
    """
    Fetch kline data for an entity from the eastmoney push2his API.

    The last (still-forming) bar is dropped. Returns a pd.DataFrame of kdata
    records, or None when the API returns no data.
    """
    entity_type, exchange, code = decode_entity_id(entity_id)
    level = IntervalLevel(level)
    sec_id = to_em_sec_id(entity_id)
    fq_flag = to_em_fq_flag(adjust_type)
    level_flag = to_em_level_flag(level)
    url = f'https://push2his.eastmoney.com/api/qt/stock/kline/get?secid={sec_id}&klt={level_flag}&fqt={fq_flag}&lmt={limit}&end=20500000&iscca=1&fields1=f1,f2,f3,f4,f5,f6,f7,f8&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64&ut=f057cbcbce2a86e2866ab8877db1d059&forcect=1'
    resp = requests.get(url, headers=DEFAULT_HEADER)
    resp.raise_for_status()
    data = resp.json()['data']
    kdatas = []
    if data:
        klines = data['klines']
        name = data['name']
        # each kline is a csv line: time,open,close,high,low,volume,turnover,
        # amplitude,change_pct,change,turnover_rate,...
        # skip the final unfinished bar
        for line in klines[:-1]:
            fields = line.split(',')
            bar_time = to_pd_timestamp(fields[0])
            kdatas.append(
                dict(
                    id=generate_kdata_id(entity_id=entity_id, timestamp=bar_time, level=level),
                    timestamp=bar_time,
                    entity_id=entity_id,
                    provider='em',
                    code=code,
                    name=name,
                    level=level.value,
                    open=to_float(fields[1]),
                    close=to_float(fields[2]),
                    high=to_float(fields[3]),
                    low=to_float(fields[4]),
                    volume=to_float(fields[5]),
                    turnover=to_float(fields[6]),
                    turnover_rate=value_to_pct(to_float(fields[10])),
                    change_pct=value_to_pct(to_float(fields[8])),
                )
            )
    if kdatas:
        return pd.DataFrame.from_records(kdatas)
def get_kdata(entity_id, level=IntervalLevel.LEVEL_1DAY, adjust_type=AdjustType.qfq, limit=10000):
    """
    Fetch kline data for an entity from the eastmoney push2his API.

    Returns a pd.DataFrame of kdata records (all bars, including the current
    one), or None when the API returns no data.
    """
    entity_type, exchange, code = decode_entity_id(entity_id)
    level = IntervalLevel(level)
    sec_id = to_em_sec_id(entity_id)
    fq_flag = to_em_fq_flag(adjust_type)
    level_flag = to_em_level_flag(level)
    # f131 settlement price, f133 open interest — not requested currently
    url = f"https://push2his.eastmoney.com/api/qt/stock/kline/get?secid={sec_id}&klt={level_flag}&fqt={fq_flag}&lmt={limit}&end=20500000&iscca=1&fields1=f1,f2,f3,f4,f5,f6,f7,f8&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64&ut=f057cbcbce2a86e2866ab8877db1d059&forcect=1"
    resp = requests.get(url, headers=DEFAULT_HEADER)
    resp.raise_for_status()
    data = resp.json()["data"]
    kdatas = []
    if data:
        klines = data["klines"]
        name = data["name"]
        # each kline is a csv line: time,open,close,high,low,volume,turnover,
        # amplitude,change_pct,change,turnover_rate,...
        for line in klines:
            fields = line.split(",")
            bar_time = to_pd_timestamp(fields[0])
            kdatas.append(
                dict(
                    id=generate_kdata_id(entity_id=entity_id, timestamp=bar_time, level=level),
                    timestamp=bar_time,
                    entity_id=entity_id,
                    provider="em",
                    code=code,
                    name=name,
                    level=level.value,
                    open=to_float(fields[1]),
                    close=to_float(fields[2]),
                    high=to_float(fields[3]),
                    low=to_float(fields[4]),
                    volume=to_float(fields[5]),
                    turnover=to_float(fields[6]),
                    turnover_rate=value_to_pct(to_float(fields[10])),
                    change_pct=value_to_pct(to_float(fields[8])),
                )
            )
    if kdatas:
        return pd.DataFrame.from_records(kdatas)
def get_cn_index(index_type='cni', category=IndexCategory.style):
    """
    Fetch index metadata from cnindex.net.cn for one category.

    :param index_type: 'cni' or 'sz', selecting the category->url map
    :param category: IndexCategory used to resolve the listing url
    :return: pd.DataFrame of index metadata, or None when nothing was fetched
    :raises ValueError: for an unsupported index_type
    """
    if index_type == 'cni':
        category_map_url = cni_category_map_url
    elif index_type == 'sz':
        category_map_url = sz_category_map_url
    else:
        logger.error(f'not support index_type: {index_type}')
        # raise instead of `assert False`: assertions are stripped under `python -O`
        raise ValueError(f'not support index_type: {index_type}')
    requests_session = requests.Session()
    url = category_map_url.get(category)
    resp = requests_session.get(url, headers=DEFAULT_HEADER)
    results = _get_resp_data(resp)['rows']
    # e.g
    # amount: 277743699997.9
    # closeingPoint: 6104.7592
    # docchannel: 1039
    # freeMarketValue: 10794695531696.15
    # id: 142
    # indexcode: "399370"
    # indexename: "CNI Growth"
    # indexfullcname: "国证1000成长指数"
    # indexfullename: "CNI 1000 Growth Index"
    # indexname: "国证成长"
    # indexsource: "1"
    # indextype: "202"
    # pb: 5.34
    # peDynamic: 29.8607
    # peStatic: 33.4933
    # percent: 0.0022
    # prefixmonth: null
    # realtimemarket: "1"
    # remark: ""
    # sampleshowdate: null
    # samplesize: 332
    # showcnindex: "1"
    # totalMarketValue: 23113641352198.32
    the_list = []
    logger.info(f'category: {category} ')
    logger.info(f'results: {results} ')
    for i, result in enumerate(results):
        logger.info(f'to {i}/{len(results)}')
        code = result['indexcode']
        info_resp = requests_session.get(
            f'http://www.cnindex.net.cn/index-intro?indexcode={code}')
        # fbrq: "2010-01-04"    (publish date)
        # jd: 1000              (base point)
        # jr: "2002-12-31"      (base date)
        # jsfs: "自由流通市值"
        # jsjj: "国证成长由国证1000指数样本股中成长风格突出的股票组成,为投资者提供更丰富的指数化投资工具。"
        # qzsx: null
        # typl: 2
        # xyfw: "沪深A股"
        # xygz: "在国证1000指数样本股中,选取主营业务收入增长率、净利润增长率和净资产收益率综合排名前332只"
        index_info = _get_resp_data(info_resp)
        name = result['indexname']
        entity_id = f'index_sz_{code}'
        index_item = {
            'id': entity_id,
            'entity_id': entity_id,
            'timestamp': to_pd_timestamp(index_info['jr']),
            'entity_type': 'index',
            'exchange': 'sz',
            'code': code,
            'name': name,
            'category': category.value,
            'list_date': to_pd_timestamp(index_info['fbrq']),
            'base_point': index_info['jd'],
            'publisher': 'cnindex'
        }
        logger.info(index_item)
        the_list.append(index_item)
        # throttle: be polite to the per-index detail endpoint
        time.sleep(3)
    if the_list:
        return pd.DataFrame.from_records(the_list)
def init_timestamps(self, entity_item) -> List[pd.Timestamp]:
    """Return the holder-report END_DATEs of the entity as timestamps (None when no data)."""
    report_dates = get_holder_report_dates(code=entity_item.code)
    if not report_dates:
        return None
    return [to_pd_timestamp(entry['END_DATE']) for entry in report_dates]
def __init__(
    self,
    entity_ids: List[str] = None,
    start_timestamp: Union[str, pd.Timestamp] = "2015-01-01",
    end_timestamp: Union[str, pd.Timestamp] = "2021-12-01",
    predict_start_timestamp: Union[str, pd.Timestamp] = "2021-06-01",
    predict_steps: int = 20,
    level: Union[IntervalLevel, str] = IntervalLevel.LEVEL_1DAY,
    adjust_type: Union[AdjustType, str] = None,
    data_provider: str = None,
    label_method: str = "raw",
) -> None:
    """
    Build kdata, features and labels over [start, end], split around
    predict_start_timestamp, and prepare training/testing sets.

    :param entity_ids: entities to model (None presumably means all — TODO confirm)
    :param start_timestamp: start of the full data window
    :param end_timestamp: end of the full data window
    :param predict_start_timestamp: boundary between training and prediction data
    :param predict_steps: prediction horizon in bars
    :param level: kdata interval level
    :param adjust_type: price adjust type; defaults per entity schema
    :param data_provider: kdata provider
    :param label_method: raw, change, or behavior_cls
    """
    super().__init__()
    self.entity_ids = entity_ids
    self.start_timestamp = to_pd_timestamp(start_timestamp)
    self.end_timestamp = to_pd_timestamp(end_timestamp)
    self.predict_start_timestamp = to_pd_timestamp(predict_start_timestamp)
    # the predict boundary must lie strictly inside the data window
    assert self.start_timestamp < self.predict_start_timestamp < self.end_timestamp
    self.predict_steps = predict_steps
    self.level = level
    if not adjust_type:
        adjust_type = default_adjust_type(
            entity_type=self.entity_schema.__name__)
    self.adjust_type = adjust_type
    self.data_provider = data_provider
    self.label_method = label_method
    self.kdata_df = self.build_kdata()
    if not pd_is_not_null(self.kdata_df):
        # NOTE(review): "kdta" is a typo in the log message; `assert False` here
        # would be stripped under `python -O` — consider raising instead.
        logger.error("not kdta")
        assert False
    self.feature_df = self.build_feature(self.entity_ids, self.start_timestamp, self.end_timestamp)
    # drop na in feature
    self.feature_df = self.feature_df.dropna()
    # feature columns = everything except the (entity_id, timestamp) index fields
    self.feature_names = list(
        set(self.feature_df.columns) - {"entity_id", "timestamp"})
    self.feature_df = self.feature_df.loc[:, self.feature_names]
    self.label_ser = self.build_label()
    # keep same index with feature df
    self.label_ser = self.label_ser.loc[self.feature_df.index]
    self.label_name = self.label_ser.name
    self.training_X, self.training_y, self.testing_X, self.testing_y = self.split_data(
    )
    logger.info(self.training_X)
    logger.info(self.training_y)
    # populated later by fit/predict
    self.model = None
    self.pred_y = None