Code example #1
File: ml.py Project: durgagokina/zvt
    def __init__(self, entity_ids=None, predict_range=20, level: Union[IntervalLevel, str] = IntervalLevel.LEVEL_1DAY,
                 adjust_type: Union[AdjustType, str] = None, relative_performance: bool = False) -> None:
        super().__init__()
        self.entity_ids = entity_ids
        self.predict_range = predict_range
        self.level = level
        if not adjust_type and self.entity_schema == Stock:
            self.adjust_type = AdjustType.hfq
        else:
            self.adjust_type = adjust_type

        self.relative_performance = relative_performance

        self.training_start_timestamp = to_pd_timestamp(self.training_start_timestamp)
        self.testing_start_timestamp = to_pd_timestamp(self.testing_start_timestamp)
        self.testing_end_timestamp = to_pd_timestamp(self.testing_end_timestamp)

        # init training data
        self.training_x_timestamps, self.training_y_timestamps = self.get_x_y_timestamps(
            start_timestamp=self.training_start_timestamp,
            end_timestamp=self.testing_start_timestamp)

        self.training_x_df = self.get_features(self.entity_ids, self.training_x_timestamps)
        self.training_y_df = self.get_labels(self.entity_ids, x_timestamps=self.training_x_timestamps,
                                             y_timestamps=self.training_y_timestamps)

        # init test data
        self.testing_x_timestamps, self.testing_y_timestamps = self.get_x_y_timestamps(
            start_timestamp=self.testing_start_timestamp,
            end_timestamp=self.testing_end_timestamp)
        self.testing_x_df = self.get_features(self.entity_ids, self.testing_x_timestamps)
        self.testing_y_df = self.get_labels(self.entity_ids, x_timestamps=self.testing_x_timestamps,
                                            y_timestamps=self.testing_y_timestamps)
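
Every example on this page hinges on to_pd_timestamp, which normalizes date strings and datetime-like values into pandas Timestamps before they are compared or stored. As a quick orientation, here is a minimal sketch of that behavior, assuming the helper wraps pandas.Timestamp for plain date strings (the import path is also an assumption):

import pandas as pd
from zvt.utils.time_utils import to_pd_timestamp  # assumed import path

# Assumed: for plain date strings the helper behaves like pandas.Timestamp.
print(to_pd_timestamp("2021-06-01"))
print(pd.Timestamp("2021-06-01"))  # expected to match the line above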
Code example #2
File: test_intent.py Project: stungkit/zvt
def test_composite():
    composite(
        entity_id="stock_sz_000338",
        data_schema=CashFlowStatement,
        columns=[
            CashFlowStatement.net_op_cash_flows,
            CashFlowStatement.net_investing_cash_flows,
            CashFlowStatement.net_financing_cash_flows,
        ],
        filters=[
            CashFlowStatement.report_period == "year",
            CashFlowStatement.report_date == to_pd_timestamp("2016-12-31"),
        ],
    )
    composite(
        entity_id="stock_sz_000338",
        data_schema=BalanceSheet,
        columns=[
            BalanceSheet.total_current_assets,
            BalanceSheet.total_non_current_assets,
            BalanceSheet.total_current_liabilities,
            BalanceSheet.total_non_current_liabilities,
        ],
        filters=[
            BalanceSheet.report_period == "year",
            BalanceSheet.report_date == to_pd_timestamp("2016-12-31")
        ],
    )
Code example #3
    def run(self):
        current_date = get_recent_report_date()
        pn = 1
        ps = 2000

        while to_pd_timestamp(current_date) >= to_pd_timestamp(self.start):
            if not self.state:
                current_date = get_recent_report_date()
                result = self.get_data(end_date=current_date, pn=pn, ps=ps)
                print(result)
                self.state = {"end_date": current_date, "pages": result["result"]["pages"], "pn": pn, "ps": ps}
                self.persist_state("stock_sz_000001", self.state)
            else:
                if self.state["pn"] >= self.state["pages"]:
                    current_date = get_recent_report_date(the_date=self.state["end_date"], step=1)
                    pn = pn
                    ps = ps
                else:
                    pn = self.state["pn"] + 1
                    ps = self.state["ps"]
                    current_date = self.state["end_date"]

                result = self.get_data(end_date=current_date, pn=pn, ps=ps)
                print(result)
                self.state = {"end_date": current_date, "pages": result["result"]["pages"], "pn": pn, "ps": ps}
                self.persist_state("stock_sz_000001", self.state)
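
The loop above pages backwards through report dates, persisting end_date/pn/ps so that a restart resumes where it stopped. get_recent_report_date is not shown in the snippet; a hypothetical stand-in, assuming it returns the most recent quarter-end report date on or before a given date and that step walks further back by whole quarters, might look like this:

import pandas as pd
from pandas.tseries.offsets import QuarterEnd

def recent_report_date_sketch(the_date=None, step=0):
    # Most recent quarter end on or before the_date, stepped back `step` quarters.
    ts = pd.Timestamp(the_date) if the_date is not None else pd.Timestamp.now()
    recent = QuarterEnd().rollback(ts.normalize())
    return recent - QuarterEnd(n=step) if step else recent

print(recent_report_date_sketch("2022-02-11"))          # 2021-12-31
print(recent_report_date_sketch("2021-12-31", step=1))  # 2021-09-30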
Code example #4
File: cs_index_api.py Project: durgagokina/zvt
def get_cs_index(index_type='sh'):
    if index_type == 'csi':
        category_list = [IndexCategory.scope,
                         IndexCategory.industry,
                         IndexCategory.style]
    elif index_type == 'sh':
        category_list = [IndexCategory.scope]
    else:
        logger.warning(f'not support index type: {index_type}')
        assert False

    requests_session = requests.Session()

    for category in category_list:
        data = _get_params(index_type=index_type, category=category)
        print(data)
        resp = requests_session.post(url, headers=DEFAULT_HEADER, json=data)

        print(resp)
        results = _get_resp_data(resp)
        the_list = []

        logger.info(f'category: {category} ')
        logger.info(f'results: {results} ')
        for i, result in enumerate(results):
            logger.info(f'to {i}/{len(results)}')
            code = result['indexCode']

            info_url = f'https://www.csindex.com.cn/csindex-home/indexInfo/index-basic-info/{code}'
            info = _get_resp_data(requests_session.get(info_url))

            name = result['indexName']
            entity_id = f'index_sh_{code}'
            index_item = {
                'id': entity_id,
                'entity_id': entity_id,
                'timestamp': to_pd_timestamp(info['basicDate']),
                'entity_type': 'index',
                'exchange': 'sh',
                'code': code,
                'name': name,
                'category': category.value,
                'list_date': to_pd_timestamp(result['publishDate']),
                'base_point': info['basicIndex'],
                'publisher': 'csindex'
            }
            logger.info(index_item)
            the_list.append(index_item)
        if the_list:
            return pd.DataFrame.from_records(the_list)
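
A usage sketch for the function above (it needs live access to csindex.com.cn). Each row describes one index, and timestamp/list_date are pandas Timestamps because both pass through to_pd_timestamp:

# Usage sketch for get_cs_index as defined above; requires network access.
df = get_cs_index(index_type="sh")
if df is not None:
    print(df[["code", "name", "category", "timestamp", "list_date"]].head())
    print(df.dtypes[["timestamp", "list_date"]])  # both datetime64[ns]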
Code example #5
def get_cs_index(index_type="sh"):
    if index_type == "csi":
        category_list = [IndexCategory.scope, IndexCategory.industry, IndexCategory.style]
    elif index_type == "sh":
        category_list = [IndexCategory.scope]
    else:
        logger.warning(f"not support index type: {index_type}")
        assert False

    requests_session = requests.Session()

    for category in category_list:
        data = _get_params(index_type=index_type, category=category)
        print(data)
        resp = requests_session.post(url, headers=DEFAULT_HEADER, json=data)

        print(resp)
        results = _get_resp_data(resp)
        the_list = []

        logger.info(f"category: {category} ")
        logger.info(f"results: {results} ")
        for i, result in enumerate(results):
            logger.info(f"to {i}/{len(results)}")
            code = result["indexCode"]

            info_url = f"https://www.csindex.com.cn/csindex-home/indexInfo/index-basic-info/{code}"
            info = _get_resp_data(requests_session.get(info_url))

            name = result["indexName"]
            entity_id = f"index_sh_{code}"
            index_item = {
                "id": entity_id,
                "entity_id": entity_id,
                "timestamp": to_pd_timestamp(info["basicDate"]),
                "entity_type": "index",
                "exchange": "sh",
                "code": code,
                "name": name,
                "category": category.value,
                "list_date": to_pd_timestamp(result["publishDate"]),
                "base_point": info["basicIndex"],
                "publisher": "csindex",
            }
            logger.info(index_item)
            the_list.append(index_item)
        if the_list:
            return pd.DataFrame.from_records(the_list)
Code example #6
File: intent.py Project: durgagokina/zvt
def composite_all(data_schema, column, timestamp, entity_ids=None, filters=None):
    if type(column) is not str:
        column = column.name
    if filters:
        filters.append([data_schema.timestamp == to_pd_timestamp(timestamp)])
    else:
        filters = [data_schema.timestamp == to_pd_timestamp(timestamp)]
    df = data_schema.query_data(entity_ids=entity_ids, columns=['entity_id', 'timestamp', column], filters=filters,
                                index='entity_id')
    entity_type, exchange, _ = decode_entity_id(df['entity_id'].iloc[0])
    pie_df = pd.DataFrame(columns=df.index, data=[df[column].tolist()])
    pie_df['entity_id'] = f'{entity_type}_{exchange}_{column}'
    pie_df['timestamp'] = timestamp

    drawer = Drawer(main_df=pie_df)
    drawer.draw_pie(show=True)
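
The reshaping step is the non-obvious part of composite_all: the queried frame is indexed by entity_id, and a single-row frame is built whose columns are the entity ids and whose values are the chosen column, which the Drawer then renders as a pie. A toy illustration with made-up numbers:

import pandas as pd

# Toy illustration (made-up values) of the pie_df reshaping used above.
df = pd.DataFrame(
    {"entity_id": ["stock_sz_000001", "stock_sz_000338"], "turnover": [1.0, 2.0]}
).set_index("entity_id", drop=False)
pie_df = pd.DataFrame(columns=df.index, data=[df["turnover"].tolist()])
print(pie_df)  # one row: columns are the entity ids, values the chosen column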
Code example #7
File: em_api.py Project: stungkit/zvt
def get_news(entity_id, ps=200, index=1):
    sec_id = to_em_sec_id(entity_id=entity_id)
    url = f"https://np-listapi.eastmoney.com/comm/wap/getListInfo?cb=callback&client=wap&type=1&mTypeAndCode={sec_id}&pageSize={ps}&pageIndex={index}&callback=jQuery1830017478247906740352_{now_timestamp() - 1}&_={now_timestamp()}"
    resp = requests.get(url)
    # {
    #     "Art_ShowTime": "2022-02-11 14:29:25",
    #     "Art_Image": "",
    #     "Art_MediaName": "每日经济新闻",
    #     "Art_Code": "202202112274017262",
    #     "Art_Title": "潍柴动力:巴拉德和锡里斯不纳入合并财务报表范围",
    #     "Art_SortStart": "1644560965017262",
    #     "Art_VideoCount": 0,
    #     "Art_OriginUrl": "http://finance.eastmoney.com/news/1354,202202112274017262.html",
    #     "Art_Url": "http://finance.eastmoney.com/a/202202112274017262.html",
    # }
    if resp.status_code == 200:
        json_text = resp.text[resp.text.index("(") + 1 : resp.text.rindex(")")]
        json_result = demjson3.decode(json_text)["data"]["list"]
        if json_result:
            json_result = [
                {
                    "id": f'{entity_id}_{item["Art_ShowTime"]}',
                    "entity_id": entity_id,
                    "timestamp": to_pd_timestamp(item["Art_ShowTime"]),
                    "news_title": item["Art_Title"],
                }
                for item in json_result
            ]
            next_data = get_news(entity_id=entity_id, ps=ps, index=index + 1)
            if next_data:
                return json_result + next_data
            else:
                return json_result
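
The endpoint returns JSONP, so the snippet slices out the payload between the outermost parentheses and decodes it with demjson3, which tolerates the API's non-strict JSON. The unwrapping step in isolation, on a small inline string:

import demjson3

jsonp = 'callback({"data": {"list": [{"Art_Title": "t", "Art_ShowTime": "2022-02-11 14:29:25"}]}})'
json_text = jsonp[jsonp.index("(") + 1 : jsonp.rindex(")")]
print(demjson3.decode(json_text)["data"]["list"])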
Code example #8
    def on_time(self, timestamp: pd.Timestamp):
        recent_report_date = to_pd_timestamp(get_recent_report_date(timestamp))
        if self.finish_date and is_same_date(recent_report_date,
                                             self.finish_date):
            return
        filters = [
            StockActorSummary.actor_type == ActorType.raised_fund.value,
            StockActorSummary.report_date == recent_report_date
        ]

        if self.entity_ids:
            filters = filters + [
                StockActorSummary.entity_id.in_(self.entity_ids)
            ]

        df = StockActorSummary.query_data(filters=filters)

        if pd_is_not_null(df):
            self.logger.info(f'{df}')
            self.finish_date = recent_report_date

            long_df = df[df['change_ratio'] > 0.05]
            short_df = df[df['change_ratio'] < -0.5]
            try:
                self.trade_the_targets(
                    due_timestamp=timestamp,
                    happen_timestamp=timestamp,
                    long_selected=set(long_df['entity_id'].to_list()),
                    short_selected=set(short_df['entity_id'].to_list()))
            except Exception as e:
                self.logger.error(e)
Code example #9
    def record(self, entity, start, end, size, timestamps):
        if start:
            start_date = to_time_str(next_date(start))
        else:
            start_date = None
        datas = em_api.get_dragon_and_tiger(code=entity.code,
                                            start_date=start_date)
        if datas:
            records = []
            for data in datas:
                timestamp = to_pd_timestamp(data["TRADE_DATE"])
                record = {
                    "id": "{}_{}_{}".format(
                        entity.id, data["TRADE_ID"], to_time_str(timestamp, fmt=TIME_FORMAT_DAY)
                    ),
                    "entity_id": entity.id,
                    "timestamp": timestamp,
                    "code": entity.code,
                    "name": entity.name,
                    "reason": data["EXPLANATION"],
                    "turnover": data["ACCUM_AMOUNT"],
                    "change_pct": data["CHANGE_RATE"],
                    "net_in": data["NET_BUY"],
                }

                # list of broker seats (营业部)
                deps = data["LIST"]
                for dep in deps:
                    flag = "" if dep["TRADE_DIRECTION"] == "0" else "_"
                    rank = dep["RANK"]
                    dep_name = f"dep{flag}{rank}"
                    dep_in = f"{dep_name}_in"
                    dep_out = f"{dep_name}_out"
                    dep_rate = f"{dep_name}_rate"

                    record[dep_name] = dep["OPERATEDEPT_NAME"]
                    record[dep_in] = dep["BUY_AMT_REAL"]
                    record[dep_out] = dep["SELL_AMT_REAL"]
                    record[dep_rate] = (dep["BUY_RATIO"] if dep["BUY_RATIO"]
                                        else 0) - (dep["SELL_RATIO"]
                                                   if dep["SELL_RATIO"] else 0)

                records.append(record)
            df = pd.DataFrame.from_records(records)
            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
        else:
            self.logger.info(f"no data for {entity.id}")
Code example #10
File: em_api.py Project: stungkit/zvt
def get_treasury_yield(pn=1, ps=2000, fetch_all=True):
    results = get_em_data(
        request_type="RPTA_WEB_TREASURYYIELD",
        fields="ALL",
        sort_by="SOLAR_DATE",
        sort="desc",
        pn=pn,
        ps=ps,
        fetch_all=fetch_all,
    )
    yields = []
    for item in results:
        date = item["SOLAR_DATE"]
        # China
        yields.append(
            {
                "id": f"country_galaxy_CN_{to_time_str(date)}",
                "entity_id": "country_galaxy_CN",
                "timestamp": to_pd_timestamp(date),
                "code": "CN",
                "yield_2": item.get("EMM00588704"),
                "yield_5": item.get("EMM00166462"),
                "yield_10": item.get("EMM00166466"),
                "yield_30": item.get("EMM00166469"),
            }
        )
        yields.append(
            {
                "id": f"country_galaxy_US_{to_time_str(date)}",
                "entity_id": "country_galaxy_US",
                "timestamp": to_pd_timestamp(date),
                "code": "US",
                "yield_2": item.get("EMG00001306"),
                "yield_5": item.get("EMG00001308"),
                "yield_10": item.get("EMG00001310"),
                "yield_30": item.get("EMG00001312"),
            }
        )
    return yields
Code example #11
File: tag.py Project: stungkit/zvt
    def get_tag_domain(self, entity_id, timestamp, **fill_kv):
        the_date = to_time_str(timestamp, fmt=TIME_FORMAT_DAY)
        the_id = f"{entity_id}_{the_date}"
        the_domain = self.data_schema.get_one(id=the_id)

        if the_domain:
            for k, v in fill_kv.items():
                exec(f"the_domain.{k}=v")
        else:
            return self.data_schema(id=the_id,
                                    entity_id=entity_id,
                                    timestamp=to_pd_timestamp(the_date),
                                    **fill_kv)
        return the_domain
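
The fill_kv loop above assigns attributes by building source strings for exec. setattr does the same thing without string evaluation; a sketch of an equivalent helper (illustrative, not zvt's code):

def fill_domain(the_domain, **fill_kv):
    # Equivalent to the exec-based loop above, using setattr.
    for k, v in fill_kv.items():
        setattr(the_domain, k, v)
    return the_domain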
Code example #12
    def run(self):
        current_date = get_recent_report_date()
        pn = 1
        ps = 2000

        while to_pd_timestamp(current_date) >= to_pd_timestamp(self.start):
            if not self.state:
                current_date = get_recent_report_date()
                result = self.get_data(end_date=current_date, pn=pn, ps=ps)
                print(result)
                self.state = {
                    'end_date': current_date,
                    'pages': result['result']['pages'],
                    'pn': pn,
                    'ps': ps
                }
                self.persist_state('stock_sz_000001', self.state)
            else:
                if self.state['pn'] >= self.state['pages']:
                    current_date = get_recent_report_date(
                        the_date=self.state['end_date'], step=1)
                    pn = pn
                    ps = ps
                else:
                    pn = self.state['pn'] + 1
                    ps = self.state['ps']
                    current_date = self.state['end_date']

                result = self.get_data(end_date=current_date, pn=pn, ps=ps)
                print(result)
                self.state = {
                    'end_date': current_date,
                    'pages': result['result']['pages'],
                    'pn': pn,
                    'ps': ps
                }
                self.persist_state('stock_sz_000001', self.state)
Code example #13
def get_indicator_data(indicator,
                       indicator_name=None,
                       country=None,
                       date=None):
    datas = _wb_get(paths={"country": country, "indicator": indicator}, date=date)
    records = [
        {
            "code": item["country"]["id"],
            "timestamp": to_pd_timestamp(item["date"]),
            item["indicator"]["id"] if not indicator_name else indicator_name: item["value"],
        }
        for item in datas
    ]
    df = pd.DataFrame.from_records(data=records)
    df = df.set_index(["code", "timestamp"])
    return df
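
Indexing the result by ("code", "timestamp") presumably lets frames for different indicators be joined column-wise into one panel. A toy illustration with made-up values and hypothetical column names:

import pandas as pd

# Toy illustration: frames sharing a ("code", "timestamp") index join column-wise.
idx = pd.MultiIndex.from_tuples([("CN", pd.Timestamp("2020-01-01"))], names=["code", "timestamp"])
gdp = pd.DataFrame({"gdp": [1.0]}, index=idx)
population = pd.DataFrame({"population": [2.0]}, index=idx)
print(gdp.join(population))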
Code example #14
File: tag.py Project: stungkit/zvt
    def get_tag_domains(self, entity_ids, timestamp, **fill_kv):
        the_date = to_time_str(timestamp, fmt=TIME_FORMAT_DAY)
        ids = [f"{entity_id}_{the_date}" for entity_id in entity_ids]

        the_domains = self.data_schema.query_data(ids=ids,
                                                  return_type="domain")

        if the_domains:
            for the_domain in the_domains:
                for k, v in fill_kv.items():
                    exec(f"the_domain.{k}=v")

        current_ids = [item.id for item in the_domains]
        need_new_ids = set(ids) - set(current_ids)
        new_domains = [
            self.data_schema(id=f"{entity_id}_{the_date}",
                             entity_id=entity_id,
                             timestamp=to_pd_timestamp(the_date),
                             **fill_kv) for entity_id in need_new_ids
        ]
        return the_domains + new_domains
Code example #15
def get_entity_list_by_cap(timestamp,
                           cap_start,
                           cap_end,
                           entity_type="stock",
                           provider=None,
                           adjust_type=None,
                           retry_times=20):
    if not adjust_type:
        adjust_type = default_adjust_type(entity_type=entity_type)

    kdata_schema = get_kdata_schema(entity_type,
                                    level=IntervalLevel.LEVEL_1DAY,
                                    adjust_type=adjust_type)
    df = kdata_schema.query_data(
        provider=provider,
        filters=[kdata_schema.timestamp == to_pd_timestamp(timestamp)],
        index="entity_id",
    )
    if pd_is_not_null(df):
        df["cap"] = df["turnover"] / df["turnover_rate"]
        df_result = df.copy()
        if cap_start:
            df_result = df_result.loc[(df["cap"] >= cap_start)]
        if cap_end:
            df_result = df_result.loc[(df["cap"] <= cap_end)]
        return df_result.index.tolist()
    else:
        if retry_times == 0:
            return []
        return get_entity_list_by_cap(
            timestamp=next_date(timestamp, 1),
            cap_start=cap_start,
            cap_end=cap_end,
            entity_type=entity_type,
            provider=provider,
            adjust_type=adjust_type,
            retry_times=retry_times - 1,
        )
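
The cap proxy here is turnover divided by turnover_rate: value traded over the fraction of shares traded works out to roughly shares times average price, i.e. an approximate (free-float, depending on how the rate is defined) market value. With toy numbers:

# Toy numbers for the cap proxy used above.
turnover = 1e8        # value traded on the day
turnover_rate = 0.02  # 2% of the shares changed hands
print(turnover / turnover_rate)  # 5000000000.0, i.e. an implied market value of ~5e9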
Code example #16
    def record(self, entity, start, end, size, timestamps):
        ccxt_exchange = get_coin_exchange(entity.exchange)

        if ccxt_exchange.has['fetchOHLCV']:
            config = get_exchange_config(entity.exchange)
            limit = config['kdata_limit']

            limit = min(size, limit)

            kdata_list = []

            if config['support_since'] and start:
                kdatas = ccxt_exchange.fetch_ohlcv(
                    entity.code,
                    timeframe=self.ccxt_trading_level,
                    since=int(start.timestamp() * 1000))
            else:
                kdatas = ccxt_exchange.fetch_ohlcv(
                    entity.code,
                    timeframe=self.ccxt_trading_level,
                    limit=limit)

            for kdata in kdatas:
                current_timestamp = kdata[0]
                if self.level == IntervalLevel.LEVEL_1DAY:
                    current_timestamp = to_time_str(current_timestamp)

                if self.level >= IntervalLevel.LEVEL_1DAY:
                    kdata_id = "{}_{}".format(entity.id,
                                              current_timestamp,
                                              fmt=TIME_FORMAT_DAY)
                else:
                    kdata_id = "{}_{}".format(entity.id,
                                              current_timestamp,
                                              fmt=TIME_FORMAT_ISO8601)

                kdata_json = {
                    'id': kdata_id,
                    'entity_id': entity.id,
                    'code': entity.code,
                    'name': entity.name,
                    'timestamp': to_pd_timestamp(current_timestamp),
                    'open': kdata[1],
                    'high': kdata[2],
                    'low': kdata[3],
                    'close': kdata[4],
                    'volume': kdata[5],
                    'provider': 'ccxt',
                    'level': self.level.value
                }
                kdata_list.append(kdata_json)

            if kdata_list:
                df = pd.DataFrame.from_records(kdata_list)
                df_to_db(data_schema=self.data_schema,
                         df=df,
                         provider=self.provider,
                         force_update=True)

        else:
            self.logger.warning("exchange:{} not support fetchOHLCV".format(
                entity.exchange))
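
ccxt's fetch_ohlcv returns rows of [timestamp in milliseconds, open, high, low, close, volume], which is why the loop indexes kdata[0] through kdata[5]. Converting the millisecond epoch with pandas, on a toy row:

import pandas as pd

row = [1644560965000, 1.0, 2.0, 0.5, 1.5, 100.0]  # toy OHLCV row
print(pd.Timestamp(row[0], unit="ms"))  # 2022-02-11 06:29:25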
Code example #17
    drawer.draw_pie(show=True)


def _group_entity_ids(entity_ids):
    entity_type_map_ids = {}
    for entity_id in entity_ids:
        entity_type, _, _ = decode_entity_id(entity_id)
        ids: List = entity_type_map_ids.setdefault(entity_type, [])
        ids.append(entity_id)
    return entity_type_map_ids


if __name__ == "__main__":
    from zvt.domain import CashFlowStatement

    composite(
        entity_id="stock_sz_000338",
        data_schema=CashFlowStatement,
        columns=[
            CashFlowStatement.net_op_cash_flows,
            CashFlowStatement.net_investing_cash_flows,
            CashFlowStatement.net_financing_cash_flows,
        ],
        filters=[
            CashFlowStatement.report_period == "year",
            CashFlowStatement.report_date == to_pd_timestamp("2015-12-31"),
        ],
    )
# the __all__ is generated
__all__ = ["compare", "distribute", "composite", "composite_all"]
Code example #18
File: test_intent.py Project: stungkit/zvt
def test_composite_all():
    composite_all(entity_ids=None,
                  data_schema=Stock1dKdata,
                  column=Stock1dKdata.turnover,
                  timestamp=to_pd_timestamp("2016-12-02"))
Code example #19
def record_dragon_tiger(data_provider="em",
                        entity_provider="em",
                        sleeping_time=2):
    # Dragon and Tiger (龙虎榜) list data
    run_data_recorder(
        domain=DragonAndTiger,
        data_provider=data_provider,
        entity_provider=entity_provider,
        day_data=True,
        sleeping_time=sleeping_time,
    )

    email_action = EmailInformer()
    # recent year
    start_timestamp = next_date(current_date(), -400)
    # standout broker seats (营业部) over the past year
    players = get_big_players(start_timestamp=start_timestamp)

    # stocks those standout seats appeared on within the last 30 days
    recent_date = next_date(current_date(), -30)
    selected = []
    for player in players:
        filters = [
            or_(
                and_(DragonAndTiger.dep1 == player,
                     DragonAndTiger.dep1_rate >= 5),
                and_(DragonAndTiger.dep2 == player,
                     DragonAndTiger.dep2_rate >= 5),
                and_(DragonAndTiger.dep3 == player,
                     DragonAndTiger.dep3_rate >= 5),
                and_(DragonAndTiger.dep4 == player,
                     DragonAndTiger.dep4_rate >= 5),
                and_(DragonAndTiger.dep5 == player,
                     DragonAndTiger.dep5_rate >= 5),
            )
        ]
        df = DragonAndTiger.query_data(
            start_timestamp=recent_date,
            filters=filters,
            columns=[
                DragonAndTiger.timestamp, DragonAndTiger.entity_id,
                DragonAndTiger.code, DragonAndTiger.name
            ],
            index="entity_id",
        )
        selected = selected + df.index.tolist()

    if selected:
        selected = list(set(selected))

    target_date = get_latest_kdata_date(provider=data_provider,
                                        entity_type="stock",
                                        adjust_type="hfq")
    df = Stock1dHfqKdata.query_data(
        provider=data_provider,
        entity_ids=selected,
        filters=[
            Stock1dHfqKdata.turnover_rate > 0.02,
            Stock1dHfqKdata.timestamp == to_pd_timestamp(target_date),
            Stock1dHfqKdata.turnover > 300000000,
        ],
        index=["entity_id"],
    )
    inform(
        action=email_action,
        entity_ids=df.index.tolist(),
        target_date=current_date(),
        title="report 龙虎榜",
        entity_provider=entity_provider,
        entity_type="stock",
        em_group="重要指数",
        em_group_over_write=False,
    )
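
The five nearly identical and_ clauses could also be generated. A sketch of a helper that builds the same or_ filter (not zvt's code; the SQLAlchemy imports, which the snippet omits, are shown):

from sqlalchemy import and_, or_

def player_filter(schema, player, min_rate=5, seats=5):
    # Same condition as above: the player sits in any of dep1..dep5 with rate >= min_rate.
    clauses = [
        and_(getattr(schema, f"dep{i}") == player, getattr(schema, f"dep{i}_rate") >= min_rate)
        for i in range(1, seats + 1)
    ]
    return or_(*clauses)

# usage: filters = [player_filter(DragonAndTiger, player)]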
Code example #20
def get_kdata(entity_id,
              level=IntervalLevel.LEVEL_1DAY,
              adjust_type=AdjustType.qfq,
              limit=10000):
    entity_type, exchange, code = decode_entity_id(entity_id)
    level = IntervalLevel(level)

    sec_id = to_em_sec_id(entity_id)
    fq_flag = to_em_fq_flag(adjust_type)
    level_flag = to_em_level_flag(level)
    url = f'https://push2his.eastmoney.com/api/qt/stock/kline/get?secid={sec_id}&klt={level_flag}&fqt={fq_flag}&lmt={limit}&end=20500000&iscca=1&fields1=f1,f2,f3,f4,f5,f6,f7,f8&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64&ut=f057cbcbce2a86e2866ab8877db1d059&forcect=1'

    resp = requests.get(url, headers=DEFAULT_HEADER)
    resp.raise_for_status()
    results = resp.json()
    data = results['data']

    kdatas = []

    if data:
        klines = data['klines']
        name = data['name']

        # TODO: ignore the last unfinished kdata now,could control it better if need
        for result in klines[:-1]:
            # "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"
            # "2021-08-27,19.39,20.30,20.30,19.25,1688497,3370240912.00,5.48,6.01,1.15,3.98,0,0,0"
            # time,open,close,high,low,volume,turnover
            fields = result.split(',')
            the_timestamp = to_pd_timestamp(fields[0])

            the_id = generate_kdata_id(entity_id=entity_id,
                                       timestamp=the_timestamp,
                                       level=level)

            open = to_float(fields[1])
            close = to_float(fields[2])
            high = to_float(fields[3])
            low = to_float(fields[4])
            volume = to_float(fields[5])
            turnover = to_float(fields[6])
            # fields[7] is amplitude (振幅)
            change_pct = value_to_pct(to_float(fields[8]))
            # fields[9] is change amount (变动)
            turnover_rate = value_to_pct(to_float(fields[10]))

            kdatas.append(
                dict(id=the_id,
                     timestamp=the_timestamp,
                     entity_id=entity_id,
                     provider='em',
                     code=code,
                     name=name,
                     level=level.value,
                     open=open,
                     close=close,
                     high=high,
                     low=low,
                     volume=volume,
                     turnover=turnover,
                     turnover_rate=turnover_rate,
                     change_pct=change_pct))
    if kdatas:
        df = pd.DataFrame.from_records(kdatas)
        return df
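
Per the comments in the loop, one kline string packs time, open, close, high, low, volume and turnover in its first seven fields, followed by amplitude, change percent, change amount and turnover rate. Splitting the sample row quoted above makes the positions explicit (names for positions 7-10 follow the snippet's own comments):

row = "2021-08-27,19.39,20.30,20.30,19.25,1688497,3370240912.00,5.48,6.01,1.15,3.98,0,0,0"
fields = row.split(",")
names = ["time", "open", "close", "high", "low", "volume", "turnover",
         "amplitude", "change_pct", "change", "turnover_rate"]
print(dict(zip(names, fields)))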
Code example #21
File: em_api.py Project: stungkit/zvt
def get_kdata(entity_id, level=IntervalLevel.LEVEL_1DAY, adjust_type=AdjustType.qfq, limit=10000):
    entity_type, exchange, code = decode_entity_id(entity_id)
    level = IntervalLevel(level)

    sec_id = to_em_sec_id(entity_id)
    fq_flag = to_em_fq_flag(adjust_type)
    level_flag = to_em_level_flag(level)
    # f131: settlement price (结算价)
    # f133: open interest (持仓)
    # neither is fetched at the moment
    url = f"https://push2his.eastmoney.com/api/qt/stock/kline/get?secid={sec_id}&klt={level_flag}&fqt={fq_flag}&lmt={limit}&end=20500000&iscca=1&fields1=f1,f2,f3,f4,f5,f6,f7,f8&fields2=f51,f52,f53,f54,f55,f56,f57,f58,f59,f60,f61,f62,f63,f64&ut=f057cbcbce2a86e2866ab8877db1d059&forcect=1"

    resp = requests.get(url, headers=DEFAULT_HEADER)
    resp.raise_for_status()
    results = resp.json()
    data = results["data"]

    kdatas = []

    if data:
        klines = data["klines"]
        name = data["name"]

        for result in klines:
            # "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"
            # "2021-08-27,19.39,20.30,20.30,19.25,1688497,3370240912.00,5.48,6.01,1.15,3.98,0,0,0"
            # time,open,close,high,low,volume,turnover
            # "2022-04-13,10708,10664,10790,10638,402712,43124771328,1.43,0.57,60,0.00,4667112399583576064,4690067230254170112,1169270784"
            fields = result.split(",")
            the_timestamp = to_pd_timestamp(fields[0])

            the_id = generate_kdata_id(entity_id=entity_id, timestamp=the_timestamp, level=level)

            open = to_float(fields[1])
            close = to_float(fields[2])
            high = to_float(fields[3])
            low = to_float(fields[4])
            volume = to_float(fields[5])
            turnover = to_float(fields[6])
            # fields[7] is amplitude (振幅)
            change_pct = value_to_pct(to_float(fields[8]))
            # fields[9] is change amount (变动)
            turnover_rate = value_to_pct(to_float(fields[10]))

            kdatas.append(
                dict(
                    id=the_id,
                    timestamp=the_timestamp,
                    entity_id=entity_id,
                    provider="em",
                    code=code,
                    name=name,
                    level=level.value,
                    open=open,
                    close=close,
                    high=high,
                    low=low,
                    volume=volume,
                    turnover=turnover,
                    turnover_rate=turnover_rate,
                    change_pct=change_pct,
                )
            )
    if kdatas:
        df = pd.DataFrame.from_records(kdatas)
        return df
Code example #22
def get_cn_index(index_type='cni', category=IndexCategory.style):
    if index_type == 'cni':
        category_map_url = cni_category_map_url
    elif index_type == 'sz':
        category_map_url = sz_category_map_url
    else:
        logger.error(f'not support index_type: {index_type}')
        assert False

    requests_session = requests.Session()

    url = category_map_url.get(category)

    resp = requests_session.get(url, headers=DEFAULT_HEADER)

    results = _get_resp_data(resp)['rows']
    # e.g
    # amount: 277743699997.9
    # closeingPoint: 6104.7592
    # docchannel: 1039
    # freeMarketValue: 10794695531696.15
    # id: 142
    # indexcode: "399370"
    # indexename: "CNI Growth"
    # indexfullcname: "国证1000成长指数"
    # indexfullename: "CNI 1000 Growth Index"
    # indexname: "国证成长"
    # indexsource: "1"
    # indextype: "202"
    # pb: 5.34
    # peDynamic: 29.8607
    # peStatic: 33.4933
    # percent: 0.0022
    # prefixmonth: null
    # realtimemarket: "1"
    # remark: ""
    # sampleshowdate: null
    # samplesize: 332
    # showcnindex: "1"
    # totalMarketValue: 23113641352198.32
    the_list = []

    logger.info(f'category: {category} ')
    logger.info(f'results: {results} ')
    for i, result in enumerate(results):
        logger.info(f'to {i}/{len(results)}')
        code = result['indexcode']
        info_resp = requests_session.get(
            f'http://www.cnindex.net.cn/index-intro?indexcode={code}')
        # fbrq: "2010-01-04"
        # jd: 1000
        # jr: "2002-12-31"
        # jsfs: "自由流通市值"
        # jsjj: "国证成长由国证1000指数样本股中成长风格突出的股票组成,为投资者提供更丰富的指数化投资工具。"
        # qzsx: null
        # typl: 2
        # xyfw: "沪深A股"
        # xygz: "在国证1000指数样本股中,选取主营业务收入增长率、净利润增长率和净资产收益率综合排名前332只"
        index_info = _get_resp_data(info_resp)
        name = result['indexname']
        entity_id = f'index_sz_{code}'
        index_item = {
            'id': entity_id,
            'entity_id': entity_id,
            'timestamp': to_pd_timestamp(index_info['jr']),
            'entity_type': 'index',
            'exchange': 'sz',
            'code': code,
            'name': name,
            'category': category.value,
            'list_date': to_pd_timestamp(index_info['fbrq']),
            'base_point': index_info['jd'],
            'publisher': 'cnindex'
        }
        logger.info(index_item)
        the_list.append(index_item)
        time.sleep(3)
    if the_list:
        return pd.DataFrame.from_records(the_list)
Code example #23
    def init_timestamps(self, entity_item) -> List[pd.Timestamp]:
        result = get_holder_report_dates(code=entity_item.code)
        if result:
            return [to_pd_timestamp(item['END_DATE']) for item in result]
Code example #24
File: ml.py Project: stungkit/zvt
    def __init__(
        self,
        entity_ids: List[str] = None,
        start_timestamp: Union[str, pd.Timestamp] = "2015-01-01",
        end_timestamp: Union[str, pd.Timestamp] = "2021-12-01",
        predict_start_timestamp: Union[str, pd.Timestamp] = "2021-06-01",
        predict_steps: int = 20,
        level: Union[IntervalLevel, str] = IntervalLevel.LEVEL_1DAY,
        adjust_type: Union[AdjustType, str] = None,
        data_provider: str = None,
        label_method: str = "raw",
    ) -> None:
        """

        :param entity_ids:
        :param start_timestamp:
        :param end_timestamp:
        :param predict_start_timestamp:
        :param predict_steps:
        :param level:
        :param adjust_type:
        :param data_provider:
        :param label_method: raw, change, or behavior_cls
        """
        super().__init__()
        self.entity_ids = entity_ids
        self.start_timestamp = to_pd_timestamp(start_timestamp)
        self.end_timestamp = to_pd_timestamp(end_timestamp)
        self.predict_start_timestamp = to_pd_timestamp(predict_start_timestamp)
        assert self.start_timestamp < self.predict_start_timestamp < self.end_timestamp
        self.predict_steps = predict_steps

        self.level = level
        if not adjust_type:
            adjust_type = default_adjust_type(
                entity_type=self.entity_schema.__name__)
        self.adjust_type = adjust_type

        self.data_provider = data_provider
        self.label_method = label_method

        self.kdata_df = self.build_kdata()
        if not pd_is_not_null(self.kdata_df):
            logger.error("not kdta")
            assert False

        self.feature_df = self.build_feature(self.entity_ids,
                                             self.start_timestamp,
                                             self.end_timestamp)
        # drop na in feature
        self.feature_df = self.feature_df.dropna()
        self.feature_names = list(
            set(self.feature_df.columns) - {"entity_id", "timestamp"})
        self.feature_df = self.feature_df.loc[:, self.feature_names]

        self.label_ser = self.build_label()
        # keep same index with feature df
        self.label_ser = self.label_ser.loc[self.feature_df.index]
        self.label_name = self.label_ser.name

        self.training_X, self.training_y, self.testing_X, self.testing_y = self.split_data()

        logger.info(self.training_X)
        logger.info(self.training_y)

        self.model = None
        self.pred_y = None
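
After dropna on the features, selecting the labels by feature_df.index is what keeps X and y row-aligned before split_data. A toy illustration of that alignment:

import pandas as pd

feature_df = pd.DataFrame({"f": [1.0, None, 3.0]}, index=["a", "b", "c"]).dropna()
label_ser = pd.Series([10.0, 20.0, 30.0], index=["a", "b", "c"], name="y")
label_ser = label_ser.loc[feature_df.index]
print(feature_df.index.equals(label_ser.index))  # True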