Esempio n. 1
0
    def __init__(self,
                 entity_ids=None,
                 entity_schema=Stock,
                 exchanges=None,
                 codes=None,
                 the_timestamp=None,
                 start_timestamp=None,
                 end_timestamp=None,
                 long_threshold=0.8,
                 short_threshold=0.2,
                 level=IntervalLevel.LEVEL_1DAY,
                 provider=None,
                 portfolio_selector=None) -> None:
        """Store the selection settings, resolve the time window and init factors.

        :param entity_ids: stored and forwarded to ``init_factors``
        :param entity_schema: stored and forwarded to ``init_factors``
        :param exchanges: stored and forwarded to ``init_factors``
        :param codes: stored and forwarded to ``init_factors``
        :param the_timestamp: when truthy, the window collapses to this single
            point (start == end == the_timestamp)
        :param start_timestamp: window start, used only without ``the_timestamp``
        :param end_timestamp: window end; falls back to ``now_pd_timestamp()``
        :param long_threshold: stored on the instance
        :param short_threshold: stored on the instance
        :param level: stored and forwarded to ``init_factors``
        :param provider: stored on the instance
        :param portfolio_selector: optional selector whose ``entity_schema``
            must be one of ``Etf``, ``Block`` or ``Index``
        """
        # What to select from.
        self.entity_ids = entity_ids
        self.entity_schema = entity_schema
        self.exchanges = exchanges
        self.codes = codes
        self.provider = provider

        self.portfolio_selector: TargetSelector = portfolio_selector
        if portfolio_selector:
            assert portfolio_selector.entity_schema in [Etf, Block, Index]

        # Time window.
        # NOTE(review): without the_timestamp and start_timestamp,
        # self.start_timestamp is never assigned here (and self.the_timestamp
        # is only assigned in the single-point case) - confirm readers cope.
        if the_timestamp:
            point = to_pd_timestamp(the_timestamp)
            self.the_timestamp = point
            self.start_timestamp = point
            self.end_timestamp = point
        else:
            if start_timestamp:
                self.start_timestamp = to_pd_timestamp(start_timestamp)
            self.end_timestamp = (to_pd_timestamp(end_timestamp)
                                  if end_timestamp else now_pd_timestamp())

        self.long_threshold = long_threshold
        self.short_threshold = short_threshold
        self.level = level

        # Factor containers and results, empty until populated elsewhere.
        self.filter_factors: List[FilterFactor] = []
        self.score_factors: List[ScoreFactor] = []
        self.state_factors: List[StateFactor] = []
        self.filter_result = None
        self.score_result = None

        self.open_long_df: DataFrame = None
        self.open_short_df: DataFrame = None

        # The raw (unconverted) arguments are forwarded on purpose.
        factor_kwargs = dict(entity_ids=entity_ids,
                             entity_schema=entity_schema,
                             exchanges=exchanges,
                             codes=codes,
                             the_timestamp=the_timestamp,
                             start_timestamp=start_timestamp,
                             end_timestamp=end_timestamp,
                             level=self.level)
        self.init_factors(**factor_kwargs)
Esempio n. 2
0
def get_etf_stocks(code=None,
                   codes=None,
                   ids=None,
                   timestamp=None,
                   provider=None):
    """Return the ETF holdings known at ``timestamp``.

    The newest ``EtfStock`` record at or before ``timestamp`` decides the
    path: if it belongs to an annual or half-year report, that report's rows
    are returned directly. Otherwise earlier reports are stacked underneath
    (newest first) until one containing half-year/annual rows is reached, and
    only the first (newest) row per ``stock_code`` is kept.

    :param code: ETF code used for the lookups
    :param codes: forwarded to ``EtfStock.query_data``
    :param ids: forwarded to ``EtfStock.query_data``
    :param timestamp: upper time bound; ``None`` means "now", resolved on
        every call (a default evaluated in the signature would be frozen at
        import time)
    :param provider: forwarded to ``EtfStock.query_data``
    :return: a DataFrame of holdings, or ``None`` when there is no record or
        no half-year/annual report is found within 20 look-back steps
    """
    # Local import keeps this function self-contained; pd.concat replaces
    # DataFrame.append, which was removed in pandas 2.0.
    import pandas as pd

    if timestamp is None:
        timestamp = now_pd_timestamp()

    latests: List[EtfStock] = EtfStock.query_data(
        provider=provider,
        code=code,
        end_timestamp=timestamp,
        order=EtfStock.timestamp.desc(),
        limit=1,
        return_type='domain')
    if not latests:
        return None

    latest_record = latests[0]
    # Rows of the most recent report.
    df = EtfStock.query_data(
        provider=provider,
        code=code,
        codes=codes,
        ids=ids,
        end_timestamp=timestamp,
        filters=[EtfStock.report_date == latest_record.report_date])

    # The newest report is an annual or half-year report: use it as is.
    if (latest_record.report_period == ReportPeriod.year
            or latest_record.report_period == ReportPeriod.half_year):
        return df

    # Quarterly report: combine with earlier reports until a half-year or
    # annual report has been included.
    for step in range(20):
        report_date = get_recent_report_date(latest_record.report_date,
                                             step=step)

        pre_df = EtfStock.query_data(
            provider=provider,
            code=code,
            codes=codes,
            ids=ids,
            end_timestamp=timestamp,
            filters=[EtfStock.report_date == to_pd_timestamp(report_date)])
        df = pd.concat([df, pre_df])

        periods = pre_df['report_period'].tolist()
        if (ReportPeriod.half_year.value in periods
                or ReportPeriod.year.value in periods):
            # Rows were stacked newest first, so 'first' keeps the latest
            # position for each stock.
            return df.drop_duplicates(subset=['stock_code'], keep='first')

    # No half-year/annual report within the look-back budget.
    return None
    def record(self, entity, start, end, size, timestamps):
        """Fetch rights-issue data for ``entity`` year by year and persist it.

        One ``c.css`` query is issued per calendar year touched by
        ``[start, end]``; rows without an ex-rights date are dropped and the
        rest is written through ``df_to_db``.

        :param entity: entity providing ``id`` and ``code``
        :param start: range start, normalised with ``to_time_str``
        :param end: range end; defaults to the current time when falsy
        :param size: unused here
        :param timestamps: unused here
        :return: always ``None``
        """
        if not end:
            end = to_time_str(now_pd_timestamp())
        start = to_time_str(start)
        # Distinct years in the range; sorted so the query order is stable.
        years = sorted(
            {to_time_str(day)[:4]
             for day in pd.date_range(start, end)})
        em_code = to_em_entity_id(entity)

        columns_dict = {
            "RTISSANNCDATE": "配股公告日",
            "RTISSREGISTDATE": "股权登记日",
            "RTISSEXDIVDATE": "配股除权日",
            "RTISSLISTDATE": "配股上市日",
            "RTISSPAYSDATE": "缴款起始日",
            "RTISSPAYEDATE": "缴款终止日",
            "RTISSPERTISSHARE": "每股配股数",
            "RTISSBASESHARES": "基准股本",
            "RTISSPLANNEDVOL": "计划配股数",
            "RTISSACTVOL": "实际配股数",
            "RTISSPRICE": "配股价格",
            "RTISSCOLLECTION": "配股募集资金",
            "RTISSNETCOLLECTION": "配股募集资金净额",
            "RTISSEXPENSE": "配股费用",
        }
        div_columns_list = list(columns_dict)

        frames = []
        for year in years:
            div_df = c.css(em_code, div_columns_list,
                           "Year =" + year + ",ispandas=1")
            # Skip anything that is not a DataFrame (e.g. an error object).
            if isinstance(div_df, pd.DataFrame):
                frames.append(div_df)

        if not frames:
            return None
        # Single concat instead of DataFrame.append (removed in pandas 2.0).
        df = pd.concat(frames)
        if "RTISSEXDIVDATE" not in df.columns:
            # Nothing usable came back; dropna/sort below would raise KeyError.
            return None

        df = df.dropna(subset=["RTISSEXDIVDATE"])
        df = df.sort_values("RTISSEXDIVDATE", ascending=True)
        if pd_is_not_null(df):
            df.reset_index(drop=True, inplace=True)
            df.rename(columns=self.data_schema.get_data_map(self),
                      inplace=True)
            df['entity_id'] = entity.id
            df['timestamp'] = pd.to_datetime(df.rtiss_date)
            df['provider'] = 'emquantapi'
            df['code'] = entity.code

            def generate_id(se):
                # id = "<entity_id>_<day>"
                return "{}_{}".format(
                    se['entity_id'],
                    to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

            df['id'] = df[['entity_id', 'timestamp']].apply(generate_id,
                                                            axis=1)
            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=self.force_update)
        return None
Esempio n. 4
0
 def in_trading_time(cls, timestamp=None):
     """Tell whether ``timestamp`` lies strictly inside the trading day.

     The day runs from the start of the first interval to the end of the
     last interval returned by ``cls.get_trading_intervals()``; gaps between
     intervals are not excluded.

     :param timestamp: moment to test; the current time when falsy
     :return: ``True`` when open < timestamp < close on that date
     """
     timestamp = pd.Timestamp(timestamp) if timestamp else now_pd_timestamp()
     day = timestamp.date()

     first_start = cls.get_trading_intervals()[0][0]
     open_time = date_and_time(the_date=day, the_time=first_start)

     last_end = cls.get_trading_intervals()[-1][1]
     close_time = date_and_time(the_date=day, the_time=last_end)

     return open_time < timestamp < close_time
Esempio n. 5
0
def is_in_trading(security_type, exchange, timestamp):
    """Report whether the current moment is inside a trading interval.

    ``timestamp`` only gates the check: unless it falls on today's date the
    answer is ``False``. The interval comparison itself uses the current time.

    :param security_type: forwarded to ``get_trading_intervals``
    :param exchange: forwarded to ``get_trading_intervals``
    :param timestamp: date to compare with today
    :return: ``True`` when now is strictly inside one of the intervals
    """
    now = now_pd_timestamp()
    if not is_same_date(now, to_pd_timestamp(timestamp)):
        return False

    intervals = get_trading_intervals(security_type=security_type,
                                      exchange=exchange)
    return any(
        now > date_and_time(now, opens) and now < date_and_time(now, closes)
        for opens, closes in intervals)
    def record(self, entity, start, end, size, timestamps, http_session):
        """Download quarterly k-data pages for ``entity`` and return the rows.

        Each (year, quarter) from ``start`` up to now is fetched from
        ``self.url``; the fifth HTML table on the page holds the data.
        Quarters that fail to parse or lack that table are skipped.

        :param entity: security providing ``code``, ``name`` and ``timestamp``
        :param start: first moment to fetch
        :param end: unused here
        :param size: unused here
        :param timestamps: unused here
        :param http_session: session passed to ``request_get``
        :return: list of row dicts sorted by ``timestamp``; empty when no
            quarter produced data
        """
        the_quarters = get_year_quarters(start, now_pd_timestamp(Region.CHN))
        # Skip the first quarter unless ``start`` is the entity's own timestamp.
        if not is_same_date(entity.timestamp, start) and len(the_quarters) > 1:
            the_quarters = the_quarters[1:]

        level = self.level.value

        frames = []
        for year, quarter in the_quarters:
            query_url = self.url.format(entity.code, year, quarter)
            response = request_get(http_session, query_url)
            response.encoding = 'gbk'

            try:
                dfs = pd.read_html(response.text)
            except ValueError as error:
                self.logger.error(
                    f'skip ({year}-{quarter:02d}){entity.code}{entity.name}({error})'
                )
                self.sleep()
                continue

            if len(dfs) < 5:
                self.sleep()
                continue

            # Drop the first row of the table and copy so the column
            # assignments below act on an independent frame.
            df = dfs[4].iloc[1:].copy()
            df.columns = [
                'timestamp', 'open', 'high', 'close', 'low', 'volume',
                'turnover'
            ]
            df['name'] = entity.name
            df['level'] = level
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df['provider'] = Provider.Sina

            frames.append(df)

            self.logger.info(
                f'({entity.code}{entity.name})({year}-{quarter:02d})'
            )
            self.sleep()

        if not frames:
            # Sorting an empty, column-less frame by 'timestamp' would raise
            # KeyError; there is simply nothing to record.
            return []

        result_df = pd.concat(frames).sort_values(by='timestamp')

        return result_df.to_dict(orient='records')
Esempio n. 7
0
    def download_sh_etf_component(self, df: pd.DataFrame, http_session):
        """Download and store the constituent stocks of Shanghai ETFs.

        ETF_CLASS => 1. single-market ETF 2. cross-market ETF
                     3. cross-border ETF 5. bond ETF 6. gold ETF
        Only classes '1' and '2' are processed.

        :param df: ETF list data (needs ETF_CLASS, FUND_ID, FUND_NAME)
        :param http_session: session passed to ``sync_get``
        :return: None
        """
        query_url = 'http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?' \
                    'isPagination=false&type={}&etfClass={}'
        required_columns = ['instrumentId', 'instrumentName']

        etf_df = df[(df['ETF_CLASS'] == '1') | (df['ETF_CLASS'] == '2')]
        etf_df = self.populate_sh_etf_type(etf_df, http_session)

        for _, etf in etf_df.iterrows():
            url = query_url.format(etf['ETF_TYPE'], etf['ETF_CLASS'])
            text = sync_get(http_session,
                            url,
                            headers=DEFAULT_SH_ETF_LIST_HEADER,
                            return_type='text')
            if text is None:
                continue
            response_dict = demjson.decode(text)
            response_df = pd.DataFrame(response_dict.get('result', []))

            etf_code = etf['FUND_ID']
            if not set(required_columns).issubset(response_df.columns):
                # An empty 'result' gives a frame without columns; selecting
                # the constituent columns from it would raise KeyError.
                self.logger.warning(
                    f'no constituent data returned for etf {etf_code}, skipped')
                self.sleep()
                continue

            etf_id = f'etf_sh_{etf_code}'
            response_df = response_df[required_columns].copy()
            response_df.rename(columns={
                'instrumentId': 'stock_code',
                'instrumentName': 'stock_name'
            },
                               inplace=True)

            response_df['entity_id'] = etf_id
            response_df['entity_type'] = EntityType.ETF.value
            response_df['exchange'] = 'sh'
            response_df['code'] = etf_code
            response_df['name'] = etf['FUND_NAME']
            response_df['timestamp'] = now_pd_timestamp(Region.CHN)

            response_df['stock_id'] = response_df['stock_code'].apply(
                china_stock_code_to_id)
            # Row id = "<etf id>_<stock id>".
            response_df['id'] = response_df['stock_id'].apply(
                lambda stock_id: f'{etf_id}_{stock_id}')

            df_to_db(df=response_df,
                     ref_df=None,
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider)
            self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

            self.sleep()
Esempio n. 8
0
def get_fund_stocks(code=None,
                    codes=None,
                    ids=None,
                    timestamp=None,
                    provider=None):
    """Return the stock holdings of funds by delegating to ``get_portfolio_stocks``.

    :param code: forwarded to ``get_portfolio_stocks``
    :param codes: forwarded to ``get_portfolio_stocks``
    :param ids: forwarded to ``get_portfolio_stocks``
    :param timestamp: point in time to query; ``None`` means "now", resolved
        on every call rather than once when the module is imported
    :param provider: forwarded to ``get_portfolio_stocks``
    :return: whatever ``get_portfolio_stocks`` returns for ``Fund``
    """
    if timestamp is None:
        timestamp = now_pd_timestamp()
    return get_portfolio_stocks(portfolio_entity=Fund,
                                code=code,
                                codes=codes,
                                ids=ids,
                                timestamp=timestamp,
                                provider=provider)
    def record(self, entity, start, end, size, timestamps):
        """Fetch daily k-data for ``<entity.code>.SWI`` and persist it.

        Without an explicit ``end`` the request covers at most 365 days from
        ``start`` (or up to now if that is sooner).

        :param entity: entity providing ``id``, ``code`` and ``name``
        :param start: range start; a timestamp supporting subtraction
        :param end: range end; derived as described above when falsy
        :param size: unused here
        :param timestamps: unused here
        :return: always ``None``
        """
        if not end:
            from datetime import timedelta

            now = now_pd_timestamp()
            if (now - start).days > 365:
                end = to_time_str(start + timedelta(days=365))
            else:
                end = to_time_str(now)
        start = to_time_str(start)

        df = c.csd(f"{entity.code}.SWI", "OPEN,CLOSE,HIGH,LOW,VOLUME,AMOUNT", start, end,
                   "period=1,adjustflag=1,curtype=1,order=1,ispandas=1")
        # Anything other than a DataFrame means there is nothing to store.
        # isinstance (rather than an exact type comparison) also accepts
        # DataFrame subclasses.
        if not isinstance(df, pd.DataFrame):
            return None
        df.rename(columns={
            'DATES': 'timestamp',
            'OPEN': 'open',
            'CLOSE': 'close',
            'HIGH': 'high',
            'LOW': 'low',
            'VOLUME': 'volume',
            'AMOUNT': 'turnover',
        }, inplace=True)

        if pd_is_not_null(df):
            df['name'] = entity.name
            df['entity_id'] = entity.id
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            # NOTE(review): the provider is recorded as 'joinquant' although
            # the data is fetched through c.csd - confirm this is intended.
            df['provider'] = 'joinquant'
            df['level'] = '1d'
            df['code'] = entity.code

            def generate_kdata_id(se):
                # id = "<entity_id>_<day>"
                return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

            df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id, axis=1)

            df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

        return None
Esempio n. 10
0
 def get_stocks(cls,
                code=None,
                codes=None,
                ids=None,
                timestamp=None,
                provider=None):
     """Return the ETF holdings by delegating to ``get_etf_stocks``.

     :param code: forwarded to ``get_etf_stocks``
     :param codes: forwarded to ``get_etf_stocks``
     :param ids: forwarded to ``get_etf_stocks``
     :param timestamp: point in time to query; ``None`` means "now", resolved
         on every call rather than once when the class body is executed
     :param provider: forwarded to ``get_etf_stocks``
     :return: whatever ``get_etf_stocks`` returns
     """
     # Imported at call time, as in the original.
     from zvt.api.portfolio import get_etf_stocks

     if timestamp is None:
         timestamp = now_pd_timestamp()
     return get_etf_stocks(code=code,
                           codes=codes,
                           ids=ids,
                           timestamp=timestamp,
                           provider=provider)
Esempio n. 11
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch dividend data for ``entity`` per year-end report date and persist it.

        One ``c.css`` query is issued for the ``YYYY-12-31`` report date of
        every calendar year touched by ``[start, end]``; rows without an
        ex-dividend date are dropped and the rest is written via ``df_to_db``.

        :param entity: entity providing ``id`` and ``code``
        :param start: range start, normalised with ``to_time_str``
        :param end: range end; defaults to the current time when falsy
        :param size: unused here
        :param timestamps: unused here
        :return: always ``None``
        """
        if not end:
            end = to_time_str(now_pd_timestamp())
        start = to_time_str(start)
        reportdate_list = sorted({to_time_str(day)[:4] + '-12-31' for day in pd.date_range(start, end)})
        em_code = to_em_entity_id(entity)

        div_columns_dict = {
            "DIVAGMANNCDATE": "股东大会公告日",
            "DIVEXDATE": "除权除息日",
            "DIVRECORDDATE": "股权登记日",
            "DIVIMPLANNCDATE": "分红实施公告日",
            "DIVLASTTRDDATESHAREB": "B股最后交易日",
            "DIVCASHPSAFTAX": "每股股利(税后)",
            "DIVCASHPSBFTAX": "每股股利(税前)",
            "DIVPROGRESS": "分红方案进度",
            "DIVPAYDATE": "派息日",
            # "DIVCASHDATE" (latest cash-dividend report period) is disabled
            "DIVSTOCKPS": "每股送股比例",
            "DIVCAPITALIZATIONPS": "每股转增比例",
            "DIVCASHANDSTOCKPS": "分红送转方案",
        }
        div_columns_list = list(div_columns_dict)

        frames = []
        for reportdate in reportdate_list:
            div_df = c.css(em_code, div_columns_list,
                           "ReportDate =" + reportdate + ",ispandas=1,AssignFeature=1,YesNo=1")
            # Skip anything that is not a DataFrame (e.g. an error object);
            # the column assignment below would fail on it.
            if not isinstance(div_df, pd.DataFrame):
                continue
            div_df['report_date'] = reportdate
            frames.append(div_df)

        if not frames:
            return None
        # Single concat instead of DataFrame.append (removed in pandas 2.0).
        df = pd.concat(frames)
        if "DIVEXDATE" not in df.columns:
            # Nothing usable came back; dropna/sort below would raise KeyError.
            return None

        df = df.dropna(subset=["DIVEXDATE"])
        df = df.sort_values("DIVEXDATE", ascending=True)
        # Keep only the part before '或' / '(' in these text fields.
        df['DIVCASHPSAFTAX'] = df['DIVCASHPSAFTAX'].apply(lambda x: str(x).split('或')[0])
        df['DIVCASHANDSTOCKPS'] = df['DIVCASHANDSTOCKPS'].apply(lambda x: str(x).split('(')[0])
        if pd_is_not_null(df):
            df.reset_index(drop=True, inplace=True)
            df.rename(columns=self.data_schema.get_data_map(self), inplace=True)
            df['dividend'] = df['dividend'].apply(lambda x: str(x).split('(')[0])
            df['entity_id'] = entity.id
            df['timestamp'] = pd.to_datetime(df.dividend_date)
            df['provider'] = 'emquantapi'
            df['code'] = entity.code

            def generate_id(se):
                # id = "<entity_id>_<day>"
                return "{}_{}".format(se['entity_id'], to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))

            df['id'] = df[['entity_id', 'timestamp']].apply(generate_id, axis=1)
            # The literal string 'None' is replaced with NaT before storing.
            df.replace('None', pd.NaT, inplace=True)
            df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)
        return None
Esempio n. 12
0
def get_top_fund_holding_stocks(timestamp=None, pct=0.3, by=None):
    """Return the top ``pct`` share of stocks ranked by fund holdings.

    :param timestamp: point in time to evaluate; the current time when falsy
    :param pct: fraction (0..1) of the ranked stocks to keep
    :param by: ranking basis -
        falsy: total market value held by funds;
        ``'trading'``: held value / circulating market cap;
        ``'all'``: held value / total market cap
    :return: single-column DataFrame indexed by stock id, best first
    :raises ValueError: if ``by`` is truthy but not ``'trading'`` or ``'all'``
    """
    cap_columns = {'trading': 'circulating_market_cap', 'all': 'market_cap'}
    if by and by not in cap_columns:
        # Previously this surfaced later as an unbound local variable.
        raise ValueError(f"unsupported 'by' value: {by!r}")

    if not timestamp:
        timestamp = now_pd_timestamp()
    # Reports are published some time after their report_date (the original
    # note says quarterly within 1 month and "annual" within both 2 and 4
    # months - presumably half-year and annual). The report date is
    # therefore looked up with step 1 so that data is found. The result
    # lags and is meant as a rough picture only.
    report_date = get_recent_report_date(timestamp, 1)
    df = FundStock.query_data(region=Region.CHN,
                              filters=[
                                  FundStock.report_date >= report_date,
                                  FundStock.timestamp <= timestamp
                              ],
                              columns=['stock_id', 'market_cap'])
    # Total value held per stock, largest first. (The original read
    # ``fund_cap_df`` before assigning it instead of using the query result.)
    fund_cap_df = df.groupby(
        'stock_id')['market_cap'].sum().sort_values(ascending=False)

    # Rank directly by held market value.
    if not by:
        s = fund_cap_df.iloc[:int(len(fund_cap_df) * pct)]

        return s.to_frame()

    # Rank by held value relative to the circulating ('trading') or total
    # ('all') market cap, averaged over the preceding 30 days.
    cap_column = cap_columns[by]

    entity_ids = fund_cap_df.index.tolist()
    start_timestamp = next_date(timestamp, -30)
    cap_df = StockValuation.query_data(
        entity_ids=entity_ids,
        filters=[
            StockValuation.timestamp >= start_timestamp,
            StockValuation.timestamp <= timestamp
        ],
        columns=['entity_id', cap_column])
    cap_df = cap_df.rename(columns={cap_column: 'cap'})

    cap_df = cap_df.groupby('entity_id').mean()
    result_df = pd.concat([cap_df, fund_cap_df], axis=1, join='inner')
    result_df['pct'] = result_df['market_cap'] / result_df['cap']

    pct_df = result_df['pct'].sort_values(ascending=False)

    s = pct_df.iloc[:int(len(pct_df) * pct)]

    return s.to_frame()
Esempio n. 13
0
    def __init__(self,
                 region: Region,
                 entity_ids=None,
                 entity_schema=Stock,
                 exchanges=None,
                 codes=None,
                 the_timestamp=None,
                 start_timestamp=None,
                 end_timestamp=None,
                 long_threshold=0.8,
                 short_threshold=0.2,
                 level=IntervalLevel.LEVEL_1DAY,
                 provider: Provider = Provider.Default) -> None:
        """Store the selection settings, resolve the time window and init factors.

        :param region: stored; also used for the default end of the window
        :param entity_ids: stored and forwarded to ``init_factors``
        :param entity_schema: stored and forwarded to ``init_factors``
        :param exchanges: stored and forwarded to ``init_factors``
        :param codes: stored and forwarded to ``init_factors``
        :param the_timestamp: when truthy, the window collapses to this single
            point (start == end == the_timestamp)
        :param start_timestamp: window start, used only without ``the_timestamp``
        :param end_timestamp: window end; falls back to
            ``now_pd_timestamp(region)``
        :param long_threshold: stored on the instance
        :param short_threshold: stored on the instance
        :param level: stored and forwarded to ``init_factors``
        :param provider: stored on the instance
        """
        # What to select from.
        self.entity_ids = entity_ids
        self.entity_schema = entity_schema
        self.exchanges = exchanges
        self.codes = codes
        self.region = region
        self.provider = provider

        # Time window.
        # NOTE(review): without the_timestamp and start_timestamp,
        # self.start_timestamp is never assigned here (and self.the_timestamp
        # is only assigned in the single-point case) - confirm readers cope.
        if the_timestamp:
            point = to_pd_timestamp(the_timestamp)
            self.the_timestamp = point
            self.start_timestamp = point
            self.end_timestamp = point
        else:
            if start_timestamp:
                self.start_timestamp = to_pd_timestamp(start_timestamp)
            self.end_timestamp = (to_pd_timestamp(end_timestamp)
                                  if end_timestamp
                                  else now_pd_timestamp(self.region))

        self.long_threshold = long_threshold
        self.short_threshold = short_threshold
        self.level = level

        # Factor containers and results, empty until populated elsewhere.
        self.filter_factors: List[FilterFactor] = []
        self.score_factors: List[ScoreFactor] = []
        self.filter_result = None
        self.score_result = None

        self.open_long_df: DataFrame = None
        self.open_short_df: DataFrame = None

        # The raw (unconverted) arguments are forwarded on purpose.
        factor_kwargs = dict(entity_ids=entity_ids,
                             entity_schema=entity_schema,
                             exchanges=exchanges,
                             codes=codes,
                             the_timestamp=the_timestamp,
                             start_timestamp=start_timestamp,
                             end_timestamp=end_timestamp,
                             level=self.level)
        self.init_factors(**factor_kwargs)
Esempio n. 14
0
    def record(self, entity, start, end, size, timestamps, http_session):
        """Download quarterly k-data pages for ``entity`` and return them combined.

        Each (year, quarter) from ``start`` up to now is fetched from
        ``self.url``; the fifth HTML table on the page holds the data.
        Quarters that cannot be fetched, fail to parse or lack that table
        are skipped.

        :param entity: security providing ``code``, ``name`` and ``timestamp``
        :param start: first moment to fetch
        :param end: unused here
        :param size: unused here
        :param timestamps: unused here
        :param http_session: session passed to ``sync_get``
        :return: DataFrame with a ``level`` column added, or ``None`` when
            nothing was collected
        """
        column_names = [
            'timestamp', 'open', 'high', 'close', 'low', 'volume',
            'turnover'
        ]

        quarters = get_year_quarters(start, now_pd_timestamp(Region.CHN))
        # Skip the first quarter unless ``start`` is the entity's own timestamp.
        if not is_same_date(entity.timestamp, start) and len(quarters) > 1:
            quarters = quarters[1:]

        level = self.level.value

        collected = []
        for year, quarter in quarters:
            page = sync_get(http_session,
                            self.url.format(entity.code, year, quarter),
                            encoding='gbk',
                            return_type='text')
            if page is None:
                continue

            try:
                tables = pd.read_html(page)
            except ValueError as error:
                self.logger.error(
                    f'skip ({year}-{quarter:02d}){entity.code}{entity.name}({error})'
                )
                self.sleep()
                continue

            if len(tables) >= 5:
                # Fifth table, without its first row.
                quarter_df = tables[4].copy().iloc[1:]
                quarter_df.columns = column_names
                collected.append(quarter_df)

            self.sleep()

        result_df = pd.concat(collected) if collected else pd.DataFrame()
        if not pd_is_not_null(result_df):
            return None
        result_df['level'] = level
        return result_df
    def format(self, entity, df):
        """Stamp ``df`` with entity/provider metadata and a generated id.

        The columns are written in a fixed order and ``df`` is modified in
        place.

        :param entity: entity providing ``id``, ``code``, ``name``, ``exchange``
        :param df: frame to annotate
        :return: the same ``df`` object
        """
        # Insertion order of this mapping is the order the columns are written.
        metadata = {
            'timestamp': now_pd_timestamp(Region.CHN),
            'entity_id': entity.id,
            'provider': self.provider.value,
            'code': entity.code,
            'name': entity.name,
            'level': self.level.value,
            'exchange': entity.exchange,
            'entity_type': EntityType.Block.value,
        }
        for column, value in metadata.items():
            df[column] = value

        # The id is generated last, once the other columns are in place.
        df['id'] = self.generate_domain_id(entity, df)
        return df
Esempio n. 16
0
    def record(self, entity, start, end, size, timestamps, http_session):
        """Fetch up to 500 days of 'valuation' fundamentals for ``entity``.

        The window ends at ``start`` + 500 days or now, whichever is earlier;
        the passed-in ``end`` is ignored.

        :param entity: entity converted with ``to_jq_entity_id``
        :param start: window start
        :param end: ignored (recomputed)
        :param size: unused here
        :param timestamps: unused here
        :param http_session: unused here
        :return: the fetched DataFrame, or ``None`` when it is null/empty
        """
        window_end = min(now_pd_timestamp(self.region),
                         start + Timedelta(days=500))
        span: Timedelta = window_end - start

        valuation_df = jq_get_fundamentals(table='valuation',
                                           code=to_jq_entity_id(entity),
                                           date=to_time_str(window_end),
                                           count=min(span.days, 500))

        return valuation_df if pd_is_not_null(valuation_df) else None
Esempio n. 17
0
    def generate_request_param(self, security_item, start, end, size, timestamp):
        """Build the parameter dict for a k-data request.

        :param security_item: passed through under ``'security_item'``
        :param start: requested start; raised to ``self.start_timestamp`` when
            that is set and later
        :param end: ignored - the end is always one day after now
        :param size: unused here
        :param timestamp: unused here
        :return: dict with the security item, start/end time strings, level
            value and ``jq_level``
        """
        begin = start
        if self.start_timestamp:
            begin = max(self.start_timestamp, to_pd_timestamp(start))

        # One day past now is used as the end of the request.
        finish = now_pd_timestamp() + timedelta(days=1)

        request_param = {
            'security_item': security_item,
            'start_timestamp': to_time_str(begin),
            'end_timestamp': to_time_str(finish),
            'level': self.level.value,
            'jq_level': self.jq_trading_level,
        }
        return request_param
Esempio n. 18
0
    def record(self, entity, start, end, size, timestamps):
        """Fetch holder trading details for ``entity`` and persist them.

        Rows come from the ``HoldTradeDetailInfo`` report; the total share
        count on each notice date is looked up separately and used to derive
        ``change_pct`` (change relative to total shares, times 1000).

        :param entity: entity providing ``id`` and ``code``
        :param start: range start, normalised with ``to_time_str``
        :param end: range end; defaults to the current time when falsy
        :param size: unused here
        :param timestamps: unused here
        :return: always ``None``
        """
        end = end or to_time_str(now_pd_timestamp())
        start = to_time_str(start)
        em_code = to_em_entity_id(entity)

        columns_list = list(self.data_schema.get_data_map(self))
        options = ("secucode=" + em_code + ",StartDate=" + start +
                   ",EndDate=" + end + ",HoldType=0")
        data = c.ctr("HoldTradeDetailInfo", columns_list, options)
        if data.Data == {}:
            return None

        df = pd.DataFrame(data.Data).T
        df.columns = data.Indicators
        df = df.sort_values("NOTICEDATE", ascending=True)

        def total_share_on(notice_date):
            # One lookup per row: total shares as of the notice date.
            shares = c.css(em_code, "TOTALSHARE",
                           "EndDate=" + notice_date + ",ispandas=1")
            return shares.TOTALSHARE[0]

        df['TOTALSHARE'] = df.NOTICEDATE.apply(total_share_on)
        # Both quantities are scaled by 10000 (BDHCGZS: holdings after the
        # change, per the original note).
        df['CHANGENUM'] = df['CHANGENUM'] * 10000
        df['BDHCGZS'] = df['BDHCGZS'] * 10000
        # Change ratio in thousandths, rounded to 5 decimals.
        ratio = abs(df['CHANGENUM'] / df['TOTALSHARE']).astype(float) * 1000
        df['change_pct'] = ratio
        df['change_pct'] = df['change_pct'].round(5)

        if not pd_is_not_null(df):
            return None

        df.reset_index(drop=True, inplace=True)
        df.rename(columns=self.data_schema.get_data_map(self),
                  inplace=True)
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df.holder_end_date)
        df['provider'] = 'emquantapi'
        df['code'] = entity.code

        def generate_id(se):
            # "<entity_id>_<day>_<row label>"; the label is the 1-based
            # position of the row within its timestamp group.
            day = to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY)
            return "{}_{}_{}".format(se['entity_id'], day, se.name)

        # Renumber rows from 0 inside each timestamp group, then shift to 1.
        per_timestamp = [
            group.reset_index(drop=True)
            for _, group in df.groupby('timestamp')
        ]
        df_res = pd.concat(per_timestamp)
        df_res.index = df_res.index + 1
        df_res['id'] = df_res[['entity_id',
                               'timestamp']].apply(generate_id, axis=1)

        df_to_db(df=df_res,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)
        return None
Esempio n. 19
0
def report_core_company():
    """Run the fundamental selector for today and e-mail the selected stocks.

    The selected stocks are also pushed to the eastmoney 'core' group (a
    failure there is reported by mail but does not abort the report). On any
    other failure the whole run is retried after three minutes; once ten
    failures have accumulated an alert mail is sent and retrying continues.
    """
    # Counted across retries. It must live outside the loop: resetting it on
    # every iteration kept it from ever reaching the alert threshold.
    error_count = 0
    while True:
        email_action = EmailInformer()

        try:
            # StockTradeDay.record_data(provider='joinquant')
            # Stock.record_data(provider='joinquant')
            # FinanceFactor.record_data(provider='eastmoney')
            # BalanceSheet.record_data(provider='eastmoney')

            target_date = to_time_str(now_pd_timestamp())

            my_selector: TargetSelector = FundamentalSelector(start_timestamp='2016-01-01', end_timestamp=target_date)
            my_selector.run()

            long_targets = my_selector.get_open_long_targets(timestamp=target_date)
            if long_targets:
                stocks = get_entities(provider='joinquant', entity_schema=Stock, entity_ids=long_targets,
                                      return_type='domain')

                # add them to eastmoney
                try:
                    try:
                        eastmoneypy.del_group('core')
                    except Exception:
                        # Best effort: a failed delete is ignored and the
                        # group is (re)created below.
                        pass
                    eastmoneypy.create_group('core')
                    for stock in stocks:
                        eastmoneypy.add_to_group(stock.code, group_name='core')
                except Exception as e:
                    email_action.send_message(zvt_config['email_username'], 'report_core_company error',
                                              'report_core_company error:{}'.format(e))

                infos = stocks_with_info(stocks)
                msg = '\n'.join(infos)
            else:
                msg = 'no targets'

            logger.info(msg)

            email_action.send_message(get_subscriber_emails(), f'{to_time_str(target_date)} 核心资产选股结果', msg)
            break
        except Exception as e:
            logger.exception('report_core_company error:{}'.format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            if error_count == 10:
                email_action.send_message(zvt_config['email_username'], 'report_core_company error',
                                          'report_core_company error:{}'.format(e))
def report_core_company(region: Region, provider: Provider):
    """Run the fundamental selector for today and e-mail the selected stocks.

    The selected stocks are also pushed to the eastmoney 'core' group (a
    failure there is reported by mail but does not abort the report). On any
    other failure the whole run is retried after three minutes; once ten
    failures have accumulated an alert mail is sent and retrying continues.

    :param region: region used for the current time, the selector and the
        entity lookup
    :param provider: provider used for the entity lookup
    """
    # Counted across retries. It must live outside the loop: resetting it on
    # every iteration kept it from ever reaching the alert threshold.
    error_count = 0
    while True:
        email_action = EmailInformer()

        try:
            # StockTradeDay.record_data(provider=Provider.JoinQuant)
            # Stock.record_data(provider=Provider.JoinQuant)
            # FinanceFactor.record_data(provider=Provider.EastMoney)
            # BalanceSheet.record_data(provider=Provider.EastMoney)

            target_date = to_time_str(now_pd_timestamp(region))

            my_selector: TargetSelector = FundamentalSelector(region=region, start_timestamp='2016-01-01', end_timestamp=target_date)
            my_selector.run()

            long_targets = my_selector.get_open_long_targets(timestamp=target_date)
            if long_targets:
                stocks = get_entities(region=region, provider=provider, entity_schema=Stock, entity_ids=long_targets,
                                      return_type='domain')

                # add them to eastmoney
                try:
                    try:
                        eastmoneypy.del_group('core')
                    except Exception:
                        # Best effort: a failed delete is ignored and the
                        # group is (re)created below.
                        pass
                    eastmoneypy.create_group('core')
                    for stock in stocks:
                        eastmoneypy.add_to_group(stock.code, group_name='core')
                except Exception as e:
                    email_action.send_message("*****@*****.**", 'report_core_company error',
                                              'report_core_company error:{}'.format(e))

                info = [f'{stock.name}({stock.code})' for stock in stocks]
                msg = ' '.join(info)
            else:
                msg = 'no targets'

            logger.info(msg)

            email_action.send_message(get_subscriber_emails(), f'{to_time_str(target_date)} 核心资产选股结果', msg)
            break
        except Exception as e:
            logger.exception('report_core_company error:{}'.format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            if error_count == 10:
                email_action.send_message("*****@*****.**", 'report_core_company error',
                                          'report_core_company error:{}'.format(e))
Esempio n. 21
0
def report_core_company():
    """Run the fundamental selector and mail today's core-asset picks.

    ``FundamentalSelector`` is run from 2016-01-01 up to the current date.
    When it yields long targets, the matching stock entities are loaded, their
    codes are pushed to the eastmoney group ``"core"`` and their description
    lines become the mail body; otherwise the body is ``"no targets"``. The
    body is logged and sent to the subscriber addresses.

    Any exception restarts the whole attempt after a three minute pause; the
    loop only ends once the report mail has been sent. The address configured
    in ``zvt_config["email_username"]`` is notified once, on the tenth failed
    attempt.
    """
    # Counts failed attempts across retries. It has to be initialised before
    # the loop: resetting it on every iteration meant it never got past 1, so
    # the tenth-failure alert below could never be sent.
    error_count = 0

    while True:
        email_action = EmailInformer()

        try:
            # Disabled data refresh steps, kept for reference:
            # StockTradeDay.record_data(provider='joinquant')
            # Stock.record_data(provider='joinquant')
            # FinanceFactor.record_data(provider='eastmoney')
            # BalanceSheet.record_data(provider='eastmoney')

            target_date = to_time_str(now_pd_timestamp())

            my_selector: TargetSelector = FundamentalSelector(start_timestamp="2016-01-01", end_timestamp=target_date)
            my_selector.run()

            long_targets = my_selector.get_open_long_targets(timestamp=target_date)
            if long_targets:
                stocks = get_entities(
                    provider="joinquant", entity_schema=Stock, entity_ids=long_targets, return_type="domain"
                )

                # Syncing the eastmoney group is best effort: a failure is
                # mailed to the maintainer but does not stop the report.
                try:
                    codes = [stock.code for stock in stocks]
                    add_to_eastmoney(codes=codes, entity_type="stock", group="core")
                except Exception as e:
                    email_action.send_message(
                        zvt_config["email_username"],
                        "report_core_company error",
                        "report_core_company error:{}".format(e),
                    )

                infos = stocks_with_info(stocks)
                msg = "\n".join(infos)
            else:
                msg = "no targets"

            logger.info(msg)

            email_action.send_message(get_subscriber_emails(), f"{to_time_str(target_date)} 核心资产选股结果", msg)
            break
        except Exception as e:
            logger.exception("report_core_company error:{}".format(e))
            time.sleep(60 * 3)
            error_count = error_count + 1
            # Alert exactly once so a long outage does not flood the mailbox.
            if error_count == 10:
                email_action.send_message(
                    zvt_config["email_username"], "report_core_company error", "report_core_company error:{}".format(e)
                )
 def run(self):
     """Store one industry block row for every entry of ``self.category_map``.

     Each entry is unpacked into exactly two ``(key, value)`` items: the first
     carries the level marker and the block code, the second carries the block
     name. The level marker selects ``block_type`` (``gicsl1`` .. ``gicsl4``)
     and the code prefix selects the exchange (``003`` -> ``cn``,
     ``204`` -> ``us``, ``402`` -> ``hk``). The resulting single-row frame is
     written with ``df_to_db(..., force_update=True)``.
     """
     block_type_by_level = (
         ('一级板块代码', 'gicsl1'),
         ('二级板块代码', 'gicsl2'),
         ('三级板块代码', 'gicsl3'),
         ('四级板块代码', 'gicsl4'),
     )
     exchange_by_prefix = (
         ('003', 'cn'),
         ('204', 'us'),
         ('402', 'hk'),
     )

     for category_map_dict in self.category_map:
         # Unpacking fails unless the dict holds exactly two items.
         category, name_ch = category_map_dict.items()
         df = pd.DataFrame(index=[0])

         # The first level marker found in the (key, value) pair wins.
         for level_key, block_type in block_type_by_level:
             if level_key not in category:
                 continue
             block_code = category[1]
             df['code'] = block_code
             for prefix, exchange in exchange_by_prefix:
                 if block_code.startswith(prefix):
                     df['exchange'] = exchange
                     break
             df['block_type'] = block_type
             break

         df['timestamp'] = now_pd_timestamp()
         df['name'] = name_ch[1]
         df['entity_type'] = 'block'
         df['category'] = "industry"

         # The same "block_<exchange>_<code>" value serves as id and entity_id.
         block_ids = df.apply(lambda row: "block_" + row.exchange + "_" + row.code, axis=1)
         df['id'] = block_ids
         df['entity_id'] = block_ids

         df_to_db(data_schema=self.data_schema, df=df, provider=self.provider,
                  force_update=True)
         self.logger.info(f"完成choice数据行业数据保存:{category[1],name_ch[1]}")
    def fetch_csi_index_component(self, df: pd.DataFrame, http_session):
        """
        Fetch and store the constituent stocks of SSE / CSI indices.

        For every row of ``df`` (columns read: ``code`` and ``name``) the
        constituent workbook is downloaded from csindex.com.cn, reduced to the
        constituent code and name columns, enriched with the index entity
        fields and written through ``df_to_db`` with ``force_update=True``.
        An index whose download raises ``requests.HTTPError`` is logged and
        skipped. ``self.sleep()`` is called after each stored index.

        :param df: index list, one row per index
        :param http_session: session object handed to ``request_get``
        """
        url_template = 'http://www.csindex.com.cn/uploads/file/autofile/cons/{}cons.xls'
        renamed_columns = {
            '成分券代码Constituent Code': 'stock_code',
            '成分券名称Constituent Name': 'stock_name'
        }

        for _, index_row in df.iterrows():
            index_code = index_row['code']
            url = url_template.format(index_code)

            try:
                response = request_get(http_session, url)
                response.raise_for_status()
            except requests.HTTPError as error:
                self.logger.error(
                    f'{index_row["name"]} - {index_code} 成分股抓取错误 ({error})')
                continue

            workbook_df = pd.read_excel(io.BytesIO(response.content))
            response_df = workbook_df[list(renamed_columns)].rename(columns=renamed_columns)

            index_id = f'index_cn_{index_code}'

            def to_stock_id(stock_code):
                return china_stock_code_to_id(str(stock_code))

            def to_record_id(stock_id):
                return f'{index_id}_{stock_id}'

            # Constant columns describing the index every constituent belongs to.
            response_df['entity_id'] = index_id
            response_df['entity_type'] = EntityType.Index.value
            response_df['exchange'] = 'cn'
            response_df['code'] = index_code
            response_df['name'] = index_row['name']
            response_df['timestamp'] = now_pd_timestamp(Region.CHN)

            response_df['stock_id'] = response_df['stock_code'].apply(to_stock_id)
            response_df['id'] = response_df['stock_id'].apply(to_record_id)

            df_to_db(df=response_df,
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force_update=True)
            self.logger.info(f'{index_row["name"]} - {index_code} 成分股抓取完成...')

            self.sleep()
    def fetch_cni_index_component(self, df: pd.DataFrame, http_session):
        """
        Fetch and store the constituent stocks of CNI indices.

        For every row of ``df`` (columns read: ``code`` and ``name``) the
        constituent workbook is downloaded from cnindex.com.cn and read with
        every cell as a string. The stock code column (``样本股代码``, or
        ``证券代码`` when the former is absent) is kept as ``stock_code``,
        enriched with the index entity fields and written through
        ``df_to_db``. An index whose download raises ``requests.HTTPError`` is
        logged and skipped. ``self.sleep()`` is called after each stored index.

        :param df: index list, one row per index
        :param http_session: session object handed to ``request_get``
        """
        query_url = 'http://www.cnindex.com.cn/docs/yb_{}.xls'

        for _, index in df.iterrows():
            index_code = index['code']

            url = query_url.format(index_code)

            try:
                response = request_get(http_session, url)
                response.raise_for_status()
            except requests.HTTPError as error:
                self.logger.error(
                    f'{index["name"]} - {index_code} 成分股抓取错误 ({error})')
                continue

            workbook_df = pd.read_excel(io.BytesIO(response.content),
                                        dtype='str')

            index_id = f'index_cn_{index_code}'

            # The code column header differs between workbooks.
            try:
                code_df = workbook_df[['样本股代码']]
            except KeyError:
                code_df = workbook_df[['证券代码']]

            # Rename while the frame still has exactly one column. Assigning a
            # one-element column list after the entity columns were added
            # raised a length-mismatch ValueError.
            response_df = code_df.copy()
            response_df.columns = ['stock_code']

            response_df['entity_id'] = index_id
            response_df['entity_type'] = EntityType.Index.value
            response_df['exchange'] = 'cn'
            response_df['code'] = index_code
            response_df['name'] = index['name']
            response_df['timestamp'] = now_pd_timestamp(Region.CHN)

            response_df['stock_id'] = response_df['stock_code'].apply(
                lambda x: china_stock_code_to_id(str(x)))
            response_df['id'] = response_df['stock_id'].apply(
                lambda x: f'{index_id}_{x}')

            df_to_db(df=response_df,
                     region=Region.CHN,
                     data_schema=self.data_schema,
                     provider=self.provider)
            self.logger.info(f'{index["name"]} - {index_code} 成分股抓取完成...')

            self.sleep()
    def record(self, entity, start, end, size, timestamps, http_session):
        """
        Fetch ``size`` bars for ``entity`` through ``jq_get_bars``.

        The adjust reference date is ``'2000-01-01'`` for ``AdjustType.hfq``
        and the current China-region date otherwise. ``end_date`` is only
        passed when ``self.end_timestamp`` is set. For ``AdjustType.qfq`` the
        first fetched bar is compared with the bar already stored for the same
        timestamp; when the closes differ at two decimals,
        ``self.recompute_qfq`` is called with the new/old close ratio.

        ``start``, ``end``, ``timestamps`` and ``http_session`` are not used by
        this implementation.

        :return: the fetched DataFrame, or ``None`` when nothing was returned
        """
        fq_ref_date = ('2000-01-01' if self.adjust_type == AdjustType.hfq
                       else to_time_str(now_pd_timestamp(Region.CHN)))

        bar_kwargs = {
            'count': size,
            'unit': self.jq_trading_level,
            'fq_ref_date': fq_ref_date,
        }
        if self.end_timestamp:
            bar_kwargs['end_date'] = to_time_str(self.end_timestamp)

        df = jq_get_bars(to_jq_entity_id(entity), **bar_kwargs)

        if not pd_is_not_null(df):
            return None

        # Decide whether the previously saved forward-adjusted (qfq) data has
        # to be recomputed.
        if self.adjust_type == AdjustType.qfq:
            first_bar = df.head(1)
            check_date = first_bar['timestamp'][0]
            saved_df = get_kdata(region=self.region,
                                 entity_id=entity.id,
                                 provider=self.provider,
                                 start_timestamp=check_date,
                                 end_timestamp=check_date,
                                 limit=1,
                                 level=self.level,
                                 adjust_type=self.adjust_type)
            if pd_is_not_null(saved_df):
                old = saved_df.iloc[0, :]['close']
                new = first_bar['close'][0]
                # A different close for the same timestamp means the stored
                # qfq prices are stale.
                if round(old, 2) != round(new, 2):
                    self.recompute_qfq(entity,
                                       qfq_factor=new / old,
                                       last_timestamp=pd.Timestamp(check_date))

        return df
Esempio n. 26
0
def finance_score(data_schema,
                  security_id=None,
                  codes=None,
                  provider='eastmoney',
                  fields=None,
                  timestamp=None,
                  report_count=20):
    """Score securities by the quantile band of their averaged report fields.

    Data is loaded with ``get_data`` up to ``timestamp``, restricted to the
    last ``report_count`` distinct ``report_date`` values and averaged per
    ``security_id``. Each averaged value is then replaced by the highest of
    0.9 / 0.7 / 0.5 / 0.3 / 0.1 whose column quantile it strictly exceeds,
    or 0 when it exceeds none. Intermediate frames are printed.

    :param data_schema: schema passed through to ``get_data``
    :param security_id: passed through to ``get_data``
    :param codes: passed through to ``get_data``
    :param provider: data provider name
    :param fields: columns to score; ``None`` is treated as no extra columns
    :param timestamp: upper time bound; ``None`` means "now", resolved on
        every call
    :param report_count: number of most recent report dates to include
    :return: the scored DataFrame (earlier versions only printed it)
    """
    # Resolve "now" per call. A ``now_pd_timestamp()`` default in the
    # signature is evaluated once, at definition time, and then goes stale.
    if timestamp is None:
        timestamp = now_pd_timestamp()

    # ``fields`` defaults to None, which cannot be concatenated with a list.
    columns = list(fields or []) + ['security_id', 'timestamp', 'report_date']

    data_df = get_data(data_schema=data_schema,
                       security_id=security_id,
                       codes=codes,
                       provider=provider,
                       columns=columns,
                       end_timestamp=timestamp)

    time_series = data_df['report_date'].drop_duplicates()
    time_series = time_series[-report_count:]

    data_df = index_df_with_security_time(data_df)

    idx = pd.IndexSlice

    df = data_df.loc[idx[:, time_series], ]
    print(df)

    df = df.groupby(df['security_id']).mean()
    print(df)

    score_levels = (0.9, 0.7, 0.5, 0.3, 0.1)
    quantile = df.quantile([0.1, 0.3, 0.5, 0.7, 0.9])

    def evaluate_score(s, column):
        # Highest band whose quantile the value strictly exceeds.
        for level in score_levels:
            if s > quantile.loc[level, column]:
                return level
        return 0

    for item in quantile.columns:
        # Bind ``item`` as a default so the lambda does not rely on the loop
        # variable still having this value when it runs.
        df[item] = df[item].apply(lambda x, column=item: evaluate_score(x, column))

    print(df)
    return df
Esempio n. 27
0
    def __init__(
        self,
        entity_ids=None,
        entity_schema=Stock,
        exchanges=None,
        codes=None,
        start_timestamp=None,
        end_timestamp=None,
        long_threshold=0.8,
        short_threshold=0.2,
        level=IntervalLevel.LEVEL_1DAY,
        provider=None,
        select_mode: SelectMode = SelectMode.condition_and,
    ) -> None:
        """Store the selection settings and let ``init_factors`` build the factors.

        :param entity_ids: stored and forwarded to ``init_factors``
        :param entity_schema: stored and forwarded to ``init_factors``
        :param exchanges: stored and forwarded to ``init_factors``
        :param codes: stored and forwarded to ``init_factors``
        :param start_timestamp: converted with ``to_pd_timestamp`` when given,
            otherwise ``self.start_timestamp`` is ``None``
        :param end_timestamp: converted with ``to_pd_timestamp`` when given,
            otherwise ``self.end_timestamp`` is the current time
        :param long_threshold: stored as ``self.long_threshold``
        :param short_threshold: stored as ``self.short_threshold``
        :param level: stored and forwarded to ``init_factors``
        :param provider: stored as ``self.provider``
        :param select_mode: stored as ``self.select_mode``
        """
        self.entity_ids = entity_ids
        self.entity_schema = entity_schema
        self.exchanges = exchanges
        self.codes = codes
        self.provider = provider
        self.select_mode = select_mode

        # Always define start_timestamp: leaving it unset when no start was
        # passed made any later read of the attribute raise AttributeError.
        self.start_timestamp = to_pd_timestamp(start_timestamp) if start_timestamp else None
        self.end_timestamp = to_pd_timestamp(end_timestamp) if end_timestamp else now_pd_timestamp()

        self.long_threshold = long_threshold
        self.short_threshold = short_threshold
        self.level = level

        self.factors: List[Factor] = []
        self.filter_result = None
        self.score_result = None

        self.open_long_df: Optional[DataFrame] = None
        self.open_short_df: Optional[DataFrame] = None
        self.keep_df: Optional[DataFrame] = None

        # init_factors receives the raw, unconverted timestamp arguments.
        self.init_factors(
            entity_ids=entity_ids,
            entity_schema=entity_schema,
            exchanges=exchanges,
            codes=codes,
            start_timestamp=start_timestamp,
            end_timestamp=end_timestamp,
            level=self.level,
        )
Esempio n. 28
0
    def process_entity(self, entity_item, trade_day, stock_detail, http_session):
        """Record one round of data for ``entity_item``.

        Steps: evaluate the range and size still to be fetched, fetch it with
        ``self.record``, then pass the result through ``process_duplicate`` and
        ``process_realtime``.

        :param entity_item: entity being recorded
        :param trade_day: forwarded to ``evaluate_start_end_size_timestamps``;
            when truthy its first element is also written to the request log
        :param stock_detail: forwarded to ``evaluate_start_end_size_timestamps``
        :param http_session: session object forwarded to the helper calls
        :return: ``True`` when the entity is finished (nothing to record, or
            the duplicate/realtime step reports it finished), ``False`` when
            it needs another round
        """
        step1 = time.time()
        now = now_pd_timestamp(self.region)

        start_timestamp, end_timestamp, end_date, size, timestamps = \
            self.evaluate_start_end_size_timestamps(now, entity_item, trade_day, stock_detail, http_session)
        size = int(size)

        # Nothing more to record for this entity.
        if size == 0:
            self.on_finish_entity(entity_item, http_session)
            return True

        # Fetch and save. The formatted values are only used for the log line.
        start = start_timestamp.strftime('%Y-%m-%d') if start_timestamp else None
        first_trade_day = trade_day[0].strftime('%Y-%m-%d') if trade_day else None
        end = end_date.strftime('%Y-%m-%d') if end_date else None
        self.logger.info('request {}, {}, {}, {}, {}, {}'.format(
            entity_item.id, size, jq_get_query_count(), first_trade_day, start, end))
        original_list = self.record(entity_item, start=start_timestamp, end=end_timestamp, size=size,
                                    timestamps=timestamps, http_session=http_session)

        # Handle duplicate items.
        entity_finished, all_duplicated = self.process_duplicate(original_list, entity_item)
        if entity_finished:
            return True

        # Handle realtime items.
        entity_finished = self.process_realtime(entity_item, original_list, all_duplicated, now, http_session)
        if entity_finished:
            return True

        self.logger.info("update recording {} id: {}, time cost: {}".format(
            self.data_schema.__name__, entity_item.id, time.time()-step1))
        return False
Esempio n. 29
0
    def download_sh_etf_component(self, df: pd.DataFrame):
        """
        Download and store the constituent stocks of Shanghai-listed ETFs.

        ETF_CLASS => 1. single-market ETF 2. cross-market ETF 3. cross-border ETF
                        5. bond ETF 6. gold ETF

        Only rows with ETF_CLASS "1" or "2" are processed. For each of them the
        constituent list is queried from the SSE endpoint, enriched with the
        ETF entity fields and written through ``df_to_db``. An ETF whose
        response carries no ``result`` rows is logged and skipped.
        ``self.sleep()`` is called after every request.

        :param df: ETF list data
        :return: None
        """
        query_url = (
            "http://query.sse.com.cn/infodisplay/queryConstituentStockInfo.do?"
            "isPagination=false&type={}&etfClass={}")

        etf_df = df[(df["ETF_CLASS"] == "1") | (df["ETF_CLASS"] == "2")]
        etf_df = self.populate_sh_etf_type(etf_df)

        for _, etf in etf_df.iterrows():
            url = query_url.format(etf["ETF_TYPE"], etf["ETF_CLASS"])
            response = requests.get(url, headers=DEFAULT_SH_ETF_LIST_HEADER)
            response_dict = demjson3.decode(response.text)

            etf_code = etf["FUND_ID"]
            etf_id = f"etf_sh_{etf_code}"

            # An empty frame has no columns, so the column selection below
            # would raise KeyError; skip this ETF and keep going.
            constituents = response_dict.get("result", [])
            if not constituents:
                self.logger.warning(f'{etf["FUND_NAME"]} - {etf_code} 成分股数据为空, 已跳过')
                self.sleep()
                continue

            response_df = pd.DataFrame(constituents)[["instrumentId", "instrumentName"]].rename(
                columns={
                    "instrumentId": "stock_code",
                    "instrumentName": "stock_name"
                })

            response_df["entity_id"] = etf_id
            response_df["entity_type"] = "etf"
            response_df["exchange"] = "sh"
            response_df["code"] = etf_code
            response_df["name"] = etf["FUND_NAME"]
            response_df["timestamp"] = now_pd_timestamp()

            response_df["stock_id"] = response_df["stock_code"].apply(china_stock_code_to_id)
            response_df["id"] = response_df["stock_id"].apply(
                lambda x: f"{etf_id}_{x}")

            df_to_db(data_schema=self.data_schema,
                     df=response_df,
                     provider=self.provider)
            self.logger.info(f'{etf["FUND_NAME"]} - {etf_code} 成分股抓取完成...')

            self.sleep()
Esempio n. 30
0
    def on_trading_signals(self, trading_signals: List[TradingSignal]):
        """Mail a summary of recent trading signals, then delegate to the parent.

        A mail is sent only when the first signal's ``happen_timestamp`` lies
        within the last 20 days of the current time for ``self.region``. It
        lists the current positions, the ``open_long`` signals (buy), the
        ``close_long`` signals (sell) and the account's invested amount, total
        value and return in percent. The parent implementation is always
        called afterwards.

        :param trading_signals: signals to report; an empty list sends no mail
        """
        # Nothing to report; reading trading_signals[0] below would raise
        # IndexError on an empty list.
        if not trading_signals:
            super().on_trading_signals(trading_signals)
            return

        target_date = trading_signals[0].happen_timestamp

        # Only mail signals from the last 20 days.
        if target_date + datetime.timedelta(20) > now_pd_timestamp(self.region):
            email_action = EmailInformer()

            msg_parts = []

            # Current positions.
            positions = self.get_current_positions()
            if positions:
                current_stocks = [position.entity_id for position in positions]
                msg_parts.append('目前持仓: ' + entity_ids_to_msg(self.region, current_stocks) + '\n')

            # Buy / sell signals.
            long_stocks = [signal.entity_id for signal in trading_signals
                           if signal.trading_signal_type == TradingSignalType.open_long]
            short_stocks = [signal.entity_id for signal in trading_signals
                            if signal.trading_signal_type == TradingSignalType.close_long]

            if long_stocks:
                msg_parts.append('买入: ' + entity_ids_to_msg(self.region, long_stocks) + '\n')

            if short_stocks:
                msg_parts.append('卖出: ' + entity_ids_to_msg(self.region, short_stocks) + '\n')

            # Account status.
            account = self.get_current_account()

            pct = round((account.all_value - account.input_money) /
                        account.input_money * 100, 4)

            msg_parts.append(f'投入金额:{account.input_money},目前总市值:{account.all_value},收益率:{pct}%')

            email_action.send_message("*****@*****.**", f'{target_date} 交易信号',
                                      ''.join(msg_parts))

        super().on_trading_signals(trading_signals)