Example #1
    def record(self, entity, start, end, size, timestamps):

        sql_valuation = 'SELECT a.TRADE_DATE,a.TICKER_SYMBOL,a.PE,a.PB,a.PCF,a.PS FROM equ_factor_VS a WHERE a.TRADE_DATE>=%s AND a.TRADE_DATE<=%s AND a.TICKER_SYMBOL=%s'

        df = pd.read_sql(sql_valuation, self.tonglian_conn,
                         params=(to_time_str(start, fmt="YYYYMMDD"), now_time_str(fmt="YYYYMMDD"), entity.code))
        df = df.rename({'TRADE_DATE': 'day',
                        'TICKER_SYMBOL': 'code',
                        'PE': 'pe',
                        'PB': 'pb',
                        'PS': 'ps',
                        'PCF': 'pcf'},
                       axis='columns')


        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['day'])
        df['name'] = entity.name
        df['id'] = df['timestamp'].apply(lambda x: "{}_{}".format(entity.id, to_time_str(x)))

        # unit conversions: these columns are not selected by the SQL above, so
        # only scale them when the provider actually returns them
        unit_scales = {'market_cap': 100000000,               # total market cap
                       'circulating_market_cap': 100000000,   # circulating market cap
                       'capitalization': 10000,               # total share capital
                       'circulating_cap': 10000,              # circulating shares
                       'turnover_ratio': 0.01}                # turnover rate
        for col, scale in unit_scales.items():
            if col in df.columns:
                df[col] = df[col] * scale
        df_to_db(df=df, data_schema=self.data_schema, provider=self.provider, force_update=self.force_update)

        return None
Example #2
    def init_entities(self):
        """
        Initialize the entities for which we will record data.

        """
        if self.entity_provider == self.provider and self.entity_schema == self.data_schema:
            self.entity_session = self.session
        else:
            self.entity_session = get_db_session(provider=self.entity_provider, data_schema=self.entity_schema)

        filters = None
        if self.day_data:
            df = self.data_schema.query_data(start_timestamp=now_time_str(), columns=['entity_id', 'timestamp'],
                                             provider=self.provider)
            if pd_is_not_null(df):
                entity_ids = df['entity_id'].tolist()
                self.logger.info(f'ignore entity_ids:{entity_ids}')
                filters = [self.entity_schema.entity_id.notin_(entity_ids)]

        # init the entity list
        self.entities = get_entities(session=self.entity_session,
                                     entity_schema=self.entity_schema,
                                     entity_type=self.entity_type,
                                     exchanges=self.exchanges,
                                     entity_ids=self.entity_ids,
                                     codes=self.codes,
                                     return_type='domain',
                                     provider=self.entity_provider,
                                     filters=filters)
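
The day_data branch above skips entities that already have a row for today by building a SQLAlchemy notin_() exclusion filter. A self-contained sketch of that pattern, using a made-up demo_entity table and made-up ids purely for illustration:

from sqlalchemy import Column, MetaData, String, Table

# stand-in for the real entity schema used by the recorder
demo = Table('demo_entity', MetaData(), Column('entity_id', String))

# entities that already have data today; exclude them from the next run
already_recorded = ['stock_sz_000001', 'stock_sh_600000']
exclusion = demo.c.entity_id.notin_(already_recorded)
print(exclusion)  # renders as: demo_entity.entity_id NOT IN (...)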
Example #3
    def generate_request_param(self, security_item, start, end, size, timestamp):
        return {
            'security_item': security_item,
            'start': to_time_str(start, fmt=TIME_FORMAT_DAY1),
            'end': now_time_str(fmt=TIME_FORMAT_DAY1),
            'level': self.level.value
        }
    def record(self, entity, start, end, size, timestamps):
        q = query(valuation).filter(valuation.code == to_jq_entity_id(entity))
        count: pd.Timedelta = now_pd_timestamp() - start
        df = get_fundamentals_continuously(q,
                                           end_date=now_time_str(),
                                           count=count.days + 1,
                                           panel=False)
        df['entity_id'] = entity.id
        df['timestamp'] = pd.to_datetime(df['day'])
        df['code'] = entity.code
        df['name'] = entity.name
        df['id'] = df['timestamp'].apply(
            lambda x: "{}_{}".format(entity.id, to_time_str(x)))
        df = df.rename(
            {
                'pe_ratio_lyr': 'pe',
                'pe_ratio': 'pe_ttm',
                'pb_ratio': 'pb',
                'ps_ratio': 'ps',
                'pcf_ratio': 'pcf'
            },
            axis='columns')

        df['market_cap'] = df['market_cap'] * 100000000  # total market cap
        df['circulating_market_cap'] = df['circulating_market_cap'] * 100000000  # circulating market cap
        df['capitalization'] = df['capitalization'] * 10000  # total share capital
        df['circulating_cap'] = df['circulating_cap'] * 10000  # circulating shares
        df['turnover_ratio'] = df['turnover_ratio'] * 0.01  # turnover rate
        df_to_db(df=df,
                 data_schema=self.data_schema,
                 provider=self.provider,
                 force_update=self.force_update)

        return None
Example #5
def risky_company(region: Region,
                  the_date=None,
                  income_yoy=-0.1,
                  profit_yoy=-0.1,
                  entity_ids=None):
    if not the_date:
        the_date = to_pd_timestamp(now_time_str(region))

    codes = []
    start_timestamp = to_pd_timestamp(the_date) - datetime.timedelta(130)
    # declining revenue, declining profit, low current ratio, low quick ratio
    finance_filter = or_(FinanceFactor.op_income_growth_yoy < income_yoy,
                         FinanceFactor.net_profit_growth_yoy <= profit_yoy,
                         FinanceFactor.current_ratio < 0.7,
                         FinanceFactor.quick_ratio < 0.5)
    df = FinanceFactor.query_data(region=region,
                                  entity_ids=entity_ids,
                                  start_timestamp=start_timestamp,
                                  filters=[finance_filter],
                                  columns=['code'])
    if pd_is_not_null(df):
        codes = codes + df.code.tolist()

    # high accounts receivable, high inventories, high goodwill
    balance_filter = (BalanceSheet.accounts_receivable + BalanceSheet.inventories + BalanceSheet.goodwill) \
                     > BalanceSheet.total_equity
    df = BalanceSheet.query_data(region=region,
                                 entity_ids=entity_ids,
                                 start_timestamp=start_timestamp,
                                 filters=[balance_filter],
                                 columns=['code'])
    if pd_is_not_null(df):
        codes = codes + df.code.tolist()

    # accounts receivable > net profit / 2
    df1 = BalanceSheet.query_data(
        region=region,
        entity_ids=entity_ids,
        start_timestamp=start_timestamp,
        columns=[BalanceSheet.code, BalanceSheet.accounts_receivable])
    if pd_is_not_null(df1):
        df1.drop_duplicates(subset='code', keep='last', inplace=True)
        df1 = df1.set_index('code', drop=True).sort_index()

    df2 = IncomeStatement.query_data(
        region=region,
        entity_ids=entity_ids,
        start_timestamp=start_timestamp,
        columns=[IncomeStatement.code, IncomeStatement.net_profit])
    if pd_is_not_null(df2):
        df2.drop_duplicates(subset='code', keep='last', inplace=True)
        df2 = df2.set_index('code', drop=True).sort_index()

    if pd_is_not_null(df1) and pd_is_not_null(df2):
        codes = codes + df1[df1.accounts_receivable > df2.net_profit /
                            2].index.tolist()

    return list(set(codes))
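
A minimal usage sketch for the screener above; Region.CHN also appears in the later examples, and the keyword arguments simply restate the defaults:

# flag companies with shrinking revenue/profit or a weak balance sheet
risky_codes = risky_company(Region.CHN, income_yoy=-0.1, profit_yoy=-0.1)
print(len(risky_codes), 'codes flagged')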
Example #6
    def on_finish(self, security_item):
        kdatas = get_kdata(
            provider=self.provider,
            security_id=security_item.id,
            level=self.level.value,
            order=self.data_schema.timestamp.asc(),
            return_type='domain',
            session=self.session,
            filters=[
                self.data_schema.hfq_close.is_(None),
                self.data_schema.timestamp >= to_pd_timestamp('2005-01-01')
            ])
        if kdatas:
            start = kdatas[0].timestamp
            end = kdatas[-1].timestamp

            # get hfq from joinquant
            df = get_price(to_jq_security_id(security_item),
                           start_date=to_time_str(start),
                           end_date=now_time_str(),
                           frequency='daily',
                           fields=['factor', 'open', 'close', 'low', 'high'],
                           skip_paused=True,
                           fq='post')
            if df is not None and not df.empty:
                # fill hfq data
                for kdata in kdatas:
                    time_str = to_time_str(kdata.timestamp)
                    if time_str in df.index:
                        kdata.hfq_open = df.loc[time_str, 'open']
                        kdata.hfq_close = df.loc[time_str, 'close']
                        kdata.hfq_high = df.loc[time_str, 'high']
                        kdata.hfq_low = df.loc[time_str, 'low']
                        kdata.factor = df.loc[time_str, 'factor']
                self.session.commit()

                latest_factor = df.factor.iloc[-1]
                # the factor has not changed, so there is no need to recompute past qfq values
                if latest_factor == self.current_factors.get(security_item.id):
                    sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                          'security_id=\'{}\' and level=\'{}\' and (qfq_close isnull or qfq_high isnull or qfq_low isnull or qfq_open isnull)'.format(
                        self.data_schema.__table__, latest_factor, latest_factor, latest_factor, latest_factor,
                        security_item.id, self.level.value)
                else:
                    sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                          'security_id=\'{}\' and level=\'{}\''.format(self.data_schema.__table__, latest_factor,
                                                                       latest_factor, latest_factor, latest_factor,
                                                                       security_item.id,
                                                                       self.level.value)
                self.logger.info(sql)
                self.session.execute(sql)
                self.session.commit()

        # TODO: use netease provider to get turnover_rate
        self.logger.info('use netease provider to get turnover_rate')
    def record(self, entity, start, end, size, time_array):
        if entity.type == 'index':
            id_flag = "{}1".format(entity.code)
        elif entity.type == 'stock':
            if entity.exchange == 'sh':
                id_flag = "{}1".format(entity.code)
            elif entity.exchange == 'sz':
                id_flag = "{}2".format(entity.code)

        the_url = self.url.format("{}".format(id_flag),
                                  eastmoney_map_zvt_trading_level(self.level),
                                  now_time_str(fmt=TIME_FORMAT_MINUTE), size)

        resp = requests.get(the_url)
        results = json_callback_param(resp.text)

        kdatas = []

        for result in results:
            the_timestamp = to_pd_timestamp(result['time'])
            the_id = generate_kdata_id(entity_id=entity.id,
                                       timestamp=the_timestamp,
                                       level=self.level)

            if not data_exist(self.session, self.kdata_schema, the_id):
                kdatas.append(
                    self.kdata_schema(id=the_id,
                                      timestamp=the_timestamp,
                                      entity_id=entity.id,
                                      code=entity.code,
                                      name=entity.name,
                                      level=self.level,
                                      open=to_float(result['open']),
                                      close=to_float(result['close']),
                                      high=to_float(result['high']),
                                      low=to_float(result['low']),
                                      volume=to_float(result['volume']),
                                      turnover=to_float(result['amount']),
                                      turnover_rate=to_float(
                                          result['turnoverrate'])))
        return kdatas
    def request(self, url=None, method='get', param=None, path_fields=None):
        security_item = param['security_item']
        start_timestamp = param['start_timestamp']
        # unadjusted prices (no price adjustment applied)
        df = get_price(
            to_jq_security_id(security_item),
            start_date=to_time_str(start_timestamp),
            end_date=now_time_str(),
            frequency='daily',
            fields=['open', 'close', 'low', 'high', 'volume', 'money'],
            skip_paused=True,
            fq=None)
        df.index.name = 'timestamp'
        df.reset_index(inplace=True)
        df['name'] = security_item.name
        df.rename(columns={'money': 'turnover'}, inplace=True)

        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = Provider.JOINQUANT.value
        df['level'] = param['level']

        return df.to_dict(orient='records')
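
The method above ends with df.to_dict(orient='records'), which converts the frame into a list with one plain dict per row. A tiny standalone illustration of that pandas call:

import pandas as pd

frame = pd.DataFrame({'timestamp': ['2020-01-02'], 'close': [10.5]})
print(frame.to_dict(orient='records'))
# [{'timestamp': '2020-01-02', 'close': 10.5}]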
Example #9
    def init_entities(self):
        """
        Initialize the entities for which we will record data.

        """
        if self.entity_provider == self.provider and self.entity_schema == self.data_schema:
            self.entity_session = self.session
        else:
            self.entity_session = get_db_session(
                provider=self.entity_provider, data_schema=self.entity_schema)

        if self.day_data:
            df = self.data_schema.query_data(
                start_timestamp=now_time_str(),
                columns=["entity_id", "timestamp"],
                provider=self.provider)
            if pd_is_not_null(df):
                entity_ids = df["entity_id"].tolist()
                self.logger.info(f"ignore entity_ids:{entity_ids}")
                if self.entity_filters:
                    self.entity_filters.append(
                        self.entity_schema.entity_id.notin_(entity_ids))
                else:
                    self.entity_filters = [
                        self.entity_schema.entity_id.notin_(entity_ids)
                    ]

        #: init the entity list
        self.entities = get_entities(
            session=self.entity_session,
            entity_schema=self.entity_schema,
            exchanges=self.exchanges,
            entity_ids=self.entity_ids,
            codes=self.codes,
            return_type="domain",
            provider=self.entity_provider,
            filters=self.entity_filters,
        )
    def record(self, entity, start, end, size, timestamps):
        the_url = self.url.format("{}".format(entity.code),
                                  level_flag(self.level), size,
                                  now_time_str(fmt=TIME_FORMAT_DAY1))

        resp = requests.get(the_url)
        results = json_callback_param(resp.text)

        kdatas = []

        if results:
            klines = results['data']['klines']

            # TODO: ignore the last (still unfinished) kdata for now; this could be handled more precisely if needed
            for result in klines[:-1]:
                # "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"
                # time,open,close,high,low,volume,turnover
                fields = result.split(',')
                the_timestamp = to_pd_timestamp(fields[0])

                the_id = generate_kdata_id(entity_id=entity.id,
                                           timestamp=the_timestamp,
                                           level=self.level)

                kdatas.append(
                    dict(id=the_id,
                         timestamp=the_timestamp,
                         entity_id=entity.id,
                         code=entity.code,
                         name=entity.name,
                         level=self.level.value,
                         open=to_float(fields[1]),
                         close=to_float(fields[2]),
                         high=to_float(fields[3]),
                         low=to_float(fields[4]),
                         volume=to_float(fields[5]),
                         turnover=to_float(fields[6])))
        return kdatas
    def record(self, entity, start, end, size, timestamps, http_session):
        url = self.url.format(
            "{}".format(entity.code), level_flag(self.level), size,
            now_time_str(region=Region.CHN, fmt=TIME_FORMAT_DAY1))
        text = sync_get(http_session, url, return_type='text')
        if text is None:
            return None

        results = json_callback_param(text)

        if results:
            klines = results['data']['klines']

            # NOTE: numba's nopython mode cannot compile this helper (it builds
            # Python dicts and calls to_float), so it is left as a plain function
            def numba_boost_up(klines):
                kdatas = []
                # TODO: ignore the last (still unfinished) kdata for now; this could be handled more precisely if needed
                for result in klines[:-1]:
                    # "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"
                    # time,open,close,high,low,volume,turnover
                    fields = result.split(',')
                    kdatas.append(
                        dict(timestamp=fields[0],
                             open=to_float(fields[1]),
                             close=to_float(fields[2]),
                             high=to_float(fields[3]),
                             low=to_float(fields[4]),
                             volume=to_float(fields[5]),
                             turnover=to_float(fields[6])))
                return kdatas

            kdatas = numba_boost_up(klines)
            if len(kdatas) > 0:
                df = pd.DataFrame.from_records(kdatas)
                return df
        return None
Example #12
def get_ui_path(region: Region, name):
    if name is None:
        return os.path.join(zvt_env['ui_path'], '{}.html'.format(now_time_str(region=region, fmt=TIME_FORMAT_ISO8601)))
    return os.path.join(zvt_env['ui_path'], f'{name}.html')
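
A quick sketch of how the helper above resolves file names; the exact timestamp depends on TIME_FORMAT_ISO8601 and on zvt_env['ui_path']:

# an explicit name maps to <ui_path>/report.html
named = get_ui_path(Region.CHN, 'report')
# with no name it falls back to a timestamped file, e.g. <ui_path>/2021-01-04T09:30:00.html
default = get_ui_path(Region.CHN, None)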
Example #13
def get_ui_path(name):
    if name is None:
        # use the bare timestamp here; ".html" is appended once below
        name = now_time_str(fmt=TIME_FORMAT_ISO8601)
    return os.path.join(UI_PATH, '{}.html'.format(name))
    def on_finish_entity(self, entity):
        kdatas = get_kdata(
            provider=self.provider,
            entity_id=entity.id,
            level=self.level.value,
            order=self.data_schema.timestamp.asc(),
            return_type='domain',
            session=self.session,
            filters=[
                self.data_schema.hfq_close.is_(None),
                self.data_schema.timestamp >= to_pd_timestamp('2005-01-01')
            ])
        if kdatas:
            start = kdatas[0].timestamp
            end = kdatas[-1].timestamp

            # get hfq from joinquant
            df = get_price(to_jq_entity_id(entity),
                           start_date=to_time_str(start),
                           end_date=now_time_str(),
                           frequency='daily',
                           fields=['factor', 'open', 'close', 'low', 'high'],
                           skip_paused=True,
                           fq='post')
            if df is not None and not df.empty:
                # fill hfq data
                for kdata in kdatas:
                    time_str = to_time_str(kdata.timestamp)
                    if time_str in df.index:
                        kdata.hfq_open = df.loc[time_str, 'open']
                        kdata.hfq_close = df.loc[time_str, 'close']
                        kdata.hfq_high = df.loc[time_str, 'high']
                        kdata.hfq_low = df.loc[time_str, 'low']
                        kdata.factor = df.loc[time_str, 'factor']
                self.session.commit()

                latest_factor = df.factor.iloc[-1]
                # the factor has not changed, so there is no need to recompute past qfq values
                if latest_factor == self.current_factors.get(entity.id):
                    sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                          'entity_id=\'{}\' and level=\'{}\' and (qfq_close isnull or qfq_high isnull or qfq_low isnull or qfq_open isnull)'.format(
                        self.data_schema.__table__, latest_factor, latest_factor, latest_factor, latest_factor,
                        entity.id, self.level.value)
                else:
                    sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                          'entity_id=\'{}\' and level=\'{}\''.format(self.data_schema.__table__, latest_factor,
                                                                     latest_factor, latest_factor, latest_factor,
                                                                     entity.id,
                                                                     self.level.value)
                self.logger.info(sql)
                self.session.execute(sql)
                self.session.commit()

            # use netease provider to get turnover_rate
            query_url = 'http://quotes.money.163.com/service/chddata.html?code={}{}&start={}&end={}&fields=PCHG;TURNOVER'

            if entity.exchange == 'sh':
                exchange_flag = 0
            else:
                exchange_flag = 1

            url = query_url.format(exchange_flag, entity.code,
                                   to_time_str(start), to_time_str(end))
            response = requests.get(url=url)

            df = read_csv(io.BytesIO(response.content),
                          encoding='GB2312',
                          na_values='None')
            df['日期'] = pd.to_datetime(df['日期'])  # '日期' = date
            df.set_index('日期', drop=True, inplace=True)

            if df is not None and not df.empty:
                # fill turnover_rate, pct_change
                for kdata in kdatas:
                    if kdata.timestamp in df.index:
                        kdata.turnover_rate = df.loc[kdata.timestamp, '换手率']  # turnover rate
                        kdata.change_pct = df.loc[kdata.timestamp, '涨跌幅']  # pct change
                self.session.commit()