Example #1
    def on_finish_entity(self, entity):
        super().on_finish_entity(entity)

        if not self.fetch_jq_timestamp:
            return

        # fill the timestamp for report published date
        the_data_list = get_data(
            data_schema=self.data_schema,
            provider=self.provider,
            entity_id=entity.id,
            order=self.data_schema.timestamp.asc(),
            return_type='domain',
            session=self.session,
            filters=[
                self.data_schema.timestamp == self.data_schema.report_date,
                self.data_schema.timestamp >= to_pd_timestamp('2005-01-01')
            ])
        if the_data_list:
            if self.data_schema == FinanceFactor:
                for the_data in the_data_list:
                    self.fill_timestamp_with_jq(entity, the_data)
            else:
                df = get_finance_factor(
                    entity_id=entity.id,
                    columns=[
                        FinanceFactor.timestamp, FinanceFactor.report_date,
                        FinanceFactor.id
                    ],
                    filters=[
                        FinanceFactor.timestamp != FinanceFactor.report_date,
                        FinanceFactor.timestamp >=
                        to_pd_timestamp('2005-01-01'),
                        FinanceFactor.report_date >=
                        the_data_list[0].report_date,
                        FinanceFactor.report_date <=
                        the_data_list[-1].report_date,
                    ])

                if pd_is_not_null(df):
                    index_df(df, index='report_date', time_field='report_date')

                for the_data in the_data_list:
                    if (df is not None) and (
                            not df.empty) and the_data.report_date in df.index:
                        the_data.timestamp = df.at[the_data.report_date,
                                                   'timestamp']
                        self.logger.info(
                            'db fill {} {} timestamp:{} for report_date:{}'.
                            format(self.data_schema, entity.id,
                                   the_data.timestamp, the_data.report_date))
                        self.session.commit()
                    else:
                        # timestamp not yet available in the db, fall back to joinquant to fill it
                        self.fill_timestamp_with_jq(entity, the_data)
Example #2
    def __init__(self,
                 entity_type='stock',
                 exchanges=['sh', 'sz'],
                 entity_ids=None,
                 codes=None,
                 batch_size=10,
                 force_update=False,
                 sleeping_time=5,
                 default_size=2000,
                 real_time=False,
                 fix_duplicate_way='add',
                 start_timestamp=None,
                 end_timestamp=None,
                 close_hour=0,
                 close_minute=0) -> None:

        self.default_size = default_size
        self.real_time = real_time

        self.close_hour = close_hour
        self.close_minute = close_minute

        self.fix_duplicate_way = fix_duplicate_way

        self.start_timestamp = to_pd_timestamp(start_timestamp)
        self.end_timestamp = to_pd_timestamp(end_timestamp)

        super().__init__(entity_type, exchanges, entity_ids, codes, batch_size, force_update, sleeping_time)
Example #3
def common_filter(query: Query,
                  data_schema,
                  start_timestamp=None,
                  end_timestamp=None,
                  filters=None,
                  order=None,
                  limit=None,
                  time_field='timestamp'):
    assert data_schema is not None
    time_col = getattr(data_schema, time_field)

    if start_timestamp:
        query = query.filter(time_col >= to_pd_timestamp(start_timestamp))
    if end_timestamp:
        query = query.filter(time_col <= to_pd_timestamp(end_timestamp))

    if filters:
        for the_filter in filters:
            query = query.filter(the_filter)
    if order is not None:
        query = query.order_by(order)
    else:
        query = query.order_by(time_col.asc())
    if limit:
        query = query.limit(limit)

    return query
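For context, here is a minimal, self-contained sketch of the pattern common_filter implements: resolving the time column by name and stacking optional filters, ordering and a limit onto a SQLAlchemy query. It assumes SQLAlchemy 1.4+; the Quote model, the in-memory SQLite engine and the sample rows are hypothetical stand-ins, not part of the codebase above.

import pandas as pd
from sqlalchemy import Column, DateTime, Float, String, create_engine
from sqlalchemy.orm import declarative_base, sessionmaker

Base = declarative_base()


class Quote(Base):
    # hypothetical schema standing in for data_schema
    __tablename__ = 'quote'
    id = Column(String, primary_key=True)
    timestamp = Column(DateTime)
    close = Column(Float)


engine = create_engine('sqlite://')
Base.metadata.create_all(engine)
session = sessionmaker(bind=engine)()
session.add_all([Quote(id='a', timestamp=pd.Timestamp('2020-01-02'), close=10.0),
                 Quote(id='b', timestamp=pd.Timestamp('2020-02-02'), close=12.0)])
session.commit()

# resolve the time column by name, then narrow, order and limit the query,
# which is what common_filter does with start/end_timestamp, filters, order and limit
time_col = getattr(Quote, 'timestamp')
query = session.query(Quote)
query = query.filter(time_col >= pd.Timestamp('2020-01-01'))
query = query.filter(Quote.close > 9)  # plays the role of one entry in `filters`
query = query.order_by(time_col.asc()).limit(10)
print([q.id for q in query.all()])  # ['a', 'b']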
Example #4
    def __init__(self,
                 entity_ids=None,
                 entity_schema=Stock,
                 exchanges=None,
                 codes=None,
                 the_timestamp=None,
                 start_timestamp=None,
                 end_timestamp=None,
                 long_threshold=0.8,
                 short_threshold=0.2,
                 level=IntervalLevel.LEVEL_1DAY,
                 provider='eastmoney',
                 portfolio_selector=None) -> None:
        self.entity_ids = entity_ids
        self.entity_schema = entity_schema
        self.exchanges = exchanges
        self.codes = codes
        self.provider = provider
        self.portfolio_selector: TargetSelector = portfolio_selector

        if self.portfolio_selector:
            assert self.portfolio_selector.entity_schema in [Etf, Block, Index]

        if the_timestamp:
            self.the_timestamp = to_pd_timestamp(the_timestamp)
            self.start_timestamp = self.the_timestamp
            self.end_timestamp = self.the_timestamp
        else:
            if start_timestamp:
                self.start_timestamp = to_pd_timestamp(start_timestamp)
            if end_timestamp:
                self.end_timestamp = to_pd_timestamp(end_timestamp)
            else:
                self.end_timestamp = now_pd_timestamp()

        self.long_threshold = long_threshold
        self.short_threshold = short_threshold
        self.level = level

        self.filter_factors: List[FilterFactor] = []
        self.score_factors: List[ScoreFactor] = []
        self.filter_result = None
        self.score_result = None

        self.open_long_df: DataFrame = None
        self.open_short_df: DataFrame = None

        self.init_factors(entity_ids=entity_ids,
                          entity_schema=entity_schema,
                          exchanges=exchanges,
                          codes=codes,
                          the_timestamp=the_timestamp,
                          start_timestamp=start_timestamp,
                          end_timestamp=end_timestamp,
                          level=self.level)
Example #5
    def persist_index(self, df) -> None:
        df['timestamp'] = df['timestamp'].apply(to_pd_timestamp)
        df['list_date'] = df['list_date'].apply(to_pd_timestamp)
        df['id'] = df['code'].apply(lambda code: f'index_cn_{code}')
        df['entity_id'] = df['id']
        df['exchange'] = 'cn'
        df['entity_type'] = 'index'
        df['is_delisted'] = False

        df = df.dropna(axis=0, how='any')
        df = df.drop_duplicates(subset='id', keep='last')

        init_entities(df, entity_type='index', provider=self.provider)
Example #6
    def __init__(self,
                 entity_ids=None,
                 entity_type='stock',
                 exchanges=['sh', 'sz'],
                 codes=None,
                 the_timestamp=None,
                 start_timestamp=None,
                 end_timestamp=None,
                 long_threshold=0.8,
                 short_threshold=0.2,
                 level=IntervalLevel.LEVEL_1DAY,
                 provider='eastmoney',
                 block_selector=None) -> None:
        self.entity_ids = entity_ids
        self.entity_type = entity_type
        self.exchanges = exchanges
        self.codes = codes
        self.provider = provider
        self.block_selector: TargetSelector = block_selector

        if the_timestamp:
            self.the_timestamp = to_pd_timestamp(the_timestamp)
            self.start_timestamp = self.the_timestamp
            self.end_timestamp = self.the_timestamp
        elif start_timestamp and end_timestamp:
            self.start_timestamp = to_pd_timestamp(start_timestamp)
            self.end_timestamp = to_pd_timestamp(end_timestamp)
        else:
            assert False, 'either the_timestamp or both start_timestamp and end_timestamp must be provided'

        self.long_threshold = long_threshold
        self.short_threshold = short_threshold
        self.level = level

        self.filter_factors: List[FilterFactor] = []
        self.score_factors: List[ScoreFactor] = []
        self.filter_result = None
        self.score_result = None

        self.open_long_df: DataFrame = None
        self.open_short_df: DataFrame = None

        self.init_factors(entity_ids=entity_ids,
                          entity_type=entity_type,
                          exchanges=exchanges,
                          codes=codes,
                          the_timestamp=the_timestamp,
                          start_timestamp=start_timestamp,
                          end_timestamp=end_timestamp,
                          level=self.level)
Example #7
    def record(self, entity, start, end, size, timestamps):
        if self.start_timestamp:
            start = max(self.start_timestamp, to_pd_timestamp(start))

        start_timestamp = to_time_str(start)

        ccxt_exchange = CCXTAccount.get_ccxt_exchange(entity.exchange)

        if ccxt_exchange.has['fetchOHLCV']:
            limit = CCXTAccount.get_kdata_limit(entity.exchange)

            limit = min(size, limit)

            kdata_list = []

            if CCXTAccount.exchange_conf[entity.exchange]['support_since']:
                kdatas = ccxt_exchange.fetch_ohlcv(
                    entity.code,
                    timeframe=self.ccxt_trading_level,
                    since=start_timestamp)
            else:
                kdatas = ccxt_exchange.fetch_ohlcv(
                    entity.code,
                    timeframe=self.ccxt_trading_level,
                    limit=limit)

            # always ignore the latest kdata, because it is not finished yet
            for kdata in kdatas[0:-1]:
                current_timestamp = kdata[0]
                if self.level == IntervalLevel.LEVEL_1DAY:
                    current_timestamp = to_time_str(current_timestamp)

                kdata_json = {
                    'timestamp': to_pd_timestamp(current_timestamp),
                    'open': kdata[1],
                    'high': kdata[2],
                    'low': kdata[3],
                    'close': kdata[4],
                    'volume': kdata[5],
                    'name': entity.name,
                    'provider': 'ccxt',
                    'level': self.level.value
                }
                kdata_list.append(kdata_json)

            return kdata_list
        else:
            self.logger.warning("exchange:{} not support fetchOHLCV".format(
                entity.exchange))
Example #8
    def persist_index(self, df) -> None:
        df['timestamp'] = df['timestamp'].apply(to_pd_timestamp)
        df['list_date'] = df['list_date'].apply(to_pd_timestamp)
        df['id'] = df['code'].apply(lambda code: f'index_cn_{code}')
        df['entity_id'] = df['id']
        df['exchange'] = 'cn'
        df['entity_type'] = 'index'

        df = df.dropna(axis=0, how='any')
        df = df.drop_duplicates(subset='id', keep='last')

        df_to_db(df=df,
                 data_schema=Index,
                 provider=self.provider,
                 force_update=False)
Example #9
    def record(self, entity, start, end, size, timestamps):
        if size < 20:
            size = 20

        ccxt_exchange = CCXTAccount.get_ccxt_exchange(entity.exchange)

        if ccxt_exchange.has['fetchTrades']:
            limit = CCXTAccount.get_tick_limit(entity.exchange)

            limit = min(size, limit)

            kdata_list = []

            trades = ccxt_exchange.fetch_trades(entity.code, limit=limit)

            for trade in trades:
                kdata_json = {
                    'name': entity.name,
                    'provider': 'ccxt',
                    # 'id': trade['id'],
                    'level': 'tick',
                    'order': trade['order'],
                    'timestamp': to_pd_timestamp(trade['timestamp']),
                    'price': trade['price'],
                    'volume': trade['amount'],
                    'direction': trade['side'],
                    'order_type': trade['type'],
                    'turnover': trade['price'] * trade['amount']
                }
                kdata_list.append(kdata_json)

            return kdata_list
        else:
            self.logger.warning("exchange:{} does not support fetchTrades".format(entity.exchange))
Example #10
    def record(self, entity, start, end, size, timestamps):
        if self.start_timestamp:
            start = max(self.start_timestamp, to_pd_timestamp(start))

        end = now_pd_timestamp() + timedelta(days=1)

        start_timestamp = to_time_str(start)
        end_timestamp = to_time_str(end)
        # no price adjustment (raw prices)
        df = get_price(to_jq_entity_id(entity), start_date=to_time_str(start_timestamp),
                       end_date=end_timestamp,
                       frequency=self.jq_trading_level,
                       fields=['open', 'close', 'low', 'high', 'volume', 'money'],
                       skip_paused=True, fq=None)
        df.index.name = 'timestamp'
        df.reset_index(inplace=True)
        df['name'] = entity.name
        df.rename(columns={'money': 'turnover'}, inplace=True)

        df['timestamp'] = pd.to_datetime(df['timestamp'])
        df['provider'] = 'joinquant'
        df['level'] = self.level.value

        # remove the unfinished kdata
        if is_in_trading(entity_type='stock', exchange='sh', timestamp=df.iloc[-1, :]['timestamp']):
            df = df.iloc[:-1, :]

        return df.to_dict(orient='records')
Example #11
def coin_finished_timestamp(timestamp: pd.Timestamp, level: IntervalLevel):
    timestamp = to_pd_timestamp(timestamp)

    if timestamp.microsecond != 0:
        return False

    return timestamp.minute % level.to_minute() == 0
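A quick standalone illustration of the boundary check above, using only pandas; the 5-minute value stands in for level.to_minute() and is purely an assumption for the demo.

import pandas as pd


def finished_on_boundary(timestamp, level_minutes):
    # a finished bar has no sub-second part and its minute value
    # is a multiple of the level's minute size, mirroring the check above
    ts = pd.Timestamp(timestamp)
    if ts.microsecond != 0:
        return False
    return ts.minute % level_minutes == 0


print(finished_on_boundary('2020-01-01 00:05:00', 5))  # True
print(finished_on_boundary('2020-01-01 00:07:00', 5))  # False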
Example #12
    def record(self, entity, start, end, size, timestamps):
        the_url = self.url.format("{}".format(entity.code), level_flag(self.level), size,
                                  now_time_str(fmt=TIME_FORMAT_DAY1))

        resp = requests.get(the_url)
        results = json_callback_param(resp.text)

        kdatas = []

        if results:
            klines = results['data']['klines']

            # TODO: the last unfinished kdata is ignored for now; this could be handled more precisely if needed
            for result in klines[:-1]:
                # "2000-01-28,1005.26,1012.56,1173.12,982.13,3023326,3075552000.00"
                # time,open,close,high,low,volume,turnover
                fields = result.split(',')
                the_timestamp = to_pd_timestamp(fields[0])

                the_id = generate_kdata_id(entity_id=entity.id, timestamp=the_timestamp, level=self.level)

                kdatas.append(dict(id=the_id,
                                   timestamp=the_timestamp,
                                   entity_id=entity.id,
                                   code=entity.code,
                                   name=entity.name,
                                   level=self.level.value,
                                   open=to_float(fields[1]),
                                   close=to_float(fields[2]),
                                   high=to_float(fields[3]),
                                   low=to_float(fields[4]),
                                   volume=to_float(fields[5]),
                                   turnover=to_float(fields[6])))
        return kdatas
Example #13
    def load_data(self):
        if self.entity_ids:
            self.data_df = get_data(data_schema=self.data_schema, entity_ids=self.entity_ids,
                                    provider=self.provider, columns=self.columns,
                                    start_timestamp=self.start_timestamp,
                                    end_timestamp=self.end_timestamp, filters=self.filters, order=self.order,
                                    limit=self.limit,
                                    level=self.level,
                                    time_field=self.time_field,
                                    index=self.time_field)
        else:
            self.data_df = get_data(data_schema=self.data_schema, codes=self.codes,
                                    provider=self.provider, columns=self.columns,
                                    start_timestamp=self.start_timestamp,
                                    end_timestamp=self.end_timestamp, filters=self.filters, order=self.order,
                                    limit=self.limit,
                                    level=self.level,
                                    time_field=self.time_field,
                                    index=self.time_field)

        if self.trip_timestamp:
            if self.level == IntervalLevel.LEVEL_1DAY:
                self.data_df[self.time_field] = self.data_df[self.time_field].apply(
                    lambda x: to_pd_timestamp(to_time_str(x)))

        if df_is_not_null(self.data_df):
            self.normal_data = NormalData(df=self.data_df, category_field=self.category_field,
                                          index_field=self.time_field, is_timeseries=True)
            self.data_df = self.normal_data.data_df

        for listener in self.data_listeners:
            listener.on_data_loaded(self.data_df)
Example #14
def is_in_trading(entity_type, exchange, timestamp):
    current = now_pd_timestamp()
    timestamp = to_pd_timestamp(timestamp)
    if is_same_date(current, timestamp):
        for start, end in get_trading_intervals(entity_type=entity_type, exchange=exchange):
            if current > date_and_time(current, start) and current < date_and_time(current, end):
                return True
    return False
Example #15
def china_stock_finished_timestamp(timestamp: pd.Timestamp, level: IntervalLevel):
    timestamp = to_pd_timestamp(timestamp)

    if timestamp.microsecond != 0:
        return False

    return to_time_str(timestamp, fmt=TIME_FORMAT_MINUTE1) in china_stock_level_map_finished_timestamps.get(
        level.value)
Example #16
    def generate_domain(self, entity, original_data):
        """
        generate the data_schema instance using entity and original_data,the original_data is from record result

        :param entity:
        :param original_data:
        """

        got_new_data = False

        # if the domain is directly generated in record method, we just return it
        if isinstance(original_data, self.data_schema):
            got_new_data = True
            return got_new_data, original_data

        the_id = self.generate_domain_id(entity, original_data)

        # alternative way:
        # item = self.session.query(self.data_schema).get(the_id)

        items = get_data(data_schema=self.data_schema,
                         session=self.session,
                         provider=self.provider,
                         entity_id=entity.id,
                         filters=[self.data_schema.id == the_id],
                         return_type='domain')

        if items and not self.force_update:
            self.logger.info('ignore the data {}:{} saved before'.format(
                self.data_schema, the_id))
            return got_new_data, None

        if not items:
            timestamp_str = original_data[self.get_original_time_field()]
            timestamp = None
            try:
                timestamp = to_pd_timestamp(timestamp_str)
            except Exception as e:
                self.logger.exception(e)

            if 'name' in get_schema_columns(self.data_schema):
                domain_item = self.data_schema(id=the_id,
                                               code=entity.code,
                                               name=entity.name,
                                               entity_id=entity.id,
                                               timestamp=timestamp)
            else:
                domain_item = self.data_schema(id=the_id,
                                               code=entity.code,
                                               entity_id=entity.id,
                                               timestamp=timestamp)
            got_new_data = True
        else:
            domain_item = items[0]

        fill_domain_from_dict(domain_item, original_data, self.get_data_map())
        return got_new_data, domain_item
Example #17
    def run(self):
        for security_item in self.entities:
            assert isinstance(security_item, Stock)

            if security_item.exchange == 'sh':
                fc = "{}01".format(security_item.code)
            if security_item.exchange == 'sz':
                fc = "{}02".format(security_item.code)

            # basic company profile
            param = {"color": "w", "fc": fc, "SecurityCode": "SZ300059"}
            resp = requests.post(
                'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetJiBenZiLiao',
                json=param)
            resp.encoding = 'utf8'

            resp_json = resp.json()['Result']['JiBenZiLiao']

            security_item.profile = resp_json['CompRofile']
            security_item.main_business = resp_json['MainBusiness']
            security_item.date_of_establishment = to_pd_timestamp(
                resp_json['FoundDate'])

            # related industries
            industries = ','.join(resp_json['Industry'].split('-'))
            security_item.industries = industries

            # related concept indices
            security_item.concept_indices = resp_json['Block']

            # related area indices
            security_item.area_indices = resp_json['Provice']

            self.sleep()

            # issuance-related data
            param = {"color": "w", "fc": fc}
            resp = requests.post(
                'https://emh5.eastmoney.com/api/GongSiGaiKuang/GetFaXingXiangGuan',
                json=param)
            resp.encoding = 'utf8'

            resp_json = resp.json()['Result']['FaXingXiangGuan']

            security_item.issue_pe = to_float(resp_json['PEIssued'])
            security_item.price = to_float(resp_json['IssuePrice'])
            security_item.issues = to_float(resp_json['ShareIssued'])
            security_item.raising_fund = to_float((resp_json['NetCollection']))
            security_item.net_winning_rate = pct_to_float(
                resp_json['LotRateOn'])

            self.session.commit()

            self.logger.info('finish recording stock meta for:{}'.format(
                security_item.code))

            self.sleep()
Example #18
    def on_trading_close(self, timestamp):
        self.logger.info('on_trading_close:{}'.format(timestamp))

        self.latest_account['value'] = 0
        self.latest_account['all_value'] = 0
        for position in self.latest_account['positions']:
            entity_type, _, _ = decode_entity_id(position['entity_id'])
            data_schema = get_kdata_schema(entity_type, level=self.level)

            kdata = get_kdata(provider=self.provider,
                              level=self.level,
                              entity_id=position['entity_id'],
                              order=data_schema.timestamp.desc(),
                              end_timestamp=timestamp,
                              limit=1)

            # use qfq for stock
            if entity_type == 'stock':
                closing_price = kdata['qfq_close'][0]
            else:
                closing_price = kdata['close'][0]

            position['available_long'] = position['long_amount']
            position['available_short'] = position['short_amount']

            if closing_price:
                if (position['long_amount']
                        is not None) and position['long_amount'] > 0:
                    position['value'] = position['long_amount'] * closing_price
                    self.latest_account['value'] += position['value']
                elif (position['short_amount']
                      is not None) and position['short_amount'] > 0:
                    position['value'] = 2 * (position['short_amount'] *
                                             position['average_short_price'])
                    position[
                        'value'] -= position['short_amount'] * closing_price
                    self.latest_account['value'] += position['value']
            else:
                self.logger.warning(
                    'could not refresh close value for position:{},timestamp:{}'
                    .format(position['entity_id'], timestamp))

        # remove the empty position
        self.latest_account['positions'] = [
            position for position in self.latest_account['positions']
            if position['long_amount'] > 0 or position['short_amount'] > 0
        ]

        self.latest_account['all_value'] = self.latest_account[
            'value'] + self.latest_account['cash']
        self.latest_account['closing'] = True
        self.latest_account['timestamp'] = to_pd_timestamp(timestamp)

        self.logger.info('on_trading_close:{},latest_account:{}'.format(
            timestamp, self.latest_account))
        self.persist_account(timestamp)
Example #19
def get_etf_stocks(code=None,
                   codes=None,
                   ids=None,
                   timestamp=now_pd_timestamp(),
                   provider=None):
    latests: List[EtfStock] = EtfStock.query_data(
        provider=provider,
        code=code,
        end_timestamp=timestamp,
        order=EtfStock.timestamp.desc(),
        limit=1,
        return_type='domain')
    if latests:
        latest_record = latests[0]
        # get the latest report
        df = EtfStock.query_data(
            provider=provider,
            code=code,
            codes=codes,
            ids=ids,
            end_timestamp=timestamp,
            filters=[EtfStock.report_date == latest_record.report_date])
        # the latest record is an annual or semi-annual report
        if latest_record.report_period == ReportPeriod.year or latest_record.report_period == ReportPeriod.half_year:
            return df
        # quarterly report: combine it with the annual/semi-annual report to compute the holdings
        else:
            step = 0
            while True:
                report_date = get_recent_report_date(latest_record.report_date,
                                                     step=step)

                pre_df = EtfStock.query_data(
                    provider=provider,
                    code=code,
                    codes=codes,
                    ids=ids,
                    end_timestamp=timestamp,
                    filters=[
                        EtfStock.report_date == to_pd_timestamp(report_date)
                    ])
                df = df.append(pre_df)

                # reached a semi-annual or annual report
                if (ReportPeriod.half_year.value
                        in pre_df['report_period'].tolist()) or (
                            ReportPeriod.year.value
                            in pre_df['report_period'].tolist()):
                    # keep only the latest holding of each stock
                    df = df.drop_duplicates(subset=['stock_code'],
                                            keep='first')
                    return df
                step = step + 1

                if step >= 20:
                    break
Example #20
def test_000778_rights_issue_detail():
    result = get_rights_issue_detail(session=session, provider='eastmoney', return_type='domain',
                                     codes=['000778'], end_timestamp='2018-09-30',
                                     order=RightsIssueDetail.timestamp.desc())
    assert len(result) == 2
    latest: RightsIssueDetail = result[0]
    assert latest.timestamp == to_pd_timestamp('2001-09-10')
    assert latest.rights_issues == 43570000
    assert latest.rights_raising_fund == 492300000
    assert latest.rights_issue_price == 11.3
Example #21
def test_000778_spo_detial():
    result = get_spo_detail(session=session, provider='eastmoney', return_type='domain',
                            codes=['000778'], end_timestamp='2018-09-30',
                            order=SpoDetail.timestamp.desc())
    assert len(result) == 4
    latest: SpoDetail = result[0]
    assert latest.timestamp == to_pd_timestamp('2017-04-01')
    assert latest.spo_issues == 347600000
    assert latest.spo_price == 5.15
    assert latest.spo_raising_fund == 1766000000
Example #22
def is_trading_date(entity_type, exchange, timestamp: pd.Timestamp):
    if isinstance(timestamp, str):
        timestamp = to_pd_timestamp(timestamp)

    # just ignore 00:00
    # the_date = date_and_time(timestamp, '09:00')

    if entity_type == 'stock':
        return (timestamp.weekday() != 5) and (timestamp.weekday() != 6)

    return True
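A standalone check of the weekday logic used above; it only mirrors the stock branch and, like the original, ignores holidays.

import pandas as pd


def is_weekday(timestamp):
    # weekday(): Monday=0 ... Saturday=5, Sunday=6
    return pd.Timestamp(timestamp).weekday() not in (5, 6)


print(is_weekday('2020-01-03'))  # Friday -> True
print(is_weekday('2020-01-04'))  # Saturday -> False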
Example #23
    def get_targets(self, timestamp, target_type: TargetType = TargetType.open_long) -> List[str]:
        df = None
        if target_type == TargetType.open_long:
            df = self.open_long_df
        elif target_type == TargetType.open_short:
            df = self.open_short_df

        if pd_is_not_null(df):
            if timestamp in df.index:
                target_df = df.loc[[to_pd_timestamp(timestamp)], :]
                return target_df['entity_id'].tolist()
        return []
Example #24
def test_000778_dividend_financing():
    result = get_dividend_financing(session=session, provider='eastmoney', return_type='domain',
                                    codes=['000778'], end_timestamp='2018-09-30',
                                    order=DividendFinancing.timestamp.desc())
    assert len(result) == 22
    latest: DividendFinancing = result[1]
    assert latest.timestamp == to_pd_timestamp('2017')
    assert latest.dividend_money == 598632026.4
    assert latest.spo_issues == 347572815.0
    assert latest.rights_issues == 0
    assert latest.ipo_issues == 0
Example #25
    def record(self, entity, start, end, size, timestamps):
        if self.start_timestamp:
            start = max(self.start_timestamp, to_pd_timestamp(start))

        # if self.level < IntervalLevel.LEVEL_1HOUR:
        #     start = '2019-01-01'

        end = now_pd_timestamp()

        start_timestamp = to_time_str(start)

        # JoinQuant's get_price must be given an explicit end time, otherwise it may return future data
        end_timestamp = to_time_str(end, fmt=TIME_FORMAT_MINUTE2)
        # no price adjustment (raw prices)
        df = get_price(
            to_jq_entity_id(entity),
            start_date=to_time_str(start_timestamp),
            end_date=end_timestamp,
            frequency=self.jq_trading_level,
            fields=['open', 'close', 'low', 'high', 'volume', 'money'],
            skip_paused=True,
            fq=None)
        if df_is_not_null(df):
            df.index.name = 'timestamp'
            df.reset_index(inplace=True)
            df['name'] = entity.name
            df.rename(columns={'money': 'turnover'}, inplace=True)

            df['entity_id'] = entity.id
            df['timestamp'] = pd.to_datetime(df['timestamp'])
            df['provider'] = 'joinquant'
            df['level'] = self.level.value
            df['code'] = entity.code

            def generate_kdata_id(se):
                if self.level >= IntervalLevel.LEVEL_1DAY:
                    return "{}_{}".format(
                        se['entity_id'],
                        to_time_str(se['timestamp'], fmt=TIME_FORMAT_DAY))
                else:
                    return "{}_{}".format(
                        se['entity_id'],
                        to_time_str(se['timestamp'], fmt=TIME_FORMAT_ISO8601))

            df['id'] = df[['entity_id', 'timestamp']].apply(generate_kdata_id,
                                                            axis=1)

            df_to_db(df=df,
                     data_schema=self.data_schema,
                     provider=self.provider,
                     force=self.force_update)

        return None
Example #26
    def on_finish_entity(self, entity):
        kdatas = get_kdata(
            provider=self.provider,
            entity_id=entity.id,
            level=self.level.value,
            order=self.data_schema.timestamp.asc(),
            return_type='domain',
            session=self.session,
            filters=[
                self.data_schema.hfq_close.is_(None),
                self.data_schema.timestamp >= to_pd_timestamp('2005-01-01')
            ])
        if kdatas:
            start = kdatas[0].timestamp
            end = kdatas[-1].timestamp

            # get hfq from joinquant
            df = get_price(to_jq_entity_id(entity),
                           start_date=to_time_str(start),
                           end_date=now_time_str(),
                           frequency='daily',
                           fields=['factor', 'open', 'close', 'low', 'high'],
                           skip_paused=True,
                           fq='post')
            if df_is_not_null(df):
                # fill hfq data
                for kdata in kdatas:
                    time_str = to_time_str(kdata.timestamp)
                    if time_str in df.index:
                        kdata.hfq_open = df.loc[time_str, 'open']
                        kdata.hfq_close = df.loc[time_str, 'close']
                        kdata.hfq_high = df.loc[time_str, 'high']
                        kdata.hfq_low = df.loc[time_str, 'low']
                        kdata.factor = df.loc[time_str, 'factor']
                self.session.add_all(kdatas)
                self.session.commit()

                latest_factor = df.factor.iloc[-1]
                # the factor has not changed yet, so there is no need to recompute the historical qfq data
                if latest_factor == self.current_factors.get(entity.id):
                    sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                          'entity_id=\'{}\' and level=\'{}\' and (qfq_close isnull or qfq_high isnull or qfq_low isnull or qfq_open isnull)'.format(
                        self.data_schema.__table__, latest_factor, latest_factor, latest_factor, latest_factor,
                        entity.id, self.level.value)
                else:
                    sql = 'UPDATE {} SET qfq_close=hfq_close/{},qfq_high=hfq_high/{}, qfq_open= hfq_open/{}, qfq_low= hfq_low/{} where ' \
                          'entity_id=\'{}\' and level=\'{}\''.format(self.data_schema.__table__, latest_factor,
                                                                     latest_factor, latest_factor, latest_factor,
                                                                     entity.id,
                                                                     self.level.value)
                self.logger.info(sql)
                self.session.execute(sql)
                self.session.commit()
Example #27
def to_report_period_type(report_period):
    the_date = to_pd_timestamp(report_period)
    if the_date.month == 3 and the_date.day == 31:
        return ReportPeriod.season1.value
    if the_date.month == 6 and the_date.day == 30:
        return ReportPeriod.half_year.value
    if the_date.month == 9 and the_date.day == 30:
        return ReportPeriod.season3.value
    if the_date.month == 12 and the_date.day == 31:
        return ReportPeriod.year.value

    return None
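A self-contained sketch of the same quarter-end mapping; the plain strings below merely stand in for the ReportPeriod enum values returned above.

import pandas as pd


def report_period_of(timestamp):
    ts = pd.Timestamp(timestamp)
    quarter_ends = {(3, 31): 'season1', (6, 30): 'half_year',
                    (9, 30): 'season3', (12, 31): 'year'}
    return quarter_ends.get((ts.month, ts.day))


print(report_period_of('2019-06-30'))  # half_year
print(report_period_of('2019-07-01'))  # None: not a report date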
Example #28
def risky_company(the_date=to_pd_timestamp(now_time_str()), income_yoy=-0.1, profit_yoy=-0.1, entity_ids=None):
    codes = []
    start_timestamp = to_pd_timestamp(the_date) - datetime.timedelta(130)
    # falling revenue, falling profit, low current ratio, low quick ratio
    finance_filter = or_(FinanceFactor.op_income_growth_yoy < income_yoy,
                         FinanceFactor.net_profit_growth_yoy <= profit_yoy,
                         FinanceFactor.current_ratio < 0.7,
                         FinanceFactor.quick_ratio < 0.5)
    df = FinanceFactor.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp, filters=[finance_filter],
                                  columns=['code'])
    if pd_is_not_null(df):
        codes = codes + df.code.tolist()

    # high accounts receivable, high inventories, high goodwill
    balance_filter = (BalanceSheet.accounts_receivable + BalanceSheet.inventories + BalanceSheet.goodwill) \
                     > BalanceSheet.total_equity / 2
    df = BalanceSheet.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp, filters=[balance_filter],
                                 columns=['code'])
    if pd_is_not_null(df):
        codes = codes + df.code.tolist()

    # accounts receivable > net profit / 2
    df1 = BalanceSheet.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp,
                                  columns=[BalanceSheet.code, BalanceSheet.accounts_receivable])
    if pd_is_not_null(df1):
        df1.drop_duplicates(subset='code', keep='last', inplace=True)
        df1 = df1.set_index('code', drop=True).sort_index()

    df2 = IncomeStatement.query_data(entity_ids=entity_ids, start_timestamp=start_timestamp,
                                     columns=[IncomeStatement.code,
                                              IncomeStatement.net_profit])
    if pd_is_not_null(df2):
        df2.drop_duplicates(subset='code', keep='last', inplace=True)
        df2 = df2.set_index('code', drop=True).sort_index()

    if pd_is_not_null(df1) and pd_is_not_null(df2):
        codes = codes + df1[df1.accounts_receivable > df2.net_profit / 2].index.tolist()

    return list(set(codes))
Example #29
    def init_entities(self):
        items = get_data(
            data_schema=self.data_schema,
            session=self.session,
            provider=self.provider,
            entity_id='user_github_mojombo',
            filters=[self.data_schema.id == 'user_github_mojombo'],
            return_type='domain')

        first_user = GithubUser(
            id='user_github_mojombo',
            entity_id='user_github_mojombo',
            node_id='MDQ6VXNlcjE=',
            avatar_url='https://avatars0.githubusercontent.com/u/1?v=4',
            gravatar_id=None,
            site_admin=False,
            code='mojombo',
            name='Tom Preston-Werner',
            company=None,
            blog='http://tom.preston-werner.com',
            location='San Francisco',
            email=None,
            hireable=False,
            bio=None,
            public_repos=61,
            public_gists=62,
            followers=21529,
            following=11,
            timestamp=to_pd_timestamp(to_time_str("2007-10-20T05:24:19Z")),
            created_timestamp=to_pd_timestamp(
                to_time_str("2007-10-20T05:24:19Z")),
            updated_timestamp=to_pd_timestamp(
                to_time_str("2019-06-25T17:22:10Z")))

        if not items:
            self.session.add(first_user)
            self.session.commit()

        self.entities = [first_user]
Example #30
def to_jq_report_period(timestamp):
    the_date = to_pd_timestamp(timestamp)
    report_period = to_report_period_type(timestamp)
    if report_period == ReportPeriod.year.value:
        return '{}'.format(the_date.year)
    if report_period == ReportPeriod.season1.value:
        return '{}q1'.format(the_date.year)
    if report_period == ReportPeriod.half_year.value:
        return '{}q2'.format(the_date.year)
    if report_period == ReportPeriod.season3.value:
        return '{}q3'.format(the_date.year)

    assert False, 'unhandled report period for timestamp: {}'.format(timestamp)
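Reading the branches above, the expected mapping is (illustrative dates only):

# to_jq_report_period('2019-03-31') -> '2019q1'
# to_jq_report_period('2019-06-30') -> '2019q2'
# to_jq_report_period('2019-09-30') -> '2019q3'
# to_jq_report_period('2019-12-31') -> '2019'
# any other date falls through to the assert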