Example no. 1
    def eval_size_of_timestamp(self,
                               start_timestamp: pd.Timestamp,
                               end_timestamp: pd.Timestamp,
                               level: IntervalLevel,
                               one_day_trading_minutes):
        assert end_timestamp is not None

        time_delta = end_timestamp - to_pd_timestamp(start_timestamp)

        one_day_trading_seconds = one_day_trading_minutes * 60

        if level == IntervalLevel.LEVEL_1DAY:
            return time_delta.days

        if level == IntervalLevel.LEVEL_1WEEK:
            return int(math.ceil(time_delta.days / 7))

        if level == IntervalLevel.LEVEL_1MON:
            return int(math.ceil(time_delta.days / 30))

        if time_delta.days > 0:
            # multi-day span: reserve a full extra trading day of bars
            seconds = (time_delta.days + 1) * one_day_trading_seconds
            return int(math.ceil(seconds / level.to_second()))
        else:
            # intraday span: cap at one trading day's worth of bars
            seconds = time_delta.total_seconds()
            return min(int(math.ceil(seconds / level.to_second())),
                       one_day_trading_seconds // level.to_second())
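
A rough standalone sketch of the sizing arithmetic above, assuming level.to_second() returns the bar length in seconds; the 1-hour bar size and the 4-hour session are illustrative values, not findy's actual configuration:

import math
import pandas as pd

start = pd.Timestamp('2024-01-01')
end = pd.Timestamp('2024-01-11')
time_delta = end - start                       # 10 days

one_day_trading_minutes = 4 * 60               # assumed session length
one_day_trading_seconds = one_day_trading_minutes * 60
level_seconds = 3600                           # hypothetical 1-hour bars

# multi-day branch: one extra day of bars as head-room, as in the method
size = int(math.ceil((time_delta.days + 1) * one_day_trading_seconds / level_seconds))
print(size)  # 44 bars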
Example no. 2
    def __init__(self,
                 entity_type: EntityType = EntityType.Stock,
                 exchanges=None,
                 entity_ids=None,
                 codes=None,
                 batch_size=10,
                 force_update=True,
                 sleeping_time=10,
                 default_size=findy_config['batch_size'],
                 real_time=False,
                 fix_duplicate_way='ignore',
                 start_timestamp=None,
                 end_timestamp=None,
                 close_hour=0,
                 close_minute=0,
                 # added by this subclass
                 level=IntervalLevel.LEVEL_1DAY,
                 kdata_use_begin_time=False,
                 one_day_trading_minutes=24 * 60,
                 share_para=None):
        super().__init__(entity_type, exchanges, entity_ids, codes, batch_size,
                         force_update, sleeping_time, default_size, real_time,
                         fix_duplicate_way, start_timestamp, end_timestamp,
                         close_hour, close_minute, share_para=share_para)

        self.level = IntervalLevel(level)
        self.kdata_use_begin_time = kdata_use_begin_time
        self.one_day_trading_minutes = one_day_trading_minutes
Example no. 3
    def get_interval_timestamps(cls, start_date, end_date, level: IntervalLevel):
        """
        generate the timestamps for the level

        :param start_date:
        :param end_date:
        :param level:
        """

        for current_date in cls.get_trading_dates(start_date=start_date, end_date=end_date):
            if level >= IntervalLevel.LEVEL_1DAY:
                yield current_date
            else:
                start_end_list = cls.get_trading_intervals()

                for start_end in start_end_list:
                    start = start_end[0]
                    end = start_end[1]

                    current_timestamp = to_pd_datetime(the_date=current_date, the_time=start)
                    end_timestamp = to_pd_datetime(the_date=current_date, the_time=end)

                    while current_timestamp <= end_timestamp:
                        yield current_timestamp
                        current_timestamp = current_timestamp + timedelta(minutes=level.to_minute())
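
A standalone illustration of the intraday branch above: walk each trading interval in level-sized steps. The interval times and the 30-minute level are assumptions for the sketch, not findy's actual trading calendar:

from datetime import datetime, timedelta

trading_intervals = [('09:30', '11:30'), ('13:00', '15:00')]  # assumed sessions
level_minutes = 30                                            # assumed level

day = '2024-01-02'
for start, end in trading_intervals:
    current = datetime.strptime(f'{day} {start}', '%Y-%m-%d %H:%M')
    stop = datetime.strptime(f'{day} {end}', '%Y-%m-%d %H:%M')
    while current <= stop:
        print(current)                        # one bar timestamp per step
        current += timedelta(minutes=level_minutes)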
Example no. 4
    def __init__(self,
                 exchanges=[e.value for e in ChnExchange],
                 entity_ids=None,
                 codes=None,
                 batch_size=10,
                 force_update=True,
                 sleeping_time=0,
                 default_size=findy_config['batch_size'],
                 real_time=False,
                 fix_duplicate_way='ignore',
                 start_timestamp=None,
                 end_timestamp=None,
                 level=IntervalLevel.LEVEL_1WEEK,
                 kdata_use_begin_time=False,
                 close_hour=15,
                 close_minute=0,
                 one_day_trading_minutes=4 * 60,
                 adjust_type=AdjustType.qfq,
                 share_para=None) -> None:
        level = IntervalLevel(level)
        adjust_type = AdjustType(adjust_type)
        self.data_schema = self.get_kdata_schema(entity_type=EntityType.Stock, level=level, adjust_type=adjust_type)
        self.bao_trading_level = to_bao_trading_level(level)

        super().__init__(EntityType.Stock, exchanges, entity_ids, codes, batch_size, force_update, sleeping_time,
                         default_size, real_time, fix_duplicate_way, start_timestamp, end_timestamp, close_hour,
                         close_minute, level, kdata_use_begin_time, one_day_trading_minutes, share_para=share_para)
        self.adjust_type = adjust_type
Example no. 5
def level_flag(level: IntervalLevel):
    level = IntervalLevel(level)
    if level == IntervalLevel.LEVEL_1DAY:
        return 101
    if level == IntervalLevel.LEVEL_1WEEK:
        return 102
    if level == IntervalLevel.LEVEL_1MON:
        return 103
Example no. 6
    def get_kdata_schema(entity_type: EntityType,
                         level: Union[IntervalLevel, str] = IntervalLevel.LEVEL_1DAY,
                         adjust_type: Union[AdjustType, str] = None):
        if type(level) == str:
            level = IntervalLevel(level)
        if type(adjust_type) == str:
            adjust_type = AdjustType(adjust_type)

        # kdata schema naming rule:
        # {EntityType.value.capitalize()}{IntervalLevel.value.capitalize()}[{AdjustType.value.capitalize()}]Kdata
        if adjust_type and (adjust_type != AdjustType.qfq):
            schema_str = f'{entity_type.value.capitalize()}{level.value.capitalize()}{adjust_type.value.capitalize()}Kdata'
        else:
            schema_str = f'{entity_type.value.capitalize()}{level.value.capitalize()}Kdata'
        return get_schema_by_name(schema_str)
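
A minimal sketch of that naming rule with assumed enum string values ('stock', '1d', '1wk', 'hfq'); the real values come from findy's EntityType, IntervalLevel and AdjustType enums:

def kdata_schema_name(entity: str, level: str, adjust: str = None) -> str:
    # hypothetical helper mirroring the rule above, for illustration only
    if adjust and adjust != 'qfq':
        return f'{entity.capitalize()}{level.capitalize()}{adjust.capitalize()}Kdata'
    return f'{entity.capitalize()}{level.capitalize()}Kdata'

print(kdata_schema_name('stock', '1d'))          # Stock1dKdata
print(kdata_schema_name('stock', '1wk', 'hfq'))  # Stock1wkHfqKdata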
Example no. 7
def to_high_level_kdata(kdata_df: pd.DataFrame, to_level: IntervalLevel):
    def to_close(s):
        if pd_valid(s):
            return s[-1]

    def to_open(s):
        if pd_valid(s):
            return s[0]

    def to_high(s):
        return np.max(s)

    def to_low(s):
        return np.min(s)

    def to_sum(s):
        return np.sum(s)

    original_level = kdata_df['level'][0]
    entity_id = kdata_df['entity_id'][0]
    provider = kdata_df['provider'][0]
    name = kdata_df['name'][0]
    code = kdata_df['code'][0]

    entity_type, _, _ = decode_entity_id(entity_id=entity_id)

    assert IntervalLevel(original_level) <= IntervalLevel.LEVEL_1DAY
    assert IntervalLevel(original_level) < IntervalLevel(to_level)

    df: pd.DataFrame = None
    if to_level == IntervalLevel.LEVEL_1WEEK:
        # loffset=-2 days shifts the weekly label from Sunday back to the preceding Friday
        if entity_type == EntityType.Stock:
            df = kdata_df.resample('W', loffset=pd.DateOffset(days=-2)).apply({
                'close': to_close,
                'open': to_open,
                'high': to_high,
                'low': to_low,
                'volume': to_sum,
                'turnover': to_sum
            })
        else:
            df = kdata_df.resample('W', loffset=pd.DateOffset(days=-2)).apply({
                'close': to_close,
                'open': to_open,
                'high': to_high,
                'low': to_low,
                'volume': to_sum,
                'turnover': to_sum
            })
    df = df.dropna()
    # id        entity_id  timestamp   provider    code  name level
    df['entity_id'] = entity_id
    df['provider'] = provider
    df['code'] = code
    df['name'] = name
    return df
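
Note that resample's loffset argument was deprecated in pandas 1.1 and removed in 2.0. A rough equivalent for daily stock data, assuming a plain DatetimeIndex, is to anchor the weekly bin on Friday instead:

import numpy as np
import pandas as pd

# toy daily k-data indexed by timestamp (the real kdata_df comes from findy's schema)
idx = pd.date_range('2024-01-01', periods=10, freq='B')
daily = pd.DataFrame({'open': np.arange(10.0), 'close': np.arange(10.0) + 0.5,
                      'high': np.arange(10.0) + 1.0, 'low': np.arange(10.0) - 1.0,
                      'volume': 100.0, 'turnover': 1000.0}, index=idx)

# 'W-FRI' labels each week by its Friday, as loffset=-2 days did with 'W'
weekly = daily.resample('W-FRI').agg({'open': 'first', 'close': 'last',
                                      'high': 'max', 'low': 'min',
                                      'volume': 'sum', 'turnover': 'sum'})
print(weekly)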
Example no. 8
    def __init__(self,
                 region: Region,
                 data_schema: Type[Mixin],
                 entity_schema: Type[EntityMixin],
                 provider: Provider = None,
                 entity_ids: List[str] = None,
                 exchanges: List[str] = None,
                 codes: List[str] = None,
                 the_timestamp: Union[str, pd.Timestamp] = None,
                 start_timestamp: Union[str, pd.Timestamp] = None,
                 end_timestamp: Union[str, pd.Timestamp] = None,
                 columns: List = None,
                 filters: List = None,
                 order: object = None,
                 limit: int = None,
                 level: IntervalLevel = None,
                 category_field: str = 'entity_id',
                 time_field: str = 'timestamp',
                 computing_window: int = None) -> None:
        self.logger = logging.getLogger(self.__class__.__name__)

        self.data_schema = data_schema
        self.entity_schema = entity_schema

        self.region = region
        self.provider = provider

        if end_timestamp is None:
            end_timestamp = now_pd_timestamp(self.region)

        self.the_timestamp = the_timestamp
        if the_timestamp:
            self.start_timestamp = the_timestamp
            self.end_timestamp = the_timestamp
        else:
            self.start_timestamp = start_timestamp
            self.end_timestamp = end_timestamp

        self.start_timestamp = to_pd_timestamp(self.start_timestamp)
        self.end_timestamp = to_pd_timestamp(self.end_timestamp)

        self.exchanges = exchanges

        if codes:
            if type(codes) == str:
                codes = codes.replace(' ', '')
                if codes.startswith('[') and codes.endswith(']'):
                    codes = json.loads(codes)
                else:
                    codes = codes.split(',')

        self.codes = codes
        self.entity_ids = entity_ids
        self.filters = filters
        self.order = order
        self.limit = limit

        if level:
            self.level = IntervalLevel(level)
        else:
            self.level = level

        self.category_field = category_field
        self.time_field = time_field
        self.computing_window = computing_window

        self.category_col = eval(f'self.data_schema.{self.category_field}')
        self.time_col = eval(f'self.data_schema.{self.time_field}')

        self.columns = columns

        # we store the data in a MultiIndex (category_column, timestamp) DataFrame
        if self.columns:
            # support str
            if type(columns[0]) == str:
                self.columns = []
                for col in columns:
                    self.columns.append(eval(f'data_schema.{col}'))

            # always add category_column and time_field for normalizing
            self.columns = list(
                set(self.columns) | {self.category_col, self.time_col})

        self.data_listeners: List[DataListener] = []

        self.data_df: pd.DataFrame = None
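
A standalone sketch of the codes normalization above, which accepts a list, a JSON-style string, or a comma-separated string (the stock codes are illustrative):

import json

def normalize_codes(codes):
    if codes and isinstance(codes, str):
        codes = codes.replace(' ', '')
        if codes.startswith('[') and codes.endswith(']'):
            codes = json.loads(codes)
        else:
            codes = codes.split(',')
    return codes

print(normalize_codes('["000001", "600519"]'))  # ['000001', '600519']
print(normalize_codes('000001, 600519'))        # ['000001', '600519']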
Example no. 9
    async def record_data(cls,
                          region: Region,
                          provider: Provider,
                          exchanges=None,
                          entity_ids=None,
                          codes=None,
                          batch_size=None,
                          force_update=None,
                          sleeping_time=None,
                          default_size=None,
                          real_time=None,
                          fix_duplicate_way=None,
                          start_timestamp=None,
                          end_timestamp=None,
                          close_hour=None,
                          close_minute=None,
                          one_day_trading_minutes=None,
                          **kwargs):
        assert hasattr(cls, 'provider_map_recorder') and cls.provider_map_recorder
        # print(f'{cls.__name__} registered recorders:{cls.provider_map_recorder}')

        assert region is not None and provider is not None

        recorder_class = cls.provider_map_recorder[region][provider]

        # get args for specific recorder class
        from findy.database.plugins.recorder import TimeSeriesDataRecorder
        if issubclass(recorder_class, TimeSeriesDataRecorder):
            args = [item for item in inspect.getfullargspec(cls.record_data).args if
                    item not in ('cls', 'region', 'provider')]
        else:
            args = ['batch_size', 'force_update', 'sleeping_time']

        # only forward non-None args, so the recorder_class defaults apply otherwise
        kw = {}
        for arg in args:
            tmp = eval(arg)
            if tmp is not None:
                kw[arg] = tmp

        # KDataRecorder
        from findy.database.plugins.recorder import KDataRecorder
        if issubclass(recorder_class, KDataRecorder):
            # contract:
            # 1) KDataRecorder records data that carries an IntervalLevel
            # 2) the schema's table name follows {entity}_{level}_[adjust_type]_{event}
            table: str = cls.__tablename__
            try:
                items = table.split('_')
                if len(items) == 4:
                    adjust_type = items[2]
                    kw['adjust_type'] = adjust_type
                level = IntervalLevel(items[1])
            except Exception:
                # schemas without the standard name format still need a level
                # to size the remaining days; fall back to daily
                level = IntervalLevel.LEVEL_1DAY

            kw['level'] = level

        # add other custom args
        for k in kwargs:
            kw[k] = kwargs[k]

        r = recorder_class(**kw)
        await r.run()
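
A standalone sketch of the table-name contract used above ({entity}_{level}_[adjust_type]_{event}); the table names shown are hypothetical:

def infer_kdata_kwargs(table: str) -> dict:
    # mirrors the parsing in record_data, with plain strings instead of enums
    kw = {}
    items = table.split('_')
    if len(items) == 4:
        kw['adjust_type'] = items[2]
    kw['level'] = items[1] if len(items) > 1 else '1d'
    return kw

print(infer_kdata_kwargs('stock_1d_hfq_kdata'))  # {'adjust_type': 'hfq', 'level': '1d'}
print(infer_kdata_kwargs('stock_1wk_kdata'))     # {'level': '1wk'}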