def eval_size_of_timestamp(self,
                           start_timestamp: pd.Timestamp,
                           end_timestamp: pd.Timestamp,
                           level: IntervalLevel,
                           one_day_trading_minutes):
    assert end_timestamp is not None

    time_delta = end_timestamp - to_pd_timestamp(start_timestamp)
    one_day_trading_seconds = one_day_trading_minutes * 60

    if level == IntervalLevel.LEVEL_1DAY:
        return time_delta.days

    if level == IntervalLevel.LEVEL_1WEEK:
        return int(math.ceil(time_delta.days / 7))

    if level == IntervalLevel.LEVEL_1MON:
        return int(math.ceil(time_delta.days / 30))

    if time_delta.days > 0:
        seconds = (time_delta.days + 1) * one_day_trading_seconds
        return int(math.ceil(seconds / level.to_second()))
    else:
        # within a single day, cap the estimate at one day's worth of bars
        seconds = time_delta.total_seconds()
        return min(int(math.ceil(seconds / level.to_second())),
                   one_day_trading_seconds / level.to_second())
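# Worked example (a sketch, not part of the recorder): for an intraday
# level the estimate is capped at one day's worth of bars. Assumes an
# IntervalLevel.LEVEL_5MIN member whose to_second() returns 300.
#
#   one_day_trading_minutes = 4 * 60            # a typical CN session
#   cap = one_day_trading_minutes * 60 / 300    # 48 five-minute bars
#
#   start = pd.Timestamp('2023-06-01 09:30')
#   end = pd.Timestamp('2023-06-01 15:00')      # same day: days == 0
#   # seconds = 19800 -> ceil(19800 / 300) = 66 -> min(66, 48) = 48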
def __init__(self,
             entity_type: EntityType = EntityType.Stock,
             exchanges=None,
             entity_ids=None,
             codes=None,
             batch_size=10,
             force_update=True,
             sleeping_time=10,
             default_size=findy_config['batch_size'],
             real_time=False,
             fix_duplicate_way='ignore',
             start_timestamp=None,
             end_timestamp=None,
             close_hour=0,
             close_minute=0,
             # arguments added by this subclass
             level=IntervalLevel.LEVEL_1DAY,
             kdata_use_begin_time=False,
             one_day_trading_minutes=24 * 60,
             share_para=None):
    super().__init__(entity_type, exchanges, entity_ids, codes, batch_size,
                     force_update, sleeping_time, default_size, real_time,
                     fix_duplicate_way, start_timestamp, end_timestamp,
                     close_hour, close_minute, share_para=share_para)

    self.level = IntervalLevel(level)
    self.kdata_use_begin_time = kdata_use_begin_time
    self.one_day_trading_minutes = one_day_trading_minutes
def get_interval_timestamps(cls, start_date, end_date, level: IntervalLevel):
    """
    generate the timestamps for the level

    :param start_date: first trading date (inclusive)
    :param end_date: last trading date (inclusive)
    :param level: the interval level to generate timestamps for
    """
    for current_date in cls.get_trading_dates(start_date=start_date, end_date=end_date):
        if level >= IntervalLevel.LEVEL_1DAY:
            yield current_date
        else:
            start_end_list = cls.get_trading_intervals()

            for start_end in start_end_list:
                start = start_end[0]
                end = start_end[1]

                current_timestamp = to_pd_datetime(the_date=current_date, the_time=start)
                end_timestamp = to_pd_datetime(the_date=current_date, the_time=end)

                while current_timestamp <= end_timestamp:
                    yield current_timestamp
                    current_timestamp = current_timestamp + timedelta(minutes=level.to_minute())
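# Hedged usage sketch: assuming get_trading_intervals() returns
# [('09:30', '11:30'), ('13:00', '15:00')] (CN A-share sessions) and a
# sub-day level whose to_minute() == 60, one trading day yields:
#
#   for ts in cls.get_interval_timestamps('2023-06-01', '2023-06-01',
#                                         IntervalLevel.LEVEL_1HOUR):
#       print(ts)
#   # 09:30, 10:30, 11:30, 13:00, 14:00, 15:00
#
# Note the boundary is inclusive, so each session also yields its end time.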
def __init__(self,
             exchanges=[e.value for e in ChnExchange],
             entity_ids=None,
             codes=None,
             batch_size=10,
             force_update=True,
             sleeping_time=0,
             default_size=findy_config['batch_size'],
             real_time=False,
             fix_duplicate_way='ignore',
             start_timestamp=None,
             end_timestamp=None,
             level=IntervalLevel.LEVEL_1WEEK,
             kdata_use_begin_time=False,
             close_hour=15,
             close_minute=0,
             one_day_trading_minutes=4 * 60,
             adjust_type=AdjustType.qfq,
             share_para=None) -> None:
    level = IntervalLevel(level)
    adjust_type = AdjustType(adjust_type)
    self.data_schema = self.get_kdata_schema(entity_type=EntityType.Stock,
                                             level=level,
                                             adjust_type=adjust_type)
    self.bao_trading_level = to_bao_trading_level(level)

    super().__init__(EntityType.Stock, exchanges, entity_ids, codes, batch_size,
                     force_update, sleeping_time, default_size, real_time,
                     fix_duplicate_way, start_timestamp, end_timestamp,
                     close_hour, close_minute, level, kdata_use_begin_time,
                     one_day_trading_minutes, share_para=share_para)
    self.adjust_type = adjust_type
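# Hypothetical instantiation (the concrete subclass name is assumed here;
# substitute whichever recorder defines this __init__):
#
#   recorder = BaoChinaStockKdataRecorder(codes=['000001'],
#                                         level=IntervalLevel.LEVEL_1DAY,
#                                         adjust_type=AdjustType.hfq)
#   # data_schema resolves through get_kdata_schema (e.g. Stock1dHfqKdata)
#   # and bao_trading_level holds the baostock frequency string for level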
def level_flag(level: IntervalLevel):
    level = IntervalLevel(level)
    if level == IntervalLevel.LEVEL_1DAY:
        return 101
    if level == IntervalLevel.LEVEL_1WEEK:
        return 102
    if level == IntervalLevel.LEVEL_1MON:
        return 103
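# Sketch of the mapping (flags beyond monthly are not defined, so the
# function implicitly returns None for finer levels):
#
#   level_flag(IntervalLevel.LEVEL_1DAY)    # -> 101
#   level_flag(IntervalLevel.LEVEL_1WEEK)   # -> 102
#   level_flag(IntervalLevel.LEVEL_1MON)    # -> 103
#   level_flag(IntervalLevel.LEVEL_5MIN)    # -> None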
def get_kdata_schema(entity_type: EntityType,
                     level: Union[IntervalLevel, str] = IntervalLevel.LEVEL_1DAY,
                     adjust_type: Union[AdjustType, str] = None):
    if type(level) == str:
        level = IntervalLevel(level)
    if type(adjust_type) == str:
        adjust_type = AdjustType(adjust_type)

    # kdata schema rule
    # 1) name: {EntityType.value.capitalize()}{IntervalLevel.value.capitalize()}[{AdjustType.value.capitalize()}]Kdata
    if adjust_type and (adjust_type != AdjustType.qfq):
        schema_str = f'{entity_type.value.capitalize()}{level.value.capitalize()}{adjust_type.value.capitalize()}Kdata'
    else:
        schema_str = f'{entity_type.value.capitalize()}{level.value.capitalize()}Kdata'
    return get_schema_by_name(schema_str)
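# Illustrative expansions of the naming rule (the exact enum string values
# are assumptions based on the '1d'/'hfq' style used elsewhere):
#
#   get_kdata_schema(EntityType.Stock, IntervalLevel.LEVEL_1DAY)
#   # -> get_schema_by_name('Stock1dKdata')        (qfq/None drops adjust)
#   get_kdata_schema(EntityType.Stock, IntervalLevel.LEVEL_1DAY, AdjustType.hfq)
#   # -> get_schema_by_name('Stock1dHfqKdata')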
def to_high_level_kdata(kdata_df: pd.DataFrame, to_level: IntervalLevel):
    def to_close(s):
        if pd_valid(s):
            return s.iloc[-1]

    def to_open(s):
        if pd_valid(s):
            return s.iloc[0]

    def to_high(s):
        return np.max(s)

    def to_low(s):
        return np.min(s)

    def to_sum(s):
        return np.sum(s)

    original_level = kdata_df['level'].iloc[0]
    entity_id = kdata_df['entity_id'].iloc[0]
    provider = kdata_df['provider'].iloc[0]
    name = kdata_df['name'].iloc[0]
    code = kdata_df['code'].iloc[0]

    entity_type, _, _ = decode_entity_id(entity_id=entity_id)

    assert IntervalLevel(original_level) <= IntervalLevel.LEVEL_1DAY
    assert IntervalLevel(original_level) < IntervalLevel(to_level)

    df: pd.DataFrame = None
    if to_level == IntervalLevel.LEVEL_1WEEK:
        # loffset=-2: label each weekly bar with Friday's date.
        # The stock and non-stock branches aggregated identically, so the
        # duplicated if/else is collapsed into a single resample here.
        df = kdata_df.resample('W', loffset=pd.DateOffset(days=-2)).apply({
            'close': to_close,
            'open': to_open,
            'high': to_high,
            'low': to_low,
            'volume': to_sum,
            'turnover': to_sum
        })
    df = df.dropna()
    # id entity_id timestamp provider code name level
    df['entity_id'] = entity_id
    df['provider'] = provider
    df['code'] = code
    df['name'] = name

    return df
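# Usage sketch, assuming kdata_df is a timestamp-indexed daily OHLCV frame
# with the metadata columns referenced above:
#
#   weekly_df = to_high_level_kdata(kdata_df, IntervalLevel.LEVEL_1WEEK)
#   # each weekly bar keeps the first open, last close, max high, min low,
#   # summed volume/turnover, and is labelled on Friday via the -2d offset
#
# Caveat: pandas 2.0 removed the resample(loffset=...) keyword; on newer
# pandas the same labelling can be reproduced by shifting afterwards:
#   df.index = df.index + pd.DateOffset(days=-2)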
def __init__(self,
             region: Region,
             data_schema: Type[Mixin],
             entity_schema: Type[EntityMixin],
             provider: Provider = None,
             entity_ids: List[str] = None,
             exchanges: List[str] = None,
             codes: List[str] = None,
             the_timestamp: Union[str, pd.Timestamp] = None,
             start_timestamp: Union[str, pd.Timestamp] = None,
             end_timestamp: Union[str, pd.Timestamp] = None,
             columns: List = None,
             filters: List = None,
             order: object = None,
             limit: int = None,
             level: IntervalLevel = None,
             category_field: str = 'entity_id',
             time_field: str = 'timestamp',
             computing_window: int = None) -> None:
    self.logger = logging.getLogger(self.__class__.__name__)

    self.data_schema = data_schema
    self.entity_schema = entity_schema

    self.region = region
    self.provider = provider

    if end_timestamp is None:
        end_timestamp = now_pd_timestamp(self.region)

    self.the_timestamp = the_timestamp
    if the_timestamp:
        self.start_timestamp = the_timestamp
        self.end_timestamp = the_timestamp
    else:
        self.start_timestamp = start_timestamp
        self.end_timestamp = end_timestamp

    self.start_timestamp = to_pd_timestamp(self.start_timestamp)
    self.end_timestamp = to_pd_timestamp(self.end_timestamp)

    self.exchanges = exchanges

    if codes:
        if type(codes) == str:
            codes = codes.replace(' ', '')
            if codes.startswith('[') and codes.endswith(']'):
                codes = json.loads(codes)
            else:
                codes = codes.split(',')
    self.codes = codes

    self.entity_ids = entity_ids
    self.filters = filters
    self.order = order
    self.limit = limit

    if level:
        self.level = IntervalLevel(level)
    else:
        self.level = level

    self.category_field = category_field
    self.time_field = time_field
    self.computing_window = computing_window

    # resolve column objects by attribute name instead of eval
    self.category_col = getattr(self.data_schema, self.category_field)
    self.time_col = getattr(self.data_schema, self.time_field)

    self.columns = columns

    # we store the data in a multiple index (category_column, timestamp) DataFrame
    if self.columns:
        # support str columns
        if type(columns[0]) == str:
            self.columns = []
            for col in columns:
                self.columns.append(getattr(data_schema, col))

        # always add category_column and time_field for normalizing
        self.columns = list(set(self.columns) | {self.category_col, self.time_col})

    self.data_listeners: List[DataListener] = []

    self.data_df: pd.DataFrame = None
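# Hedged construction sketch (DataReader as the enclosing class name and
# Stock1dKdata/Stock as schemas are assumptions; substitute the classes
# registered in this repo):
#
#   reader = DataReader(region=Region.CHN,
#                       data_schema=Stock1dKdata,
#                       entity_schema=Stock,
#                       codes='000001,000002',      # str form parsed above
#                       start_timestamp='2023-01-01',
#                       columns=['open', 'close'])  # strs resolved via getattr
#   # columns is widened to include entity_id and timestamp for normalizing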
async def record_data(cls,
                      region: Region,
                      provider: Provider,
                      exchanges=None,
                      entity_ids=None,
                      codes=None,
                      batch_size=None,
                      force_update=None,
                      sleeping_time=None,
                      default_size=None,
                      real_time=None,
                      fix_duplicate_way=None,
                      start_timestamp=None,
                      end_timestamp=None,
                      close_hour=None,
                      close_minute=None,
                      one_day_trading_minutes=None,
                      **kwargs):
    assert hasattr(cls, 'provider_map_recorder') and cls.provider_map_recorder
    # both keys are required for the lookup below
    assert region is not None and provider is not None

    recorder_class = cls.provider_map_recorder[region][provider]

    # get args for specific recorder class
    from findy.database.plugins.recorder import TimeSeriesDataRecorder
    if issubclass(recorder_class, TimeSeriesDataRecorder):
        args = [item for item in inspect.getfullargspec(cls.record_data).args
                if item not in ('cls', 'region', 'provider')]
    else:
        args = ['batch_size', 'force_update', 'sleeping_time']

    # only forward non-None args to kw, so the recorder_class defaults still apply
    kw = {}
    for arg in args:
        tmp = eval(arg)
        if tmp is not None:
            kw[arg] = tmp

    # KDataRecorder
    from findy.database.plugins.recorder import KDataRecorder
    if issubclass(recorder_class, KDataRecorder):
        # contract:
        # 1) use KDataRecorder to record the data with IntervalLevel
        # 2) the table of a schema with IntervalLevel has the format
        #    {entity}_{level}_[adjust_type]_{event}
        table: str = cls.__tablename__
        try:
            items = table.split('_')
            if len(items) == 4:
                adjust_type = items[2]
                kw['adjust_type'] = adjust_type
            level = IntervalLevel(items[1])
        except Exception:
            # for other schemas without the normal format, but which still
            # need to calculate size for remaining days
            level = IntervalLevel.LEVEL_1DAY
        kw['level'] = level

    # add other custom args
    for k in kwargs:
        kw[k] = kwargs[k]

    r = recorder_class(**kw)
    await r.run()
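# Usage sketch: record_data is reached through a schema class (names are
# hedged; substitute the schema/provider pairs registered in this repo):
#
#   await Stock1dKdata.record_data(region=Region.CHN,
#                                  provider=Provider.BaoStock,
#                                  codes=['000001'],
#                                  sleeping_time=1)
#   # __tablename__ 'stock_1d_kdata' splits into 3 parts, so no adjust_type
#   # is forwarded and level parses from items[1] -> IntervalLevel('1d')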