def test_china_stock_reader():
    data_reader = DataReader(
        codes=["002572", "000338"],
        data_schema=Stock1dKdata,
        entity_schema=Stock,
        start_timestamp="2019-01-01",
        end_timestamp="2019-06-10",
        entity_provider="eastmoney",
    )

    categories = data_reader.data_df.index.levels[0].to_list()
    df = data_reader.data_df

    assert "stock_sz_002572" in categories
    assert "stock_sz_000338" in categories

    assert ("stock_sz_002572", "2019-01-02") in df.index
    assert ("stock_sz_000338", "2019-01-02") in df.index
    assert ("stock_sz_002572", "2019-06-10") in df.index
    assert ("stock_sz_000338", "2019-06-10") in df.index

    for timestamp in Stock.get_interval_timestamps(
        start_date="2019-06-11", end_date="2019-06-14", level=IntervalLevel.LEVEL_1DAY
    ):
        data_reader.move_on(to_timestamp=timestamp)
        df = data_reader.data_df
        assert ("stock_sz_002572", timestamp) in df.index
        assert ("stock_sz_000338", to_time_str(timestamp)) in df.index
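# The asserts above rely on data_df being a frame indexed by (entity_id, timestamp).
# A minimal sketch of slicing that MultiIndex with plain pandas (not zvt API;
# one_entity_window is a name made up for this illustration):
import pandas as pd

def one_entity_window(df: pd.DataFrame, entity_id: str, start: str, end: str) -> pd.DataFrame:
    # pick one entity from index level 0, then slice its timestamp level
    entity_df = df.xs(entity_id, level=0)
    return entity_df.loc[pd.Timestamp(start):pd.Timestamp(end)]

# e.g. one_entity_window(data_reader.data_df, 'stock_sz_002572', '2019-01-02', '2019-06-10')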
def get_trading_signals_figure(
    order_reader: OrderReader,
    entity_id: str,
    start_timestamp=None,
    end_timestamp=None,
    adjust_type=None,
):
    entity_type, _, _ = decode_entity_id(entity_id)
    data_schema = get_kdata_schema(entity_type=entity_type, level=order_reader.level, adjust_type=adjust_type)

    if not start_timestamp:
        start_timestamp = order_reader.start_timestamp
    if not end_timestamp:
        end_timestamp = order_reader.end_timestamp
    kdata_reader = DataReader(
        entity_ids=[entity_id],
        data_schema=data_schema,
        entity_schema=zvt_context.tradable_schema_map.get(entity_type),
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
        level=order_reader.level,
    )

    # generate the annotation df
    order_reader.move_on(timeout=0)
    df = order_reader.data_df.copy()
    df = df[df.entity_id == entity_id].copy()
    if pd_is_not_null(df):
        df["value"] = df["order_price"]
        df["flag"] = df["order_type"].apply(lambda x: order_type_flag(x))
        df["color"] = df["order_type"].apply(lambda x: order_type_color(x))
    print(df.tail())

    drawer = Drawer(main_df=kdata_reader.data_df, annotation_df=df)
    return drawer.draw_kline(show=False, height=800)
def get_trading_signals_figure(order_reader: OrderReader,
                               entity_id: str,
                               start_timestamp=None,
                               end_timestamp=None):
    entity_type, _, _ = decode_entity_id(entity_id)
    data_schema = get_kdata_schema(entity_type=entity_type, level=order_reader.level)

    if not start_timestamp:
        start_timestamp = order_reader.start_timestamp
    if not end_timestamp:
        end_timestamp = order_reader.end_timestamp
    kdata_reader = DataReader(entity_ids=[entity_id],
                              data_schema=data_schema,
                              entity_schema=entity_schema_map.get(entity_type),
                              start_timestamp=start_timestamp,
                              end_timestamp=end_timestamp,
                              level=order_reader.level)

    # generate the annotation df
    order_reader.move_on(timeout=0)
    df = order_reader.data_df.copy()
    df = df[df.entity_id == entity_id].copy()
    if pd_is_not_null(df):
        df['value'] = df['order_price']
        df['flag'] = df['order_type'].apply(lambda x: order_type_flag(x))
        df['color'] = df['order_type'].apply(lambda x: order_type_color(x))
    print(df.tail())

    drawer = Drawer(main_df=kdata_reader.data_df, annotation_df=df)
    return drawer.draw_kline()
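# order_type_flag and order_type_color are used above but not shown here. A
# plausible sketch, assuming zvt-style order_type strings such as "order_long",
# "order_short", "order_close_long", "order_close_short" (the exact values and
# colors are assumptions, not confirmed by this snippet):
def order_type_flag(order_type):
    # buys (open long / close short) get a "B" marker, everything else "S"
    if order_type in ("order_long", "order_close_short"):
        return "B"
    return "S"


def order_type_color(order_type):
    # red for buys, green for sells (the A-share convention)
    if order_type in ("order_long", "order_close_short"):
        return "#ec0000"
    return "#00da3c"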
def baseline_strat(region, ticker, start, end):
    reader = DataReader(region=region,
                        codes=[ticker],
                        start_timestamp=start,
                        end_timestamp=end,
                        data_schema=Stock1dKdata,
                        entity_schema=Stock,
                        columns=['entity_id', 'timestamp', 'open', 'close', 'high', 'low', 'volume'],
                        provider=Provider.Yahoo)
    dji = reader.data_df
    dji['daily_return'] = dji['close'].pct_change(1)
    dow_strat = backtest_strat(dji)
    return dji, dow_strat
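# backtest_strat is defined elsewhere; for orientation, a sketch of what the
# daily_return column supports with plain pandas (a buy-and-hold cumulative
# return — an illustration, not the project's backtest logic):
def cumulative_return(df):
    # compound daily returns: (1 + r_1)(1 + r_2)...(1 + r_n) - 1
    return (1 + df['daily_return'].fillna(0)).cumprod() - 1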
def test_reader_move_on():
    data_reader = DataReader(region=Region.CHN,
                             codes=['002572', '000338'],
                             data_schema=Stock1dKdata,
                             entity_schema=Stock,
                             start_timestamp='2019-06-13',
                             end_timestamp='2019-06-14')

    data_reader.move_on(to_timestamp='2019-06-15')
    assert ('stock_sz_002572', '2019-06-15') not in data_reader.data_df.index
    assert ('stock_sz_000338', '2019-06-15') not in data_reader.data_df.index

    start_time = time.time()
    data_reader.move_on(to_timestamp='2019-06-20', timeout=5)
    assert time.time() - start_time < 5
def test_reader_move_on():
    data_reader = DataReader(
        codes=["002572", "000338"],
        data_schema=Stock1dKdata,
        entity_schema=Stock,
        start_timestamp="2019-06-13",
        end_timestamp="2019-06-14",
        entity_provider="eastmoney",
    )

    data_reader.move_on(to_timestamp="2019-06-15")
    assert ("stock_sz_002572", "2019-06-15") not in data_reader.data_df.index
    assert ("stock_sz_000338", "2019-06-15") not in data_reader.data_df.index

    start_time = time.time()
    data_reader.move_on(to_timestamp="2019-06-20", timeout=5)
    assert time.time() - start_time < 5
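# Both variants of the test exercise the same semantics: move_on polls for newly
# persisted rows up to to_timestamp and returns once data arrives or the timeout
# elapses. A sketch of driving a reader forward step by step (poll_daily is a
# name made up for this illustration):
import time

def poll_daily(data_reader, timestamps, timeout=5):
    for ts in timestamps:
        start = time.time()
        # waits at most `timeout` seconds for data at ts to show up
        data_reader.move_on(to_timestamp=ts, timeout=timeout)
        print(f'moved to {ts} in {time.time() - start:.1f}s')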
def __init__(
    self,
    data_schema: Type[Mixin],
    entity_schema: Type[TradableEntity] = None,
    provider: str = None,
    entity_provider: str = None,
    entity_ids: List[str] = None,
    exchanges: List[str] = None,
    codes: List[str] = None,
    start_timestamp: Union[str, pd.Timestamp] = None,
    end_timestamp: Union[str, pd.Timestamp] = None,
    columns: List = None,
    filters: List = None,
    order: object = None,
    limit: int = None,
    level: Union[str, IntervalLevel] = IntervalLevel.LEVEL_1DAY,
    category_field: str = "entity_id",
    time_field: str = "timestamp",
    computing_window: int = None,
    keep_all_timestamp: bool = False,
    fill_method: str = "ffill",
    effective_number: int = None,
    transformer: Transformer = None,
    accumulator: Accumulator = None,
    need_persist: bool = False,
    only_compute_factor: bool = False,
    factor_name: str = None,
    clear_state: bool = False,
    only_load_factor: bool = False,
) -> None:
    """
    :param keep_all_timestamp: whether to keep a row for every timestamp, filling gaps by fill_method
    :param fill_method: how to fill missing values when keep_all_timestamp is True
    :param effective_number:
    :param transformer: transformer computing pipe_df from data_df
    :param accumulator: accumulator computing factor_df from pipe_df
    :param need_persist: whether to persist the factor
    :param only_compute_factor: only compute the factor, not the result
    :param factor_name: unique name used to keep the factor state
    :param clear_state: clear persisted factor state before computing
    :param only_load_factor: only load the factor and compute the result
    """
    self.only_load_factor = only_load_factor

    #: define a unique name for your factor if you want to keep factor state
    #: the factor state is defined by factor_name and entity_id
    if not factor_name:
        self.name = to_snake_str(type(self).__name__)
    else:
        self.name = factor_name

    DataReader.__init__(
        self,
        data_schema,
        entity_schema,
        provider,
        entity_provider,
        entity_ids,
        exchanges,
        codes,
        start_timestamp,
        end_timestamp,
        columns,
        filters,
        order,
        limit,
        level,
        category_field,
        time_field,
        computing_window,
    )

    EntityStateService.__init__(self, entity_ids=entity_ids)

    self.clear_state = clear_state
    self.keep_all_timestamp = keep_all_timestamp
    self.fill_method = fill_method
    self.effective_number = effective_number

    if transformer:
        self.transformer = transformer
    else:
        self.transformer = self.__class__.transformer

    if accumulator:
        self.accumulator = accumulator
    else:
        self.accumulator = self.__class__.accumulator

    self.need_persist = need_persist
    self.dry_run = only_compute_factor

    #: intermediate result, not persisted
    #: data_df -> pipe_df
    self.pipe_df: pd.DataFrame = None

    #: the computed factor, persistable, derived from pipe_df
    #: pipe_df -> factor_df
    self.factor_df: pd.DataFrame = None

    #: result_df is the standard df used for selecting targets, derived from factor_df
    #: factor_df -> result_df
    self.result_df: pd.DataFrame = None

    if self.clear_state:
        self.clear_state_data()
    elif self.need_persist or self.only_load_factor:
        self.load_factor()

        #: trim data_df according to the already computed factor_df and computing_window
        #: data_df is read only to compute factor_df, and selection/backtesting depend on factor_df alone,
        #: so when a persisted factor_df exists, only the slice of data_df needed for computing is kept
        if pd_is_not_null(self.data_df) and self.computing_window:
            dfs = []
            for entity_id, df in self.data_df.groupby(level=0):
                latest_saved = get_data(
                    provider="zvt",
                    data_schema=self.factor_schema,
                    entity_id=entity_id,
                    order=self.factor_schema.timestamp.desc(),
                    limit=1,
                    index=[self.category_field, self.time_field],
                    return_type="domain",
                )
                if latest_saved:
                    df1 = df[df.timestamp < latest_saved[0].timestamp].iloc[-self.computing_window:]
                    if pd_is_not_null(df1):
                        df = df[df.timestamp >= df1.iloc[0].timestamp]
                dfs.append(df)

            self.data_df = pd.concat(dfs)

    self.register_data_listener(self)

    #: the compute logic is not triggered by loading data
    #: for the case: 1) load factor from db 2) compute the result
    if self.only_load_factor:
        self.compute()
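# The constructor above wires the data_df -> pipe_df -> factor_df -> result_df
# pipeline through the transformer/accumulator hooks. A minimal Transformer
# sketch, assuming the hook is transform(input_df) over the
# (entity_id, timestamp)-indexed frame (MaTransformer and its column name are
# illustrative, not part of the snippet above):
import pandas as pd

class MaTransformer(Transformer):
    def __init__(self, window: int = 10) -> None:
        super().__init__()
        self.window = window

    def transform(self, input_df: pd.DataFrame) -> pd.DataFrame:
        # rolling mean of close per entity; groupby(level=0) keeps entities separate
        ma = input_df['close'].groupby(level=0).rolling(self.window, min_periods=self.window).mean()
        input_df[f'ma{self.window}'] = ma.reset_index(level=0, drop=True)
        return input_df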
                 the_timestamp: Union[str, pd.Timestamp] = None,
                 start_timestamp: Union[str, pd.Timestamp] = None,
                 end_timestamp: Union[str, pd.Timestamp] = None,
                 columns: List = ['id', 'entity_id', 'timestamp', 'level', 'open', 'close', 'high', 'low'],
                 filters: List = None,
                 order: object = None,
                 limit: int = None,
                 level: Union[str, IntervalLevel] = IntervalLevel.LEVEL_1DAY,
                 category_field: str = 'entity_id',
                 time_field: str = 'timestamp',
                 computing_window: int = None,
                 keep_all_timestamp: bool = False,
                 fill_method: str = 'ffill',
                 effective_number: int = None,
                 accumulator: Accumulator = None,
                 need_persist: bool = False,
                 dry_run: bool = False,
                 adjust_type: Union[AdjustType, str] = None,
                 window=30) -> None:
        self.adjust_type = adjust_type
        transformer = TopBottomTransformer(window=window)

        super().__init__(entity_schema, provider, entity_provider, entity_ids, exchanges, codes, the_timestamp,
                         start_timestamp, end_timestamp, columns, filters, order, limit, level, category_field,
                         time_field, computing_window, keep_all_timestamp, fill_method, effective_number, transformer,
                         accumulator, need_persist, dry_run, adjust_type)


if __name__ == '__main__':
    factor = TopBottomFactor(codes=['601318'],
                             start_timestamp='2005-01-01',
                             end_timestamp=now_pd_timestamp(),
                             level=IntervalLevel.LEVEL_1DAY,
                             window=120)
    print(factor.factor_df)

    data_reader1 = DataReader(codes=['601318'], data_schema=Stock1dKdata, entity_schema=Stock)

    drawer = Drawer(main_df=data_reader1.data_df, factor_df_list=[factor.factor_df[['top', 'bottom']]])
    drawer.draw_kline()


# the __all__ is generated
__all__ = ['TopBottomTransformer', 'TopBottomFactor']
                xref='x',
                yref='y',
                text=item['flag'],
                showarrow=True,
                align='center',
                arrowhead=2,
                arrowsize=1,
                arrowwidth=2,
                # arrowcolor='#030813',
                ax=-10,
                ay=-30,
                bordercolor='#c7c7c7',
                borderwidth=1,
                bgcolor=color,
                opacity=0.8))

    return annotations


if __name__ == '__main__':
    data_reader1 = DataReader(region=Region.CHN, codes=['002223'], data_schema=Stock1dKdata, entity_schema=Stock)
    data_reader2 = DataReader(region=Region.CHN,
                              codes=['002223'],
                              data_schema=Stock1dMaStateStats,
                              entity_schema=Stock,
                              columns=['ma5', 'ma10', 'current_count', 'current_pct'])
    data_reader2.data_df['slope'] = 100 * data_reader2.data_df['current_pct'] / data_reader2.data_df['current_count']

    drawer = Drawer(main_df=data_reader1.data_df,
                    factor_df=data_reader2.data_df[['ma5', 'ma10']],
                    sub_df=data_reader2.data_df[['slope']])
    drawer.draw_kline()
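# The dicts above follow plotly's annotation schema. A sketch of building one such
# flag annotation and attaching the list to a figure with plain plotly (independent
# of zvt's Drawer; flag_annotation is a made-up helper name):
import plotly.graph_objects as go

def flag_annotation(x, y, flag, color):
    return dict(x=x, y=y, xref='x', yref='y', text=flag,
                showarrow=True, arrowhead=2, ax=-10, ay=-30,
                bordercolor='#c7c7c7', borderwidth=1, bgcolor=color, opacity=0.8)

# fig = go.Figure(data=[go.Candlestick(...)])
# fig.update_layout(annotations=[flag_annotation('2019-06-10', 12.3, 'B', '#ec0000')])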
    'CDLSHORTLINE': 'Short Line Candle',
    'CDLSPINNINGTOP': 'Spinning Top',
    'CDLSTALLEDPATTERN': 'Stalled Pattern',
    'CDLSTICKSANDWICH': 'Stick Sandwich',
    'CDLTAKURI': 'Takuri (Dragonfly Doji with very long lower shadow)',
    'CDLTASUKIGAP': 'Tasuki Gap',
    'CDLTHRUSTING': 'Thrusting Pattern',
    'CDLTRISTAR': 'Tristar Pattern',
    'CDLUNIQUE3RIVER': 'Unique 3 River',
    'CDLUPSIDEGAP2CROWS': 'Upside Gap Two Crows',
    'CDLXSIDEGAP3METHODS': 'Upside/Downside Gap Three Methods'
}

reader = DataReader(region=Region.US,
                    start_timestamp='2020-01-01',
                    data_schema=Stock1dKdata,
                    entity_schema=Stock,
                    provider=Provider.Yahoo)
gb = reader.data_df.groupby('code')

app = Flask(__name__)


@app.route('/')
def index():
    pattern = request.args.get('pattern', False)
    stocks = {}

    if pattern:
        pattern_function = getattr(talib, pattern)
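# The route body is cut off above. For orientation, a sketch of the usual TA-Lib
# pattern-scan idiom (every CDL* function takes open/high/low/close arrays and
# returns +100/0/-100 per bar); scan_pattern and the bullish/bearish labels are
# assumptions, not the original continuation:
def scan_pattern(pattern_function, grouped):
    stocks = {}
    for symbol, df in grouped:
        result = pattern_function(df['open'].values, df['high'].values,
                                  df['low'].values, df['close'].values)
        last = result[-1]  # signal on the most recent bar
        if last > 0:
            stocks[symbol] = 'bullish'
        elif last < 0:
            stocks[symbol] = 'bearish'
    return stocks

# e.g. scan_pattern(getattr(talib, 'CDLTRISTAR'), gb)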
        transformer = TopBottomTransformer(window=window)

        super().__init__(region, entity_schema, provider, entity_ids, exchanges, codes, the_timestamp,
                         start_timestamp, end_timestamp, columns, filters, order, limit, level, category_field,
                         time_field, computing_window, keep_all_timestamp, fill_method, effective_number, transformer,
                         accumulator, need_persist, dry_run, factor_name, clear_state, not_load_data, adjust_type)


if __name__ == '__main__':
    factor = TopBottomFactor(codes=['601318'],
                             start_timestamp='2005-01-01',
                             end_timestamp=now_pd_timestamp(Region.CHN),
                             level=IntervalLevel.LEVEL_1DAY,
                             window=120)
    print(factor.factor_df)

    data_reader1 = DataReader(region=Region.CHN, codes=['601318'], data_schema=Stock1dKdata, entity_schema=Stock)

    drawer = Drawer(main_df=data_reader1.data_df, factor_df_list=[factor.factor_df[['top', 'bottom']]])
    drawer.draw_kline(show=True)


# the __all__ is generated
__all__ = ['TopBottomTransformer', 'TopBottomFactor']
    def __init__(self):
        pass

    def fit(self, X, y):
        # LassoLarsIC keeps the features whose lasso coefficients survive the AIC criterion
        self.model = LassoLarsIC(criterion='aic').fit(X, y)
        return self

    def transform(self, X):
        # keep only the columns with non-zero lasso coefficients
        return np.asarray(X)[:, abs(self.model.coef_) > 0]


if __name__ == '__main__':
    now = time.time()

    reader = DataReader(region=Region.US,
                        codes=['FB', 'AMD'],
                        data_schema=Stock1dKdata,
                        entity_schema=Stock,
                        provider=Provider.Yahoo)

    gb = reader.data_df.groupby('code')
    dfs = {x: gb.get_group(x) for x in gb.groups}

    df = dfs['AMD'][['open', 'close', 'volume', 'high', 'low']].copy()
    x_train, y_train, x_test, y_test, y_test_cohort = dataXY(df)

    plt.close()

    parameters = {
        # 'clf__base_estimator__n_estimators': np.round(np.linspace(100, 400, 10)).astype('int'),
        # 'clf__base_estimator__max_depth': [10, 11, 12],
        # 'clf__base_estimator__min_child_weight': [1],
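# The grid above is cut off, but its 'clf__base_estimator__...' keys imply a
# Pipeline with a step named 'clf' wrapping a base_estimator. A minimal sketch of
# that wiring; the step names, LassoSelector (standing in for the truncated class
# above), and the calibrated XGBoost estimator are assumptions, not the original code:
from sklearn.calibration import CalibratedClassifierCV
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline
from xgboost import XGBClassifier

pipe = Pipeline([
    ('select', LassoSelector()),  # the fit/transform feature selector defined above
    ('clf', CalibratedClassifierCV(base_estimator=XGBClassifier())),
])
search = GridSearchCV(pipe, parameters, cv=3)
# search.fit(x_train, y_train); search.best_params_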