Example #1
def test_china_stock_reader():
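    # Read daily kdata for two Shenzhen codes into a (entity_id, timestamp) MultiIndexed data_df and check its bounds.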
    data_reader = DataReader(
        codes=["002572", "000338"],
        data_schema=Stock1dKdata,
        entity_schema=Stock,
        start_timestamp="2019-01-01",
        end_timestamp="2019-06-10",
        entity_provider="eastmoney",
    )

    categories = data_reader.data_df.index.levels[0].to_list()

    df = data_reader.data_df

    assert "stock_sz_002572" in categories
    assert "stock_sz_000338" in categories

    assert ("stock_sz_002572", "2019-01-02") in df.index
    assert ("stock_sz_000338", "2019-01-02") in df.index
    assert ("stock_sz_002572", "2019-06-10") in df.index
    assert ("stock_sz_000338", "2019-06-10") in df.index

    for timestamp in Stock.get_interval_timestamps(
            start_date="2019-06-11",
            end_date="2019-06-14",
            level=IntervalLevel.LEVEL_1DAY):
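        # move_on() extends data_df past the original end_timestamp, one day at a time here.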
        data_reader.move_on(to_timestamp=timestamp)

        df = data_reader.data_df

        assert ("stock_sz_002572", timestamp) in df.index
        assert ("stock_sz_000338", to_time_str(timestamp)) in df.index
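
The snippets in this listing drop their import lines; a minimal sketch of what the test above relies on, assuming a typical zvt module layout (exact paths may differ between releases):

# Assumed imports for test_china_stock_reader; the module paths below follow a
# common zvt layout and are not verified against any specific zvt version.
from zvt.contract import IntervalLevel
from zvt.contract.reader import DataReader
from zvt.domain import Stock, Stock1dKdata
from zvt.utils.time_utils import to_time_str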
Example #2
def test_china_stock_reader():
    data_reader = DataReader(codes=['002572', '000338'],
                             data_schema=Stock1dKdata,
                             entity_schema=Stock,
                             start_timestamp='2019-01-01',
                             end_timestamp='2019-06-10',
                             entity_provider='eastmoney')

    categories = data_reader.data_df.index.levels[0].to_list()

    df = data_reader.data_df

    assert 'stock_sz_002572' in categories
    assert 'stock_sz_000338' in categories

    assert ('stock_sz_002572', '2019-01-02') in df.index
    assert ('stock_sz_000338', '2019-01-02') in df.index
    assert ('stock_sz_002572', '2019-06-10') in df.index
    assert ('stock_sz_000338', '2019-06-10') in df.index

    for timestamp in Stock.get_interval_timestamps(
            start_date='2019-06-11',
            end_date='2019-06-14',
            level=IntervalLevel.LEVEL_1DAY):
        data_reader.move_on(to_timestamp=timestamp)

        df = data_reader.data_df

        assert ('stock_sz_002572', timestamp) in df.index
        assert ('stock_sz_000338', to_time_str(timestamp)) in df.index
Example #3
def get_trading_signals_figure(order_reader: OrderReader,
                               entity_id: str,
                               start_timestamp=None,
                               end_timestamp=None,
                               adjust_type=None):
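    # Pick the kdata schema matching the entity type, the order reader's bar level and the adjust type.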
    entity_type, _, _ = decode_entity_id(entity_id)

    data_schema = get_kdata_schema(entity_type=entity_type,
                                   level=order_reader.level,
                                   adjust_type=adjust_type)
    if not start_timestamp:
        start_timestamp = order_reader.start_timestamp
    if not end_timestamp:
        end_timestamp = order_reader.end_timestamp
    kdata_reader = DataReader(
        entity_ids=[entity_id],
        data_schema=data_schema,
        entity_schema=zvt_context.tradable_schema_map.get(entity_type),
        start_timestamp=start_timestamp,
        end_timestamp=end_timestamp,
        level=order_reader.level,
    )

    # generate the annotation df
    order_reader.move_on(timeout=0)
    df = order_reader.data_df.copy()
    df = df[df.entity_id == entity_id].copy()
    if pd_is_not_null(df):
        df["value"] = df["order_price"]
        df["flag"] = df["order_type"].apply(lambda x: order_type_flag(x))
        df["color"] = df["order_type"].apply(lambda x: order_type_color(x))
    print(df.tail())

    drawer = Drawer(main_df=kdata_reader.data_df, annotation_df=df)
    return drawer.draw_kline(show=False, height=800)
Example #4
def get_trading_signals_figure(order_reader: OrderReader,
                               entity_id: str,
                               start_timestamp=None,
                               end_timestamp=None):
    entity_type, _, _ = decode_entity_id(entity_id)

    data_schema = get_kdata_schema(entity_type=entity_type, level=order_reader.level)
    if not start_timestamp:
        start_timestamp = order_reader.start_timestamp
    if not end_timestamp:
        end_timestamp = order_reader.end_timestamp
    kdata_reader = DataReader(entity_ids=[entity_id], data_schema=data_schema,
                              entity_schema=entity_schema_map.get(entity_type),
                              start_timestamp=start_timestamp,
                              end_timestamp=end_timestamp,
                              level=order_reader.level)

    # generate the annotation df
    order_reader.move_on(timeout=0)
    df = order_reader.data_df.copy()
    df = df[df.entity_id == entity_id].copy()
    if pd_is_not_null(df):
        df['value'] = df['order_price']
        df['flag'] = df['order_type'].apply(lambda x: order_type_flag(x))
        df['color'] = df['order_type'].apply(lambda x: order_type_color(x))
    print(df.tail())

    drawer = Drawer(main_df=kdata_reader.data_df, annotation_df=df)
    return drawer.draw_kline()
Example #5
def baseline_strat(region, ticker, start, end):
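    # Read daily OHLCV for one ticker from the Yahoo provider and feed its daily returns to backtest_strat.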
    reader = DataReader(region=region,
                        codes=[ticker],
                        start_timestamp=start,
                        end_timestamp=end,
                        data_schema=Stock1dKdata,
                        entity_schema=Stock,
                        columns=[
                            'entity_id', 'timestamp', 'open', 'close', 'high',
                            'low', 'volume'
                        ],
                        provider=Provider.Yahoo)
    dji = reader.data_df
    dji['daily_return'] = dji['close'].pct_change(1)
    dow_strat = backtest_strat(dji)
    return dji, dow_strat
Example #6
def test_reader_move_on():
    data_reader = DataReader(region=Region.CHN, codes=['002572', '000338'],
                             data_schema=Stock1dKdata, entity_schema=Stock,
                             start_timestamp='2019-06-13', end_timestamp='2019-06-14')

    data_reader.move_on(to_timestamp='2019-06-15')
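    # 2019-06-15 is a Saturday, so moving on to it should not add any new rows.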
    assert ('stock_sz_002572', '2019-06-15') not in data_reader.data_df.index
    assert ('stock_sz_000338', '2019-06-15') not in data_reader.data_df.index

    start_time = time.time()
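    # move_on() with timeout=5 must hand control back within the 5 second budget.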
    data_reader.move_on(to_timestamp='2019-06-20', timeout=5)
    assert time.time() - start_time < 5
Example #7
def test_reader_move_on():
    data_reader = DataReader(
        codes=["002572", "000338"],
        data_schema=Stock1dKdata,
        entity_schema=Stock,
        start_timestamp="2019-06-13",
        end_timestamp="2019-06-14",
        entity_provider="eastmoney",
    )

    data_reader.move_on(to_timestamp="2019-06-15")
    assert ("stock_sz_002572", "2019-06-15") not in data_reader.data_df.index
    assert ("stock_sz_000338", "2019-06-15") not in data_reader.data_df.index

    start_time = time.time()
    data_reader.move_on(to_timestamp="2019-06-20", timeout=5)
    assert time.time() - start_time < 5
Example #8
    def __init__(
        self,
        data_schema: Type[Mixin],
        entity_schema: Type[TradableEntity] = None,
        provider: str = None,
        entity_provider: str = None,
        entity_ids: List[str] = None,
        exchanges: List[str] = None,
        codes: List[str] = None,
        start_timestamp: Union[str, pd.Timestamp] = None,
        end_timestamp: Union[str, pd.Timestamp] = None,
        columns: List = None,
        filters: List = None,
        order: object = None,
        limit: int = None,
        level: Union[str, IntervalLevel] = IntervalLevel.LEVEL_1DAY,
        category_field: str = "entity_id",
        time_field: str = "timestamp",
        computing_window: int = None,
        keep_all_timestamp: bool = False,
        fill_method: str = "ffill",
        effective_number: int = None,
        transformer: Transformer = None,
        accumulator: Accumulator = None,
        need_persist: bool = False,
        only_compute_factor: bool = False,
        factor_name: str = None,
        clear_state: bool = False,
        only_load_factor: bool = False,
    ) -> None:
        """
        :param keep_all_timestamp:
        :param fill_method:
        :param effective_number:
        :param transformer:
        :param accumulator:
        :param need_persist: whether to persist the factor
        :param only_compute_factor: only compute the factor, not the result
        :param factor_name:
        :param clear_state:
        :param only_load_factor: only load the factor and compute the result
        """
        self.only_load_factor = only_load_factor
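        #: processing pipeline: data_df (loaded by DataReader) -> pipe_df -> factor_df -> result_df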

        #: define a unique name for your factor if you want to keep factor state
        #: the factor state is defined by factor_name and entity_id
        if not factor_name:
            self.name = to_snake_str(type(self).__name__)
        else:
            self.name = factor_name

        DataReader.__init__(
            self,
            data_schema,
            entity_schema,
            provider,
            entity_provider,
            entity_ids,
            exchanges,
            codes,
            start_timestamp,
            end_timestamp,
            columns,
            filters,
            order,
            limit,
            level,
            category_field,
            time_field,
            computing_window,
        )

        EntityStateService.__init__(self, entity_ids=entity_ids)

        self.clear_state = clear_state

        self.keep_all_timestamp = keep_all_timestamp
        self.fill_method = fill_method
        self.effective_number = effective_number

        if transformer:
            self.transformer = transformer
        else:
            self.transformer = self.__class__.transformer

        if accumulator:
            self.accumulator = accumulator
        else:
            self.accumulator = self.__class__.accumulator

        self.need_persist = need_persist
        self.dry_run = only_compute_factor

        #: intermediate result, not persisted
        #: data_df->pipe_df
        self.pipe_df: pd.DataFrame = None

        #: the factor computation result, persistable; derived from pipe_df
        #: pipe_df->factor_df
        self.factor_df: pd.DataFrame = None

        #: result_df is the standard df used for stock picking, derived from factor_df
        #: factor_df->result_df
        self.result_df: pd.DataFrame = None

        if self.clear_state:
            self.clear_state_data()
        elif self.need_persist or self.only_load_factor:
            self.load_factor()

            #: trim data_df according to the already computed factor_df and computing_window
            #: data_df is only read to compute factor_df; stock picking and backtesting depend on factor_df alone
            #: so when a persisted factor_df exists, only the slice of data_df needed for the computation is kept
            if pd_is_not_null(self.data_df) and self.computing_window:
                dfs = []
                for entity_id, df in self.data_df.groupby(level=0):
                    latest_saved = get_data(
                        provider="zvt",
                        data_schema=self.factor_schema,
                        entity_id=entity_id,
                        order=self.factor_schema.timestamp.desc(),
                        limit=1,
                        index=[self.category_field, self.time_field],
                        return_type="domain",
                    )
                    if latest_saved:
                        df1 = df[df.timestamp < latest_saved[0].timestamp].iloc[-self.computing_window:]
                        if pd_is_not_null(df1):
                            df = df[df.timestamp >= df1.iloc[0].timestamp]
                    dfs.append(df)

                self.data_df = pd.concat(dfs)

        self.register_data_listener(self)

        #: loading the factor from db does not trigger the compute logic,
        #: so for the only_load_factor case we explicitly 1) load the factor from db and 2) compute the result
        if self.only_load_factor:
            self.compute()
Example #9
                 the_timestamp: Union[str, pd.Timestamp] = None, start_timestamp: Union[str, pd.Timestamp] = None,
                 end_timestamp: Union[str, pd.Timestamp] = None,
                 columns: List = ['id', 'entity_id', 'timestamp', 'level', 'open', 'close', 'high', 'low'],
                 filters: List = None, order: object = None, limit: int = None,
                 level: Union[str, IntervalLevel] = IntervalLevel.LEVEL_1DAY, category_field: str = 'entity_id',
                 time_field: str = 'timestamp', computing_window: int = None, keep_all_timestamp: bool = False,
                 fill_method: str = 'ffill', effective_number: int = None,
                 accumulator: Accumulator = None, need_persist: bool = False, dry_run: bool = False,
                 adjust_type: Union[AdjustType, str] = None, window=30) -> None:
        self.adjust_type = adjust_type

        transformer = TopBottomTransformer(window=window)
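        # the transformer supplies the 'top'/'bottom' columns exposed through factor_df below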

        super().__init__(entity_schema, provider, entity_provider, entity_ids, exchanges, codes, the_timestamp,
                         start_timestamp, end_timestamp, columns, filters, order, limit, level, category_field,
                         time_field, computing_window, keep_all_timestamp, fill_method, effective_number, transformer,
                         accumulator, need_persist, dry_run, adjust_type)


if __name__ == '__main__':
    factor = TopBottomFactor(codes=['601318'], start_timestamp='2005-01-01',
                             end_timestamp=now_pd_timestamp(),
                             level=IntervalLevel.LEVEL_1DAY, window=120)
    print(factor.factor_df)

    data_reader1 = DataReader(codes=['601318'], data_schema=Stock1dKdata, entity_schema=Stock)

    drawer = Drawer(main_df=data_reader1.data_df, factor_df_list=[factor.factor_df[['top', 'bottom']]])
    drawer.draw_kline()
# the __all__ is generated
__all__ = ['TopBottomTransformer', 'TopBottomFactor']
Example #10
                        xref='x',
                        yref='y',
                        text=item['flag'],
                        showarrow=True,
                        align='center',
                        arrowhead=2,
                        arrowsize=1,
                        arrowwidth=2,
                        # arrowcolor='#030813',
                        ax=-10,
                        ay=-30,
                        bordercolor='#c7c7c7',
                        borderwidth=1,
                        bgcolor=color,
                        opacity=0.8
                    ))

    return annotations


if __name__ == '__main__':
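    # overlay ma5/ma10 on the kline and show the derived slope column in a sub-chart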
    data_reader1 = DataReader(region=Region.CHN, codes=['002223'], data_schema=Stock1dKdata, entity_schema=Stock)
    data_reader2 = DataReader(region=Region.CHN, codes=['002223'], data_schema=Stock1dMaStateStats, entity_schema=Stock,
                              columns=['ma5', 'ma10', 'current_count', 'current_pct'])

    data_reader2.data_df['slope'] = 100 * data_reader2.data_df['current_pct'] / data_reader2.data_df['current_count']

    drawer = Drawer(main_df=data_reader1.data_df, factor_df=data_reader2.data_df[['ma5', 'ma10']],
                    sub_df=data_reader2.data_df[['slope']])
    drawer.draw_kline()
Example #11
    'CDLSHORTLINE': 'Short Line Candle',
    'CDLSPINNINGTOP': 'Spinning Top',
    'CDLSTALLEDPATTERN': 'Stalled Pattern',
    'CDLSTICKSANDWICH': 'Stick Sandwich',
    'CDLTAKURI': 'Takuri (Dragonfly Doji with very long lower shadow)',
    'CDLTASUKIGAP': 'Tasuki Gap',
    'CDLTHRUSTING': 'Thrusting Pattern',
    'CDLTRISTAR': 'Tristar Pattern',
    'CDLUNIQUE3RIVER': 'Unique 3 River',
    'CDLUPSIDEGAP2CROWS': 'Upside Gap Two Crows',
    'CDLXSIDEGAP3METHODS': 'Upside/Downside Gap Three Methods'
}

reader = DataReader(region=Region.US,
                    start_timestamp='2020-01-01',
                    data_schema=Stock1dKdata,
                    entity_schema=Stock,
                    provider=Provider.Yahoo)
gb = reader.data_df.groupby('code')
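# group the US daily kdata by stock code so each code can be scanned for candlestick patterns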

app = Flask(__name__)


@app.route('/')
def index():
    pattern = request.args.get('pattern', False)
    stocks = {}

    if pattern:
        pattern_function = getattr(talib, pattern)
Example #12
        transformer = TopBottomTransformer(window=window)

        super().__init__(region, entity_schema, provider, entity_ids,
                         exchanges, codes, the_timestamp, start_timestamp,
                         end_timestamp, columns, filters, order, limit, level,
                         category_field, time_field, computing_window,
                         keep_all_timestamp, fill_method, effective_number,
                         transformer, accumulator, need_persist, dry_run,
                         factor_name, clear_state, not_load_data, adjust_type)


if __name__ == '__main__':
    factor = TopBottomFactor(codes=['601318'],
                             start_timestamp='2005-01-01',
                             end_timestamp=now_pd_timestamp(Region.CHN),
                             level=IntervalLevel.LEVEL_1DAY,
                             window=120)
    print(factor.factor_df)

    data_reader1 = DataReader(region=Region.CHN,
                              codes=['601318'],
                              data_schema=Stock1dKdata,
                              entity_schema=Stock)

    drawer = Drawer(main_df=data_reader1.data_df,
                    factor_df_list=[factor.factor_df[['top', 'bottom']]])
    drawer.draw_kline(show=True)

# the __all__ is generated
__all__ = ['TopBottomTransformer', 'TopBottomFactor']
Example #13
    def __init__(self):
        pass

    def fit(self, X, y):
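        # fit LassoLarsIC (AIC criterion); its non-zero coefficients decide which columns transform() keeps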
        self.model = LassoLarsIC(criterion='aic').fit(X, y)
        return self

    def transform(self, X):
        return np.asarray(X)[:, abs(self.model.coef_) > 0]


if __name__ == '__main__':
    now = time.time()
    reader = DataReader(region=Region.US,
                        codes=['FB', 'AMD'],
                        data_schema=Stock1dKdata,
                        entity_schema=Stock,
                        provider=Provider.Yahoo)

    gb = reader.data_df.groupby('code')
    dfs = {x: gb.get_group(x) for x in gb.groups}

    df = dfs['AMD'][['open', 'close', 'volume', 'high', 'low']].copy()
    x_train, y_train, x_test, y_test, y_test_cohort = dataXY(df)

    plt.close()

    parameters = {
        #    'clf__base_estimator__n_estimators': np.round(np.linspace(100,400,10)).astype('int'),
        #    'clf__base_estimator__max_depth': [10,11,12],
        #    'clf__base_estimator__min_child_weight': [1],