コード例 #1
0
def training_k_data(start, end):
    """Train PCA and every enabled classifier for the stock-pool slice [start:end).

    Each code is processed independently; failures are logged and skipped so
    one bad code cannot abort the whole batch.
    """
    pool = stock_pool_dao.get_list()

    for code in pool['code'].values[start:end]:
        try:
            logger.debug('begin training mode, code:%s' % code)
            data, features = k_data_dao.get_k_data_with_features(
                code, '2015-01-01',
                datetime.now().strftime("%Y-%m-%d"))

            # PCA is trained first: the classifiers load it to transform features.
            # The sequential neural classifier stays disabled, as in the original.
            trainers = (
                PCAModel('k_data'),
                LogisticRegressionClassifier(),
                SupportVectorClassifier(),
                RandomForestClassifierModel(),
                XGBoostClassier(),
                # SequantialNeuralClassifier(),
            )
            for trainer in trainers:
                trainer.training_model(code, data, features)

            logger.debug('training mode end, code:%s' % code)
        except Exception as e:
            logger.error("training k data error, code:%s, error:%s" %
                         (code, repr(e)))
コード例 #2
0
def predict_k_data():
    """Run every trained classifier on the latest 60m data of each HS300 code
    and persist the predictions; per-code failures are logged and skipped."""
    hs300 = ts.get_hs300s()
    df_index = index_k_data_60m_dao.get_rel_price()

    for code in hs300['code'].values:
        try:
            logger.debug('begin predict, code:%s' % code)
            data, features = k_data_60m_dao.get_k_predict_data_with_features(
                code, df_index)

            # one prediction per model family, keyed by the log column name
            predictions = {
                'logistic_regression':
                    LogisticRegressionClassifier().predict(code, data),
                'support_vector_classifier':
                    SupportVectorClassifier().predict(code, data),
                'random_forest_classifier':
                    RandomForestClassifierModel().predict(code, data),
                'xgb_classifier':
                    XGBoostClassier().predict(code, data),
                'sequantial_neural':
                    SequantialNeuralClassifier().predict(code, data),
            }

            k_data_60m_predict_log_dao.insert(code, **predictions)
            logger.debug('predict end, code:%s' % code)

        except Exception as e:
            logger.error("predict k data error, code:%s, error:%s" %
                         (code, repr(e)))
コード例 #3
0
def back_test():
    """Replay the KDJ strategy over each trading day of the window and dump
    the resulting portfolio context as JSON to the debug log."""
    context = Context(start='2017-01-01', end='2018-07-14', base_capital=50000)

    strategy = KDJStrategy()
    strategy.init(context)

    try:
        trading_days = list(
            k_data_dao.get_trading_days(start=context.start,
                                        end=context.end,
                                        futu_quote_ctx=strategy.futu_quote_ctx))

        strategy.before_trade()

        for trade_day in trading_days:
            context.current_date = trade_day
            strategy.before_handle_data()
            strategy.handle_data()

    finally:
        # always release the futu connection, even if a day's handling raises
        strategy.futu_quote_ctx.close()

    context_json = json.dumps(context, default=obj_dict)
    logger.debug("context:" + context_json)
コード例 #4
0
    def get_index_k_data_test(self):
        """Fetch HSI index K data for a fixed range and assert it is not None."""
        frame = index_k_data_dao.get_k_data("^HSI",
                                            start="2018-01-01",
                                            end="2018-05-21")
        logger.debug(frame.head())
        self.assertIsNotNone(frame)
コード例 #5
0
def training_k_data():
    """Train PCA plus every classifier on 60m data for each HS300 constituent.

    Failures are logged per code and do not stop the batch.
    """
    hs300 = ts.get_hs300s()
    for code in hs300['code'].values:
        try:
            logger.debug('begin training mode, code:%s' % code)
            data, features = k_data_60m_dao.get_k_data_with_features(
                code, '2015-01-01',
                datetime.now().strftime("%Y-%m-%d"))

            # PCA first: the classifiers load the fitted PCA to transform X
            trainers = (
                PCAModel(MODULE_NAME),
                LogisticRegressionClassifier(),
                SupportVectorClassifier(),
                RandomForestClassifierModel(),
                XGBoostClassier(),
                SequantialNeuralClassifier(),
            )
            for trainer in trainers:
                trainer.training_model(code, data, features)

            logger.debug('training mode end, code:%s' % code)
        except Exception as e:
            logger.error("training k data error, code:%s, error:%s" %
                         (code, repr(e)))
コード例 #6
0
    def get_k_data(self, code, start_date, end_date):
        """Download daily K-line data for *code* from Yahoo Finance.

        Returns a DataFrame with columns date/open/high/low/close/volume plus
        `code` and `pre_close` (previous day's close); the first row is dropped
        by dropna() because it has no pre_close.  Raises on any download or
        parse failure after logging it.
        """
        try:
            start_ts = self.string2ts(start_date)
            end_ts = self.string2ts(end_date)

            cookie, crumb = self.get_cookie_crumb(code)

            url = "https://query1.finance.yahoo.com/v7/finance/download/%s?period1=%s&period2=%s&interval=1d&events" \
                  "=history&crumb=%s" % (
                      code, start_ts, end_ts, crumb)

            logger.debug(url)
            # timeout prevents a stalled connection from hanging the caller forever
            response = requests.get(url, cookies=cookie, timeout=30)
            # fail fast on HTTP errors instead of feeding an error page to read_csv
            response.raise_for_status()

            df = pd.read_csv(io.StringIO(response.content.decode('utf-8')))
            df["code"] = code
            df = df.drop(columns=['Adj Close'])
            df = df.rename(columns={'Date': 'date', 'Open': 'open', 'High': 'high',
                                    'Low': 'low', 'Close': 'close', 'Volume': 'volume'})

            df['pre_close'] = df['close'].shift(1)
            df = df.dropna()
            return df
        except Exception as e:
            logger.error(repr(e))
            # bare raise preserves the original traceback; `raise e` would reset it
            raise
コード例 #7
0
ファイル: jqka_finance_api.py プロジェクト: zhaofengqiu/quant
    def get_stock_performance(self, code, name):
        """Scrape the 10jqka finance page for *code* and return a one-row-per-
        report DataFrame of performance metrics, or None when the page cannot
        be parsed (best-effort scraper).
        """
        headers = {
            'user-agent':
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.181 Safari/537.36'
        }
        url = "http://basic.10jqka.com.cn/%s/finance.html#stockpage" % (code)

        # timeout so one slow request cannot stall a whole batch run
        r = requests.get(url, headers=headers, timeout=30)
        try:
            # page is GBK-encoded; the metrics live in a JSON blob inside #main
            selector = etree.HTML(r.content.decode('gbk'))

            content = selector.xpath('//*[@id="main"]/text()')[0]

            data = json.loads(content)

            df1 = pd.DataFrame(data['simple'], index=None)

            df2 = df1.T
            df = pd.DataFrame(
                df2.values,
                columns=[
                    'report_date', 'esp', 'net_profits', 'profits_yoy',
                    'not_net_profits', 'not_profits_yoy', 'business_income',
                    'business_income_yoy', 'bvps', 'roe', 'roe_tanbo',
                    'net_debt_ratio', 'reservedPerShare',
                    'undistributed_profit_per_share', 'cash_flow_per_share',
                    'sales_gross_margin', 'inventory_turnover', 'sales_margin'
                ])

            # prepend code/name columns (filled below)
            df = df.reindex(columns=[
                'code', 'name', 'report_date', 'esp', 'net_profits',
                'profits_yoy', 'not_net_profits', 'not_profits_yoy',
                'business_income', 'business_income_yoy', 'bvps', 'roe',
                'roe_tanbo', 'net_debt_ratio', 'reservedPerShare',
                'undistributed_profit_per_share', 'cash_flow_per_share',
                'sales_gross_margin', 'inventory_turnover', 'sales_margin'
            ])

            # strip unit suffixes: percentage columns end with '%',
            # money columns end with '亿' (hundred-million CNY)
            percent_columns = [
                'profits_yoy', 'not_profits_yoy', 'business_income_yoy',
                'roe', 'roe_tanbo', 'net_debt_ratio', 'sales_gross_margin',
                'sales_margin'
            ]
            for col in percent_columns:
                df[col] = df[col].str.strip('%')
            for col in ['net_profits', 'not_net_profits', 'business_income']:
                df[col] = df[col].str.strip('亿')

            df['code'] = code
            df['name'] = name

            return df

        except Exception as e:
            # best-effort: swallow parse failures and signal with None
            logger.debug(repr(e))
            return None
コード例 #8
0
    def training_model(self, code, data, features):
        """Train a fully connected binary classifier for *code* and save it as h5.

        The pipeline scales X, projects it through the per-code PCA model,
        splits 70/30, trains for 10 epochs, logs train/test/full scores and
        persists both the metrics and the model.
        """
        X = data[features]
        y = data['next_direction']

        # normalization
        X = preprocessing.scale(X)

        # project through the PCA model previously trained for this code
        pca = PCAModel(self.module_name).load(code)
        X = pca.transform(X)

        X_train, x_test, y_train, y_test = train_test_split(X, y, test_size=.3)

        # normalization (second scaling pass kept as in the original pipeline)
        X_train = preprocessing.scale(X_train)
        x_test = preprocessing.scale(x_test)

        # the input width must match the PCA-transformed matrix, which may
        # have fewer columns than len(features)
        input_dim_len = X_train.shape[1]

        sequantial_model = Sequential()

        sequantial_model.add(
            Dense(512, input_dim=input_dim_len, activation='relu'))
        sequantial_model.add(Dropout(0.5))
        sequantial_model.add(Dense(128, activation='relu'))
        sequantial_model.add(Dropout(0.5))

        # sigmoid keeps the output in [0, 1] as binary_crossentropy requires;
        # tanh can emit negative values and produce NaN losses
        sequantial_model.add(Dense(1, activation='sigmoid'))
        sequantial_model.compile(optimizer='sgd',
                                 loss='binary_crossentropy',
                                 metrics=['accuracy'])

        # training performance
        sequantial_model.fit(X_train, y_train, epochs=10, batch_size=128)
        train_model_score = sequantial_model.evaluate(X_train,
                                                      y_train,
                                                      batch_size=128)

        # test performance
        test_model_score = sequantial_model.evaluate(x_test,
                                                     y_test,
                                                     batch_size=128)
        logger.debug('test model score: %s' % test_model_score)

        # evaluate on the full *transformed* data set; the raw data[features]
        # would not match the network's PCA-sized input layer
        full_model_score = sequantial_model.evaluate(X, y)
        logger.debug('full model score: %s' % full_model_score)

        # persist the metrics (index 1 of evaluate() is accuracy)
        k_data_60m_model_log_dao.insert(code=code,
                                        name=self.model_name,
                                        best_estimator=None,
                                        train_score=train_model_score[1],
                                        test_score=test_model_score[1],
                                        desc="full_model_score:%s" %
                                        full_model_score[1])
        # persist the model in h5 format
        sequantial_model.save(
            self.get_model_path(code, self.module_name, self.model_name, 'h5'))
コード例 #9
0
    def test_predict(self):
        """Smoke-test the support vector classifier's predict path."""
        code = "600196"
        rel_price = index_k_data_dao.get_rel_price()
        frame, feature_cols = k_data_dao.get_k_predict_data_with_features(
            code, rel_price)
        logger.debug("features:%s, length:%s" % (feature_cols, len(feature_cols)))

        frame.to_csv("result.csv")
        prediction = SupportVectorClassifier().predict(code, frame[feature_cols])

        logger.debug("predict:%s" % prediction)
コード例 #10
0
    def exists(self, code):
        """Return True when a k_data_model_log row exists for *code* dated today."""
        # reflect the table from the live MySQL metadata
        log_table = Table('k_data_model_log', dataSource.mysql_quant_metadata,
                          autoload=True)
        today = datetime_utils.get_current_date()
        query = select([log_table.c.code, log_table.c.date]) \
            .where(and_(log_table.c.date == today, log_table.c.code == code))

        rows = dataSource.mysql_quant_conn.execute(query)
        logger.debug("row count:%s" % rows.rowcount)
        return rows.rowcount > 0
コード例 #11
0
    def test_training(self):
        """Train PCA then a ridge regression on 60m data for one code."""
        code = '600196'
        data, features = k_data_60m_dao.get_k_data_with_features(
            code, '2015-01-01', datetime.now().strftime("%Y-%m-%d"))

        logger.debug("features:%s, length:%s" % (features, len(features)))

        # fit PCA first so the regression model can load and apply it
        PCAModel('k_data').training_model(code=code, data=data, features=features)

        RidgeRegressionModel().training_model(code, data, features)
コード例 #12
0
    def test_training(self):
        """Train PCA then the support vector classifier for one code."""
        code = '600276'
        # all rows from 2015-01-01 through today
        data, features = k_data_dao.get_k_data_with_features(
            code, '2015-01-01', datetime.now().strftime("%Y-%m-%d"))

        logger.debug("features:%s" % features)

        # fit PCA first so the classifier can load and apply it
        PCAModel('k_data').training_model(code=code, data=data, features=features)

        SupportVectorClassifier().training_model(code, data, features)
コード例 #13
0
    def test_predict(self):
        """Smoke-test the XGBoost classifier's predict path."""
        code = '600276'
        # relative index prices come from the index dao and the prediction
        # features from the 60m K-data dao; the two calls were swapped here
        # relative to the sibling tests and the production predict_k_data flow
        df_index = index_k_data_60m_dao.get_rel_price()
        df, features = k_data_60m_dao.get_k_predict_data_with_features(
            code, df_index)
        logger.debug("features:%s, length:%s" % (features, len(features)))

        df.to_csv("result.csv")
        model = XGBoostClassier()
        y_predict = model.predict(code, df[features])

        print(y_predict)
コード例 #14
0
    def test_predict(self):
        """Smoke-test the random forest classifier's predict path."""
        code = "600276"
        df_index = index_k_data_60m_dao.get_rel_price()
        df, features = k_data_60m_dao.get_k_predict_data_with_features(
            code, df_index)
        logger.debug("features:%s, length:%s" % (features, len(features)))

        df.to_csv("result.csv")
        model = RandomForestClassifierModel()
        # use the code under test; the original predicted hard-coded "600196"
        # while its features were fetched for "600276"
        y_predict = model.predict(code, df[features])

        logger.debug("predict:%s" % y_predict)
コード例 #15
0
    def test_predict(self):
        """Smoke-test the sequential neural classifier's predict path."""
        code = '600704'

        rel_price = index_k_data_dao.get_rel_price()
        frame, feature_cols = k_data_dao.get_k_predict_data_with_features(
            code, rel_price)
        logger.debug("features:%s, length:%s" % (feature_cols, len(feature_cols)))

        frame.to_csv("result.csv")
        prediction = SequantialNeuralClassifier().predict(code, frame[feature_cols])

        logger.debug("predict:%s" % prediction)
コード例 #16
0
    def test_predict(self):
        """Smoke-test the logistic regression classifier's predict path."""
        code = '600196'
        df_index = index_k_data_60m_dao.get_rel_price()

        # use the code variable instead of repeating the "600196" literal
        df, features = k_data_60m_dao.get_k_predict_data_with_features(
            code, df_index)
        logger.debug("features:%s, length:%s" % (features, len(features)))

        df.to_csv("result.csv")
        model = LogisticRegressionClassifier()
        y_predict = model.predict(code, df[features])

        print(y_predict)
コード例 #17
0
ファイル: decorators.py プロジェクト: zhaofengqiu/quant
    def fn(*args, **kv):
        """Wrapper that calls *func* and logs its elapsed wall time in ms."""
        # perf_counter is monotonic and high-resolution, so the measurement
        # cannot go negative/jump if the system clock is adjusted mid-call
        start_time = time.perf_counter()
        tmp = func(*args, **kv)
        end_time = time.perf_counter()

        if not kv:
            logger.debug("%s executed,  elapsed time: %.2f ms" %
                         (func.__name__, (end_time - start_time) * 1000))
        else:
            logger.debug("%s executed, kv:%s,  elapsed time: %.2f ms" %
                         (func.__name__, str(kv),
                          (end_time - start_time) * 1000))
        return tmp
コード例 #18
0
    def test_training(self):
        """Train PCA then the XGBoost classifier on 60m data for one code."""
        code = '600276'
        # all rows from 2015-01-01 through today
        data, features = k_data_60m_dao.get_k_data_with_features(
            code, '2015-01-01',
            datetime.now().strftime("%Y-%m-%d"))

        logger.debug("features:%s" % features)

        # fit PCA first so the classifier can load and apply it
        PCAModel(MODULE_NAME).training_model(code=code, data=data,
                                             features=features)

        XGBoostClassier().training_model(code, data, features)
コード例 #19
0
    def test_training(self):
        """Train PCA then the logistic regression classifier on 60m data."""
        code = '600196'
        # all rows from 2015-01-01 through today
        data, features = k_data_60m_dao.get_k_data_with_features(
            code, '2015-01-01',
            datetime.now().strftime("%Y-%m-%d"))

        logger.debug("features:%s, length:%s" % (features, len(features)))

        data.to_csv("result.csv")
        # fit PCA first so the classifier can load and apply it
        PCAModel(MODULE_NAME).training_model(code=code, data=data,
                                             features=features)

        LogisticRegressionClassifier().training_model(code, data, features)
コード例 #20
0
    def test_training(self):
        """Train PCA then logistic regression on daily training data."""
        code = '600196'
        # all rows from 2012-01-01 through today, fetched via the futu context
        today = datetime.now().strftime("%Y-%m-%d")
        data, features = k_data_dao.get_k_training_data(
            code, '2012-01-01', today, self.futu_quote_ctx)

        data.to_csv("result.csv")
        logger.debug("features:%s, length:%s" % (features, len(features)))

        # fit PCA first so the classifier can load and apply it
        PCAModel('k_data').training_model(code=code, data=data,
                                          features=features)

        LogisticRegressionClassifier().training_model(code, data, features)
コード例 #21
0
    def training_model(self, code, data, features, *args):
        """Fit a linear regression predicting the close price and persist it.

        Scales X, applies the per-code PCA projection, evaluates on a
        chronologically-held-out 30% split, then refits on all data and saves
        both the metrics and the model.  Extra *args are accepted for
        interface compatibility and ignored.
        """
        X = data[features]
        y = data['close']

        # normalization
        X = preprocessing.scale(X)

        # project through the PCA model trained for this code
        pca = PCAModel(self.module_name).load(code)
        X = pca.transform(X)

        # shuffle=False keeps the time ordering (random_state is then inert)
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=.3,
                                                            shuffle=False,
                                                            random_state=10)

        LR_model = linear_model.LinearRegression()

        LR_model.fit(X_train, y_train)

        # R^2 on the hold-out window
        test_score = LR_model.score(X_test, y_test)

        y_pred = LR_model.predict(X_test)

        # compute the MSE once and reuse it; the original recomputed it for
        # the log line
        mse = metrics.mean_squared_error(y_test, y_pred)
        logger.debug('mse: %s' % mse)
        mse = '%.4e' % mse

        # refit on the full data set before persisting
        LR_model.fit(X, y)

        # persist the metrics
        # NOTE(review): train_score receives the hold-out R^2 and test_score
        # the MSE string — looks intentional here, but worth confirming
        k_data_model_log_dao.insert(code=code,
                                    name=self.model_name,
                                    best_estimator=LR_model,
                                    train_score=test_score,
                                    test_score=mse)

        # persist the model
        joblib.dump(
            LR_model,
            self.get_model_path(code, self.module_name, self.model_name))
コード例 #22
0
ファイル: stock_pool_dao.py プロジェクト: zhaofengqiu/quant
    def init_pool(self):
        """Rebuild the stock pool: truncate it, then keep every stock that is
        profitable (eps >= 0), growing (profits_yoy >= 0) and liquid enough
        (7-day average turnover >= 65M), and write the survivors to MySQL.
        """
        self.truncate()

        stocks = stock_dao.query_all()

        # roughly two years of daily K data for all stocks in one query
        k_data_list = k_data_dao.get_multiple_k_data(start=get_next_date(-720), end=get_current_date())
        df = pd.DataFrame(columns=['code', 'name'])

        for stock in stocks:
            try:

                k_data = k_data_list.loc[k_data_list['code'] == fill_market(stock.code)]

                # skip codes with no K data *before* indexing values[-1];
                # the original checked after and relied on the broad except
                if len(k_data['code'].values) == 0:
                    continue

                k_data = k_data.join(cal_macd(k_data))
                k_data['turnover7'] = cal_mavol7(k_data, column='turnover')

                k_turnover7 = k_data['turnover7'].values[-1]

                stock_basic = stock_basic_dao.get_by_code(stock.code)
                eps_value = stock_basic['eps'].values[0]
                profits_yoy_value = stock_basic['profits_yoy'].values[0]

                if eps_value < 0:
                    continue

                if profits_yoy_value < 0:
                    continue

                if k_turnover7 < 65000000:
                    continue

                # `row` instead of `dict` — don't shadow the builtin
                row = {"code": stock.code, "name": stock.name}
                df = df.append(row, ignore_index=True)
                logger.debug("append code:%s" % stock.code)
            except Exception:
                logger.debug("code:%s, error:%s" % (stock.code, traceback.format_exc()))

        df.to_sql('stock_pool', dataSource.mysql_quant_engine, if_exists='append', index=False)
        '''
コード例 #23
0
ファイル: hight_light.py プロジェクト: zhaofengqiu/quant
def cal_bk_vol():
    """Aggregate the 3-day moving-average volume per industry board (bk),
    rank the boards by it and dump the result to bk_csv.csv."""
    bk_vol_frame = pd.DataFrame(columns=['bkcode', 'bk_name', 'total_mavol_3'])

    # boards excluded from the ranking
    filter_list = [
        'BK0743', 'BK0804', 'BK0568', 'BK0707', 'BK0701', 'BK0611', 'BK0705',
        'BK0612', 'BK0500'
    ]
    org_bk_code_list = list(stock_industry_dao.get_bkcode_list().values)

    bk_code_list = [c for c in org_bk_code_list if c not in filter_list]

    for bk in bk_code_list:
        # bk is a one-element row from .values, hence bk[0] for the code
        bk_stocks = stock_industry_dao.get_by_bkcode(bk[0])
        bk_vol3 = 0
        try:
            bk_name = bk_stocks['bk_name'][0]
        except Exception:
            # narrowed from a bare `except:` which also swallowed
            # KeyboardInterrupt / SystemExit
            bk_name = 'N/A'

        for code in bk_stocks['code'].values:

            stock_df = k_data_dao.get_k_data(code=code,
                                             start=get_next_date(-30),
                                             end=get_current_date())

            if len(stock_df) == 0:
                continue

            stock_df['mavol3'] = cal_mavol3(stock_df)
            try:
                bk_vol3 += stock_df['mavol3'].values[-1:][0]
            except Exception as e:
                logger.debug("code:%s, error:%s" % (code, repr(e)))
        # NOTE(review): 'bkcode' stores the whole row object bk, not bk[0] —
        # kept as-is; confirm whether the scalar code was intended
        bk_vol_frame.loc[bk_vol_frame.shape[0] + 1] = {
            'bkcode': bk,
            'bk_name': bk_name,
            'total_mavol_3': bk_vol3
        }

    bk_vol_frame = bk_vol_frame.sort_values('total_mavol_3', ascending=False)

    # utf_8_sig adds a BOM so Excel opens the Chinese names correctly
    bk_vol_frame.to_csv('bk_csv.csv', encoding='utf_8_sig')
コード例 #24
0
    def training_model(self, code, data, features):
        """Grid-search an XGBoost classifier for *code*, evaluate it on a
        chronological 30% hold-out, refit on all data and persist it."""
        X = data[features]
        y = data['next_direction']

        # standardize features
        X = preprocessing.scale(X)

        # project through the per-code PCA model
        X = PCAModel(self.module_name).load(code).transform(X)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=.3, shuffle=False)

        search_space = [{
            'learning_rate': [0.05, 0.1, 0.3],
            'max_depth': range(2, 8, 2),
            'subsample': [0.7],
            'min_child_weight': range(1, 6, 2)
        }]

        gs_search = GridSearchCV(
            estimator=xgb.XGBClassifier(n_estimators=100, random_state=10),
            param_grid=search_space,
            n_jobs=-1)

        gs_result = gs_search.fit(X_train, y_train)

        logger.debug(gs_search.best_params_)
        # best cross-validation score found by the grid search
        logger.debug("XGBoost Classier's best score: %.4f" %
                     gs_result.best_score_)

        best_clf = gs_search.best_estimator_
        # refit on the training split only
        best_clf.fit(X_train, y_train)

        # hold-out accuracy
        test_score = accuracy_score(y_test, best_clf.predict(X_test))
        logger.debug('test score: %.4f' % test_score)

        # final refit on all data before persisting
        best_clf.fit(X, y)

        # persist the metrics
        k_data_model_log_dao.insert(code=code,
                                    name=self.model_name,
                                    best_estimator=gs_search.best_estimator_,
                                    train_score=gs_search.best_score_,
                                    test_score=test_score)
        # persist the model
        joblib.dump(
            best_clf,
            self.get_model_path(code, self.module_name, self.model_name))
コード例 #25
0
    def training_model(self, code, data, features):
        """Grid-search an RBF support vector classifier for *code*, evaluate
        it on a chronological 30% hold-out, refit on all data and persist it."""
        X = data[features]
        y = data['next_direction']

        # standardize features
        X = preprocessing.scale(X)

        # project through the per-code PCA model
        X = PCAModel(self.module_name).load(code).transform(X)

        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=.3, shuffle=False)

        search_space = [{
            'kernel': ['rbf'],
            'gamma': [1e-3, 1e-4],
            'C': [1, 10, 100, 1000]
        }]

        # grid-search the SVM hyper-parameters
        grid = GridSearchCV(svm.SVC(), search_space, cv=None, n_jobs=-1)
        grid.fit(X_train, y_train)

        logger.debug(grid.best_estimator_)
        # best cross-validation score found by the grid search
        logger.debug("Support Vector Classifier's best score: %.4f" %
                     grid.best_score_)

        best_svc = grid.best_estimator_
        # refit on the training split only
        best_svc.fit(X_train, y_train)

        # hold-out accuracy
        test_score = accuracy_score(y_test, best_svc.predict(X_test))
        logger.debug('test score: %.4f' % test_score)

        # final refit on all data before persisting
        best_svc.fit(X, y)

        # persist the metrics
        k_data_model_log_dao.insert(code=code,
                                    name=self.model_name,
                                    best_estimator=grid.best_estimator_,
                                    train_score=grid.best_score_,
                                    test_score=test_score)

        # persist the model
        joblib.dump(
            best_svc,
            self.get_model_path(code, self.module_name, self.model_name))
コード例 #26
0
    def training_model(self, code, data, features):
        """Grid-search a logistic regression for *code*, evaluate it on a
        chronological 30% hold-out, refit on all data and persist it."""
        X = data[features]
        y = data['next_direction']

        # standardize features
        X = preprocessing.scale(X)

        # project through the per-code PCA model
        X = PCAModel(self.module_name).load(code).transform(X)

        # 70% train / 30% test, preserving the time ordering
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=.3, shuffle=False)

        # cross-validate the regularization: penalty kind and strength C
        search_space = {
            'penalty': ['l1', 'l2'],
            'C': [0.001, 0.01, 0.1, 1, 10, 100]
        }

        # grid-search training
        grid = GridSearchCV(LogisticRegression(), search_space, cv=None)
        grid.fit(X_train, y_train)
        logger.debug(grid.best_estimator_)
        # best cross-validation score found by the grid search
        logger.debug("logistic regression's best score: %.4f" %
                     grid.best_score_)

        best_lr = grid.best_estimator_
        # refit on the training split only
        best_lr.fit(X_train, y_train)

        # hold-out accuracy
        test_score = accuracy_score(y_test, best_lr.predict(X_test))
        logger.debug('test score: %.4f' % test_score)

        # final refit on all data before persisting
        best_lr.fit(X, y)

        # persist the metrics
        k_data_model_log_dao.insert(code=code,
                                    name=self.model_name,
                                    best_estimator=grid.best_estimator_,
                                    train_score=grid.best_score_,
                                    test_score=test_score)
        # persist the model
        joblib.dump(
            best_lr,
            self.get_model_path(code, self.module_name, self.model_name))
コード例 #27
0
    def training_model(self, code, data, features):
        """Grid-search a random forest for *code*, rebuild it with the best
        leaf/split parameters, evaluate on a chronological 30% hold-out,
        refit on all data and persist it."""
        X = data[features]
        y = data['next_direction']

        # normalization
        X = preprocessing.scale(X)

        # project through the per-code PCA model
        pca = PCAModel(self.module_name).load(code)
        X = pca.transform(X)

        # split the scaled + PCA-transformed matrix; the original split the
        # raw data[features], inconsistent with the final rf1.fit(X, y) and
        # potentially mismatching the PCA-reduced column count
        X_train, X_test, y_train, y_test = train_test_split(X,
                                                            y,
                                                            test_size=.3,
                                                            shuffle=False)

        rfc_model = RandomForestClassifier(max_features='sqrt',
                                           max_depth=14,
                                           oob_score=True)

        tuned_parameter = {
            'n_estimators': [50],
            'min_samples_leaf': range(10, 60, 10),
            'min_samples_split': range(20, 100, 20)
        }

        gs_result = GridSearchCV(estimator=rfc_model,
                                 param_grid=tuned_parameter,
                                 scoring='roc_auc',
                                 cv=None,
                                 n_jobs=-1)

        gs_result.fit(X_train, y_train)

        logger.debug('auc: %s' % gs_result.best_score_)

        min_samples_leaf = gs_result.best_params_['min_samples_leaf']
        min_samples_split = gs_result.best_params_['min_samples_split']

        # rebuild with the tuned leaf/split params; max_depth=3 here matches
        # the estimator string recorded below
        rf1 = RandomForestClassifier(n_estimators=50,
                                     min_samples_leaf=min_samples_leaf,
                                     min_samples_split=min_samples_split,
                                     max_features='sqrt',
                                     max_depth=3,
                                     oob_score=True,
                                     n_jobs=-1,
                                     random_state=10)

        rf1.fit(X_train, y_train)

        logger.debug('oob: %s' % rf1.oob_score_)

        # hold-out accuracy
        test_score = rf1.score(X_test, y_test)
        logger.debug('test score: %.4f' % test_score)

        # final refit on all data before persisting
        rf1.fit(X, y)

        rf1_str = "RandomForestClassifier(n_estimators=50, min_samples_leaf=%s" \
                  ",min_samples_split=%s, max_features='sqrt',max_depth=3, " \
                  "oob_score=True, n_jobs=-1, random_state=10)" % (min_samples_leaf, min_samples_split)

        # persist the metrics
        k_data_model_log_dao.insert(code=code,
                                    name=self.model_name,
                                    best_estimator=rf1_str,
                                    train_score=gs_result.best_score_,
                                    test_score=test_score,
                                    desc="oob_score_:%s" % rf1.oob_score_)

        # persist the model
        joblib.dump(
            rf1, self.get_model_path(code, self.module_name, self.model_name))
コード例 #28
0
        # 死叉
        # if pre_k > pre_d and ((k_value <= d_value) or (abs(k_value - d_value) <= 10)):
        #
        #     shares = self.context.portfolio.positions[code].shares
        #     # 清仓
        #     if shares > 0:
        #         self.sell_value(code, shares)


if __name__ == '__main__':
    # Ad-hoc manual run: execute the KDJ strategy once for today's date and
    # log the resulting capital/balance, then release the futu connection.
    context = Context(start='2018-07-01', end='2018-07-14', base_capital=50000)

    kdj = KDJStrategy()
    kdj.init(context)

    context.current_date = get_current_date()
    kdj.handle_data()

    logger.debug("base_capital:%s" % context.base_capital)
    # NOTE(review): 'blance' is the attribute's actual (misspelled) name on
    # Context — the log text mirrors it on purpose
    logger.debug("blance:%s" % context.blance)

    # context.current_date = convert_to_datetime('2018-07-04')
    # kdj.handle_data()

    # logger.debug(context.order_book[1])
    logger.debug("blance:%s" % context.blance)
    logger.debug("base_capital:%s" % context.base_capital)

    kdj.futu_quote_ctx.close()
コード例 #29
0
ファイル: kdj.py プロジェクト: zhaofengqiu/quant
    return label, k, d


if __name__ == '__main__':

    df_pool = stock_pool_dao.get_list()

    data = pd.DataFrame(columns=[
        'code', 'date', 'name', 'bk_code', 'bk_name', 'k', 'd', 'label'
    ])

    list = []
    for index, row in df_pool.iterrows():
        code = row['code']
        logger.debug("execute code:%s" % code)
        try:
            label, k, d = cal_single_stock(code)

            if label is None:
                continue

            if label == 'up':
                df_stock_industry = stock_industry_dao.get_by_code(code[3:])
                name = df_stock_industry['name'].values[0]

                if name.find('ST') > -1:
                    continue

                bk_code = df_stock_industry['bk_code'].values[0]
                bk_name = df_stock_industry['bk_name'].values[0]
コード例 #30
0
ファイル: short_stop.py プロジェクト: zhaofengqiu/quant
async def register(websocket):
    """Add a newly connected websocket to the active-user set and broadcast."""
    USERS.add(websocket)
    logger.debug(USERS)
    # push the updated state to every connected client
    await notify_users()