def before_trading(self, event):
        """
        默认在这里显式调用各个flow。
        """

        # Trading day: every flow takes the trading date as its input date.
        trade_date = event.trading_dt.strftime("%Y%m%d")

        user_log.info(f"{trade_date}")

        # If the stored factorReturn for the previous trading day is None, the
        # store needs preheating: run store_flow over the whole rolling window
        # to fill it (silent=True).
        store_num = self.user_context.DM.get_tensor(
            self.data_source.trading_dates.get_previous_trading_date(
                trade_date), "store_flow.store.factorReturn").data
        if store_num is None:
            for _date in self.data_source.trading_dates.get_previous_trading_date(
                    trade_date, self.user_context.rolling_window -
                    np.arange(self.user_context.rolling_window)):
                self.user_context.DM.load_tensor(_date)
                self._store_flow.run(_date)

        # When predicting, assume the prediction is made before the open each day.

        self.user_context.DM.load_tensor(trade_date)
        self._store_flow.run(date=trade_date)
        self._prediction_stock.run(date=trade_date)
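The preheat loop walks offsets rolling_window - np.arange(rolling_window), i.e. [window, ..., 2, 1], so the store fills from the oldest date forward. A minimal sketch of that offset arithmetic, with a made-up five-day calendar and a stand-in get_previous_trading_date (both are illustrative assumptions, not the real data source):

    import numpy as np

    # Hypothetical trading calendar, oldest to newest.
    calendar = np.array(["20240102", "20240103", "20240104", "20240105", "20240108"])

    def get_previous_trading_date(date, offset):
        # offset may be a scalar or an array of look-back steps
        i = int(np.where(calendar == date)[0][0])
        return calendar[i - np.atleast_1d(offset)]

    window = 3
    # window - arange(window) == [3, 2, 1]: oldest first, ending one day back
    print(get_previous_trading_date("20240108", window - np.arange(window)))
    # -> ['20240103' '20240104' '20240105']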
Example #2
    def before_trading(self, event):
        """
        默认在这里显式调用各个flow。
        """
        # 交易日,各个flow的输入日期都是交易日
        trade_date = event.trading_dt.strftime("%Y%m%d")

        user_log.info(f"{trade_date}")

        # fit
        self._optim_flow.run(trade_date)
Example #3
    def before_trading(self, event):
        """
        By default, each flow is invoked explicitly here.
        """
        # Trading day: every flow takes the trading date as its input date.
        trade_date = event.trading_dt.strftime("%Y%m%d")
        self.user_context.DM.load_tensor(trade_date)

        user_log.info(f"{trade_date}")

        # fit
        self._estimation_flow.run(trade_date)
Example #4
    def query(self, tablename, factors=[], features=[], **kwargs):
        """
        根据条件查询
        返回 dataframe:
        cols如下:
           [factor,trade_date,featere1 ,featrue2,featrue3]
        :param tablename:
        :param factors:
        :param features:
        :return:
        """
        startdate = kwargs.get("startdate", "20100101")
        enddate = kwargs.get("enddate", "22222222")

        sql = "select trade_date,factor,item,value from " + tablename + " where trade_date >= %s and trade_date <= %s "

        data = []

        for feature in features:
            sql1 = sql + " and item = %s"

            self._re_conn()

            try:
                user_log.info(sql1)
                cur = self.conn.cursor()
                cur.execute(sql1, (startdate, enddate, feature))

                results = cur.fetchall()

                self.conn.commit()

                results = pd.DataFrame(
                    list(results),
                    columns=['trade_date', 'factor', 'item', 'value'])
                results.index = [results["factor"], results["trade_date"]]
                if len(factors) > 0:
                    results = results.loc[factors]

                results[feature] = results["value"]
                data.append(results[feature])
            except Exception as e:
                user_log.warning(e)
                return False
            finally:
                self.close()

        user_log.info("query done,begin merge!")
        df = pd.concat(data, axis=1)
        df = df.reset_index(level=["factor", "trade_date"])

        return df
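A usage sketch for query; the store instance, table, and item names are hypothetical:

    # Assuming `store` is an instance of this class and factor_main holds
    # (trade_date, factor, item, value) rows.
    df = store.query("factor_main",
                     factors=["pe_ratio"],      # optional filter on factor names
                     features=["ic", "ic_ir"],  # one output column per item
                     startdate="20200101",
                     enddate="20201231")
    # df columns: ['factor', 'trade_date', 'ic', 'ic_ir']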
Example #5
    def insert_feature(self, data, tablename, **kwargs):
        """

        data: dataframe 对象,取["trade_date","factor","item","value"]]4列
        :param data:
        :param tablename:
        :param kwargs:
        :return:
        """

        # Validate the column layout.
        try:
            check_data = data[["trade_date", "factor", "item", "value"]]
        except Exception as e:
            user_log.error("data columns incorrect, aborting database insert")
            return False

        # Replace inf/NaN so they are stored as SQL NULLs.
        check_data = check_data.replace([np.inf, -np.inf], np.nan)
        check_data = check_data.where(check_data.notnull(), None)

        force = kwargs.get("force", False)

        user_log.info("insert into table {},data records {} ".format(
            tablename, len(check_data)))
        sql = "insert into " + tablename + "(trade_date,factor,item,value) values(%s,%s,%s,%s)"
        if force:
            sql = sql.replace("insert", "replace")

        fill_values = check_data.values.tolist()

        self._re_conn()

        try:
            cur = self.conn.cursor()

            cur.executemany(sql, fill_values)
            self.conn.commit()
        except Exception as e:
            user_log.warning(e)
            return False
        finally:
            self.close()
        return True
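A usage sketch for insert_feature; the store instance, table, and values are hypothetical:

    import pandas as pd

    # One row per (trade_date, factor, item) triple; force=True rewrites the
    # INSERT into a REPLACE, overwriting rows with the same primary key.
    data = pd.DataFrame({
        "trade_date": ["20200102", "20200102"],
        "factor": ["pe_ratio", "pe_ratio"],
        "item": ["ic", "ic_ir"],
        "value": [0.031, 0.52],
    })
    ok = store.insert_feature(data, "factor_main", force=True)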
Example #6
0
    def delete_feature(self, tablename, factors=[], features=[], **kwargs):
        """
         删除rows,
         如果 factors = [] ,则删除所有factors的记录
         如果 featrure = [] ,则删除所有feature的记录
         如果 同时为空,不进行删除

        :param tablename:
        :param factors:
        :param features:
        :param kwargs:
        :return:
        """

        if len(factors) == 0 and len(features) == 0:
            user_log.info("missing parameters")
            return False

        sql = "delete from " + tablename + " where "

        params = []

        if len(factors) > 0:
            sql += " factor in  %s and"
            params.append(factors)
        if len(features) > 0:
            sql += " item in %s and"
            params.append(features)
        sql = sql[:-4]  # strip the trailing " and"

        self._re_conn()
        try:
            user_log.info("delete rows.....")
            cur = self.conn.cursor()
            cur.execute(sql, params)
            self.conn.commit()
            user_log.info("delete done")
        except Exception as e:
            user_log.warning(e)
            return False
        finally:
            self.close()
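A usage sketch for delete_feature; the store instance and names are hypothetical. When both lists are given, the two IN clauses combine with AND:

    store.delete_feature("factor_main", factors=["pe_ratio"])  # every item of pe_ratio
    store.delete_feature("factor_main", features=["ic"])       # "ic" rows of every factor
    store.delete_feature("factor_main")                        # both empty: returns False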
Example #7
    def _init_optim(self):
        prediction_flow = self.user_context.flow_config.get(
            "pred_flow_name", "prediction_stock")
        optim_flow = self.user_context.flow_config.get("optim_flow_name",
                                                       "optim_flow")
        forward_return_flow = self.user_context.flow_config.get(
            "forward_return_flow", "flow_forward_return")

        self._optim_flow = SensorFlow(name=optim_flow,
                                      data_manager=self.user_context.DM)

        # module 19. yesterday's holdings
        self._optim_flow.add_next_step(sensor=GetHolding,
                                       args=["holding", [], {}],
                                       kwds={"account": self.account})

        self._optim_flow.add_next_step(sensor=GetDate,
                                       args=["factor_as_of_date", [], {}],
                                       kwds={'offset': 1})

        # module 11. determine the universe used to clean the Alpha/Risk data
        self._optim_flow.add_next_step(
            sensor=GetFundamentalPool,
            args=[
                "stockCandidate",
                [
                    f"{optim_flow}.holding.weight",
                    f"{optim_flow}.factor_as_of_date.date"
                ], {}
            ],
            kwds={
                "pool_name": self.user_context.ff_name,
                "threshold": 0.3,
                # "benchmark_weight": "weight_index_500"
            },
            silent=False)

        factorList = {}
        for k in self.user_context.alphaFactorDataFrame.factor_dataFrame.factor:
            factorList[k + "_f1"] = FACTOR_STYLE.ALPHA

        # module 7. fetch alpha data
        self._optim_flow.add_next_step2(
            name="alphaPredData",
            sensor=GetFactorData,
            call=None,
            input_var=[f"{optim_flow}.factor_as_of_date.date"],
            kwds={"factorList": factorList})

        # module 8. fetch fitted_forward_return (note: this also uses future data)
        self._optim_flow.add_next_step2(
            name="fittedForwardReturnData",
            sensor=GetFactorData,
            call=None,
            input_var=[f"{optim_flow}.factor_as_of_date.date"],
            #kwds={"factorList": {'flow_estimation_fitted_f1': FACTOR_STYLE.ALPHA}}
            kwds={
                "factorList": {
                    'fake_forward_return_f1': FACTOR_STYLE.ALPHA
                }
            })

        # module 20. stock weight optimization

        kwds = {}
        kwds.update(self.optim_options)
        user_log.info("check constraint:{}".format(kwds))

        self._optim_flow.add_next_step(
            sensor=OptimizationStockWeight,
            args=[
                "optimizationStockWeight",
                [
                    "%s.fittedForwardReturnData.exposure" % optim_flow,
                    "%s.predictionFactorCovariance.factorCovariance" %
                    prediction_flow,
                    "%s.alphaPredData.exposure" % optim_flow,
                    "%s.alphaPredData.factorName" % optim_flow,
                    "%s.riskFactorData.exposure" % forward_return_flow,
                    "%s.riskFactorData.factorName" % forward_return_flow,
                    "%s.stockCandidate.pool" % optim_flow,
                    "%s.holding.weight" % optim_flow,
                    "%s.factor_as_of_date.date" % optim_flow
                ], {
                    "%s.alphaPredData.exposure" % optim_flow:
                    "alphaExposure",
                    "%s.alphaPredData.factorName" % optim_flow:
                    "alphaName",
                    "%s.riskFactorData.exposure" % forward_return_flow:
                    "riskExposure",
                    "%s.riskFactorData.factorName" % forward_return_flow:
                    "riskName",
                    "%s.fittedForwardReturnData.exposure" % optim_flow:
                    "stockReturn"
                }
            ],
            kwds=kwds,
            silent=True)
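The kwds forwarded here come from self.optim_options. A hypothetical example covering the keys that OptimizationStockWeight.do (Example #10 below) reads via kwargs.get, shown with its defaults (the "size" bound is illustrative):

    optim_options = {
        "lambdax": 1,           # risk-aversion multiplier
        "top": 200,             # initial number of stocks fed to the optimizer
        "single_max": 0.02,     # per-stock weight cap
        "tc_a": 0.5,            # transaction-cost term coefficients
        "tc_b": 1,
        "tc_power": 1.5,
        "benchmark_weight": "weight_index_500",
        "risk_condition": {     # benchmark-relative exposure bounds
            "up": {"size": 0.1},
            "down": {"size": 0.1},
        },
    }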
Example #8
    def _init_estimation_flow(self):
        flow_name = self.user_context.flow_config.get("est_flow_name", "est_flow")
        self._estimation_flow = SensorFlow(name=flow_name, data_manager=self.user_context.DM)

        # factor date
        self._estimation_flow.add_next_step2(name="factor_as_of_date",
                                             sensor=GetDate,
                                             kwds={'offset': self.user_context.forward_period + 2}
                                             )

        # module 4. determine the universe used to clean the Risk data
        self._estimation_flow.add_next_step2(name="riskPool",
                                             sensor=GetPool,
                                             call=None,
                                             # factor_as_of_date is used here
                                             input_var=[f"{flow_name}.factor_as_of_date.date"],
                                             kwds={"pool_name": self.user_context.pool_name})

        factorList = {}
        for k in self.user_context.riskFactorDataFrame.factor_dataFrame.factor:
            factorList[k] = FACTOR_STYLE.SECTOR if k.startswith("industry") else FACTOR_STYLE.RISK

        # module 6. fetch risk data
        self._estimation_flow.add_next_step2(name="riskFactorData",
                                             sensor=GetFactorData,
                                             call=None,
                                             input_var=[f"{flow_name}.riskPool.pool",
                                                        f"{flow_name}.factor_as_of_date.date"
                                                        ],
                                             kwds={"factorList": factorList,
                                                   "data_process_methods": {
                                                       FACTOR_STYLE.SECTOR: [],
                                                       FACTOR_STYLE.RISK: [
                                                           DataProcessing.do_process_extremum_winsorize,
                                                           DataProcessing.do_z_score_processing
                                                       ]
                                                   }},
                                             silent=False)

        try:
            open_price_type = self.user_context.est_open_price
        except Exception as e:
            user_log.warning("no est_open_price in config file")
            open_price_type = "open_aft"

        try:
            close_price_type = self.user_context.est_close_price
        except Exception as e:
            user_log.warning("no est_close_price in config file")
            close_price_type = "open_aft"

        user_log.info("est open_price_type is : " + open_price_type)
        user_log.info("est close_price_type is : " + close_price_type)

        # module 8. fetch return data
        self._estimation_flow.add_next_step2(name="returnData",
                                             sensor=GetReturnData, call=None,
                                             input_var=[f"{flow_name}.riskFactorData.exposure",
                                                        f"{flow_name}.riskPool.pool",
                                                        f"{flow_name}.factor_as_of_date.date"
                                                        ],
                                             alias={
                                                 f"{flow_name}.riskFactorData.exposure": "neutralize_matrix",
                                             },
                                             kwds={"data_process_methods": [
                                                 DataProcessing.do_process_extremum_winsorize,
                                                 DataProcessing.neutrialize],
                                                 "n": self.user_context.forward_period,
                                                 "open_price_type": open_price_type,
                                                 "close_price_type": close_price_type
                                             },
                                             silent=True)

        self._estimation_flow.add_next_step2(
            name="saveToNpy_return",
            sensor=SaveToBundleSensor,
            call=None,
            input_var=[f"{flow_name}.factor_as_of_date.date",
                       f"{flow_name}.returnData.stockReturn"],
            kwds={
                'bundle': self.user_context.config.base.data_bundle_path,
                'suffix': 'f1',
                'type': "return",
                'name': "forward_return_%d" % self.user_context.forward_period
            }
        )
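The two try/except blocks above fall back to "open_aft" whenever the config omits a price type. A hypothetical config fragment showing the attributes this flow reads (values illustrative):

    # Attributes looked up on user_context by _init_estimation_flow.
    est_open_price = "open_aft"   # default when absent
    est_close_price = "open_aft"  # default when absent
    forward_period = 5            # return horizon n; saved as forward_return_5, suffix "f1"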
Example #9
    def query_groupby(self, tablename, features={}, **kwargs):
        """
        查询,对features进行groupby 操作,
           例如:查询ic平均, 参数为features= {ic:SqlUtils.AVG}
        :param tabelename:
        :param features:
        :param kwargs:
        :return:
        """

        startdate = kwargs.get("startdate", "20120101")
        enddate = kwargs.get("enddate", "20222222")

        user_log.info("query start - end : {} - {} ".format(
            startdate, enddate))

        if len(features) == 0:
            user_log.warning("no features in parameters")
            return None

        result = []

        for feature in features.keys():
            for func in features[feature]:

                if type(func) == str:

                    # func may itself contain "(" (e.g. "sum(abs"), so close
                    # every parenthesis it opens, plus the one around "value".
                    count_close_parentheses = func.count("(") + 1
                    temp = [')'] * count_close_parentheses
                    sql = "select factor," + func + "(value" + "".join(temp) \
                          + " as " + feature + "_" + func.replace("(", "_") \
                          + " from " + tablename

                    sql += " where trade_date >= %s and trade_date  <= %s"
                    sql += " and item = %s"
                    sql += " group by factor"
                    user_log.info(sql)

                    self._re_conn()

                    columns = [
                        "factor", feature + "_" + func.replace("(", "_")
                    ]

                    try:
                        cur = self.conn.cursor()
                        cur.execute(sql, (startdate, enddate, feature))
                        data = cur.fetchall()
                        self.conn.commit()

                        df = pd.DataFrame(list(data), columns=columns)

                        df.index = df["factor"]

                        result.append(df[columns[-1]])

                    except Exception as e:
                        user_log.warning(e)
                        user_log.warning(
                            "error in query_groupby {}".format(feature))
                    finally:

                        self.close()
                else:

                    columns = [
                        "factor", feature + "_abs_gt_" + str(SqlUtils.GT_ABS_2)
                    ]

                    sql = "select factor, sum(if(abs(value) > %s,1,0))/count(value) "
                    sql += " as " + columns[-1]
                    sql += " from factor_main where "
                    sql += " trade_date >= %s and trade_date  <= %s  and item = %s group by factor"

                    user_log.info(sql)

                    self._re_conn()

                    try:
                        cur = self.conn.cursor()

                        cur.execute(
                            sql,
                            (SqlUtils.GT_ABS_2, startdate, enddate, feature))
                        data = cur.fetchall()
                        self.conn.commit()

                        df = pd.DataFrame(list(data), columns=columns)
                        df.index = df["factor"]
                        result.append(df[columns[-1]])

                    except Exception as e:
                        user_log.warning(e)
                        user_log.warning(
                            "error in query_groupby {}".format(feature))

                    finally:
                        self.close()

        df = pd.concat(result, axis=1)
        df = df.reset_index()

        return df
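A usage sketch for query_groupby; it assumes SqlUtils.AVG is the SQL aggregate string "avg" and SqlUtils.GT_ABS_2 is the numeric threshold (a non-string entry routes to the abs-threshold branch):

    df = store.query_groupby("factor_main",
                             features={"ic": [SqlUtils.AVG, SqlUtils.GT_ABS_2]},
                             startdate="20200101",
                             enddate="20201231")
    # one row per factor, with columns like ['factor', 'ic_avg', 'ic_abs_gt_2']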
Example #10
    def do(self, date, mp, **kwargs):

        # region read in parameters, corresponding to the input data

        # optimizer parameters
        lambdax = kwargs.get("lambdax", 1)  # lambda = 0.5?
        tc_a = kwargs.get("tc_a", 0.5)  # transaction-cost penalty parameter
        tc_b = kwargs.get("tc_b", 1)  # transaction-cost penalty parameter
        tc_power = kwargs.get("tc_power", 1.5)  # transaction-cost penalty parameter
        tc_c = kwargs.get("tc_c", 0)  # transaction-cost penalty parameter (linear term)
        n = kwargs.get("top", 200)  # the top n stocks enter the optimizer
        single_max = kwargs.get("single_max", 0.02)  # maximum weight per stock
        total_value = kwargs.get("total_value", 1000000)

        # benchmark weight
        weight_index = kwargs.get("benchmark_weight", "weight_index_500")

        # factor (alpha) matrix
        column = mp.alphaName
        exog = mp.alphaExposure

        # industry/style (risk) matrix

        risk_column = mp.riskName
        risk_factor = mp.riskExposure

        # covariance matrix
        cov = mp.factorCovariance

        # idiosyncratic (specific) risk
        if hasattr(mp, "sp_risk"):
            sp = mp.sp_risk
        else:
            sp = np.zeros_like(mp.stockReturn)

        # suspended stocks; non_suspend is all True/False with no NaN
        is_suspend = kwargs.get("is_susp", np.full(mp.stockReturn.size, 0))
        non_suspend = is_suspend == 0

        # compute the benchmark factor exposure
        benchmark_exposure = mp.data_manager.get_bar(date=mp.date,
                                                     columns=[weight_index
                                                              ])[weight_index]
        benchmark_exposure = np.nan_to_num(benchmark_exposure) / np.nansum(
            benchmark_exposure)
        benchmark_expo = np.dot(benchmark_exposure, np.nan_to_num(risk_factor))

        # endregion

        success = False
        while (not success) and n < 1500:
            stock_return = mp.stockReturn.copy()
            stock_return[np.any(np.isnan(exog), axis=1)] = np.nan

            # region determine the set of stocks entering the optimizer
            # 1. compute top_flag within mp.pool
            # 2. holding | top_flag
            # 3. factors not missing

            # step 1. compute top_flag within mp.pool
            # Recomputed on each pass of the while loop, but kept here for readability.
            stock_return[~mp.pool] = np.nan
            non_nan_cnt = np.sum(~np.isnan(stock_return))
            if non_nan_cnt < n:
                self.logger.warning("non_nan_cnt(%s) < n(%s)" %
                                    (non_nan_cnt, n))
                n = non_nan_cnt
            return_ordered_index = np.argsort(-stock_return)[:non_nan_cnt]
            top_flag = np.full(stock_return.size, False, dtype=bool)
            top_flag[return_ordered_index[:n]] = True

            # Anything in top_flag has a predicted_stock_return, so its data cannot be missing.
            candidates = top_flag.copy()
            # Remove the remaining NaN cases from candidates:
            # case 1. special_risk is NaN
            # candidates &= ~np.isnan(sp)

            # Also add the following case to candidates:
            # case 1. held AND stock_return present (i.e. data not missing)
            candidates |= (mp.weight > 0) & (~np.isnan(stock_return))

            # to solve: the decision variable w
            w = cp.Variable(np.sum(candidates))

            # held AND suspended AND data missing
            holding_suspend = (mp.weight > 0) & (is_suspend == 1) & (
                np.isnan(stock_return))
            holding_suspend_sum = np.sum(mp.weight[holding_suspend])
            candidates_cnt = np.nansum(candidates)

            # Everything below operates on vectors of length candidates_cnt.
            # risk_matrix = risk_factor[candidates]
            x = exog[candidates]
            w0 = mp.weight[candidates]

            if any(holding_suspend):
                for ix, _ in enumerate(holding_suspend):
                    if _:
                        if any(np.isnan(exog[ix])):
                            self.logger.warning(
                                "Holding %s has NaN factors %s" %
                                (mp.data_manager.codes[ix], column[np.isnan(
                                    exog[ix]).ravel()]))
                        if any(np.isnan(risk_factor[ix])):
                            self.logger.warning(
                                "Holding %s has NaN factors %s" %
                                (mp.data_manager.codes[ix],
                                 risk_column[np.isnan(
                                     risk_factor[ix]).ravel()]))

            # constraint: weights < 1 - holding_suspend_sum
            constraints = [cp.sum(w) == 1 - holding_suspend_sum]

            # constraint: suspend locked
            weight_locked = (candidates & ~non_suspend)[candidates]
            if np.sum(weight_locked) >= 1:
                constraints += [w[weight_locked] == w0[weight_locked]]

            # constraint:for the non suspend, single_max constraint
            constraints += [w[~weight_locked] <= single_max]

            # constraint:for the non suspend, weight > 0
            constraints += [w[~weight_locked] >= 0]

            # 3. industry/style exposure constraints, bounded relative to the benchmark
            risk_condition = kwargs.get("risk_condition", {
                "up": {},
                "down": {}
            })

            # constraint: risk expo control , ceil
            for k, v in risk_condition['up'].items():
                col_index = risk_column == k
                expo = risk_factor[candidates][:, col_index]
                ceil = benchmark_expo[col_index] + v
                constraints += [
                    cp.sum(cp.multiply(np.ravel(expo), -w)) >= -ceil
                ]

            # constraint:risk expo control, floor
            for k, v in risk_condition['down'].items():
                col_index = risk_column == k
                expo = risk_factor[candidates][:, col_index]
                floor = benchmark_expo[col_index] - v
                constraints += [
                    cp.sum(cp.multiply(np.ravel(expo), w)) >= floor
                ]

            try:
                # transaction cost terms
                as_of_date = mp.date
                z = w - w0

                all_spread = mp.data_manager.get_bar(
                    date=as_of_date,
                    columns=["trade_spread_0935_1000"],
                    codes=mp.data_manager.codes)["trade_spread_0935_1000"]
                all_trade_price = mp.data_manager.get_bar(
                    date=as_of_date,
                    columns=["trade_price_0935_1000_n"],
                    codes=mp.data_manager.codes)["trade_price_0935_1000_n"]
                all_amount = mp.data_manager.get_bar(
                    date=as_of_date,
                    columns=["amount"],
                    codes=mp.data_manager.codes)["amount"]
                all_tcost_sigma = mp.data_manager.get_bar(
                    date=as_of_date,
                    columns=["pct_std22"],
                    codes=mp.data_manager.codes)["pct_std22"]
                all_a = tc_a * all_spread / all_trade_price

                # transaction cost: first term coefficient
                a = all_a[candidates]

                tcost_sigma = all_tcost_sigma[candidates]
                # transaction cost: second term coefficient
                c1 = tcost_sigma / np.sqrt(
                    all_amount[candidates] / total_value)

                # missing transaction cost, use default: 0.003
                ix = np.isnan(a) | np.isnan(c1) | np.isinf(c1)
                if ix.sum() > 0:
                    self.logger.info("%s missing transaction cost" % ix.sum())
                a[ix] = 0.003
                c1[ix] = 0.0

                # transaction cost: first term
                exp1 = cp.multiply(a, cp.abs(z))

                # transaction cost: second term
                power = tc_power
                exp2 = tc_b * cp.multiply(c1, cp.abs(z)**power)

                # transaction cost: third term
                exp3 = tc_c * z

                tcost_expr = exp1 + exp2 + exp3
                tcost_expr = cp.sum(tcost_expr)

                # predicted return term
                pred_returnp_expr = cp.sum(
                    cp.multiply(stock_return[candidates], w))

                assert (pred_returnp_expr.is_concave())

                # risk term
                """
                self.expression = cvx.sum_squares(cvx.multiply(
                np.sqrt(locator(self.idiosync, t).values), wplus)) + \
                cvx.quad_form((wplus.T * locator(self.exposures, t).values.T).T,
                              locator(self.factor_Sigma, t).values)
                """
                risk_expr = 2 * lambdax * cp.sum(cp.quad_form(
                    (w.T * x).T, cov))

                assert (risk_expr.is_convex())

                for el in constraints:
                    assert (el.is_dcp())

                prob = cp.Problem(
                    cp.Maximize(pred_returnp_expr - risk_expr - tcost_expr),
                    constraints)

                prob.solve(solver=cp.ECOS)

                if prob.status == "optimal" or prob.status == "optimal_inaccurate":
                    user_log.info("status:{}".format(prob.status))
                    # user_log.info("w : {}".format(w.value))
                    user_log.info("sum(w):{}".format(np.sum(w.value)))

                    target_weight = np.full(stock_return.size,
                                            0,
                                            dtype=np.double)
                    target_weight[holding_suspend] = mp.weight[holding_suspend]
                    target_weight[candidates] = np.round(w.value, 6)

                    # check expo
                    # user_log.info("max(w):{}", np.max(w.value))
                    # user_log.info("min(w):{}", np.min(w.value))
                    #
                    # import pandas as pd
                    # expo = pd.DataFrame()
                    #
                    # diff = risk_factor[candidates].T.dot(w.value) - benchmark_expo
                    # expo["factor"] = risk_column
                    # expo["diff"] = diff
                    # expo["abs"] = expo["diff"].abs()
                    # expo = expo.sort_values(by="abs", ascending=False)
                    # user_log.info(expo.head(50))

                    return target_weight,

                else:
                    user_log.info("status: {}".format(prob.status))
                    user_log.warning(
                        "optim failed at top n={}, retrying with n + 300".format(n))
                    n += 300

            except Exception as e:
                import traceback
                traceback.print_exc()
                break

        target_weight = mp.weight
        return target_weight,
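For reference, the problem assembled in the try-block above, written with z = w - w0, alpha exposure matrix X (exog[candidates]), factor covariance \Sigma, and per-stock cost coefficients a_i, c_i; a sketch read off the code, not an authoritative statement of the model:

    \max_{w}\; r^{\top} w \;-\; 2\lambda\,(X^{\top} w)^{\top}\,\Sigma\,(X^{\top} w)
            \;-\; \sum_{i}\big( a_i\,|z_i| \;+\; b\,c_i\,|z_i|^{p} \;+\; c\,z_i \big)

subject to \sum_i w_i = 1 - s (with s the total weight of suspended holdings whose data is missing), locked weights for suspended names, 0 <= w_i <= single_max for the rest, and the benchmark-relative exposure bounds from risk_condition.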