Esempio n. 1
0
def factor_wash(factor_data, stockList, industry_code, date):
    """Clean raw factor data and return the cleaned result.

    Applies four steps in order: outlier clipping, missing-value filling,
    neutralization and standardization.

    Args:
        factor_data: Factor values to clean; fed through every step.
        stockList: Forwarded unchanged to ``tools.replace_nan_indu``.
        industry_code: Forwarded unchanged to ``tools.replace_nan_indu``.
        date: Forwarded unchanged to ``tools.replace_nan_indu``.

    Returns:
        The factor data after all four steps have been applied.
    """
    # Outlier clipping. private_tools currently only handles a single Series;
    # switch to it once DataFrame-level support is finished.
    factor_data = winsorize_med(factor_data, scale=5, inf2nan=False, axis=0)
    # Missing-value handling (already supported by private_tools).
    factor_data = tools.replace_nan_indu(factor_data, stockList, industry_code,
                                         date)
    # Neutralization (already supported by private_tools).
    factor_data = neutralize(factor_data, mkt_cap=False, industry=False)
    # Standardization. private_tools currently only handles a single Series;
    # switch to it once DataFrame-level support is finished.
    factor_data = standardlize(factor_data, axis=0)
    # The original fell off the end here and handed callers None.
    return factor_data
def data_preprocessing(factor_data, stockList, industry_code, date):
    """Run the factor-cleaning pipeline and return the processed data.

    Steps, in order: outlier clipping, missing-value filling,
    neutralization against ``'sw_l1'`` and ``'market_cap'``, and
    standardization.

    Args:
        factor_data: Factor values to process.
        stockList: Forwarded unchanged to ``replace_nan_indu``.
        industry_code: Forwarded unchanged to ``replace_nan_indu``.
        date: Forwarded to ``replace_nan_indu`` and to ``neutralize``.

    Returns:
        The output of the final standardization step.
    """
    # Outlier clipping.
    clipped = winsorize_med(factor_data, scale=5, inf2nan=False, axis=0)
    # Missing-value handling.
    filled = replace_nan_indu(clipped, stockList, industry_code, date)
    # Neutralization.
    neutralized = neutralize(
        filled,
        how=['sw_l1', 'market_cap'],
        date=date,
        axis=0,
    )
    # Standardization.
    return standardlize(neutralized, axis=0)
Esempio n. 3
0
def clean_factor(factors, date):
    """Fill, clip, standardize and neutralize a set of factors.

    Args:
        factors: Factor data exposing ``fillna`` and ``mean``
            (presumably a pandas DataFrame; confirm against the caller).
        date: Converted with ``str()`` and passed to ``neutralize``.

    Returns:
        The output of the final neutralization step.
    """
    # Replace missing entries with the result of ``factors.mean()``.
    filled = factors.fillna(factors.mean())

    # Outlier clipping.
    clipped = winsorize_med(filled, scale=3, inclusive=True, inf2nan=True,
                            axis=0)

    # Standardization.
    scaled = standardlize(clipped, inf2nan=True, axis=0)

    # Neutralization against 'sw_l1' and 'pe_ratio'.
    return neutralize(scaled, ['sw_l1', 'pe_ratio'], date=str(date), axis=0)
Esempio n. 4
0
def _build_model(method, use_grid_search, cv):
    """Return an unfitted regressor for ``method``, or None if it is unknown.

    Args:
        method: One of ``'svr'``, ``'lr'``, ``'ridge'`` or ``'rf'``.
        use_grid_search: When true, wrap the base estimator in GridSearchCV.
        cv: Cross-validation splitter handed to GridSearchCV.
    """
    if not use_grid_search:
        # Fixed hyper-parameters, no search.
        if method == 'svr':
            from sklearn.svm import SVR
            return SVR(C=100, gamma=1)
        if method == 'lr':
            from sklearn.linear_model import LinearRegression
            return LinearRegression()
        if method == 'ridge':
            from sklearn.linear_model import Ridge
            return Ridge(random_state=42, alpha=100)
        if method == 'rf':
            from sklearn.ensemble import RandomForestRegressor
            return RandomForestRegressor(random_state=42, n_estimators=500,
                                         n_jobs=-1)
        return None

    # Grid search over a small per-method parameter grid.
    para_grid = {}
    if method == 'svr':
        from sklearn.svm import SVR
        para_grid = {'C': [10, 100], 'gamma': [0.1, 1, 10]}
        base_model = SVR()
    elif method == 'lr':
        from sklearn.linear_model import LinearRegression
        base_model = LinearRegression()
    elif method == 'ridge':
        from sklearn.linear_model import Ridge
        para_grid = {'alpha': [1, 10, 100]}
        base_model = Ridge()
    elif method == 'rf':
        from sklearn.ensemble import RandomForestRegressor
        para_grid = {'n_estimators': [100, 500, 1000]}
        base_model = RandomForestRegressor()
    else:
        return None

    from sklearn.model_selection import GridSearchCV
    return GridSearchCV(base_model, para_grid, cv=cv, n_jobs=-1)


def handle_data(context, data):
    """Fit a model on ``log_mcap``, rank stocks by residual and rebalance.

    Does nothing unless ``g.if_trade`` equals True. Otherwise it builds a
    training set and a current cross-section, preprocesses both, fits the
    model selected by ``g.method`` / ``g.__gridserach``, sorts stocks by
    (actual - predicted) ``log_mcap`` in ascending order, picks the slice
    for ``g.group`` and places sell and buy orders.

    Args:
        context: Strategy context; ``previous_date`` and ``portfolio`` are read.
        data: Unused here (presumably required by the platform's callback
            signature).

    Raises:
        ValueError: If ``g.method`` is not 'svr', 'lr', 'ridge' or 'rf'.
            The original failed later with a NameError on an unbound model.
    """
    # Compared with != True (not truthiness) to mirror the original == True.
    if g.if_trade != True:
        return

    # Count trades.
    g.__tradeCount = g.__tradeCount + 1

    # Assemble the training set.
    yesterday = context.previous_date
    df_train = get_df_train(g.__q, yesterday, g.__trainlength, g.__intervals)
    df_train = initialize_df(df_train)

    # Current cross-section (test set).
    df = get_fundamentals(g.__q, date=None)
    df = initialize_df(df)

    # Outlier handling.
    for fac in g.__winsorizeList:
        df_train[fac] = winsorize_med(df_train[fac], scale=5, inclusive=True,
                                      inf2nan=True, axis=0)
        df[fac] = winsorize_med(df[fac], scale=5, inclusive=True,
                                inf2nan=True, axis=0)

    # Standardization.
    for fac in g.__standardizeList:
        df_train[fac] = standardlize(df_train[fac], inf2nan=True, axis=0)
        df[fac] = standardlize(df[fac], inf2nan=True, axis=0)

    # Neutralization (industry neutral).
    df_train = neutralize(df_train, g.__industry_set)
    df = neutralize(df, g.__industry_set)

    # Training inputs (including validation data) and training target.
    X_trainval = df_train[g.__factorList].fillna(0)
    y_trainval = df_train[['log_mcap']].fillna(0)

    # Test inputs and test target; the target is re-indexed by stock code.
    X = df[g.__factorList].fillna(0)
    y = df[['log_mcap']].fillna(0)
    y.index = df['code']

    kfold = KFold(n_splits=4)
    # "!= False" mirrors the original "== False" test for the non-grid branch.
    model = _build_model(g.method, g.__gridserach != False, kfold)
    if model is None:
        g.__scoreWrite = False
        raise ValueError(
            "unsupported g.method %r; expected 'svr', 'lr', 'ridge' or 'rf'"
            % (g.method,))

    # Fit on the training set, then predict the test set.
    model.fit(X_trainval, y_trainval)
    y_pred = model.predict(X)

    # New factor: actual value minus predicted value (the residual).
    factor = y - pd.DataFrame(y_pred, index=y.index, columns=['log_mcap'])

    # Sort residuals in ascending order. sort_index(by=...) no longer exists
    # in current pandas; sort_values is its replacement.
    factor = factor.sort_values(by='log_mcap')

    # Used for group testing: size each group as a quantile of the universe.
    if g.invest_by_group == True:
        g.stocknum = int(len(factor.index) * g.quantile)

    start = g.stocknum * (g.group - 1)
    end = g.stocknum * g.group
    stockset = list(factor.index[start:end])
    # Sets give O(1) membership tests in the loops below.
    target_codes = set(stockset)

    current_data = get_current_data()

    # Sell: holdings that left the target set, are in the feasible universe,
    # and are not priced at their high limit.
    sell_list = list(context.portfolio.positions.keys())
    held_before = set(sell_list)
    for stock in sell_list:
        if stock in target_codes or stock not in g.__feasible_stocks:
            continue
        if current_data[stock].last_price == current_data[stock].high_limit:
            continue
        order_target_value(stock, 0)

    # Split available cash evenly across the open slots.
    if len(context.portfolio.positions) < g.stocknum:
        num = g.stocknum - len(context.portfolio.positions)
        cash = context.portfolio.cash / num
    else:
        cash = 0
        num = 0

    # Buy: skip names held before this call and names priced at their low
    # limit. With no open slots the original looped over every candidate
    # placing zero-value orders (num started at 0 and never hit the break),
    # so the loop is skipped entirely in that case.
    if num > 0:
        for stock in stockset[:g.stocknum]:
            if stock in held_before:
                continue
            if current_data[stock].last_price == current_data[stock].low_limit:
                continue
            order_target_value(stock, cash)
            num = num - 1
            if num == 0:
                break