Example #1
0
def main():
    """Set up a feature-selection run on the 'is_trade' label and start it.

    The selector is created with Sequence and Random switched on and Cross
    switched off.  The loss function is registered with direction
    'descend', a LightGBM classifier is attached as the model, and the run
    is started with the ``validation`` callable.
    """
    excluded_columns = [
        'used', 'instance_id', 'item_property_list', 'context_id',
        'context_timestamp', 'predict_category_property', 'is_trade'
    ]
    starting_columns = [
        'item_category_list', 'item_price_level', 'item_sales_level',
        'item_collected_level', 'item_pv_level', 'day'
    ]
    model_settings = {
        'random_state': 1,
        'num_leaves': 6,
        'n_estimators': 5000,
        'max_depth': 3,
        'learning_rate': 0.05,
        'n_jobs': 8,
    }

    # Select the way you want to process searching.
    selector = FS.Select(Sequence=True, Random=True, Cross=False)
    selector.ImportDF(prepareData(), label='is_trade')
    selector.ImportLossFunction(modelscore, direction='descend')
    selector.ImportCrossMethod(CrossMethod)
    selector.InitialNonTrainableFeatures(excluded_columns)
    selector.InitialFeatures(starting_columns)
    selector.GenerateCol(key='mean', selectstep=2)
    selector.SetSample(0.1, samplemode=0, samplestate=0)
    # selector.SetFeaturesLimit(5)
    selector.SetTimeLimit(1)
    selector.clf = lgbm.LGBMClassifier(**model_settings)
    selector.SetLogFile('recordml.log')
    selector.run(validation)
Example #2
0
def main():
    """Run a Sequence + Cross search on the 'Survived' label.

    Random search is switched off.  The run starts from an empty feature
    list, offers 'Pclass' as a potential feature, uses a
    ``LogisticRegression`` model and writes its log to 'record.log'.
    """
    target = 'Survived'

    selector = FS.Select(Sequence=True, Random=False, Cross=True)
    selector.ImportDF(prepareData(), label=target)
    selector.ImportLossFunction(modelscore, direction='ascend')
    selector.ImportCrossMethod(CrossMethod)
    # The label column itself must not be used as a training feature.
    selector.InitialNonTrainableFeatures([target])
    selector.InitialFeatures([])
    selector.AddPotentialFeatures(['Pclass'])
    selector.clf = LogisticRegression()
    selector.SetLogFile('record.log')
    selector.run(validation)
Example #3
0
def main():
    """Run a Sequence + Random + Cross search on the 'Survived' label.

    This variant configures the selector by assigning the
    ``NonTrainableFeatures``, ``PotentialAdd`` and ``logfile`` attributes
    directly instead of going through setter calls.
    """
    target = 'Survived'

    selector = FS.Select(Sequence=True, Random=True, Cross=True)
    selector.ImportDF(prepareData(), label=target)
    selector.ImportLossFunction(modelscore, direction='ascend')
    selector.ImportCrossMethod(CrossMethod)
    selector.NonTrainableFeatures = [target]
    selector.InitialFeatures([])
    selector.PotentialAdd = ['Pclass']
    # Alternative model:
    # selector.clf = lgbm.LGBMClassifier(random_state=1, num_leaves=6,
    #                                    n_estimators=5000, max_depth=3,
    #                                    learning_rate=0.05, n_jobs=1)
    selector.clf = LogisticRegression()
    selector.logfile = 'record.log'
    selector.run(validation)
def main():
    """Run a sampled Sequence + Cross search on the 'Survived' label.

    Random search is switched off.  ``GenerateCol`` is called with its
    defaults, the sample is set to 0.5 with ``samplemode=0`` and
    ``samplestate=0``, 'Pclass' is offered as a potential feature, and the
    time limit is set to 0.2.  The log goes to 'record2.log'.
    """
    target = 'Survived'

    selector = FS.Select(Sequence=True, Random=False, Cross=True)
    selector.ImportDF(prepareData(), label=target)
    selector.ImportLossFunction(modelscore, direction='ascend')
    selector.ImportCrossMethod(CrossMethod)
    selector.InitialNonTrainableFeatures([target])
    selector.InitialFeatures([])
    selector.GenerateCol()
    selector.SetSample(0.5, samplemode=0, samplestate=0)
    selector.AddPotentialFeatures(['Pclass'])
    selector.clf = LogisticRegression()
    selector.SetLogFile('record2.log')
    # selector.SetFeaturesLimit(5)
    selector.SetTimeLimit(0.2)
    selector.run(validation)
Example #5
0
def main():
    """Run a Sequence-only search on the 'buy' label with a LightGBM classifier.

    Random and Cross are switched off and no cross method is imported.  The
    loss function ``score1`` is registered with direction 'ascend', and the
    run is started with the ``validate`` callable.
    """
    excluded_columns = [
        'buy', 'nextbuy', 'o_date', 'a_date', 'PredictDays', 'user_id'
    ]
    starting_columns = [
        'age_x', 'sex_x', 'user_lv_cd_x', 'buycnt', 'daybeforelastbuy_o_ave'
    ]
    model_settings = {
        'random_state': 1,
        'num_leaves': 6,
        'n_estimators': 1000,
        'max_depth': 3,
        'learning_rate': 0.2,
        'n_jobs': 8,
    }

    selector = FS.Select(Sequence=True, Random=False, Cross=False)
    selector.ImportDF(prepareData(), label='buy')
    selector.ImportLossFunction(score1, direction='ascend')
    # selector.ImportCrossMethod(CrossMethod)
    selector.InitialNonTrainableFeatures(excluded_columns)
    selector.InitialFeatures(starting_columns)
    # selector.PotentialAdd = ['daybeforelastbuy_o_mean']
    selector.clf = lgbm.LGBMClassifier(**model_settings)
    selector.SetLogFile('record.log')
    selector.run(validate)
def main():
    """Run a Sequence-only search on the 'nextbuy' label with a LightGBM regressor.

    Uses the ``df``, ``score`` and ``validate`` names from the enclosing
    module.  Random and Cross are switched off.
    """
    excluded_columns = [
        'buy', 'nextbuy', 'o_date', 'a_date', 'PredictDays', 'user_id'
    ]
    starting_columns = [
        'age_x', 'sex_x', 'user_lv_cd_x', 'buycnt', 'daybeforelastbuy_o_ave'
    ]
    model_settings = {
        'random_state': 1,
        'num_leaves': 6,
        'n_estimators': 1000,
        'max_depth': 3,
        'learning_rate': 0.2,
        'n_jobs': 8,
    }

    # Initialize the selector and choose the search procedures you need.
    selector = FS.Select(Sequence=True, Random=False, Cross=False)
    # Import the dataset and the target label.
    selector.ImportDF(df, label='nextbuy')
    # Import the evaluation function and the optimization direction.
    selector.ImportLossFunction(score, direction='ascend')
    # Set the features that must not be used.
    selector.InitialNonTrainableFeatures(excluded_columns)
    # Set the starting feature combination.
    selector.InitialFeatures(starting_columns)
    # Generate the feature pool (see the readme in the repository root for
    # this function's arguments).
    selector.GenerateCol()
    # Set the sampling ratio and the random mode.
    selector.SetSample(1, samplemode=1)
    # Set the maximum running time of the algorithm, in minutes.
    selector.SetTimeLimit(100)
    # Set the regression model.
    selector.clf = lgbm.LGBMRegressor(**model_settings)
    # Set the log file.
    selector.SetLogFile('record.log')
    # Pass in the validation function and start running.
    selector.run(validate)
Example #7
0
def main():
    """Run a Random-only search on 'is_trade' seeded from a feature CSV.

    The starting features are read from the 'feature' column of
    'model/dif_on_feature_mean_4_25.csv': the column is reversed and the
    first 100 entries of the reversed list are kept.
    """
    model_settings = {
        'max_depth': 7,
        'n_jobs': 60,
        'num_leaves': 64,
        'seed': 1080,
        'learning_rate': 0.05,
        'n_estimators': 400,
        'colsample_bytree': 0.8,
        'subsample': 0.8,
    }

    # Select the way you want to process searching.
    selector = FS.Select(Sequence=False, Random=True, Cross=False)
    selector.ImportDF(prepareData(), label='is_trade')
    selector.ImportLossFunction(modelscore, direction='descend')
    selector.ImportCrossMethod(CrossMethod)
    selector.InitialNonTrainableFeatures(['context_timestamp', 'is_trade'])

    feature_table = loadCSV('model/dif_on_feature_mean_4_25.csv')
    feature_names = list(feature_table['feature'].values)
    reversed_names = feature_names[::-1]
    selector.InitialFeatures(reversed_names[:100])

    selector.clf = lgbm.LGBMClassifier(**model_settings)

    selector.SetLogFile('./model/record_100_7_50.log')
    selector.run(validation)
Example #8
0
def main():
    """Run a sampled Sequence-only search on 'is_trade' using ``columns_2`` lists.

    The non-trainable features come from ``columns_2.drop_columns`` and the
    potential features from ``columns_2.all_select_columns``; the starting
    feature list is empty.  Random and Cross are switched off.
    """
    model_settings = {
        'random_state': 1024,
        'num_leaves': 63,
        'n_estimators': 100,
        'max_depth': 7,
        'learning_rate': 0.1,
    }
    log_path = (
        '/home/share/liangxiao/second_stage/feature_selection/recordml_2.log'
    )

    # Select the way you want to process searching.
    selector = FS.Select(Sequence=True, Random=False, Cross=False)
    selector.ImportDF(prepareData(), label='is_trade')
    selector.ImportLossFunction(modelscore, direction='descend')
    selector.ImportCrossMethod(CrossMethod)
    selector.InitialNonTrainableFeatures(columns_2.drop_columns)
    selector.InitialFeatures([])
    selector.GenerateCol(key='mean', selectstep=2)
    selector.SetSample(0.1, samplemode=0, samplestate=0)
    selector.AddPotentialFeatures(columns_2.all_select_columns)
    # selector.SetFeaturesLimit(20)
    selector.SetTimeLimit(100)
    selector.clf = lgbm.LGBMClassifier(**model_settings)
    selector.SetLogFile(log_path)
    selector.run(validation)
Example #9
0
    return np.mean(totaltest)

def add(x, y):
    """Return ``x + y``."""
    total = x + y
    return total

def substract(x, y):
    """Return ``x - y``."""
    difference = x - y
    return difference

def times(x, y):
    """Return ``x * y``."""
    product = x * y
    return product

def divide(x, y):
    """Return ``(x + 0.001) / (y + 0.001)``.

    The same 0.001 offset is added to both operands, so a ``y`` of zero
    yields a denominator of 0.001 rather than zero.
    """
    offset = 0.001
    numerator = x + offset
    denominator = y + offset
    return numerator / denominator

# Maps each operator symbol to the two-argument function implementing it.
CrossMethod = {
    '+': add,
    '-': substract,
    '*': times,
    '/': divide,
}

# Module-level example run: configure a Sequence-only selector on the
# 'label' column and start it with the `validation` callable.
sf = FS.Select(Sequence = True, Random = False, Cross = False) # initialize the selector and choose the search procedures you need
sf.ImportDF(df, label ='label') # import the dataset and the target label
#sf.ImportCrossMethod(CrossMethod)
sf.ImportLossFunction(score, direction = 'descend') # import the evaluation function and the optimization direction
sf.InitialNonTrainableFeatures(['id','date', 'label']) # set the features that must not be used
sf.InitialFeatures(feature_columns) # set the starting feature combination
sf.GenerateCol() # generate the feature pool (see the readme in the repository root for this function's arguments)
sf.SetSample(1, samplemode = 1) # set the sampling ratio and the random mode
sf.SetTimeLimit(240) # set the maximum running time of the algorithm, in minutes
sf.clf = lgb.LGBMClassifier(random_state=10, num_leaves =15, n_estimators=200, max_depth=5, learning_rate = 0.1, n_jobs=-1) # set the model
sf.SetLogFile('record.log') # set the log file
sf.run(validation) # pass in the validation function and start running