def main():
    """Run sequential + random feature search for the 'is_trade' classification task."""
    # Select the way you want to process searching.
    selector = FS.Select(Sequence=True, Random=True, Cross=False)
    selector.ImportDF(prepareData(), label='is_trade')
    # Lower score is better for this metric, hence 'descend'.
    selector.ImportLossFunction(modelscore, direction='descend')
    selector.ImportCrossMethod(CrossMethod)
    # Columns that must never enter the model (ids, timestamps, the label itself).
    selector.InitialNonTrainableFeatures([
        'used', 'instance_id', 'item_property_list', 'context_id',
        'context_timestamp', 'predict_category_property', 'is_trade'
    ])
    # Starting feature combination for the search.
    selector.InitialFeatures([
        'item_category_list', 'item_price_level', 'item_sales_level',
        'item_collected_level', 'item_pv_level', 'day'
    ])
    selector.GenerateCol(key='mean', selectstep=2)
    selector.SetSample(0.1, samplemode=0, samplestate=0)
    # selector.SetFeaturesLimit(5)
    selector.SetTimeLimit(1)  # minutes
    selector.clf = lgbm.LGBMClassifier(random_state=1, num_leaves=6,
                                       n_estimators=5000, max_depth=3,
                                       learning_rate=0.05, n_jobs=8)
    selector.SetLogFile('recordml.log')
    selector.run(validation)
    sf = selector  # keep the conventional short alias in scope
def main():
    """Feature search for the Titanic 'Survived' task with cross-term generation enabled."""
    sf = FS.Select(Sequence=True, Random=False, Cross=True)
    sf.ImportDF(prepareData(), label='Survived')
    # Higher score is better here, hence 'ascend'.
    sf.ImportLossFunction(modelscore, direction='ascend')
    sf.ImportCrossMethod(CrossMethod)
    sf.InitialNonTrainableFeatures(['Survived'])
    sf.InitialFeatures([])  # start from an empty feature set
    sf.AddPotentialFeatures(['Pclass'])
    sf.clf = LogisticRegression()
    sf.SetLogFile('record.log')
    sf.run(validation)
def main():
    """Titanic 'Survived' search variant that configures the selector via direct
    attribute assignment (NonTrainableFeatures / PotentialAdd / logfile) instead
    of the setter methods used elsewhere in this file."""
    sf = FS.Select(Sequence=True, Random=True, Cross=True)
    sf.ImportDF(prepareData(), label='Survived')
    sf.ImportLossFunction(modelscore, direction='ascend')
    sf.ImportCrossMethod(CrossMethod)
    sf.NonTrainableFeatures = ['Survived']
    sf.InitialFeatures([])
    sf.PotentialAdd = ['Pclass']
    # sf.clf = lgbm.LGBMClassifier(random_state=1, num_leaves=6, n_estimators=5000, max_depth=3, learning_rate=0.05, n_jobs=1)
    sf.clf = LogisticRegression()
    sf.logfile = 'record.log'
    sf.run(validation)
def main():
    """Titanic 'Survived' search with generated feature pool, 50% sampling and a
    0.2-minute time budget."""
    selector = FS.Select(Sequence=True, Random=False, Cross=True)
    selector.ImportDF(prepareData(), label='Survived')
    selector.ImportLossFunction(modelscore, direction='ascend')
    selector.ImportCrossMethod(CrossMethod)
    selector.InitialNonTrainableFeatures(['Survived'])
    selector.InitialFeatures([])
    selector.GenerateCol()  # build the candidate feature pool
    selector.SetSample(0.5, samplemode=0, samplestate=0)
    selector.AddPotentialFeatures(['Pclass'])
    selector.clf = LogisticRegression()
    selector.SetLogFile('record2.log')
    # selector.SetFeaturesLimit(5)
    selector.SetTimeLimit(0.2)  # minutes
    selector.run(validation)
def main():
    """Sequential-only feature search for the 'buy' classification task."""
    sf = FS.Select(Sequence=True, Random=False, Cross=False)
    sf.ImportDF(prepareData(), label='buy')
    sf.ImportLossFunction(score1, direction='ascend')
    # sf.ImportCrossMethod(CrossMethod)
    # Ids, dates and label-related columns are excluded from training.
    sf.InitialNonTrainableFeatures(
        ['buy', 'nextbuy', 'o_date', 'a_date', 'PredictDays', 'user_id'])
    sf.InitialFeatures(
        ['age_x', 'sex_x', 'user_lv_cd_x', 'buycnt', 'daybeforelastbuy_o_ave'])
    # sf.PotentialAdd = ['daybeforelastbuy_o_mean']
    # sf.clf = lgbm.LGBMClassifier(random_state=1, num_leaves=6, n_estimators=5000, max_depth=3, learning_rate=0.2, n_jobs=1)
    sf.clf = lgbm.LGBMClassifier(random_state=1, num_leaves=6,
                                 n_estimators=1000, max_depth=3,
                                 learning_rate=0.2, n_jobs=8)
    sf.SetLogFile('record.log')
    sf.run(validate)
def main():
    """Sequential feature search for the 'nextbuy' regression task."""
    # Initialize the selector and choose the search flow you need.
    sf = FS.Select(Sequence=True, Random=False, Cross=False)
    sf.ImportDF(df, label='nextbuy')  # import dataset and target label
    sf.ImportLossFunction(score, direction='ascend')  # scoring function and optimization direction
    # Features that must not be used for training.
    sf.InitialNonTrainableFeatures(
        ['buy', 'nextbuy', 'o_date', 'a_date', 'PredictDays', 'user_id'])
    # Initial feature combination for the search.
    sf.InitialFeatures(
        ['age_x', 'sex_x', 'user_lv_cd_x', 'buycnt', 'daybeforelastbuy_o_ave'])
    sf.GenerateCol()  # generate the candidate feature pool (see the readme in the repo root)
    sf.SetSample(1, samplemode=1)  # sampling ratio and random mode
    sf.SetTimeLimit(100)  # maximum run time, in minutes
    # Regression model used to score each candidate feature set.
    sf.clf = lgbm.LGBMRegressor(random_state=1, num_leaves=6,
                                n_estimators=1000, max_depth=3,
                                learning_rate=0.2, n_jobs=8)
    sf.SetLogFile('record.log')  # log file for the search trace
    sf.run(validate)  # pass the validation function and start running
def main():
    """Random-only feature search for 'is_trade', seeded with the top 100
    features from a precomputed importance ranking CSV."""
    # Select the way you want to process searching.
    sf = FS.Select(Sequence=False, Random=True, Cross=False)
    sf.ImportDF(prepareData(), label='is_trade')
    sf.ImportLossFunction(modelscore, direction='descend')
    sf.ImportCrossMethod(CrossMethod)
    sf.InitialNonTrainableFeatures(['context_timestamp', 'is_trade'])
    # Ranking CSV lists features worst-first; reverse it and keep the top 100.
    ranking = loadCSV('model/dif_on_feature_mean_4_25.csv')
    seed_features = list(ranking['feature'].values)[::-1][:100]
    sf.InitialFeatures(seed_features)
    sf.clf = lgbm.LGBMClassifier(max_depth=7, n_jobs=60, num_leaves=64,
                                 seed=1080, learning_rate=0.05,
                                 n_estimators=400, colsample_bytree=0.8,
                                 subsample=0.8)
    sf.SetLogFile('./model/record_100_7_50.log')
    sf.run(validation)
def main():
    """Second-stage sequential feature search for 'is_trade', driven by the
    column lists declared in the columns_2 module."""
    # Select the way you want to process searching.
    selector = FS.Select(Sequence=True, Random=False, Cross=False)
    selector.ImportDF(prepareData(), label='is_trade')
    selector.ImportLossFunction(modelscore, direction='descend')
    selector.ImportCrossMethod(CrossMethod)
    selector.InitialNonTrainableFeatures(columns_2.drop_columns)
    selector.InitialFeatures([])  # start from an empty set
    selector.GenerateCol(key='mean', selectstep=2)
    selector.SetSample(0.1, samplemode=0, samplestate=0)
    selector.AddPotentialFeatures(columns_2.all_select_columns)
    # selector.SetFeaturesLimit(20)
    selector.SetTimeLimit(100)  # minutes
    selector.clf = lgbm.LGBMClassifier(random_state=1024, num_leaves=63,
                                       n_estimators=100, max_depth=7,
                                       learning_rate=0.1)
    selector.SetLogFile(
        '/home/share/liangxiao/second_stage/feature_selection/recordml_2.log')
    selector.run(validation)
return np.mean(totaltest) def add(x,y): return x + y def substract(x,y): return x - y def times(x,y): return x * y def divide(x,y): return (x + 0.001)/(y + 0.001) CrossMethod = {'+':add, '-':substract, '*':times, '/':divide,} sf = FS.Select(Sequence = True, Random = False, Cross = False) #初始化选择器,选择你需要的流程 sf.ImportDF(df, label ='label') #导入数据集以及目标标签 #sf.ImportCrossMethod(CrossMethod) sf.ImportLossFunction(score, direction = 'descend') #导入评价函数以及优化方向 sf.InitialNonTrainableFeatures(['id','date', 'label']) #初始化不能用的特征 sf.InitialFeatures(feature_columns) #初始化其实特征组合 sf.GenerateCol() #生成特征库 (具体该函数变量请参考根目录下的readme) sf.SetSample(1, samplemode = 1) #初始化抽样比例和随机过程 sf.SetTimeLimit(240) #设置算法运行最长时间,以分钟为单位 sf.clf = lgb.LGBMClassifier(random_state=10, num_leaves =15, n_estimators=200, max_depth=5, learning_rate = 0.1, n_jobs=-1) #设定模型 sf.SetLogFile('record.log') #初始化日志文件 sf.run(validation) #输入检验函数并开始运行