def main():
    """Train the 6-class xgboost model (bst4_1) on rows with 8-17 valid radar readings.

    Loads the preprocessed train slice, optionally joins the per-sensor-type
    extra feature files, collapses the rain-amount labels into 6 classes, and
    saves the fitted booster to ../models/.
    """
    random.seed(11)
    np.random.seed(11)
    use_xtra_features = True  # set to False for a faster training run
    train_path = '../processed/'
    model_path = '../models/'
    model_name_suffix = '_final_subm'
    offset_amount = 0.07  # fraction of the train set to use as hold out
    num_over = 2
    num_threads = 7
    # generate trainset, labels, and test set based on the number of valid
    # radar readings in the original dataset.
    train, integer_labels, actual_labels, cutoff = fn.load_train_data(
        train_path, 7, 18, offset_amount)
    # collapse rain amounts 4-5mm into class 4 and 6-69mm into class 5
    reduced_labels = fn.aggregate_labels(
        [[range(4, 6), 4], [range(6, 70), 5]], integer_labels
    ).iloc[:, 0]  # .iloc becuase series and df don't behave the same
    if use_xtra_features:
        types = ['TimeToEnd', 'Reflectivity', 'Zdr', 'RR2', 'ReflectivityQC',
                 'RadarQualityIndex', 'RR3', 'RR1', 'Composite', 'RhoHV',
                 'HybridScan', 'LogWaterVolume']
        # read every extra-feature file, then concatenate in one pass instead
        # of re-concatenating the accumulator on every iteration (quadratic)
        frames = [pd.read_csv(train_path + 'train_' + t + '8_17.csv',
                              index_col=0)
                  for t in types]
        xtra_train = pd.concat(frames, axis=1).reindex(train.index)
        train = pd.concat([train, xtra_train], axis=1)
    # (train_X, train_y, valid_X, valid_y) for early stopping
    data = (train.iloc[cutoff:, :], reduced_labels.iloc[cutoff:],
            train.iloc[:cutoff, :], reduced_labels.iloc[:cutoff])
    bst1 = fn.train_tree_xgb(data, 0.020, 1.5, 14, 55, .6, .5, 6,
                             num_threads, num_over)
    bst1.save_model(model_path + 'bst4_1' + model_name_suffix)
def main():
    """Train the 5-class xgboost model (bst3_1) on rows with 3-8 valid radar readings."""
    random.seed(11)
    np.random.seed(11)
    processed_dir = '../processed/'
    models_dir = '../models/'
    suffix = '_final_subm'
    holdout_frac = 0.07  # fraction of the train set to use as hold out
    oversample = 2
    threads = 7
    # load the slice of the dataset whose rows have 3-8 valid radar readings
    features, int_labels, actual_labels, split_at = fn.load_train_data(
        processed_dir, 3, 8, holdout_frac)
    # collapse rain amounts 3-6mm into class 3 and 7-69mm into class 4;
    # .iloc[:, 0] because a Series and a DataFrame don't behave the same
    labels = fn.aggregate_labels(
        [[range(3, 7), 3], [range(7, 70), 4]], int_labels).iloc[:, 0]
    fit_X = features.iloc[split_at:, :]
    fit_y = labels.iloc[split_at:]
    holdout_X = features.iloc[:split_at, :]
    holdout_y = labels.iloc[:split_at]
    booster = fn.train_tree_xgb((fit_X, fit_y, holdout_X, holdout_y),
                                0.02, 1.5, 14, 45, .45, .5, 5,
                                threads, oversample)
    booster.save_model(models_dir + 'bst3_1' + suffix)
def main():
    """Train the 3-class xgboost model (bst1_1) on rows with exactly 1 valid radar reading."""
    random.seed(11)
    np.random.seed(11)
    processed_dir = '../processed/'
    models_dir = '../models/'
    suffix = '_final_subm'
    holdout_frac = 0.07  # fraction of the train set to use as hold out
    oversample = 2
    threads = 7
    # load the slice of the dataset whose rows have exactly one valid reading
    features, int_labels, actual_labels, split_at = fn.load_train_data(
        processed_dir, 1, 1, holdout_frac)
    # drop the columns that hold only the -99999 sentinel (constant value)
    keep = features.mean() != -99999
    features = features.loc[:, keep]
    # aggregate the original labels into 3 groups: 0mm, 1mm, and 2-69mm;
    # .iloc[:, 0] because a Series and a DataFrame don't behave the same
    labels = fn.aggregate_labels(
        [[range(2, 70), 2]], int_labels).iloc[:, 0]
    # split into a train and validation set for early stopping
    fit_X = features.iloc[split_at:, :]
    fit_y = labels.iloc[split_at:]
    holdout_X = features.iloc[:split_at, :]
    holdout_y = labels.iloc[:split_at]
    # train_tree_xgb(data, eta, gamma, max_d, min_child, subsamp, col_samp,
    #                num_classes, num_threads, num_over=3, eval_func=None)
    booster = fn.train_tree_xgb((fit_X, fit_y, holdout_X, holdout_y),
                                0.015, 1.5, 9, 55, .45, .55, 3,
                                threads, oversample)
    # done with this model; saved for later when we make the predictions
    booster.save_model(models_dir + 'bst1_1' + suffix)
def main():
    """Train the 12-class xgboost model (bst5_1) on rows with 17+ valid radar readings.

    Loads the preprocessed train slice, optionally joins the per-sensor-type
    extra feature files, collapses the rain-amount labels into 12 classes, and
    saves the fitted booster to ../models/.
    """
    random.seed(11)
    np.random.seed(11)
    # set this to False for a faster training time
    use_xtra_features = True
    train_path = '../processed/'
    model_path = '../models/'
    model_name_suffix = '_final_subm'
    offset_amount = 0.07  # fraction of the train set to use as hold out
    num_over = 2
    num_threads = 7
    # generate trainset, labels, and test set based on the number of valid
    # radar readings in the original dataset.
    train, integer_labels, actual_labels, cutoff = fn.load_train_data(
        train_path, 17, 1000, offset_amount)
    # collapse the heavy-rain tail into four coarse classes (8, 9, 10, 11)
    reduced_labels = fn.aggregate_labels(
        [[range(8, 10), 8], [range(10, 14), 9],
         [range(14, 19), 10], [range(19, 70), 11]],
        integer_labels).iloc[:, 0]
    if use_xtra_features:
        types = ['TimeToEnd', 'Reflectivity', 'Zdr', 'RR2', 'ReflectivityQC',
                 'RadarQualityIndex', 'RR3', 'RR1', 'Composite', 'RhoHV',
                 'HybridScan', 'LogWaterVolume']
        # read every extra-feature file, then concatenate in one pass instead
        # of re-concatenating the accumulator on every iteration (quadratic)
        frames = [pd.read_csv(train_path + 'train_' + t + '18_199.csv',
                              index_col=0)
                  for t in types]
        xtra_train = pd.concat(frames, axis=1).reindex(train.index)
        train = pd.concat([train, xtra_train], axis=1)
    # (train_X, train_y, valid_X, valid_y) for early stopping
    data = (train.iloc[cutoff:, :], reduced_labels.iloc[cutoff:],
            train.iloc[:cutoff, :], reduced_labels.iloc[:cutoff])
    bst1 = fn.train_tree_xgb(data, 0.025, 2.5, 14, 85, .65, .5, 12,
                             num_threads, num_over)
    bst1.save_model(model_path + 'bst5_1' + model_name_suffix)
def main():
    """Train the 12-class xgboost model (bst5_1) on rows with 17+ valid radar readings.

    Loads the preprocessed train slice, optionally joins the per-sensor-type
    extra feature files, collapses the rain-amount labels into 12 classes, and
    saves the fitted booster to ../models/.
    """
    random.seed(11)
    np.random.seed(11)
    # set this to False for a faster training time
    use_xtra_features = True
    train_path = '../processed/'
    model_path = '../models/'
    model_name_suffix = '_final_subm'
    offset_amount = 0.07  # fraction of the train set to use as hold out
    num_over = 2
    num_threads = 7
    # generate trainset, labels, and test set based on the number of valid
    # radar readings in the original dataset.
    train, integer_labels, actual_labels, cutoff = fn.load_train_data(
        train_path, 17, 1000, offset_amount)
    # collapse the heavy-rain tail into four coarse classes (8, 9, 10, 11)
    reduced_labels = fn.aggregate_labels(
        [[range(8, 10), 8], [range(10, 14), 9],
         [range(14, 19), 10], [range(19, 70), 11]],
        integer_labels).iloc[:, 0]
    if use_xtra_features:
        types = [
            'TimeToEnd', 'Reflectivity', 'Zdr', 'RR2', 'ReflectivityQC',
            'RadarQualityIndex', 'RR3', 'RR1', 'Composite', 'RhoHV',
            'HybridScan', 'LogWaterVolume'
        ]
        # read every extra-feature file, then concatenate in one pass instead
        # of re-concatenating the accumulator on every iteration (quadratic)
        frames = [pd.read_csv(train_path + 'train_' + t + '18_199.csv',
                              index_col=0)
                  for t in types]
        xtra_train = pd.concat(frames, axis=1).reindex(train.index)
        train = pd.concat([train, xtra_train], axis=1)
    # (train_X, train_y, valid_X, valid_y) for early stopping
    data = (train.iloc[cutoff:, :], reduced_labels.iloc[cutoff:],
            train.iloc[:cutoff, :], reduced_labels.iloc[:cutoff])
    bst1 = fn.train_tree_xgb(data, 0.025, 2.5, 14, 85, .65, .5, 12,
                             num_threads, num_over)
    bst1.save_model(model_path + 'bst5_1' + model_name_suffix)
def main():
    """Train the 6-class xgboost model (bst4_1) on rows with 8-17 valid radar readings.

    Loads the preprocessed train slice, optionally joins the per-sensor-type
    extra feature files, collapses the rain-amount labels into 6 classes, and
    saves the fitted booster to ../models/.
    """
    random.seed(11)
    np.random.seed(11)
    use_xtra_features = True  # set to False for a faster training run
    train_path = '../processed/'
    model_path = '../models/'
    model_name_suffix = '_final_subm'
    offset_amount = 0.07  # fraction of the train set to use as hold out
    num_over = 2
    num_threads = 7
    # generate trainset, labels, and test set based on the number of valid
    # radar readings in the original dataset.
    train, integer_labels, actual_labels, cutoff = fn.load_train_data(
        train_path, 7, 18, offset_amount)
    # collapse rain amounts 4-5mm into class 4 and 6-69mm into class 5
    reduced_labels = fn.aggregate_labels(
        [[range(4, 6), 4], [range(6, 70), 5]], integer_labels
    ).iloc[:, 0]  # .iloc becuase series and df don't behave the same
    if use_xtra_features:
        types = [
            'TimeToEnd', 'Reflectivity', 'Zdr', 'RR2', 'ReflectivityQC',
            'RadarQualityIndex', 'RR3', 'RR1', 'Composite', 'RhoHV',
            'HybridScan', 'LogWaterVolume'
        ]
        # read every extra-feature file, then concatenate in one pass instead
        # of re-concatenating the accumulator on every iteration (quadratic)
        frames = [pd.read_csv(train_path + 'train_' + t + '8_17.csv',
                              index_col=0)
                  for t in types]
        xtra_train = pd.concat(frames, axis=1).reindex(train.index)
        train = pd.concat([train, xtra_train], axis=1)
    # (train_X, train_y, valid_X, valid_y) for early stopping
    data = (train.iloc[cutoff:, :], reduced_labels.iloc[cutoff:],
            train.iloc[:cutoff, :], reduced_labels.iloc[:cutoff])
    bst1 = fn.train_tree_xgb(data, 0.020, 1.5, 14, 55, .6, .5, 6,
                             num_threads, num_over)
    bst1.save_model(model_path + 'bst4_1' + model_name_suffix)
def main():
    """Train the 5-class xgboost model (bst2_1) on rows with 1-4 valid radar readings."""
    random.seed(11)
    np.random.seed(11)
    processed_dir = '../processed/'
    models_dir = '../models/'
    suffix = '_final_subm'
    holdout_frac = 0.07  # fraction of the train set to use as hold out
    oversample = 2
    threads = 7
    # load the slice of the dataset whose rows have 1-4 valid radar readings
    features, int_labels, actual_labels, split_at = fn.load_train_data(
        processed_dir, 1, 4, holdout_frac)
    # collapse rain amounts 3-6mm into class 3 and 7-69mm into class 4;
    # .iloc[:, 0] because a Series and a DataFrame don't behave the same
    labels = fn.aggregate_labels(
        [[range(3, 7), 3], [range(7, 70), 4]], int_labels).iloc[:, 0]
    fit_X = features.iloc[split_at:, :]
    fit_y = labels.iloc[split_at:]
    holdout_X = features.iloc[:split_at, :]
    holdout_y = labels.iloc[:split_at]
    booster = fn.train_tree_xgb((fit_X, fit_y, holdout_X, holdout_y),
                                0.015, 1.5, 9, 45, .55, .55, 5,
                                threads, oversample)
    booster.save_model(models_dir + 'bst2_1' + suffix)