Esempio n. 1
0
def main():
    """Train the 'bst4_1' XGBoost model and save it under ``../models/``.

    Steps performed:
      1. Seed ``random`` and ``numpy`` so runs are repeatable.
      2. Load the training split via ``fn.load_train_data`` with bounds
         ``(7, 18)`` and a hold-out fraction of 0.07.
      3. Collapse the integer labels with ``fn.aggregate_labels``.
      4. Optionally append the extra per-type feature files (``*8_17.csv``).
      5. Train with ``fn.train_tree_xgb`` and save the resulting booster.
    """
    random.seed(11)
    np.random.seed(11)

    use_xtra_features = True
    train_path = '../processed/'
    model_path = '../models/'

    model_name_suffix = '_final_subm'
    offset_amount = 0.07  # fraction of the train set to use as hold out
    num_over = 2
    num_threads = 7

    # Generate trainset, labels, and test set based on the number of valid
    # radar readings in the original dataset. The third return value is not
    # needed for training, hence the underscore prefix.
    train, integer_labels, _actual_labels, cutoff = fn.load_train_data(
        train_path, 7, 18, offset_amount)

    # Presumably maps labels 4-5 to class 4 and 6-69 to class 5 (verify against
    # fn.aggregate_labels). .iloc because Series and DataFrame don't behave
    # the same.
    reduced_labels = fn.aggregate_labels(
        [[range(4, 6), 4], [range(6, 70), 5]], integer_labels
    ).iloc[:, 0]

    if use_xtra_features:
        types = [
            'TimeToEnd', 'Reflectivity', 'Zdr', 'RR2', 'ReflectivityQC',
            'RadarQualityIndex', 'RR3', 'RR1', 'Composite', 'RhoHV',
            'HybridScan', 'LogWaterVolume',
        ]
        # Read every file first and concatenate once: calling pd.concat inside
        # the loop re-copies the accumulated frame on every iteration.
        xtra_frames = [
            pd.read_csv(train_path + 'train_' + feature_type + '8_17.csv',
                        index_col=0)
            for feature_type in types
        ]
        xtra_train = pd.concat(xtra_frames, axis=1)

        # Align the extra features to the rows of the base training frame.
        xtra_train = xtra_train.reindex(train.index)
        train = pd.concat([train, xtra_train], axis=1)

    # Rows from `cutoff` onwards come first, the leading `cutoff` rows second
    # (presumably train vs. hold-out -- confirm against fn.train_tree_xgb).
    data = (train.iloc[cutoff:, :], reduced_labels.iloc[cutoff:],
            train.iloc[:cutoff, :], reduced_labels.iloc[:cutoff])

    bst1 = fn.train_tree_xgb(data, 0.020, 1.5, 14, 55, .6, .5, 6, num_threads,
                             num_over)
    bst1.save_model(model_path + 'bst4_1' + model_name_suffix)
Esempio n. 2
0
def main():
    """Fit the 'bst3_1' booster and write it to the models directory.

    Loads the training split selected by the bounds ``(3, 8)``, reduces the
    integer labels with ``fn.aggregate_labels``, splits the rows at the
    ``cutoff`` returned by the loader, trains via ``fn.train_tree_xgb`` and
    saves the model.
    """
    random.seed(11)
    np.random.seed(11)

    processed_dir = '../processed/'
    models_dir = '../models/'
    suffix = '_final_subm'

    holdout_fraction = 0.07  # fraction of the train set to use as hold out
    n_over = 2
    n_threads = 7

    # Generate trainset, labels, and test set based on the number of valid
    # radar readings in the original dataset.
    features, int_labels, _unused_actual, split_at = fn.load_train_data(
        processed_dir, 3, 8, holdout_fraction)

    label_groups = [[range(3, 7), 3], [range(7, 70), 4]]
    aggregated = fn.aggregate_labels(label_groups, int_labels)
    # Take the first column: Series and DataFrame don't behave the same.
    labels = aggregated.iloc[:, 0]

    tail_x = features.iloc[split_at:, :]
    tail_y = labels.iloc[split_at:]
    head_x = features.iloc[:split_at, :]
    head_y = labels.iloc[:split_at]
    data = (tail_x, tail_y, head_x, head_y)

    # Hyperparameters, passed positionally to fn.train_tree_xgb.
    eta, gamma, max_depth, min_child = 0.02, 1.5, 14, 45
    subsample, col_sample, num_classes = .45, .5, 5
    booster = fn.train_tree_xgb(data, eta, gamma, max_depth, min_child,
                                subsample, col_sample, num_classes,
                                n_threads, n_over)
    booster.save_model(models_dir + 'bst3_1' + suffix)
def main():
    """Fit the 'bst1_1' booster and write it to the models directory.

    Loads the training split selected by the bounds ``(1, 1)``, removes the
    columns whose mean equals -99999, aggregates the labels into three groups,
    trains via ``fn.train_tree_xgb`` and saves the model for later prediction.
    """
    random.seed(11)
    np.random.seed(11)

    processed_dir = '../processed/'
    models_dir = '../models/'
    suffix = '_final_subm'

    holdout_fraction = 0.07  # fraction of the train set to use as hold out
    n_over = 2
    n_threads = 7

    # Generate trainset, labels, and test set based on the number of valid
    # radar readings in the original dataset.
    features, int_labels, _unused_actual, split_at = fn.load_train_data(
        processed_dir, 1, 1, holdout_fraction)

    # Keep only the columns whose mean differs from -99999 (the original
    # comment describes the dropped ones as columns with constant values).
    keep_columns = features.mean() != -99999
    features = features.loc[:, keep_columns]

    # Aggregate the original labels into 3 groups: 0mm, 1mm, and 2-69mm.
    label_groups = [[range(2, 70), 2]]
    aggregated = fn.aggregate_labels(label_groups, int_labels)
    # Take the first column: Series and DataFrame don't behave the same.
    labels = aggregated.iloc[:, 0]

    # Split into a train and validation set for early stopping; building the
    # tuple here keeps the call to xgb readable.
    tail_x = features.iloc[split_at:, :]
    tail_y = labels.iloc[split_at:]
    head_x = features.iloc[:split_at, :]
    head_y = labels.iloc[:split_at]
    data = (tail_x, tail_y, head_x, head_y)

    # train_tree_xgb(data, eta, gamma, max_d, min_child, subsamp, col_samp,
    #                num_classes, num_threads, num_over=3, eval_func=None)
    eta, gamma, max_depth, min_child = 0.015, 1.5, 9, 55
    subsample, col_sample, num_classes = .45, .55, 3
    booster = fn.train_tree_xgb(data, eta, gamma, max_depth, min_child,
                                subsample, col_sample, num_classes,
                                n_threads, n_over)

    # Done with this model; save it for later when we make the predictions.
    booster.save_model(models_dir + 'bst1_1' + suffix)
Esempio n. 4
0
def main():
    """Train the 'bst5_1' XGBoost model and save it under ``../models/``.

    Steps performed:
      1. Seed ``random`` and ``numpy`` so runs are repeatable.
      2. Load the training split via ``fn.load_train_data`` with bounds
         ``(17, 1000)`` and a hold-out fraction of 0.07.
      3. Collapse the integer labels with ``fn.aggregate_labels``.
      4. Optionally append the extra per-type feature files (``*18_199.csv``).
      5. Train with ``fn.train_tree_xgb`` and save the resulting booster.
    """
    random.seed(11)
    np.random.seed(11)

    # Set this to False for a faster training time.
    use_xtra_features = True

    train_path = '../processed/'
    model_path = '../models/'

    model_name_suffix = '_final_subm'
    offset_amount = 0.07  # fraction of the train set to use as hold out
    num_over = 2
    num_threads = 7

    # Generate trainset, labels, and test set based on the number of valid
    # radar readings in the original dataset. The third return value is not
    # needed for training, hence the underscore prefix.
    train, integer_labels, _actual_labels, cutoff = fn.load_train_data(
        train_path, 17, 1000, offset_amount)

    # Presumably maps 8-9 -> 8, 10-13 -> 9, 14-18 -> 10 and 19-69 -> 11
    # (verify against fn.aggregate_labels).
    reduced_labels = fn.aggregate_labels(
        [[range(8, 10), 8], [range(10, 14), 9], [range(14, 19), 10],
         [range(19, 70), 11]], integer_labels).iloc[:, 0]

    if use_xtra_features:
        types = [
            'TimeToEnd', 'Reflectivity', 'Zdr', 'RR2', 'ReflectivityQC',
            'RadarQualityIndex', 'RR3', 'RR1', 'Composite', 'RhoHV',
            'HybridScan', 'LogWaterVolume',
        ]
        # Read every file first and concatenate once: calling pd.concat inside
        # the loop re-copies the accumulated frame on every iteration.
        xtra_frames = [
            pd.read_csv(train_path + 'train_' + feature_type + '18_199.csv',
                        index_col=0)
            for feature_type in types
        ]
        xtra_train = pd.concat(xtra_frames, axis=1)

        # Align the extra features to the rows of the base training frame.
        xtra_train = xtra_train.reindex(train.index)
        train = pd.concat([train, xtra_train], axis=1)

    # Rows from `cutoff` onwards come first, the leading `cutoff` rows second
    # (presumably train vs. hold-out -- confirm against fn.train_tree_xgb).
    data = (train.iloc[cutoff:, :], reduced_labels.iloc[cutoff:],
            train.iloc[:cutoff, :], reduced_labels.iloc[:cutoff])

    bst1 = fn.train_tree_xgb(data, 0.025, 2.5, 14, 85, .65, .5, 12,
                             num_threads, num_over)
    bst1.save_model(model_path + 'bst5_1' + model_name_suffix)
def main():
    """Train the 'bst5_1' XGBoost model and persist it to ``../models/``.

    The training split is loaded with bounds ``(17, 1000)``, labels are
    reduced through ``fn.aggregate_labels``, the optional ``*18_199.csv``
    feature files are joined column-wise onto the base frame, and the booster
    returned by ``fn.train_tree_xgb`` is saved.
    """
    random.seed(11)
    np.random.seed(11)

    # Set this to False for a faster training time.
    use_xtra_features = True

    train_path = '../processed/'
    model_path = '../models/'

    model_name_suffix = '_final_subm'
    offset_amount = 0.07  # fraction of the train set to use as hold out
    num_over = 2
    num_threads = 7

    # Generate trainset, labels, and test set based on the number of valid
    # radar readings in the original dataset. The third value is unused here.
    train, integer_labels, _actual_labels, cutoff = fn.load_train_data(
        train_path, 17, 1000, offset_amount)

    # Presumably maps 8-9 -> 8, 10-13 -> 9, 14-18 -> 10 and 19-69 -> 11
    # (verify against fn.aggregate_labels).
    reduced_labels = fn.aggregate_labels(
        [[range(8, 10), 8], [range(10, 14), 9], [range(14, 19), 10],
         [range(19, 70), 11]], integer_labels).iloc[:, 0]

    if use_xtra_features:
        types = [
            'TimeToEnd', 'Reflectivity', 'Zdr', 'RR2', 'ReflectivityQC',
            'RadarQualityIndex', 'RR3', 'RR1', 'Composite', 'RhoHV',
            'HybridScan', 'LogWaterVolume'
        ]
        # Collect the per-type frames and join them in one pd.concat call;
        # concatenating inside the loop copies the growing frame every pass.
        xtra_frames = []
        for feature_type in types:
            csv_name = train_path + 'train_' + feature_type + '18_199.csv'
            xtra_frames.append(pd.read_csv(csv_name, index_col=0))
        xtra_train = pd.concat(xtra_frames, axis=1)

        # Align the extra features to the rows of the base training frame.
        xtra_train = xtra_train.reindex(train.index)
        train = pd.concat([train, xtra_train], axis=1)

    # Rows from `cutoff` onwards come first, the leading `cutoff` rows second
    # (presumably train vs. hold-out -- confirm against fn.train_tree_xgb).
    data = (train.iloc[cutoff:, :], reduced_labels.iloc[cutoff:],
            train.iloc[:cutoff, :], reduced_labels.iloc[:cutoff])

    bst1 = fn.train_tree_xgb(data, 0.025, 2.5, 14, 85, .65, .5, 12,
                             num_threads, num_over)
    bst1.save_model(model_path + 'bst5_1' + model_name_suffix)
def main():
    """Train the 'bst4_1' XGBoost model and persist it to ``../models/``.

    The training split is loaded with bounds ``(7, 18)``, labels are reduced
    through ``fn.aggregate_labels``, the optional ``*8_17.csv`` feature files
    are joined column-wise onto the base frame, and the booster returned by
    ``fn.train_tree_xgb`` is saved.
    """
    random.seed(11)
    np.random.seed(11)

    use_xtra_features = True
    train_path = '../processed/'
    model_path = '../models/'

    model_name_suffix = '_final_subm'
    offset_amount = 0.07  # fraction of the train set to use as hold out
    num_over = 2
    num_threads = 7

    # Generate trainset, labels, and test set based on the number of valid
    # radar readings in the original dataset. The third value is unused here.
    train, integer_labels, _actual_labels, cutoff = fn.load_train_data(
        train_path, 7, 18, offset_amount)

    # Presumably maps labels 4-5 to class 4 and 6-69 to class 5 (verify against
    # fn.aggregate_labels). .iloc because Series and DataFrame don't behave
    # the same.
    reduced_labels = fn.aggregate_labels(
        [[range(4, 6), 4], [range(6, 70), 5]], integer_labels
    ).iloc[:, 0]

    if use_xtra_features:
        types = [
            'TimeToEnd', 'Reflectivity', 'Zdr', 'RR2', 'ReflectivityQC',
            'RadarQualityIndex', 'RR3', 'RR1', 'Composite', 'RhoHV',
            'HybridScan', 'LogWaterVolume'
        ]
        # Collect the per-type frames and join them in one pd.concat call;
        # concatenating inside the loop copies the growing frame every pass.
        xtra_frames = []
        for feature_type in types:
            csv_name = train_path + 'train_' + feature_type + '8_17.csv'
            xtra_frames.append(pd.read_csv(csv_name, index_col=0))
        xtra_train = pd.concat(xtra_frames, axis=1)

        # Align the extra features to the rows of the base training frame.
        xtra_train = xtra_train.reindex(train.index)
        train = pd.concat([train, xtra_train], axis=1)

    # Rows from `cutoff` onwards come first, the leading `cutoff` rows second
    # (presumably train vs. hold-out -- confirm against fn.train_tree_xgb).
    data = (train.iloc[cutoff:, :], reduced_labels.iloc[cutoff:],
            train.iloc[:cutoff, :], reduced_labels.iloc[:cutoff])

    bst1 = fn.train_tree_xgb(data, 0.020, 1.5, 14, 55, .6, .5, 6, num_threads,
                             num_over)
    bst1.save_model(model_path + 'bst4_1' + model_name_suffix)
Esempio n. 7
0
def main():
    """Fit the 'bst2_1' booster and write it to the models directory.

    Loads the training split selected by the bounds ``(1, 4)``, reduces the
    integer labels with ``fn.aggregate_labels``, splits the rows at the
    ``cutoff`` returned by the loader, trains via ``fn.train_tree_xgb`` and
    saves the model.
    """
    random.seed(11)
    np.random.seed(11)

    processed_dir = '../processed/'
    models_dir = '../models/'
    suffix = '_final_subm'

    holdout_fraction = 0.07  # fraction of the train set to use as hold out
    n_over = 2
    n_threads = 7

    # Generate trainset, labels, and test set based on the number of valid
    # radar readings in the original dataset.
    features, int_labels, _unused_actual, split_at = fn.load_train_data(
        processed_dir, 1, 4, holdout_fraction)

    label_groups = [[range(3, 7), 3], [range(7, 70), 4]]
    aggregated = fn.aggregate_labels(label_groups, int_labels)
    # Take the first column: Series and DataFrame don't behave the same.
    labels = aggregated.iloc[:, 0]

    tail_x = features.iloc[split_at:, :]
    tail_y = labels.iloc[split_at:]
    head_x = features.iloc[:split_at, :]
    head_y = labels.iloc[:split_at]
    data = (tail_x, tail_y, head_x, head_y)

    # Hyperparameters, passed positionally to fn.train_tree_xgb.
    eta, gamma, max_depth, min_child = 0.015, 1.5, 9, 45
    subsample, col_sample, num_classes = .55, .55, 5
    booster = fn.train_tree_xgb(data, eta, gamma, max_depth, min_child,
                                subsample, col_sample, num_classes,
                                n_threads, n_over)
    booster.save_model(models_dir + 'bst2_1' + suffix)