# Imports assumed throughout these examples (conventional for the
# lasagne/nolearn/Theano stack); helpers such as load_weather, load_training,
# assemble_X, assemble_y, normalize, AdjustVariable, EarlyStopping and
# quadratic_weighted_kappa come from the surrounding projects.
import csv
import types

import numpy as np
import pandas as pd
import theano
import theano.tensor as T
from lasagne.layers import InputLayer, DenseLayer, DropoutLayer
from lasagne.nonlinearities import sigmoid, softmax
from lasagne.objectives import binary_crossentropy
from lasagne.updates import adagrad, nesterov_momentum
from nolearn.lasagne import BatchIterator, NeuralNet
from sklearn import metrics
from sklearn.utils import shuffle


def train():
    weather = load_weather()
    training = load_training()

    X = assemble_X(training, weather)
    print(len(X[0]))
    mean, std = normalize(X)
    y = assemble_y(training)

    input_size = len(X[0])

    learning_rate = theano.shared(np.float32(0.1))
    
    net = NeuralNet(
        layers=[
            ('input', InputLayer),
            ('hidden1', DenseLayer),
            ('dropout1', DropoutLayer),
            ('hidden2', DenseLayer),
            ('dropout2', DropoutLayer),
            ('output', DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, input_size),
        hidden1_num_units=325,
        dropout1_p=0.4,
        hidden2_num_units=325,
        dropout2_p=0.4,
        output_nonlinearity=sigmoid,
        output_num_units=1,

        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=learning_rate,
        update_momentum=0.9,

        # Decay the learning rate
        on_epoch_finished=[
            AdjustVariable(learning_rate, target=0, half_life=1),
        ],

        # We don't want a stratified K-Fold here, so run in regression mode;
        # to compensate we must pass in the y_tensor_type and the loss.
        regression=True,
        y_tensor_type=T.imatrix,
        objective_loss_function=binary_crossentropy,
        max_epochs=85,
        eval_size=0.1,
        verbose=1,
    )

    X, y = shuffle(X, y, random_state=123)
    net.fit(X, y)
    
    _, X_valid, _, y_valid = net.train_test_split(X, y, net.eval_size)
    probas = net.predict_proba(X_valid)[:, 0]
    print("ROC score", metrics.roc_auc_score(y_valid, probas))

    return net, mean, std     
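Note: AdjustVariable is not part of nolearn; it is a helper defined in the
surrounding project. A minimal sketch, assuming the exponential-decay
semantics implied by the target/half_life arguments (nolearn calls each
on_epoch_finished entry with the net and its training history):

class AdjustVariable(object):
    # Decay a shared Theano scalar toward `target`, halving the remaining
    # gap every `half_life` epochs.
    def __init__(self, variable, target, half_life):
        self.variable = variable
        self.target = target
        self.half_life = half_life

    def __call__(self, nn, train_history):
        delta = self.variable.get_value() - self.target
        delta *= np.exp(-np.log(2) / self.half_life)
        self.variable.set_value(np.float32(self.target + delta))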
Example #2
def train(trainfile, weatherfile):
    weather = load_weather(weatherfile)
    training = load_training(trainfile)

    np.random.seed(42)

    X = assemble_X(training, weather)
    mean, std = normalize(X)
    y = assemble_y(training)

    input_size = len(X[0])

    learning_rate = theano.shared(np.float32(0.1))

    net = NeuralNet(
        layers=[
            ('input', InputLayer),
            ('hidden1', DenseLayer),
            ('dropout1', DropoutLayer),
            ('hidden2', DenseLayer),
            ('dropout2', DropoutLayer),
            ('output', DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, input_size),
        hidden1_num_units=400,
        dropout1_p=0.4,
        hidden2_num_units=200,
        dropout2_p=0.4,
        output_nonlinearity=sigmoid,
        output_num_units=1,

        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=learning_rate,
        update_momentum=0.9,

        # Decay the learning rate
        on_epoch_finished=[
            AdjustVariable(learning_rate, target=0, half_life=4),
        ],

        # We don't want a stratified K-Fold here, so run in regression mode;
        # to compensate we must pass in the y_tensor_type and the loss.
        regression=True,
        y_tensor_type=T.imatrix,
        objective_loss_function=binary_crossentropy,
        max_epochs=60,
        eval_size=0.1,
        verbose=1,
    )

    X, y = shuffle(X, y, random_state=123)
    net.fit(X, y)

    _, X_valid, _, y_valid = net.train_test_split(X, y, net.eval_size)
    probas = net.predict_proba(X_valid)[:, 0]
    print("ROC score", metrics.roc_auc_score(y_valid, probas))

    return net, mean, std
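A note on the regression=True trick used above: in classification mode,
nolearn's internal train/validation split is stratified, which these examples
want to avoid, so they run the net in regression mode and supply the integer
y_tensor_type and the binary cross-entropy loss themselves. With a single
sigmoid output unit, the network output is itself the positive-class
probability, which is why column 0 of predict_proba feeds roc_auc_score.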
Example #3
# Compute the z-scores for both train and validation, but use the training
# set's mean and standard deviation for both.  This is customary: the network
# is trained on data standardized with these statistics, so validation and
# prediction data must be transformed the same way.  Besides, a prediction
# set might be too small to yield a meaningful mean and standard deviation of
# its own.
X_train_z = zscore(X_train, train_mean, train_sdev)  # scipy.stats.mstats.zscore(X_train)
X_validate_z = zscore(X_validate, train_mean, train_sdev)  # scipy.stats.mstats.zscore(X_validate)

# These can be used to check the zscore() calculation against scipy:
#print(X_train_z)
#print(scipy.stats.mstats.zscore(X_train))
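zscore() here is a small project helper; a minimal sketch of what the calls
above assume (standardizing with externally supplied statistics, unlike
scipy.stats.mstats.zscore, which uses the data's own):

def zscore(x, mean, sdev):
    # Standardize using the training set's statistics.
    return (np.asarray(x) - mean) / sdev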

# Provide our own validation set
def my_split(self, X, y, eval_size):
    return X_train_z, X_validate_z, y_train, y_validate

net0.train_test_split = types.MethodType(my_split, net0)
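Binding my_split onto the instance with types.MethodType makes the net use
this fixed train/validation split instead of drawing its own random one; the
eval_size argument is accepted but ignored.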

# Train the network
net0.fit(X_train_z, y_train)

# Predict the validation set
pred_y = net0.predict(X_validate_z)

# Display predictions and count the number of incorrect predictions.
species_names = ['setosa', 'versicolour', 'virginica']

count = 0
wrong = 0
for element in zip(X_validate, y_validate, pred_y):
    print("Input: sepal length: {}, sepal width: {}, petal length: {}, petal width: {}; Expected: {}; Actual: {}".format(
        element[0][0], element[0][1], element[0][2], element[0][3],
        # Assumed completion -- the original snippet is truncated here:
        species_names[element[1]], species_names[element[2]]))
    if element[1] != element[2]:
        wrong += 1
    count += 1
Example #4
def train():
    weather = load_weather()
    training = load_training()
    
    X = assemble_X(training, weather)
    mean, std = normalize(X)
    y = assemble_y(training)

    col_labels = ["date.year", "date.month", "date.day", "block", "lat", "long"]
    for obs in ["Tmax", "Tmin", "Tavg", "DewPoint", "WetBulb", "PrecipTotal",
                "Depart", "Sunrise", "Sunset", "Heat", "Cool", "ResultSpeed",
                "ResultDir"]:
        for day in ["1", "2", "3", "5", "8", "12"]:
            col_labels.append(obs + "_" + day)
    for i_spec in range(6):
        col_labels.append("species_" + str(i_spec))

    with open("X.csv", "w") as f:
        X_file = csv.writer(f)
        X_file.writerow(col_labels)
        for row in X:
            X_file.writerow(row)

    with open("y.csv", "w") as f:
        y_file = csv.writer(f)
        y_file.writerow(["y"])
        for row in y:
            y_file.writerow(row)
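    # For reference, the X.csv header written above has 6 + 13*6 + 6 = 90
    # columns (assuming assemble_X emits features in this order), which
    # should match input_size below.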
 
    input_size = len(X[0])
    
    learning_rate = theano.shared(np.float32(0.1))
    
    net = NeuralNet(
        layers=[
            ('input', InputLayer),
            ('hidden1', DenseLayer),
            ('dropout1', DropoutLayer),
            ('hidden2', DenseLayer),
            ('dropout2', DropoutLayer),
            ('output', DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, input_size),
        hidden1_num_units=600,
        dropout1_p=0.5,
        hidden2_num_units=400,
        dropout2_p=0.5,
        output_nonlinearity=sigmoid,
        output_num_units=1,

        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=learning_rate,
        update_momentum=0.9,

        # Decay the learning rate
        on_epoch_finished=[
            AdjustVariable(learning_rate, target=0, half_life=20),
        ],

        # We don't want a stratified K-Fold here, so run in regression mode;
        # to compensate we must pass in the y_tensor_type and the loss.
        regression=True,
        y_tensor_type=T.imatrix,
        objective_loss_function=binary_crossentropy,
        max_epochs=500,
        eval_size=0.1,
        verbose=1,
    )

    X, y = shuffle(X, y, random_state=123)
    net.fit(X, y)
    
    _, X_valid, _, y_valid = net.train_test_split(X, y, net.eval_size)
    probas = net.predict_proba(X_valid)[:, 0]
    print("ROC score", metrics.roc_auc_score(y_valid, probas))

    return net, mean, std     
Example #5
output_num_units=4,

# optimization method:
update=nesterov_momentum,
update_learning_rate=learning_rate,
update_momentum=0.899,

# Decay the learning rate
on_epoch_finished=[
    AdjustVariable(learning_rate, target=0, half_life=4),
],

# We don't want a stratified K-Fold here, so run in regression mode;
# to compensate we must pass in the y_tensor_type and the loss.
regression=True,
y_tensor_type=T.imatrix,
objective_loss_function=binary_crossentropy,

max_epochs=75,
eval_size=0.1,
verbose=1,
)

X, y = shuffle(Xtrh, y, random_state=123)
net.fit(X, y)

_, X_valid, _, y_valid = net.train_test_split(X, y, net.eval_size)
probas = net.predict_proba(X_valid)[:, 0]
print("ROC score", metrics.roc_auc_score(y_valid, probas))

Example #6
def NN1_Classifier(Train_DS, y, Actual_DS, Sample_DS, grid):

    print("***************Starting NN1 Classifier***************")
    t0 = time()


    if grid:
        # Used to search for the best model hyperparameters.
        # (The grid-search body is omitted in this snippet; note that clf is
        # only defined in the else branch below.)
        print("Starting model fit with Grid Search")

    else:
        #y = y.reshape((-1, 1))
        y = y.astype('int32')
        Actual_DS = np.array(Actual_DS.astype('float32'))
        Train_DS = np.array(Train_DS.astype('float32'))

        learning_rate = theano.shared(np.float32(0.1))
        # Define model parameters - 2 hidden layers
        clf = NeuralNet(
            layers=[
                ('input', InputLayer),
                ('dropout0', DropoutLayer),
                ('hidden1', DenseLayer),
                ('dropout1', DropoutLayer),
                ('hidden2', DenseLayer),
                ('dropout2', DropoutLayer),
                ('output', DenseLayer),
            ],

            # layer parameters:
            input_shape=(None, Train_DS.shape[1]),
            dropout0_p=0.25,
            hidden1_num_units=400,
            dropout1_p=0.4,
            hidden2_num_units=400,
            dropout2_p=0.4,

            output_nonlinearity=softmax,  # output layer yields class probabilities
            output_num_units=5,

            # optimization method
            #update=sgd,
            #update=nesterov_momentum,
            update=adagrad,
            update_learning_rate=0.01,
            use_label_encoder=False,
            batch_iterator_train=BatchIterator(batch_size=100),
            #update_momentum=0.1,
            # on_epoch_finished=[
            #     AdjustVariable('update_learning_rate', start=0.1, stop=0.0001),
            #     EarlyStopping(patience=10),
            # ],
            eval_size=0.1,
            regression=False,
            max_epochs=60,
            verbose=1,
        )

        Train_DS, y = shuffle(Train_DS, y, random_state=42)
        clf.fit(Train_DS, y)

        _, X_valid, _, y_valid = clf.train_test_split(Train_DS, y, clf.eval_size)

        y_pred = clf.predict(X_valid)
        score = quadratic_weighted_kappa(y_valid, y_pred)

        print("Best score: %0.3f" % score)

    # Predict on the actual data set with the fitted model
    pred_Actual = clf.predict(Actual_DS)
    print("Actual NN1 Model predicted")

    # Save the predictions for the actual data set
    preds = pd.DataFrame(pred_Actual, index=Sample_DS.id.values, columns=Sample_DS.columns[1:])
    preds.to_csv(file_path + 'output/Submission_Roshan_NN_Model_1.csv', index_label='id')

    print("***************Ending NN1 Classifier***************")
    return pred_Actual
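quadratic_weighted_kappa is a competition-metrics helper from the surrounding
project; with a reasonably recent scikit-learn, the equivalent is
metrics.cohen_kappa_score(y_valid, y_pred, weights='quadratic').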
Example #7
    update_momentum=0.9,
    regression=False,

    on_epoch_finished=[
        EarlyStopping(patience=5),
    ],

    verbose=1,
    max_epochs=100)
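EarlyStopping, like AdjustVariable, comes from the surrounding project. A
common sketch in the style of the nolearn tutorials: remember the best
validation loss seen so far, and once it has not improved for `patience`
epochs, restore the best weights and stop training.

class EarlyStopping(object):
    def __init__(self, patience=100):
        self.patience = patience
        self.best_valid = np.inf
        self.best_valid_epoch = 0
        self.best_weights = None

    def __call__(self, nn, train_history):
        current_valid = train_history[-1]['valid_loss']
        current_epoch = train_history[-1]['epoch']
        if current_valid < self.best_valid:
            self.best_valid = current_valid
            self.best_valid_epoch = current_epoch
            self.best_weights = nn.get_all_params_values()
        elif self.best_valid_epoch + self.patience < current_epoch:
            # Restore the best weights seen and halt training.
            nn.load_params_from(self.best_weights)
            raise StopIteration()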

X_train, y_train, X_val, y_val, X_test, y_test = load_dataset(True)

def my_split(self, X, y, eval_size):
    return X_train, X_val, y_train, y_val

net0.train_test_split = types.MethodType(my_split, net0)

net0.fit(X_train, y_train)

y_predict = net0.predict(X_val)

count = 0
wrong = 0
for element in zip(X_val, y_val, y_predict):
    if element[1] != element[2]:
        wrong += 1
    count += 1

print("Incorrect {}/{} ({}%)".format(wrong, count, (wrong / count) * 100))

Example #8
    # on_epoch_finished=[
    #    AdjustVariable('update_learning_rate', start=0.01, stop=0.0001),
    #    AdjustVariable('update_momentum', start=0.9, stop=0.999),
    # ],
    #on_epoch_finished=[
    #       AdjustVariable(learning_rate, target=0, half_life=4),
    #],
    eval_size=0.2,
    verbose=1,
    max_epochs=32,
    batch_iterator_train=BatchIterator(batch_size=64),
    batch_iterator_test=BatchIterator(batch_size=64))

net0.fit(X, y)
X_train, X_valid, y_train, y_valid = net0.train_test_split(
    X, y, net0.eval_size)
probas = net0.predict_proba(X_valid)[:, 0]
#probas.loc[(probas[0]<0.3).values,[0]] = 0.1
print("ROC score", metrics.roc_auc_score(y_valid, (probas)))
#yp = DataFrame(net0.predict_proba(X_test),columns=[ u'Class_1', u'Class_2', u'Class_3', u'Class_4', u'Class_5', u'Class_6', u'Class_7', u'Class_8', u'Class_9'])
pred_prob = pd.DataFrame(net0.predict(X_test))
print pred_prob

sample = pd.read_csv('/Users/IkkiTanaka/Documents/KDDCup/sampleSubmission.csv',
                     header=None)
preds = pd.concat([sample[0], pd.DataFrame(pred_prob)], axis=1)
preds.to_csv('/Users/IkkiTanaka/Documents/KDDCup/pred/xgb/predNN1.csv',
             header=None,
             index=False)

for i in range(0, 50):
Example #9
def train():
    weather = load_weather()
    training = load_training()

    X = assemble_X(training, weather)
    mean, std = normalize(X)
    y = assemble_y(training)

    col_labels = [
        "date.year",
        "date.month",
        "date.day",
        "block",
        "lat",
        "long",
    ]
    for obs in [
            "Tmax", "Tmin", "Tavg", "DewPoint", "WetBulb", "PrecipTotal",
            "Depart"
    ]:
        for day in ["1", "3", "7", "14"]:
            col_labels.append(obs + "_" + day)
    for i_spec in range(6):
        col_labels.append("species_" + str(i_spec))
    with open("X.csv", "w") as f:
        X_file = csv.writer(f)
        X_file.writerow(col_labels)
        for row in X:
            X_file.writerow(row)

    with open("y.csv", "w") as f:
        y_file = csv.writer(f)
        y_file.writerow(["y"])
        for row in y:
            y_file.writerow(row)

    input_size = len(X[0])

    learning_rate = theano.shared(np.float32(0.1))

    net = NeuralNet(
        layers=[
            ('input', InputLayer),
            ('hidden1', DenseLayer),
            ('dropout1', DropoutLayer),
            ('hidden2', DenseLayer),
            ('dropout2', DropoutLayer),
            ('output', DenseLayer),
        ],
        # layer parameters:
        input_shape=(None, input_size),
        hidden1_num_units=256,
        dropout1_p=0.4,
        hidden2_num_units=256,
        dropout2_p=0.4,
        output_nonlinearity=sigmoid,
        output_num_units=1,

        # optimization method:
        update=nesterov_momentum,
        update_learning_rate=learning_rate,
        update_momentum=0.9,

        # Decay the learning rate
        on_epoch_finished=[
            AdjustVariable(learning_rate, target=0, half_life=4),
        ],

        # We don't want a stratified K-Fold here, so run in regression mode;
        # to compensate we must pass in the y_tensor_type and the loss.
        regression=True,
        y_tensor_type=T.imatrix,
        objective_loss_function=binary_crossentropy,
        max_epochs=32,
        eval_size=0.1,
        verbose=1,
    )

    X, y = shuffle(X, y, random_state=123)
    net.fit(X, y)

    _, X_valid, _, y_valid = net.train_test_split(X, y, net.eval_size)
    probas = net.predict_proba(X_valid)[:, 0]
    print("ROC score", metrics.roc_auc_score(y_valid, probas))

    return net, mean, std