def grid_search_with_cross_validation(nX_tr, y_tr, nX_te, y_te, din=50, dout=2):
    """Sweep architectures, regularisation and decay through ``ml_utils.testmodels``.

    Five ReLU/softmax architectures are combined with five regularisation
    coefficients and three SGD decay values.  Momentum is fixed at 0.99 with
    Nesterov updates, early stopping is enabled, and each configuration is
    given 100 epochs with batches of 1000 at a learning rate of 1e-5.

    Args:
        nX_tr, y_tr: first and second positional arguments of ``testmodels``.
        nX_te, y_te: third and fourth positional arguments of ``testmodels``.
        din: first entry of every architecture list.
        dout: last entry of every architecture list.

    Returns:
        None.  The value returned by ``ml_utils.testmodels`` is discarded.
    """
    print("Grid search with cross-validation")

    # Hidden-layer stacks only; din/dout are wrapped around each one below.
    hidden_stacks = (
        [50],
        [500],
        [500, 300],
        [800, 500, 300],
        [800, 800, 500, 300],
    )
    candidate_archs = [[din] + list(stack) + [dout] for stack in hidden_stacks]
    l2_grid = [1e-7, 5e-7, 1e-6, 5e-6, 1e-5]
    decay_grid = [1e-5, 5e-5, 1e-4]

    ml_utils.testmodels(
        nX_tr, y_tr, nX_te, y_te, candidate_archs,
        actfn='relu',
        last_act='softmax',
        reg_coeffs=l2_grid,
        num_epoch=100,
        batch_size=1000,
        sgd_lr=1e-5,
        sgd_decays=decay_grid,
        sgd_moms=[0.99],
        sgd_Nesterov=True,
        EStop=True,
        verbose=0,
    )

    print("Grid search with cross-validation - END")
def linear_activations(nX_tr, y_tr, nX_te, y_te, din=50, dout=2):
    """Evaluate two families of linear-activation networks.

    Both families are passed to ``ml_utils.testmodels`` with
    ``actfn='linear'``, ``sgd_lr=1e-3`` and ``verbose=0``; every other
    setting is left at the library default.

    * ``archs_1``: zero to three hidden layers of 50 units.
    * ``archs_2``: progressively wider/deeper stacks, up to
      ``[din, 800, 800, 500, 300, dout]``.

    Args:
        nX_tr, y_tr: first and second positional arguments of ``testmodels``.
        nX_te, y_te: third and fourth positional arguments of ``testmodels``.
        din: first entry of every architecture list.
        dout: last entry of every architecture list.

    Returns:
        None.  The values returned by ``ml_utils.testmodels`` are discarded.
    """
    print("Linear Activations")

    archs_1 = [
        [din, dout],
        [din, 50, dout],
        [din, 50, 50, dout],
        [din, 50, 50, 50, dout],
    ]
    ml_utils.testmodels(nX_tr, y_tr, nX_te, y_te, archs_1,
                        actfn='linear', sgd_lr=1e-3, verbose=0)

    archs_2 = [
        [din, 50, dout],
        [din, 500, dout],
        [din, 500, 300, dout],
        [din, 800, 500, 300, dout],
        [din, 800, 800, 500, 300, dout],
    ]
    # actfn must be given explicitly here as well: leaving it out makes
    # testmodels fall back to its default activation, so this sweep would not
    # be a linear-activation experiment at all.
    ml_utils.testmodels(nX_tr, y_tr, nX_te, y_te, archs_2,
                        actfn='linear', sgd_lr=1e-3, verbose=0)

    print("Linear Activations - END")
def combination(nX_tr, y_tr, nX_te, y_te, best_reg_coeff, best_decay, best_momentum,
                din=50, dout=2):
    """Run one ``[din, 800, 500, 300, dout]`` network with the supplied settings.

    The three ``best_*`` arguments are each wrapped in a single-element list
    and handed to ``ml_utils.testmodels`` as the only regularisation
    coefficient, decay and momentum to try.  The run uses ReLU hidden units,
    a softmax output, Nesterov momentum and early stopping for up to 100
    epochs (batch size 1000, learning rate 1e-5).

    Args:
        nX_tr, y_tr: first and second positional arguments of ``testmodels``.
        nX_te, y_te: third and fourth positional arguments of ``testmodels``.
        best_reg_coeff: sole entry of ``reg_coeffs``.
        best_decay: sole entry of ``sgd_decays``.
        best_momentum: sole entry of ``sgd_moms``.
        din: first entry of the architecture list.
        dout: last entry of the architecture list.

    Returns:
        None.  The value returned by ``ml_utils.testmodels`` is discarded.
    """
    print("best combination")

    tuned = dict(
        reg_coeffs=[best_reg_coeff],
        sgd_decays=[best_decay],
        sgd_moms=[best_momentum],
    )
    fixed = dict(
        actfn='relu',
        last_act='softmax',
        num_epoch=100,
        batch_size=1000,
        sgd_lr=1e-5,
        sgd_Nesterov=True,
        EStop=True,
        verbose=0,
    )
    fixed.update(tuned)
    ml_utils.testmodels(nX_tr, y_tr, nX_te, y_te, [[din, 800, 500, 300, dout]], **fixed)

    print("best combination - END")
def parth(X_tr, y_tr, X_te, y_te):
    """Sweep five regularisation coefficients with early stopping enabled.

    A single ``[50, 800, 500, 300, 2]`` ReLU/softmax network is passed to
    ``hw.testmodels`` for 30 epochs (batch size 1000, learning rate 0.0005,
    no decay, no momentum).  The elapsed wall-clock time is printed, then the
    result is handed to ``displayJson``.

    Args:
        X_tr, y_tr: first and second positional arguments of ``testmodels``.
        X_te, y_te: third and fourth positional arguments of ``testmodels``.

    Returns:
        None.
    """
    began = time.time()

    # (multiplier, exponent) pairs for 1e-7, 5e-7, 1e-6, 5e-6 and 1e-5, built
    # with the same integer-power arithmetic as pow(10, exponent).
    reg_grid = [
        mult * 10 ** exp
        for mult, exp in ((1, -7), (5, -7), (1, -6), (5, -6), (1, -5))
    ]

    outcome = hw.testmodels(
        X_tr, y_tr, X_te, y_te,
        [[50, 800, 500, 300, 2]],
        actfn='relu',
        last_act='softmax',
        reg_coeffs=reg_grid,
        num_epoch=30,
        batch_size=1000,
        sgd_lr=0.0005,
        sgd_decays=[0.0],
        sgd_moms=[0.0],
        sgd_Nesterov=False,
        EStop=True,
        verbose=0,
    )

    # Label and value are joined by one separator space, as a two-item print does.
    print(" Time Taken = " + " " + str(time.time() - began))
    displayJson(outcome)
def momentum_fn(nX_tr, y_tr, nX_te, y_te, best_decay, din=50, dout=2):
    """Sweep SGD momentum values for a fixed decay and return the selected one.

    One ``[din, 800, 500, 300, dout]`` ReLU/softmax network is passed to
    ``ml_utils.testmodels`` with Nesterov momentum values
    0.99, 0.98, 0.95, 0.9 and 0.85.  Regularisation is 0.0, early stopping is
    off, and each run gets 50 epochs (batch size 1000, learning rate 1e-5).

    Args:
        nX_tr, y_tr: first and second positional arguments of ``testmodels``.
        nX_te, y_te: third and fourth positional arguments of ``testmodels``.
        best_decay: the only decay value tried (wrapped in a one-element list).
        din: first entry of the architecture list.
        dout: last entry of the architecture list.

    Returns:
        The fourth element of the 6-tuple returned by
        ``ml_utils.testmodels`` (unpacked here as the momentum).
    """
    print("momentum")

    archs = [[din, 800, 500, 300, dout]]
    # Only ``best_decay`` is tried here; no separate decay grid is swept.
    # The result must unpack into exactly six values, of which only the
    # momentum slot is used.
    _, _, _, momentum, _, _ = ml_utils.testmodels(
        nX_tr, y_tr, nX_te, y_te, archs,
        actfn='relu',
        last_act='softmax',
        reg_coeffs=[0.0],
        num_epoch=50,
        batch_size=1000,
        sgd_lr=1e-5,
        sgd_decays=[best_decay],
        sgd_moms=[0.99, 0.98, 0.95, 0.9, 0.85],
        sgd_Nesterov=True,
        EStop=False,
        verbose=0,
    )

    print("momentum - END")
    return momentum
def SGD_with_weight_decay(nX_tr, y_tr, nX_te, y_te, din=50, dout=2):
    """Sweep SGD decay values and return the one reported by ``testmodels``.

    One ``[din, 800, 500, 300, dout]`` ReLU/softmax network is run with
    decays 5e-5, 1e-4, 3e-4, 7e-4 and 1e-3.  The regularisation coefficient
    is fixed at 5e-7, momentum is 0.0 without Nesterov, early stopping is
    off, and each run gets 100 epochs (batch size 1000, learning rate 1e-5).

    Args:
        nX_tr, y_tr: first and second positional arguments of ``testmodels``.
        nX_te, y_te: third and fourth positional arguments of ``testmodels``.
        din: first entry of the architecture list.
        dout: last entry of the architecture list.

    Returns:
        The third element of the 6-tuple returned by ``ml_utils.testmodels``
        (unpacked here as the decay).
    """
    print("SGD with weight decay")

    sweep_kwargs = dict(
        actfn='relu',
        last_act='softmax',
        reg_coeffs=[5e-7],
        num_epoch=100,
        batch_size=1000,
        sgd_lr=1e-5,
        sgd_decays=[5e-5, 1e-4, 3e-4, 7e-4, 1e-3],
        sgd_moms=[0.0],
        sgd_Nesterov=False,
        EStop=False,
        verbose=0,
    )
    # Six values are expected back; only the decay slot is of interest here.
    _, _, picked_decay, _, _, _ = ml_utils.testmodels(
        nX_tr, y_tr, nX_te, y_te, [[din, 800, 500, 300, dout]], **sweep_kwargs)

    print("SGD with weight decay - END")
    return picked_decay
def partl(X_tr, y_tr, X_te, y_te):
    """Grid-search regularisation and decay for the deepest architecture.

    Only ``[50, 800, 800, 500, 300, 2]`` is searched (the wider architecture
    list is kept below as a comment).  Five regularisation coefficients are
    crossed with three decay values, using ReLU/softmax, Nesterov momentum
    0.99 and early stopping for up to 100 epochs (batch size 1000, learning
    rate 0.00001).  The elapsed time is printed and the result is passed to
    ``displayJson``.

    Args:
        X_tr, y_tr: first and second positional arguments of ``testmodels``.
        X_te, y_te: third and fourth positional arguments of ``testmodels``.

    Returns:
        None.
    """
    began = time.time()

    # (multiplier, exponent) pairs evaluated with integer-power arithmetic,
    # exactly as multiplier * pow(10, exponent) would be.
    decay_grid = [m * 10 ** e for m, e in ((1, -5), (5, -5), (1, -4))]
    reg_grid = [
        m * 10 ** e
        for m, e in ((1, -7), (5, -7), (1, -6), (5, -6), (1, -5))
    ]

    #archs = [[50, 50, 2], [50, 500, 2], [50, 500, 300, 2], [50, 800, 500, 300, 2], [50, 800, 800, 500, 300, 2]]
    deepest_only = [[50, 800, 800, 500, 300, 2]]

    outcome = hw.testmodels(
        X_tr, y_tr, X_te, y_te, deepest_only,
        actfn='relu',
        last_act='softmax',
        reg_coeffs=reg_grid,
        num_epoch=100,
        batch_size=1000,
        sgd_lr=0.00001,
        sgd_decays=decay_grid,
        sgd_moms=[0.99],
        sgd_Nesterov=True,
        EStop=True,
        verbose=0,
    )

    # Label and value are joined by one separator space, as a two-item print does.
    print(" Time Taken = " + " " + str(time.time() - began))
    displayJson(outcome)
def partk(X_tr, y_tr, X_te, y_te):
    """Run one network with a hard-coded regularisation/decay/momentum triple.

    ``[50, 800, 500, 300, 2]`` is passed to ``hw.testmodels`` with
    regularisation ``5 * 10**-6``, decay ``5 * 10**-5`` and Nesterov momentum
    0.99, with early stopping, for up to 100 epochs (batch size 1000,
    learning rate 0.00001).  The elapsed time is printed and the result is
    passed to ``displayJson``.

    Args:
        X_tr, y_tr: first and second positional arguments of ``testmodels``.
        X_te, y_te: third and fourth positional arguments of ``testmodels``.

    Returns:
        None.
    """
    began = time.time()

    settings = dict(
        actfn='relu',
        last_act='softmax',
        reg_coeffs=[5 * 10 ** -6],
        num_epoch=100,
        batch_size=1000,
        sgd_lr=0.00001,
        sgd_decays=[5 * 10 ** -5],
        sgd_moms=[0.99],
        sgd_Nesterov=True,
        EStop=True,
        verbose=0,
    )
    outcome = hw.testmodels(X_tr, y_tr, X_te, y_te, [[50, 800, 500, 300, 2]], **settings)

    # Label and value are joined by one separator space, as a two-item print does.
    print(" Time Taken = " + " " + str(time.time() - began))
    displayJson(outcome)
def parti(X_tr, y_tr, X_te, y_te):
    """Sweep six SGD decay values with a fixed regularisation coefficient.

    ``[50, 800, 500, 300, 2]`` is passed to ``hw.testmodels`` with
    regularisation ``5 * 10**-7`` and decays 1e-5, 5e-5, 1e-4, 3e-4, 7e-4 and
    1e-3.  Momentum is 0.0 without Nesterov, early stopping is off, and each
    run gets 100 epochs (batch size 1000, learning rate 0.00001).  The
    elapsed time is printed and the result is passed to ``displayJson``.

    Args:
        X_tr, y_tr: first and second positional arguments of ``testmodels``.
        X_te, y_te: third and fourth positional arguments of ``testmodels``.

    Returns:
        None.
    """
    began = time.time()

    # (multiplier, exponent) pairs evaluated with integer-power arithmetic,
    # exactly as multiplier * pow(10, exponent) would be.
    decay_grid = [
        m * 10 ** e
        for m, e in ((1, -5), (5, -5), (1, -4), (3, -4), (7, -4), (1, -3))
    ]

    outcome = hw.testmodels(
        X_tr, y_tr, X_te, y_te,
        [[50, 800, 500, 300, 2]],
        actfn='relu',
        last_act='softmax',
        reg_coeffs=[5 * 10 ** -7],
        num_epoch=100,
        batch_size=1000,
        sgd_lr=0.00001,
        sgd_decays=decay_grid,
        sgd_moms=[0.0],
        sgd_Nesterov=False,
        EStop=False,
        verbose=0,
    )

    # Label and value are joined by one separator space, as a two-item print does.
    print(" Time Taken = " + " " + str(time.time() - began))
    displayJson(outcome)
def relu_activations(nX_tr, y_tr, nX_te, y_te, din=50, dout=2):
    """Evaluate five ReLU architectures via ``ml_utils.testmodels``.

    The architectures range from one hidden layer of 50 units to
    ``[din, 800, 800, 500, 300, dout]``.  Only ``actfn='relu'``,
    ``sgd_lr=5e-4`` and ``verbose=0`` are set; everything else is left at
    the library default.

    Args:
        nX_tr, y_tr: first and second positional arguments of ``testmodels``.
        nX_te, y_te: third and fourth positional arguments of ``testmodels``.
        din: first entry of every architecture list.
        dout: last entry of every architecture list.

    Returns:
        None.  The value returned by ``ml_utils.testmodels`` is discarded.
    """
    print("ReLu Activations")

    # Hidden-layer stacks only; din/dout are wrapped around each one.
    hidden_stacks = ([50], [500], [500, 300], [800, 500, 300], [800, 800, 500, 300])
    candidate_archs = [[din] + list(stack) + [dout] for stack in hidden_stacks]
    ml_utils.testmodels(
        nX_tr, y_tr, nX_te, y_te, candidate_archs,
        actfn='relu', sgd_lr=5e-4, verbose=0)

    print("ReLu Activations - END")
def sigmoid_activations(nX_tr, y_tr, nX_te, y_te, din=50, dout=2):
    """Evaluate five sigmoid architectures via ``ml_utils.testmodels``.

    The architectures range from one hidden layer of 50 units to
    ``[din, 800, 800, 500, 300, dout]``.  Only ``actfn='sigmoid'``,
    ``sgd_lr=1e-3`` and ``verbose=0`` are set; everything else is left at
    the library default.

    Args:
        nX_tr, y_tr: first and second positional arguments of ``testmodels``.
        nX_te, y_te: third and fourth positional arguments of ``testmodels``.
        din: first entry of every architecture list.
        dout: last entry of every architecture list.

    Returns:
        None.  The value returned by ``ml_utils.testmodels`` is discarded.
    """
    print("Sigmoid Activations")

    # Hidden-layer stacks only; din/dout are wrapped around each one.
    hidden_stacks = ([50], [500], [500, 300], [800, 500, 300], [800, 800, 500, 300])
    candidate_archs = [[din] + list(stack) + [dout] for stack in hidden_stacks]
    ml_utils.testmodels(
        nX_tr, y_tr, nX_te, y_te, candidate_archs,
        actfn='sigmoid', sgd_lr=1e-3, verbose=0)

    print("Sigmoid Activations - END")
def l2_regularization(nX_tr, y_tr, nX_te, y_te, din=50, dout=2):
    """Sweep five regularisation coefficients on one ReLU architecture.

    ``[din, 800, 500, 300, dout]`` is passed to ``ml_utils.testmodels`` with
    coefficients 1e-7, 5e-7, 1e-6, 5e-6 and 1e-5 at ``sgd_lr=5e-4``.  All
    other settings are left at the library default.

    Args:
        nX_tr, y_tr: first and second positional arguments of ``testmodels``.
        nX_te, y_te: third and fourth positional arguments of ``testmodels``.
        din: first entry of the architecture list.
        dout: last entry of the architecture list.

    Returns:
        None.  The value returned by ``ml_utils.testmodels`` is discarded.
    """
    print("L2 Regularization")

    ml_utils.testmodels(
        nX_tr, y_tr, nX_te, y_te,
        [[din, 800, 500, 300, dout]],
        actfn='relu',
        reg_coeffs=[1e-7, 5e-7, 1e-6, 5e-6, 1e-5],
        sgd_lr=5e-4,
        verbose=0,
    )

    print("L2 Regularization - END")
def early_stopping_l2_regularization(nX_tr, y_tr, nX_te, y_te, din=50, dout=2):
    """Sweep regularisation coefficients with early stopping; return the pick.

    ``[din, 800, 500, 300, dout]`` is passed to ``ml_utils.testmodels`` with
    coefficients 1e-7, 5e-7, 1e-6, 5e-6 and 1e-5, ``sgd_lr=5e-4`` and
    ``EStop=True``.  All other settings are left at the library default.

    Args:
        nX_tr, y_tr: first and second positional arguments of ``testmodels``.
        nX_te, y_te: third and fourth positional arguments of ``testmodels``.
        din: first entry of the architecture list.
        dout: last entry of the architecture list.

    Returns:
        The second element of the 6-tuple returned by
        ``ml_utils.testmodels`` (unpacked here as the regularisation
        coefficient).
    """
    print("Early Stopping and L2-regularization")

    l2_grid = [1e-7, 5e-7, 1e-6, 5e-6, 1e-5]
    # Six values are expected back; only the regularisation slot is returned.
    _, picked_lambda, _, _, _, _ = ml_utils.testmodels(
        nX_tr, y_tr, nX_te, y_te,
        [[din, 800, 500, 300, dout]],
        actfn='relu',
        reg_coeffs=l2_grid,
        sgd_lr=5e-4,
        EStop=True,
        verbose=0,
    )

    print("Early Stopping and L2-regularization - END")
    return picked_lambda
def partd_a(X_tr, y_tr, X_te, y_te):
    """Evaluate linear-activation networks with zero to three hidden layers.

    Architectures ``[50, 2]`` through ``[50, 50, 50, 50, 2]`` are passed to
    ``hw.testmodels`` with a softmax output, no regularisation, no decay,
    no momentum and no early stopping, for 30 epochs (batch size 1000,
    learning rate 0.001).  The elapsed time is printed and the result is
    passed to ``displayJson``.

    Args:
        X_tr, y_tr: passed as the ``X_tr`` / ``y_tr`` keywords.
        X_te, y_te: passed as the ``X_te`` / ``y_te`` keywords.

    Returns:
        None.
    """
    began = time.time()

    # depth = number of 50-unit hidden layers between the 50 inputs and 2 outputs.
    shallow_to_deep = [[50] + [50] * depth + [2] for depth in range(4)]

    call_kwargs = dict(
        X_tr=X_tr,
        y_tr=y_tr,
        X_te=X_te,
        y_te=y_te,
        archs=shallow_to_deep,
        actfn='linear',
        last_act='softmax',
        reg_coeffs=[0.0],
        num_epoch=30,
        batch_size=1000,
        sgd_lr=0.001,
        sgd_decays=[0.0],
        sgd_moms=[0.0],
        sgd_Nesterov=False,
        EStop=False,
        verbose=0,
    )
    outcome = hw.testmodels(**call_kwargs)

    # Label and value are joined by one separator space, as a two-item print does.
    print(" Time Taken = " + " " + str(time.time() - began))
    displayJson(outcome)
def partj(X_tr, y_tr, X_te, y_te):
    """Sweep five Nesterov momentum values with a hard-coded decay.

    ``[50, 800, 500, 300, 2]`` is passed to ``hw.testmodels`` with decay
    ``5 * 10**-5`` and momenta 0.99, 0.98, 0.95, 0.9 and 0.85.  There is no
    regularisation and no early stopping; each run gets 50 epochs (batch
    size 1000, learning rate 0.00001).  The elapsed time is printed and the
    result is passed to ``displayJson``.

    Args:
        X_tr, y_tr: first and second positional arguments of ``testmodels``.
        X_te, y_te: third and fourth positional arguments of ``testmodels``.

    Returns:
        None.
    """
    began = time.time()

    momentum_grid = [0.99, 0.98, 0.95, 0.9, 0.85]
    outcome = hw.testmodels(
        X_tr, y_tr, X_te, y_te,
        [[50, 800, 500, 300, 2]],
        actfn='relu',
        last_act='softmax',
        reg_coeffs=[0.0],
        num_epoch=50,
        batch_size=1000,
        sgd_lr=0.00001,
        sgd_decays=[5 * 10 ** -5],
        sgd_moms=momentum_grid,
        sgd_Nesterov=True,
        EStop=False,
        verbose=0,
    )

    # Label and value are joined by one separator space, as a two-item print does.
    print(" Time Taken = " + " " + str(time.time() - began))
    displayJson(outcome)
# decay = [0.00001] # best decay from i # momentum = [0.99, 0.98, 0.95, 0.9, 0.85] # start = datetime.now() # hw_utils.testmodels(X_train_norm, Y_train, X_test_norm, Y_test, # arch, 'relu', 'softmax', L2, 50, 1000, 0.00001, decay, momentum, True, False, 1) # end = datetime.now() # print "Time taken in momentum : " + str((end - start).total_seconds()) # # Part k combination print "\n\nCombination" L2 = [0.0000001] # Best value from h decay = [0.00001] # best decay from i momentum = [0.99] # best value from j start = datetime.now() hw_utils.testmodels(X_train_norm, Y_train, X_test_norm, Y_test, arch, 'relu', 'softmax', L2, 100, 1000, 0.00001, decay, momentum, True, True, 1) end = datetime.now() print "Time taken in combining the above : " + str( (end - start).total_seconds()) # Part l Grid search with cross validation print "\n\nGrid search with cross validation" arch = [[50, 50, 2], [50, 500, 2], [50, 500, 300, 2], [50, 800, 500, 300, 2], [50, 800, 800, 500, 300, 2]] L2 = [0.0000001, 0.0000005, 0.000001, 0.000005, 0.00001] decay = [0.00001, 0.0005, 0.0001] momentum = [0.99] start = datetime.now() hw_utils.testmodels(X_train_norm, Y_train, X_test_norm, Y_test, arch, 'relu', 'softmax', L2, 100, 1000, 0.00001, decay, momentum, True,
# Grid search: five ReLU/softmax architectures x five regularisation
# coefficients x three decay values, Nesterov momentum 0.99, early stopping.
print("Grid search")

# Layer widths at both ends are read from the first training row / target.
din = len(X_tr[0])
dout = len(y_tr[0])

arch = [
    [din, 50, dout],
    [din, 500, dout],
    [din, 500, 300, dout],
    [din, 800, 500, 300, dout],
    [din, 800, 800, 500, 300, dout],
]
l2_reg_coeffs = [1e-7, 5e-7, 1e-6, 5e-6, 1e-5]
decays = [1e-5, 5e-5, 1e-4]
moms = [0.99]

# Wall-clock timing brackets the whole testmodels call.
start_time = time.time()
testmodels(
    X_tr, y_tr, X_te, y_te, arch,
    actfn='relu',
    last_act='softmax',
    reg_coeffs=l2_reg_coeffs,
    num_epoch=100,
    batch_size=1000,
    sgd_lr=1e-5,
    sgd_decays=decays,
    sgd_moms=moms,
    sgd_Nesterov=True,
    EStop=True,
    verbose=0,
)
end_time = time.time()
print("Grid search {0:.3f} seconds".format(end_time - start_time))
L = len(X_tr[0]) archs = [[L, 50, 2], [L, 50, 30, 2], [L, 500, 2], [L, 500, 300, 2], [L, 500, 500, 300, 2]] reg_coeff = [1e-07, 1e-06, 1e-05] sgd_decay = [1e-05, 1e-04, 1e-03] moms = [0.99] best_param = testmodels(X_tr, y_tr, X_te, y_te, archs, actfn='relu', last_act='softmax', reg_coeffs=reg_coeff, num_epoch=100, batch_size=1000, sgd_lr=5 * 1e-05, sgd_decays=sgd_decay, sgd_moms=moms, sgd_Nesterov=True, EStop=False, verbose=0) print best_param Prob = testmodels_test(X_tr, y_tr, X_VALID, [best_param[0]], actfn='relu', last_act='softmax', reg_coeffs=[best_param[1]],
# Layer widths used at both ends of every architecture below.
dIn = 50
dOut = 2

# Bind the *Norm arrays to the short names used by the runs below.
xTrain = xTrainNorm
yTrain = yTrain  # self-assignment: leaves the value unchanged
xTest = xTestNorm
yTest = yTest  # self-assignment: leaves the value unchanged

print("\nLinear Activations")
print("-------------------")

# --- Architecture 1: zero to three hidden layers of 50 units -----------------
print("Architecture 1")
print("---------------")
architectures = [
    [dIn, dOut],
    [dIn, 50, dOut],
    [dIn, 50, 50, dOut],
    [dIn, 50, 50, 50, dOut],
]
startTime = default_timer()
# Positional arguments after the architectures are presumably actfn, last_act,
# reg_coeffs, num_epoch, batch_size, sgd_lr, sgd_decays, sgd_moms,
# sgd_Nesterov, EStop, verbose -- TODO confirm against hw.testmodels.
hw.testmodels(
    xTrain, yTrain, xTest, yTest, architectures,
    'linear', 'softmax', [0.0], 30, 1000, 0.001, [0.0], [0.0], False, False, 0)
timeTaken = default_timer() - startTime
print("Training time = " + str(timeTaken) + " s")

# --- Architecture 2: progressively wider and deeper stacks -------------------
print("Architecture 2")
print("---------------")
architectures = [
    [dIn, 50, dOut],
    [dIn, 500, dOut],
    [dIn, 500, 300, dOut],
    [dIn, 800, 500, 300, dOut],
    [dIn, 800, 800, 500, 300, dOut],
]
startTime = default_timer()
# Same positional settings as the Architecture 1 run.
hw.testmodels(
    xTrain, yTrain, xTest, yTest, architectures,
    'linear', 'softmax', [0.0], 30, 1000, 0.001, [0.0], [0.0], False, False, 0)
timeTaken = default_timer() - startTime
print("Training time = " + str(timeTaken) + " s")

# Heading for the section that follows.
print("\nSigmoid Activations")
print("--------------------")
# Run a single small ReLU/softmax network and store two elements of the result
# in the ``validate`` table, which is then written to CSV.
print('Try Neural Network')
# Both widths on one line, separated by a single space.
print(str(din) + " " + str(dout))

time_e = hw_utils.start_time()

# NOTE(review): this five-architecture list is overwritten a few lines below,
# so it never reaches testmodels.  Several of its widths (din * 7.5,
# din * 1.5) are float expressions -- confirm they are meant as layer sizes
# before re-enabling it.
arch_list_e = [
    [din, din, dout],
    [din, din * 10, dout],
    [din, din * 10, din * 7.5, dout],
    [din, din * 15, din * 10, din * 7.5, dout],
    [din, din * 15, din * 1.5, din * 10, din * 7.5, dout],
]
#arch_list_e=[[din, din*15, din*10, din*7.5, dout]]
arch_list_e = [[din, din, dout]]

y_out = hw_utils.testmodels(
    X_tr, y_tr, X_te, y_te, arch_list_e,
    actfn='relu',
    last_act='softmax',
    reg_coeffs=[0.0],
    num_epoch=30,
    batch_size=1000,
    sgd_lr=5e-4,
    sgd_decays=[0.0],
    sgd_moms=[0.0],
    sgd_Nesterov=False,
    EStop=False,
    verbose=0,
)
hw_utils.end_time(time_e)

# The first two elements of the result go into entries 2 and 3 of ``validate``.
validate[2] = y_out[0]
validate[3] = y_out[1]
validate.to_csv("validate_result.csv", index=False)
arch_list1 = [[din, dout], [din, 50, dout], [din, 50, 50, dout], [din, 50, 50, 50, dout]] print arch_list1 print ' 3 (h) Early Stopping and L2-regularization' time_g = start_time() arch_list_e = [[din, 800, 500, 300, dout]] param_list = [10e-7, 50e-7, 10e-6, 50e-6, 10e-5] best_h = hw_utils.testmodels(X_tr, y_tr, X_te, y_te, arch_list_e, actfn='relu', last_act='softmax', reg_coeffs=param_list, num_epoch=30, batch_size=1000, sgd_lr=5e-4, sgd_decays=[0.0], sgd_moms=[0.0], sgd_Nesterov=False, EStop=True, verbose=0) end_time(time_g) print ' 3 (i) SGD with weight decay' time_g = start_time() arch_list_e = [[din, 800, 500, 300, dout]] param_list = [50e-7] decay = [10e-5, 50e-5, 10e-4, 30e-4, 70e-4, 10e-3] best_i = hw_utils.testmodels(X_tr,
import time print "Linear activations start" din, dout = len(X_tr[0]), len(y_tr[0]) arch = [[din, dout], [din, 50, dout], [din, 50, 50, dout], [din, 50, 50, 50, dout]] testmodels(X_tr, y_tr, X_te, y_te, arch, actfn='linear', last_act='softmax', reg_coeffs=[0.0], num_epoch=30, batch_size=1000, sgd_lr=1e-3, sgd_decays=[0.0], sgd_moms=[0.0], sgd_Nesterov=False, EStop=False, verbose=0) start_time = time.time() arch = [[din, 50, dout], [din, 500, dout], [din, 500, 300, dout], [din, 800, 500, 300, dout], [din, 800, 800, 500, 300, dout]] testmodels(X_tr, y_tr, X_te, y_te,