def cross_validate(X, Y, n, k=5, epochs=50, lr=1.0, lam=1.0, nhn=50, dropout=None, v=False):
    """Estimate train/test accuracy of a one-hidden-layer net with k-fold CV.

    Splits the n samples into k shuffled folds (fixed seed 1917 so runs are
    reproducible), trains a fresh NeuralNetwork on each training split, and
    scores it on both splits.

    X, Y     -- full input matrix and label vector (indexable by fold indices)
    n        -- number of samples (passed straight to KFold)
    k        -- number of folds
    epochs   -- training horizon handed to NeuralNetwork.fit
    lr       -- learning rate
    lam      -- NOTE(review): accepted but never used in the body; kept for
                caller compatibility
    nhn      -- number of hidden neurons (single hidden layer)
    dropout  -- dropout rate forwarded to the network, or None
    v        -- verbose flag forwarded to fit

    Returns (mean test accuracy, mean train accuracy) across the folds.
    """
    folds = KFold(n, n_folds=k, shuffle=True, random_state=1917)
    num_outputs = 10  # ten digit classes
    fold_train_scores = []
    fold_test_scores = []
    for train_idx, test_idx in folds:
        x_train, y_train = X[train_idx], Y[train_idx]
        x_test, y_test = X[test_idx], Y[test_idx]
        # A brand-new network per fold so folds do not share weights.
        model = NeuralNetwork(len(x_train[0]), [nhn], num_outputs,
                              learning_rate=lr, dropout=dropout, maxnorm=None)
        model.fit(x_train, y_train, training_horizon=epochs, verbose=v)
        fold_train_scores.append(accuracy(model.predict(x_train), y_train))
        fold_test_scores.append(accuracy(model.predict(x_test), y_test))
    return np.mean(fold_test_scores), np.mean(fold_train_scores)
def cross_validate(X, Y, n, k=5, epochs=50, lr=1.0, lam=1.0, nhn=50, dropout=None, v=False):
    """k-fold cross-validation for a single-hidden-layer NeuralNetwork.

    For each of the k shuffled folds (seeded with 1917 for reproducibility)
    a fresh network with `nhn` hidden units is trained for `epochs` epochs
    and its accuracy recorded on both the training and held-out split.
    `lam` is accepted for interface compatibility but not referenced here.

    Returns a pair: (mean held-out accuracy, mean training accuracy).
    """
    splitter = KFold(n, n_folds=k, shuffle=True, random_state=1917)
    n_out = 10  # digits 0-9
    acc_train, acc_test = [], []
    for tr, te in splitter:
        # Fresh model each fold; weights never carry over between folds.
        net = NeuralNetwork(len(X[tr][0]), [nhn], n_out,
                            learning_rate=lr, dropout=dropout, maxnorm=None)
        net.fit(X[tr], Y[tr], training_horizon=epochs, verbose=v)
        acc_train.append(accuracy(net.predict(X[tr]), Y[tr]))
        acc_test.append(accuracy(net.predict(X[te]), Y[te]))
    return np.mean(acc_test), np.mean(acc_train)
def command_line_run(args): args_dict = {} for i in range(1, len(args)): if '-' in args[i]: args_dict[args[i]] = [] args_dict[-1] = args_dict[args[i]] else: args_dict[-1].append(float(args[i])) del args_dict[-1] num_classes = 10 random.seed(1917) if '-debug' in args_dict: train_outputs = import_csv(TRAIN_OUTPUTS_SUBSET_PATH).astype(int) train_inputs = import_csv(TRAIN_INPUTS_SUBSET_PATH) else: train_outputs = import_csv(TRAIN_OUTPUTS_PATH).astype(int) train_inputs = import_csv(TRAIN_INPUTS_PATH) if '-t' in args_dict: print len(train_inputs) train_inputs = np.array(transform_features(train_inputs)) print len(train_inputs) # Default values. hnh = [] num_features = 300 dropout = None lr = 1.0 epochs = 50 if '-f' in args_dict: num_features = map(int, args_dict['-f'])[0] if '-test' in args_dict: test_inputs = import_csv(TEST_INPUTS_PATH) if '-t' in args_dict: test_inputs = transform_features(test_inputs) if not num_features == len(train_inputs[0]): alll = feature_reduce( np.array(list(train_inputs) + list(test_inputs)), num_features) train_inputs = alll[:len(train_inputs)] test_inputs = alll[len(train_inputs):] if '-validate' in args_dict: validation_size = (4 * len(train_inputs)) / 5 # Randomize the train and validation set. rand_idxs = random.sample(range(0, len(train_inputs)), len(train_inputs)) test_inputs = train_inputs[rand_idxs[validation_size:]] test_outputs = train_outputs[rand_idxs[validation_size:]] train_inputs = train_inputs[rand_idxs[0:validation_size]] train_outputs = train_outputs[rand_idxs[0:validation_size]] # We have to reduce the features all at the same time because it is unsupervised learning and # we want the same features to be picked by PCA for both of the train and test sets. 
if not num_features == len(train_inputs[0]): alll = feature_reduce( np.array(list(train_inputs) + list(test_inputs)), num_features) train_inputs = alll[:len(train_inputs)] test_inputs = alll[len(train_inputs):] if '-hn' in args_dict: hnh = map(int, args_dict['-hn']) if '-d' in args_dict: if not (0.0 <= args_dict['-d'][0] <= 1.0): print 'Please input a dropout rate between 0 and 1!' exit(0) dropout = args_dict['-d'][0] if '-lr' in args_dict: lr = args_dict['-lr'][0] if '-e' in args_dict: epochs = int(args_dict['-e'][0]) nn = NeuralNetwork(len(train_inputs[0]), hnh, num_classes, learning_rate=lr, dropout=dropout) nn.fit(train_inputs, train_outputs, training_horizon=epochs, verbose=True) p = nn.predict(test_inputs) fname = data_files_path + 'predictions_with_%depochs_%dfeatures_%0.2flf' % ( epochs, num_features, lr) if '-test' in args_dict: with open(fname + '.csv', 'w') as f: f.write('Id,Prediction\n') for i in range(len(p)): f.write('%d,%d\n' % (i + 1, p[i])) else: print accuracy(p, test_outputs) if '-record' in args_dict: heatmap(p, test_outputs, fname)
def command_line_run(args):
    """Train a NeuralNetwork on the digit data as directed by CLI flags.

    args -- sys.argv-style list; args[0] is ignored, then tokens of the form
            "-flag v1 v2 ..." where each value is parsed as a float.

    Flags read below: -debug, -t, -f, -test, -validate, -hn, -d, -lr, -e,
    -record. NOTE(review): if neither -test nor -validate is given,
    test_inputs is never assigned and nn.predict(test_inputs) will raise.
    """
    # Hand-rolled flag parser: key -1 is a sentinel pointing at the value
    # list of the most recent flag.
    # NOTE(review): `del args_dict[-1]` raises KeyError when no flag token
    # is present, and `'-' in args[i]` treats any token containing '-'
    # (including negative numbers) as a flag — confirm intended usage.
    args_dict = {}
    for i in range(1,len(args)):
        if '-' in args[i]:
            args_dict[args[i]] = []
            args_dict[-1] = args_dict[args[i]]
        else:
            args_dict[-1].append(float(args[i]))
    del args_dict[-1]
    num_classes = 10
    random.seed(1917)  # reproducible validation split
    if '-debug' in args_dict:
        # Small subset CSVs for quick iteration.
        train_outputs = import_csv(TRAIN_OUTPUTS_SUBSET_PATH).astype(int)
        train_inputs = import_csv(TRAIN_INPUTS_SUBSET_PATH)
    else:
        train_outputs = import_csv(TRAIN_OUTPUTS_PATH).astype(int)
        train_inputs = import_csv(TRAIN_INPUTS_PATH)
    if '-t' in args_dict:
        print len(train_inputs)
        train_inputs = np.array(transform_features(train_inputs))
        print len(train_inputs)
    # Default values.
    hnh = []
    num_features = 300
    dropout = None
    lr = 1.0
    epochs = 50
    if '-f' in args_dict:
        num_features = map(int, args_dict['-f'])[0]
    if '-test' in args_dict:
        test_inputs = import_csv(TEST_INPUTS_PATH)
        if '-t' in args_dict:
            test_inputs = transform_features(test_inputs)
        if not num_features == len(train_inputs[0]):
            # Reduce train and test together so the unsupervised PCA picks
            # the same components for both sets.
            alll = feature_reduce(np.array(list(train_inputs)+list(test_inputs)), num_features)
            train_inputs = alll[: len(train_inputs)]
            test_inputs = alll[len(train_inputs) :]
    if '-validate' in args_dict:
        # Hold out the last fifth of a shuffled index permutation.
        validation_size = (4 * len(train_inputs)) / 5
        # Randomize the train and validation set.
        rand_idxs = random.sample(range(0, len(train_inputs)), len(train_inputs))
        test_inputs = train_inputs[rand_idxs[validation_size : ]]
        test_outputs = train_outputs[rand_idxs[validation_size : ]]
        train_inputs = train_inputs[rand_idxs[0 : validation_size]]
        train_outputs = train_outputs[rand_idxs[0 : validation_size]]
        # We have to reduce the features all at the same time because it is unsupervised learning and
        # we want the same features to be picked by PCA for both of the train and test sets.
        if not num_features == len(train_inputs[0]):
            alll = feature_reduce(np.array(list(train_inputs)+list(test_inputs)), num_features)
            train_inputs = alll[: len(train_inputs)]
            test_inputs = alll[len(train_inputs) :]
    if '-hn' in args_dict:
        hnh = map(int, args_dict['-hn'])
    if '-d' in args_dict:
        if not (0.0 <= args_dict['-d'][0] <= 1.0):
            print 'Please input a dropout rate between 0 and 1!'
            exit(0)
        dropout = args_dict['-d'][0]
    if '-lr' in args_dict:
        lr = args_dict['-lr'][0]
    if '-e' in args_dict:
        epochs = int(args_dict['-e'][0])
    nn = NeuralNetwork(len(train_inputs[0]), hnh, num_classes, learning_rate=lr, dropout=dropout)
    nn.fit(train_inputs, train_outputs, training_horizon=epochs, verbose=True)
    p = nn.predict(test_inputs)
    fname = data_files_path+'predictions_with_%depochs_%dfeatures_%0.2flf'%(epochs,num_features,lr)
    if '-test' in args_dict:
        # Kaggle-style submission file: 1-based ids.
        with open(fname+'.csv','w') as f:
            f.write('Id,Prediction\n')
            for i in range(len(p)):
                f.write('%d,%d\n'%(i+1,p[i]))
    else:
        print accuracy(p, test_outputs)
        if '-record' in args_dict:
            heatmap(p, test_outputs, fname)