import random
import sys

import numpy as np

# Helpers referenced below (import_csv, transform_features, feature_reduce,
# NeuralNetwork, accuracy, heatmap) and the *_PATH constants are defined
# elsewhere in this module.


def command_line_run(args):
    # Parse argv-style input into {flag: [numeric values]}. Flags start with
    # '-'; any bare token is parsed as a float and attached to the most recent
    # flag (which means negative numbers cannot be passed as values).
    args_dict = {}
    current = None
    for arg in args[1:]:
        if arg.startswith('-'):
            args_dict[arg] = []
            current = args_dict[arg]
        else:
            current.append(float(arg))

    num_classes = 10
    random.seed(1917)

    if '-debug' in args_dict:
        train_outputs = import_csv(TRAIN_OUTPUTS_SUBSET_PATH).astype(int)
        train_inputs = import_csv(TRAIN_INPUTS_SUBSET_PATH)
    else:
        train_outputs = import_csv(TRAIN_OUTPUTS_PATH).astype(int)
        train_inputs = import_csv(TRAIN_INPUTS_PATH)

    if '-t' in args_dict:
        print(len(train_inputs))
        train_inputs = np.array(transform_features(train_inputs))
        print(len(train_inputs))

    # Default hyperparameters.
    hnh = []  # hidden-layer sizes
    num_features = 300
    dropout = None
    lr = 1.0
    epochs = 50

    if '-f' in args_dict:
        num_features = int(args_dict['-f'][0])

    if '-test' in args_dict:
        test_inputs = import_csv(TEST_INPUTS_PATH)
        if '-t' in args_dict:
            test_inputs = transform_features(test_inputs)
        if num_features != len(train_inputs[0]):
            # Reduce train and test together so the same components are
            # picked for both sets.
            combined = feature_reduce(
                np.array(list(train_inputs) + list(test_inputs)), num_features)
            train_inputs = combined[:len(train_inputs)]
            test_inputs = combined[len(train_inputs):]

    if '-validate' in args_dict:
        # Hold out the last fifth of a shuffled split for validation.
        validation_size = (4 * len(train_inputs)) // 5
        rand_idxs = random.sample(range(len(train_inputs)), len(train_inputs))
        test_inputs = train_inputs[rand_idxs[validation_size:]]
        test_outputs = train_outputs[rand_idxs[validation_size:]]
        train_inputs = train_inputs[rand_idxs[:validation_size]]
        train_outputs = train_outputs[rand_idxs[:validation_size]]
        # We have to reduce the features all at the same time because it is
        # unsupervised learning and we want the same features to be picked by
        # PCA for both the train and test sets.
        if num_features != len(train_inputs[0]):
            combined = feature_reduce(
                np.array(list(train_inputs) + list(test_inputs)), num_features)
            train_inputs = combined[:len(train_inputs)]
            test_inputs = combined[len(train_inputs):]

    if '-hn' in args_dict:
        hnh = [int(n) for n in args_dict['-hn']]
    if '-d' in args_dict:
        if not 0.0 <= args_dict['-d'][0] <= 1.0:
            print('Please input a dropout rate between 0 and 1!')
            exit(0)
        dropout = args_dict['-d'][0]
    if '-lr' in args_dict:
        lr = args_dict['-lr'][0]
    if '-e' in args_dict:
        epochs = int(args_dict['-e'][0])

    nn = NeuralNetwork(len(train_inputs[0]), hnh, num_classes,
                       learning_rate=lr, dropout=dropout)
    nn.fit(train_inputs, train_outputs, training_horizon=epochs, verbose=True)
    p = nn.predict(test_inputs)

    fname = data_files_path + 'predictions_with_%depochs_%dfeatures_%0.2flf' % (
        epochs, num_features, lr)
    if '-test' in args_dict:
        with open(fname + '.csv', 'w') as f:
            f.write('Id,Prediction\n')
            for i in range(len(p)):
                f.write('%d,%d\n' % (i + 1, p[i]))
    else:
        print(accuracy(p, test_outputs))
        if '-record' in args_dict:
            heatmap(p, test_outputs, fname)
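# A minimal entry-point sketch (assumed; not shown in the original): the
# parser above expects argv-style input, so the module would typically end
# with:
if __name__ == '__main__':
    command_line_run(sys.argv)

# Example invocation (script name and flag values are illustrative):
#   python run.py -f 100 -hn 64 32 -lr 0.1 -e 20 -validate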
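# feature_reduce is defined elsewhere in this project; per the PCA comment in
# the -validate branch, a hedged sketch of what it might look like, assuming
# scikit-learn's PCA (the name and signature here are illustrative):
from sklearn.decomposition import PCA

def feature_reduce_sketch(inputs, num_features):
    # Fit one PCA on the combined matrix so train and test rows share the
    # same projection.
    return PCA(n_components=num_features).fit_transform(inputs)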
from sklearn.metrics import classification_report, confusion_matrix

# Grid scores (grid_scores_ is the pre-0.18 scikit-learn GridSearchCV
# attribute; later versions replaced it with cv_results_).
print("Grid scores on development set:")
print()
for params, mean_score, scores in clf.grid_scores_:
    print("%0.3f (+/-%0.03f) for %r" % (mean_score, scores.std() * 2, params))
print()

# Validation performance.
print("Performance of optimal learner on validation set")
print()
expected, predicted = test_y, clf.predict(test_x)
print(classification_report(expected, predicted))
print(confusion_matrix(expected, predicted))
print()
accuracy(predicted, expected)
heatmap(predicted, expected, "LinSVM/testAccuracy")

# Training accuracy.
print("Performance of optimal learner on training set")
print()
expected, predicted = train_y, clf.predict(train_x)
print(classification_report(expected, predicted))
print(confusion_matrix(expected, predicted))
print()
accuracy(predicted, expected)
heatmap(predicted, expected, "LinSVM/trainAccuracy")

# Training on the whole dataset.
print("Fitting the best estimator on the complete training set")
learner = clf.best_estimator_
learner.fit(train_inputs, train_outputs)
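# The LinSVM clf evaluated above is not constructed in this excerpt; a hedged
# sketch using the same pre-0.18 scikit-learn API (the C grid and cv value
# are assumptions):
from sklearn.svm import LinearSVC
from sklearn.grid_search import GridSearchCV  # pre-0.18 module path

clf = GridSearchCV(LinearSVC(), {'C': [0.01, 0.1, 1.0, 10.0]}, cv=5)
clf.fit(train_x, train_y)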
# Grid scores.
print("Grid scores on development set:")
print()
for params, mean_score, scores in clf.grid_scores_:
    print("%0.3f (+/-%0.03f) for %r" % (mean_score, scores.std() * 2, params))
print()

# Validation performance.
print("Performance of optimal learner on validation set")
print()
expected, predicted = test_y, clf.predict(test_x)
print(classification_report(expected, predicted))
print(confusion_matrix(expected, predicted))
print()
accuracy(predicted, expected)
heatmap(predicted, expected, 'LogReg/testAccuracy')

# Training accuracy.
print("Performance of optimal learner on training set")
print()
expected, predicted = train_y, clf.predict(train_x)
print(classification_report(expected, predicted))
print(confusion_matrix(expected, predicted))
print()
accuracy(predicted, expected)
heatmap(predicted, expected, 'LogReg/trainAccuracy')

# Training on the whole dataset.
print("Fitting the best estimator on the complete training set")
learner = clf.best_estimator_
learner.fit(train_inputs, train_outputs)
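# Analogously, the LogReg clf above was presumably built the same way with the
# estimator swapped (again a sketch; the grid is an assumption):
from sklearn.linear_model import LogisticRegression

clf = GridSearchCV(LogisticRegression(), {'C': [0.01, 0.1, 1.0, 10.0]}, cv=5)
clf.fit(train_x, train_y)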