def logr_stats(model):
    """Train and evaluate the given logistic regression model on the 2 vs 7
    digit pair. Prints accuracies only; nothing is saved."""
    print('Logistic Regression stats on 2 vs 7 - does not save anything')
    X_train, y_train = load_train_data()
    X_test, y_test = load_test_data()
    print('Data Loaded')
    Xbin_train, ybin_train = get_digit_pair_data(X_train, y_train, 2, 7)
    X_train, y_train, X_val, y_val = split_data(Xbin_train, ybin_train)
    print('Train Model')
    acc_train, acc_val = model.train(X_train, y_train, X_val, y_val)
    print('Model Trained')
    Xbin_test, ybin_test = get_digit_pair_data(X_test, y_test, 2, 7)
    acc_train = model_accuracy(model, Xbin_train, ybin_train)
    acc_test = model_accuracy(model, Xbin_test, ybin_test)
    print('acc train: {0}\nacc test: {1}'.format(acc_train, acc_test))
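# Example usage for logr_stats -- a minimal sketch only. The Classifier wrapper
# is the same one used in visualize_cnn below; `logistic.LogisticModel` and
# `logistic.Config` are assumed module/class names for the logistic regression
# model in this repository and may be named differently.
def example_logr_stats():
    model = Classifier(logistic.LogisticModel, logistic.Config())
    logr_stats(model)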
def visualize_cnn(config=None):
    """
    Visualize the convolution features found in the CNN model.

    To use this, update the add_prediction op and store references to the
    convolution weight variable and the tensors representing the computation:
      - save the first convolution variable in self.C1
      - save the output of the first convolution in self.conv1
      - save the output of the first convolution after relu in self.conv1_relu
    """
    if config is None:
        config = {'hidden_size': 1024}
    model = Classifier(cnn.ConvolutionalModel, cnn.Config(**config))
    W = model.get_cnn_weights()
    convolutions = W.transpose(3, 0, 1, 2).squeeze()
    # convolutions = W.transpose(3, 0, 1, 2).reshape(32, 25).T
    # fig2 = plt.figure()
    # ax = fig2.add_axes([0.05, 0.05, 0.9, 0.9])
    # ax.matshow(convolutions.reshape(5, 5, 4, 8).transpose(2, 0, 3, 1).reshape(4 * 5, 8 * 5), cmap='gray')

    # Plot each filter on its own axis in an 8x4 grid.
    fig, axes = plt.subplots(8, 4)
    vmin, vmax = W.min(), W.max()
    for coef, ax in zip(convolutions, axes.ravel()):
        ax.matshow(coef, cmap=plt.cm.gray, vmin=.5 * vmin, vmax=.5 * vmax)
        ax.set_xticks(())
        ax.set_yticks(())
    fig.savefig('results/convolution_filters.png')

    img, lab = load_train_data()
    conv, relu = model.get_cnn_conv(img[0:64])

    # Tile the 32 convolution feature maps of the first image into a 4x8 grid.
    fig2 = plt.figure()
    ax2 = fig2.add_axes([0.05, 0.05, 0.9, 0.9])
    c0 = conv[0]
    tmp = c0.transpose(2, 0, 1).reshape(32, 784).T.reshape(28, 28, 4, 8)
    one_plot = tmp.transpose(2, 0, 3, 1).reshape(4 * 28, 8 * 28)
    ax2.matshow(one_plot, cmap='gray')
    ax2.set_title('convolution output of first point')
    fig2.savefig('results/convolution_of_first_image.png')

    # Same tiling for the feature maps after the relu non-linearity.
    fig3 = plt.figure()
    ax3 = fig3.add_axes([0.05, 0.05, 0.9, 0.9])
    c0 = relu[0]
    tmp = c0.transpose(2, 0, 1).reshape(32, 784).T.reshape(28, 28, 4, 8)
    one_plot = tmp.transpose(2, 0, 3, 1).reshape(4 * 28, 8 * 28)
    ax3.matshow(one_plot, cmap='gray')
    ax3.set_title('convolution relu output of first point')
    fig3.savefig('results/convolution_of_first_image_after_relu.png')
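# A small self-contained sanity check of the filter-tiling reshape used in
# visualize_cnn -- a sketch that assumes the first conv layer has 32 filters of
# size 5x5 over 1 input channel (the real shape comes from
# model.get_cnn_weights()).
def _demo_filter_grid():
    W = np.random.randn(5, 5, 1, 32)              # (height, width, in_channels, out_channels)
    filters = W.transpose(3, 0, 1, 2).squeeze()   # -> (32, 5, 5): one 5x5 filter per entry
    # Arrange the 32 filters into a single 4x8 grid image.
    grid = filters.reshape(4, 8, 5, 5).transpose(0, 2, 1, 3).reshape(4 * 5, 8 * 5)
    assert grid.shape == (20, 40)
    return grid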
def model_stats(model, name):
    """
    Evaluate a trained model: report train/test accuracy, a classification
    report and a confusion matrix on the test data, and save them.
    """
    X_train, y_train = load_train_data()
    X_test, y_test = load_test_data()
    acc_train = model_accuracy(model, X_train, y_train)
    acc_test = model_accuracy(model, X_test, y_test)
    df_acc = pd.DataFrame(np.c_[acc_train, acc_test],
                          columns=['train_accuracy', 'test_accuracy'])
    print('df_acc', df_acc)
    export_dataframe('{0}_stats_accuracy.csv'.format(name.lower()), df_acc)
    print('Train Accuracy: {0}, Test Accuracy: {1}'.format(acc_train, acc_test))

    pred_test = model.predict(X_test)
    confusion = confusion_matrix(y_test, pred_test)
    cr = classification_report(y_test, pred_test)
    print('Full Model Stats')
    print('Classification Report - Test Data')
    print(cr)
    print('Confusion Matrix - Test Data')
    df_confusion = pd.DataFrame(confusion)
    display(df_confusion)
    export_dataframe('{0}_confusion_matrix.csv'.format(name.lower()), df_confusion)
    return model
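# Example usage for model_stats -- a sketch only. Any already-fitted classifier
# exposing .predict() should work, provided the project's model_accuracy helper
# also accepts it; the scikit-learn SVC below is just an illustration and is
# not part of the original pipeline.
def example_model_stats():
    from sklearn.svm import SVC
    X_train, y_train = load_train_data()
    clf = SVC(kernel='rbf')
    clf.fit(X_train[0:1000], y_train[0:1000])
    model_stats(clf, 'svc_rbf')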
def nn_stats(model, name):
    print('Loading Data')
    X, y = load_train_data()
    X_train, y_train, X_val, y_val = split_data(X, y)
    X_test, y_test = load_test_data()
    print('Data Loaded')
    print('Train Model')
    acc_train, acc_val = model.train(X_train, y_train, X_val, y_val)
    print('Model Trained')
    df = pd.DataFrame(np.c_[acc_train, acc_val],
                      columns=['in_sample_acc', 'validation_acc'])
    export_dataframe('{0}_early_stopping.csv'.format(name), df)

    fig = plt.figure()
    ax = fig.add_axes([0.15, 0.1, 0.8, 0.8])
    n = len(acc_train)
    ax.plot(np.arange(1, n + 1), 1.0 - np.array(acc_train), 'g-o', label='training error')
    ax.plot(np.arange(1, n + 1), 1.0 - np.array(acc_val), 'b-o', label='validation error')
    ax.legend()
    ax.set_title('Minimize Error Progress')
    ax.set_ylabel('Error')
    ax.set_xlabel('Epoch')
    ax.set_xticks(np.arange(1, n + 1))
    export_fig('{0}_early_stopping'.format(name), fig)
    model_stats(model, name)
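# Example: running the full early-stopping and statistics pipeline on the CNN
# model -- a sketch. The Classifier / cnn.ConvolutionalModel / cnn.Config
# combination mirrors visualize_cnn above; any Config fields beyond hidden_size
# are assumed to have sensible defaults.
def example_nn_stats():
    model = Classifier(cnn.ConvolutionalModel, cnn.Config(hidden_size=1024))
    nn_stats(model, 'cnn')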
if __name__ == "__main__":
    """
    Main code you can use and update as you please if you want to use command
    line arguments; otherwise you are free to ignore it. There are some extra
    functions in model_stats you can use as well if you would like to.
    """
    if not os.path.exists('results'):
        print('create results folder')
        os.mkdir('results')

    pd.set_option('display.max_rows', 500)
    pd.set_option('display.max_columns', 500)
    pd.set_option('display.width', 1000)

    [au_train_images, au_train_labels] = load_train_data()
    [au_test_images, au_test_labels] = load_test_data()

    # Shuffle the training data and keep a small subsample for quick experiments.
    rp = np.random.permutation(au_train_labels.size)
    digs = au_train_images[rp, :]
    labs = au_train_labels[rp]
    digs = digs[0:1000, :]
    labs = labs[0:1000]

    parser = argparse.ArgumentParser()
    parser.add_argument('-lin', action='store_true', default=False)
    parser.add_argument('-poly2', action='store_true', default=False)
    parser.add_argument('-poly3', action='store_true', default=False)
    parser.add_argument('-rbf', action='store_true', default=False)
    args = parser.parse_args()

    X = digs[0:100]
    Y = labs[0:100]
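    # One possible way to act on the parsed flags -- a sketch, not part of the
    # original script: map each command-line flag to a scikit-learn SVC kernel,
    # fit it on the 100-point subsample, and print the training accuracy. The
    # project's own SVM wrapper, if any, may differ.
    from sklearn.svm import SVC
    kernels = []
    if args.lin:
        kernels.append(('linear', SVC(kernel='linear')))
    if args.poly2:
        kernels.append(('poly2', SVC(kernel='poly', degree=2)))
    if args.poly3:
        kernels.append(('poly3', SVC(kernel='poly', degree=3)))
    if args.rbf:
        kernels.append(('rbf', SVC(kernel='rbf')))
    for kernel_name, clf in kernels:
        clf.fit(X, Y)
        print('{0} kernel train accuracy: {1:.3f}'.format(kernel_name, clf.score(X, Y)))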