def test1():
    """Train and evaluate the baseline two-layer network.

    The dataset (``X_train``, ``y_train``, ``X_dev``, ``y_dev``, ``X_test``,
    ``y_test``) is read from the enclosing scope; none of it is defined in
    this function.

    Returns:
        Whatever ``train_model`` returns (kept here as the validation
        accuracy).
    """
    # Baseline settings. Alternatives explored one at a time by the original
    # author: batch_size=64, lr=0.01, momentum=0.9, nn.LeakyReLU().
    batch_size = 32

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # 784 inputs -> 10 hidden units -> 10 outputs.
    layers = [
        nn.Linear(784, 10),
        nn.ReLU(),
        nn.Linear(10, 10),
    ]
    model = nn.Sequential(*layers)

    lr = 0.1
    momentum = 0
    val_acc = train_model(
        train_batches, dev_batches, model, lr=lr, momentum=momentum
    )

    # Final evaluation on the held-out test batches (no optimizer passed).
    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")

    return val_acc
def batch_data(X_train, y_train, X_dev, y_dev, X_test, y_test, batch_size=32):
    """Batch the train, dev and test splits using one shared batch size.

    Args:
        X_train, y_train: training inputs and labels.
        X_dev, y_dev: validation inputs and labels.
        X_test, y_test: test inputs and labels.
        batch_size: batch size forwarded to ``batchify_data`` for every split.

    Returns:
        A 3-tuple ``(train_batches, dev_batches, test_batches)``.
    """
    splits = (
        (X_train, y_train),
        (X_dev, y_dev),
        (X_test, y_test),
    )
    # Splits are batched in train, dev, test order.
    return tuple(
        batchify_data(inputs, labels, batch_size) for inputs, labels in splits
    )
def main():
    """Train the ``CNN`` model on the two-label dataset and print test metrics.

    Uses names that are not defined in this function: ``path_to_data_dir``,
    ``use_mini_dataset``, ``batch_size``, ``img_rows`` and ``img_cols``.
    ``y_train`` / ``y_test`` are indexed as a pair of label sequences
    (positions 0 and 1).
    """
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir, use_mini_dataset)

    # The last tenth of the training examples becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev = X_train[cut:]
    y_dev = [y_train[k][cut:] for k in (0, 1)]
    X_train = X_train[:cut]
    y_train = [y_train[k][:cut] for k in (0, 1)]

    # Shuffle the remaining training examples, keeping both label lists
    # aligned with the inputs.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [[y_train[k][idx] for idx in order] for k in (0, 1)]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # TODO add proper layers to CNN class above
    input_dimension = img_rows * img_cols
    model = CNN(input_dimension)

    train_model(train_batches, dev_batches, model)

    # run_epoch's results are indexed per label: [0] and [1].
    loss, acc = run_epoch(test_batches, model.eval(), None)
    report = 'Test loss1: {:.6f}  accuracy1: {:.6f}  loss2: {:.6f}   accuracy2: {:.6f}'
    print(report.format(loss[0], acc[0], loss[1], acc[1]))
def main():
    """Train the MNIST CNN (on CUDA when available) and print test metrics."""
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Conv2d needs image-shaped input: restore each row to 1x28x28.
    image_shape = (1, 28, 28)
    X_train = np.reshape(X_train, (X_train.shape[0], *image_shape))
    X_test = np.reshape(X_test, (X_test.shape[0], *image_shape))

    # Hold out the last 10% of the training data for validation.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle inputs and labels with the same random permutation.
    order = torch.randperm(X_train.shape[0])
    X_train = X_train[order]
    y_train = y_train[order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Spatial size: 28 -> 26 (conv) -> 13 (pool) -> 11 (conv) -> 5 (pool);
    # 64 channels * 5 * 5 = 1600 features after flattening.
    model = nn.Sequential(
        nn.Conv2d(1, 32, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        nn.Conv2d(32, 64, (3, 3)),
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        Flatten(),
        nn.Linear(1600, 128),
        nn.Dropout(0.5),
        nn.Linear(128, num_classes),
    )

    # NOTE(review): only the model is moved to the GPU here; confirm that
    # run_epoch / train_model place the batches on the same device.
    if torch.cuda.is_available():
        print("----------------- Using the Device: CUDA -----------------")
        model = model.to('cuda')
    else:
        print("----------------- Using the Device: CPU ----------------- ")

    train_model(train_batches, dev_batches, model, nesterov=True)

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")
def main():
    """Train the MNIST CNN with Nesterov momentum and print test metrics."""
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Restore every flat row to a single-channel 28x28 image for Conv2d.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Last 10% of the training data is held out as the dev set.
    split_at = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[split_at:], y_train[split_at:]
    X_train, y_train = X_train[:split_at], y_train[:split_at]

    # Shuffle the training examples (inputs and labels together).
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    #pragma: coderesponse template name="pytorchcnn" dedent="true"
    model = nn.Sequential(
        nn.Conv2d(1, 32, (3, 3)),   # 28x28x1  -> 26x26x32
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),       # 26x26x32 -> 13x13x32
        nn.Conv2d(32, 64, (3, 3)),  # 13x13x32 -> 11x11x64
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),       # 11x11x64 -> 5x5x64
        Flatten(),                  # 5 * 5 * 64 = 1600 features
        nn.Linear(1600, 128),
        nn.Dropout(p=0.5),
        nn.Linear(128, num_classes),
    )
    #pragma: coderesponse end

    train_model(train_batches, dev_batches, model, nesterov=True)

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")
def main():
    """Train the MNIST CNN with Nesterov momentum and print test metrics."""
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Flat rows -> (num pictures, 1 channel, 28, 28) so Conv2d can consume them.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # 90% train / 10% dev, split by position.
    boundary = int(9 * len(X_train) / 10)
    X_dev = X_train[boundary:]
    y_dev = y_train[boundary:]
    X_train = X_train[:boundary]
    y_train = y_train[:boundary]

    # Random reordering of the training set.
    shuffled = np.arange(len(X_train))
    np.random.shuffle(shuffled)
    X_train = [X_train[j] for j in shuffled]
    y_train = [y_train[j] for j in shuffled]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Shapes are (num, channels, height, width); input is num x 1 x 28 x 28.
    feature_extractor = [
        nn.Conv2d(1, 32, (3, 3)),   # -> num x 32 x 26 x 26
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),       # -> num x 32 x 13 x 13
        nn.Conv2d(32, 64, (3, 3)),  # -> num x 64 x 11 x 11
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),       # -> num x 64 x 5 x 5
    ]
    classifier = [
        Flatten(),                  # 64 * 5 * 5 = 1600 inputs for the linear layer
        nn.Linear(1600, 128),
        nn.Dropout(p=0.5),
        nn.Linear(128, num_classes),
    ]
    model = nn.Sequential(*feature_extractor, *classifier)

    train_model(train_batches, dev_batches, model, nesterov=True)

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")
def main():
    """Train the MNIST CNN with Nesterov momentum and print test metrics."""
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Conv2d works on images, so turn every row back into 1x28x28.
    n_train, n_test = X_train.shape[0], X_test.shape[0]
    X_train = np.reshape(X_train, (n_train, 1, 28, 28))
    X_test = np.reshape(X_test, (n_test, 1, 28, 28))

    # Keep the first 90% for training, the rest for validation.
    dev_start = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[dev_start:], y_train[dev_start:]
    X_train, y_train = X_train[:dev_start], y_train[:dev_start]

    # Shuffle training inputs and labels in lockstep.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[pos] for pos in order]
    y_train = [y_train[pos] for pos in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    model = nn.Sequential(
        # Valid 3x3 convolution: 28x28 -> 26x26.
        nn.Conv2d(1, 32, (3, 3)),
        nn.ReLU(),
        # 2x2 max pooling: 26x26 -> 13x13.
        nn.MaxPool2d((2, 2)),
        # Valid 3x3 convolution: 13x13 -> 11x11.
        nn.Conv2d(32, 64, (3, 3)),
        nn.ReLU(),
        # 2x2 max pooling: 11x11 -> 5x5.
        nn.MaxPool2d((2, 2)),
        # 5x5x64 -> 1600 features.
        Flatten(),
        nn.Linear(1600, 128),
        # Dropout is constructed with its default arguments.
        nn.Dropout(),
        nn.Linear(128, num_classes),
    )

    train_model(train_batches, dev_batches, model, nesterov=True)

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")
def train(batch_size=32, hidden_size=10, lr=0.1, momentum=0, activation=nn.ReLU):
    """Train a one-hidden-layer MNIST classifier and evaluate it on the test set.

    Args:
        batch_size: batch size used for the train, dev and test batches.
        hidden_size: number of units in the hidden layer.
        lr: learning rate forwarded to ``train_model``.
        momentum: momentum forwarded to ``train_model``.
        activation: zero-argument callable that builds the activation module
            (for example the ``nn.ReLU`` class itself, not an instance).

    Returns:
        ``(val_acc, accuracy)``: the value returned by ``train_model`` and the
        test accuracy reported by ``run_epoch``.
    """
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Hold out the last 10% of the training data as the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle by indexing the arrays with a permutation. This keeps the
    # training data in the same array form as the dev/test data instead of a
    # Python list of rows, which is slow to build and to convert to tensors.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train, y_train = X_train[order], y_train[order]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # 784 inputs -> hidden_size units -> 10 outputs.
    model = nn.Sequential(
        nn.Linear(784, hidden_size),
        activation(),
        nn.Linear(hidden_size, 10),
    )

    val_acc = train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    # Final evaluation on the test batches (no optimizer passed).
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    print("Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy))

    return val_acc, accuracy
def oldmain(classes, batch, eta, momentum, LeakyReLU):
    """Train a 784-128-10 MNIST classifier and return its test accuracy.

    Args:
        classes: accepted for interface compatibility only.
            NOTE(review): the output layer is fixed at 10 units and this
            value is not used by the model; confirm whether it should be.
        batch: batch size used for the train, dev and test batches.
        eta: learning rate forwarded to ``train_model``.
        momentum: momentum forwarded to ``train_model``.
        LeakyReLU: when truthy the hidden activation is ``nn.LeakyReLU``,
            otherwise ``nn.ReLU``.

    Returns:
        The test accuracy reported by ``run_epoch``.
    """
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Hold out the last 10% of the training data as the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle by indexing the arrays with a permutation, so the training data
    # keeps the same array form as the dev/test data (no list of rows).
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train, y_train = X_train[order], y_train[order]

    train_batches = batchify_data(X_train, y_train, batch)
    dev_batches = batchify_data(X_dev, y_dev, batch)
    test_batches = batchify_data(X_test, y_test, batch)

    # The two variants differ only in the hidden activation, so build the
    # network once and plug in the chosen activation.
    activation_cls = nn.LeakyReLU if LeakyReLU else nn.ReLU
    model = nn.Sequential(
        nn.Linear(784, 128),
        activation_cls(),
        nn.Linear(128, 10),
    )

    train_model(train_batches, dev_batches, model, lr=eta, momentum=momentum)

    # Final evaluation on the test batches (no optimizer passed).
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    print("Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy))

    return accuracy
def test_grid(batch_size=32, lr=0.1, momentum=0, LeakyReLU=False):
    """Run one seeded grid-search configuration of the 784-128-10 classifier.

    Args:
        batch_size: batch size used for the train, dev and test batches.
        lr: learning rate forwarded to ``train_model``.
        momentum: momentum forwarded to ``train_model``.
        LeakyReLU: when truthy the hidden activation is ``nn.LeakyReLU``,
            otherwise ``nn.ReLU``.

    Returns:
        Whatever ``train_model`` returns (kept here as the validation
        accuracy).
    """
    np.random.seed(12321)  # for reproducibility
    torch.manual_seed(12321)  # for reproducibility

    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Hold out the last 10% of the training data as the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle by indexing the arrays with a permutation, so the training data
    # keeps the same array form as the dev/test data (no list of rows).
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train, y_train = X_train[order], y_train[order]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Build only the selected network. Constructing both variants and
    # discarding one meant the LeakyReLU model was initialised from a later
    # point in the seeded random stream than the ReLU model, so the two
    # configurations did not start from comparable weights.
    activation_cls = nn.LeakyReLU if LeakyReLU else nn.ReLU
    model = nn.Sequential(
        nn.Linear(784, 128),
        activation_cls(),
        nn.Linear(128, 10),
    )

    val_acc = train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    # Final evaluation on the test batches (no optimizer passed).
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    print("Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy))

    return val_acc
def main():
    """Train a 784-10-10 LeakyReLU classifier on MNIST, printing debug info.

    Besides the final test metrics, this prints dataset sizes, the split
    index and one shuffled training sample as diagnostics.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Diagnostics on the raw data.
    print(len(X_train), 'len(X_train)')
    print(len(X_train[0]), 'len(X_train)[0]')
    print(y_train.shape, 'y_train.shape')

    # Last 10% of the training data becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    print(cut, 'dev_split_index')
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle inputs and labels with the same permutation.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    # Diagnostics on one shuffled sample.
    print(X_train[1].shape, 'X_train')
    print(y_train[1], 'y_train[1]')

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    model = nn.Sequential(
        nn.Linear(784, 10),
        nn.LeakyReLU(),
        nn.Linear(10, num_classes),
    )
    hyper = {'lr': 0.1, 'momentum': 0}

    train_model(train_batches, dev_batches, model, **hyper)

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")
def main():
    """Train a 784-128-10 ReLU classifier on MNIST and print a run summary."""
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Last 10% of the training data becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle inputs and labels with the same permutation.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # N: batch size, D_in: input width, H: hidden width, D_out: output width.
    N = batch_size
    D_in = 784
    H = 128
    D_out = 10
    model = nn.Sequential(
        nn.Linear(D_in, H),
        nn.ReLU(),
        nn.Linear(H, D_out),
    )

    lr = 0.1
    momentum = 0
    # Only reported in the summary line; the model above always uses ReLU.
    leaky_relu_active = False

    train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    summary = "Batch size: {}; Learning Rate: {}; Momentum: {}; LeakyReLU: {}; Hidden Dimension: {}"
    print(summary.format(batch_size, lr, momentum, leaky_relu_active, H))
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")
def main():
    """Train a 784-128-10 ReLU classifier on MNIST with momentum 0.9.

    Accuracies recorded by the original author for the alternatives
    (reproduced as noted, labels "Acc1"/"Acc2" are theirs):
        batch_size = 64 : Acc1 = 0.9314, Acc2 = 0.976478
        nn.LeakyReLU()  : Acc1 = 0.9207, Acc2 = 0.978944
        lr = 0.01       : Acc1 = 0.9206, Acc2 = 0.955047
        momentum = 0.9  : Acc1 = 0.8928, Acc2 = 0.967246
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Last 10% of the training data becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle inputs and labels with the same permutation.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    hidden_layers = [nn.Linear(784, 128), nn.ReLU()]
    output_layer = nn.Linear(128, num_classes)
    model = nn.Sequential(*hidden_layers, output_layer)

    lr = 0.1
    momentum = 0.9

    train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")
def main(batch_size=32, lr=0.1, momentum=0, act="ReLU", hsize=10):
    """Train a one-hidden-layer MNIST classifier with the given settings.

    Args:
        batch_size: batch size used for the train, dev and test batches.
        lr: learning rate forwarded to ``train_model``.
        momentum: momentum forwarded to ``train_model``.
        act: activation identifier passed to ``get_activation``, whose result
            is used as the hidden-layer activation module.
        hsize: number of units in the hidden layer.
    """
    # The message previously misspelled "learning" as "learnign".
    print("batch: %d, learning rate: %f, momentum: %f, activation: %s" % (batch_size, lr, momentum, act))

    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Hold out the last 10% of the training data as the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle by indexing the arrays with a permutation, so the training data
    # keeps the same array form as the dev/test data (no list of rows).
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train, y_train = X_train[order], y_train[order]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # 784 inputs -> hsize units -> 10 outputs.
    model = nn.Sequential(
        nn.Linear(784, hsize),
        get_activation(act),
        nn.Linear(hsize, 10),
    )

    train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    # Final evaluation on the test batches (no optimizer passed).
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    print("Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy))
def main():
    """Train the MNIST CNN with Nesterov momentum and print test metrics.

    The network was previously left unfinished: it stopped after the first
    pooling layer and therefore produced a 32x13x13 feature map instead of
    one score per digit class. It now has the full architecture: two
    conv/ReLU/pool stages, then flatten, a 128-unit linear layer, dropout
    and a 10-unit output layer.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Conv2d needs image-shaped input: restore each row to 1x28x28.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Hold out the last 10% of the training data as the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle by indexing the arrays with a permutation, so the training data
    # keeps the same array form as the dev/test data (no list of rows).
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train, y_train = X_train[order], y_train[order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    model = nn.Sequential(
        nn.Conv2d(1, 32, (3, 3)),    # 1x28x28  -> 32x26x26
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),        # 32x26x26 -> 32x13x13
        nn.Conv2d(32, 64, (3, 3)),   # 32x13x13 -> 64x11x11
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),        # 64x11x11 -> 64x5x5
        nn.Flatten(),                # 64 * 5 * 5 = 1600 features per image
        nn.Linear(1600, 128),
        nn.Dropout(0.5),
        nn.Linear(128, num_classes),
    )

    train_model(train_batches, dev_batches, model, nesterov=True)

    # Final evaluation on the test batches (no optimizer passed).
    loss, accuracy = run_epoch(test_batches, model.eval(), None)
    print("Loss on test set:" + str(loss) + " Accuracy on test set: " + str(accuracy))
def main():
    """Train the baseline 784-10-10 ReLU classifier on MNIST and print test metrics."""
    # ======== Load the dataset ===========
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # ======= Split into train and dev ==========
    # The last 10% of the training data becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle inputs and labels with the same permutation.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    # ========= Split dataset into batches ============
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # =========== MODEL SPECIFICATION ============
    hidden_units = 10
    model = nn.Sequential(
        nn.Linear(784, hidden_units),
        nn.ReLU(),
        nn.Linear(hidden_units, num_classes),
    )
    lr, momentum = 0.1, 0

    train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")
def main():
    """Train the ``MLP`` model on the two-label dataset and print test metrics.

    Uses names that are not defined in this function: ``path_to_data_dir``,
    ``use_mini_dataset``, ``batch_size``, ``img_rows`` and ``img_cols``.

    Figures recorded by the original author from one run (not checked here):
    each batch 'x' was (64, 1, 42, 28) = (batch_size, 1, img_rows, img_cols),
    each batch 'y' was (2, 64) = (num_labels, batch_size); 36000 train and
    4000 dev examples; 562 / 62 / 62 train / dev / test batches.
    """
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir, use_mini_dataset)

    # The last tenth of the training examples becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev = X_train[cut:]
    y_dev = [y_train[k][cut:] for k in (0, 1)]
    X_train = X_train[:cut]
    y_train = [y_train[k][:cut] for k in (0, 1)]

    # Shuffle the training examples, keeping both label lists aligned.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [[y_train[k][idx] for idx in order] for k in (0, 1)]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # The MLP is sized from the flattened image dimension.
    input_dimension = img_rows * img_cols
    model = MLP(input_dimension)

    train_model(train_batches, dev_batches, model)

    # run_epoch's results are indexed per label: [0] and [1].
    loss, acc = run_epoch(test_batches, model.eval(), None)
    report = 'Test loss1: {:.6f}  accuracy1: {:.6f}  loss2: {:.6f}   accuracy2: {:.6f}'
    print(report.format(loss[0], acc[0], loss[1], acc[1]))
def main(batch_size=32, lr=1e-1, hidden_size=10, momentum=0):
    """Train a one-hidden-layer ReLU classifier on MNIST.

    Args:
        batch_size: batch size used for the train, dev and test batches.
        lr: learning rate forwarded to ``train_model``.
        hidden_size: number of units in the hidden layer.
        momentum: momentum forwarded to ``train_model``.

    Returns:
        ``(batch_size, lr, momentum, hidden_size, val_acr)`` where ``val_acr``
        is whatever ``train_model`` returns.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Last 10% of the training data becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle by indexing both arrays with the same permutation.
    order = np.arange(X_train.shape[0])
    np.random.shuffle(order)
    X_train = X_train[order]
    y_train = y_train[order]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # The input width is taken from the data rather than hard-coded.
    n_features = X_train.shape[1]
    model = nn.Sequential(
        nn.Linear(n_features, hidden_size),
        nn.ReLU(),
        nn.Linear(hidden_size, num_classes),
    )

    val_acr = train_model(train_batches, dev_batches, model, lr=lr, momentum=momentum)

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")

    return batch_size, lr, momentum, hidden_size, val_acr
def main():
    """Train the ``CNN`` model on the two-label dataset (GPU if available).

    Uses names that are not defined in this function: ``path_to_data_dir``,
    ``use_mini_dataset``, ``batch_size``, ``img_rows``, ``img_cols`` and
    ``device``.
    """
    X_train, y_train, X_test, y_test = U.get_data(path_to_data_dir, use_mini_dataset)

    # The last tenth of the training examples becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev = X_train[cut:]
    y_dev = [y_train[k][cut:] for k in (0, 1)]
    X_train = X_train[:cut]
    y_train = [y_train[k][:cut] for k in (0, 1)]

    # Shuffle inputs and both label sequences with one random permutation.
    order = torch.randperm(len(X_train))
    X_train = X_train[order]
    y_train = [y_train[k][order] for k in (0, 1)]

    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # Computed but not passed on: CNN() is constructed without arguments.
    input_dimension = img_rows * img_cols
    model = CNN()

    # NOTE(review): only the model is moved to ``device`` here; confirm that
    # run_epoch / train_model place the batches on the same device.
    if torch.cuda.is_available():
        model = model.to(device)
        print("----------------- Using the Device: GPU -----------------")
    else:
        print("----------------- Using the Device: CPU -----------------")

    train_model(train_batches, dev_batches, model)

    # run_epoch's results are indexed per label: [0] and [1].
    loss, acc = run_epoch(test_batches, model.eval(), None)
    report = 'Test loss1: {:.6f}  accuracy1: {:.6f}  loss2: {:.6f}   accuracy2: {:.6f}'
    print(report.format(loss[0], acc[0], loss[1], acc[1]))
def main():
    """Train the MNIST CNN with Nesterov momentum and print test metrics.

    Parameter shapes implied by the layer definitions (keys are the layer
    positions in the Sequential):
        0.weight [32, 1, 3, 3]   0.bias [32]
        3.weight [64, 32, 3, 3]  3.bias [64]
        7.weight [128, 1600]     7.bias [128]
        9.weight [10, 128]       9.bias [10]
    Optimizer settings recorded by the original author from one run (they
    are chosen inside ``train_model``, not here): lr 0.01, momentum 0.9,
    dampening 0, weight_decay 0, nesterov True.
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Restore every flat row to a 1x28x28 image.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Last 10% of the training data becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle inputs and labels with the same permutation.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    layers = [
        nn.Conv2d(1, 32, (3, 3)),     # 0
        nn.ReLU(),                    # 1
        nn.MaxPool2d((2, 2)),         # 2
        nn.Conv2d(32, 64, (3, 3)),    # 3
        nn.ReLU(),                    # 4
        nn.MaxPool2d((2, 2)),         # 5
        Flatten(),                    # 6: [32, 64, 5, 5] -> [32, 1600] per batch
        nn.Linear(1600, 128),         # 7
        nn.Dropout(0.5),              # 8
        nn.Linear(128, num_classes),  # 9
    ]
    model = nn.Sequential(*layers)

    train_model(train_batches, dev_batches, model, nesterov=True)

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")
def main():
    """Train the MNIST CNN with Nesterov momentum and print test metrics."""
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # 784-pixel rows -> 1x28x28 images for the convolutional layers.
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # Last 10% of the training data becomes the dev set.
    first_dev = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[first_dev:], y_train[first_dev:]
    X_train, y_train = X_train[:first_dev], y_train[:first_dev]

    # Shuffle inputs and labels with the same permutation.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    #pragma: coderesponse template name="pytorchcnn" dedent="true"
    model = nn.Sequential(
        # Input image: 28x28x1.
        nn.Conv2d(1, 32, (3, 3)),     # 32 filters of size 3x3 -> 26x26x32
        nn.ReLU(),                    # ReLU nonlinearity
        nn.MaxPool2d((2, 2)),         # 2x2 max pooling       -> 13x13x32
        nn.Conv2d(32, 64, (3, 3)),    # 64 filters of size 3x3 -> 11x11x64
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),         # 2x2 max pooling       -> 5x5x64
        Flatten(),                    # 5 * 5 * 64 = 1600
        nn.Linear(1600, 128),         # fully connected: 1600 -> 128
        nn.Dropout(0.5),              # dropout with probability 0.5
        nn.Linear(128, num_classes),  # fully connected: 128 -> 10
    )
    #pragma: coderesponse end

    train_model(train_batches, dev_batches, model, nesterov=True)

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")
def main():
    """Grid-search one hyper-parameter at a time against a fixed baseline.

    Each grid entry overrides at most one baseline setting. Every run is
    re-seeded, trained, and scored on both the dev and the test batches. The
    best dev score among the non-baseline entries is reported at the end,
    together with all dev and test scores.

    Scores recorded by the original author from earlier runs:
        validation_scores  = [0.932487, 0.944388, 0.937834, 0.907587, 0.936497]
        validation_scores2 = [0.977440, 0.977487, 0.978610, 0.968416, 0.978443]
    """
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Last 10% of the training data becomes the dev set.
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle inputs and labels with the same permutation.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    validation_scores = []
    test_scores = []
    best_validation = {'score': 0, 'param': None}

    baseline = {
        'batch_size': 32,
        'activation': nn.ReLU(),
        'lr': 0.1,
        'momentum': 0
    }
    # The empty tuple is the unmodified baseline run.
    # NOTE(review): ('lr', 0.1) is identical to the baseline learning rate,
    # so that entry repeats the baseline; confirm the intended value.
    grid = [
        (),
        ('batch_size', 64),
        ('lr', 0.1),
        ('momentum', 0.9),
        ('activation', nn.LeakyReLU()),
    ]

    for override in grid:
        np.random.seed(12321)  # for reproducibility
        torch.manual_seed(12321)  # for reproducibility
        print('Testing param:', override)

        # Start from a private copy of the baseline, then apply the override.
        params = copy.deepcopy(baseline)
        if len(override):
            params[override[0]] = override[1]

        batch_size = params['batch_size']
        train_batches = batchify_data(X_train, y_train, batch_size)
        dev_batches = batchify_data(X_dev, y_dev, batch_size)
        test_batches = batchify_data(X_test, y_test, batch_size)

        model = nn.Sequential(
            nn.Linear(784, 128),
            params['activation'],
            nn.Linear(128, num_classes),
        )

        train_model(
            train_batches,
            dev_batches,
            model,
            lr=params['lr'],
            momentum=params['momentum'],
        )

        # Score on the dev batches; the baseline itself is never "best".
        loss, accuracy = run_epoch(dev_batches, model.eval(), None)
        validation_scores.append(accuracy)
        if accuracy > best_validation['score'] and len(override):
            best_validation['score'] = accuracy
            best_validation['param'] = override[0]

        # Score on the test batches.
        loss, accuracy = run_epoch(test_batches, model.eval(), None)
        test_scores.append(accuracy)
        print(f"Loss on test set:{loss!s} Accuracy on test set: {accuracy!s}")

    print('Best validation:', best_validation)
    print('Validation scores:', validation_scores)
    print('Test scores:', test_scores)
def main():
    """Train the MNIST CNN with Nesterov momentum and print test metrics.

    ``train_model`` is expected to return five values here; they are unpacked
    but not used further in this function.
    """
    # ======= Load the dataset ===========
    num_classes = 10
    X_train, y_train, X_test, y_test = get_MNIST_data()

    # Conv2d takes a 4D input, so reshape the rows into (N, 1, 28, 28).
    X_train = np.reshape(X_train, (X_train.shape[0], 1, 28, 28))
    X_test = np.reshape(X_test, (X_test.shape[0], 1, 28, 28))

    # ======= Split into train (90%) and dev (10%) =========
    cut = int(9 * len(X_train) / 10)
    X_dev, y_dev = X_train[cut:], y_train[cut:]
    X_train, y_train = X_train[:cut], y_train[:cut]

    # Shuffle inputs and labels with the same permutation.
    order = np.arange(len(X_train))
    np.random.shuffle(order)
    X_train = [X_train[idx] for idx in order]
    y_train = [y_train[idx] for idx in order]

    # ======== Split dataset into batches ==========
    batch_size = 32
    train_batches = batchify_data(X_train, y_train, batch_size)
    dev_batches = batchify_data(X_dev, y_dev, batch_size)
    test_batches = batchify_data(X_test, y_test, batch_size)

    # ============ MODEL SPECIFICATION ==================
    # nn.Sequential applies its modules in the order given.
    #   nn.Conv2d(in_channels, out_channels, kernel_size)
    #   nn.MaxPool2d(kernel_size): window to take the max over; the stride
    #       defaults to the kernel size, so (2, 2) halves each image dimension.
    #   nn.Linear(in_features, out_features)
    #   nn.Dropout(p): randomly deactivates units during training as a
    #       regulariser against overfitting.
    conv_stage = [
        nn.Conv2d(1, 32, (3, 3)),    # 32 filters of size 3x3, one input channel
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
        nn.Conv2d(32, 64, (3, 3)),   # 64 filters of size 3x3
        nn.ReLU(),
        nn.MaxPool2d((2, 2)),
    ]
    dense_stage = [
        Flatten(),
        nn.Linear(1600, 128),        # fully connected layer with 128 units
        nn.Dropout(0.5),             # drop probability 0.5
        nn.Linear(128, num_classes), # fully connected layer with 10 units
    ]
    model = nn.Sequential(*conv_stage, *dense_stage)

    val_acc, train_acc, train_loss, v_loss, v_acc = train_model(
        train_batches, dev_batches, model, nesterov=True
    )

    test_loss, test_accuracy = run_epoch(test_batches, model.eval(), None)
    print(f"Loss on test set:{test_loss!s} Accuracy on test set: {test_accuracy!s}")