def train_framework(train_set, val_set, test_set, epochs=1000, mini_batch_size=50, lr=0.001):
    # Turn autograd off: the model runs on the custom framework's own backward pass
    torch.set_grad_enabled(False)

    # Net definition: three hidden layers of 25 units with ReLU activations.
    # The explicit F.ReLU() modules do the activation work, so the layers
    # take no activation keyword, and the output layer stays linear.
    model = nn.Sequential(nn.Linear(2, 25), F.ReLU(),
                          nn.Linear(25, 25), F.ReLU(),
                          nn.Linear(25, 25), F.ReLU(),
                          nn.Linear(25, 2))

    # Training params
    opt = optim.SGD(lr, model)
    criterion = losses.LossMSE()

    # Train
    start_time = time.perf_counter()
    history = train_model(model, train_set[0], train_set[1], val_set[0], val_set[1],
                          criterion, opt, epochs, mini_batch_size,
                          pytorch=False, verbose=True)
    end_time = time.perf_counter()

    # Compute final accuracies
    train_acc = compute_accuracy(model, train_set[0], train_set[1], pytorch=False)
    test_acc = compute_accuracy(model, test_set[0], test_set[1], pytorch=False)

    print("\tTraining time : %s s" % (end_time - start_time))
    print("\tAccuracy : train_acc = %s \t test_acc = %s" % (train_acc, test_acc))

    return history, end_time - start_time, (train_acc, test_acc)
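# compute_accuracy is called above but not shown in this section. A minimal
# sketch, assuming model.forward returns one score per class and the targets
# are one-hot encoded (both assumptions, taken from the companion snippets);
# the pytorch flag is ignored here:
def compute_accuracy(model, x, y, pytorch=False):
    predictions = model.forward(x).argmax(dim=1)        # highest-scoring class
    return (predictions == y.argmax(dim=1)).float().mean().item()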
def feedForward(self, inputs):
    self.inputs = inputs  # save the input (and its shape) for the backward pass
    height, width, filters = inputs.shape
    self.layerOutput = np.zeros((height // 2, width // 2, filters))  # 2x2 pooling halves each spatial dimension
    for h in range(height // 2):
        for w in range(width // 2):
            # maximum over each 2x2 window of inputs (assumes even height and width)
            self.layerOutput[h, w] = np.max(inputs[(h * 2):(h * 2 + 2), (w * 2):(w * 2 + 2)], axis=(0, 1))
    self.layerOutput = activations.ReLU(self.layerOutput)  # ReLU is fused into this pooling layer
    return self.layerOutput
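# A possible backward pass for this layer (a sketch, not from the source):
# the incoming gradient is first gated by the fused ReLU, then routed only
# to the input position that produced each pooled maximum.
def backprop(self, grad_output):
    grad_output = grad_output * (self.layerOutput > 0)   # ReLU derivative
    grad_input = np.zeros_like(self.inputs)
    height, width, filters = self.inputs.shape
    for h in range(height // 2):
        for w in range(width // 2):
            window = self.inputs[(h * 2):(h * 2 + 2), (w * 2):(w * 2 + 2)]
            for f in range(filters):
                # locate the argmax inside the 2x2 window for this filter
                hm, wm = np.unravel_index(np.argmax(window[:, :, f]), (2, 2))
                grad_input[h * 2 + hm, w * 2 + wm, f] = grad_output[h, w, f]
    return grad_input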
def __init__(self, size, init=None, activation=None):
    """
    @size       - number of nodes in the layer
    @init       - initializer instance (defaults to Uniform(-0.3, 0.3))
    @activation - activation instance (defaults to ReLU)
    """
    self.lnum = -1
    self.lname = ""
    self.weights = None
    self.bias = None
    self.size = size
    self.init = init if init else initializers.Uniform(-0.3, 0.3)
    self.activation = activation if activation else activations.ReLU()
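# Example usage of the defaults above. Layer stands in for whatever class
# this __init__ belongs to, and the Tanh activation is an assumption for
# illustration only:
hidden = Layer(25)  # picks up Uniform(-0.3, 0.3) init and ReLU activation
custom = Layer(25, init=initializers.Uniform(-0.1, 0.1),
               activation=activations.Tanh())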
def convert_to_one_hot_labels(input, target, zero_value=0):
    '''Convert targets to a one-hot labeled tensor. The value at the label
    position will be 1 and zero_value everywhere else.'''
    tmp = input.new(target.size(0), target.max() + 1).fill_(zero_value)
    tmp.scatter_(1, target.view(-1, 1), 1.0)
    return tmp

y_train = convert_to_one_hot_labels(x_train, y_train, -1)
y_test = convert_to_one_hot_labels(x_train, y_test, -1)

### Testing the speed of our own framework ###

# Defining the model architecture
model = containers.Sequential(layers.Linear(2, 25, with_bias=True), activations.ReLU(),
                              layers.Linear(25, 25, with_bias=True), activations.ReLU(),
                              layers.Linear(25, 25, with_bias=True), activations.ReLU(),
                              layers.Linear(25, 2, with_bias=True), activations.Tanh())
criterion = losses.LossMSE()
optimizer = optimizers.SGD(model.param(), learning_rate=0.001)

def compute_nb_errors(model, data_input, data_target):
    mini_batch_size = 100
    n_misclassified = 0
    for b in range(0, data_input.size(0), mini_batch_size):
        # count a mistake whenever the arg-max of the output differs
        # from the arg-max of the one-hot target
        output = model.forward(data_input[b:b + mini_batch_size])
        predicted = output.argmax(dim=1)
        expected = data_target[b:b + mini_batch_size].argmax(dim=1)
        n_misclassified += (predicted != expected).sum().item()
    return n_misclassified
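# Quick sanity check of convert_to_one_hot_labels on a toy target vector
# (torch assumed imported, as elsewhere in these scripts):
import torch
x = torch.empty(4, 2)                  # used only as a type/device template
t = torch.tensor([0, 1, 1, 0])
print(convert_to_one_hot_labels(x, t, -1))
# tensor([[ 1., -1.],
#         [-1.,  1.],
#         [-1.,  1.],
#         [ 1., -1.]])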
def inference(images, phase_train, scope=''):
    BATCH_SIZE = int(BATCH / NUM_GPU)
    with tf.name_scope(scope, values=[images]):
        #Conv11-32p1
        conv0 = cnv.conv(images, 'conv0', [11, 11, 3, 32], stride=[1, 1, 1, 1],
                         padding='SAME', wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm0 = bn.batch_norm_layer(conv0, train_phase=phase_train, scope_bn='BN0')
        relu0 = act.ReLU(bnorm0, 'ReLU0')
        #SKIP CONNECTION 0

        #Conv9-64s2
        conv1 = cnv.conv(relu0, 'conv1', [9, 9, 32, 64], stride=[1, 2, 2, 1],
                         padding='SAME', wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm1 = bn.batch_norm_layer(conv1, train_phase=phase_train, scope_bn='BN1')
        relu1 = act.ReLU(bnorm1, 'ReLU1')

        #Conv3-128p1
        conv2 = cnv.conv(relu1, 'conv2', [3, 3, 64, 128], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm2 = bn.batch_norm_layer(conv2, train_phase=phase_train, scope_bn='BN2')
        relu2 = act.ReLU(bnorm2, 'ReLU2')

        #Conv3-128p1
        conv3 = cnv.conv(relu2, 'conv3', [3, 3, 128, 128], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm3 = bn.batch_norm_layer(conv3, train_phase=phase_train, scope_bn='BN3')
        relu3 = act.ReLU(bnorm3, 'ReLU3')
        #SKIP CONNECTION 1

        #Conv7-256s2
        conv4 = cnv.conv(relu3, 'conv4', [7, 7, 128, 256], stride=[1, 2, 2, 1],
                         padding='SAME', wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm4 = bn.batch_norm_layer(conv4, train_phase=phase_train, scope_bn='BN4')
        relu4 = act.ReLU(bnorm4, 'ReLU4')

        #Conv3-256p1
        conv5 = cnv.conv(relu4, 'conv5', [3, 3, 256, 256], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm5 = bn.batch_norm_layer(conv5, train_phase=phase_train, scope_bn='BN5')
        relu5 = act.ReLU(bnorm5, 'ReLU5')

        #Conv3-256p1
        conv6 = cnv.conv(relu5, 'conv6', [3, 3, 256, 256], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm6 = bn.batch_norm_layer(conv6, train_phase=phase_train, scope_bn='BN6')
        relu6 = act.ReLU(bnorm6, 'ReLU6')
        #SKIP CONNECTION 2

        #Conv5-512s2 (separable 5x1 / 1x5 pair)
        conv7_1 = cnv.conv(relu6, 'conv7_1', [5, 1, 256, 512], stride=[1, 2, 1, 1],
                           padding='SAME', wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        conv7_2 = cnv.conv(conv7_1, 'conv7_2', [1, 5, 512, 512], stride=[1, 1, 2, 1],
                           padding='SAME', wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm7 = bn.batch_norm_layer(conv7_2, train_phase=phase_train, scope_bn='BN7')
        relu7 = act.ReLU(bnorm7, 'ReLU7')

        #Conv3-512p1
        conv8_1 = cnv.conv(relu7, 'conv8_1', [3, 1, 512, 512], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        conv8_2 = cnv.conv(conv8_1, 'conv8_2', [1, 3, 512, 512], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm8 = bn.batch_norm_layer(conv8_2, train_phase=phase_train, scope_bn='BN8')
        relu8 = act.ReLU(bnorm8, 'ReLU8')

        #Conv3-512p1
        conv9_1 = cnv.conv(relu8, 'conv9_1', [1, 3, 512, 512], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        conv9_2 = cnv.conv(conv9_1, 'conv9_2', [3, 1, 512, 512], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm9 = bn.batch_norm_layer(conv9_2, train_phase=phase_train, scope_bn='BN9')
        relu9 = act.ReLU(bnorm9, 'ReLU9')
        #SKIP CONNECTION 3

        #Conv3-1024s2
        conv10_1 = cnv.conv(relu9, 'conv10_1', [3, 1, 512, 1024], stride=[1, 2, 1, 1],
                            padding='SAME', wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        conv10_2 = cnv.conv(conv10_1, 'conv10_2', [1, 3, 1024, 1024], stride=[1, 1, 2, 1],
                            padding='SAME', wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm10 = bn.batch_norm_layer(conv10_2, train_phase=phase_train, scope_bn='BN10')
        relu10 = act.ReLU(bnorm10, 'ReLU10')

        #Conv3-1024p1
        conv11_1 = cnv.conv(relu10, 'conv11_1', [1, 3, 1024, 1024], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        conv11_2 = cnv.conv(conv11_1, 'conv11_2', [3, 1, 1024, 1024], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm11 = bn.batch_norm_layer(conv11_2, train_phase=phase_train, scope_bn='BN11')
        relu11 = act.ReLU(bnorm11, 'ReLU11')

        #GO UP: each deconv doubles H and W and is summed with an encoder skip connection
        deconv1 = dcnv.deconv(relu11,
                              [BATCH_SIZE, int(IMAGE_SIZE_H / 8), int(IMAGE_SIZE_W / 8), 512],
                              'deconv1', [4, 4, 512, 1024], stride=[1, 2, 2, 1],
                              padding='SAME', wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        dbnorm1 = bn.batch_norm_layer(deconv1, train_phase=phase_train, scope_bn='dBN1')
        drelu1 = act.ReLU(dbnorm1 + relu9, 'dReLU1')
        conv12_1 = cnv.conv(drelu1, 'conv12_1', [3, 1, 512, 512], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        conv12_2 = cnv.conv(conv12_1, 'conv12_2', [1, 3, 512, 512], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm12 = bn.batch_norm_layer(conv12_2, train_phase=phase_train, scope_bn='BN12')
        relu12 = act.ReLU(bnorm12, 'ReLU12')

        deconv2 = dcnv.deconv(relu12,
                              [BATCH_SIZE, int(IMAGE_SIZE_H / 4), int(IMAGE_SIZE_W / 4), 256],
                              'deconv2', [4, 4, 256, 512], stride=[1, 2, 2, 1],
                              padding='SAME', wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        dbnorm2 = bn.batch_norm_layer(deconv2, train_phase=phase_train, scope_bn='dBN2')
        drelu2 = act.ReLU(dbnorm2 + relu6, 'dReLU2')
        conv13 = cnv.conv(drelu2, 'conv13', [3, 3, 256, 256], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm13 = bn.batch_norm_layer(conv13, train_phase=phase_train, scope_bn='BN13')
        relu13 = act.ReLU(bnorm13, 'ReLU13')

        deconv3 = dcnv.deconv(relu13,
                              [BATCH_SIZE, int(IMAGE_SIZE_H / 2), int(IMAGE_SIZE_W / 2), 128],
                              'deconv3', [4, 4, 128, 256], stride=[1, 2, 2, 1],
                              padding='SAME', wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        dbnorm3 = bn.batch_norm_layer(deconv3, train_phase=phase_train, scope_bn='dBN3')
        drelu3 = act.ReLU(dbnorm3 + relu3, 'dReLU3')
        conv14 = cnv.conv(drelu3, 'conv14', [3, 3, 128, 128], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm14 = bn.batch_norm_layer(conv14, train_phase=phase_train, scope_bn='BN14')
        relu14 = act.ReLU(bnorm14, 'ReLU14')

        deconv4 = dcnv.deconv(relu14,
                              [BATCH_SIZE, int(IMAGE_SIZE_H), int(IMAGE_SIZE_W), 32],
                              'deconv4', [4, 4, 32, 128], stride=[1, 2, 2, 1],
                              padding='SAME', wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        dbnorm4 = bn.batch_norm_layer(deconv4, train_phase=phase_train, scope_bn='dBN4')
        drelu4 = act.ReLU(dbnorm4 + relu0, 'dReLU4')
        conv_last = cnv.conv(drelu4, 'conv_last', [3, 3, 32, 32], wd=WEIGHT_DECAY, FLOAT16=FLOAT16)
        bnorm_last = bn.batch_norm_layer(conv_last, train_phase=phase_train, scope_bn='BNl')
        relu_last = act.ReLU(bnorm_last, 'ReLU_last')

        scores = cnv.conv(relu_last, 'scores', [3, 3, 32, 1], wd=0, FLOAT16=FLOAT16)
        tf.summary.image('output', scores)
        return scores
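# Shape bookkeeping for the skip connections above (a sketch; the input
# resolution is a hypothetical example -- the network only needs H and W
# divisible by 16). With 'SAME' padding every stride-2 stage halves H and W,
# so each deconv output lines up with the encoder tensor it is summed with:
IMAGE_SIZE_H, IMAGE_SIZE_W = 256, 512  # hypothetical
sizes = [(IMAGE_SIZE_H // 2 ** k, IMAGE_SIZE_W // 2 ** k) for k in range(5)]
# encoder: relu0 @ sizes[0], relu3 @ sizes[1], relu6 @ sizes[2],
#          relu9 @ sizes[3], relu11 @ sizes[4]
# decoder: deconv1 sizes[4]->sizes[3] (+ relu9), deconv2 -> sizes[2] (+ relu6),
#          deconv3 -> sizes[1] (+ relu3), deconv4 -> sizes[0] (+ relu0)
for k in range(4):
    assert sizes[k] == (2 * sizes[k + 1][0], 2 * sizes[k + 1][1])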
batch_size = 50
num_epochs = 100
num_classes = 5  # PeaksData has 5 classes
hidden_units = 100
dimensions = 2

# PeaksData, SwissRollData, GMMData
X_train, y_train, X_test, y_test = utils.get_data('PeaksData')
X_train, y_train = shuffle(X_train, y_train)

# gradient and jacobian tests (a finite-difference sketch of the idea follows this script)
grad_test_W(X_train, y_train)
grad_test_b(X_train, y_train)
jacobian_test_W(X_train, y_train)
jacobian_test_b(X_train, y_train)
grad_test_W_whole_network(X_train, y_train)
grad_test_b_whole_network(X_train, y_train)

model = models.MyNeuralNetwork()
model.add(layers.Linear(dimensions, hidden_units))
model.add(activations.ReLU())
model.add(layers.Softmax(hidden_units, num_classes))
optimizer = optimizers.SGD(model.parameters, lr=0.1)
losses, train_accuracy, test_accuracy = model.fit(X_train, y_train, X_test, y_test,
                                                  batch_size, num_epochs, optimizer)

# plotting
utils.plot_scores(train_accuracy, test_accuracy)
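# The grad_test_* / jacobian_test_* calls verify analytic gradients. A
# minimal sketch of the underlying idea using a central finite difference
# (check_gradient is a hypothetical helper, not one of the repository's
# test functions):
import numpy as np

def check_gradient(f, grad_f, w, eps=1e-6):
    """f: scalar loss over the flat parameter vector w; grad_f: its analytic gradient."""
    numeric = np.zeros_like(w)
    for i in range(w.size):
        step = np.zeros_like(w)
        step[i] = eps
        numeric[i] = (f(w + step) - f(w - step)) / (2 * eps)  # central difference
    return np.max(np.abs(numeric - grad_f(w)))  # should shrink as O(eps**2)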
            if mode == 'train':
                model.zero_grad()
                model.backward(grad_wrt_output)
                optimizer.step()
            losses[mode].append(sum_loss)
        if verbose:
            print('Epoch {}: Train loss = {:.6f}, val loss = {:.6f}'.format(
                e, losses['train'][-1], losses['val'][-1]))
    return losses

### Testing without Dropout ###

# Defining the model architecture
model = containers.Sequential(layers.Linear(2, 500, with_bias=True), activations.ReLU(),
                              layers.Linear(500, 500, with_bias=True), activations.ReLU(),
                              layers.Linear(500, 500, with_bias=True), activations.ReLU(),
                              layers.Linear(500, 500, with_bias=True), activations.ReLU(),
                              layers.Linear(500, 2, with_bias=True), activations.Tanh())
criterion = losses.LossMSE()
optimizer = optimizers.Adam(model.param(), learning_rate=0.001, p1=0.9, p2=0.999)
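# A minimal sketch of the update rule Adam applies, with p1/p2 playing the
# usual beta1/beta2 roles (an assumption based on the constructor arguments
# above); adam_step is a hypothetical helper, not the framework's API:
def adam_step(w, grad, m, v, t, lr=0.001, p1=0.9, p2=0.999, eps=1e-8):
    m = p1 * m + (1 - p1) * grad          # first-moment (mean) estimate
    v = p2 * v + (1 - p2) * grad ** 2     # second-moment estimate
    m_hat = m / (1 - p1 ** t)             # bias correction, t >= 1
    v_hat = v / (1 - p2 ** t)
    return w - lr * m_hat / (v_hat ** 0.5 + eps), m, v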
plt.plot(X_test, y_test)
plt.plot(X_test, predictions)
plt.show()

EPOCHS = 10001
LEARNING_RATE = 0.05

X_train, y_train = spiral_data(samples=100, classes=3)
X_val, y_val = spiral_data(samples=100, classes=3)

model = network.NeuralNetwork()
model.add_layer(layers.Dense(2, 64,
                             weight_regularizer_l2=0.000005,
                             bias_regularizer_l2=0.000005))
model.add_layer(activations.ReLU())
model.add_layer(layers.Dropout(rate=0.2))  # see the inverted-dropout sketch below
model.add_layer(layers.Dense(64, 3))
model.add_layer(activations.Softmax())
model.set(loss=losses.CategoricalCrossentropy(),
          optimizer=optimizers.Adam(learning_rate=LEARNING_RATE),
          accuracy=metrics.CategoricalAccuracy())
model.fit(X_train, y_train, epochs=EPOCHS, validation_data=(X_val, y_val))
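# A minimal sketch of inverted dropout, the variant layers.Dropout most
# likely implements (an assumption): units are zeroed with probability
# `rate` and the survivors rescaled at train time, so inference needs no
# correction.
import numpy as np

class InvertedDropout:
    def __init__(self, rate=0.2):
        self.keep = 1.0 - rate
    def forward(self, x, training=True):
        if not training:
            return x                                    # identity at inference
        self.mask = (np.random.rand(*x.shape) < self.keep) / self.keep
        return x * self.mask
    def backward(self, grad_output):
        return grad_output * self.mask                  # same mask gates gradients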