class AttentionWeight:
    def __init__(self) -> None:
        self.params = []
        self.grads = []
        self.softmax = Softmax()
        self.cache = None

    def forward(self, hs: np.ndarray, h: np.ndarray) -> np.ndarray:
        N, T, H = hs.shape
        # Broadcast the decoder state h over the T encoder states and
        # score each one with a dot product, then normalize with softmax.
        hr = h.reshape(N, 1, H).repeat(T, axis=1)
        t = hs * hr
        s = np.sum(t, axis=2)
        a = self.softmax.forward(s)
        self.cache = (hs, hr)
        return a

    def backward(self, da: np.ndarray) -> tuple:
        hs, hr = self.cache
        N, T, H = hs.shape
        ds = self.softmax.backward(da)
        dt = ds.reshape(N, T, 1).repeat(H, axis=2)
        dhs = dt * hr
        dhr = dt * hs
        dh = np.sum(dhr, axis=1)
        return dhs, dh

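# A minimal usage sketch for AttentionWeight above (not from the original
# source): the shapes are illustrative, and it assumes the repo's batched,
# row-wise Softmax class is already in scope, as in the snippet above.
import numpy as np

N, T, H = 2, 5, 4                        # batch size, time steps, hidden size
hs = np.random.randn(N, T, H)            # encoder hidden states
h = np.random.randn(N, H)                # current decoder state

attn = AttentionWeight()
a = attn.forward(hs, h)                  # (N, T) attention weights
assert np.allclose(a.sum(axis=1), 1.0)   # each row is a probability distribution
dhs, dh = attn.backward(np.ones_like(a)) # gradients w.r.t. hs and h
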
def test_softmax_grad(N=None):
    from layers import Softmax
    from functools import partial

    np.random.seed(12345)

    N = np.inf if N is None else N
    p_soft = partial(F.softmax, dim=1)
    gold = torch_gradient_generator(p_soft)

    i = 0
    while i < N:
        mine = Softmax()
        n_ex = np.random.randint(1, 3)
        n_dims = np.random.randint(1, 50)
        z = random_tensor((n_ex, n_dims), standardize=True)
        out = mine.forward(z)

        assert_almost_equal(
            gold(z),
            mine.backward(np.ones_like(out)),
            err_msg="Theirs:\n{}\n\nMine:\n{}\n".format(
                gold(z), mine.backward(np.ones_like(out))
            ),
            decimal=3,
        )
        print("PASSED")
        i += 1

def test_SoftmaxLayerGradientCheck(self):
    x = np.random.rand(3)
    layer = Softmax()
    layer.forward(x)
    grad = layer.backward(np.array([1.]))
    numgrad = numerical_gradient.calc(layer.forward, x)
    numgrad = np.sum(numgrad, axis=1)
    numerical_gradient.assert_are_similar(grad, numgrad)

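# `numerical_gradient.calc` is repo-local; as a hedged sketch, a
# central-difference checker with the same (input, output) Jacobian
# convention could look like this (the name and eps are assumptions):
import numpy as np

def numerical_jacobian(f, x, eps=1e-5):
    """Central-difference Jacobian of f at x, shape (x.size, y.size)."""
    y = np.atleast_1d(f(x))
    jac = np.zeros((x.size, y.size))
    for i in range(x.size):
        old = x.flat[i]
        x.flat[i] = old + eps
        y_plus = np.atleast_1d(f(x)).copy()
        x.flat[i] = old - eps
        y_minus = np.atleast_1d(f(x)).copy()
        x.flat[i] = old                        # restore the perturbed entry
        jac[i] = (y_plus - y_minus) / (2 * eps)
    return jac
# Summing over axis=1, as the test does, gives the gradient of sum(f(x)),
# which is what backward() computes when driven with an all-ones upstream gradient.
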
def bn_2_layer_test(epochs=2, reg=0.0, lr=0.01, momentum=0.7):
    trainingData, trainingLabels, \
        validationData, validationLabels, \
        testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.20)

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    # Baseline: 2-layer network without batch normalization.
    network = Model(name="2-layer(NO BN)")
    network.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    network.addLayer(Relu())
    network.addLayer(Linear(50, 10, regularization=reg, initializer="he"))
    network.addLayer(Softmax())

    sgd = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)
    network.compile(sgd, "cce")
    network.fit(trainingData, trainingLabels, epochs=epochs, batch_size=64,
                validationData=(validationData, validationLabels))

    # Same architecture with batch normalization before the activation.
    networkBN = Model(name="2-layer(WITH BN)")
    networkBN.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(50, trainable=True, alpha=0.90))
    networkBN.addLayer(Relu())
    networkBN.addLayer(Linear(50, 10, regularization=reg, initializer="he"))
    networkBN.addLayer(Softmax())

    sgd2 = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)
    networkBN.compile(sgd2, "cce")
    networkBN.fit(trainingData, trainingLabels, epochs=epochs, batch_size=64,
                  validationData=(validationData, validationLabels))

    #plotAccuracy(network, "plots/", timestamp)
    #plotLoss(network, "plots/", timestamp)
    #loss, acc = network.evaluate(testingData, testingLabels)
    #print("Test loss: {} , Test acc: {}".format(loss, acc))
    #plotAccuracy(network, "plots/", timestamp, title="2-layer(NO BN) accuracy over epochs", fileName="nobnacc")
    #plotLoss(network, "plots/", timestamp, title="2-layer(NO BN) loss over epochs", fileName="nobnloss")
    #plotAccuracy(networkBN, "plots/", timestamp, title="2-layer(WITH BN) accuracy over epochs", fileName="bnacc")
    #plotLoss(networkBN, "plots/", timestamp, title="2-layer(WITH BN) loss over epochs", fileName="bnloss")

    multiPlotLoss((network, networkBN), "plots/", timestamp,
                  title="2-layer network loss over epochs, eta:{}, lambda:{}".format(lr, reg))
    multiPlotAccuracy((network, networkBN), "plots/", timestamp,
                      title="2-layer network accuracy over epochs, eta:{}, lambda:{}".format(lr, reg))

def test_softmax_activation(N=None):
    from layers import Softmax

    N = np.inf if N is None else N

    mine = Softmax()
    gold = lambda z: F.softmax(torch.FloatTensor(z), dim=1).numpy()

    i = 0
    while i < N:
        n_dims = np.random.randint(1, 100)
        z = random_stochastic_matrix(1, n_dims)
        assert_almost_equal(mine.forward(z), gold(z))
        print("PASSED")
        i += 1

def tryParameters(test_name, N_hidden, lam, l_rate, decay, mom, epochs=50, batch_size=250):
    net = Net([
        BatchNorm(cifar.in_size, trainMean()),
        Linear(cifar.in_size, N_hidden, lam=lam),
        ReLU(N_hidden),
        Linear(N_hidden, cifar.out_size, lam=lam),
        Softmax(cifar.out_size)
    ], lam, l_rate, decay, mom)

    results = net.trainMiniBatch(train, val, epochs, batch_size, shuffle=True)

    print('{} Test Accuracy: {:.2f}'.format(
        test_name, net.accuracy(test['one_hot'].T, test['images'].T)))
    print('Final train a/c, val a/c: {:.2f}/{:.2f}, {:.2f}/{:.2f}'.format(
        results['last_a_train'], results['last_c_train'],
        results['last_a_val'], results['last_c_val']))

    plotResults(test_name, results['a_train'], results['c_train'],
                results['a_val'], results['c_val'])
    #weights_plot(net, "plots/weights_vizualisation_{}.png".format(test_name), labels)
    return results

def test1layergradients(samples=1, dimensions=3072):
    print("\n\nTesting 1-layer gradients (NO BN, NO REG) using a batch size of {}".format(samples))

    trainingData, trainingLabels, encodedTrainingLabels = loadData("Datasets/cifar-10-batches-mat/data_batch_1.mat")
    trainingData = trainingData[0:dimensions, 0:samples]
    trainingLabels = trainingLabels[0:dimensions, 0:samples]
    encodedTrainingLabels = encodedTrainingLabels[0:dimensions, 0:samples]

    network = Model()
    linear = Linear(dimensions, 10, regularization=0.00)
    network.addLayer(linear)
    network.addLayer(Softmax())

    sgd = SGD(lr=0.001, lr_decay=1.0, momentum=0.0, shuffle=True)
    network.compile(sgd, "cce")

    network.predict(trainingData)
    network.backpropagate(encodedTrainingLabels)

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    numerical_gradW = compute_grads(1e-6, linear.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb = compute_grads(1e-6, linear.b, trainingData, encodedTrainingLabels, network)

    print("W")
    relative_errorW = grad_difference(linear.gradW, numerical_gradW)
    print("b")
    relative_errorb = grad_difference(linear.gradb, numerical_gradb)

    return (relative_errorW, linear.gradW, numerical_gradW), (relative_errorb, linear.gradb, numerical_gradb)

def gradient_check(lam, lin_neurons, with_BN):
    # prepare a subset of the train data
    subset = 50
    grad_train_img = train['images'][:subset, :].T
    grad_train_truth = train['one_hot'][:subset, :].T

    count = 0
    layers = []
    for N in lin_neurons:
        not_last_layer = count < (len(lin_neurons) - 1)
        layers.append(
            Linear(cifar.in_size if count == 0 else lin_neurons[count - 1],
                   N if not_last_layer else cifar.out_size, lam=lam))
        if not_last_layer:
            if with_BN:
                layers.append(BatchNorm(N))
            layers.append(ReLU(N))
        count += 1
    if len(lin_neurons) == 1 and with_BN:
        layers.append(BatchNorm(cifar.out_size))
    layers.append(Softmax(cifar.out_size))

    # init the network
    print(["{}:{},{}".format(l.name, l.in_size, l.out_size) for l in layers])
    g_net = Net(layers, lam=lam, l_rate=0.001, decay=0.99, mom=0.99)

    # do the pass
    grad_out = g_net.forward(grad_train_img, train=True)
    g_net.backward(grad_train_truth)
    cost = g_net.cost(grad_train_truth, out=grad_out)

    # calc the numeric grad for each linear layer
    for linear in [l for l in layers if not l.isActivation]:
        num_gradient(grad_train_img, grad_train_truth, g_net, linear, cost)

def sample(rnn, seed_ix, n):
    """
    Sample a sequence of n integers from the model;
    seed_ix is the index of the seed letter for the first time step.
    """
    x = np.zeros(vocab_size)
    x[seed_ix] = 1
    ixes = []
    for t in range(n):
        y = rnn.nodes[-1].forward(x, is_training=False)
        p = Softmax().forward(y)
        ix = np.random.choice(range(vocab_size), p=p.ravel())
        x = np.zeros(vocab_size)
        x[ix] = 1
        ixes.append(ix)
    return ixes

def gradient_check():
    # prepare a subset of the train data
    subset = 50
    grad_train_img = train['images'][:subset, :].T
    grad_train_truth = train['one_hot'][:subset, :].T

    # init the network
    N_hidden = 50
    lin = [
        Linear(cifar.in_size, N_hidden, lam=0.1),
        Linear(N_hidden, cifar.out_size, lam=0.1)
    ]
    g_net = Net(
        [lin[0], ReLU(N_hidden), lin[1], Softmax(cifar.out_size)],
        lam=0.1, l_rate=0.001, decay=0.99, mom=0.99)

    # do the pass
    grad_out = g_net.forward(grad_train_img)
    g_net.backward(grad_train_truth)
    cost = g_net.cost(grad_train_truth, out=grad_out)

    # calc the numeric grad for each linear layer
    for linear in lin:
        num_gradient(grad_train_img, grad_train_truth, g_net, linear, cost)

def getNetwork():
    '''Obtain the network structure from the specified file.'''
    file_name = "models/structure.json"
    if len(sys.argv) > 1:
        file_name = sys.argv[1]
    with open(file_name, "r") as f:
        networks = json.load(f)
    for network in networks:
        config = network['config']
        dis_model = network['model']
        model = Network()
        for layer in dis_model:
            if layer['type'] == 'Linear':
                model.add(Linear(layer['name'], layer['in_num'], layer['out_num'], layer['std']))
            if layer['type'] == 'Relu':
                model.add(Relu(layer['name']))
            if layer['type'] == 'Sigmoid':
                model.add(Sigmoid(layer['name']))
            if layer['type'] == 'Softmax':
                model.add(Softmax(layer['name']))
        loss = EuclideanLoss('loss')
        if 'loss' in config:
            if config['loss'] == 'CrossEntropyLoss':
                loss = CrossEntropyLoss('loss')
        yield network['name'], model, config, loss

def network_setup(model_file_path=None):
    freq_count = 4000
    count_bins = 88 * 20
    dataset = MapsDB('../db',
                     freq_count=freq_count,
                     count_bins=count_bins,
                     batch_size=128,
                     start_time=0.5,
                     duration=0.5)
    model = Network()
    model.add(Linear('fc1', dataset.get_vec_input_width(), 2048, 0.001))
    model.add(Sigmoid('sigmoid1'))
    model.add(Linear('fc2', 2048, dataset.get_label_width(), 0.001))
    model.add(Softmax('softmax2'))

    loss = CrossEntropyLoss(name='xent')
    # loss = EuclideanLoss(name='r2')

    optim = SGDOptimizer(learning_rate=0.00001, weight_decay=0.005, momentum=0.9)
    # optim = AdagradOptimizer(learning_rate=0.001, eps=1e-6)

    input_placeholder = T.fmatrix('input')
    label_placeholder = T.fmatrix('label')
    label_active_size_placeholder = T.ivector('label_active_size')

    if model_file_path:
        model.loads(model_file_path)
    else:
        dataset.load_cache()

    model.compile(input_placeholder, label_placeholder, label_active_size_placeholder, loss, optim)
    return model, dataset, freq_count, count_bins

def test_LinearSoftmax(self):
    model = Seq()
    model.add(Linear(2, 1))
    model.add(Softmax())
    data = np.array([2., 3.])
    out = model.forward(data)
    # Softmax over a single logit is always 1.
    self.assertEqual(out, 1.)

def __init__(self, input_size=INPUT_SIZE, output_size=OUTPUT_SIZE, hidden_size=HIDDEN_SIZE,
             embed_size=EMBED_SIZE, lr=LEARNING_RATE, clip_grad=CLIP_GRAD, init_range=INIT_RANGE):
    # this model will generate a vector representation based on the input
    input_layers = [
        Embedding(input_size, embed_size, init_range),
        Lstm(embed_size, hidden_size, init_range),
    ]
    # this model will generate an output sequence based on the hidden vector
    output_layers = [
        Embedding(output_size, embed_size, init_range),
        Lstm(embed_size, hidden_size, init_range, previous=input_layers[1]),
        Softmax(hidden_size, output_size, init_range)
    ]
    self.input_layers, self.output_layers = input_layers, output_layers
    self.hidden_size = hidden_size
    self.embed_size = embed_size
    self.input_size = input_size
    self.output_size = output_size
    self.lr = lr
    self.clip_grad = clip_grad

def main():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    print('Imported MNIST data: training input %s and training labels %s.' % (
        x_train.shape, y_train.shape))
    print('Imported MNIST data: test input %s and test labels %s.' % (
        x_test.shape, y_test.shape))

    N, H, W = x_train.shape
    x = x_train.reshape((N, H * W)).astype('float') / 255
    y = to_categorical(y_train, num_classes=10)

    model = Sequential()
    model.add(Dense(), ReLU(), layer_dim=(28 * 28, 300), weight_scale=1e-2)
    model.add(Dense(), ReLU(), layer_dim=(300, 100), weight_scale=1e-2)
    model.add(Dense(), Softmax(), layer_dim=(100, 10), weight_scale=1e-2)
    model.compile(optimizer=GradientDescent(learning_rate=1e-2),
                  loss_func=categorical_cross_entropy)
    model.fit(x, y, epochs=10, batch_size=50, verbose=False)

    N, H, W = x_test.shape
    x = x_test.reshape((N, H * W)).astype('float') / 255
    y = to_categorical(y_test, num_classes=10)
    model.evaluate(x, y)

def generate_sequence(rnn_layer, first_elem, ind_to_char, char_to_ind, length=10):
    # print(rnn_layer.h)
    soft = Softmax()

    def sample(array):
        # sample among the (at most) 20 most probable indices,
        # with probabilities re-normalized by a softmax
        n = min(20, array.shape[0])
        ind = np.argpartition(array, -n)[-n:]
        indx = np.random.choice(ind, p=soft(array[ind]))
        return indx

    k = len(ind_to_char)
    x = np.array([char_to_ind[elem] for elem in first_elem])
    x = one_hotify(x, num_classes=k)
    string = first_elem
    for i in range(length):
        probs = rnn_layer(x)
        string += stringify(probs, ind_to_char)
        next_elem = []
        for j in range(probs.shape[-1]):
            next_elem.append(sample(probs[:, j]))
        x = one_hotify(np.array(next_elem), k)
    # print(np.average(np.abs(rnn_layer.h)))
    return string

def test3layergradients(samples=1, dimensions=3072):
    print("\n\nTesting 3-layer gradients using a batch size of {}".format(samples))

    trainingData, trainingLabels, encodedTrainingLabels = loadData("Datasets/cifar-10-batches-mat/data_batch_1.mat")
    trainingData = trainingData[0:dimensions, 0:samples]
    trainingLabels = trainingLabels[0:dimensions, 0:samples]
    encodedTrainingLabels = encodedTrainingLabels[0:dimensions, 0:samples]

    network = Model()

    linear = Linear(dimensions, 50, regularization=0.00, initializer="he")
    network.addLayer(linear)
    network.addLayer(Relu())

    linear2 = Linear(50, 30, regularization=0.00, initializer="he")
    network.addLayer(linear2)
    network.addLayer(Relu())

    linear3 = Linear(30, 10, regularization=0.00, initializer="he")
    network.addLayer(linear3)
    network.addLayer(Softmax())

    sgd = SGD(lr=0.001, lr_decay=1.0, momentum=0.0, shuffle=True)
    network.compile(sgd, "cce")

    network.predict(trainingData, updateInternal=True)
    network.backpropagate(encodedTrainingLabels)

    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

    numerical_gradW1 = compute_grads_w_BN(1e-4, linear.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb1 = compute_grads_w_BN(1e-4, linear.b, trainingData, encodedTrainingLabels, network)
    numerical_gradW2 = compute_grads_w_BN(1e-4, linear2.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb2 = compute_grads_w_BN(1e-4, linear2.b, trainingData, encodedTrainingLabels, network)
    numerical_gradW3 = compute_grads_w_BN(1e-4, linear3.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb3 = compute_grads_w_BN(1e-4, linear3.b, trainingData, encodedTrainingLabels, network)

    print("W1")
    relative_errorW = grad_difference(linear.gradW, numerical_gradW1)
    print("b1")
    relative_errorb = grad_difference(linear.gradb, numerical_gradb1)
    print("W2")
    relative_errorW2 = grad_difference(linear2.gradW, numerical_gradW2)
    print("b2")
    relative_errorb2 = grad_difference(linear2.gradb, numerical_gradb2)
    print("W3")
    relative_errorW3 = grad_difference(linear3.gradW, numerical_gradW3)
    print("b3")
    relative_errorb3 = grad_difference(linear3.gradb, numerical_gradb3)
    print("\n")

def test_softmax():
    from layers import Softmax
    # the input is 3-D (batch, time, features); T.tensor2 does not exist
    # in Theano, so declare a tensor3 symbolic variable
    x = T.tensor3()
    f = theano.function([x], Softmax()(x))
    x = np.ones((batch_size, time_steps, input_size))
    assert f(x).shape == (batch_size * time_steps, input_size)

def _loss_function(self, layer_input_output_cache, y_true):
    logits = layer_input_output_cache[-1]
    y_pred = Softmax().forward(logits)
    # cross-entropy averaged over the batch
    softmax_cross_entropy_loss = -1.0 / len(y_true) * np.sum(
        [y_true[i] * np.log(y_pred[i]) for i in range(len(y_true))])
    # combined softmax + cross-entropy gradient w.r.t. the logits
    softmax_cross_entropy_grad = y_pred - y_true
    return softmax_cross_entropy_loss, softmax_cross_entropy_grad

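# The list comprehension above loops in Python; a hedged, vectorized and
# numerically stable equivalent could look like the sketch below (names are
# illustrative, not from the source; unlike the snippet above, it also folds
# the 1/N factor into the gradient to match the averaged loss):
import numpy as np

def softmax_xent(logits, y_true, eps=1e-12):
    """Batched softmax cross-entropy; logits and y_true are (N, C), y_true one-hot."""
    shifted = logits - logits.max(axis=1, keepdims=True)  # guard exp against overflow
    exp = np.exp(shifted)
    y_pred = exp / exp.sum(axis=1, keepdims=True)
    loss = -np.sum(y_true * np.log(y_pred + eps)) / len(y_true)
    grad = (y_pred - y_true) / len(y_true)  # gradient of the mean loss w.r.t. logits
    return loss, grad
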
def __init__(self, name="CIFAR10.pixelCNN", input_dim=3, dims=32, q_levels=256, layers=3, grad_clip=1):
    # self.model = Model(name = model_name)
    self.name = name
    self.grad_clip = grad_clip
    self.is_train = T.scalar()
    self.X = T.tensor4('X')  # shape: (batchsize, channels, height, width)
    self.X_r = T.itensor4('X_r')

    self.X_transformed = self.X_r.dimshuffle(0, 2, 3, 1)
    self.input_layer = WrapperLayer(self.X.dimshuffle(0, 2, 3, 1))  # input reshaped to (batchsize, height, width, 3)
    self.q_levels = q_levels
    self.pixel_CNN = pixelConv(
        self.input_layer,
        input_dim,
        dims,
        Q_LEVELS=q_levels,
        name=self.name + ".pxCNN",
        num_layers=layers,
    )
    print("done1")
    self.params = self.pixel_CNN.get_params()

    self.output_probab = Softmax(self.pixel_CNN).output()
    print("done2")
    self.cost = T.nnet.categorical_crossentropy(
        self.output_probab.reshape((-1, self.output_probab.shape[self.output_probab.ndim - 1])),
        self.X_r.flatten()
    ).mean()

    self.output_image = sample_from_softmax(self.output_probab)
    print("done3")

    grads = T.grad(self.cost, wrt=self.params, disconnected_inputs='warn')
    self.grads = [T.clip(g, floatX(-grad_clip), floatX(grad_clip)) for g in grads]
    print("done5")

    # learning_rate = T.scalar('learning_rate')
    self.updates = lasagne.updates.adam(self.grads, self.pixel_CNN.get_params(), learning_rate=1e-3)
    print("d6")

    self.train_fn = theano.function([self.X, self.X_r], self.cost, updates=self.updates)
    print("done4")
    self.valid_fn = theano.function([self.X, self.X_r], self.cost)
    print("go to hell")
    self.generate_routine = theano.function([self.X], self.output_image)
    self.errors = {'training': [], 'validation': []}
    print("yo")

def create_output_node(model=None, input_sequences=None, num_gru=None, old_h0s=None,
                       reset=None, num_pixelCNN_layer=None):
    assert model is not None
    assert input_sequences is not None
    assert num_gru is not None
    assert old_h0s is not None
    assert reset is not None
    assert num_pixelCNN_layer is not None

    # reset the GRU hidden states to zero at the start of a new sequence
    new_h0s = T.zeros_like(old_h0s)
    h0s = theano.ifelse.ifelse(reset, new_h0s, old_h0s)

    embedding_layer = Embedding(Q_LEVELS, DIM, input_sequences, name=model.name + "Embedding.Q_LEVELS")
    model.add_layer(embedding_layer)

    prev_out = embedding_layer.output()
    last_layer = WrapperLayer(prev_out.reshape((prev_out.shape[0], prev_out.shape[1], WIDTH, DEPTH)))

    pixel_CNN = pixelConv(
        last_layer,
        DEPTH,
        DEPTH,
        name=model.name + ".pxCNN",
        num_layers=NUM_PIXEL_CNN_LAYER
    )

    prev_out = pixel_CNN.output()
    last_layer = WrapperLayer(prev_out.reshape((prev_out.shape[0], prev_out.shape[1], -1)))

    # stack of GRUs; keep each layer's final hidden state so it can be
    # carried over to the next call
    last_hidden_list = []
    for i in range(num_gru):
        gru_layer = GRU(DIM, DIM, last_layer, s0=h0s[i, :, :], name=model.name + "GRU_{}".format(i))
        last_hidden_list.append(gru_layer.output()[:, -1])
        model.add_layer(gru_layer)
        last_layer = gru_layer

    fc1 = FC(DIM, Q_LEVELS, last_layer, name=model.name + "FullyConnected")
    model.add_layer(fc1)

    softmax = Softmax(fc1, name=model.name + "Softmax")
    model.add_layer(softmax)

    return softmax.output(), T.stack(last_hidden_list, axis=0)

def test_score(model, test_set):
    test_err = 0.
    for x, target in test_set:
        y = model.forward(x)
        y = Softmax().forward(y)
        # print(y, np.argmax(y), np.argmax(target))
        if np.argmax(y) != np.argmax(target):
            test_err += 1.
    test_score = (1.0 - test_err / float(len(test_set))) * 100.0
    return test_score

def addSoftmaxLayer(self, **kwargs):
    """Add a softmax multi-class classification layer."""
    input_layer = self.input_layer if not self.all_layers \
        else self.all_layers[-1]
    new_layer = Softmax(input_layer, **kwargs)
    self.all_layers += (new_layer, )
    self.n_layers = len(self.all_layers)

def adddiscriminator(self, num_1, num_2):
    input_layer = self.feature_layer

    new_layer1 = DenseLayer(input_layer, name="cate_1", num_units=num_1)
    #self.all_layers += (new_layer1,)
    self.trainable_layers += (new_layer1,)

    new_layer2 = DenseLayer(new_layer1, name="cate_2", num_units=num_2)
    #self.all_layers += (new_layer2,)
    self.trainable_layers += (new_layer2,)

    category = Softmax(new_layer2)
    self.category_layer = category

def test_model_with_softmax():
    from models import Sequential
    from layers import Linear, Softmax

    inputs = np.array([[0.25, 0.63, 0.12]])
    targets = np.array([0, 1, 0])

    model = Sequential()
    model.add(Linear(3, 3, activation=Softmax()))

    predictions = model.feed_forward(inputs)
    loss = ce.loss(predictions, targets)

    for i in range(len(predictions)):
        gradient = ce.backward(predictions[i], targets[i])
        print("grad", gradient)

def regularizationSearch():
    trainingData, trainingLabels, \
        validationData, validationLabels, \
        testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.10)

    bestLambda = 0.0
    bestValAcc = 0.0
    bestLoss = 0.0
    for lambdaValue in np.arange(0, 0.2, 0.005):
        network = Model()
        network.addLayer(Linear(32*32*3, 50, regularization=lambdaValue, initializer="he"))
        network.addLayer(BatchNormalization(50, trainable=True))
        network.addLayer(Relu())

        network.addLayer(Linear(50, 30, regularization=lambdaValue, initializer="he"))
        network.addLayer(BatchNormalization(30, trainable=True))
        network.addLayer(Relu())

        network.addLayer(Linear(30, 10, regularization=lambdaValue, initializer="he"))
        network.addLayer(Softmax())

        sgd = SGD(lr=0.01, lr_decay=0.95, momentum=0.7, shuffle=True, lr_min=1e-5)
        network.compile(sgd, "cce")

        timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')
        network.fit(trainingData, trainingLabels, epochs=20,
                    validationData=(validationData, validationLabels), batch_size=64)

        #plotAccuracy(network, "plots/", timestamp)
        #plotLoss(network, "plots/", timestamp)

        print("Lambda:{}".format(lambdaValue))
        loss, acc = network.evaluate(validationData, validationLabels)
        print("Val loss: {} , Val acc: {}".format(loss, acc))
        print("\n\n")

        if acc > bestValAcc:
            bestLambda = lambdaValue
            bestValAcc = acc
            bestLoss = loss

    return bestLambda, bestValAcc, bestLoss

def build_network(hidden_layer_sizes: List[int], batch_normalized: bool,
                  regularization: float) -> Network:
    net = Network()
    layer_sizes = [CIFAR10.input_size] + hidden_layer_sizes + [CIFAR10.output_size]
    for i, (size_in, size_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        net.add_layer(
            Linear(size_in, size_out, regularization, Xavier(), name='Li' + str(i + 1)))
        if i < len(layer_sizes) - 2:
            # hidden layers: optional batch normalization, then ReLU
            if batch_normalized:
                net.add_layer(BatchNormalization(size_out, name='Bn' + str(i + 1)))
            net.add_layer(ReLU(size_out, name='Re' + str(i + 1)))
        else:
            # output layer: softmax classifier
            net.add_layer(Softmax(size_out, name='S'))
    return net

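# Hedged usage example for build_network (the argument values are
# illustrative, not from the source): a 3072-50-30-10 CIFAR-10 classifier
# with batch normalization and L2 regularization.
net = build_network([50, 30], batch_normalized=True, regularization=0.005)
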
def tryParameters(test_name, lin_neurons, with_BN, lam, l_rate, decay, mom,
                  epochs=50, batch_size=250):
    count = 0
    layers = []
    for N in lin_neurons:
        not_last_layer = count < (len(lin_neurons) - 1)
        layers.append(
            Linear(cifar.in_size if count == 0 else lin_neurons[count - 1],
                   N if not_last_layer else cifar.out_size, lam=lam))
        if not_last_layer:
            if with_BN:
                layers.append(BatchNorm(N))
            layers.append(ReLU(N))
        count += 1
    if len(lin_neurons) == 1 and with_BN:
        layers.append(BatchNorm(cifar.out_size))
    layers.append(Softmax(cifar.out_size))

    # init the network (pass the decay/mom arguments through instead of
    # hard-coding them, which the original did by mistake)
    print(["{}:{},{}".format(l.name, l.in_size, l.out_size) for l in layers])
    net = Net(layers, lam=lam, l_rate=l_rate, decay=decay, mom=mom)

    results = net.trainMiniBatch(train, val, epochs, batch_size, shuffle=True)

    print('{} Test Accuracy: {:.2f}'.format(
        test_name, net.accuracy(test['one_hot'].T, test['images'].T)))
    print('Final train a/c, val a/c: {:.2f}/{:.2f}, {:.2f}/{:.2f}'.format(
        results['last_a_train'], results['last_c_train'],
        results['last_a_val'], results['last_c_val']))

    plotResults(test_name, results['a_train'], results['c_train'],
                results['a_val'], results['c_val'])
    #weights_plot(net, "plots/weights_vizualisation_{}.png".format(test_name), labels)
    return results

from network import Network
from data_preparation import load_data
from solve_rnn import solve_rnn
import theano.tensor as T
# NOTE: the RNN, Linear, Softmax, CrossEntropyLoss and SGDOptimizer imports
# were missing from this script; the module paths below are assumptions.
from layers import RNN, Linear, Softmax
from loss import CrossEntropyLoss
from optimizer import SGDOptimizer

X_train, y_train, X_test, y_test = load_data()

HIDDEN_DIM = 32
INPUT_DIM = 20
OUTPUT_DIM = 10

model = Network()
model.add(RNN('rnn1', HIDDEN_DIM, INPUT_DIM, 0.1))    # output shape: 4 x HIDDEN_DIM
model.add(Linear('fc', HIDDEN_DIM, OUTPUT_DIM, 0.1))  # output shape: 4 x OUTPUT_DIM
model.add(Softmax('softmax'))

loss = CrossEntropyLoss('xent')
optim = SGDOptimizer(0.01, 0.0001, 0.9)

input_placeholder = T.fmatrix('input')
label_placeholder = T.fmatrix('label')
model.compile(input_placeholder, label_placeholder, loss, optim)

MAX_EPOCH = 6
DISP_FREQ = 1000
TEST_FREQ = 10000

solve_rnn(model, X_train, y_train, X_test, y_test,
          MAX_EPOCH, DISP_FREQ, TEST_FREQ)

X_r = T.itensor3('X_r')  # shape: (batchsize, height, width)

input_layer = WrapperLayer(X.dimshuffle(0, 1, 2, 'x'))  # input reshaped to (batchsize, height, width, 1)

pixel_CNN = pixelConv(
    input_layer,
    1,
    DIM,
    name=model.name + ".pxCNN",
    num_layers=12,
    Q_LEVELS=Q_LEVELS
)

model.add_layer(pixel_CNN)

output_probab = Softmax(pixel_CNN).output()

# cost in nats
cost = T.nnet.categorical_crossentropy(
    output_probab.reshape((-1, output_probab.shape[output_probab.ndim - 1])),
    X_r.flatten()
).mean()

output_image = sample_from_softmax(output_probab)

model.print_params()
params = model.get_params()