Example #1
def create_output_node(model=None, input_sequences=None, num_gru=None, old_h0s=None, reset=None, num_pixelCNN_layer = None):
	assert(model is not None)
	assert(input_sequences is not None)
	assert(num_gru is not None)
	assert(old_h0s is not None)
	assert(reset is not None)
	assert(num_pixelCNN_layer is not None)

	new_h0s = T.zeros_like(old_h0s)
	h0s = theano.ifelse.ifelse(reset, new_h0s, old_h0s)

	embedding_layer = Embedding(Q_LEVELS, DIM, input_sequences, name = model.name+"Embedding.Q_LEVELS")
	model.add_layer(embedding_layer)

	prev_out = embedding_layer.output()
	last_layer = WrapperLayer(prev_out.reshape((prev_out.shape[0], prev_out.shape[1], WIDTH, DEPTH)))

	pixel_CNN = pixelConv(
		last_layer,
		DEPTH,
		DEPTH,
		name = model.name + ".pxCNN",
		num_layers = NUM_PIXEL_CNN_LAYER
	)

	prev_out = pixel_CNN.output()
	last_layer = WrapperLayer(prev_out.reshape((prev_out.shape[0], prev_out.shape[1], -1)))

	last_hidden_list = []

	for i in range(num_gru):
		gru_layer = GRU(DIM, DIM, last_layer, s0 = h0s[i,:,:], name = model.name+"GRU_{}".format(i))
		last_hidden_list.append(gru_layer.output()[:,-1])
		model.add_layer(gru_layer)
		last_layer = gru_layer

	fc1 = FC(DIM, Q_LEVELS, last_layer, name = model.name+"FullyConnected")
	model.add_layer(fc1)

	softmax = Softmax(fc1, name= model.name+"Softmax")
	model.add_layer(softmax)

	return softmax.output(), T.stack(last_hidden_list, axis = 0)
Example #2
class AttentionWeight:
    def __init__(self) -> None:
        self.params = []
        self.grads = []
        self.softmax = Softmax()
        self.cache = None

    def forward(self, hs: np.ndarray, h: np.ndarray) -> np.ndarray:
        N, T, H = hs.shape

        hr = h.reshape(N, 1, H).repeat(T, axis=1)
        t = hs * hr
        s = np.sum(t, axis=2)
        a = self.softmax.forward(s)

        self.cache = (hs, hr)
        return a

    def backward(self, da: np.ndarray) -> tuple:
        hs, hr = self.cache
        N, T, H = hs.shape

        ds = self.softmax.backward(da)
        dt = ds.reshape(N, T, 1).repeat(H, axis=2)
        dhs = dt * hr
        dhr = dt * hs
        dh = np.sum(dhr, axis=1)

        return dhs, dh
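
For orientation, here is a minimal usage sketch of the class above; the shapes and data are hypothetical, and it assumes a Softmax layer (as imported in the snippet) that normalizes over the last axis of the score matrix:

import numpy as np

# Hypothetical dimensions: N=2 sequences, T=4 time steps, H=3 hidden units.
np.random.seed(0)
hs = np.random.randn(2, 4, 3)   # encoder hidden states, shape (N, T, H)
h = np.random.randn(2, 3)       # decoder hidden state, shape (N, H)

attn = AttentionWeight()
a = attn.forward(hs, h)                    # attention weights, shape (N, T)
dhs, dh = attn.backward(np.ones_like(a))   # gradients w.r.t. hs and h
print(a.shape, dhs.shape, dh.shape)        # (2, 4) (2, 4, 3) (2, 3)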
Example #3
def test_softmax_grad(N=None):
    from layers import Softmax
    from functools import partial

    np.random.seed(12345)

    N = np.inf if N is None else N
    p_soft = partial(F.softmax, dim=1)
    gold = torch_gradient_generator(p_soft)

    i = 0
    while i < N:
        mine = Softmax()
        n_ex = np.random.randint(1, 3)
        n_dims = np.random.randint(1, 50)
        z = random_tensor((n_ex, n_dims), standardize=True)
        out = mine.forward(z)

        assert_almost_equal(
            gold(z),
            mine.backward(np.ones_like(out)),
            err_msg="Theirs:\n{}\n\nMine:\n{}\n".format(
                gold(z), mine.backward(np.ones_like(out))
            ),
            decimal=3,
        )
        print("PASSED")
        i += 1
Example #4
 def test_SoftmaxLayerGradientCheck(self):
     x = np.random.rand(3)
     layer = Softmax()
     layer.forward(x)
     grad = layer.backward(np.array([1.]))
     numgrad = numerical_gradient.calc(layer.forward, x)
     numgrad = np.sum(numgrad, axis=1)
     numerical_gradient.assert_are_similar(grad, numgrad)
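
The Softmax layers imported in these gradient tests are not shown here. As a point of reference only (a sketch of my own, not taken from any of the quoted repositories), a minimal NumPy layer with the same forward/backward interface might look like this:

import numpy as np

class MinimalSoftmax:
    """Softmax over the last axis; backward() applies the softmax Jacobian."""

    def forward(self, z):
        e = np.exp(z - np.max(z, axis=-1, keepdims=True))  # shift for numerical stability
        self.out = e / np.sum(e, axis=-1, keepdims=True)
        return self.out

    def backward(self, dout):
        # dL/dz_i = y_i * (dout_i - sum_j dout_j * y_j), applied row-wise
        s = np.sum(dout * self.out, axis=-1, keepdims=True)
        return self.out * (dout - s)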
Example #5
def bn_2_layer_test(epochs=2, reg=0.0, lr=0.01, momentum=0.7):

    trainingData, trainingLabels, \
    validationData, validationLabels, \
    testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.20)
    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')


    network = Model(name="2-layer(NO BN)")
    
    network.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    network.addLayer(Relu())

    network.addLayer(Linear(50,10, regularization=reg, initializer="he"))
    network.addLayer(Softmax())

    sgd = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)  
 
    network.compile(sgd, "cce")
    network.fit(trainingData, trainingLabels, epochs=epochs, batch_size=64, validationData=(validationData, validationLabels))
    


    networkBN = Model(name="2-layer(WITH BN)")
    networkBN.addLayer(Linear(32*32*3, 50, regularization=reg, initializer="he"))
    networkBN.addLayer(BatchNormalization(50, trainable=True, alpha=0.90))
    networkBN.addLayer(Relu())

    networkBN.addLayer(Linear(50,10, regularization=reg, initializer="he"))
    networkBN.addLayer(Softmax())

    sgd2 = SGD(lr=lr, lr_decay=1.00, momentum=momentum, shuffle=True, lr_min=1e-5)  
 
    networkBN.compile(sgd2, "cce")
    networkBN.fit(trainingData, trainingLabels, epochs=epochs, batch_size=64, validationData=(validationData, validationLabels))

    #plotAccuracy(network, "plots/", timestamp)
    #plotLoss(network, "plots/", timestamp)
    
    #loss, acc = network.evaluate(testingData, testingLabels)
    #print("Test loss: {} , Test acc: {}".format(loss, acc) )

    #plotAccuracy(network, "plots/", timestamp, title="2-layer(NO BN) accuracy over epochs", fileName="nobnacc")
    #plotLoss(network, "plots/", timestamp, title="2-layer(NO BN) loss over epochs", fileName="nobnloss")

    #plotAccuracy(networkBN, "plots/", timestamp, title="2-layer(WITH BN) accuracy over epochs", fileName="bnacc")
    #plotLoss(networkBN, "plots/", timestamp, title="2-layer(WITH BN) loss over epochs", fileName="bnloss")

    multiPlotLoss((network, networkBN), "plots/", timestamp, title="2-layer network loss over epochs, eta:{}, lambda:{}".format(lr, reg))
    multiPlotAccuracy((network, networkBN), "plots/", timestamp, title="2-layer network accuracy over epochs, eta:{}, lambda:{}".format(lr, reg))
Example #6
def test_softmax_activation(N=None):
    from layers import Softmax

    N = np.inf if N is None else N

    mine = Softmax()
    gold = lambda z: F.softmax(torch.FloatTensor(z), dim=1).numpy()

    i = 0
    while i < N:
        n_dims = np.random.randint(1, 100)
        z = random_stochastic_matrix(1, n_dims)
        assert_almost_equal(mine.forward(z), gold(z))
        print("PASSED")
        i += 1
Example #7
def tryParameters(test_name,
                  N_hidden,
                  lam,
                  l_rate,
                  decay,
                  mom,
                  epochs=50,
                  batch_size=250):
    net = Net([
        BatchNorm(cifar.in_size, trainMean()),
        Linear(cifar.in_size, N_hidden, lam=lam),
        ReLU(N_hidden),
        Linear(N_hidden, cifar.out_size, lam=lam),
        Softmax(cifar.out_size)
    ], lam, l_rate, decay, mom)
    results = net.trainMiniBatch(train, val, epochs, batch_size, shuffle=True)
    print('{} Test Accuracy: {:.2f}'.format(
        test_name, net.accuracy(test['one_hot'].T, test['images'].T)))
    print('Final train a/c, val a/c: {:.2f}/{:.2f}, {:.2f}/{:.2f}'.format(
        results['last_a_train'], results['last_c_train'],
        results['last_a_val'], results['last_c_val']))
    plotResults(test_name, results['a_train'], results['c_train'],
                results['a_val'], results['c_val'])
    #weights_plot(net, "plots/weights_vizualisation_{}.png".format(test_name), labels)
    return results
Example #8
def test1layergradients(samples=1, dimensions=3072):

    print("\n\nTesting 1-layer gradients (NO BN, NO REG) using a batch size of {}".format(samples))
    trainingData, trainingLabels, encodedTrainingLabels = loadData("Datasets/cifar-10-batches-mat/data_batch_1.mat")

    
    trainingData = trainingData[0:dimensions, 0:samples]
    trainingLabels = trainingLabels[0:dimensions, 0:samples]
    encodedTrainingLabels = encodedTrainingLabels[0:dimensions, 0:samples]
    

    network = Model()
    linear = Linear(dimensions, 10, regularization=0.00)
    network.addLayer(linear)
    network.addLayer(Softmax())

    sgd = SGD(lr=0.001, lr_decay=1.0, momentum=0.0, shuffle=True)
    network.compile(sgd, "cce")

    network.predict(trainingData)
    network.backpropagate(encodedTrainingLabels)
    
    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')
    numerical_gradW = compute_grads(1e-6, linear.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb = compute_grads(1e-6, linear.b, trainingData, encodedTrainingLabels, network)

    print("W")
    relative_errorW = grad_difference(linear.gradW, numerical_gradW)
    print("b")
    relative_errorb = grad_difference(linear.gradb, numerical_gradb)

    return (relative_errorW, linear.gradW, numerical_gradW), (relative_errorb, linear.gradb, numerical_gradb)
Example #9
def gradient_check(lam, lin_neurons, with_BN):
    # prepare a subset of the train data
    subset = 50
    grad_train_img = train['images'][:subset, :].T
    grad_train_truth = train['one_hot'][:subset, :].T

    count = 0
    layers = []

    for N in lin_neurons:
        not_last_layer = count < (len(lin_neurons) - 1)
        layers.append(
            Linear(cifar.in_size if count == 0 else lin_neurons[count - 1],
                   N if not_last_layer else cifar.out_size,
                   lam=lam))
        if not_last_layer:
            if with_BN:
                layers.append(BatchNorm(N))
            layers.append(ReLU(N))
        count += 1
    if len(lin_neurons) == 1 and with_BN:
        layers.append(BatchNorm(cifar.out_size))
    layers.append(Softmax(cifar.out_size))
    # init the network
    print(["{}:{},{}".format(l.name, l.in_size, l.out_size) for l in layers])
    g_net = Net(layers, lam=lam, l_rate=0.001, decay=0.99, mom=0.99)

    # do the pass
    grad_out = g_net.forward(grad_train_img, train=True)
    g_net.backward(grad_train_truth)
    cost = g_net.cost(grad_train_truth, out=grad_out)

    # calc the numeric grad for each linear layer
    for linear in [l for l in layers if l.isActivation == False]:
        num_gradient(grad_train_img, grad_train_truth, g_net, linear, cost)
Example #10
def sample(rnn, seed_ix, n):
    """ 
    sample a sequence of integers from the model 
    h is memory state, seed_ix is seed letter for first time step
    """
    x = np.zeros(vocab_size)
    x[seed_ix] = 1
    ixes = []
    for t in xrange(n):
        y = rnn.nodes[-1].forward(x, is_training=False)
        p = Softmax().forward(y)
        ix = np.random.choice(range(vocab_size), p=p.ravel())
        x = np.zeros(vocab_size)
        x[ix] = 1
        ixes.append(ix)
    return ixes
Example #11
def gradient_check():
    # prepare a subset of the train data
    subset = 50
    grad_train_img = train['images'][:subset, :].T
    grad_train_truth = train['one_hot'][:subset, :].T

    # init the network
    N_hidden = 50
    lin = [
        Linear(cifar.in_size, N_hidden, lam=0.1),
        Linear(N_hidden, cifar.out_size, lam=0.1)
    ]
    g_net = Net(
        [lin[0], ReLU(N_hidden), lin[1],
         Softmax(cifar.out_size)],
        lam=0.1,
        l_rate=0.001,
        decay=0.99,
        mom=0.99)

    # do the pass
    grad_out = g_net.forward(grad_train_img)
    g_net.backward(grad_train_truth)
    cost = g_net.cost(grad_train_truth, out=grad_out)

    # calc the numeric grad for each linear layer
    for linear in lin:
        num_gradient(grad_train_img, grad_train_truth, g_net, linear, cost)
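
The compute_grads / num_gradient helpers used for the checks above are external to these snippets. A generic central-difference checker, sketched here under the assumption that the network's loss can simply be re-evaluated after perturbing a parameter array in place, is:

import numpy as np

def numerical_grad(loss_fn, param, eps=1e-5):
    """Central-difference gradient of loss_fn() with respect to the array param.

    Each entry is perturbed in place, the loss re-evaluated, and the entry
    restored before moving on.
    """
    grad = np.zeros_like(param)
    it = np.nditer(param, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        original = param[idx]
        param[idx] = original + eps
        loss_plus = loss_fn()
        param[idx] = original - eps
        loss_minus = loss_fn()
        param[idx] = original  # restore the original value
        grad[idx] = (loss_plus - loss_minus) / (2 * eps)
        it.iternext()
    return grad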
Example #12
def getNetwork():
	'''
	to obtain network structure from specified file
	'''
	file_name = "models/structure.json"
	if len(sys.argv)>1:
		file_name = sys.argv[1]
	f = file(file_name, "r")
	s = f.read()
	f.close()

	networks = json.loads(s)
	for network in networks:
		config = network['config']
		dis_model = network['model']
		model = Network()
		for layer in dis_model:
			if layer['type'] == 'Linear':
				model.add(Linear(layer['name'], layer['in_num'], layer['out_num'], layer['std']))
			if layer['type'] == 'Relu':
				model.add(Relu(layer['name']))
			if layer['type'] == 'Sigmoid':
				model.add(Sigmoid(layer['name']))
			if layer['type'] == 'Softmax':
				model.add(Softmax(layer['name']))
		loss = EuclideanLoss('loss')
		if 'loss' in config:
			if config['loss'] == 'CrossEntropyLoss':
				loss = CrossEntropyLoss('loss')
		yield network['name'], model, config, loss
Example #13
def network_setup(model_file_path=None):
    freq_count = 4000
    count_bins = 88 * 20
    dataset = MapsDB('../db',
                     freq_count=freq_count,
                     count_bins=count_bins,
                     batch_size=128,
                     start_time=0.5,
                     duration=0.5)
    model = Network()
    model.add(Linear('fc1', dataset.get_vec_input_width(), 2048, 0.001))
    model.add(Sigmoid('sigmoid1'))
    model.add(Linear('fc2', 2048, dataset.get_label_width(), 0.001))
    model.add(Softmax('softmax2'))

    loss = CrossEntropyLoss(name='xent')
    # loss = EuclideanLoss(name='r2')

    optim = SGDOptimizer(learning_rate=0.00001, weight_decay=0.005, momentum=0.9)
    # optim = AdagradOptimizer(learning_rate=0.001, eps=1e-6)

    input_placeholder = T.fmatrix('input')
    label_placeholder = T.fmatrix('label')
    label_active_size_placeholder = T.ivector('label_active_size')

    if model_file_path:
        model.loads(model_file_path)
    else:
        dataset.load_cache()

    model.compile(input_placeholder, label_placeholder, label_active_size_placeholder, loss, optim)
    return model, dataset, freq_count, count_bins
Example #14
 def test_LinearSoftmax(self):
     model = Seq()
     model.add(Linear(2, 1))
     model.add(Softmax())
     data = np.array([2., 3.])
     out = model.forward(data)
     self.assertEqual(out, 1.)
Example #15
    def __init__(self,
                 input_size=INPUT_SIZE,
                 output_size=OUTPUT_SIZE,
                 hidden_size=HIDDEN_SIZE,
                 embed_size=EMBED_SIZE,
                 lr=LEARNING_RATE,
                 clip_grad=CLIP_GRAD,
                 init_range=INIT_RANGE):
        # this model will generate a vector representation based on the input
        input_layers = [
            Embedding(input_size, embed_size, init_range),
            Lstm(embed_size, hidden_size, init_range),
        ]

        # this model will generate an output sequence based on the hidden vector
        output_layers = [
            Embedding(output_size, embed_size, init_range),
            Lstm(embed_size, hidden_size, init_range,
                 previous=input_layers[1]),
            Softmax(hidden_size, output_size, init_range)
        ]

        self.input_layers, self.output_layers = input_layers, output_layers
        self.hidden_size = hidden_size
        self.embed_size = embed_size
        self.input_size = input_size
        self.output_size = output_size
        self.lr = lr
        self.clip_grad = clip_grad
Example #16
def main():
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    print 'Imported MNIST data: training input %s and training labels %s.' % (
        x_train.shape, y_train.shape)
    print 'Imported MNIST data: test input %s and test labels %s.' % (
        x_test.shape, y_test.shape)

    N, H, W = x_train.shape
    x = x_train.reshape((N, H * W)).astype('float') / 255
    y = to_categorical(y_train, num_classes=10)

    model = Sequential()
    model.add(Dense(), ReLU(), layer_dim=(28 * 28, 300), weight_scale=1e-2)
    model.add(Dense(), ReLU(), layer_dim=(300, 100), weight_scale=1e-2)
    model.add(Dense(), Softmax(), layer_dim=(100, 10), weight_scale=1e-2)

    model.compile(optimizer=GradientDescent(learning_rate=1e-2),
                  loss_func=categorical_cross_entropy)
    model.fit(x, y, epochs=10, batch_size=50, verbose=False)

    N, H, W = x_test.shape
    x = x_test.reshape((N, H * W)).astype('float') / 255
    y = to_categorical(y_test, num_classes=10)

    model.evaluate(x, y)
Example #17
def generate_sequence(rnn_layer,
                      first_elem,
                      ind_to_char,
                      char_to_ind,
                      length=10):
    # print(rnn_layer.h)
    soft = Softmax()

    def sample(array):
        n = min(20, array.shape[0])
        ind = np.argpartition(array, -n)[-n:]
        indx = np.random.choice(ind, p=soft(array[ind]))
        return indx

    k = len(ind_to_char)
    x = np.array([char_to_ind[elem] for elem in first_elem])
    x = one_hotify(x, num_classes=k)
    string = first_elem
    for i in range(length):
        probs = rnn_layer(x)
        string += stringify(probs, ind_to_char)
        next_elem = []
        for j in range(probs.shape[-1]):
            next_elem.append(sample(probs[:, j]))
        x = one_hotify(np.array(next_elem), k)
        # print(np.average(np.abs(rnn_layer.h)))
    return string
Example #18
def test3layergradients(samples=1, dimensions=3072):

    print("\n\nTesting 3-layer gradients using a batch size of {}".format(samples))
    trainingData, trainingLabels, encodedTrainingLabels = loadData("Datasets/cifar-10-batches-mat/data_batch_1.mat")

    
    trainingData = trainingData[0:dimensions, 0:samples]
    trainingLabels = trainingLabels[0:dimensions, 0:samples]
    encodedTrainingLabels = encodedTrainingLabels[0:dimensions, 0:samples]
    
    network = Model()

    linear = Linear(dimensions, 50, regularization=0.00, initializer="he")
    network.addLayer(linear)
    network.addLayer(Relu())

    linear2 = Linear(50, 30, regularization=0.00, initializer="he")
    network.addLayer(linear2)
    network.addLayer(Relu())

    linear3 = Linear(30, 10, regularization=0.00, initializer="he")
    network.addLayer(linear3)
    network.addLayer(Softmax())

    sgd = SGD(lr=0.001, lr_decay=1.0, momentum=0.0, shuffle=True)
    network.compile(sgd, "cce")

    network.predict(trainingData, updateInternal=True)
    network.backpropagate(encodedTrainingLabels)
    
    timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')
    
    numerical_gradW1 = compute_grads_w_BN(1e-4, linear.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb1 = compute_grads_w_BN(1e-4, linear.b, trainingData, encodedTrainingLabels, network)

    numerical_gradW2 = compute_grads_w_BN(1e-4, linear2.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb2 = compute_grads_w_BN(1e-4, linear2.b, trainingData, encodedTrainingLabels, network)

    numerical_gradW3 = compute_grads_w_BN(1e-4, linear3.W, trainingData, encodedTrainingLabels, network)
    numerical_gradb3 = compute_grads_w_BN(1e-4, linear3.b, trainingData, encodedTrainingLabels, network)



    print("W1")
    relative_errorW = grad_difference(linear.gradW, numerical_gradW1)
    print("b1")
    relative_errorb = grad_difference(linear.gradb, numerical_gradb1)

    print("W2")
    relative_errorW2 = grad_difference(linear2.gradW, numerical_gradW2)
    print("b2")
    relative_errorb2 = grad_difference(linear2.gradb, numerical_gradb2)
    
    print("W3")
    relative_errorW3 = grad_difference(linear3.gradW, numerical_gradW3)
    print("b3")
    relative_errorb3 = grad_difference(linear3.gradb, numerical_gradb3)

    print("\n")
Example #19
def test_softmax():
    from layers import Softmax

    x = T.tensor3()
    f = theano.function([x], Softmax()(x))

    x = np.ones((batch_size, time_steps, input_size))
    assert f(x).shape == (batch_size * time_steps, input_size)
Example #20
    def _loss_function(self, layer_input_output_cache, y_true):
        logits = layer_input_output_cache[-1]
        y_pred = Softmax().forward(logits)

        softmax_cross_entropy_loss = -1.0 / len(y_true) * np.sum(
            [y_true[i] * np.log(y_pred[i]) for i in range(len(y_true))])
        softmax_cross_entropy_grad = y_pred - y_true
        return softmax_cross_entropy_loss, softmax_cross_entropy_grad
Example #21
	def __init__(self, name = "CIFAR10.pixelCNN", input_dim = 3, dims = 32, q_levels = 256, layers = 3,
				grad_clip = 1):
		# self.model = Model(name = model_name)
		self.name = name
		self.grad_clip = grad_clip
		self.is_train = T.scalar()
		self.X = T.tensor4('X') # shape: (batchsize, channels, height, width)
		self.X_r = T.itensor4('X_r')
		# print self.X.shape
		# return 
		self.X_transformed = self.X_r.dimshuffle(0,2,3,1)
		self.input_layer = WrapperLayer(self.X.dimshuffle(0,2,3,1)) # input reshaped to (batchsize, height, width,3)
		self.q_levels = q_levels
		self.pixel_CNN = pixelConv(
			self.input_layer, 
			input_dim, 
			dims,
			Q_LEVELS = q_levels,
			name = self.name + ".pxCNN",
			num_layers = layers,
			)
		print "done1"
		self.params = self.pixel_CNN.get_params()
		self.output_probab = Softmax(self.pixel_CNN).output()
		print "done2"
		self.cost = T.nnet.categorical_crossentropy(
			self.output_probab.reshape((-1,self.output_probab.shape[self.output_probab.ndim - 1])),
			self.X_r.flatten()
			).mean()
		self.output_image = sample_from_softmax(self.output_probab)
		print "done3"
		grads = T.grad(self.cost, wrt=self.params, disconnected_inputs='warn')
		self.grads = [T.clip(g, floatX(-grad_clip), floatX(grad_clip)) for g in grads]
		print "done5"
		# learning_rate = T.scalar('learning_rate')
		self.updates = lasagne.updates.adam(self.grads, self.pixel_CNN.get_params(), learning_rate = 1e-3)
		print "d6"
		self.train_fn = theano.function([self.X, self.X_r], self.cost, updates = self.updates)
		print "done4"
		self.valid_fn = theano.function([self.X, self.X_r], self.cost)
		print "go to hell"
		self.generate_routine = theano.function([self.X], self.output_image)

		self.errors = {'training' : [], 'validation' : []}
		print "yo"
Example #22
def create_output_node(model=None, input_sequences=None, num_gru=None, old_h0s=None, reset=None, num_pixelCNN_layer = None):
	assert(model is not None)
	assert(input_sequences is not None)
	assert(num_gru is not None)
	assert(old_h0s is not None)
	assert(reset is not None)
	assert(num_pixelCNN_layer is not None)

	new_h0s = T.zeros_like(old_h0s)
	h0s = theano.ifelse.ifelse(reset, new_h0s, old_h0s)

	embedding_layer = Embedding(Q_LEVELS, DIM, input_sequences, name = model.name+"Embedding.Q_LEVELS")
	model.add_layer(embedding_layer)


	prev_out = embedding_layer.output()
	last_layer = WrapperLayer(prev_out.reshape((prev_out.shape[0], prev_out.shape[1], WIDTH, DEPTH)))

	pixel_CNN = pixelConv(
		last_layer,
		DEPTH,
		DEPTH,
		name = model.name + ".pxCNN",
		num_layers = NUM_PIXEL_CNN_LAYER
	)

	prev_out = pixel_CNN.output()
	last_layer = WrapperLayer(prev_out.reshape((prev_out.shape[0], prev_out.shape[1], -1)))

	last_hidden_list = []

	for i in range(num_gru):
		gru_layer = GRU(DIM, DIM, last_layer, s0 = h0s[i,:,:], name = model.name+"GRU_{}".format(i))
		last_hidden_list.append(gru_layer.output()[:,-1])
		model.add_layer(gru_layer)
		last_layer = gru_layer

	fc1 = FC(DIM, Q_LEVELS, last_layer, name = model.name+"FullyConnected")
	model.add_layer(fc1)

	softmax = Softmax(fc1, name= model.name+"Softmax")
	model.add_layer(softmax)

	return softmax.output(), T.stack(last_hidden_list, axis = 0)
Example #23
 def test_score(model, test_set):
     test_err = 0.
     for x, target in test_set:
         y = model.forward(x)
         y = Softmax().forward(y)
         # print(y, np.argmax(y), np.argmax(target))
         if np.argmax(y) != np.argmax(target):
             test_err += 1.
     test_score = (1.0 - test_err / float(len(test_set))) * 100.0
     return test_score
Example #24
    def addSoftmaxLayer(self, **kwargs):
        """
        Add softmax multi-class classification layer.
        """

        input_layer = self.input_layer if not self.all_layers \
            else self.all_layers[-1]
        new_layer = Softmax(input_layer, **kwargs)

        self.all_layers += (new_layer, )

        self.n_layers = len(self.all_layers)
Example #25
    def adddiscriminator(self,num_1,num_2):

        input_layer = self.feature_layer
        name = "cate_1"
        new_layer1 = DenseLayer(input_layer, name= name, num_units=num_1 )
        #self.all_layers += (new_layer1,)
        self.trainable_layers += (new_layer1,)
        name = "cate_2"
        new_layer2 = DenseLayer(new_layer1, name = name, num_units=num_2)
        #self.all_layers += (new_layer2,)
        self.trainable_layers += (new_layer2,)

        category = Softmax(new_layer2)
        self.category_layer = category
Example #26
def test_model_with_softmax():
    from models import Sequential
    from layers import Linear, Softmax

    inputs = np.array([[0.25, 0.63, 0.12]])
    targets = np.array([0, 1, 0])

    model = Sequential()
    model.add(Linear(3, 3, activation=Softmax()))
    predictions = model.feed_forward(inputs)
    loss = ce.loss(predictions, targets)
    for i in range(len(predictions)):
        gradient = ce.backward(predictions[i], targets[i])
        print("grad", gradient)
Example #27
def regularizationSearch():

    trainingData, trainingLabels, \
    validationData, validationLabels, \
    testingData, testingLabels = loadAllData("Datasets/cifar-10-batches-mat/", valsplit=0.10)    

    bestLambda = 0.0
    bestValAcc = 0.0
    bestLoss = 0.0
    
    for lambdaValue in np.arange(0, 0.2, 0.005):

        network = Model()
        network.addLayer(Linear(32*32*3, 50, regularization=lambdaValue, initializer="he"))
        network.addLayer(BatchNormalization(50, trainable=True))
        network.addLayer(Relu())

        network.addLayer(Linear(50, 30, regularization=lambdaValue, initializer="he"))
        network.addLayer(BatchNormalization(30, trainable=True))
        network.addLayer(Relu())

        network.addLayer(Linear(30,10, regularization=lambdaValue, initializer="he"))
        network.addLayer(Softmax())

        sgd = SGD(lr=0.01, lr_decay=0.95, momentum=0.7, shuffle=True, lr_min=1e-5)  
    
        network.compile(sgd, "cce")
        
        timestamp = datetime.now().strftime('%Y-%b-%d--%H-%M-%S')

        network.fit(trainingData, trainingLabels, epochs=20, validationData=(validationData, validationLabels), batch_size=64)

        
        #plotAccuracy(network, "plots/", timestamp)
        #plotLoss(network, "plots/", timestamp)
        
        print("Lambda:{}".format(lambdaValue))
        loss, acc = network.evaluate(validationData, validationLabels)
        print("Val loss: {} , Val acc: {}".format(loss, acc) )
        print("\n\n")
        
        if acc > bestValAcc:
            bestLambda = lambdaValue
            bestValAcc = acc
            bestLoss = loss
    
    return bestLambda, bestValAcc, bestLoss
Example #28
def build_network(hidden_layer_sizes: List[int], batch_normalized: bool,
                  regularization: float) -> Network:
    net = Network()
    layer_sizes = [CIFAR10.input_size
                   ] + hidden_layer_sizes + [CIFAR10.output_size]
    for i, (size_in,
            size_out) in enumerate(zip(layer_sizes[:-1], layer_sizes[1:])):
        net.add_layer(
            Linear(size_in,
                   size_out,
                   regularization,
                   Xavier(),
                   name='Li' + str(i + 1)))
        if i < len(layer_sizes) - 2:
            if batch_normalized:
                net.add_layer(
                    BatchNormalization(size_out, name='Bn' + str(i + 1)))
            net.add_layer(ReLU(size_out, name='Re' + str(i + 1)))
        else:
            net.add_layer(Softmax(size_out, name='S'))
    return net
Example #29
def tryParameters(test_name,
                  lin_neurons,
                  with_BN,
                  lam,
                  l_rate,
                  decay,
                  mom,
                  epochs=50,
                  batch_size=250):

    count = 0
    layers = []

    for N in lin_neurons:
        not_last_layer = count < (len(lin_neurons) - 1)
        layers.append(
            Linear(cifar.in_size if count == 0 else lin_neurons[count - 1],
                   N if not_last_layer else cifar.out_size,
                   lam=lam))
        if not_last_layer:
            if with_BN:
                layers.append(BatchNorm(N))
            layers.append(ReLU(N))
        count += 1
    if len(lin_neurons) == 1 and with_BN:
        layers.append(BatchNorm(cifar.out_size))
    layers.append(Softmax(cifar.out_size))
    # init the network
    print(["{}:{},{}".format(l.name, l.in_size, l.out_size) for l in layers])
    net = Net(layers, lam=lam, l_rate=l_rate, decay=0.99, mom=0.99)
    results = net.trainMiniBatch(train, val, epochs, batch_size, shuffle=True)
    print('{} Test Accuracy: {:.2f}'.format(
        test_name, net.accuracy(test['one_hot'].T, test['images'].T)))
    print('Final train a/c, val a/c: {:.2f}/{:.2f}, {:.2f}/{:.2f}'.format(
        results['last_a_train'], results['last_c_train'],
        results['last_a_val'], results['last_c_val']))
    plotResults(test_name, results['a_train'], results['c_train'],
                results['a_val'], results['c_val'])
    #weights_plot(net, "plots/weights_vizualisation_{}.png".format(test_name), labels)
    return results
Example #30
from network import Network
from data_preparation import load_data
from solve_rnn import solve_rnn

import theano.tensor as T

X_train, y_train, X_test, y_test = load_data()

HIDDEN_DIM = 32
INPUT_DIM = 20
OUTPUT_DIM = 10

model = Network()
model.add(RNN('rnn1', HIDDEN_DIM, INPUT_DIM, 0.1))      # output shape: 4 x HIDDEN_DIM
model.add(Linear('fc', HIDDEN_DIM, OUTPUT_DIM, 0.1))    # output shape: 4 x OUTPUT_DIM
model.add(Softmax('softmax'))

loss = CrossEntropyLoss('xent')

optim = SGDOptimizer(0.01, 0.0001, 0.9)
input_placeholder = T.fmatrix('input')
label_placeholder = T.fmatrix('label')

model.compile(input_placeholder, label_placeholder, loss, optim)

MAX_EPOCH = 6
DISP_FREQ = 1000
TEST_FREQ = 10000

solve_rnn(model, X_train, y_train, X_test, y_test,
          MAX_EPOCH, DISP_FREQ, TEST_FREQ)
Example #31
X_r = T.itensor3('X_r') #shape: (batchsize, height, width)

input_layer = WrapperLayer(X.dimshuffle(0,1,2,'x')) # input reshaped to (batchsize, height, width,1)

pixel_CNN = pixelConv(
	input_layer, 
	1, 
	DIM,
	name = model.name + ".pxCNN",
	num_layers = 12,
	Q_LEVELS = Q_LEVELS
	)

model.add_layer(pixel_CNN)

output_probab = Softmax(pixel_CNN).output()

# in nats
cost = T.nnet.categorical_crossentropy(
	output_probab.reshape((-1,output_probab.shape[output_probab.ndim - 1])),
	X_r.flatten()
	).mean()

output_image = sample_from_softmax(output_probab)



model.print_params() 

params = model.get_params()