def build_model(text_len, negative_size, optimizer, word_size, entity_size,
                dim_size, word_static, entity_static, word_embedding,
                entity_embedding):
    text_input_layer = Input(shape=(text_len,), dtype='int32')
    word_embed_layer = Embedding(word_size, dim_size, input_length=text_len,
                                 name='word_embedding', weights=[word_embedding],
                                 trainable=not word_static)(text_input_layer)
    text_layer = TextRepresentationLayer(name='text_layer')(
        [word_embed_layer, text_input_layer])

    entity_input_layer = Input(shape=(negative_size + 1,), dtype='int32')
    entity_embed_layer = Embedding(entity_size, dim_size,
                                   input_length=negative_size + 1,
                                   name='entity_embedding',
                                   weights=[entity_embedding],
                                   trainable=not entity_static)(entity_input_layer)

    similarity_layer = DotLayer(name='dot_layer')(
        [RepeatVector(negative_size + 1)(text_layer), entity_embed_layer])
    predictions = SoftmaxLayer()(similarity_layer)

    model = Model(input=[text_input_layer, entity_input_layer], output=predictions)
    model.compile(optimizer=optimizer, loss='categorical_crossentropy',
                  metrics=['accuracy'])
    return model
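A hypothetical call sketch for build_model, with made-up vocabulary sizes and randomly initialised embedding matrices purely for illustration (in practice the embeddings would come from pretrained word/entity vectors):

import numpy as np

# Hypothetical sizes and randomly initialised embeddings, for illustration only.
word_emb = np.random.uniform(-0.05, 0.05, (10000, 300)).astype('float32')
entity_emb = np.random.uniform(-0.05, 0.05, (50000, 300)).astype('float32')

model = build_model(text_len=100, negative_size=29, optimizer='adagrad',
                    word_size=10000, entity_size=50000, dim_size=300,
                    word_static=True, entity_static=False,
                    word_embedding=word_emb, entity_embedding=entity_emb)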
def __init__(self, inshape, hidshape, noutputs):
    ninput = np.prod(inshape)
    nhid = np.prod(hidshape)
    nparams = (ninput + 1) * nhid + (nhid * noutputs)
    # TODO:
    self.params = np.empty(nparams)
    self._grad = np.empty(nparams)
    inhidwts = ninput * nhid
    hidoutwts = nhid * noutputs
    self.layers = [
        LinearLayer(inshape, hidshape,
                    params=self.params[0:inhidwts],
                    grad=self._grad[0:inhidwts]),
        LogisticLayer(hidshape,
                      params=self.params[inhidwts:(inhidwts + nhid)],
                      grad=self._grad[inhidwts:(inhidwts + nhid)]),
        LinearLayer(hidshape, noutputs,
                    params=self.params[(inhidwts + nhid):],
                    grad=self._grad[(inhidwts + nhid):]),
        SoftmaxLayer()
    ]
def test_softmax_layer():
    x_train = np.array([[5.1, 3.5, 1.4, 0.2],
                        [4.9, 3.0, 1.4, 0.2],
                        [7.0, 3.2, 4.7, 1.4],
                        [6.4, 3.2, 4.5, 1.5],
                        [6.3, 3.3, 6.0, 2.5],
                        [5.8, 2.7, 5.1, 1.9]])
    y_train = np.array([0, 0, 1, 1, 2, 2])
    softmax = SoftmaxLayer()
    W = 0.001 * np.random.randn(4, 3)
    b = np.zeros((1, 3))
    reg_parameter = 0.001
    g_numerical_W = eval_numerical_gradient(softmax.forward_pass, x_train, y_train,
                                            W, b, reg_parameter)
    g_analytical_W = eval_analytical_gradient(softmax, x_train, y_train,
                                              W, b, reg_parameter)
    assert check_gradient(g_numerical_W, g_analytical_W) <= 1e-7, \
        "Error in calculating gradient of the SoftmaxLayer"
def test_tanh_layer():
    x_train = np.array([[5.1, 3.5, 1.4, 0.2],
                        [4.9, 3.0, 1.4, 0.2],
                        [7.0, 3.2, 4.7, 1.4],
                        [6.4, 3.2, 4.5, 1.5],
                        [6.3, 3.3, 6.0, 2.5],
                        [5.8, 2.7, 5.1, 1.9]])
    y_train = np.array([0, 0, 1, 1, 2, 2])
    W1 = np.random.randn(4, 10) * 0.001
    b1 = np.zeros((1, 10))
    W2 = np.random.randn(10, 6) * 0.001
    b2 = np.zeros((1, 6))
    softmax = SoftmaxLayer()
    tanh = TanhLayer()
    reg_parameter = 0.001
    g_numerical_W = eval_hidden_numerical_gradient(tanh, softmax, x_train, y_train,
                                                   W1, b1, W2, b2, reg_parameter)
    g_analytical_W = eval_hidden_analytical_gradient(tanh, softmax, x_train, y_train,
                                                     W1, b1, W2, b2, reg_parameter)
    assert check_gradient(g_numerical_W, g_analytical_W) <= 1e-7, \
        "Error in calculating gradient of the TanhLayer"
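Both tests compare a numerical and an analytical gradient through a check_gradient helper and assert the result is below 1e-7. A helper of this kind usually returns the maximum elementwise relative error between the two gradients; a minimal sketch under that assumption (the project's actual helper may differ):

import numpy as np

def check_gradient(g_numerical, g_analytical, eps=1e-12):
    # Maximum relative error between the two gradient estimates.
    num = np.abs(g_numerical - g_analytical)
    den = np.maximum(np.abs(g_numerical) + np.abs(g_analytical), eps)
    return np.max(num / den)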
def __init__(self, config):
    self.config = config
    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    # 'rand' is a random array used for random cropping/mirroring of data
    x = T.ftensor4('x')
    y = T.ivector('y')
    rand = T.fvector('rand')

    print '... building the model'

    self.layers = []
    params = []
    weight_types = []

    if flag_datalayer:
        data_layer = DataLayer(input=x, image_shape=(3, 256, 256, batch_size),
                               cropsize=227, rand=rand, mirror=True,
                               flag_rand=config['rand_crop'])
        layer1_input = data_layer.output
    else:
        layer1_input = x

    convpool_layer1 = ConvPoolLayer(input=layer1_input,
                                    image_shape=(3, 227, 227, batch_size),
                                    filter_shape=(3, 11, 11, 96),
                                    convstride=4, padsize=0, group=1,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.0, lrn=True,
                                    lib_conv=lib_conv)
    self.layers.append(convpool_layer1)
    params += convpool_layer1.params
    weight_types += convpool_layer1.weight_type

    convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                    image_shape=(96, 27, 27, batch_size),
                                    filter_shape=(96, 5, 5, 256),
                                    convstride=1, padsize=2, group=2,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.1, lrn=True,
                                    lib_conv=lib_conv)
    self.layers.append(convpool_layer2)
    params += convpool_layer2.params
    weight_types += convpool_layer2.weight_type

    convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                    image_shape=(256, 13, 13, batch_size),
                                    filter_shape=(256, 3, 3, 384),
                                    convstride=1, padsize=1, group=1,
                                    poolsize=1, poolstride=0,
                                    bias_init=0.0, lrn=False,
                                    lib_conv=lib_conv)
    self.layers.append(convpool_layer3)
    params += convpool_layer3.params
    weight_types += convpool_layer3.weight_type

    convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                    image_shape=(384, 13, 13, batch_size),
                                    filter_shape=(384, 3, 3, 384),
                                    convstride=1, padsize=1, group=2,
                                    poolsize=1, poolstride=0,
                                    bias_init=0.1, lrn=False,
                                    lib_conv=lib_conv)
    self.layers.append(convpool_layer4)
    params += convpool_layer4.params
    weight_types += convpool_layer4.weight_type

    convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                    image_shape=(384, 13, 13, batch_size),
                                    filter_shape=(384, 3, 3, 256),
                                    convstride=1, padsize=1, group=2,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.0, lrn=False,
                                    lib_conv=lib_conv)
    self.layers.append(convpool_layer5)
    params += convpool_layer5.params
    weight_types += convpool_layer5.weight_type

    fc_layer6_input = T.flatten(convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
    fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096)
    self.layers.append(fc_layer6)
    params += fc_layer6.params
    weight_types += fc_layer6.weight_type

    dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096)

    fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096)
    self.layers.append(fc_layer7)
    params += fc_layer7.params
    weight_types += fc_layer7.weight_type

    dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096)

    softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output, n_in=4096, n_out=1000)
    self.layers.append(softmax_layer8)
    params += softmax_layer8.params
    weight_types += softmax_layer8.weight_type

    # #################### NETWORK BUILT #######################

    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.errors = softmax_layer8.errors(y)
    self.errors_top_5 = softmax_layer8.errors_top_x(y, 5)
    self.params = params
    self.x = x
    self.y = y
    self.rand = rand
    self.weight_types = weight_types
    self.batch_size = batch_size
def __init__(self, input_shape=(None, 3, None, None), n_classes=6,
             n_filters_first_conv=48, n_pool=4, growth_rate=12,
             n_layers_per_block=5, dropout_p=0.5):
    """
    This code implements the Fully Convolutional DenseNet described in
    https://arxiv.org/abs/1611.09326

    The network consists of a downsampling path, where dense blocks and
    transitions down are applied, followed by an upsampling path where
    transitions up and dense blocks are applied. Skip connections are used
    between the downsampling path and the upsampling path. Each layer is a
    composite function of BN - ReLU - Conv and the last layer is a softmax layer.

    :param input_shape: shape of the input batch. Only the first dimension (n_channels) is needed
    :param n_classes: number of classes
    :param n_filters_first_conv: number of filters for the first convolution applied
    :param n_pool: number of pooling layers = number of transitions down = number of transitions up
    :param growth_rate: number of new feature maps created by each layer in a dense block
    :param n_layers_per_block: number of layers per block. Can be an int or a list of size 2 * n_pool + 1
    :param dropout_p: dropout rate applied after each convolution (0. for not using)
    """

    if type(n_layers_per_block) == list:
        assert (len(n_layers_per_block) == 2 * n_pool + 1)
    elif type(n_layers_per_block) == int:
        n_layers_per_block = [n_layers_per_block] * (2 * n_pool + 1)
    else:
        raise ValueError

    # Theano variables
    self.input_var = T.tensor4('input_var', dtype='float32')  # input image
    self.target_var = T.tensor4('target_var', dtype='int32')  # target

    #####################
    # First Convolution #
    #####################

    inputs = InputLayer(input_shape, self.input_var)

    # We perform a first convolution. All the feature maps will be stored in
    # the tensor called stack (the Tiramisu)
    stack = Conv2DLayer(inputs, n_filters_first_conv, filter_size=3, pad='same',
                        W=HeUniform(gain='relu'), nonlinearity=linear,
                        flip_filters=False)
    # The number of feature maps in the stack is stored in the variable n_filters
    n_filters = n_filters_first_conv

    #####################
    # Downsampling path #
    #####################

    skip_connection_list = []

    for i in range(n_pool):
        # Dense Block
        for j in range(n_layers_per_block[i]):
            # Compute new feature maps
            l = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
            # And stack it: the Tiramisu is growing
            stack = ConcatLayer([stack, l])
            n_filters += growth_rate
        # At the end of the dense block, the current stack is stored in the
        # skip_connections list
        skip_connection_list.append(stack)

        # Transition Down
        stack = TransitionDown(stack, n_filters, dropout_p)

    skip_connection_list = skip_connection_list[::-1]

    #####################
    #    Bottleneck     #
    #####################

    # We now store the output of the next dense block in a list.
    # We will only upsample these new feature maps
    block_to_upsample = []

    # Dense Block
    for j in range(n_layers_per_block[n_pool]):
        l = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
        block_to_upsample.append(l)
        stack = ConcatLayer([stack, l])

    #######################
    #   Upsampling path   #
    #######################

    for i in range(n_pool):
        # Transition Up (upsampling + concatenation with the skip connection)
        n_filters_keep = growth_rate * n_layers_per_block[n_pool + i]
        stack = TransitionUp(skip_connection_list[i], block_to_upsample,
                             n_filters_keep)

        # Dense Block
        block_to_upsample = []
        for j in range(n_layers_per_block[n_pool + i + 1]):
            l = BN_ReLU_Conv(stack, growth_rate, dropout_p=dropout_p)
            block_to_upsample.append(l)
            stack = ConcatLayer([stack, l])

    #####################
    #      Softmax      #
    #####################

    self.output_layer = SoftmaxLayer(stack, n_classes)
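For context, the BN_ReLU_Conv composite used throughout the constructor above is, per the docstring, a BN - ReLU - Conv (- Dropout) stack. A minimal Lasagne sketch under that assumption (the repository's own helper may differ in initialisation or dropout details):

from lasagne.layers import BatchNormLayer, NonlinearityLayer, Conv2DLayer, DropoutLayer
from lasagne.nonlinearities import rectify
from lasagne.init import HeUniform

def BN_ReLU_Conv(inputs, n_filters, filter_size=3, dropout_p=0.2):
    # Batch norm, then ReLU, then a 'same'-padded convolution, then optional dropout.
    l = NonlinearityLayer(BatchNormLayer(inputs), rectify)
    l = Conv2DLayer(l, n_filters, filter_size, pad='same',
                    W=HeUniform(gain='relu'), nonlinearity=None,
                    flip_filters=False)
    if dropout_p:
        l = DropoutLayer(l, dropout_p)
    return l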
import skimage.measure
import pickle

from readlabel import read_image
from network import Network
from layers import ConvPoolLayer, FullyConnectedLayer, SoftmaxLayer, ReLU, Sigmoid

whole_data = read_image(path1='test_images/', path2='./test_annotation',
                        data_size=1050)
whole_x = whole_data[0]
mean = whole_x.mean(axis=0)
std = whole_x.std(axis=0)
whole_x = (whole_x - mean) / std
whole_y = whole_data[1]

test_x = whole_x
test_y = whole_y
test_data = [test_x, test_y]

mini_batch_size = 1

# final
net = Network([ConvPoolLayer(filter_shape=(5, 5, 3, 9),
                             image_shape=(mini_batch_size, 64, 64, 3),
                             poolsize=2, activation_fn=ReLU),
               ConvPoolLayer(filter_shape=(5, 5, 9, 18),
                             image_shape=(mini_batch_size, 30, 30, 9),
                             poolsize=2, activation_fn=ReLU),
               ConvPoolLayer(filter_shape=(4, 4, 18, 36),
                             image_shape=(mini_batch_size, 13, 13, 18),
                             poolsize=2, activation_fn=ReLU),
               FullyConnectedLayer(n_in=900, n_out=225, activation_fn=ReLU),
               FullyConnectedLayer(n_in=225, n_out=50, activation_fn=ReLU),
               SoftmaxLayer(n_in=50, n_out=20, activation_fn=None)],
              mini_batch_size)

print('start')
net.load_test(mini_batch_size, test_data, path='./finalparams_noact.pickle')
# Fragment: tail of a prediction method, followed by module-level network setup
    i = T.lscalar()  # mini-batch index
    self.test_mb_predictions = theano.function(
        [i], self.layers[-1].y_out,
        givens={self.x: observation},
        on_unused_input='warn')
    return self.test_mb_predictions(0)

# Initialize network
layers = [
    FullyConnectedLayer(n_in=4, n_out=10),
    FullyConnectedLayer(n_in=10, n_out=10),
    SoftmaxLayer(n_in=10, n_out=2)
]
params = [param for layer in layers for param in layer.params]
iterations = mini_batch_size
x = T.vector("x")
y = T.ivector("y")
init_layer = layers[0]
init_layer.set_inpt(x, 1)
for j in xrange(1, len(layers)):
    prev_layer, layer = layers[j - 1], layers[j]
    layer.set_inpt(prev_layer.output, 1)
cost = T.argmax(T.log(layers[-1].output))
import numpy as np

from layers import SoftmaxLayer
from datareader import load_mnist
from constants import *

x_train, y_train = load_mnist(MNIST_TRAINING_X, MNIST_TRAINING_y)
x_train = x_train.reshape(MNIST_NUM_TRAINING, MNIST_NUM_FEATURES)
y_train = y_train.reshape(MNIST_NUM_TRAINING)

# initialize parameters randomly
W = 0.001 * np.random.randn(MNIST_NUM_FEATURES, MNIST_NUM_OUTPUT)
b = np.zeros((1, MNIST_NUM_OUTPUT))

learning_rate = 0.1   # step size of the gradient descent algorithm
reg_parameter = 0.01  # regularization strength

softmax = SoftmaxLayer()
num_iter = 1000
BATCH_SIZE = 500

for i in range(num_iter):
    # sample a mini-batch with replacement
    idx = np.random.choice(MNIST_NUM_TRAINING, BATCH_SIZE, replace=True)
    x_batch = x_train[idx, :]
    y_batch = y_train[idx]

    output_prob, loss = softmax.forward_pass(x_batch, y_batch, W, b,
                                             reg_parameter, [])
    if i % 50 == 0:
        print('iteration: {:3d} loss: {:3e}'.format(i, loss))

    gradW, gradB, _ = softmax.backward_pass(output_prob, x_batch, y_batch,
                                            W, b, reg_parameter)
    W = W - learning_rate * gradW
    b = b - learning_rate * gradB  # also apply the bias gradient returned above
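For reference, a forward_pass of the kind called in this loop typically computes class probabilities with a numerically stable softmax and returns them together with the regularized cross-entropy loss. A minimal sketch under that assumption (the project's SoftmaxLayer signature, including its trailing list argument, is not reproduced here):

import numpy as np

def softmax_forward(x, y, W, b, reg):
    # class scores and numerically stable softmax probabilities
    scores = x.dot(W) + b
    scores -= scores.max(axis=1, keepdims=True)
    exp_scores = np.exp(scores)
    probs = exp_scores / exp_scores.sum(axis=1, keepdims=True)
    # mean cross-entropy loss plus L2 regularization on W
    n = x.shape[0]
    loss = -np.log(probs[np.arange(n), y]).mean() + 0.5 * reg * np.sum(W * W)
    return probs, loss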
test_data = [test_x, test_y]

# final
net = Network([ConvPoolLayer(filter_shape=(5, 5, 3, 9),
                             image_shape=(mini_batch_size, 64, 64, 3),
                             poolsize=2, activation_fn=ReLU),
               ConvPoolLayer(filter_shape=(5, 5, 9, 18),
                             image_shape=(mini_batch_size, 30, 30, 9),
                             poolsize=2, activation_fn=ReLU),
               ConvPoolLayer(filter_shape=(4, 4, 18, 36),
                             image_shape=(mini_batch_size, 13, 13, 18),
                             poolsize=2, activation_fn=ReLU),
               FullyConnectedLayer(n_in=900, n_out=225, activation_fn=ReLU),
               FullyConnectedLayer(n_in=225, n_out=50, activation_fn=ReLU),
               SoftmaxLayer(n_in=50, n_out=20, activation_fn=None)],
              mini_batch_size)

print('start')
net.train_save(training_data, 13, mini_batch_size, 0.001, validation_data,
               test_data, test=False, save=2)
def __init__(self, config):
    self.config = config
    batch_size = config['batch_size']
    num_seq = config['num_seq']
    self.n_timesteps = config['num_timesteps']
    num_joints = config['num_joints']
    classes_num = config['classes_num']

    # ##################### BUILD NETWORK ##########################
    mask = T.fvector('mask')
    y = T.lvector('y')
    target = T.ftensor3('target')
    rand = T.fvector('rand')
    trng = RandomStreams(1234)
    use_noise = T.fscalar('use_noise')

    print '... building the model'

    self.layers = []
    params = []
    weight_types = []

    conv_fea = T.ftensor4('conv_fea')  # (49, 16, 8, 1024)

    lstm_att_layer15 = JointAttentionLstmLayer(config, num_joints,
                                               conv_fea=conv_fea, mask=mask,
                                               batch_size=batch_size,
                                               num_seq=num_seq, trng=trng,
                                               use_noise=use_noise,
                                               n_in=1024 * 5, n_out=1024,
                                               dim_part=32)
    self.layers.append(lstm_att_layer15)
    params += lstm_att_layer15.params
    weight_types += lstm_att_layer15.weight_type
    self.conv_fea = conv_fea

    softmax_input = lstm_att_layer15.output
    softmax_layer15 = SoftmaxLayer(input=softmax_input, n_in=1024, n_out=21)
    self.layers.append(softmax_layer15)
    params += softmax_layer15.params
    weight_types += softmax_layer15.weight_type

    # #################### NETWORK BUILT #######################

    self.cost_nll = softmax_layer15.negative_log_likelihood(y, mask)
    self.cost_jhmdb_attention = T.mean(
        T.sum(T.sum(0.5 * (lstm_att_layer15.attention - target) ** 2, axis=1),
              axis=1),
        axis=0, dtype=theano.config.floatX)
    self.cost = self.cost_nll + self.cost_jhmdb_attention
    self.errors_video = softmax_layer15.errors_video(y, mask, batch_size, num_seq)
    self.params = params
    self.prob = softmax_layer15.p_y_given_x
    self.mask = mask
    self.y = y
    self.target = target
    self.rand = rand
    self.weight_types = weight_types
    self.batch_size = batch_size
    self.num_seq = num_seq
    self.use_noise = use_noise
def __init__(self, config):
    self.config = config
    batch_size = config.batch_size
    lib_conv = config.lib_conv
    group = (2 if config.grouping else 1)
    LRN = (True if config.LRN else False)
    print 'LRN, group', LRN, group

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    x = T.ftensor4('x')
    y = T.lvector('y')

    print '... building the model with ConvLib %s, LRN %s, grouping %i ' \
        % (lib_conv, LRN, group)

    self.layers = []
    params = []
    weight_types = []

    layer1_input = x
    convpool_layer1 = ConvPoolLayer(
        input=layer1_input,
        image_shape=((3, 224, 224, batch_size) if lib_conv == 'cudaconvnet'
                     else (batch_size, 3, 227, 227)),
        filter_shape=((3, 11, 11, 96) if lib_conv == 'cudaconvnet'
                      else (96, 3, 11, 11)),
        convstride=4,
        padsize=(0 if lib_conv == 'cudaconvnet' else 3),
        group=1, poolsize=3, poolstride=2,
        bias_init=0.0, lrn=LRN, lib_conv=lib_conv)
    self.layers.append(convpool_layer1)
    params += convpool_layer1.params
    weight_types += convpool_layer1.weight_type

    convpool_layer2 = ConvPoolLayer(
        input=convpool_layer1.output,
        image_shape=((96, 27, 27, batch_size) if lib_conv == 'cudaconvnet'
                     else (batch_size, 96, 27, 27)),
        filter_shape=((96, 5, 5, 256) if lib_conv == 'cudaconvnet'
                      else (256, 96, 5, 5)),
        convstride=1, padsize=2, group=group,
        poolsize=3, poolstride=2,
        bias_init=0.1, lrn=LRN, lib_conv=lib_conv)
    self.layers.append(convpool_layer2)
    params += convpool_layer2.params
    weight_types += convpool_layer2.weight_type

    convpool_layer3 = ConvPoolLayer(
        input=convpool_layer2.output,
        image_shape=((256, 13, 13, batch_size) if lib_conv == 'cudaconvnet'
                     else (batch_size, 256, 13, 13)),
        filter_shape=((256, 3, 3, 384) if lib_conv == 'cudaconvnet'
                      else (384, 256, 3, 3)),
        convstride=1, padsize=1, group=1,
        poolsize=1, poolstride=0,
        bias_init=0.0, lrn=False, lib_conv=lib_conv)
    self.layers.append(convpool_layer3)
    params += convpool_layer3.params
    weight_types += convpool_layer3.weight_type

    convpool_layer4 = ConvPoolLayer(
        input=convpool_layer3.output,
        image_shape=((384, 13, 13, batch_size) if lib_conv == 'cudaconvnet'
                     else (batch_size, 384, 13, 13)),
        filter_shape=((384, 3, 3, 384) if lib_conv == 'cudaconvnet'
                      else (384, 384, 3, 3)),
        convstride=1, padsize=1, group=group,
        poolsize=1, poolstride=0,
        bias_init=0.1, lrn=False, lib_conv=lib_conv)
    self.layers.append(convpool_layer4)
    params += convpool_layer4.params
    weight_types += convpool_layer4.weight_type

    convpool_layer5 = ConvPoolLayer(
        input=convpool_layer4.output,
        image_shape=((384, 13, 13, batch_size) if lib_conv == 'cudaconvnet'
                     else (batch_size, 384, 13, 13)),
        filter_shape=((384, 3, 3, 256) if lib_conv == 'cudaconvnet'
                      else (256, 384, 3, 3)),
        convstride=1, padsize=1, group=group,
        poolsize=3, poolstride=2,
        bias_init=0.0, lrn=False, lib_conv=lib_conv)
    self.layers.append(convpool_layer5)
    params += convpool_layer5.params
    weight_types += convpool_layer5.weight_type

    if lib_conv == 'cudaconvnet':
        fc_layer6_input = T.flatten(
            convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
    else:
        fc_layer6_input = convpool_layer5.output.flatten(2)

    fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096)
    self.layers.append(fc_layer6)
    params += fc_layer6.params
    weight_types += fc_layer6.weight_type

    dropout_layer6 = DropoutLayer(fc_layer6.output)

    fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096)
    self.layers.append(fc_layer7)
    params += fc_layer7.params
    weight_types += fc_layer7.weight_type

    dropout_layer7 = DropoutLayer(fc_layer7.output)

    softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output, n_in=4096,
                                  n_out=1000)
    self.layers.append(softmax_layer8)
    params += softmax_layer8.params
    weight_types += softmax_layer8.weight_type

    # #################### NETWORK BUILT #######################

    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.errors = softmax_layer8.errors(y)
    self.errors_top_5 = softmax_layer8.errors_top_x(y, 5)
    self.params = params
    self.x = x
    self.y = y
    # self.rand = rand
    self.weight_types = weight_types
    self.batch_size = batch_size
def __init__(self, input, n_in=28**2, n_hidden_1=1024, n_hidden_2=1024,
             n_hidden_3=1024, n_hidden_4=1024, n_out=10,
             W_hidden_1=None, W_hidden_2=None, W_hidden_3=None, W_hidden_4=None,
             W_out=None, dropout=0.0, seed=None):
    relu_activation = lambda x: T.nnet.relu(x, 0.1)
    # relu_activation = T.nnet.relu
    seed = np.random.randint(int(1e5)) if seed is None else seed

    self.dropout_layer_1 = DropoutLayer(input=input, seed=seed, dropout=dropout)
    self.hidden_1 = HiddenLayer(
        seed=seed + 1,
        # input=input,
        input=self.dropout_layer_1.output,
        # input=self.dropout_layer.output,
        n_in=n_in,
        n_out=n_hidden_1,
        activation=relu_activation,
        W=W_hidden_1,
    )

    self.dropout_layer_2 = DropoutLayer(input=self.hidden_1.output,
                                        seed=seed + 2, dropout=dropout)
    self.hidden_2 = HiddenLayer(
        seed=seed + 3,
        # input=self.hidden_1.output,
        input=self.dropout_layer_2.output,
        n_in=n_hidden_1,
        n_out=n_hidden_2,
        activation=relu_activation,
        W=W_hidden_2)

    self.dropout_layer_3 = DropoutLayer(input=self.hidden_2.output,
                                        seed=seed + 4, dropout=dropout)
    self.hidden_3 = HiddenLayer(seed=seed + 5,
                                input=self.dropout_layer_3.output,
                                n_in=n_hidden_2,
                                n_out=n_hidden_3,
                                activation=relu_activation,
                                W=W_hidden_3)

    self.dropout_layer_4 = DropoutLayer(input=self.hidden_3.output,
                                        seed=seed + 6, dropout=dropout)
    self.hidden_4 = HiddenLayer(seed=seed + 7,
                                input=self.dropout_layer_4.output,
                                n_in=n_hidden_3,
                                n_out=n_hidden_4,
                                activation=relu_activation,
                                W=W_hidden_4)

    self.dropout_layer_5 = DropoutLayer(input=self.hidden_4.output,
                                        seed=seed + 8, dropout=dropout)
    self.linear_layer = HiddenLayer(
        seed=seed + 9,
        # input=self.hidden_1.output,
        # input=self.hidden_2.output,
        input=self.dropout_layer_5.output,
        n_in=n_hidden_4,
        n_out=n_out,
        activation=identity_map,
        W=W_out)

    self.softmax_layer = SoftmaxLayer(input=self.linear_layer.output)

    # keep track of model input
    self.input = input

    self.p_y_given_x = self.softmax_layer.p_y_given_x
    self.y_pred = self.softmax_layer.y_pred

    self.L1 = (abs(self.hidden_1.W).sum() + abs(self.hidden_2.W).sum() +
               abs(self.hidden_3.W).sum() + abs(self.hidden_4.W).sum() +
               abs(self.linear_layer.W).sum())
    self.L2_sqr = (T.sum(self.hidden_1.W**2) + T.sum(self.hidden_2.W**2) +
                   T.sum(self.hidden_3.W**2) + T.sum(self.hidden_4.W**2) +
                   T.sum(self.linear_layer.W**2))

    self.mean_log_likelihood = self.softmax_layer.mean_log_likelihood
    self.errors = self.softmax_layer.errors
    self.params = (self.hidden_1.params + self.hidden_2.params +
                   self.hidden_3.params + self.hidden_4.params +
                   self.linear_layer.params)
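A training cost for this MLP would typically be assembled from the exposed mean_log_likelihood together with the L1 and L2_sqr terms. A hedged sketch, assuming mean_log_likelihood takes the integer label vector and using hypothetical regularisation strengths (the actual training script may weight the terms differently):

import theano.tensor as T

y = T.ivector('y')          # integer class labels
l1_reg, l2_reg = 0.0, 1e-4  # hypothetical regularisation strengths
# mlp is an instance of the class defined above
cost = -mlp.mean_log_likelihood(y) + l1_reg * mlp.L1 + l2_reg * mlp.L2_sqr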
def __init__(self, config):
    self.config = config
    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']

    layers = []
    params = []
    weight_types = []

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    # rand1/rand2 are random arrays used for random cropping/mirroring of data
    x1 = T.ftensor4('x1')
    x2 = T.ftensor4('x2')
    y = T.lvector('y')  # The ground truth to be compared with will go here
    rand1 = T.fvector('rand1')
    rand2 = T.fvector('rand2')

    print '... building the model'

    if flag_datalayer:
        data_layerA = DataLayer(input=x1, image_shape=(3, 256, 256, batch_size),
                                cropsize=227, rand=rand1, mirror=True,
                                flag_rand=config['rand_crop'])
        layer1A_input = data_layerA.output
    else:
        layer1A_input = x1

    if flag_datalayer:
        data_layerB = DataLayer(input=x2, image_shape=(3, 256, 256, batch_size),
                                cropsize=227, rand=rand2, mirror=True,
                                flag_rand=config['rand_crop'])
        layer1B_input = data_layerB.output
    else:
        layer1B_input = x2

    fc_layer2_input = T.concatenate(
        (T.flatten(layer1A_input.dimshuffle(3, 0, 1, 2), 2),
         T.flatten(layer1B_input.dimshuffle(3, 0, 1, 2), 2)), axis=1)
    fc_layer2 = FCLayer(input=fc_layer2_input, n_in=154587 * 2, n_out=4096)
    layers.append(fc_layer2)
    params += fc_layer2.params
    weight_types += fc_layer2.weight_type

    dropout_layer2 = DropoutLayer(fc_layer2.output, n_in=4096, n_out=4096)

    fc_layer3 = FCLayer(input=dropout_layer2.output, n_in=4096, n_out=4096)
    layers.append(fc_layer3)
    params += fc_layer3.params
    weight_types += fc_layer3.weight_type

    dropout_layer3 = DropoutLayer(fc_layer3.output, n_in=4096, n_out=4096)

    # Final softmax layer
    softmax_layer3 = SoftmaxLayer(input=dropout_layer3.output, n_in=4096,
                                  n_out=2)  # Only a single binary output is required!
    layers.append(softmax_layer3)
    params += softmax_layer3.params
    weight_types += softmax_layer3.weight_type

    # #################### NETWORK BUILT #######################

    self.cost = softmax_layer3.negative_log_likelihood(y)
    self.errors = softmax_layer3.errors(y)
    self.errors_top_5 = softmax_layer3.errors_top_x(y, 5)
    self.x1 = x1
    self.x2 = x2
    self.y = y
    self.rand1 = rand1
    self.rand2 = rand2
    self.layers = layers
    self.params = params
    self.weight_types = weight_types
    self.batch_size = batch_size
def __init__(self, config, testMode):
    self.config = config
    batch_size = config['batch_size']
    lib_conv = config['lib_conv']
    useLayers = config['useLayers']
    #imgWidth = config['imgWidth']
    #imgHeight = config['imgHeight']
    # if we wish to initialize alexnet with some weights; layers.py must be
    # changed to accept initializing weights
    initWeights = config['initWeights']
    if initWeights:
        weightsDir = config['weightsDir']
        weightFileTag = config['weightFileTag']
    prob_drop = config['prob_drop']

    # ##################### BUILD NETWORK ##########################
    x = T.ftensor4('x')
    mean = T.ftensor4('mean')
    #y = T.lvector('y')

    print '... building the model'

    self.layers = []
    params = []
    weight_types = []

    if useLayers >= 1:
        convpool_layer1 = ConvPoolLayer(input=x - mean,
                                        image_shape=(3, None, None, batch_size),
                                        filter_shape=(3, 11, 11, 96),
                                        convstride=4, padsize=0, group=1,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.0, lrn=True,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=['W_0' + weightFileTag,
                                                     'b_0' + weightFileTag])
        self.layers.append(convpool_layer1)
        params += convpool_layer1.params
        weight_types += convpool_layer1.weight_type

    if useLayers >= 2:
        # change from 27 to the appropriate value based on conv1's output
        convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                        image_shape=(96, None, None, batch_size),
                                        filter_shape=(96, 5, 5, 256),
                                        convstride=1, padsize=2, group=2,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.1, lrn=True,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=['W0_1' + weightFileTag,
                                                     'W1_1' + weightFileTag,
                                                     'b0_1' + weightFileTag,
                                                     'b1_1' + weightFileTag])
        self.layers.append(convpool_layer2)
        params += convpool_layer2.params
        weight_types += convpool_layer2.weight_type

    if useLayers >= 3:
        convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                        image_shape=(256, None, None, batch_size),
                                        filter_shape=(256, 3, 3, 384),
                                        convstride=1, padsize=1, group=1,
                                        poolsize=1, poolstride=0,
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=['W_2' + weightFileTag,
                                                     'b_2' + weightFileTag])
        self.layers.append(convpool_layer3)
        params += convpool_layer3.params
        weight_types += convpool_layer3.weight_type

    if useLayers >= 4:
        convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                        image_shape=(384, None, None, batch_size),
                                        filter_shape=(384, 3, 3, 384),
                                        convstride=1, padsize=1, group=2,
                                        poolsize=1, poolstride=0,
                                        bias_init=0.1, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=['W0_3' + weightFileTag,
                                                     'W1_3' + weightFileTag,
                                                     'b0_3' + weightFileTag,
                                                     'b1_3' + weightFileTag])
        self.layers.append(convpool_layer4)
        params += convpool_layer4.params
        weight_types += convpool_layer4.weight_type

    if useLayers >= 5:
        convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                        image_shape=(384, None, None, batch_size),
                                        filter_shape=(384, 3, 3, 256),
                                        convstride=1, padsize=1, group=2,
                                        poolsize=3, poolstride=2,
                                        bias_init=0.0, lrn=False,
                                        lib_conv=lib_conv,
                                        initWeights=initWeights,
                                        weightsDir=weightsDir,
                                        weightFiles=['W0_4' + weightFileTag,
                                                     'W1_4' + weightFileTag,
                                                     'b0_4' + weightFileTag,
                                                     'b1_4' + weightFileTag])
        self.layers.append(convpool_layer5)
        params += convpool_layer5.params
        weight_types += convpool_layer5.weight_type

    if useLayers >= 6:
        fc_layer6_input = T.flatten(convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
        fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096,
                            initWeights=initWeights, weightsDir=weightsDir,
                            weightFiles=['W_5' + weightFileTag,
                                         'b_5' + weightFileTag])
        self.layers.append(fc_layer6)
        params += fc_layer6.params
        weight_types += fc_layer6.weight_type
        if testMode:
            dropout_layer6 = fc_layer6  # no dropout at test time
        else:
            dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096,
                                          prob_drop=prob_drop)

    if useLayers >= 7:
        fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096,
                            initWeights=initWeights, weightsDir=weightsDir,
                            weightFiles=['W_6' + weightFileTag,
                                         'b_6' + weightFileTag])
        self.layers.append(fc_layer7)
        params += fc_layer7.params
        weight_types += fc_layer7.weight_type
        if testMode:
            dropout_layer7 = fc_layer7  # no dropout at test time
        else:
            dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096,
                                          prob_drop=prob_drop)

    if useLayers >= 8:
        softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output, n_in=4096,
                                      n_out=1000, initWeights=initWeights,
                                      weightsDir=weightsDir,
                                      weightFiles=['W_7' + weightFileTag,
                                                   'b_7' + weightFileTag])
        self.layers.append(softmax_layer8)
        params += softmax_layer8.params
        weight_types += softmax_layer8.weight_type

    # #################### NETWORK BUILT #######################

    self.output = self.layers[useLayers - 1]
    self.params = params
    self.x = x
    self.mean = mean
    self.weight_types = weight_types
    self.batch_size = batch_size
    self.useLayers = useLayers
    self.outLayer = self.layers[useLayers - 1]

    meanVal = np.load(config['mean_file'])
    # x is 4d, with 'batch' number of images. meanVal has only '1' in the
    # 'batch' dimension, so subtraction won't work; tile it across the batch.
    meanVal = meanVal[:, :, :, np.newaxis].astype('float32')
    meanVal = np.tile(meanVal, (1, 1, 1, batch_size))
    self.meanVal = meanVal
    #meanVal = np.zeros([3,imgHeight,imgWidth,2], dtype='float32')

    if useLayers >= 8:
        # if the last layer is softmax, then its output is y_pred
        finalOut = self.outLayer.y_pred
    else:
        finalOut = self.outLayer.output
    self.forwardFunction = theano.function([self.x, In(self.mean, value=meanVal)],
                                           [finalOut])
def __init__(self, config):
    ModelBase.__init__(self)

    self.config = config
    self.verbose = self.config['verbose']
    self.name = 'alexnet'
    batch_size = config['batch_size']
    flag_datalayer = config['use_data_layer']
    lib_conv = config['lib_conv']
    n_softmax_out = config['n_softmax_out']

    # ##################### BUILD NETWORK ##########################
    # allocate symbolic variables for the data
    # 'rand' is a random array used for random cropping/mirroring of data
    x = T.ftensor4('x')
    y = T.lvector('y')
    rand = T.fvector('rand')
    lr = T.scalar('lr')

    if self.verbose:
        print 'AlexNet 2/16'

    self.layers = []
    params = []
    weight_types = []

    if flag_datalayer:
        data_layer = DataLayer(input=x, image_shape=(3, 256, 256, batch_size),
                               cropsize=227, rand=rand, mirror=True,
                               flag_rand=config['rand_crop'])
        layer1_input = data_layer.output
    else:
        layer1_input = x

    convpool_layer1 = ConvPoolLayer(input=layer1_input,
                                    image_shape=(3, 227, 227, batch_size),
                                    filter_shape=(3, 11, 11, 96),
                                    convstride=4, padsize=0, group=1,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.0, lrn=True,
                                    lib_conv=lib_conv, verbose=self.verbose)
    self.layers.append(convpool_layer1)
    params += convpool_layer1.params
    weight_types += convpool_layer1.weight_type

    convpool_layer2 = ConvPoolLayer(input=convpool_layer1.output,
                                    image_shape=(96, 27, 27, batch_size),
                                    filter_shape=(96, 5, 5, 256),
                                    convstride=1, padsize=2, group=2,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.1, lrn=True,
                                    lib_conv=lib_conv, verbose=self.verbose)
    self.layers.append(convpool_layer2)
    params += convpool_layer2.params
    weight_types += convpool_layer2.weight_type

    convpool_layer3 = ConvPoolLayer(input=convpool_layer2.output,
                                    image_shape=(256, 13, 13, batch_size),
                                    filter_shape=(256, 3, 3, 384),
                                    convstride=1, padsize=1, group=1,
                                    poolsize=1, poolstride=0,
                                    bias_init=0.0, lrn=False,
                                    lib_conv=lib_conv, verbose=self.verbose)
    self.layers.append(convpool_layer3)
    params += convpool_layer3.params
    weight_types += convpool_layer3.weight_type

    convpool_layer4 = ConvPoolLayer(input=convpool_layer3.output,
                                    image_shape=(384, 13, 13, batch_size),
                                    filter_shape=(384, 3, 3, 384),
                                    convstride=1, padsize=1, group=2,
                                    poolsize=1, poolstride=0,
                                    bias_init=0.1, lrn=False,
                                    lib_conv=lib_conv, verbose=self.verbose)
    self.layers.append(convpool_layer4)
    params += convpool_layer4.params
    weight_types += convpool_layer4.weight_type

    convpool_layer5 = ConvPoolLayer(input=convpool_layer4.output,
                                    image_shape=(384, 13, 13, batch_size),
                                    filter_shape=(384, 3, 3, 256),
                                    convstride=1, padsize=1, group=2,
                                    poolsize=3, poolstride=2,
                                    bias_init=0.0, lrn=False,
                                    lib_conv=lib_conv, verbose=self.verbose)
    self.layers.append(convpool_layer5)
    params += convpool_layer5.params
    weight_types += convpool_layer5.weight_type

    fc_layer6_input = T.flatten(convpool_layer5.output.dimshuffle(3, 0, 1, 2), 2)
    fc_layer6 = FCLayer(input=fc_layer6_input, n_in=9216, n_out=4096,
                        verbose=self.verbose)
    self.layers.append(fc_layer6)
    params += fc_layer6.params
    weight_types += fc_layer6.weight_type

    dropout_layer6 = DropoutLayer(fc_layer6.output, n_in=4096, n_out=4096,
                                  verbose=self.verbose)

    fc_layer7 = FCLayer(input=dropout_layer6.output, n_in=4096, n_out=4096,
                        verbose=self.verbose)
    self.layers.append(fc_layer7)
    params += fc_layer7.params
    weight_types += fc_layer7.weight_type

    dropout_layer7 = DropoutLayer(fc_layer7.output, n_in=4096, n_out=4096,
                                  verbose=self.verbose)

    softmax_layer8 = SoftmaxLayer(input=dropout_layer7.output, n_in=4096,
                                  n_out=n_softmax_out, verbose=self.verbose)
    self.layers.append(softmax_layer8)
    params += softmax_layer8.params
    weight_types += softmax_layer8.weight_type

    # #################### NETWORK BUILT #######################

    self.p_y_given_x = softmax_layer8.p_y_given_x
    self.y_pred = softmax_layer8.y_pred
    self.output = self.p_y_given_x

    self.cost = softmax_layer8.negative_log_likelihood(y)
    self.error = softmax_layer8.errors(y)
    if n_softmax_out < 5:
        self.error_top_5 = softmax_layer8.errors_top_x(y, n_softmax_out)
    else:
        self.error_top_5 = softmax_layer8.errors_top_x(y, 5)
    self.params = params

    # inputs
    self.x = x
    self.y = y
    self.rand = rand
    self.lr = lr
    self.shared_x = theano.shared(
        np.zeros((3, config['input_width'], config['input_height'],
                  config['file_batch_size']),  # for loading large batch
                 dtype=theano.config.floatX),
        borrow=True)
    self.shared_y = theano.shared(np.zeros((config['file_batch_size'],),
                                           dtype=int), borrow=True)
    self.shared_lr = theano.shared(np.float32(config['learning_rate']))

    # training related
    self.base_lr = np.float32(config['learning_rate'])
    self.step_idx = 0
    self.mu = config['momentum']       # def: 0.9 # momentum
    self.eta = config['weight_decay']  # 0.0002 # weight decay
    self.weight_types = weight_types
    self.batch_size = batch_size

    self.grads = T.grad(self.cost, self.params)

    subb_ind = T.iscalar('subb')  # sub batch index
    #print self.shared_x[:,:,:,subb_ind*self.batch_size:(subb_ind+1)*self.batch_size].shape.eval()
    self.subb_ind = subb_ind
    self.shared_x_slice = self.shared_x[:, :, :, subb_ind * self.batch_size:
                                        (subb_ind + 1) * self.batch_size]
    self.shared_y_slice = self.shared_y[subb_ind * self.batch_size:
                                        (subb_ind + 1) * self.batch_size]
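The attributes stored above (grads, shared_lr, mu for momentum, eta for weight decay) are what a training function would consume. A hedged sketch of the classic momentum-plus-weight-decay SGD update they support, assuming model is an instance of this class (the repository's actual update rule may differ, for example in how weight and bias parameters are scaled):

import theano

def sgd_momentum_updates(model):
    # Classic momentum with L2 weight decay folded into the gradient.
    updates = []
    for param, grad in zip(model.params, model.grads):
        vel = theano.shared(param.get_value() * 0.,
                            broadcastable=param.broadcastable)
        new_vel = model.mu * vel - model.shared_lr * (grad + model.eta * param)
        updates.append((vel, new_vel))
        updates.append((param, param + new_vel))
    return updates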
def __init__(self, X, n_in, n_out, n_hidden_layers, n_units_in, n_units_hidden,
             M_lst=None, m_lst=None, sigma_W_params_lst=None,
             sigma_b_params_lst=None, sigma_W=1e-3, tune_sigma_W=True,
             sigma_b=1e-6, tune_sigma_b=True, l_W=1e-6, l_b=1e-6,
             diag_noise=True, approx_cols=False,
             divide_1st_layer_by_its_n_out=False, b_out_deterministic=False,
             seed=None):
    assert n_hidden_layers > 0, 'n_hidden_layers must be positive'
    n_layers = n_hidden_layers + 1

    M_lst = [None] * n_layers if M_lst is None else M_lst
    m_lst = [None] * n_layers if m_lst is None else m_lst
    if sigma_W_params_lst is None:
        sigma_W_params_lst = [None] * n_layers
    if sigma_b_params_lst is None:
        sigma_b_params_lst = [None] * n_layers

    assert \
        len(M_lst) == len(m_lst) == len(sigma_W_params_lst) == \
        len(sigma_b_params_lst) == n_layers, \
        'length of all lists must be the same and equal to ' \
        '(n_hidden_layers + 1) where the +1 is for the output layer mapping'

    # set seed to ensure each layer is init differently (cf. seed += 1)
    seed = np.random.randint(int(1e6)) if seed is None else seed
    np.random.seed(seed)

    def activation(x):
        return T.nnet.relu(x, alpha=0.1)

    self.in_layer = GaussLayer(
        input=X, n_in=n_in, n_out=n_units_in,
        M=M_lst[0], m=m_lst[0],
        sigma_W=sigma_W, tune_sigma_W=tune_sigma_W,
        sigma_W_params=sigma_W_params_lst[0],
        sigma_b=sigma_b, tune_sigma_b=tune_sigma_b,
        sigma_b_params=sigma_b_params_lst[0],
        l_W=l_W, l_b=l_b, diag_noise=diag_noise,
        activation=activation, approx_cols=approx_cols,
        seed=seed, name='h1'
    )
    self.layers = [self.in_layer]
    seed += 1

    # specific settings necessary for initialisation of deep GPs
    if divide_1st_layer_by_its_n_out:
        sqrt_n_out = T.constant(self.in_layer.n_out ** 0.5, dtype=floatX)
        self.in_layer.output /= sqrt_n_out

    # the first hidden layer was already set up above
    for i in xrange(1, n_hidden_layers):
        prev_layer = self.layers[-1]
        layer = GaussLayer(
            input=prev_layer.output, n_in=prev_layer.n_out, n_out=n_units_hidden,
            M=M_lst[i], m=m_lst[i],
            sigma_W=sigma_W, tune_sigma_W=tune_sigma_W,
            sigma_W_params=sigma_W_params_lst[i],
            sigma_b=sigma_b, tune_sigma_b=tune_sigma_b,
            sigma_b_params=sigma_b_params_lst[i],
            l_W=l_W, l_b=l_b, diag_noise=diag_noise,
            activation=activation, name='h' + str(i + 1),
            approx_cols=approx_cols, seed=seed
        )
        self.layers += [layer]
        seed += 1

    # initialised separately because of the necessary linear activation
    prev_layer = self.layers[-1]
    self.out_layer = GaussLayer(
        input=prev_layer.output, n_in=prev_layer.n_out, n_out=n_out,
        M=M_lst[-1], m=m_lst[-1],
        sigma_W=sigma_W, tune_sigma_W=tune_sigma_W,
        sigma_W_params=sigma_W_params_lst[-1],
        sigma_b=sigma_b, tune_sigma_b=tune_sigma_b,
        sigma_b_params=sigma_b_params_lst[-1],
        l_W=l_W, l_b=l_b, diag_noise=diag_noise,
        b_is_deterministic=b_out_deterministic,
        approx_cols=approx_cols, name='out', seed=seed
    )
    self.layers += [self.out_layer]

    self.softmax = SoftmaxLayer(input=self.out_layer.output, name='softmax')

    self.params = reduce(
        lambda x, y: x + y,
        [layer.grad_params for layer in self.layers]
    )

    self.input = X
    self.p_y_given_x = self.softmax.p_y_given_x
    self.y_pred = self.softmax.y_pred
    self.mean_log_likelihood = self.softmax.mean_log_likelihood
    self.errors = self.softmax.errors

    # self.kl_W = T.sum([layer.kl_W() for layer in self.layers])
    # self.kl_b = T.sum([layer.kl_b() for layer in self.layers])
    # self.kl = self.kl_W + self.kl_b
    self.effect_kl_W = T.sum([layer.effect_kl_W() for layer in self.layers])
    self.effect_kl_b = T.sum([layer.effect_kl_b() for layer in self.layers])
    self.effect_kl = self.effect_kl_W + self.effect_kl_b