def test_forward4(self):
    h = 5
    layer = ConvLayer(2, 5, h)
    x = fake_data((2, 2, 8, 8))
    layer.W = fake_data((5, 2, h, h))
    layer.b = fake_data(layer.b.shape)
    y = layer.forward(x)
    from test1.test4_result import t4_should_be
    self.assertTrue(np.allclose(y, t4_should_be))
def test_forward1(self):
    layer = ConvLayer(1, 1, 3)
    x = fake_data((1, 1, 3, 3))
    layer.W = fake_data((1, 1, 3, 3))
    layer.b = fake_data(layer.b.shape)
    y = layer.forward(x)
    should_be = np.array([[[[58., 100., 70.],
                            [132., 204., 132.],
                            [70., 100., 58.]]]])
    self.assertTrue(np.allclose(y, should_be))
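# The tests above call a fake_data() helper that is not shown in this section. A minimal sketch
# consistent with the expected values in test_forward1 (e.g. the centre output 204 = sum of
# arange(9) * arange(9)) is sequential values reshaped to the requested shape; the helper in the
# original test suite may differ.
def fake_data(shape):
    """Deterministic stand-in data: 0, 1, 2, ... reshaped to `shape`."""
    return np.arange(np.prod(shape), dtype=np.float64).reshape(shape)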
def test_forward2(self):
    layer = ConvLayer(2, 1, 3)
    x = fake_data((1, 2, 4, 4))
    layer.W = fake_data((1, 2, 3, 3))
    layer.b = fake_data(layer.b.shape)
    y = layer.forward(x)
    should_be = np.array([[[[1196., 1796., 1916., 1264.],
                            [1881., 2793., 2946., 1923.],
                            [2313., 3405., 3558., 2307.],
                            [1424., 2072., 2156., 1380.]]]])
    self.assertTrue(np.allclose(y, should_be))
def __call__(self, disc_input):
    feat_params = self.feat_params

    self._disc = Sequential('Fixed_Conv_Disc')
    conv_count, pool_count, fc_count = 0, 0, 0
    for i in xrange(self.num_feat_layers):
        if feat_params[i]['layer_type'] == 'conv':
            self._disc += ConvLayer(feat_params[i]['n_filters_in'],
                                    feat_params[i]['n_filters_out'],
                                    feat_params[i]['input_dim'],
                                    feat_params[i]['filter_dim'],
                                    feat_params[i]['strides'],
                                    name='classifier_conv_%d' % conv_count)
            self._disc.layers[-1].weights['W'] = tf.constant(feat_params[i]['W'])
            self._disc.layers[-1].weights['b'] = tf.constant(feat_params[i]['b'])
            self._disc += feat_params[i]['act_fn']
            conv_count += 1
        elif feat_params[i]['layer_type'] == 'pool':
            self._disc += PoolLayer(feat_params[i]['input_dim'],
                                    feat_params[i]['filter_dim'],
                                    feat_params[i]['strides'],
                                    name='classifier_pool_%d' % i)
            pool_count += 1
        elif feat_params[i]['layer_type'] == 'fc':
            # self._disc += FullyConnected(
            #     feat_params[i]['W'].shape[0],
            #     feat_params[i]['W'].shape[1],
            #     activation=tf.nn.tanh,
            #     scale=0.01,
            #     name='classifier_fc_%d' % fc_count
            # )
            self._disc += ConstFC(feat_params[i]['W'],
                                  feat_params[i]['b'],
                                  activation=feat_params[i]['act_fn'],
                                  name='classifier_fc_%d' % fc_count)
            fc_count += 1

    if isinstance(self._disc.layers[-1], ConstFC):
        disc_input_dim = self._disc.layers[-1].weights['w'].get_shape()[1].value
    elif isinstance(self._disc.layers[-1], PoolLayer):
        disc_input_dim = np.prod(self._disc.layers[-1].output_dim) * (
            self._disc.layers[-3].n_filters_out)
    else:  # activation function after a conv layer
        disc_input_dim = np.prod(self._disc.layers[-1].output_dim) * (
            self._disc.layers[-2].n_filters_out)

    # self._disc += FullyConnected(disc_input_dim, 1024, activation=tf.nn.tanh,
    #                              scale=0.01, name='disc_fc_0')
    self._disc += FullyConnected(disc_input_dim, 1, activation=None,
                                 scale=0.01, name='disc_logit')
    self._disc += lambda p: 1.0 / (1.0 + tf.exp(-p))

    self.disc = self._disc(disc_input)
    return self.disc
def __init__(self, numpy_rng, theano_rng=None, cfg=None, testing=False):
    self.layers = []
    self.params = []
    self.delta_params = []
    self.conv_layers = []

    self.cfg = cfg
    self.conv_layer_configs = cfg.conv_layer_configs
    self.conv_activation = cfg.conv_activation
    self.use_fast = cfg.use_fast

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')
    self.y = T.ivector('y')

    self.conv_layer_num = len(self.conv_layer_configs)
    for i in xrange(self.conv_layer_num):
        if i == 0:
            input = self.x
        else:
            input = self.layers[-1].output
        config = self.conv_layer_configs[i]
        conv_layer = ConvLayer(numpy_rng=numpy_rng,
                               input=input,
                               input_shape=config['input_shape'],
                               filter_shape=config['filter_shape'],
                               poolsize=config['poolsize'],
                               activation=self.conv_activation,
                               flatten=config['flatten'],
                               use_fast=self.use_fast,
                               testing=testing)
        self.layers.append(conv_layer)
        self.conv_layers.append(conv_layer)
        self.params.extend(conv_layer.params)
        self.delta_params.extend(conv_layer.delta_params)

    self.conv_output_dim = (config['output_shape'][1] *
                            config['output_shape'][2] *
                            config['output_shape'][3])
    cfg.n_ins = (config['output_shape'][1] *
                 config['output_shape'][2] *
                 config['output_shape'][3])

    self.fc_dnn = DNN(numpy_rng=numpy_rng, theano_rng=theano_rng,
                      cfg=self.cfg, input=self.layers[-1].output)

    self.layers.extend(self.fc_dnn.layers)
    self.params.extend(self.fc_dnn.params)
    self.delta_params.extend(self.fc_dnn.delta_params)

    self.finetune_cost = self.fc_dnn.logLayer.negative_log_likelihood(self.y)
    self.errors = self.fc_dnn.logLayer.errors(self.y)
def test_forward3(self):
    layer = ConvLayer(2, 2, 3)
    x = fake_data((1, 2, 4, 4))
    layer.W = fake_data((2, 2, 3, 3))
    layer.b = fake_data(layer.b.shape)
    y = layer.forward(x)
    should_be = np.array([[[[1196., 1796., 1916., 1264.],
                            [1881., 2793., 2946., 1923.],
                            [2313., 3405., 3558., 2307.],
                            [1424., 2072., 2156., 1380.]],
                           [[2709., 4173., 4509., 3065.],
                            [4582., 7006., 7483., 5056.],
                            [5878., 8914., 9391., 6304.],
                            [4089., 6177., 6477., 4333.]]]])
    self.assertTrue(np.allclose(y, should_be))
def __init__(self, ch_in: int = 32, n_recursions: int = 4, dropout: Optional[float] = None,
             use_shuffle: bool = True, activ: nn.Module = nn.ELU, use_pooling: bool = False):
    """
    :param ch_in: number of input channels
    :param n_recursions: number of times to repeat
    :param use_shuffle: whether to use pixel shuffle or traditional deconvolution
    :param dropout: whether or not to use dropout and how much
    :param use_pooling: whether to use pooling or strides for downsampling
    """
    super().__init__()
    layers = [ConvLayer(ch_in, ch_in, stride=2 - int(use_pooling), dropout=dropout, activ=activ),
              ConvLayer(ch_in, ch_in, dropout=dropout, activ=activ),
              ConvLayer(ch_in, 2 * ch_in, dropout=dropout, activ=activ)]
    if use_pooling:
        layers.insert(1, nn.MaxPool2d(2))
    self.down = nn.Sequential(*layers)

    if n_recursions > 1:
        self.rec_unet = UnetBulk(ch_in=2 * ch_in, n_recursions=n_recursions - 1,
                                 dropout=dropout, use_shuffle=use_shuffle, activ=activ)
        down_chs = 4 * ch_in
    else:
        self.rec_unet = lambda x: x
        down_chs = 2 * ch_in

    if use_shuffle:
        deconv = DeconvLayer(ch_in, ch_in, dropout=dropout, activ=activ)
    else:
        deconv = TransConvLayer(ch_in, ch_in, dropout=dropout, activ=activ)

    self.up = nn.Sequential(ConvLayer(down_chs, ch_in, dropout=dropout, activ=activ),
                            ConvLayer(ch_in, ch_in, dropout=dropout, activ=activ),
                            deconv)
def __init__(self, input_shape, input_channels, enc_params, dec_params, name=''):
    """
    enc_params:
        - kernels
        - strides
        - num_filters
        - act_fn
    dec_params:
        - layer_dims
        - act_fn
    """
    super(ConvAutoEncoder, self).__init__()

    self.input_shape = input_shape
    self.input_channels = input_channels
    self.enc_params = enc_params
    self.dec_params = dec_params
    self.name = name

    self.enc_params['act_fn'] = map(lambda p: act_lib[p], self.enc_params['act_fn'])
    self.dec_params['act_fn'] = map(lambda p: act_lib[p], self.dec_params['act_fn'])

    # Build the encoder, which is fully convolutional with no pooling
    self._encoder = Sequential(self.name + 'ae_encoder')
    for i in range(len(self.enc_params['kernels'])):
        self._encoder += ConvLayer(
            self.input_channels if i == 0 else self.enc_params['num_filters'][i - 1],
            enc_params['num_filters'][i],
            self.input_shape if i == 0 else self._encoder.layers[-2].output_dim,
            self.enc_params['kernels'][i],
            self.enc_params['strides'][i],
            name=self.name + '_enc_conv_%d' % (i + 1)
        )
        self._encoder += self.enc_params['act_fn'][i]

    # Build the decoder, which is fully connected
    self._decoder = Sequential(self.name + 'ae_decoder')
    for i in range(len(self.dec_params['layer_dims'])):
        self._decoder += FullyConnected(
            self.enc_params['num_filters'][-1] * np.prod(self._encoder.layers[-2].output_dim)
            if i == 0 else self.dec_params['layer_dims'][i - 1],
            self.dec_params['layer_dims'][i],
            self.dec_params['act_fn'][i],
            name=self.name + '_dec_fc_%d' % (i + 1)
        )
def __init__(self, ch_in: int = 12, ch_out: int = 2, bulk_ch: int = 32, n_recursions: int = 4,
             use_shuffle: bool = True, dropout: Optional[float] = None,
             activ: nn.Module = nn.ELU, use_pooling: bool = True):
    """
    :param ch_in: number of input channels
    :param ch_out: number of output channels
    :param bulk_ch: initial channels for bulk
    :param n_recursions: number of times to repeat
    :param use_shuffle: whether to use pixel shuffle or traditional deconvolution
    :param dropout: whether or not to use dropout and how much
    """
    super().__init__()
    self.in_layer = ConvLayer(ch_in, bulk_ch, ks=1, pad=0, dropout=dropout, activ=activ)
    self.bulk = UnetBulk(ch_in=bulk_ch, n_recursions=n_recursions, dropout=dropout,
                         use_shuffle=use_shuffle, activ=activ, use_pooling=use_pooling)
    self.out = nn.Conv2d(2 * bulk_ch, ch_out, (1, 1))
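# A minimal instantiation sketch for the two torch modules above. The class names Unet and
# UnetBulk are assumptions (they are not visible in these snippets), as is the presence of a
# standard forward() that chains in_layer, bulk and out.
import torch

model = Unet(ch_in=12, ch_out=2, bulk_ch=32, n_recursions=4, dropout=0.1)
with torch.no_grad():
    out = model(torch.randn(1, 12, 64, 64))  # expect a (1, 2, H, W) output if forward() is standard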
def test_backward3_5(self):
    layer = ConvLayer(5, 3, 3)
    x = fake_data((2, 5, 3, 3))
    layer.W = fake_data(layer.W.shape)
    layer.b = fake_data(layer.b.shape)
    y = layer.forward(x)
    x_grad = layer.backward(np.ones_like(y))
    # do numerical gradients
    nm_x_grad = numerical_gradient(layer, x, x)
    nm_w_grad = numerical_gradient(layer, x, layer.W)
    nm_b_grad = numerical_gradient(layer, x, layer.b)
    self.assertTrue(np.allclose(nm_x_grad, x_grad))
    self.assertTrue(np.allclose(nm_w_grad, layer.W_grad))
    self.assertTrue(np.allclose(nm_b_grad, layer.b_grad))
def test_backward4(self):
    h = 5
    layer = ConvLayer(2, 5, h)
    x = fake_data((2, 2, 8, 8))
    layer.W = fake_data((5, 2, h, h))
    layer.b = fake_data(layer.b.shape)
    y = layer.forward(x)
    x_grad = layer.backward(np.ones_like(y))
    nm_x_grad = numerical_gradient(layer, x, x)
    nm_w_grad = numerical_gradient(layer, x, layer.W)
    nm_b_grad = numerical_gradient(layer, x, layer.b)
    self.assertTrue(np.allclose(nm_x_grad, x_grad))
    self.assertTrue(np.allclose(nm_w_grad, layer.W_grad))
    self.assertTrue(np.allclose(nm_b_grad, layer.b_grad))
def test_backward1(self):
    layer = ConvLayer(1, 1, 3)
    x = fake_data((1, 1, 8, 8))
    layer.W = fake_data((1, 1, 3, 3))
    layer.b = fake_data(layer.b.shape)
    y = layer.forward(x)
    x_grad = layer.backward(np.ones_like(y))
    # do numerical gradients
    nm_x_grad = numerical_gradient(layer, x, x)
    nm_w_grad = numerical_gradient(layer, x, layer.W)
    nm_b_grad = numerical_gradient(layer, x, layer.b)
    # note that this does not check the gradients of the padded elements
    self.assertTrue(np.allclose(nm_x_grad, x_grad))
    self.assertTrue(np.allclose(nm_w_grad, layer.W_grad))
    self.assertTrue(np.allclose(nm_b_grad, layer.b_grad))
def test_backward5(self):
    h = 5
    layer = ConvLayer(2, 5, h)
    x = fake_data((2, 2, 8, 8))
    layer.W = fake_data((5, 2, h, h))
    layer.b = fake_data(layer.b.shape)
    y = layer.forward(x)
    y_grad = fake_data(y.shape)
    x_grad = layer.backward(y_grad)
    nm_x_grad = numerical_gradient(layer, x, x, y_grad)
    nm_w_grad = numerical_gradient(layer, x, layer.W, y_grad)
    nm_b_grad = numerical_gradient(layer, x, layer.b, y_grad)
    self.assertTrue(np.allclose(nm_x_grad, x_grad))
    #print("expected", nm_x_grad)
    #print(x_grad)
    self.assertTrue(np.allclose(nm_w_grad, layer.W_grad))
    #print("expected2", nm_w_grad)
    #print(layer.W_grad)
    self.assertTrue(np.allclose(nm_b_grad, layer.b_grad))
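# numerical_gradient() is also assumed by these tests rather than defined here. A central-difference
# sketch matching the call signature used above (layer, input, target array, optional upstream
# gradient) could look like the following; the original helper may differ in details such as epsilon.
def numerical_gradient(layer, x, target, y_grad=None, eps=1e-5):
    """Estimate d(sum(y * y_grad)) / d(target) by perturbing each entry of `target` in place."""
    grad = np.zeros_like(target)
    it = np.nditer(target, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        orig = target[idx]
        target[idx] = orig + eps
        y_plus = layer.forward(x)
        target[idx] = orig - eps
        y_minus = layer.forward(x)
        target[idx] = orig
        weight = np.ones_like(y_plus) if y_grad is None else y_grad
        grad[idx] = np.sum((y_plus - y_minus) * weight) / (2 * eps)
        it.iternext()
    return grad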
def test_backward2(self):
    layer = ConvLayer(2, 1, 3)
    x = fake_data((1, 2, 4, 4))
    layer.W = fake_data((1, 2, 3, 3))
    layer.b = fake_data(layer.b.shape)
    y = layer.forward(x)
    x_grad = layer.backward(np.ones_like(y))
    # do numerical gradients
    nm_x_grad = numerical_gradient(layer, x, x)
    nm_w_grad = numerical_gradient(layer, x, layer.W)
    nm_b_grad = numerical_gradient(layer, x, layer.b)
    self.assertTrue(np.allclose(nm_x_grad, x_grad))
    #print("expected", nm_x_grad)
    #print(x_grad)
    self.assertTrue(np.allclose(nm_w_grad, layer.W_grad))
    #print("expected2", nm_w_grad)
    #print(layer.W_grad)
    self.assertTrue(np.allclose(nm_b_grad, layer.b_grad))
def __init__(self, input_dim, input_channels, num_classes,
             conv_params, pool_params, fc_params, name=''):
    super(CNNClassifier, self).__init__()

    conv_params['act_fn'] = map(lambda p: act_lib[p], conv_params['act_fn'])
    fc_params['act_fn'] = map(lambda p: act_lib[p], fc_params['act_fn'])

    self.input_dim = input_dim
    self.input_channels = input_channels
    self.num_classes = num_classes
    self.conv_params = conv_params
    self.pool_params = pool_params
    self.fc_params = fc_params

    with tf.variable_scope(name):
        self._classifier_conv = Sequential('CNN_Classifier_Conv')
        self._classifier_conv += ConvLayer(
            self.input_channels,
            self.conv_params['n_filters'][0],
            self.input_dim,
            self.conv_params['kernels'][0],
            self.conv_params['strides'][0],
            name='classifier_conv_0'
        )
        print('#' * 100)
        print(self._classifier_conv.layers[-1].output_dim)
        self._classifier_conv += self.conv_params['act_fn'][0]
        self._classifier_conv += PoolLayer(
            self._classifier_conv.layers[-2].output_dim,
            self.pool_params['kernels'][0],
            self.pool_params['strides'][0],
            name='pool_0'
        )
        print('#' * 100)
        print(self._classifier_conv.layers[-1].output_dim)

        for i in xrange(1, len(self.conv_params['kernels'])):
            self._classifier_conv += ConvLayer(
                self.conv_params['n_filters'][i - 1],
                self.conv_params['n_filters'][i],
                self._classifier_conv.layers[-1].output_dim,
                self.conv_params['kernels'][i],
                self.conv_params['strides'][i],
                name='classifier_conv_%d' % i
            )
            print('#' * 100)
            print(self._classifier_conv.layers[-1].output_dim)
            self._classifier_conv += self.conv_params['act_fn'][i]
            self._classifier_conv += PoolLayer(
                self._classifier_conv.layers[-2].output_dim,
                self.pool_params['kernels'][0],
                self.pool_params['strides'][0],
                name='pool_%d' % i
            )
            print('#' * 100)
            print(self._classifier_conv.layers[-1].output_dim)

        self._classifier_fc = Sequential('CNN_Classifier_FC')
        self._classifier_fc += FC(
            np.prod(self._classifier_conv.layers[-1].output_dim) * self.conv_params['n_filters'][-1],
            self.fc_params['dims'][0],
            activation=self.fc_params['act_fn'][0],
            scale=0.01,
            name='classifier_fc_0'
        )
        print(self._classifier_fc.layers[-1].output_dim)

        for i in xrange(1, len(self.fc_params['dims'])):
            self._classifier_fc += FC(
                self.fc_params['dims'][i - 1],
                self.fc_params['dims'][i],
                activation=self.fc_params['act_fn'][i],
                scale=0.01,
                name='classifier_fc_%d' % i
            )
            print(self._classifier_fc.layers[-1].output_dim)
def __init__(self, numpy_rng, theano_rng=None, cfg=None, testing=False, input=None):
    self.layers = []
    self.extra_layers = []
    self.params = []
    self.delta_params = []
    self.n_ins = cfg.n_ins
    self.n_outs = cfg.n_outs

    self.conv_layers = []

    self.cfg = cfg
    self.conv_layer_configs = cfg.conv_layer_configs
    self.conv_activation = cfg.conv_activation
    self.use_fast = cfg.use_fast

    self.extra_x = T.matrix('extra_x')

    # 1.5 attention
    self.extra_dim = cfg.extra_dim
    print 'Extra input dimension: ' + str(cfg.extra_dim)
    self.extra_layers_sizes = cfg.extra_layers_sizes

    # 2. dnn
    self.hidden_layers_sizes = cfg.hidden_layers_sizes
    self.hidden_layers_number = len(self.hidden_layers_sizes)
    self.activation = cfg.activation

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    if input == None:
        self.x = T.matrix('x')
    else:
        self.x = input
    self.y = T.matrix('y')

    #######################
    # build cnn layers    #
    #######################
    print '1. start to build cnn mag layer: ' + str(self.conv_layer_configs)
    self.conv_layer_num = len(self.conv_layer_configs)
    for i in xrange(self.conv_layer_num):
        if i == 0:
            input = self.x
        else:
            input = self.layers[-1].output
        config = self.conv_layer_configs[i]
        conv_layer = ConvLayer(numpy_rng=numpy_rng,
                               input=input,
                               input_shape=config['input_shape'],
                               filter_shape=config['filter_shape'],
                               poolsize=config['poolsize'],
                               activation=self.conv_activation,
                               flatten=config['flatten'],
                               use_fast=self.use_fast,
                               testing=testing)
        self.layers.append(conv_layer)
        self.conv_layers.append(conv_layer)
        self.params.extend(conv_layer.params)
        self.delta_params.extend(conv_layer.delta_params)

    self.conv_output_dim = (config['output_shape'][1] *
                            config['output_shape'][2] *
                            config['output_shape'][3])
    cfg.n_ins = (config['output_shape'][1] *
                 config['output_shape'][2] *
                 config['output_shape'][3])

    #######################################
    # build phase-based attention layer   #
    #######################################
    # 0. phase-based attention
    print '2. start to build attend layer: ' + str(self.extra_layers_sizes)
    for i in xrange(len(self.extra_layers_sizes)):
        if i == 0:
            input_size = cfg.extra_dim
            layer_input = self.extra_x
        else:
            input_size = self.extra_layers_sizes[i - 1]
            layer_input = self.extra_layers[-1].output

        W = None
        b = None
        attend_layer = HiddenLayer(rng=numpy_rng,
                                   input=layer_input,
                                   n_in=input_size,
                                   n_out=self.extra_layers_sizes[i],
                                   W=W, b=b,
                                   activation=self.activation)
        print '\tbuild attend layer: ' + str(input_size) + ' x ' + str(attend_layer.n_out)
        self.extra_layers.append(attend_layer)
        self.params.extend(attend_layer.params)
        self.delta_params.extend(attend_layer.delta_params)

    self.extra_output = self.extra_layers[-1].output
    self.extra_output = T.nnet.softmax(self.extra_layers[-1].output)
    #self.extra_output_rand = numpy.asarray(numpy_rng.uniform(
    #    low=-0.1,
    #    high=1.0,
    #    size=(32, 20)), dtype=theano.config.floatX)
    #self.extra_output = theano.shared(value=self.extra_output_rand, name='rand', borrow=True)
    print '2. finish attend layer softmax(0): ' + str(self.extra_layers[-1].n_out)

    #######################################
    # build dnnv                          #
    #######################################
    print '3. start to build dnnv layer: ' + str(self.hidden_layers_number)
    for i in xrange(self.hidden_layers_number):
        # construct the hidden layer
        if i == 0:
            # 1. Join two features (magnitude + phase)
            input_size = self.conv_output_dim + self.extra_layers_sizes[-1]
            layer_input = T.join(1, self.layers[-1].output, self.extra_output)
            # 2. Weighted sum (magnitude * phase)
            #input_size = self.conv_output_dim
            #layer_input = self.layers[-1].output * self.extra_output
        else:
            input_size = self.hidden_layers_sizes[i - 1]
            layer_input = self.layers[-1].output

        W = None
        b = None
        hidden_layer = HiddenLayer(rng=numpy_rng,
                                   input=layer_input,
                                   n_in=input_size,
                                   n_out=self.hidden_layers_sizes[i],
                                   W=W, b=b,
                                   activation=self.activation)
        print '\tbuild dnnv layer: ' + str(input_size) + ' x ' + str(hidden_layer.n_out)
        # add the layer to our list of layers
        self.layers.append(hidden_layer)
        self.params.extend(hidden_layer.params)
        self.delta_params.extend(hidden_layer.delta_params)
    print '3. finish dnnv layer: ' + str(self.layers[-1].n_out)

    #######################################
    # build logistic regression layer     #
    #######################################
    print '4. start to build log layer: 1'
    # We now need to add a logistic layer on top of the MLP
    self.logLayer = OutputLayer(input=self.layers[-1].output,
                                n_in=self.hidden_layers_sizes[-1],
                                n_out=self.n_outs)
    print '\tbuild final layer: ' + str(self.layers[-1].n_out) + ' x ' + str(self.n_outs)
    self.layers.append(self.logLayer)
    self.params.extend(self.logLayer.params)
    self.delta_params.extend(self.logLayer.delta_params)
    print '4. finish log layer: ' + str(self.layers[-1].n_out)

    print 'Total layers: ' + str(len(self.layers))

    self.finetune_cost = self.logLayer.l2(self.y)
    self.errors = self.logLayer.errors(self.y)

    sys.stdout.flush()
lr = 0.3
epochs = 60
batch_size = 32

counter = 0
errork = np.zeros(k)
loss = np.zeros(shape=(k, epochs))
for train_index, test_index in kf.split(myX[np.arange(533), :, :, :]):
    train_x, test_x = myX[train_index, :, :, :], myX[test_index, :, :, :]
    train_y, test_y = y[train_index], y[test_index]

    # training
    print('Creating model with lr = ' + str(lr))
    myNet = Sequential(
        layers=(ConvLayer(n_i=3, n_o=16, h=3),
                ReluLayer(),
                MaxPoolLayer(size=2),
                ConvLayer(n_i=16, n_o=32, h=3),
                ReluLayer(),
                MaxPoolLayer(size=2),
                FlattenLayer(),
                FullLayer(n_i=12 * 12 * 32, n_o=6),  # no neutral class :/
                SoftMaxLayer()),
        loss=CrossEntropyLayer())

    print("Initiating training")
    loss[counter, :] = myNet.fit(x=train_x, y=train_y, epochs=epochs, lr=lr,
from layers.sequential import Sequential
from layers.conv import ConvLayer
from layers.relu import ReluLayer
from layers.maxpool import MaxPoolLayer
from layers.flatten import FlattenLayer
from layers.full import FullLayer
from layers.softmax import SoftMaxLayer
from layers.cross_entropy import CrossEntropyLayer
import numpy as np
#import matplotlib
#matplotlib.use('Agg')
import matplotlib.pyplot as plt
from layers.dataset import fer2013

# Import and process the input
(train_x, train_y), (val_x, val_y), (test_x, test_y) = fer2013()

lr = 0.1
epochs = 100
batch_size = 128

myNet = Sequential(layers=(ConvLayer(n_i=1, n_o=16, h=3),
                           ReluLayer(),
                           MaxPoolLayer(size=2),
                           ConvLayer(n_i=16, n_o=32, h=3),
                           ReluLayer(),
                           MaxPoolLayer(size=2),
                           FlattenLayer(),
                           FullLayer(n_i=12 * 12 * 32, n_o=7),
                           SoftMaxLayer()),
                   loss=CrossEntropyLayer())
myNet.load()

"""
pred = myNet.predict(val_x)
accuracy = np.mean(pred == val_y)
print('At learning rate = ' + str(lr))
def __init__(self, numpy_rng, theano_rng=None, cfg=None, testing=False, input=None):
    self.cfg = cfg
    self.params = []
    self.delta_params = []
    self.n_ins = cfg.n_ins
    self.n_outs = cfg.n_outs
    self.l1_reg = cfg.l1_reg
    self.l2_reg = cfg.l2_reg
    self.do_maxout = cfg.do_maxout
    self.pool_size = cfg.pool_size
    self.max_col_norm = cfg.max_col_norm

    self.layers = []
    self.conv_layers = []
    self.lstm_layers = []
    self.fc_layers = []

    # 1. conv
    self.conv_layer_configs = cfg.conv_layer_configs
    self.conv_activation = cfg.conv_activation
    self.conv_layers_number = len(self.conv_layer_configs)
    self.use_fast = cfg.use_fast
    # 2. lstm
    self.lstm_layers_sizes = cfg.lstm_layers_sizes
    self.lstm_layers_number = len(self.lstm_layers_sizes)
    # 3. dnn
    self.hidden_layers_sizes = cfg.hidden_layers_sizes
    self.hidden_layers_number = len(self.hidden_layers_sizes)
    self.activation = cfg.activation

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    if input == None:
        self.x = T.matrix('x')
    else:
        self.x = input
    self.y = T.matrix('y')

    #######################
    # build conv layers   #
    #######################
    print '1. start to build conv layer: ' + str(self.conv_layers_number)
    for i in xrange(self.conv_layers_number):
        if i == 0:
            input = self.x
        else:
            input = self.conv_layers[-1].output
        config = self.conv_layer_configs[i]
        conv_layer = ConvLayer(numpy_rng=numpy_rng,
                               input=input,
                               input_shape=config['input_shape'],
                               filter_shape=config['filter_shape'],
                               poolsize=config['poolsize'],
                               activation=self.conv_activation,
                               flatten=config['flatten'],
                               use_fast=self.use_fast,
                               testing=testing)
        print '\tbuild conv layer: ' + str(config['input_shape'])
        self.layers.append(conv_layer)
        self.conv_layers.append(conv_layer)
        self.params.extend(conv_layer.params)
        self.delta_params.extend(conv_layer.delta_params)

    self.conv_output_dim = (config['output_shape'][1] *
                            config['output_shape'][2] *
                            config['output_shape'][3])
    print '\t cnn out: ' + str(self.conv_output_dim)
    cfg.n_ins = (config['output_shape'][1] *
                 config['output_shape'][2] *
                 config['output_shape'][3])
    print '1. finish conv layer: ' + str(self.layers[-1].n_out)

    #######################
    # build lstm layers   #
    #######################
    print '2. start to build lstm layer: ' + str(self.lstm_layers_number)
    for i in xrange(self.lstm_layers_number):
        if i == 0:
            input_size = self.conv_output_dim
            input = self.layers[-1].output
        else:
            input_size = self.lstm_layers_sizes[i - 1]
            input = self.layers[-1].output
        print 'build lstm layer: ' + str(input_size)
        lstm_layer = LSTMLayer(rng=numpy_rng, input=input,
                               n_in=input_size, n_out=self.lstm_layers_sizes[i])
        print '\tbuild lstm layer: ' + str(input_size) + ' x ' + str(lstm_layer.n_out)
        self.layers.append(lstm_layer)
        self.lstm_layers.append(lstm_layer)
        self.params.extend(lstm_layer.params)
        self.delta_params.extend(lstm_layer.delta_params)
    print '2. finish lstm layer: ' + str(self.layers[-1].n_out)

    #######################
    # build dnnv layers   #
    #######################
    print '3. start to build dnnv layer: ' + str(self.hidden_layers_number)
    for i in xrange(self.hidden_layers_number):
        if i == 0:
            input_size = self.layers[-1].n_out
        else:
            input_size = self.hidden_layers_sizes[i - 1]
        input = self.layers[-1].output
        fc_layer = HiddenLayer(rng=numpy_rng, input=input,
                               n_in=input_size, n_out=self.hidden_layers_sizes[i])
        print '\tbuild dnnv layer: ' + str(input_size) + ' x ' + str(fc_layer.n_out)
        self.layers.append(fc_layer)
        self.fc_layers.append(fc_layer)
        self.params.extend(fc_layer.params)
        self.delta_params.extend(fc_layer.delta_params)
    print '3. finish dnnv layer: ' + str(self.layers[-1].n_out)

    #######################
    # build log layers    #
    #######################
    print '4. start to build log layer: 1'
    input_size = self.layers[-1].n_out
    input = self.layers[-1].output
    logLayer = OutputLayer(input=input, n_in=input_size, n_out=self.n_outs)
    print '\tbuild final layer: ' + str(input_size) + ' x ' + str(self.n_outs)
    self.layers.append(logLayer)
    self.params.extend(logLayer.params)
    self.delta_params.extend(logLayer.delta_params)
    print '4. finish log layer: ' + str(self.layers[-1].n_out)

    print 'Total layers: ' + str(len(self.layers))
    sys.stdout.flush()

    self.finetune_cost = self.layers[-1].l2(self.y)
    self.errors = self.layers[-1].errors(self.y)

    if self.l2_reg is not None:
        for i in xrange(self.hidden_layers_number):
            W = self.layers[i].W
            self.finetune_cost += self.l2_reg * T.sqr(W).sum()
import numpy as np
from layers.dataset import cifar100
from layers.full import FullLayer
from layers.softmax import SoftMaxLayer
from layers.cross_entropy import CrossEntropyLayer
from layers.sequential import Sequential
from layers.relu import ReluLayer
from layers.conv import ConvLayer
from layers.flatten import FlattenLayer
from layers.maxpool import MaxPoolLayer

# get the training and testing data
(x_train, y_train), (x_test, y_test) = cifar100(seed=1213351124)

# initialize each layer of the model
layer1 = ConvLayer(3, 16, 3)
relu1 = ReluLayer()
maxpool1 = MaxPoolLayer()
layer2 = ConvLayer(16, 32, 3)
relu2 = ReluLayer()
maxpool2 = MaxPoolLayer()
loss1 = CrossEntropyLayer()
flatten = FlattenLayer()
layer3 = FullLayer(2048, 3)
softmax1 = SoftMaxLayer()

model = Sequential(
    (
        layer1,
        relu1,
        maxpool1,
        layer2,
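# The construction above is cut off in the source. As a hedged usage sketch only (not the original
# file's continuation): the Sequential / fit / predict API used by the other scripts in this
# section suggests the assembled model would be driven roughly as follows; the epoch count and
# learning rate here are illustrative values, not taken from the original.
example_model = Sequential(
    (layer1, relu1, maxpool1, layer2, relu2, maxpool2, flatten, layer3, softmax1),
    loss=loss1)
train_loss = example_model.fit(x=x_train, y=y_train, epochs=15, lr=0.1)
test_accuracy = np.mean(example_model.predict(x_test) == y_test)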
def train(options):
    # Get logger
    log = utils.get_logger(os.path.join(options['model_dir'], 'log.txt'))

    options_file = open(os.path.join(options['dashboard_dir'], 'options'), 'w')
    options_file.write(options['description'] + '\n')
    options_file.write(
        'DKL Weight: {}\nLog Sigma^2 clipped to: [{}, {}]\n\n'.format(
            options['DKL_weight'], -options['sigma_clip'], options['sigma_clip']))
    for optn in options:
        options_file.write(optn)
        options_file.write(':\t')
        options_file.write(str(options[optn]))
        options_file.write('\n')
    options_file.close()

    # Dashboard catalog
    catalog = open(os.path.join(options['dashboard_dir'], 'catalog'), 'w')
    catalog.write("""filename,type,name
options,plain,Options
train_loss.csv,csv,Train Loss
ll.csv,csv,Neg. Log-Likelihood
dec_log_sig_sq.csv,csv,Decoder Log Sigma^2
dec_std_log_sig_sq.csv,csv,STD of Decoder Log Sigma^2
dec_mean.csv,csv,Decoder Mean
dkl.csv,csv,DKL
enc_log_sig_sq.csv,csv,Encoder Log Sigma^2
enc_std_log_sig_sq.csv,csv,STD of Encoder Log Sigma^2
enc_mean.csv,csv,Encoder Mean
val_loss.csv,csv,Validation Loss
""")
    catalog.flush()

    train_log = open(os.path.join(options['dashboard_dir'], 'train_loss.csv'), 'w')
    val_log = open(os.path.join(options['dashboard_dir'], 'val_loss.csv'), 'w')
    dkl_log = open(os.path.join(options['dashboard_dir'], 'dkl.csv'), 'w')
    ll_log = open(os.path.join(options['dashboard_dir'], 'll.csv'), 'w')
    dec_sig_log = open(os.path.join(options['dashboard_dir'], 'dec_log_sig_sq.csv'), 'w')
    enc_sig_log = open(os.path.join(options['dashboard_dir'], 'enc_log_sig_sq.csv'), 'w')
    dec_std_sig_log = open(os.path.join(options['dashboard_dir'], 'dec_std_log_sig_sq.csv'), 'w')
    enc_std_sig_log = open(os.path.join(options['dashboard_dir'], 'enc_std_log_sig_sq.csv'), 'w')
    dec_mean_log = open(os.path.join(options['dashboard_dir'], 'dec_mean.csv'), 'w')
    enc_mean_log = open(os.path.join(options['dashboard_dir'], 'enc_mean.csv'), 'w')
    # val_sig_log = open(os.path.join(options['dashboard_dir'], 'val_log_sig_sq.csv'), 'w')

    train_log.write('step,time,Train Loss\n')
    val_log.write('step,time,Validation Loss\n')
    dkl_log.write('step,time,DKL\n')
    ll_log.write('step,time,-LL\n')
    dec_sig_log.write('step,time,Decoder Log Sigma^2\n')
    enc_sig_log.write('step,time,Encoder Log Sigma^2\n')
    dec_std_sig_log.write('step,time,STD of Decoder Log Sigma^2\n')
    enc_std_sig_log.write('step,time,STD of Encoder Log Sigma^2\n')
    dec_mean_log.write('step,time,Decoder Mean\n')
    enc_mean_log.write('step,time,Encoder Mean\n')

    # Print options
    utils.print_options(options, log)

    # Load dataset ----------------------------------------------------------------------
    # Train provider
    train_provider, val_provider, test_provider = get_providers(options, log, flat=True)

    # Initialize model ------------------------------------------------------------------
    with tf.device('/gpu:0'):
        # Define inputs ----------------------------------------------------------
        model_input_batch = tf.placeholder(
            tf.float32,
            shape=[options['batch_size'], np.prod(np.array(options['img_shape']))],
            name='enc_inputs')
        sampler_input_batch = tf.placeholder(
            tf.float32,
            shape=[options['batch_size'], options['latent_dims']],
            name='dec_inputs')
        log.info('Inputs defined')

        # Feature extractor ------------------------------------------------------
        feat_layers = []
        feat_params = pickle.load(open(options['feat_params_path'], 'rb'))
        _classifier = Sequential('CNN_Classifier')
        conv_count, pool_count, fc_count = 0, 0, 0
        for lay in feat_params:
            print(lay['layer_type'])
        for i in xrange(options['num_feat_layers']):
            if feat_params[i]['layer_type'] == 'conv':
                _classifier += ConvLayer(feat_params[i]['n_filters_in'],
                                         feat_params[i]['n_filters_out'],
                                         feat_params[i]['input_dim'],
                                         feat_params[i]['filter_dim'],
                                         feat_params[i]['strides'],
                                         name='classifier_conv_%d' % conv_count)
                _classifier.layers[-1].weights['W'] = tf.constant(feat_params[i]['W'])
                _classifier.layers[-1].weights['b'] = tf.constant(feat_params[i]['b'])
                _classifier += feat_params[i]['act_fn']
                conv_count += 1
            elif feat_params[i]['layer_type'] == 'pool':
                _classifier += PoolLayer(feat_params[i]['input_dim'],
                                         feat_params[i]['filter_dim'],
                                         feat_params[i]['strides'],
                                         name='classifier_pool_%d' % i)
                pool_count += 1
                feat_layers.append(i)
            elif feat_params[i]['layer_type'] == 'fc':
                _classifier += ConstFC(feat_params[i]['W'],
                                       feat_params[i]['b'],
                                       activation=feat_params[i]['act_fn'],
                                       name='classifier_fc_%d' % fc_count)
                fc_count += 1
                feat_layers.append(i)

        # if options['feat_type'] == 'fc':
        #     feat_model = Sequential('feat_extractor')
        #     feat_params = pickle.load(open(options['feat_params_path'], 'rb'))
        #     for i in range(options['num_feat_layers']):
        #         feat_model += ConstFC(
        #             feat_params['enc_W'][i],
        #             feat_params['enc_b'][i],
        #             activation=feat_params['enc_act_fn'][i],
        #             name='feat_layer_%d' % i
        #         )
        # else:
        #     pass

        # VAE -------------------------------------------------------------------
        # VAE model
        vae_model = cupboard('vanilla_vae')(
            options['p_layers'],
            options['q_layers'],
            np.prod(options['img_shape']),
            options['latent_dims'],
            options['DKL_weight'],
            options['sigma_clip'],
            'vanilla_vae')
        # -----------------------------------------------------------------------
        feat_vae = cupboard('feat_vae')(
            vae_model,
            _classifier,
            feat_layers,
            options['DKL_weight'],
            options['vae_rec_loss_weight'],
            img_shape=options['img_shape'],
            input_channels=options['input_channels'],
            flat=False,
            name='feat_vae_model')

        log.info('Model initialized')

        # Define forward pass
        cost_function = feat_vae(model_input_batch)
        log.info('Forward pass graph built')

        # Define sampler
        sampler = feat_vae.build_sampler(sampler_input_batch)
        log.info('Sampler graph built')

        # Define optimizer
        optimizer = tf.train.AdamOptimizer(learning_rate=options['lr'])
        # optimizer = tf.train.GradientDescentOptimizer(learning_rate=options['lr'])
        # train_step = optimizer.minimize(cost_function)
        log.info('Optimizer graph built')

        # Get gradients
        grads = optimizer.compute_gradients(cost_function)
        grads = [gv for gv in grads if gv[0] != None]
        grad_tensors = [gv[0] for gv in grads]

        # Clip gradients
        clip_grads = [(tf.clip_by_norm(gv[0], 5.0, name='grad_clipping'), gv[1]) for gv in grads]

        # Update op
        backpass = optimizer.apply_gradients(clip_grads)

        # Define init operation
        init_op = tf.initialize_all_variables()
        log.info('Variable initialization graph built')

    # Define op to save and restore variables
    saver = tf.train.Saver()
    log.info('Save operation built')
    # --------------------------------------------------------------------------

    # Train loop ---------------------------------------------------------------
    with tf.Session(config=tf.ConfigProto(log_device_placement=True)) as sess:
        log.info('Session started')

        # Initialize shared variables or restore
        if options['reload']:
            saver.restore(sess, options['reload_file'])
            log.info('Shared variables restored')

            test_LL_and_DKL(sess, test_provider, feat_vae.vae.DKL,
                            feat_vae.vae.rec_loss, options, model_input_batch)
            return

            mean_img = np.load(os.path.join(options['data_dir'], 'mean' + options['extension']))
            std_img = np.load(os.path.join(options['data_dir'], 'std' + options['extension']))
            visualize(sess, feat_vae.vae.dec_mean, feat_vae.vae.dec_log_std_sq,
                      sampler, sampler_input_batch, model_input_batch,
                      feat_vae.vae.enc_mean, feat_vae.vae.enc_log_std_sq,
                      train_provider, val_provider, options, catalog, mean_img, std_img)
            return
        else:
            sess.run(init_op)
            log.info('Shared variables initialized')

        # Define last losses to compute a running average
        last_losses = np.zeros((10))

        batch_abs_idx = 0
        for epoch_idx in xrange(options['n_epochs']):
            batch_rel_idx = 0
            log.info('Epoch {}'.format(epoch_idx + 1))

            for inputs, _ in train_provider:
                batch_abs_idx += 1
                batch_rel_idx += 1

                result = sess.run(
                    # (cost_function, train_step, model.enc_std, model.enc_mean, model.encoder,
                    #  model.dec_std, model.dec_mean, model.decoder, model.rec_loss, model.DKL)
                    [cost_function,
                     backpass,
                     feat_vae.vae.DKL,
                     feat_vae.vae.rec_loss,
                     feat_vae.vae.dec_log_std_sq,
                     feat_vae.vae.enc_log_std_sq,
                     feat_vae.vae.enc_mean,
                     feat_vae.vae.dec_mean] + [gv[0] for gv in grads],
                    feed_dict={model_input_batch: inputs})

                cost = result[0]

                if batch_abs_idx % 10 == 0:
                    train_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(last_losses)))
                    dkl_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', -np.mean(result[2])))
                    ll_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', -np.mean(result[3])))
                    train_log.flush()
                    dkl_log.flush()
                    ll_log.flush()

                    dec_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[4])))
                    enc_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[5])))
                    # val_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_sig_log.flush()
                    enc_sig_log.flush()

                    dec_std_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.std(result[4])))
                    enc_std_sig_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.std(result[5])))
                    dec_mean_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[7])))
                    enc_mean_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22', np.mean(result[6])))
                    dec_std_sig_log.flush()
                    enc_std_sig_log.flush()
                    dec_mean_log.flush()
                    enc_mean_log.flush()
                    # val_sig_log.flush()

                # Check cost
                if np.isnan(cost) or np.isinf(cost):
                    log.info('NaN detected')
                    for i in range(len(result)):
                        print("\n\nresult[%d]:" % i)
                        try:
                            print(np.any(np.isnan(result[i])))
                        except:
                            pass
                        print(result[i])
                    print(result[3].shape)
                    print(model._encoder.layers[0].weights['w'].eval())
                    print('\n\nAny:')
                    print(np.any(np.isnan(result[8])))
                    print(np.any(np.isnan(result[9])))
                    print(np.any(np.isnan(result[10])))
                    print(inputs)
                    return 1., 1., 1.

                # Update last losses
                last_losses = np.roll(last_losses, 1)
                last_losses[0] = cost

                # Display training information
                if np.mod(epoch_idx, options['freq_logging']) == 0:
                    log.info('Epoch {:02}/{:02} Batch {:03} Current Loss: {:0>15.4f} Mean last losses: {:0>15.4f}'
                             .format(epoch_idx + 1, options['n_epochs'], batch_abs_idx,
                                     float(cost), np.mean(last_losses)))
                    log.info('Batch Mean LL: {:0>15.4f}'.format(np.mean(result[3], axis=0)))
                    log.info('Batch Mean -DKL: {:0>15.4f}'.format(np.mean(result[2], axis=0)))

                # Save model
                if np.mod(batch_abs_idx, options['freq_saving']) == 0:
                    saver.save(sess, os.path.join(options['model_dir'],
                                                  'model_at_%d.ckpt' % batch_abs_idx))
                    log.info('Model saved')

                # Validate model
                if np.mod(batch_abs_idx, options['freq_validation']) == 0:
                    valid_costs = []
                    seen_batches = 0
                    for val_batch, _ in val_provider:
                        val_cost = sess.run(cost_function,
                                            feed_dict={model_input_batch: val_batch})
                        valid_costs.append(val_cost)
                        seen_batches += 1
                        if seen_batches == options['valid_batches']:
                            break

                    # Print results
                    log.info('Validation loss: {:0>15.4f}'.format(float(np.mean(valid_costs))))

                    val_samples = sess.run(
                        sampler,
                        feed_dict={
                            sampler_input_batch: MVN(
                                np.zeros(options['latent_dims']),
                                np.diag(np.ones(options['latent_dims'])),
                                size=options['batch_size'])
                        })

                    val_log.write('{},{},{}\n'.format(batch_abs_idx, '2016-04-22',
                                                      np.mean(valid_costs)))
                    val_log.flush()

                    save_ae_samples(
                        catalog,
                        np.reshape(result[7], [options['batch_size']] + options['img_shape']),
                        np.reshape(inputs, [options['batch_size']] + options['img_shape']),
                        np.reshape(val_samples, [options['batch_size']] + options['img_shape']),
                        batch_abs_idx,
                        options['dashboard_dir'],
                        num_to_save=5,
                        save_gray=True)

                    # save_dash_samples(
                    #     catalog,
                    #     val_samples,
                    #     batch_abs_idx,
                    #     options['dashboard_dir'],
                    #     flat_samples=True,
                    #     img_shape=options['img_shape'],
                    #     num_to_save=5
                    # )

                    save_samples(
                        val_samples,
                        int(batch_abs_idx / options['freq_validation']),
                        os.path.join(options['model_dir'], 'valid_samples'),
                        True,
                        options['img_shape'],
                        5)

                    save_samples(
                        inputs,
                        int(batch_abs_idx / options['freq_validation']),
                        os.path.join(options['model_dir'], 'input_sanity'),
                        True,
                        options['img_shape'],
                        num_to_save=5)

                    save_samples(
                        result[7],
                        int(batch_abs_idx / options['freq_validation']),
                        os.path.join(options['model_dir'], 'rec_sanity'),
                        True,
                        options['img_shape'],
                        num_to_save=5)

            log.info('End of epoch {}'.format(epoch_idx + 1))

        # Test model --------------------------------------------------------------------------
        test_results = []
        for inputs in test_provider:
            if isinstance(inputs, tuple):
                inputs = inputs[0]
            batch_results = sess.run(
                [feat_vae.vae.DKL,
                 feat_vae.vae.rec_loss,
                 feat_vae.vae.dec_log_std_sq,
                 feat_vae.vae.enc_log_std_sq,
                 feat_vae.vae.dec_mean,
                 feat_vae.vae.enc_mean],
                feed_dict={model_input_batch: inputs})
            test_results.append(
                map(lambda p: np.mean(p, axis=1) if len(p.shape) > 1 else np.mean(p),
                    batch_results))
        test_results = map(list, zip(*test_results))

        # Print results
        log.info('Test Mean Rec. Loss: {:0>15.4f}'.format(float(np.mean(test_results[1]))))
        log.info('Test DKL: {:0>15.4f}'.format(float(np.mean(test_results[0]))))
        log.info('Test Dec. Mean Log Std Sq: {:0>15.4f}'.format(float(np.mean(test_results[2]))))
        log.info('Test Enc. Mean Log Std Sq: {:0>15.4f}'.format(float(np.mean(test_results[3]))))
        log.info('Test Dec. Mean Mean: {:0>15.4f}'.format(float(np.mean(test_results[4]))))
        log.info('Test Enc. Mean Mean: {:0>15.4f}'.format(float(np.mean(test_results[5]))))
def __init__(self, numpy_rng, theano_rng=None, batch_size=256, n_outs=500,
             conv_layer_configs=[], hidden_layers_sizes=[500, 500],
             ivec_layers_sizes=[500, 500], conv_activation=T.nnet.sigmoid,
             full_activation=T.nnet.sigmoid, use_fast=False,
             update_part=[0, 1], ivec_dim=100):
    self.conv_layers = []
    self.full_layers = []
    self.ivec_layers = []
    self.params = []
    self.delta_params = []

    if not theano_rng:
        theano_rng = RandomStreams(numpy_rng.randint(2 ** 30))

    # allocate symbolic variables for the data
    self.x = T.matrix('x')
    self.y = T.ivector('y')

    input_shape = conv_layer_configs[0]['input_shape']
    n_ins = input_shape[-1] * input_shape[-2] * input_shape[-3]

    self.iv = self.x[:, n_ins:n_ins + ivec_dim]
    self.raw = self.x[:, 0:n_ins]

    self.conv_layer_num = len(conv_layer_configs)
    self.full_layer_num = len(hidden_layers_sizes)
    self.ivec_layer_num = len(ivec_layers_sizes)

    # construct the adaptation NN
    for i in xrange(self.ivec_layer_num):
        if i == 0:
            input_size = ivec_dim
            layer_input = self.iv
        else:
            input_size = ivec_layers_sizes[i - 1]
            layer_input = self.ivec_layers[-1].output
        sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                    n_in=input_size, n_out=ivec_layers_sizes[i],
                                    activation=T.nnet.sigmoid)
        # add the layer to our list of layers
        self.ivec_layers.append(sigmoid_layer)
        if 0 in update_part:
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

    linear_func = lambda x: x
    sigmoid_layer = HiddenLayer(rng=numpy_rng, input=self.ivec_layers[-1].output,
                                n_in=ivec_layers_sizes[-1], n_out=n_ins,
                                activation=linear_func)
    self.ivec_layers.append(sigmoid_layer)
    if 0 in update_part:
        self.params.extend(sigmoid_layer.params)
        self.delta_params.extend(sigmoid_layer.delta_params)

    for i in xrange(self.conv_layer_num):
        if i == 0:
            input = self.raw + self.ivec_layers[-1].output
        else:
            input = self.conv_layers[-1].output
        config = conv_layer_configs[i]
        conv_layer = ConvLayer(numpy_rng=numpy_rng, input=input,
                               input_shape=config['input_shape'],
                               filter_shape=config['filter_shape'],
                               poolsize=config['poolsize'],
                               activation=conv_activation,
                               flatten=config['flatten'],
                               use_fast=use_fast)
        self.conv_layers.append(conv_layer)
        if 1 in update_part:
            self.params.extend(conv_layer.params)
            self.delta_params.extend(conv_layer.delta_params)

    self.conv_output_dim = (config['output_shape'][1] *
                            config['output_shape'][2] *
                            config['output_shape'][3])

    for i in xrange(self.full_layer_num):
        # construct the sigmoidal layer
        if i == 0:
            input_size = self.conv_output_dim
            layer_input = self.conv_layers[-1].output
        else:
            input_size = hidden_layers_sizes[i - 1]
            layer_input = self.full_layers[-1].output
        sigmoid_layer = HiddenLayer(rng=numpy_rng, input=layer_input,
                                    n_in=input_size, n_out=hidden_layers_sizes[i],
                                    activation=full_activation)
        # add the layer to our list of layers
        self.full_layers.append(sigmoid_layer)
        if 1 in update_part:
            self.params.extend(sigmoid_layer.params)
            self.delta_params.extend(sigmoid_layer.delta_params)

    # We now need to add a logistic layer on top of the MLP
    self.logLayer = LogisticRegression(input=self.full_layers[-1].output,
                                       n_in=hidden_layers_sizes[-1],
                                       n_out=n_outs)
    self.full_layers.append(self.logLayer)
    if 1 in update_part:
        self.params.extend(self.logLayer.params)
        self.delta_params.extend(self.logLayer.delta_params)

    self.finetune_cost = self.logLayer.negative_log_likelihood(self.y)
    self.errors = self.logLayer.errors(self.y)
from layers.relu import ReLU
from layers.softmax import SoftMax
from layers.conv import ConvLayer
from layers.max_pooling import MaxPooling

Logger.setLogLevel(Logger.INFO)

if __name__ == "__main__":
    output = 10
    input = 28 * 28

    # setup network graph
    IL = InputLayer(input)
    CONV = ConvLayer("conv", 12, 4, 4, 2, IL, 28, 28, 1)
    ACT1 = ReLU("relu", CONV)
    POOL = MaxPooling("pooling", 2, 2, 13, 13, 12, ACT1)
    FC = FCLayer("fc", 60, POOL)
    ACT2 = ReLU("relu2", FC)
    OL = FCLayer("output", output, ACT2)
    SOFTMAX = SoftMax("softmax", OL)

    train, test = readMnistData("train-images.idx3-ubyte", "train-labels.idx1-ubyte", 12)

    allTrainData = np.array([train[i].pixels for i in range(len(train))])
    average = 0  # np.average(allTrainData, 0)
    variance = 256.0  # np.var(allTrainData, axis=0) + 0.00000001

    batchSize = 100
    epoch = 20
    enableBackPropCheck = False
    learningRate = 0.1