def yolo(input_var=None):
    l_in = InputLayer(shape=(None, 1, PIXELS, PIXELS), input_var=input_var)
    l_conv = ConvLayer(l_in, num_filters=16, filter_size=3, pad=1, nonlinearity=very_leaky_rectify)
    #l_convb = ConvLayer(l_conv, num_filters=3, filter_size=3, pad=1, nonlinearity=very_leaky_rectify)
    #l_conv1 = ConvLayer(l_convb, num_filters=8, filter_size=3, pad=1, nonlinearity=rectify)
    #l_conv2 = ConvLayer(l_conv1, num_filters=8, filter_size=3, pad=1, nonlinearity=rectify)
    l_pool = MaxPool2DLayer(l_conv, pool_size=2, stride=2)
    l_convb = ConvLayer(l_pool, num_filters=32, filter_size=3, pad=1, nonlinearity=very_leaky_rectify)
    l_poolb = MaxPool2DLayer(l_convb, pool_size=2, stride=2)
    l_convc = ConvLayer(l_poolb, num_filters=64, filter_size=3, pad=1, nonlinearity=very_leaky_rectify)
    l_poolc = MaxPool2DLayer(l_convc, pool_size=2, stride=2)
    l_convd = ConvLayer(l_poolc, num_filters=128, filter_size=3, pad=1, nonlinearity=very_leaky_rectify)
    #l_dropout1 = DropoutLayer(l_pool, p=0.25)
    l_poold = MaxPool2DLayer(l_convd, pool_size=2, stride=2)
    l_conve = ConvLayer(l_poold, num_filters=256, filter_size=3, pad=1, nonlinearity=very_leaky_rectify)
    l_poole = MaxPool2DLayer(l_conve, pool_size=2, stride=2)
    l_convf = ConvLayer(l_poole, num_filters=512, filter_size=3, pad=1, nonlinearity=very_leaky_rectify)
    l_poolf = MaxPool2DLayer(l_convf, pool_size=2, stride=2)
    # Dense head: each layer is chained from the previous one so the full conv stack is used.
    l_hidden = DenseLayer(l_poolf, num_units=1024, nonlinearity=very_leaky_rectify)
    #l_dropout2 = DropoutLayer(l_hidden, p=0.5)
    l_hidden1 = DenseLayer(l_hidden, num_units=512, nonlinearity=very_leaky_rectify)
    l_hidden2 = DenseLayer(l_hidden1, num_units=256, nonlinearity=very_leaky_rectify)
    l_hidden3 = DenseLayer(l_hidden2, num_units=128, nonlinearity=very_leaky_rectify)
    l_hidden4 = DenseLayer(l_hidden3, num_units=64, nonlinearity=very_leaky_rectify)
    l_out = DenseLayer(l_hidden4, num_units=2, nonlinearity=sigmoid)
    return l_out
def build_network(): conv_defs = { 'W': lasagne.init.HeNormal('relu'), 'b': lasagne.init.Constant(0.0), 'filter_size': (3, 3), 'stride': (1, 1), 'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1) } nin_defs = { 'W': lasagne.init.HeNormal('relu'), 'b': lasagne.init.Constant(0.0), 'nonlinearity': lasagne.nonlinearities.LeakyRectify(0.1) } dense_defs = { 'W': lasagne.init.HeNormal(1.0), 'b': lasagne.init.Constant(0.0), 'nonlinearity': lasagne.nonlinearities.softmax } wn_defs = {'momentum': .999} net = InputLayer(name='input', shape=(None, 3, 32, 32)) net = GaussianNoiseLayer(net, name='noise', sigma=.15) net = WN( Conv2DLayer(net, name='conv1a', num_filters=128, pad='same', **conv_defs), **wn_defs) net = WN( Conv2DLayer(net, name='conv1b', num_filters=128, pad='same', **conv_defs), **wn_defs) net = WN( Conv2DLayer(net, name='conv1c', num_filters=128, pad='same', **conv_defs), **wn_defs) net = MaxPool2DLayer(net, name='pool1', pool_size=(2, 2)) net = DropoutLayer(net, name='drop1', p=.5) net = WN( Conv2DLayer(net, name='conv2a', num_filters=256, pad='same', **conv_defs), **wn_defs) net = WN( Conv2DLayer(net, name='conv2b', num_filters=256, pad='same', **conv_defs), **wn_defs) net = WN( Conv2DLayer(net, name='conv2c', num_filters=256, pad='same', **conv_defs), **wn_defs) net = MaxPool2DLayer(net, name='pool2', pool_size=(2, 2)) net = DropoutLayer(net, name='drop2', p=.5) net = WN( Conv2DLayer(net, name='conv3a', num_filters=512, pad=0, **conv_defs), **wn_defs) net = WN(NINLayer(net, name='conv3b', num_units=256, **nin_defs), **wn_defs) net = WN(NINLayer(net, name='conv3c', num_units=128, **nin_defs), **wn_defs) net = GlobalPoolLayer(net, name='pool3') net = WN(DenseLayer(net, name='dense', num_units=10, **dense_defs), **wn_defs) return net
def create_network(available_actions_count): # Create the input variables s1 = tensor.tensor4("State") a = tensor.vector("Action", dtype="int32") q2 = tensor.vector("Q2") r = tensor.vector("Reward") isterminal = tensor.vector("IsTerminal", dtype="int8") # Create the input layer of the network. dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]], input_var=s1) # Add 2 convolutional layers with ReLu activation dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[6, 6], nonlinearity=rectify, W=HeUniform("relu"), b=Constant(.1), stride=3) dqn = Conv2DLayer(dqn, num_filters=8, filter_size=[3, 3], nonlinearity=rectify, W=HeUniform("relu"), b=Constant(.1), stride=2) # Add a single fully-connected layer. dqn = DenseLayer(dqn, num_units=128, nonlinearity=rectify, W=HeUniform("relu"), b=Constant(.1)) # Add the output layer (also fully-connected). # (no nonlinearity as it is for approximating an arbitrary real function) dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None) # Define the loss function q = get_output(dqn) # target differs from q only for the selected action. The following means: # target_Q(s,a) = r + gamma * max Q(s2,_) if isterminal else r target_q = tensor.set_subtensor( q[tensor.arange(q.shape[0]), a], r + discount_factor * (1 - isterminal) * q2) loss = squared_error(q, target_q).mean() # Update the parameters according to the computed gradient using RMSProp. params = get_all_params(dqn, trainable=True) updates = rmsprop(loss, params, learning_rate) # Compile the theano functions print("Compiling the network ...") function_learn = theano.function([s1, q2, a, r, isterminal], loss, updates=updates, name="learn_fn") function_get_q_values = theano.function([s1], q, name="eval_fn") function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn") print("Network compiled.") def simple_get_best_action(state): return function_get_best_action( state.reshape([1, 1, resolution[0], resolution[1]])) # Returns Theano objects for the net and functions. return dqn, function_learn, function_get_q_values, simple_get_best_action
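# Hedged usage sketch (not part of the original code): one Q-learning update using the
# functions returned by create_network above. Variable names and the surrounding replay
# memory handling are assumptions for illustration only.
import numpy as np

net, learn, get_q_values, get_best_action = create_network(available_actions_count)

def learn_from_transition(s1, a, s2, isterminal, r):
    # Q2 = max_a' Q(s2, a'); terminal transitions are zeroed via (1 - isterminal) inside the loss
    q2 = np.max(get_q_values(s2), axis=1)
    learn(s1, q2, a, r, isterminal)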
def VGG_16(num_of_classes, input_var=None): net = {} net['input'] = InputLayer(shape=(None, 3, 224, 224), input_var=input_var) net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False) net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False) net['pool1'] = PoolLayer(net['conv1_2'], 2) net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False) net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False) net['pool2'] = PoolLayer(net['conv2_2'], 2) net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False) net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False) net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False) net['pool3'] = PoolLayer(net['conv3_3'], 2) net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False) net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False) net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False) net['pool4'] = PoolLayer(net['conv4_3'], 2) net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False) net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False) net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False) net['pool5'] = PoolLayer(net['conv5_3'], 2) net['fc6'] = DenseLayer(net['pool5'], num_units=4096) net['fc6_dropout'] = DropoutLayer(net['fc6'], p=0.5) net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096) net['fc7_dropout'] = DropoutLayer(net['fc7'], p=0.5) net['fc8'] = DenseLayer(net['fc7_dropout'], num_units=num_of_classes, nonlinearity=None) net['prob'] = NonlinearityLayer(net['fc8'], softmax) return net
rng = np.random

### start to build the CNN network
x1 = T.tensor4('x1', dtype='float64')
y1 = T.vector('y1', dtype='int64')
batchsize = 100

l0 = InputLayer(shape=(None, 3, 128, 128), input_var=x1)
l1 = Conv2DLayer(l0, 48, (5, 5), nonlinearity=very_leaky_rectify, W=GlorotUniform('relu'))
l2 = MaxPool2DLayer(l1, (2, 2))
l3 = Conv2DLayer(l2, 64, (5, 5), nonlinearity=very_leaky_rectify, W=GlorotUniform('relu'))
l4 = MaxPool2DLayer(l3, (2, 2))
l5 = Conv2DLayer(l4, 96, (5, 5), nonlinearity=very_leaky_rectify, W=GlorotUniform('relu'))
l6 = MaxPool2DLayer(l5, (3, 3))
l7 = DenseLayer(l6, 512, nonlinearity=very_leaky_rectify, W=lasagne.init.GlorotNormal())
#l7_5 = cyclicpool(l7)
#l7_5 = lasagne.layers.DropoutLayer(l7)
l8 = DenseLayer(l7, 2, nonlinearity=softmax)

rate = theano.shared(.0002)
params = lasagne.layers.get_all_params(l8)
prediction = lasagne.layers.get_output(l8)
loss = lasagne.objectives.categorical_crossentropy(prediction, y1)
loss = loss.mean()
updates_sgd = adagrad(loss, params, learning_rate=rate)
updates = apply_nesterov_momentum(updates_sgd, params, momentum=0.9)
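# A minimal sketch (not part of the original snippet) of compiling the training step and a
# prediction function from the symbolic graph defined above; x1, y1, prediction, loss and
# updates are the names introduced just before.
train_fn = theano.function([x1, y1], loss, updates=updates, allow_input_downcast=True)
predict_fn = theano.function([x1], T.argmax(prediction, axis=1), allow_input_downcast=True)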
def build_rnn_network(rnnmodel, X_sym, hid_init_sym): net = {} net['input0'] = InputLayer((batch_size, seq_len), X_sym) net['input'] = lasagne.layers.EmbeddingLayer( net['input0'], outputclass, units[0]) #,W=lasagne.init.Uniform(inial_scale) net['rnn0'] = DimshuffleLayer( net['input'], (1, 0, 2)) #change to (time, batch_size,hidden_units) for l in range(1, num_layers + 1): net['hiddeninput%d' % l] = InputLayer( (batch_size, units[l - 1]), hid_init_sym[:, acc_units[l - 1]:acc_units[l]]) net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)], (batch_size * seq_len, -1)) net['rnn%d' % (l - 1)] = DenseLayer( net['rnn%d' % (l - 1)], units[l - 1], W=ini_W, b=lasagne.init.Constant(args.ini_b), nonlinearity=None) #W=Uniform(ini_rernn_in_to_hid), # net['rnn%d' % (l - 1)] = ReshapeLayer(net['rnn%d' % (l - 1)], (seq_len, batch_size, -1)) if args.use_residual and l > args.residual_layers and ( l - 1) % args.residual_layers == 0: # and l!=num_layers if units[l - 1] != units[l - 1 - args.residual_layers]: net['leftbranch%d' % (l - 1)] = ReshapeLayer( net['sum%d' % (l - args.residual_layers)], (batch_size * seq_len, -1)) net['leftbranch%d' % (l - 1)] = DenseLayer(net['leftbranch%d' % (l - 1)], units[l - 1], W=ini_W, nonlinearity=None) net['leftbranch%d' % (l - 1)] = ReshapeLayer( net['leftbranch%d' % (l - 1)], (seq_len, batch_size, -1)) net['leftbranch%d' % (l - 1)] = BatchNorm_step_timefirst_Layer( net['leftbranch%d' % (l - 1)], axes=(0, 1)) print('left branch') else: net['leftbranch%d' % (l - 1)] = net['sum%d' % (l - args.residual_layers)] net['sum%d' % l] = ElemwiseSumLayer( (net['rnn%d' % (l - 1)], net['leftbranch%d' % (l - 1)])) else: net['sum%d' % l] = net['rnn%d' % (l - 1)] net['rnn%d' % l] = net['sum%d' % l] if not args.use_bn_afterrnn: net['rnn%d' % l] = BatchNorm_step_timefirst_Layer( net['rnn%d' % l], axes=(0, 1), beta=lasagne.init.Constant(args.ini_b)) ini_hid_start = 0 if act == tanh: ini_hid_start = -1 * U_bound net['rnn%d' % l] = rnnmodel(net['rnn%d' % l], units[l - 1], hid_init=net['hiddeninput%d' % l], W_hid_to_hid=Uniform(range=(ini_hid_start, U_bound)), nonlinearity=act, only_return_final=False, grad_clipping=args.gradclipvalue) net['last_state%d' % l] = SliceLayer(net['rnn%d' % l], -1, axis=0) if l == 1: net['hid_out'] = net['last_state%d' % l] else: net['hid_out'] = ConcatLayer( [net['hid_out'], net['last_state%d' % l]], axis=1) if use_dropout and l % droplayers == 0: net['rnn%d' % l] = lasagne.layers.DropoutLayer(net['rnn%d' % l], p=droprate, shared_axes=taxdrop) if args.use_bn_afterrnn: net['rnn%d' % l] = BatchNorm_step_timefirst_Layer(net['rnn%d' % l], axes=(0, 1)) net['rnn%d' % num_layers] = DimshuffleLayer(net['rnn%d' % num_layers], (1, 0, 2)) net['reshape_rnn'] = ReshapeLayer(net['rnn%d' % num_layers], (-1, units[num_layers - 1])) net['out'] = DenseLayer( net['reshape_rnn'], outputclass, nonlinearity=softmax ) #lasagne.init.HeNormal(gain='relu'))#,W=Uniform(inial_scale) return net
def build_lstm_network(rnnmodel):
    net = {}
    net['input'] = InputLayer((batch_size, seq_len, feature_size))
    net['rnn'] = rnnmodel(net['input'], hidden_units,
                          forgetgate=lasagne.layers.Gate(b=lasagne.init.Constant(1.)),
                          peepholes=False, only_return_final=True,
                          grad_clipping=args.gradclipvalue)
    net['out'] = DenseLayer(net['rnn'], outputclass, nonlinearity=softmax)
    return net
def __init__(self, input, emb_layer='fc7-1', **kwargs): """Initialize the parameters :type rng: numpy.random.RandomState :param rng: a random number generator used to initialize weights :type input: theano.tensor.TensorType :param input: symbolic variable that describes the input of the architecture (one minibatch) ..................... . .. ... .... """ self.hasSupervised = False self.hasUnsupervised = False self.net = {} self.net['input'] = InputLayer((None, 3, 16, 112, 112), input_var=input) # ----------- 1st layer group --------------- self.net['conv1a'] = Conv3DDNNLayer( self.net['input'], 64, (3, 3, 3), pad=1, nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False) self.net['pool1'] = MaxPool3DDNNLayer(self.net['conv1a'], pool_size=(1, 2, 2), stride=(1, 2, 2)) # ------------- 2nd layer group -------------- self.net['conv2a'] = Conv3DDNNLayer( self.net['pool1'], 128, (3, 3, 3), pad=1, nonlinearity=lasagne.nonlinearities.rectify) self.net['pool2'] = MaxPool3DDNNLayer(self.net['conv2a'], pool_size=(2, 2, 2), stride=(2, 2, 2)) # ----------------- 3rd layer group -------------- self.net['conv3a'] = Conv3DDNNLayer( self.net['pool2'], 256, (3, 3, 3), pad=1, nonlinearity=lasagne.nonlinearities.rectify) self.net['conv3b'] = Conv3DDNNLayer( self.net['conv3a'], 256, (3, 3, 3), pad=1, nonlinearity=lasagne.nonlinearities.rectify) self.net['pool3'] = MaxPool3DDNNLayer(self.net['conv3b'], pool_size=(2, 2, 2), stride=(2, 2, 2)) # ----------------- 4th layer group -------------- self.net['conv4a'] = Conv3DDNNLayer( self.net['pool3'], 512, (3, 3, 3), pad=1, nonlinearity=lasagne.nonlinearities.rectify) self.net['conv4b'] = Conv3DDNNLayer( self.net['conv4a'], 512, (3, 3, 3), pad=1, nonlinearity=lasagne.nonlinearities.rectify) self.net['pool4'] = MaxPool3DDNNLayer(self.net['conv4b'], pool_size=(2, 2, 2), stride=(2, 2, 2)) # ----------------- 5th layer group -------------- self.net['conv5a'] = Conv3DDNNLayer( self.net['pool4'], 512, (3, 3, 3), pad=1, nonlinearity=lasagne.nonlinearities.rectify) self.net['conv5b'] = Conv3DDNNLayer( self.net['conv5a'], 512, (3, 3, 3), pad=1, nonlinearity=lasagne.nonlinearities.rectify) # We need a padding layer, as C3D only pads on the right, which cannot be done with a theano pooling layer self.net['pad'] = PadLayer(self.net['conv5b'], width=[(0, 1), (0, 1)], batch_ndim=3) self.net['pool5'] = MaxPool3DDNNLayer(self.net['pad'], pool_size=(2, 2, 2), pad=(0, 0, 0), stride=(2, 2, 2)) self.net['fc6-1'] = DenseLayer( self.net['pool5'], num_units=4096, nonlinearity=lasagne.nonlinearities.rectify) self.net['fc7-1'] = DenseLayer( self.net['fc6-1'], num_units=4096, nonlinearity=lasagne.nonlinearities.rectify) self.net['fc8-1'] = DenseLayer(self.net['fc7-1'], num_units=487, nonlinearity=None) self.net['prob'] = NonlinearityLayer(self.net['fc8-1'], softmax) self.embedding = lasagne.layers.get_output( self.net[emb_layer]).flatten(ndim=2) with open('data/c3d_model.pkl') as f: model = pickle.load(f) lasagne.layers.set_all_param_values(self.net['prob'], model, trainable=True)
def _get_l_out(self, input_vars): check_options(self.options) id_tag = (self.id + '/') if self.id else '' prev_output_var, mask_var = input_vars[-2:] color_input_vars = input_vars[:-2] context_len = self.context_len if hasattr(self, 'context_len') else 1 l_color_repr, color_inputs = self.color_vec.get_input_layer( color_input_vars, recurrent_length=self.seq_vec.max_len - 1, cell_size=self.options.speaker_cell_size, context_len=context_len, id=self.id) l_hidden_color = dimshuffle(l_color_repr, (0, 2, 1)) for i in range(1, self.options.speaker_hidden_color_layers + 1): l_hidden_color = NINLayer( l_hidden_color, num_units=self.options.speaker_cell_size, nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity], name=id_tag + 'hidden_color%d' % i) l_hidden_color = dimshuffle(l_hidden_color, (0, 2, 1)) l_prev_out = InputLayer(shape=(None, self.seq_vec.max_len - 1), input_var=prev_output_var, name=id_tag + 'prev_input') l_prev_embed = EmbeddingLayer( l_prev_out, input_size=len(self.seq_vec.tokens), output_size=self.options.speaker_cell_size, name=id_tag + 'prev_embed') l_in = ConcatLayer([l_hidden_color, l_prev_embed], axis=2, name=id_tag + 'color_prev') l_mask_in = InputLayer(shape=(None, self.seq_vec.max_len - 1), input_var=mask_var, name=id_tag + 'mask_input') l_rec_drop = l_in cell = CELLS[self.options.speaker_cell] cell_kwargs = { 'mask_input': (None if self.options.speaker_no_mask else l_mask_in), 'grad_clipping': self.options.speaker_grad_clipping, 'num_units': self.options.speaker_cell_size, } if self.options.speaker_cell == 'LSTM': cell_kwargs['forgetgate'] = Gate( b=Constant(self.options.speaker_forget_bias)) if self.options.speaker_cell != 'GRU': cell_kwargs['nonlinearity'] = NONLINEARITIES[ self.options.speaker_nonlinearity] for i in range(1, self.options.speaker_recurrent_layers): l_rec = cell(l_rec_drop, name=id_tag + 'rec%d' % i, **cell_kwargs) if self.options.speaker_dropout > 0.0: l_rec_drop = DropoutLayer(l_rec, p=self.options.speaker_dropout, name=id_tag + 'rec%d_drop' % i) else: l_rec_drop = l_rec l_rec = cell(l_rec_drop, name=id_tag + 'rec%d' % self.options.speaker_recurrent_layers, **cell_kwargs) l_shape = ReshapeLayer(l_rec, (-1, self.options.speaker_cell_size), name=id_tag + 'reshape') l_hidden_out = l_shape for i in range(1, self.options.speaker_hidden_out_layers + 1): l_hidden_out = DenseLayer( l_hidden_out, num_units=self.options.speaker_cell_size, nonlinearity=NONLINEARITIES[self.options.speaker_nonlinearity], name=id_tag + 'hidden_out%d' % i) l_softmax = DenseLayer(l_hidden_out, num_units=len(self.seq_vec.tokens), nonlinearity=softmax, name=id_tag + 'softmax') l_out = ReshapeLayer( l_softmax, (-1, self.seq_vec.max_len - 1, len(self.seq_vec.tokens)), name=id_tag + 'out') return l_out, color_inputs + [l_prev_out, l_mask_in]
def build_model(input_var, output_dim): net = {} net['input'] = InputLayer((None, 3, 224, 224), input_var=input_var) sub_net, parent_layer_name = build_simple_block( net['input'], ['conv1', 'bn_conv1', 'conv1_relu'], 64, 7, 3, 2, use_bias=True) net.update(sub_net) net['pool1'] = PoolLayer(net[parent_layer_name], pool_size=3, stride=2, pad=0, mode='max', ignore_border=False) block_size = list('abc') parent_layer_name = 'pool1' for c in block_size: if c == 'a': sub_net, parent_layer_name = build_residual_block( net[parent_layer_name], 1, 1, True, 4, ix='2%s' % c) else: sub_net, parent_layer_name = build_residual_block( net[parent_layer_name], 1.0 / 4, 1, False, 4, ix='2%s' % c) net.update(sub_net) block_size = list('abcd') for c in block_size: if c == 'a': sub_net, parent_layer_name = build_residual_block( net[parent_layer_name], 1.0 / 2, 1.0 / 2, True, 4, ix='3%s' % c) else: sub_net, parent_layer_name = build_residual_block( net[parent_layer_name], 1.0 / 4, 1, False, 4, ix='3%s' % c) net.update(sub_net) block_size = list('abcdef') for c in block_size: if c == 'a': sub_net, parent_layer_name = build_residual_block( net[parent_layer_name], 1.0 / 2, 1.0 / 2, True, 4, ix='4%s' % c) else: sub_net, parent_layer_name = build_residual_block( net[parent_layer_name], 1.0 / 4, 1, False, 4, ix='4%s' % c) net.update(sub_net) block_size = list('abc') for c in block_size: if c == 'a': sub_net, parent_layer_name = build_residual_block( net[parent_layer_name], 1.0 / 2, 1.0 / 2, True, 4, ix='5%s' % c) else: sub_net, parent_layer_name = build_residual_block( net[parent_layer_name], 1.0 / 4, 1, False, 4, ix='5%s' % c) net.update(sub_net) net['pool5'] = PoolLayer(net[parent_layer_name], pool_size=7, stride=1, pad=0, mode='average_exc_pad', ignore_border=False) net['fc1000'] = DenseLayer(net['pool5'], num_units=output_dim, nonlinearity=None) net['prob'] = NonlinearityLayer(net['fc1000'], nonlinearity=softmax) return net
    return net


# Load model weights and metadata
d = pickle.load(open('../input/pretrained/vgg16.pkl'))

# Build the network and fill with pretrained weights
net = build_model()

# Define loss function and metrics, and get an updates dictionary
X_sym = T.tensor4()
y_sym = T.ivector()

# We'll connect our output classifier to the last fully connected layer of the network
net['new_output'] = DenseLayer(net['drop7'], num_units=8, nonlinearity=softmax,
                               W=lasagne.init.Normal(0.01))

prediction = lasagne.layers.get_output(net['new_output'], X_sym)
loss = lasagne.objectives.categorical_crossentropy(prediction, y_sym)
loss = loss.mean()

acc = T.mean(T.eq(T.argmax(prediction, axis=1), y_sym), dtype=theano.config.floatX)

learning_rate = theano.shared(np.array(0.0003, dtype=theano.config.floatX))
learning_rate_decay = np.array(0.3, dtype=theano.config.floatX)
updates = OrderedDict()

for name, layer in net.items():
    layer_params = layer.get_params(trainable=True)
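# The snippet above is cut off inside the per-layer loop. A hedged sketch of how such a
# loop is often completed for fine-tuning; the exponential-decay schedule below is an
# assumption for illustration, not taken from the original code.
for idx, (name, layer) in enumerate(net.items()):
    layer_params = layer.get_params(trainable=True)
    if not layer_params:
        continue
    # hypothetical schedule: layers closer to the input get a smaller learning rate
    layer_lr = learning_rate * (learning_rate_decay ** (len(net) - 1 - idx))
    updates.update(lasagne.updates.sgd(loss, layer_params, learning_rate=layer_lr))

train_fn = theano.function([X_sym, y_sym], [loss, acc], updates=updates)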
def build_google(input_var): net = {} net['input'] = InputLayer((None, 3, 224, 224), input_var) net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=3, flip_filters=False) net['pool1/3x3_s2'] = PoolLayer(net['conv1/7x7_s2'], pool_size=3, stride=2, ignore_border=False) net['pool1/norm1'] = LRNLayer(net['pool1/3x3_s2'], alpha=0.00002, k=1) net['conv2/3x3_reduce'] = ConvLayer(net['pool1/norm1'], 64, 1, flip_filters=False) net['conv2/3x3'] = ConvLayer(net['conv2/3x3_reduce'], 192, 3, pad=1, flip_filters=False) net['conv2/norm2'] = LRNLayer(net['conv2/3x3'], alpha=0.00002, k=1) net['pool2/3x3_s2'] = PoolLayer(net['conv2/norm2'], pool_size=3, stride=2, ignore_border=False) net.update( build_inception_module('inception_3a', net['pool2/3x3_s2'], [32, 64, 96, 128, 16, 32])) net.update( build_inception_module('inception_3b', net['inception_3a/output'], [64, 128, 128, 192, 32, 96])) net['pool3/3x3_s2'] = PoolLayer(net['inception_3b/output'], pool_size=3, stride=2, ignore_border=False) net.update( build_inception_module('inception_4a', net['pool3/3x3_s2'], [64, 192, 96, 208, 16, 48])) net.update( build_inception_module('inception_4b', net['inception_4a/output'], [64, 160, 112, 224, 24, 64])) net.update( build_inception_module('inception_4c', net['inception_4b/output'], [64, 128, 128, 256, 24, 64])) net.update( build_inception_module('inception_4d', net['inception_4c/output'], [64, 112, 144, 288, 32, 64])) net.update( build_inception_module('inception_4e', net['inception_4d/output'], [128, 256, 160, 320, 32, 128])) net['pool4/3x3_s2'] = PoolLayer(net['inception_4e/output'], pool_size=3, stride=2, ignore_border=False) net.update( build_inception_module('inception_5a', net['pool4/3x3_s2'], [128, 256, 160, 320, 32, 128])) net.update( build_inception_module('inception_5b', net['inception_5a/output'], [128, 384, 192, 384, 48, 128])) net['pool5/7x7_s1'] = GlobalPoolLayer(net['inception_5b/output']) net['loss3/classifier'] = DenseLayer(net['pool5/7x7_s1'], num_units=1000, nonlinearity=linear) net['prob'] = NonlinearityLayer(net['loss3/classifier'], nonlinearity=softmax) return net
def create_model(ae, s2_ae, input_shape, input_var, mask_shape, mask_var, s2_shape, s2_var, lstm_size=250, win=T.iscalar('theta)'), output_classes=26, fusiontype='concat', w_init_fn=las.init.Orthogonal(), use_peepholes=True): bn_weights, bn_biases, bn_shapes, bn_nonlinearities = ae s2_weights, s2_biases, s2_shapes, s2_nonlinearities = s2_ae gate_parameters = Gate(W_in=w_init_fn, W_hid=w_init_fn, b=las.init.Constant(0.)) cell_parameters = Gate( W_in=w_init_fn, W_hid=w_init_fn, # Setting W_cell to None denotes that no cell connection will be used. W_cell=None, b=las.init.Constant(0.), # By convention, the cell nonlinearity is tanh in an LSTM. nonlinearity=tanh) l_s1 = InputLayer(input_shape, input_var, 's1_im') l_mask = InputLayer(mask_shape, mask_var, 'mask') l_s2 = InputLayer(s2_shape, s2_var, 's2_im') symbolic_batchsize_s1 = l_s1.input_var.shape[0] symbolic_seqlen_s1 = l_s1.input_var.shape[1] symbolic_batchsize_s2 = l_s2.input_var.shape[0] symbolic_seqlen_s2 = l_s2.input_var.shape[1] l_reshape1_s1 = ReshapeLayer(l_s1, (-1, input_shape[-1]), name='reshape1_s1') l_encoder_s1 = create_pretrained_encoder( l_reshape1_s1, bn_weights, bn_biases, bn_shapes, bn_nonlinearities, ['fc1_s1', 'fc2_s1', 'fc3_s1', 'bottleneck_s1']) s1_len = las.layers.get_output_shape(l_encoder_s1)[-1] l_reshape2_s1 = ReshapeLayer( l_encoder_s1, (symbolic_batchsize_s1, symbolic_seqlen_s1, s1_len), name='reshape2_s1') l_delta_s1 = DeltaLayer(l_reshape2_s1, win, name='delta_s1') # s2 images l_reshape1_s2 = ReshapeLayer(l_s2, (-1, s2_shape[-1]), name='reshape1_s2') l_encoder_s2 = create_pretrained_encoder( l_reshape1_s2, s2_weights, s2_biases, s2_shapes, s2_nonlinearities, ['fc1_s2', 'fc2_s2', 'fc3_s2', 'bottleneck_s2']) s2_len = las.layers.get_output_shape(l_encoder_s2)[-1] l_reshape2_s2 = ReshapeLayer( l_encoder_s2, (symbolic_batchsize_s2, symbolic_seqlen_s2, s2_len), name='reshape2_s2') l_delta_s2 = DeltaLayer(l_reshape2_s2, win, name='delta_s2') l_lstm_s1 = LSTMLayer( l_delta_s1, int(lstm_size), peepholes=use_peepholes, # We need to specify a separate input for masks mask_input=l_mask, # Here, we supply the gate parameters for each gate ingate=gate_parameters, forgetgate=gate_parameters, cell=cell_parameters, outgate=gate_parameters, # We'll learn the initialization and use gradient clipping learn_init=True, grad_clipping=5., name='lstm_s1') l_lstm_s2 = LSTMLayer( l_delta_s2, lstm_size, peepholes=use_peepholes, # We need to specify a separate input for masks mask_input=l_mask, # Here, we supply the gate parameters for each gate ingate=gate_parameters, forgetgate=gate_parameters, cell=cell_parameters, outgate=gate_parameters, # We'll learn the initialization and use gradient clipping learn_init=True, grad_clipping=5., name='lstm_s2') # We'll combine the forward and backward layer output by summing. # Merge layers take in lists of layers to merge as input. if fusiontype == 'adasum': l_fuse = AdaptiveElemwiseSumLayer([l_lstm_s1, l_lstm_s2], name='adasum1') elif fusiontype == 'sum': l_fuse = ElemwiseSumLayer([l_lstm_s1, l_lstm_s2], name='sum1') elif fusiontype == 'concat': l_fuse = ConcatLayer([l_lstm_s1, l_lstm_s2], axis=-1, name='concat') f_lstm_agg, b_lstm_agg = create_blstm(l_fuse, l_mask, lstm_size, cell_parameters, gate_parameters, 'lstm_agg') l_sum2 = ElemwiseSumLayer([f_lstm_agg, b_lstm_agg], name='sum2') # reshape to (num_examples * seq_len, lstm_size) l_reshape3 = ReshapeLayer(l_sum2, (-1, lstm_size), name='reshape3') # Now, we can apply feed-forward layers as usual. 
    # We want the network to predict a classification for the sequence,
    # so we'll use a dense softmax layer with one unit per class.
    l_softmax = DenseLayer(l_reshape3, num_units=output_classes,
                           nonlinearity=las.nonlinearities.softmax, name='softmax')

    l_out = ReshapeLayer(l_softmax, (-1, symbolic_seqlen_s1, output_classes), name='output')

    return l_out, l_fuse
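# A minimal usage sketch (an assumption, not part of the original code): given l_out from
# create_model above and the symbolic variables it was built with (input_var, mask_var,
# s2_var, win), average the per-timestep class probabilities over time to get one
# predicted class per sequence.
import theano
import theano.tensor as T
import lasagne as las

probs = las.layers.get_output(l_out, deterministic=True)   # (batch, seq_len, classes)
seq_probs = T.mean(probs, axis=1)                           # pool the softmax over time
predicted = T.argmax(seq_probs, axis=1)
predict_fn = theano.function([input_var, mask_var, s2_var, win], predicted,
                             on_unused_input='warn')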
def execute(dataset, n_hidden_t_enc, n_hidden_s, num_epochs=500, learning_rate=.001, learning_rate_annealing=1.0, gamma=1, lmd=0., disc_nonlinearity="sigmoid", keep_labels=1.0, prec_recall_cutoff=True, missing_labels_val=-1.0, which_fold=1, early_stop_criterion='loss', dataset_path='/Tmp/carriepl/datasets/', save_path='/data/lisatmp4/romerosa/DietNetworks/', resume=False, exp_name=None): # Load the dataset print("Loading data") x_train, y_train, x_valid, y_valid, x_test, y_test, \ x_unsup, training_labels = mlh.load_data( dataset, dataset_path, None, which_fold=which_fold, keep_labels=keep_labels, missing_labels_val=missing_labels_val, embedding_input='raw') # Extract required information from data n_samples, n_feats = x_train.shape print("Number of features : ", n_feats) print("Glorot init : ", 2.0 / (n_feats + n_hidden_t_enc[-1])) n_targets = y_train.shape[1] # Set some variables batch_size = 138 # Preparing folder to save stuff exp_name = 'basic_' + mlh.define_exp_name( keep_labels, 0, 0, gamma, lmd, [], n_hidden_t_enc, [], n_hidden_s, which_fold, learning_rate, 0, 0, early_stop_criterion, learning_rate_annealing) print("Experiment: " + exp_name) save_path = os.path.join(save_path, dataset, exp_name) print(save_path) if not os.path.exists(save_path): os.makedirs(save_path) # Prepare Theano variables for inputs and targets input_var_sup = T.matrix('input_sup') target_var_sup = T.matrix('target_sup') lr = theano.shared(np.float32(learning_rate), 'learning_rate') # Build model print("Building model") discrim_net = InputLayer((None, n_feats), input_var_sup) discrim_net = DenseLayer(discrim_net, num_units=n_hidden_t_enc[-1], nonlinearity=rectify) # Reconstruct the input using dec_feat_emb if gamma > 0: reconst_net = DenseLayer(discrim_net, num_units=n_feats, nonlinearity=linear) nets = [reconst_net] else: nets = [None] # Add supervised hidden layers for hid in n_hidden_s: discrim_net = DropoutLayer(discrim_net) discrim_net = DenseLayer(discrim_net, num_units=hid) assert disc_nonlinearity in ["sigmoid", "linear", "rectify", "softmax"] discrim_net = DropoutLayer(discrim_net) discrim_net = DenseLayer(discrim_net, num_units=n_targets, nonlinearity=eval(disc_nonlinearity)) # Load best model with np.load(os.path.join(save_path, 'model_best.npz')) as f: param_values = [f['arr_%d' % i] for i in range(len(f.files))] lasagne.layers.set_all_param_values( filter(None, nets) + [discrim_net], param_values) print("Building and compiling training functions") # Build and compile functions predictions, predictions_det = mh.define_predictions(nets, start=0) prediction_sup, prediction_sup_det = mh.define_predictions([discrim_net]) prediction_sup = prediction_sup[0] prediction_sup_det = prediction_sup_det[0] # Define losses # reconstruction losses _, reconst_losses_det = mh.define_reconst_losses(predictions, predictions_det, [input_var_sup]) # supervised loss _, sup_loss_det = mh.define_sup_loss(disc_nonlinearity, prediction_sup, prediction_sup_det, keep_labels, target_var_sup, missing_labels_val) inputs = [input_var_sup, target_var_sup] params = lasagne.layers.get_all_params([discrim_net] + filter(None, nets), trainable=True) # Combine losses loss_det = sup_loss_det + gamma * reconst_losses_det[0] l2_penalty = apply_penalty(params, l2) loss_det = loss_det + lmd * l2_penalty # Monitoring Labels monitor_labels = ["reconst. loss"] monitor_labels = [ i for i, j in zip(monitor_labels, reconst_losses_det) if j != 0 ] monitor_labels += ["loss. 
sup.", "total loss"] # Build and compile test function val_outputs = reconst_losses_det val_outputs = [ i for i, j in zip(val_outputs, reconst_losses_det) if j != 0 ] val_outputs += [sup_loss_det, loss_det] # Compute accuracy and add it to monitoring list test_acc, test_pred = mh.define_test_functions(disc_nonlinearity, prediction_sup, prediction_sup_det, target_var_sup) monitor_labels.append("accuracy") val_outputs.append(test_acc) # Compile prediction function predict = theano.function([input_var_sup], test_pred) # Compile validation function val_fn = theano.function(inputs, [prediction_sup_det] + val_outputs, on_unused_input='ignore') # Finally, launch the training loop. print("Starting testing...") test_minibatches = mlh.iterate_minibatches(x_test, y_test, batch_size, shuffle=False) test_err, pred, targets = mlh.monitoring(test_minibatches, "test", val_fn, monitor_labels, prec_recall_cutoff, return_pred=True) lab = targets.argmax(1) pred_argmax = pred.argmax(1) continent_cat = mh.create_1000_genomes_continent_labels() lab_cont = np.zeros(lab.shape) pred_cont = np.zeros(pred_argmax.shape) for i, c in enumerate(continent_cat): for el in c: lab_cont[lab == el] = i pred_cont[pred_argmax == el] = i cm_e = np.zeros((26, 26)) cm_c = np.zeros((5, 5)) for i in range(26): for j in range(26): cm_e[i, j] = ((pred_argmax == i) * (lab == j)).sum() for i in range(5): for j in range(5): cm_c[i, j] = ((pred_cont == i) * (lab_cont == j)).sum() np.savez(os.path.join(save_path, 'cm' + str(which_fold) + '.npz'), cm_e=cm_e, cm_c=cm_c) print(os.path.join(save_path, 'cm' + str(which_fold) + '.npz'))
def build_network(): from lasagne.layers import Conv2DLayer as ConvLayer from lasagne.layers import MaxPool2DLayer as PoolLayer net = {} net['input'] = InputLayer((None, 3, 448, 448)) net['conv1/7x7_s2'] = ConvLayer(net['input'], 64, 7, stride=2, pad=2, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['pool1/2x2_s2'] = PoolLayer(net['conv1/7x7_s2'], pool_size=2, stride=2, ignore_border=False) net['conv2/3x3_s1'] = ConvLayer(net['pool1/2x2_s2'], 192, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['pool2/2x2_s2'] = PoolLayer(net['conv2/3x3_s1'], pool_size=2, stride=2, ignore_border=False) net['conv3/1x1_s1'] = ConvLayer(net['pool2/2x2_s2'], 128, 1, stride=1, pad=0, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv4/3x3_s1'] = ConvLayer(net['conv3/1x1_s1'], 256, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv5/1x1_s1'] = ConvLayer(net['conv4/3x3_s1'], 256, 1, stride=1, pad=0, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv6/3x3_s1'] = ConvLayer(net['conv5/1x1_s1'], 512, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['pool3/2x2_s2'] = PoolLayer(net['conv6/3x3_s1'], pool_size=2, stride=2, ignore_border=False) ## 4 - times net['conv7/1x1_s1'] = ConvLayer(net['pool3/2x2_s2'], 256, 1, stride=1, pad=0, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv8/3x3_s1'] = ConvLayer(net['conv7/1x1_s1'], 512, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv9/1x1_s1'] = ConvLayer(net['conv8/3x3_s1'], 256, 1, stride=1, pad=0, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv10/3x3_s1'] = ConvLayer(net['conv9/1x1_s1'], 512, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv11/1x1_s1'] = ConvLayer(net['conv10/3x3_s1'], 256, 1, stride=1, pad=0, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv12/3x3_s1'] = ConvLayer(net['conv11/1x1_s1'], 512, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv13/1x1_s1'] = ConvLayer(net['conv12/3x3_s1'], 256, 1, stride=1, pad=0, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv14/3x3_s1'] = ConvLayer(net['conv13/1x1_s1'], 512, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) #### net['conv15/1x1_s1'] = ConvLayer(net['conv14/3x3_s1'], 512, 1, stride=1, pad=0, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv16/3x3_s1'] = ConvLayer(net['conv15/1x1_s1'], 1024, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) ## maxpool 4 ===> net['pool4/2x2_s2'] = PoolLayer(net['conv16/3x3_s1'], pool_size=2, stride=2, ignore_border=False) ## 2 - times net['conv17/1x1_s1'] = ConvLayer(net['pool4/2x2_s2'], 512, 1, stride=1, pad=0, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv18/3x3_s1'] = ConvLayer(net['conv17/1x1_s1'], 1024, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv19/1x1_s1'] = ConvLayer(net['conv18/3x3_s1'], 512, 1, stride=1, pad=0, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv20/3x3_s1'] = ConvLayer(net['conv19/1x1_s1'], 1024, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) #### net['conv21/3x3_s1'] = ConvLayer(net['conv20/3x3_s1'], 1024, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv22/3x3_s2'] = ConvLayer(net['conv21/3x3_s1'], 1024, 3, stride=2, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv23/3x3_s1'] = 
ConvLayer(net['conv22/3x3_s2'], 1024, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) net['conv24/3x3_s1'] = ConvLayer(net['conv23/3x3_s1'], 1024, 3, stride=1, pad=1, flip_filters=False, nonlinearity=LeakyRectify(0.1)) # dense layer net['dense1'] = DenseLayer(net['conv24/3x3_s1'], num_units=4096, nonlinearity=LeakyRectify(0.1)) net['dropout'] = DropoutLayer(net['dense1'], p=0.5) net['dense2'] = DenseLayer(net['dropout'], num_units=1470, nonlinearity=linear) net['output_layer'] = net['dense2'] ## detection params return net
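# Hedged sketch (not from the original code): the 1470-unit 'dense2' output matches the
# YOLO v1 head, i.e. a 7x7 grid with 30 values per cell (20 class scores plus
# 2 boxes x (4 coordinates + 1 confidence)). The exact channel ordering below is an
# assumption; only the 7 * 7 * 30 = 1470 layout is implied by the layer size.
import numpy as np

def decode_yolo_output(flat_prediction, S=7, B=2, C=20):
    """Reshape a flat (1470,) prediction into per-cell class scores, confidences and boxes."""
    grid = np.asarray(flat_prediction).reshape(S, S, C + B * 5)
    class_scores = grid[..., :C]                    # (7, 7, 20)
    confidences = grid[..., C:C + B]                # (7, 7, 2)
    boxes = grid[..., C + B:].reshape(S, S, B, 4)   # (7, 7, 2, 4): x, y, w, h per box
    return class_scores, confidences, boxes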
def D_paper( num_channels=1, # Overridden based on dataset. resolution=32, # Overridden based on dataset. label_size=0, # Overridden based on dataset. fmap_base=4096, fmap_decay=1.0, fmap_max=256, mbstat_func='Tstdeps', mbstat_avg='all', mbdisc_kernels=None, use_wscale=True, use_gdrop=True, use_layernorm=False, **kwargs): R = int(np.log2(resolution)) assert resolution == 2**R and resolution >= 4 cur_lod = theano.shared(np.float32(0.0)) gdrop_strength = theano.shared(np.float32(0.0)) def nf(stage): return min(int(fmap_base / (2.0**(stage * fmap_decay))), fmap_max) def GD(layer): return GDropLayer(layer, name=layer.name + 'gd', mode='prop', strength=gdrop_strength) if use_gdrop else layer def LN(layer): return LayerNormLayer(layer, name=layer.name + 'ln') if use_layernorm else layer def WS(layer): return WScaleLayer(layer, name=layer.name + 'ws') if use_wscale else layer input_layer = InputLayer(name='Dimages', shape=[None, num_channels, 2**R, 2**R]) net = WS( NINLayer(input_layer, name='D%dx' % (R - 1), num_units=nf(R - 1), nonlinearity=lrelu, W=ilrelu)) for I in xrange(R - 1, 1, -1): # I = R-1, R-2, ..., 2 net = LN( WS( Conv2DLayer(GD(net), name='D%db' % I, num_filters=nf(I), filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu))) net = LN( WS( Conv2DLayer(GD(net), name='D%da' % I, num_filters=nf(I - 1), filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu))) net = Downscale2DLayer(net, name='D%ddn' % I, scale_factor=2) lod = Downscale2DLayer(input_layer, name='D%dxs' % (I - 1), scale_factor=2**(R - I)) lod = WS( NINLayer(lod, name='D%dx' % (I - 1), num_units=nf(I - 1), nonlinearity=lrelu, W=ilrelu)) net = LODSelectLayer(name='D%dlod' % (I - 1), incomings=[net, lod], cur_lod=cur_lod, first_incoming_lod=R - I - 1) if mbstat_avg is not None: net = MinibatchStatConcatLayer(net, name='Dstat', func=globals()[mbstat_func], averaging=mbstat_avg) net = LN( WS( Conv2DLayer(GD(net), name='D1b', num_filters=nf(1), filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu))) net = LN( WS( Conv2DLayer(GD(net), name='D1a', num_filters=nf(0), filter_size=4, pad=0, nonlinearity=lrelu, W=ilrelu))) if mbdisc_kernels: import minibatch_discrimination net = minibatch_discrimination.MinibatchLayer( net, name='Dmd', num_kernels=mbdisc_kernels) output_layers = [ WS( DenseLayer(net, name='Dscores', num_units=1, nonlinearity=linear, W=ilinear)) ] if label_size: output_layers += [ WS( DenseLayer(net, name='Dlabels', num_units=label_size, nonlinearity=linear, W=ilinear)) ] return dict(input_layers=[input_layer], output_layers=output_layers, cur_lod=cur_lod, gdrop_strength=gdrop_strength)
def train(images, labels, fold, model_type, batch_size=32, num_epochs=5):
    """
    A sample training function which loops over the training set and evaluates the network
    on the validation set after each epoch. Evaluates the network on the test set whenever
    the validation accuracy improves.

    :param images: input images
    :param labels: target labels
    :param fold: tuple of (train, test) index numbers
    :param model_type: model type ('cnn', '1dconv', 'maxpool', 'lstm', 'mix')
    :param batch_size: batch size for training
    :param num_epochs: number of epochs of dataset to go over for training
    :return: none
    """
    num_classes = len(np.unique(labels))
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = reformatInput(images, labels, fold)
    # reformatInput: receives the indices for train and test datasets and
    # outputs the train, validation, and test data and label datasets
    X_train = X_train.astype("float32", casting='unsafe')
    X_val = X_val.astype("float32", casting='unsafe')
    X_test = X_test.astype("float32", casting='unsafe')

    # Prepare Theano variables for inputs and targets
    input_var = T.TensorType('floatX', ((False, ) * 5))()
    target_var = T.ivector('targets')

    # Create neural network model (depending on first command line parameter)
    print("Building model and compiling functions...")
    # Building the appropriate model
    if model_type == '1dconv':
        network = build_convpool_conv1d(input_var, num_classes)
    elif model_type == 'maxpool':
        network = build_convpool_max(input_var, num_classes)
    elif model_type == 'lstm':
        network = build_convpool_lstm(input_var, num_classes, 100)
    elif model_type == 'mix':
        network = build_convpool_mix(input_var, num_classes, 100)
    elif model_type == 'cnn':
        input_var = T.tensor4('inputs')
        network, _ = build_cnn(input_var)
        network = DenseLayer(lasagne.layers.dropout(network, p=.5),
                             num_units=256,
                             nonlinearity=lasagne.nonlinearities.rectify)
        network = DenseLayer(lasagne.layers.dropout(network, p=.5),
                             num_units=num_classes,
                             nonlinearity=lasagne.nonlinearities.softmax)
    else:
        raise ValueError(
            "Model not supported ['1dconv', 'maxpool', 'lstm', 'mix', 'cnn']")

    # Create a loss expression for training, i.e., a scalar objective we want
    # to minimize (for our multi-class problem, it is the cross-entropy loss):
    prediction = lasagne.layers.get_output(network)
    loss = lasagne.objectives.categorical_crossentropy(prediction, target_var)
    loss = loss.mean()
    params = lasagne.layers.get_all_params(network, trainable=True)
    updates = lasagne.updates.adam(loss, params, learning_rate=0.001)

    # Create a loss expression for validation/testing. The crucial difference
    # here is that we do a deterministic forward pass through the network,
    # disabling dropout layers.
    test_prediction = lasagne.layers.get_output(network, deterministic=True)
    test_loss = lasagne.objectives.categorical_crossentropy(test_prediction, target_var)
    test_loss = test_loss.mean()
    # As a bonus, also create an expression for the classification accuracy:
    test_acc = T.mean(T.eq(T.argmax(test_prediction, axis=1), target_var),
                      dtype=theano.config.floatX)

    # Compile a function performing a training step on a mini-batch (by giving
    # the updates dictionary) and returning the corresponding training loss:
    train_fn = theano.function([input_var, target_var], loss, updates=updates)
    # Compile a second function computing the validation loss and accuracy:
    val_fn = theano.function([input_var, target_var], [test_loss, test_acc])

    # Finally, launch the training loop.
print("Starting training...") best_validation_accu = 0 # We iterate over epochs: for epoch in range(num_epochs): # In each epoch, we do a full pass over the training data: train_err = 0 train_batches = 0 start_time = time.time() for batch in iterate_minibatches(X_train, y_train, batch_size, shuffle=False): inputs, targets = batch train_err += train_fn(inputs, targets) train_batches += 1 # And a full pass over the validation data: val_err = 0 val_acc = 0 val_batches = 0 for batch in iterate_minibatches(X_val, y_val, batch_size, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) val_err += err val_acc += acc val_batches += 1 av_train_err = train_err / train_batches av_val_err = val_err / val_batches av_val_acc = val_acc / val_batches # Then we print the results for this epoch: print("Epoch {} of {} took {:.3f}s".format(epoch + 1, num_epochs, time.time() - start_time)) print(" training loss:\t\t{:.6f}".format(av_train_err)) print(" validation loss:\t\t{:.6f}".format(av_val_err)) print(" validation accuracy:\t\t{:.2f} %".format(av_val_acc * 100)) if av_val_acc > best_validation_accu: best_validation_accu = av_val_acc # After training, we compute and print the test error: test_err = 0 test_acc = 0 test_batches = 0 for batch in iterate_minibatches(X_test, y_test, batch_size, shuffle=False): inputs, targets = batch err, acc = val_fn(inputs, targets) test_err += err test_acc += acc test_batches += 1 av_test_err = test_err / test_batches av_test_acc = test_acc / test_batches print("Final results:") print(" test loss:\t\t\t{:.6f}".format(av_test_err)) print(" test accuracy:\t\t{:.2f} %".format(av_test_acc * 100)) # Dump the network weights to a file like this: np.savez('weights_lasg_{0}'.format(model_type), *lasagne.layers.get_all_param_values(network)) print('-' * 50) print("Best validation accuracy:\t\t{:.2f} %".format(best_validation_accu * 100)) print("Best test accuracy:\t\t{:.2f} %".format(av_test_acc * 100))
def D_mnist_mode_recovery( num_channels=1, resolution=32, fmap_base=64, fmap_decay=1.0, fmap_max=256, mbstat_func='Tstdeps', mbstat_avg=None, #'all', label_size=0, use_wscale=False, use_gdrop=False, use_layernorm=False, use_batchnorm=True, X=2, progressive=False, **kwargs): R = int(np.log2(resolution)) assert resolution == 2**R and resolution >= 4 cur_lod = theano.shared(np.float32(0.0)) gdrop_strength = theano.shared(np.float32(0.0)) def nf(stage): return min(int(fmap_base / (2.0**(stage * fmap_decay))) // X, fmap_max) def GD(layer): return GDropLayer(layer, name=layer.name + 'gd', mode='prop', strength=gdrop_strength) if use_gdrop else layer def LN(layer): return LayerNormLayer(layer, name=layer.name + 'ln') if use_layernorm else layer def WS(layer): return WScaleLayer(layer, name=layer.name + 'ws') if use_wscale else layer def BN(layer): return lasagne.layers.batch_norm(layer) if use_batchnorm else layer net = input_layer = InputLayer(name='Dimages', shape=[None, num_channels, 2**R, 2**R]) for I in xrange(R - 1, 1, -1): # I = R-1, R-2, ..., 2 (i.e. 4,3,2) net = BN( LN( WS( Conv2DLayer(GD(net), name='D%da' % I, num_filters=nf(I - 1), filter_size=3, pad=1, nonlinearity=lrelu, W=ilrelu)))) net = Downscale2DLayer(net, name='D%ddn' % I, scale_factor=2) if progressive: lod = Downscale2DLayer(input_layer, name='D%dxs' % (I - 1), scale_factor=2**(R - I)) lod = WS( NINLayer(lod, name='D%dx' % (I - 1), num_units=nf(I - 1), nonlinearity=lrelu, W=ilrelu)) net = LODSelectLayer(name='D%dlod' % (I - 1), incomings=[net, lod], cur_lod=cur_lod, first_incoming_lod=R - I - 1) if mbstat_avg is not None: net = MinibatchStatConcatLayer(net, name='Dstat', func=globals()[mbstat_func], averaging=mbstat_avg) net = FlattenLayer(GD(net), name='Dflatten') output_layers = [ WS( DenseLayer(net, name='Dscores', num_units=1, nonlinearity=linear, W=ilinear)) ] if label_size: output_layers += [ WS( DenseLayer(net, name='Dlabels', num_units=label_size, nonlinearity=linear, W=ilinear)) ] return dict(input_layers=[input_layer], output_layers=output_layers, cur_lod=cur_lod, gdrop_strength=gdrop_strength)
    net['prob'] = NonlinearityLayer(net['fc1000'], nonlinearity=softmax)

    return net


# Load model weights and metadata
d = pickle.load(open('../input/pretrained/resnet50.pkl'))

# Build the network and fill with pretrained weights
net = build_model()

# Define loss function and metrics, and get an updates dictionary
X_sym = T.tensor4()
y_sym = T.ivector()

# We'll connect our output classifier to the last fully connected layer of the network
net['new_output'] = DenseLayer(net['pool5'], num_units=10, nonlinearity=softmax,
                               W=lasagne.init.HeNormal(0.01))

prediction = lasagne.layers.get_output(net['new_output'], X_sym)
loss = lasagne.objectives.categorical_crossentropy(prediction, y_sym)
loss = loss.mean()

acc = T.mean(T.eq(T.argmax(prediction, axis=1), y_sym), dtype=theano.config.floatX)

learning_rate = theano.shared(np.array(0.0002, dtype=theano.config.floatX))
learning_rate_decay = np.array(0.1, dtype=theano.config.floatX)
updates = OrderedDict()

print("Setting learning rates...")
for name, layer in net.items():
    print(name)
    layer_params = layer.get_params(trainable=True)
def build_cnn(input_var=None, n=3): # create a residual learning building block with two stacked 3x3 convlayers as in paper def residual_block(l, increase_dim=False, projection=False): input_num_filters = l.output_shape[1] if increase_dim: first_stride = (2, 2) out_num_filters = input_num_filters * 2 else: first_stride = (1, 1) out_num_filters = input_num_filters stack_1 = batch_norm( ConvLayer(l, num_filters=out_num_filters, filter_size=(3, 3), stride=first_stride, nonlinearity=rectify, pad='same', W=lasagne.init.HeNormal(gain='relu'), flip_filters=False)) stack_2 = batch_norm( ConvLayer(stack_1, num_filters=out_num_filters, filter_size=(3, 3), stride=(1, 1), nonlinearity=None, pad='same', W=lasagne.init.HeNormal(gain='relu'), flip_filters=False)) # add shortcut connections if increase_dim: if projection: # projection shortcut, as option B in paper projection = batch_norm( ConvLayer(l, num_filters=out_num_filters, filter_size=(1, 1), stride=(2, 2), nonlinearity=None, pad='same', b=None, flip_filters=False)) block = NonlinearityLayer(ElemwiseSumLayer( [stack_2, projection]), nonlinearity=rectify) else: # identity shortcut, as option A in paper identity = ExpressionLayer( l, lambda X: X[:, :, ::2, ::2], lambda s: (s[0], s[1], s[2] // 2, s[3] // 2)) padding = PadLayer(identity, [out_num_filters // 4, 0, 0], batch_ndim=1) block = NonlinearityLayer(ElemwiseSumLayer([stack_2, padding]), nonlinearity=rectify) else: block = NonlinearityLayer(ElemwiseSumLayer([stack_2, l]), nonlinearity=rectify) return block # Building the network l_in = InputLayer(shape=(None, 3, 32, 32), input_var=input_var) # first layer, output is 16 x 32 x 32 l = batch_norm( ConvLayer(l_in, num_filters=16, filter_size=(3, 3), stride=(1, 1), nonlinearity=rectify, pad='same', W=lasagne.init.HeNormal(gain='relu'), flip_filters=False)) # first stack of residual blocks, output is 16 x 32 x 32 for _ in range(n): l = residual_block(l) # second stack of residual blocks, output is 32 x 16 x 16 l = residual_block(l, increase_dim=True) for _ in range(1, n): l = residual_block(l) # third stack of residual blocks, output is 64 x 8 x 8 l = residual_block(l, increase_dim=True) for _ in range(1, n): l = residual_block(l) # average pooling l = GlobalPoolLayer(l) # fully connected layer network = DenseLayer(l, num_units=1000, W=lasagne.init.HeNormal(), nonlinearity=softmax) return network
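# A small usage sketch (not part of the original code): build the 6n+2-layer network and
# inspect its size; with n=3 this is the 20-layer CIFAR-style variant. Only standard
# Lasagne helpers are used here.
import lasagne

network = build_cnn(n=3)
print("layers :", len(lasagne.layers.get_all_layers(network)))
print("params :", lasagne.layers.count_params(network, trainable=True))
print("output :", lasagne.layers.get_output_shape(network))   # (None, 1000) as written above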
def build_rnn_network(rnnmodel):
    net = {}
    net['input'] = InputLayer((batch_size, seq_len, feature_size))
    net['rnn'] = rnnmodel(net['input'], hidden_units, nonlinearity=act,
                          W_in_to_hid=Normal(args.ini),
                          W_hid_to_hid=lambda shape: np.identity(hidden_units, dtype=np.float32),
                          only_return_final=True, grad_clipping=args.gradclipvalue)
    net['out'] = DenseLayer(net['rnn'], outputclass, nonlinearity=softmax)
    return net
def __init__(self): print("Initialising network...") import theano import theano.tensor as T import lasagne from lasagne.layers import (InputLayer, LSTMLayer, ReshapeLayer, ConcatLayer, DenseLayer) theano.config.compute_test_value = 'raise' # Construct LSTM RNN: One LSTM layer and one dense output layer l_in = InputLayer(shape=input_shape) # setup fwd and bck LSTM layer. l_fwd = LSTMLayer(l_in, N_HIDDEN, backwards=False, learn_init=True, peepholes=True) l_bck = LSTMLayer(l_in, N_HIDDEN, backwards=True, learn_init=True, peepholes=True) # concatenate forward and backward LSTM layers concat_shape = (N_SEQ_PER_BATCH * SEQ_LENGTH, N_HIDDEN) l_fwd_reshape = ReshapeLayer(l_fwd, concat_shape) l_bck_reshape = ReshapeLayer(l_bck, concat_shape) l_concat = ConcatLayer([l_fwd_reshape, l_bck_reshape], axis=1) l_recurrent_out = DenseLayer(l_concat, num_units=N_OUTPUTS, nonlinearity=None) l_out = ReshapeLayer(l_recurrent_out, output_shape) input = T.tensor3('input') target_output = T.tensor3('target_output') # add test values input.tag.test_value = rand(*input_shape).astype(theano.config.floatX) target_output.tag.test_value = rand(*output_shape).astype( theano.config.floatX) print("Compiling Theano functions...") # Cost = mean squared error cost = T.mean((l_out.get_output(input) - target_output)**2) # Use NAG for training all_params = lasagne.layers.get_all_params(l_out) updates = lasagne.updates.nesterov_momentum(cost, all_params, LEARNING_RATE) # Theano functions for training, getting output, and computing cost self.train = theano.function([input, target_output], cost, updates=updates, on_unused_input='warn', allow_input_downcast=True) self.y_pred = theano.function([input], l_out.get_output(input), on_unused_input='warn', allow_input_downcast=True) self.compute_cost = theano.function([input, target_output], cost, on_unused_input='warn', allow_input_downcast=True) print("Done initialising network.")
def build_model(): net = {} net['input'] = InputLayer(shape=(None, ) + nn_input_shape) net['reshuffle'] = DimshuffleLayer(net['input'], pattern=(0, 'x', 1, 2, 3)) net['conv'] = bc(net['reshuffle'], num_filters=32, filter_size=3, stride=2) net['conv_1'] = bc(net['conv'], num_filters=32, filter_size=3) net['conv_2'] = bc(net['conv_1'], num_filters=64, filter_size=3, pad=1) net['pool'] = Pool3DLayer(net['conv_2'], pool_size=3, stride=2, mode='max') # net['conv_3'] = bc(net['pool'], num_filters=80, filter_size=1) # net['conv_4'] = bc(net['conv_3'], num_filters=192, filter_size=3) # net['pool_1'] = Pool3DLayer(net['conv_4'], pool_size=3, stride=2, mode='max') # I divided all the number of filters by 2 net['mixed/join'] = inceptionA(net['pool'], nfilt=((32, ), (24, 32), (32, 48, 48), (16, ))) net['mixed_1/join'] = inceptionA(net['mixed/join'], nfilt=((32, ), (24, 32), (32, 48, 48), (32, ))) net['mixed_2/join'] = inceptionA(net['mixed_1/join'], nfilt=((32, ), (24, 32), (32, 48, 48), (32, ))) # I divided all the number of filters by 2 net['mixed_3/join'] = inceptionB(net['mixed_2/join'], nfilt=((192, ), (32, 48, 48))) # I divided all the number of filters by 4 net['mixed_4/join'] = inceptionC(net['mixed_3/join'], nfilt=((48, ), (32, 32, 32, 48), (32, 32, 32, 32, 32, 32, 48), (48, ))) net['mixed_5/join'] = inceptionC(net['mixed_4/join'], nfilt=((48, ), (40, 40, 40, 48), (40, 40, 40, 40, 40, 40, 48), (48, ))) net['mixed_6/join'] = inceptionC(net['mixed_5/join'], nfilt=((48, ), (40, 40, 40, 48), (40, 40, 40, 40, 40, 40, 48), (48, ))) net['mixed_7/join'] = inceptionC(net['mixed_6/join'], nfilt=((48, ), (48, 48, 40, 48), (48, 48, 48, 48, 48, 48, 48), (48, ))) net['mixed_8/join'] = inceptionD(net['mixed_7/join'], nfilt=((48, 80), (48, 48, 48, 48, 48))) net['mixed_9/join'] = inceptionE(net['mixed_8/join'], nfilt=((80, ), (96, 96, 96, 96), (112, 96, 96, 96, 96), (48, )), pool_mode='average_exc_pad') net['mixed_10/join'] = inceptionE(net['mixed_9/join'], nfilt=((80, ), (96, 96, 96, 96), (112, 96, 96, 96, 96), (48, )), pool_mode='max') net['pool3'] = GlobalPoolLayer(net['mixed_10/join']) net['sigmoid'] = DenseLayer(net['pool3'], num_units=1, W=lasagne.init.Constant(0.0), b=None, nonlinearity=lasagne.nonlinearities.sigmoid) net['output'] = reshape(net['sigmoid'], shape=(-1, )) return { "inputs": { "bcolzall:3d": net['input'], }, "outputs": { "predicted_probability": net['output'] }, }
network['concat1'] = ConcatLayer([network['lstm-forward'], network['lstm-backward']], axis=2)
network['lstm-forward2'] = lasagne.layers.recurrent.LSTMLayer(
    network['concat1'], 400, mask_input=network['mask'],
    ingate=gate_parameters, forgetgate=gate_parameters,
    cell=cell_parameters, outgate=gate_parameters,
    gradient_steps=-1, grad_clipping=100., only_return_final=True)
network['lstm-backward2'] = lasagne.layers.recurrent.LSTMLayer(
    network['concat1'], 400, mask_input=network['mask'],
    ingate=gate_parameters, forgetgate=gate_parameters,
    cell=cell_parameters, outgate=gate_parameters,
    gradient_steps=-1, grad_clipping=100., only_return_final=True, backwards=True)
network['concat'] = ConcatLayer([network['lstm-forward2'], network['lstm-backward2']])

# added for lstm-dropout
network['fc1'] = batch_norm(DenseLayer(network['concat'], 2048,
                                       nonlinearity=lasagne.nonlinearities.sigmoid))
network['drop1'] = DropoutLayer(network['fc1'], p=0.5)

# softmax
temperature = 1.0
custom_softmax = TemperatureSoftmax(temperature)

# Output layer
softmax = custom_softmax
network['fc3'] = DenseLayer(network['drop1'], 5820, nonlinearity=None)
network['prob'] = NonlinearityLayer(network['fc3'], nonlinearity=softmax)

network_output = lasagne.layers.get_output(network['prob'])
hidden_output = lasagne.layers.get_output(network['drop1'], deterministic=True)
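# `TemperatureSoftmax` is used above but not defined in this snippet; a minimal sketch of
# one plausible implementation (an assumption, not the original class): a callable
# nonlinearity that divides the logits by a temperature before applying the softmax.
import theano.tensor as T

class TemperatureSoftmax(object):
    def __init__(self, temperature=1.0):
        self.temperature = temperature

    def __call__(self, x):
        return T.nnet.softmax(x / self.temperature)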
def __init__(self, input_var=None, dropout_rate=0.5): net = {} net['input'] = InputLayer((None, 3, 224, 224), input_var=input_var) net['conv1_1'] = ConvLayer(net['input'], 64, 3, pad=1, flip_filters=False) net['conv1_2'] = ConvLayer(net['conv1_1'], 64, 3, pad=1, flip_filters=False) net['pool1'] = PoolLayer(net['conv1_2'], 2) net['conv2_1'] = ConvLayer(net['pool1'], 128, 3, pad=1, flip_filters=False) net['conv2_2'] = ConvLayer(net['conv2_1'], 128, 3, pad=1, flip_filters=False) net['pool2'] = PoolLayer(net['conv2_2'], 2) net['conv3_1'] = ConvLayer(net['pool2'], 256, 3, pad=1, flip_filters=False) net['conv3_2'] = ConvLayer(net['conv3_1'], 256, 3, pad=1, flip_filters=False) net['conv3_3'] = ConvLayer(net['conv3_2'], 256, 3, pad=1, flip_filters=False) net['pool3'] = PoolLayer(net['conv3_3'], 2) net['conv4_1'] = ConvLayer(net['pool3'], 512, 3, pad=1, flip_filters=False) net['conv4_2'] = ConvLayer(net['conv4_1'], 512, 3, pad=1, flip_filters=False) net['conv4_3'] = ConvLayer(net['conv4_2'], 512, 3, pad=1, flip_filters=False) net['pool4'] = PoolLayer(net['conv4_3'], 2) net['conv5_1'] = ConvLayer(net['pool4'], 512, 3, pad=1, flip_filters=False) net['conv5_2'] = ConvLayer(net['conv5_1'], 512, 3, pad=1, flip_filters=False) net['conv5_3'] = ConvLayer(net['conv5_2'], 512, 3, pad=1, flip_filters=False) net['pool5'] = PoolLayer(net['conv5_3'], 2) net['fc6'] = DenseLayer(net['pool5'], num_units=4096) net['fc6_dropout'] = DropoutLayer(net['fc6'], p=dropout_rate) net['fc7'] = DenseLayer(net['fc6_dropout'], num_units=4096) net['fc7_dropout'] = DropoutLayer(net['fc7'], p=dropout_rate) net['fc8'] = DenseLayer(net['fc7_dropout'], num_units=1000, nonlinearity=None) net['prob'] = NonlinearityLayer(net['fc8'], softmax) self.net = net
def smart_find(X, y, X_valid, y_valid):
    loss = []
    kf = KFold(n_splits=5, shuffle=True)
    conf_set = set()
    # Integer division so range() accepts the step (float division breaks on Python 3).
    step = (64 + 10) // 4
    max_neuron_units = step * 8
    for i in range(1, max_neuron_units, step):
        for j in range(0, max_neuron_units, step):
            for k in range(0, max_neuron_units, step):
                struct_net = (i, )
                l = InputLayer(shape=(None, X.shape[1]))
                # ------- HIDDEN -----------
                l = DenseLayer(l, num_units=i, nonlinearity=softmax)
                if j > 0:
                    if i + step < j:
                        continue
                    l = DenseLayer(l, num_units=j, nonlinearity=softmax)
                    struct_net = (i, j)
                if k > 0:
                    if i + step < k or j + step < k:
                        continue
                    struct_net = (i, j, k)
                    l = DenseLayer(l, num_units=k, nonlinearity=softmax)
                # ------- HIDDEN -----------
                l = DenseLayer(l, num_units=len(np.unique(y)), nonlinearity=softmax)
                net = NeuralNet(l, update=adam, update_learning_rate=0.01, max_epochs=250)

                if struct_net in conf_set:
                    continue
                print('=' * 40)
                print(struct_net)
                print('=' * 40)
                conf_set.add(struct_net)

                k_loss = []
                y_data = np.array([y]).transpose()
                data = np.concatenate((X, y_data), axis=1)
                for train_index, test_index in kf.split(data):
                    X_train, X_test = X[train_index], X[test_index]
                    y_train, y_test = y[train_index], y[test_index]

                    net.fit(X_train, y_train)
                    y_pred = net.predict(X_test)
                    loss_error = net.score(X_test, y_test)
                    # loss_error = mean_squared_error(y_test, y_pred)
                    k_loss.append(loss_error)
                    print(loss_error)

                loss_net = (i, j, k, np.array(k_loss).mean())
                print(loss_net)
                loss.append(loss_net)
                print('=' * 40)

    # for i in range(1, max_hidden_layers):
    #     for j in range((64 + 10) // 2 // i, max_neuron_units // i, 10 // i):
    #         print('=' * 40)
    #         print('%s hidden layers' % i)
    #         print('%s neurons' % j)
    #         print('=' * 40)
    #         l = InputLayer(shape=(None, X.shape[1]))
    #         for k in range(i):
    #             l = DenseLayer(l, num_units=j, nonlinearity=softmax)
    #         l = DenseLayer(l, num_units=len(np.unique(y)), nonlinearity=softmax)
    #         net = NeuralNet(l, update=adam, update_learning_rate=0.01, max_epochs=500)
    #
    #         k_loss = []
    #         y_data = np.array([y]).transpose()
    #         data = np.concatenate((X, y_data), axis=1)
    #         for train_index, test_index in kf.split(data):
    #             X_train, X_test = X[train_index], X[test_index]
    #             y_train, y_test = y[train_index], y[test_index]
    #
    #             net.fit(X_train, y_train)
    #             y_pred = net.predict(X_test)
    #             loss_error = mean_squared_error(y_test, y_pred)
    #             k_loss.append(loss_error)
    #             print(loss_error)
    #
    #         loss_net = (i, j, np.array(k_loss).mean())
    #         print(loss_net)
    #         loss.append(loss_net)
    #         print('=' * 40)

    print(min(loss, key=lambda x: x[3]))
    print(max(loss, key=lambda x: x[3]))
    print(loss)
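# Usage sketch with synthetic data (shapes and dtypes are illustrative
# assumptions; note that smart_find never actually touches the validation
# split it receives).
import numpy as np
from sklearn.model_selection import train_test_split

X_all = np.random.rand(500, 64).astype('float32')
y_all = np.random.randint(0, 10, size=500).astype('int32')
X_tr, X_val, y_tr, y_val = train_test_split(X_all, y_all, test_size=0.2)
smart_find(X_tr, y_tr, X_val, y_val)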
minibatches = [
    (indices, X[indices], y[indices])
    for indices in np.array_split(train_indices, len(train_indices) / minibatch_size)
]

inp_x = theano.sparse.csr_fmatrix()
l_in = InputLayer((None, X.shape[1]), name="inputs", input_var=inp_x)
l_hiddens = [
    CondenseLayer(l_in, num_units=100, nonlinearity=rectify, W=Orthogonal())
]
for i in xrange(0):
    l_hiddens.append(
        DenseLayer(dropout(l_hiddens[-1]), num_units=100, nonlinearity=rectify))
l_out = DenseLayer(dropout(l_hiddens[-1]), num_units=y.shape[1],
                   nonlinearity=softmax, W=Orthogonal())


def reset():
    if any(np.isnan(scale.get_value()) for scale in scales):
        for scale in scales:
            scale.set_value(1.)
    for l in l_hiddens:
        l.b.set_value(Constant()(l.b.get_value().shape))
        l.W.set_value(Orthogonal()(l.W.get_value().shape))
    l_out.b.set_value(Constant()(l_out.b.get_value().shape))
    l_out.W.set_value(Orthogonal()(l_out.W.get_value().shape))
def build_model(input_var=None, batch_size=2,
                use_cpu_compatible=theano.config.device == 'cpu'):
    '''
    Builds Video2GIF model

    @param input_var:
    @param batch_size:
    @param use_cpu_compatible: use CPU compatible layers (i.e. no cuDNN). Default for theano device CPU; otherwise False
    @return: A dictionary containing the network layers, where the output layer is at key 'score'
    '''
    net = {}
    net['input'] = InputLayer((batch_size, 3, 16, 112, 112), input_var=input_var)
    if use_cpu_compatible:
        '''
        Slow implementation running on CPU
        Test snip scores: [-0.08948517, -0.01212098]; Time: 11s
        '''
        print('Use slow network implementation (without cuDNN)')
        # ----------- 1st layer group ---------------
        # Pad first, as this layer doesn't support padding
        net['pad'] = PadLayer(net['input'], width=1, batch_ndim=2)
        net['conv1a'] = lasagne.layers.conv.Conv3DLayer(
            net['pad'], 64, (3, 3, 3), pad=0,
            nonlinearity=lasagne.nonlinearities.rectify, flip_filters=True)
        net['pool1'] = lasagne.layers.pool.Pool3DLayer(
            net['conv1a'], pool_size=(1, 2, 2), stride=(1, 2, 2))
        # ------------- 2nd layer group --------------
        net['pad2'] = PadLayer(net['pool1'], width=1, batch_ndim=2)
        net['conv2a'] = lasagne.layers.conv.Conv3DLayer(
            net['pad2'], 128, (3, 3, 3), pad=0,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['pool2'] = lasagne.layers.pool.Pool3DLayer(
            net['conv2a'], pool_size=(2, 2, 2), stride=(2, 2, 2))
        # ----------------- 3rd layer group --------------
        net['pad3a'] = PadLayer(net['pool2'], width=1, batch_ndim=2)
        net['conv3a'] = lasagne.layers.conv.Conv3DLayer(
            net['pad3a'], 256, (3, 3, 3), pad=0,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['pad3b'] = PadLayer(net['conv3a'], width=1, batch_ndim=2)
        net['conv3b'] = lasagne.layers.conv.Conv3DLayer(
            net['pad3b'], 256, (3, 3, 3), pad=0,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['pool3'] = lasagne.layers.pool.Pool3DLayer(
            net['conv3b'], pool_size=(2, 2, 2), stride=(2, 2, 2))
        # ----------------- 4th layer group --------------
        net['pad4a'] = PadLayer(net['pool3'], width=1, batch_ndim=2)
        net['conv4a'] = lasagne.layers.conv.Conv3DLayer(
            net['pad4a'], 512, (3, 3, 3), pad=0,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['pad4b'] = PadLayer(net['conv4a'], width=1, batch_ndim=2)
        net['conv4b'] = lasagne.layers.conv.Conv3DLayer(
            net['pad4b'], 512, (3, 3, 3), pad=0,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['pool4'] = lasagne.layers.pool.Pool3DLayer(
            net['conv4b'], pool_size=(2, 2, 2), stride=(2, 2, 2))
        # ----------------- 5th layer group --------------
        net['pad5a'] = PadLayer(net['pool4'], width=1, batch_ndim=2)
        net['conv5a'] = lasagne.layers.conv.Conv3DLayer(
            net['pad5a'], 512, (3, 3, 3), pad=0,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['pad5b'] = PadLayer(net['conv5a'], width=1, batch_ndim=2)
        net['conv5b'] = lasagne.layers.conv.Conv3DLayer(
            net['pad5b'], 512, (3, 3, 3), pad=0,
            nonlinearity=lasagne.nonlinearities.rectify)
        # We need a padding layer, as C3D only pads on the right, which cannot be done with a theano pooling layer
        net['pad'] = PadLayer(net['conv5b'], width=[(0, 1), (0, 1)], batch_ndim=3)
        net['pool5'] = lasagne.layers.pool.Pool3DLayer(
            net['pad'], pool_size=(2, 2, 2), pad=(0, 0, 0), stride=(2, 2, 2))
        net['fc6-1'] = DenseLayer(net['pool5'], num_units=4096,
                                  nonlinearity=lasagne.nonlinearities.rectify)
    else:
        '''
        Fast implementation running on GPU
        Test snip scores: [-0.08948528, -0.01212097]; Time: 0.33s
        '''
        print('Use fast network implementation (cuDNN)')
        # ----------- 1st layer group ---------------
        net['conv1a'] = Conv3DDNNLayer(
            net['input'], 64, (3, 3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.rectify, flip_filters=False)
        net['pool1'] = MaxPool3DDNNLayer(net['conv1a'], pool_size=(1, 2, 2), stride=(1, 2, 2))
        # ------------- 2nd layer group --------------
        net['conv2a'] = Conv3DDNNLayer(
            net['pool1'], 128, (3, 3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['pool2'] = MaxPool3DDNNLayer(net['conv2a'], pool_size=(2, 2, 2), stride=(2, 2, 2))
        # ----------------- 3rd layer group --------------
        net['conv3a'] = Conv3DDNNLayer(
            net['pool2'], 256, (3, 3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['conv3b'] = Conv3DDNNLayer(
            net['conv3a'], 256, (3, 3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['pool3'] = MaxPool3DDNNLayer(net['conv3b'], pool_size=(2, 2, 2), stride=(2, 2, 2))
        # ----------------- 4th layer group --------------
        net['conv4a'] = Conv3DDNNLayer(
            net['pool3'], 512, (3, 3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['conv4b'] = Conv3DDNNLayer(
            net['conv4a'], 512, (3, 3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['pool4'] = MaxPool3DDNNLayer(net['conv4b'], pool_size=(2, 2, 2), stride=(2, 2, 2))
        # ----------------- 5th layer group --------------
        net['conv5a'] = Conv3DDNNLayer(
            net['pool4'], 512, (3, 3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.rectify)
        net['conv5b'] = Conv3DDNNLayer(
            net['conv5a'], 512, (3, 3, 3), pad=1,
            nonlinearity=lasagne.nonlinearities.rectify)
        # We need a padding layer, as C3D only pads on the right, which cannot be done with a theano pooling layer
        net['pad'] = PadLayer(net['conv5b'], width=[(0, 1), (0, 1)], batch_ndim=3)
        net['pool5'] = MaxPool3DDNNLayer(net['pad'], pool_size=(2, 2, 2), pad=(0, 0, 0), stride=(2, 2, 2))
        net['fc6-1'] = DenseLayer(net['pool5'], num_units=4096,
                                  nonlinearity=lasagne.nonlinearities.rectify)

    net['h1'] = DenseLayer(net['fc6-1'], num_units=512,
                           nonlinearity=lasagne.nonlinearities.rectify)
    net['h2'] = DenseLayer(net['h1'], num_units=128,
                           nonlinearity=lasagne.nonlinearities.rectify)
    net['score'] = DenseLayer(net['h2'], num_units=1, nonlinearity=None)

    return net
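# Minimal scoring sketch for the model above (an illustrative assumption, not
# part of the original code): compiles the 'score' output into a callable,
# assuming a 5D video input of shape (batch, channels, frames, height, width).
import theano
import theano.tensor as T
import lasagne

input_var = T.tensor5('snips')
net = build_model(input_var=input_var, batch_size=2)
score = lasagne.layers.get_output(net['score'], deterministic=True)
score_fn = theano.function([input_var], score)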
                           backwards=False, name="dialogues")
dialogue_rnn_layers_sliced = gru_hidden_readout(dialogue_rnn_layers, -1)

###############################################################################
#                                   DECODER                                   #
###############################################################################
# Tap into the common embedding layer but with the decoder's own input.
l_decoder_mask = InputLayer((None, None), name="decoder/mask")
l_decoder_embed = InputLayer((None, None, n_embed_char), name="decoder/input")

# Project the hidden state of the encoder.
dec_hid_inputs = []
for layer in dialogue_rnn_layers_sliced:
    l_project = DenseLayer(layer, n_hidden_decoder, nonlinearity=None,
                           name=os.path.join(layer.name, "proj"))
    dec_hid_inputs.append(l_project)

# Construct a stack of GRU layers that receive the final state of the encoder's network.
dec_rnn_layers = gru_column(l_decoder_embed, n_hidden_decoder, dec_hid_inputs,
                            mask_input=l_decoder_mask, learn_init=True,
                            backwards=False, name="decoder")
dec_rnn_layers_sliced = gru_hidden_readout(dec_rnn_layers, -1)

l_decoder_reembedder = DenseLayer(dec_rnn_layers[-1], num_units=len(vocab),
                                  nonlinearity=None, num_leading_axes=2,
                                  name="decoder/project")
lasagne.layers.set_all_param_values(l_decoder_reembedder,
                                    weights["l_decoder_reembedder"])
def _create_networks(n_actions, spec_shape, sheet_shape, show_nets=False):
    """ Build the policy and value networks. """
    l_in_spec = InputLayer(shape=[None, ] + spec_shape)
    l_in_sheet = InputLayer(shape=[None, ] + sheet_shape)

    net_spec = l_in_spec
    net_spec = Conv2DLayer(net_spec, num_filters=16, filter_size=4, stride=2, nonlinearity=elu)
    net_spec = Conv2DLayer(net_spec, num_filters=32, filter_size=3, stride=2, nonlinearity=elu)
    net_spec = Conv2DLayer(net_spec, num_filters=64, filter_size=3, stride=1, nonlinearity=elu)
    net_spec = FlattenLayer(net_spec)

    net_sheet = l_in_sheet
    net_sheet = Conv2DLayer(net_sheet, num_filters=16, filter_size=(4, 8), stride=2, nonlinearity=elu)
    net_sheet = Conv2DLayer(net_sheet, num_filters=32, filter_size=3, stride=(1, 2), nonlinearity=elu)
    net_sheet = Conv2DLayer(net_sheet, num_filters=32, filter_size=3, stride=(1, 2), nonlinearity=elu)
    net_sheet = Conv2DLayer(net_sheet, num_filters=32, filter_size=4, stride=2, nonlinearity=elu)
    net_sheet = FlattenLayer(net_sheet)

    net = ConcatLayer((net_spec, net_sheet), axis=1)
    net = DenseLayer(net, num_units=256, nonlinearity=elu)

    policy_net = DenseLayer(net, num_units=256, nonlinearity=elu)
    policy_net = DenseLayer(policy_net, num_units=n_actions, nonlinearity=softmax)

    value_net = DenseLayer(net, num_units=256, nonlinearity=elu)
    value_net = DenseLayer(value_net, num_units=1, nonlinearity=identity)

    if show_nets:
        print_net_architecture(policy_net, tag="Policy Network", detailed=False)
        print_net_architecture(value_net, tag="Value Network", detailed=False)

    return policy_net, value_net
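# Usage sketch for the factory above (an illustration, not original code):
# the input shapes are made-up placeholders, and the shared InputLayers are
# recovered from the graph since the function does not return them.
import theano
import lasagne

policy_net, value_net = _create_networks(n_actions=8,
                                         spec_shape=[1, 78, 40],
                                         sheet_shape=[1, 40, 128])

input_layers = [l for l in lasagne.layers.get_all_layers(policy_net)
                if isinstance(l, lasagne.layers.InputLayer)]
policy_probs = lasagne.layers.get_output(policy_net, deterministic=True)
policy_fn = theano.function([l.input_var for l in input_layers], policy_probs)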