def __init__(self, incoming, num_styles=None, epsilon=1e-4, beta=Constant(0), gamma=Constant(1), **kwargs):
    super(InstanceNormLayer, self).__init__(incoming, **kwargs)
    self.axes = (2, 3)
    self.epsilon = epsilon

    # One beta/gamma per channel, or one row per style for conditional instance norm.
    if num_styles is None:
        shape = (self.input_shape[1],)
    else:
        shape = (num_styles, self.input_shape[1])

    if beta is None:
        self.beta = None
    else:
        self.beta = self.add_param(beta, shape, 'beta', trainable=True, regularizable=False)
    if gamma is None:
        self.gamma = None
    else:
        self.gamma = self.add_param(gamma, shape, 'gamma', trainable=True, regularizable=True)
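# A hedged construction sketch only: the rest of InstanceNormLayer (its
# get_output_for and any style-index handling) is not shown above, and the
# shapes and layer names below are illustrative assumptions.
from lasagne.layers import InputLayer, Conv2DLayer

l_in = InputLayer((None, 3, 256, 256))
l_conv = Conv2DLayer(l_in, num_filters=32, filter_size=3, pad=1)
l_norm = InstanceNormLayer(l_conv)                 # one beta/gamma per channel
l_cond = InstanceNormLayer(l_conv, num_styles=10)  # one beta/gamma row per style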
def test_simple_stats(self):
    net = collections.OrderedDict()
    net['l_in'] = InputLayer((None, 784))
    net['l_shape'] = ReshapeLayer(net['l_in'], (-1, 1, 28, 28))
    net['l_conv'] = Conv2DLayer(net['l_shape'], num_filters=3, filter_size=3, W=Constant(1.), pad=1)
    net['l_out'] = DenseLayer(net['l_conv'], num_units=10, W=Constant(1.), nonlinearity=softmax)

    no_epochs = 5
    ws = lasagne_visualizer.weight_supervisor(net, no_epochs, mode='all_trainable')
    ws.initialize_grid()
    ws.accumulate_weight_stats()

    self.assertEqual(ws.max_weights.values(), [[1.]] * 2)
    self.assertEqual(ws.min_weights.values(), [[1.]] * 2)
    self.assertItemsEqual(ws.mean_weights.values(), [[1.]] * 2)
    self.assertItemsEqual(ws.err_band_lo_weights.values(), [[1.]] * 2)
    self.assertItemsEqual(ws.err_band_hi_weights.values(), [[1.]] * 2)
def policy_network(state):
    input_state = InputLayer(input_var=state, shape=(None, n_state))

    dense = DenseLayer(input_state, num_units=n_state, nonlinearity=tanh,
                       W=Normal(0.1, 0.0), b=Constant(0.0))
    dense = DenseLayer(dense, num_units=n_state, nonlinearity=tanh,
                       W=Normal(0.1, 0.0), b=Constant(0.0))

    mean = DenseLayer(dense, num_units=n_action, nonlinearity=action_nonlinearity,
                      W=Normal(0.1, 0.0), b=Constant(0.0))
    sigma = DenseLayer(dense, num_units=n_action, nonlinearity=T.exp,
                       W=Normal(0.1, 0.0), b=Constant(0.0))

    return mean, sigma
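# Minimal usage sketch, assuming n_state, n_action and action_nonlinearity are
# defined as in the snippet above; the variable and function names below are
# illustrative, not part of the original code.
import theano
import theano.tensor as T
from lasagne.layers import get_output

state_var = T.matrix('state')
mean_layer, sigma_layer = policy_network(state_var)
mean_out, sigma_out = get_output([mean_layer, sigma_layer])
# Returns the Gaussian policy parameters for a batch of states.
get_policy_params = theano.function([state_var], [mean_out, sigma_out])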
def highway_dense(incoming, Wh=Orthogonal(), bh=Constant(0.0),
                  Wt=Orthogonal(), bt=Constant(-4.0),
                  nonlinearity=rectify, **kwargs):
    num_inputs = int(np.prod(incoming.output_shape[1:]))

    # plain transform branch
    l_h = DenseLayer(incoming, num_units=num_inputs, W=Wh, b=bh, nonlinearity=nonlinearity)
    # transform gate (bias at -4 so the gate starts mostly closed, i.e. carry-through)
    l_t = DenseLayer(incoming, num_units=num_inputs, W=Wt, b=bt, nonlinearity=sigmoid)

    return MultiplicativeGatingLayer(gate=l_t, input1=l_h, input2=incoming)
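# Illustrative sketch of stacking a few highway blocks; the input width, the
# projection layer and the loop count are assumptions, and MultiplicativeGatingLayer
# is the custom layer referenced by highway_dense elsewhere in this repo.
from lasagne.layers import InputLayer, DenseLayer

l = InputLayer(shape=(None, 256))
l = DenseLayer(l, num_units=128)      # project to the highway width first
for _ in range(3):
    l = highway_dense(l)              # each block keeps the 128-unit width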
def conv_activation_bn(input_layer, name='', pad='same', activation='relu',
                       W_init=-1, use_bn=True, **kwargs):
    if use_bn:
        conv = Conv2DLayer(input_layer, name=name + '_linear', nonlinearity=linear, pad=pad,
                           flip_filters=False, W=W_init, b=Constant(0.), **kwargs)
        bn = BatchNormLayer(conv, name=name + '_bn')
        out = NonlinearityLayer(bn, name=name + '_activation', nonlinearity=activation)
    else:
        out = Conv2DLayer(input_layer, name=name, nonlinearity=activation, pad=pad,
                          flip_filters=False, W=W_init, b=Constant(0.), **kwargs)
    return out
def create_network(available_actions_num):
    # Creates the input variables
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State best Q-Value")
    r = tensor.vector("Rewards")
    nonterminal = tensor.vector("Nonterminal", dtype="int8")

    # Creates the input layer of the network.
    dqn = InputLayer(shape=[None, 1, downsampled_y, downsampled_x], input_var=s1)

    # Adds 3 convolutional layers, each followed by a max pooling layer.
    dqn = Conv2DLayer(dqn, num_filters=32, filter_size=[8, 8],
                      nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1))
    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[4, 4],
                      nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1))
    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[3, 3],
                      nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1))
    dqn = MaxPool2DLayer(dqn, pool_size=[2, 2])

    # Adds a single fully connected layer.
    dqn = DenseLayer(dqn, num_units=512, nonlinearity=rectify,
                     W=GlorotUniform("relu"), b=Constant(.1))

    # Adds a single fully connected layer which is the output layer.
    # (no nonlinearity as it is for approximating an arbitrary real function)
    dqn = DenseLayer(dqn, num_units=available_actions_num, nonlinearity=None)

    # Theano stuff
    q = get_output(dqn)
    # Only q for the chosen actions is updated, more or less according to:
    # target Q(s,a,t) = r + gamma * max Q(s2,_,t+1)
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a],
                                    r + discount_factor * nonterminal * q2)
    loss = squared_error(q, target_q).mean()

    # Updates the parameters according to the computed gradient using rmsprop.
    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    # Compiles theano functions
    print("Compiling the network ...")
    function_learn = theano.function([s1, q2, a, r, nonterminal], loss,
                                     updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn")
    print("Network compiled.")

    # Returns Theano objects for the net and functions.
    # We wouldn't need the net anymore but it is nice to save your model.
    return dqn, function_learn, function_get_q_values, function_get_best_action
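# Hedged usage sketch of the compiled functions returned above. It assumes the
# module-level globals from the snippet (downsampled_y, downsampled_x,
# discount_factor, learning_rate) are defined; the batch contents are dummies
# chosen only to show the call signatures.
import numpy as np

net, learn, get_q_values, get_best_action = create_network(available_actions_num=8)

s1 = np.zeros((4, 1, downsampled_y, downsampled_x), dtype=np.float32)
a = np.zeros(4, dtype=np.int32)
r = np.zeros(4, dtype=np.float32)
q2 = np.zeros(4, dtype=np.float32)
nonterminal = np.ones(4, dtype=np.int8)

loss = learn(s1, q2, a, r, nonterminal)       # one rmsprop step on the batch
best_action = get_best_action(s1[:1])         # greedy action for a single state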
def upsample(input_layer, deconv='default', **kwargs):
    stride = 2
    if deconv == 'default':
        # flip_filters=True (original), False (mine)
        return Deconv2DLayer(input_layer, num_filters=2, filter_size=4, stride=2, crop=1,
                             W=W_init(gain=1.0), b=Constant(0.), nonlinearity=linear,
                             flip_filters=False, **kwargs)
    elif deconv == 'subpixel':
        deconv_layer = Conv2DLayer(input_layer, num_filters=2 * stride * stride,
                                   filter_size=3, pad=1, nonlinearity=linear,
                                   W=W_init(gain=1.0), b=Constant(0.),
                                   name='flow_subpixel_conv')
        return SubpixelReshuffleLayer(deconv_layer, 2, stride, name='flow_subpixel_shuffle')
def leaky_deconv(input_layer, num_filters=64, activation=None, init=W_init_linear,
                 deconv='default', **kwargs):
    stride = 2
    if deconv == 'default':
        # flip_filters=True (original), False (mine)
        return Deconv2DLayer(input_layer, num_filters=num_filters, nonlinearity=activation,
                             filter_size=4, stride=2, crop=1, W=init, b=Constant(0.),
                             flip_filters=False, **kwargs)
    elif deconv == 'subpixel':
        deconv_layer = Conv2DLayer(input_layer, num_filters=num_filters * stride * stride,
                                   filter_size=3, pad=1, nonlinearity=activation,
                                   W=init, b=Constant(0.), name='subpixel_conv')
        return SubpixelReshuffleLayer(deconv_layer, num_filters, stride, name='subpixel_shuffle')
def reset():
    # Re-initialize everything if any scale has gone NaN.
    if any(np.isnan(scale.get_value()) for scale in scales):
        for scale in scales:
            scale.set_value(1.)
        for l in l_hiddens:
            l.b.set_value(Constant()(l.b.get_value().shape))
            l.W.set_value(Orthogonal()(l.W.get_value().shape))
        l_out.b.set_value(Constant()(l_out.b.get_value().shape))
        l_out.W.set_value(Orthogonal()(l_out.W.get_value().shape))
        for p in (p for u in (updates_ada, updates_other, updates_scal)
                  for p in u if p not in get_all_params(l_out)):
            p.set_value(Constant()(p.get_value().shape))
def UNet_decoder_2(LR_conv1, LR_conv2, LR_conv3, LR_conv4,
                   warp_conv1, warp_conv2, warp_conv3, warp_conv4,
                   activation=SELU_activation, W_init=W_init_SELU):
    # 80
    warp_deconv4 = Deconv2DLayer(ConcatLayer([LR_conv4, warp_conv4]), num_filters=64,
                                 filter_size=4, stride=2, crop=1,
                                 W=W_init, b=Constant(0.), nonlinearity=activation)
    # 160
    warp_deconv3 = Deconv2DLayer(ConcatLayer([warp_deconv4, LR_conv3, warp_conv3]), num_filters=64,
                                 filter_size=4, stride=2, crop=1,
                                 W=W_init, b=Constant(0.), nonlinearity=activation)
    # 320
    warp_deconv2 = Deconv2DLayer(ConcatLayer([warp_deconv3, LR_conv2, warp_conv2]), num_filters=64,
                                 filter_size=4, stride=2, crop=1,
                                 W=W_init, b=Constant(0.), nonlinearity=activation)
    # final
    post_fusion1 = Conv2DLayer(ConcatLayer([warp_deconv2, LR_conv1, warp_conv1]), 64, 5, pad=2,
                               W=W_init, b=Constant(0.), nonlinearity=activation)
    # NOTE: post_fusion2 is computed but never used below; as written, `final`
    # is built directly from post_fusion1.
    post_fusion2 = Conv2DLayer(post_fusion1, 64, 5, pad=2,
                               W=W_init, b=Constant(0.), nonlinearity=activation)
    final = Conv2DLayer(post_fusion1, 3, 5, pad=2,
                        W=W_init_linear, b=Constant(0.), nonlinearity=linear)
    return final
def build_st_network(b_size, input_shape, withdisc=True):
    # General Params
    num_filters = 64
    filter_size = (3, 3)
    pool_size = (2, 2)

    # SP Param
    b = np.zeros((2, 3), dtype=theano.config.floatX)
    b[0, 0] = 1
    b[1, 1] = 1
    b = b.flatten()  # identity transform

    # Localization Network
    l_in = InputLayer(shape=(None, input_shape[1], input_shape[2], input_shape[3]))
    l_conv1 = Conv2DLayer(l_in, num_filters=num_filters, filter_size=filter_size)
    l_pool1 = MaxPool2DLayer(l_conv1, pool_size=pool_size)
    l_conv2 = Conv2DLayer(l_pool1, num_filters=num_filters, filter_size=filter_size)
    l_pool2 = MaxPool2DLayer(l_conv2, pool_size=pool_size)
    l_loc = DenseLayer(l_pool2, num_units=64, W=lasagne.init.HeUniform('relu'))
    l_param_reg = DenseLayer(l_loc, num_units=6, b=b,
                             nonlinearity=lasagne.nonlinearities.linear,
                             W=lasagne.init.Constant(0.0), name='param_regressor')
    if withdisc:
        l_dis = DiscreteLayer(l_param_reg, start=Constant(-3.), stop=Constant(3.),
                              linrange=Constant(50.))
    else:
        l_dis = l_param_reg

    # Transformer Network
    l_trans = TransformerLayer(l_in, l_dis, downsample_factor=1.0)
    final = ReshapeLayer(l_trans, shape=([0], -1))
    return final
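# Hedged usage sketch: build the spatial-transformer net and get its symbolic
# output. The image shape is an illustrative assumption, and withdisc=False is
# used so the example does not depend on the custom DiscreteLayer.
import theano.tensor as T
from lasagne.layers import get_output

st_net = build_st_network(b_size=32, input_shape=(None, 1, 60, 60), withdisc=False)
x = T.tensor4('x')
warped = get_output(st_net, x)  # flattened, spatially transformed images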
def train_test(train, labels, test, weight_decay):
    net = NeuralNet(
        layers=[
            ('input', InputLayer),
            ('dropout0', DropoutLayer),
            ('dense1', DenseLayer),
            ('dropout1', DropoutLayer),
            ('dense2', DenseLayer),
            ('dropout2', DropoutLayer),
            ('dense3', DenseLayer),
            ('dropout3', DropoutLayer),
            ('output', DenseLayer),
        ],
        update=nesterov_momentum,
        loss=None,
        objective=partial(WeightDecayObjective, weight_decay=weight_decay),
        regression=False,
        max_epochs=600,
        eval_size=0.1,
        # on_epoch_finished=None,
        # on_training_finished=None,
        verbose=bool(VERBOSITY),
        input_shape=(None, train.shape[1]),
        output_num_units=NCLASSES,
        dense1_num_units=700,
        dense2_num_units=1000,
        dense3_num_units=700,
        dense1_nonlinearity=LeakyRectify(leakiness=0.1),
        dense2_nonlinearity=LeakyRectify(leakiness=0.1),
        dense3_nonlinearity=LeakyRectify(leakiness=0.1),
        output_nonlinearity=softmax,
        dense1_W=HeUniform(),
        dense2_W=HeUniform(),
        dense3_W=HeUniform(),
        dense1_b=Constant(0.),
        dense2_b=Constant(0.),
        dense3_b=Constant(0.),
        output_b=Constant(0.),
        dropout0_p=0.1,
        dropout1_p=0.6,
        dropout2_p=0.6,
        dropout3_p=0.6,
        update_learning_rate=shared(float32(0.02)),
        # update_momentum=shared(float32(0.9)),
        # batch_iterator_train=BatchIterator(batch_size=128),
        batch_iterator_test=BatchIterator(batch_size=128),
    )
    net.fit(train, labels)
    return net.predict_proba(test)
def __init__(self, incoming, depth, n_estimators, n_outputs, pi_iters, **kwargs):
    self._incoming = incoming
    self._depth = depth
    self._n_estimators = n_estimators
    self._n_outputs = n_outputs
    self._pi_iters = pi_iters
    super(NeuralForestLayer, self).__init__(incoming, **kwargs)

    pi_init = Constant(val=1.0 / n_outputs)(((1 << (depth - 1)) * n_estimators, n_outputs))
    pi_name = "%s.%s" % (self.name, 'pi') if self.name is not None else 'pi'
    self.pi = theano.shared(pi_init, name=pi_name)

    # what we want to do here is pi / pi.sum(axis=1)
    # to be safe, if certain rows only contain zeroes (for some pi all y's became 0),
    # replace such row with 1/n_outputs
    sum_pi_over_y = self.pi.sum(axis=1).dimshuffle(0, 'x')
    all_0_y = T.eq(sum_pi_over_y, 0)
    norm_pi_body = (self.pi + all_0_y * (1.0 / n_outputs)) / (sum_pi_over_y + all_0_y)
    self.normalize_pi = theano.function([], [], updates=[(self.pi, norm_pi_body)])
    self.update_pi_one_iter = self.get_update_pi_one_iter_func()

    self.normalize_pi()

    t_input = T.matrix('t_input')
    self.f_leaf_proba = theano.function(
        [t_input], self.get_probabilities_for(get_output(incoming, t_input)))
def build_model(self, input_batch):
    ## initialize shared parameters
    Ws = []
    bs = []
    nLayersWithParams = 13
    if self.refinement_network:
        nLayersWithParams = nLayersWithParams + 4
    for i in range(nLayersWithParams):
        W = HeUniform()
        Ws.append(W)
        b = Constant(0.0)
        bs.append(b)

    hidden_state = InputLayer(
        input_var=np.zeros((self.batch_size, 64, self.npx // 2, self.npx // 2), dtype=np.float32),
        shape=(self.batch_size, 64, self.npx // 2, self.npx // 2))

    ## get inputs
    inputs = InputLayer(input_var=input_batch,
                        shape=(None, self.input_seqlen, self.npx, self.npx))
    # inputs = InputLayer(input_var=input_batch, shape=(None, 1, self.npx, self.npx, self.input_seqlen))
    # inputs = DimshuffleLayer(inputs, (0, 4, 2, 3, 1))

    outputs = []
    for i in range(self.input_seqlen - self.nInputs + self.target_seqlen):
        input = SliceLayer(inputs, indices=slice(0, self.nInputs), axis=1)
        output, hidden_state, filters = self.predict(input, hidden_state, Ws, bs)

        ## FIFO operation.
        inputs = SliceLayer(inputs, indices=slice(1, None), axis=1)
        if i == self.input_seqlen - self.nInputs:
            filtersToVisualize = filters
        if i >= self.input_seqlen - self.nInputs:
            # Past the warm-up phase: feed the prediction back in and collect it.
            inputs = ConcatLayer([inputs, output], axis=1)
            outputs.append(output)

    return output, outputs, filtersToVisualize
def ANN(X_train, y_train, verbose):
    layer_s = [("input", layers.InputLayer),
               ("dense0", layers.DenseLayer),
               ("output", layers.DenseLayer)]
    network = NeuralNet(
        layers=layer_s,
        input_shape=(None, X_train.shape[1]),
        dense0_num_units=100,
        dense0_W=Constant(val=1. / 14.0),
        # dense0_W=Normal(),
        # dense0_nonlinearity=tanh,
        output_num_units=1,
        output_nonlinearity=None,
        regression=True,
        update=sgd,
        update_learning_rate=0.001,
        # update_momentum=0.9,
        objective_loss_function=squared_error,
        batch_iterator_train=BatchIterator(batch_size=121),
        train_split=TrainSplit(eval_size=0.1),
        verbose=1 if verbose else 0,
        max_epochs=400)
    network.fit(X_train, y_train)
    return network
def upsample_bn(input_layer, name='', num_filters=None, filter_size=None, stride=None,
                crop=None, activation='relu', use_bn=True, W_init=1, deconv_mode=None, **kwargs):
    if deconv_mode == '':
        deconv = Deconv2DLayer(input_layer, name=name + '_linear', nonlinearity=linear,
                               num_filters=num_filters, filter_size=filter_size, stride=stride,
                               crop=crop, W=W_init, b=Constant(0.), flip_filters=False, **kwargs)
    elif deconv_mode == 'Subpixel':
        deconv = lasagne.layers.Conv2DLayer(input_layer, name=name + '_linear',
                                            num_filters=num_filters * stride * stride,
                                            filter_size=3, pad=(filter_size - 1) // 2,
                                            nonlinearity=linear, W=W_init, b=Constant(0.))
        deconv = lasagne.layers.SubpixelReshuffleLayer(deconv, num_filters, stride,
                                                       name=name + '_linear_shuffle')
    if use_bn:
        bn = BatchNormLayer(deconv, name=name + '_bn')
        out = NonlinearityLayer(bn, name=name + '_activation', nonlinearity=activation)
    else:
        out = NonlinearityLayer(deconv, name=name + '_activation', nonlinearity=activation)
    return out
def __init__(self, W_in=Normal(0.1), W_hid=Normal(0.1), b=Constant(0.),
             nonlinearity=nonlin.sigmoid):
    self.W_in = W_in
    self.W_hid = W_hid
    self.b = b
    if nonlinearity is None:
        self.nonlinearity = nonlin.identity
    else:
        self.nonlinearity = nonlinearity
def create_network(available_actions_count):
    # Create the input variables
    s1 = tensor.tensor4("States")
    a = tensor.vector("Actions", dtype="int32")
    q2 = tensor.vector("Next State's best Q-Value")
    r = tensor.vector("Rewards")
    isterminal = tensor.vector("IsTerminal", dtype="int8")

    dqn = InputLayer(shape=[None, 1, resolution[0], resolution[1]], input_var=s1)
    dqn = Conv2DLayer(dqn, num_filters=32, filter_size=[8, 8], nonlinearity=rectify,
                      W=HeUniform("relu"), b=Constant(.1), stride=4)
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[4, 4], nonlinearity=rectify,
                      W=HeUniform("relu"), b=Constant(.1), stride=2)
    dqn = DenseLayer(dqn, num_units=512, nonlinearity=rectify,
                     W=HeUniform("relu"), b=Constant(.1))
    dqn = DenseLayer(dqn, num_units=available_actions_count, nonlinearity=None)

    q = get_output(dqn)
    target_q = tensor.set_subtensor(q[tensor.arange(q.shape[0]), a],
                                    r + discount_factor * (1 - isterminal) * q2)
    loss = squared_error(q, target_q).mean()

    params = get_all_params(dqn, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    print("Compiling the network ...")
    function_learn = theano.function([s1, q2, a, r, isterminal], loss,
                                     updates=updates, name="learn_fn")
    function_get_q_values = theano.function([s1], q, name="eval_fn")
    function_get_best_action = theano.function([s1], tensor.argmax(q), name="test_fn")
    print("Network compiled.")

    def simple_get_best_action(state):
        return function_get_best_action(state.reshape([1, 1, resolution[0], resolution[1]]))

    return dqn, function_learn, function_get_q_values, simple_get_best_action
def make_net(W, H, size1=20, size2=15):
    net = NeuralNet(
        layers=[
            ('input', InputLayer),
            ('dense1', DenseLayer),
            ('dense2', DenseLayer),
            ('output', DenseLayer),
        ],
        input_shape=(None, W * H),
        dense1_num_units=size1,
        dense1_nonlinearity=LeakyRectify(leakiness=0.1),
        dense1_W=HeNormal(),
        dense1_b=Constant(),
        dense2_num_units=size2,
        dense2_nonlinearity=LeakyRectify(leakiness=0.1),
        dense2_W=HeNormal(),
        dense2_b=Constant(),
        output_num_units=4,
        output_nonlinearity=softmax,
        output_W=HeNormal(),
        output_b=Constant(),
        update=nesterov_momentum,  # todo
        update_learning_rate=shared(float32(1.)),
        update_momentum=0.9,
        max_epochs=200,
        on_epoch_finished=[
            StopWhenOverfitting(),
            StopAfterMinimum(),
            AdjustLearningRate(1., 0.0001),
        ],
        # label_encoder=False,
        regression=True,
        verbose=1,
        batch_iterator_train=BatchIterator(batch_size=128),  # todo
        batch_iterator_test=BatchIterator(batch_size=128),
        train_split=TrainSplit(eval_size=0.1),
    )
    net.initialize()
    return net
def residual(self, model, num_filters=None, dim_inc=False):
    residual = super(WeightedResNet, self).residual(model, num_filters=num_filters,
                                                    dim_inc=dim_inc)
    residual = self.nonlinearity(residual)
    self.residuals.append(residual)

    # Scale the whole residual branch by a single learnable weight (initialized to 0),
    # tagged 'layer_weight' so it can be selected separately later.
    shared_axes = tuple(range(len(residual.output_shape)))
    residual = ScaleLayer(residual, Constant(0), shared_axes=shared_axes)
    residual.params[residual.scales].add('layer_weight')
    self.weights.append(residual)
    return residual
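# Because the scale is tagged 'layer_weight', Lasagne's tag-based parameter
# filtering can pull those scales out later. A minimal sketch; `network` below
# stands for the output layer of a WeightedResNet model and is illustrative.
from lasagne.layers import get_all_params

layer_weights = get_all_params(network, trainable=True, layer_weight=True)
other_params = get_all_params(network, trainable=True, layer_weight=False)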
def _create_network(available_actions_num, input_shape, visual_input_var, n_variables,
                    variables_input_var):
    dqn = InputLayer(shape=[None, input_shape.frames, input_shape.y, input_shape.x],
                     input_var=visual_input_var)
    dqn = Conv2DLayer(dqn, num_filters=32, filter_size=[8, 8], stride=[4, 4],
                      nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1))
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[4, 4], stride=[2, 2],
                      nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1))
    dqn = Conv2DLayer(dqn, num_filters=64, filter_size=[3, 3],
                      nonlinearity=rectify, W=GlorotUniform("relu"), b=Constant(.1))

    if n_variables > 0:
        variables_layer = InputLayer(shape=[None, n_variables], input_var=variables_input_var)
        dqn = ConcatLayer((flatten(dqn), variables_layer))

    dqn = DenseLayer(dqn, num_units=512, nonlinearity=rectify,
                     W=GlorotUniform("relu"), b=Constant(.1))
    dqn = DenseLayer(dqn, num_units=available_actions_num, nonlinearity=None)
    return dqn
def _get_l_out(self, input_vars):
    listener.check_options(self.options)
    id_tag = (self.id + '/') if self.id else ''

    input_var = input_vars[0]

    l_in = InputLayer(shape=(None, self.seq_vec.max_len), input_var=input_var,
                      name=id_tag + 'desc_input')
    l_in_embed = EmbeddingLayer(l_in, input_size=len(self.seq_vec.tokens),
                                output_size=self.options.listener_cell_size,
                                name=id_tag + 'desc_embed')

    cell = CELLS[self.options.listener_cell]
    cell_kwargs = {
        'grad_clipping': self.options.listener_grad_clipping,
        'num_units': self.options.listener_cell_size,
    }
    if self.options.listener_cell == 'LSTM':
        cell_kwargs['forgetgate'] = Gate(b=Constant(self.options.listener_forget_bias))
    if self.options.listener_cell != 'GRU':
        cell_kwargs['nonlinearity'] = NONLINEARITIES[self.options.listener_nonlinearity]

    l_rec1 = cell(l_in_embed, name=id_tag + 'rec1', **cell_kwargs)
    if self.options.listener_dropout > 0.0:
        l_rec1_drop = DropoutLayer(l_rec1, p=self.options.listener_dropout,
                                   name=id_tag + 'rec1_drop')
    else:
        l_rec1_drop = l_rec1

    l_hidden = DenseLayer(l_rec1_drop, num_units=self.options.listener_cell_size,
                          nonlinearity=NONLINEARITIES[self.options.listener_nonlinearity],
                          name=id_tag + 'hidden')
    if self.options.listener_dropout > 0.0:
        l_hidden_drop = DropoutLayer(l_hidden, p=self.options.listener_dropout,
                                     name=id_tag + 'hidden_drop')
    else:
        l_hidden_drop = l_hidden

    l_out = DenseLayer(l_hidden_drop, num_units=3, nonlinearity=softmax,
                       name=id_tag + 'scores')

    return l_out, [l_in]
def __init__(self, incoming, num_units, W=Uniform(), b=Constant(0.), **kwargs):
    super(SharedDotLayer, self).__init__(incoming, **kwargs)
    num_inputs = self.input_shape[1]
    self.num_units = num_units
    self.W = self.add_param(W, (num_inputs, num_units), name='W')
    self.b = self.add_param(b, (num_units,), name='b', regularizable=False)
def q_network(state):
    input_state = InputLayer(input_var=state, shape=(None, n_state))

    dense_1 = DenseLayer(input_state, num_units=n_state, nonlinearity=tanh,
                         W=Normal(0.1, 0.0), b=Constant(0.0))
    dense_2 = DenseLayer(dense_1, num_units=n_state, nonlinearity=tanh,
                         W=Normal(0.1, 0.0), b=Constant(0.0))
    q_values = DenseLayer(dense_2, num_units=n_action, nonlinearity=None,
                          W=Normal(0.1, 0.0), b=Constant(0.0))

    return q_values
def addConvModule(nnet, num_filters, filter_size, pad='valid', W_init=None, bias=True,
                  use_maxpool=True, pool_size=(2, 2), use_batch_norm=False, dropout=False,
                  p_dropout=0.5, upscale=False, stride=(1, 1)):
    """
    add a convolutional module (convolutional layer + (leaky) ReLU + MaxPool) to the network
    """

    if W_init is None:
        # gain adjusted for leaky ReLU with alpha=0.01
        W = GlorotUniform(gain=(2 / (1 + 0.01 ** 2)) ** 0.5)
    else:
        W = W_init

    if bias is True:
        b = Constant(0.)
    else:
        b = None

    # build module
    if dropout:
        nnet.addDropoutLayer(p=p_dropout)

    nnet.addConvLayer(use_batch_norm=use_batch_norm,
                      num_filters=num_filters,
                      filter_size=filter_size,
                      pad=pad,
                      W=W,
                      b=b,
                      stride=stride)

    if Cfg.leaky_relu:
        nnet.addLeakyReLU()
    else:
        nnet.addReLU()

    if upscale:
        nnet.addUpscale(scale_factor=pool_size)
    elif use_maxpool:
        nnet.addMaxPool(pool_size=pool_size)
def create_th(image_shape, output_dim, layers_conf):
    from lasagne.init import GlorotUniform, Constant
    from lasagne.layers import Conv2DLayer, InputLayer, DenseLayer, get_output, \
        get_all_params, set_all_param_values
    from lasagne.nonlinearities import rectify
    from lasagne.objectives import squared_error
    from lasagne.updates import rmsprop

    x = th.tensor.tensor4("input")
    t = th.tensor.matrix("target")

    net = InputLayer(shape=[None, 1, image_shape[0], image_shape[1]], input_var=x)
    for num_filters, kernel_size, stride in layers_conf[:-1]:
        net = Conv2DLayer(net, num_filters=num_filters,
                          filter_size=[kernel_size, kernel_size],
                          nonlinearity=rectify, W=GlorotUniform(), b=Constant(.1),
                          stride=stride)
    net = DenseLayer(net, num_units=layers_conf[-1], nonlinearity=rectify,
                     W=GlorotUniform(), b=Constant(.1))
    net = DenseLayer(net, num_units=output_dim, nonlinearity=None)

    q = get_output(net)
    loss = squared_error(q, t).mean()

    params = get_all_params(net, trainable=True)
    updates = rmsprop(loss, params, learning_rate)

    backprop = th.function([x, t], loss, updates=updates, name="bprop")
    fwd_pass = th.function([x], q, name="fwd")
    return fwd_pass, backprop
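# Hedged usage sketch of the two compiled functions returned above. The image
# shape, layer configuration and batch contents are assumptions chosen only to
# show the call signatures; `learning_rate` is a module-level global in the
# snippet and must already be defined.
import numpy as np

# layers_conf: conv layers as (num_filters, kernel_size, stride), then dense size.
fwd, bprop = create_th(image_shape=(84, 84), output_dim=4,
                       layers_conf=[(32, 8, 4), (64, 4, 2), 512])

x = np.zeros((8, 1, 84, 84), dtype=np.float32)   # fake minibatch of frames
t = np.zeros((8, 4), dtype=np.float32)           # fake regression targets
q = fwd(x)           # forward pass: predicted values
loss = bprop(x, t)   # one rmsprop update toward the targets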
def UNet_decoder_3(LR_conv1, LR_conv2, LR_conv3, LR_conv4,
                   warp_conv1, warp_conv2, warp_conv3, warp_conv4):
    # 80
    mask4 = Conv2DLayer(ConcatLayer([LR_conv4, warp_conv4]), 64, 5, pad=2,
                        W=W_init_SELU, b=Constant(0.), nonlinearity=sigmoid)
    warp_conv4_m = ElemwiseMergeLayer([warp_conv4, mask4], T.mul)
    warp_deconv4 = Deconv2DLayer(ConcatLayer([LR_conv4, warp_conv4_m]), num_filters=64,
                                 filter_size=4, stride=2, crop=1,
                                 W=W_init_SELU, b=Constant(0.), nonlinearity=SELU_activation)
    # 160
    mask3 = Conv2DLayer(ConcatLayer([warp_deconv4, LR_conv3, warp_conv3]), 64, 5, pad=2,
                        W=W_init_SELU, b=Constant(0.), nonlinearity=sigmoid)
    warp_conv3_m = ElemwiseMergeLayer([warp_conv3, mask3], T.mul)
    warp_deconv3 = Deconv2DLayer(ConcatLayer([warp_deconv4, LR_conv3, warp_conv3_m]), num_filters=64,
                                 filter_size=4, stride=2, crop=1,
                                 W=W_init_SELU, b=Constant(0.), nonlinearity=SELU_activation)
    # 320
    mask2 = Conv2DLayer(ConcatLayer([warp_deconv3, LR_conv2, warp_conv2]), 64, 5, pad=2,
                        W=W_init_SELU, b=Constant(0.), nonlinearity=sigmoid)
    warp_conv2_m = ElemwiseMergeLayer([warp_conv2, mask2], T.mul)
    warp_deconv2 = Deconv2DLayer(ConcatLayer([warp_deconv3, LR_conv2, warp_conv2_m]), num_filters=64,
                                 filter_size=4, stride=2, crop=1,
                                 W=W_init_SELU, b=Constant(0.), nonlinearity=SELU_activation)
    # final
    mask1 = Conv2DLayer(ConcatLayer([warp_deconv2, LR_conv1, warp_conv1]), 64, 5, pad=2,
                        W=W_init_SELU, b=Constant(0.), nonlinearity=sigmoid)
    warp_conv1_m = ElemwiseMergeLayer([warp_conv1, mask1], T.mul)
    post_fusion1 = Conv2DLayer(ConcatLayer([warp_deconv2, LR_conv1, warp_conv1_m]), 64, 5, pad=2,
                               W=W_init_SELU, b=Constant(0.), nonlinearity=SELU_activation)
    # NOTE: as written, post_fusion2 and test are built but never used; `final`
    # is produced directly from post_fusion1.
    post_fusion2 = Conv2DLayer(post_fusion1, 64, 5, pad=2,
                               W=W_init_SELU, b=Constant(0.), nonlinearity=SELU_activation)
    final = Conv2DLayer(post_fusion1, 3, 5, pad=2,
                        W=W_init_linear, b=Constant(0.), nonlinearity=linear)
    test = Conv2DLayer(final, 3, 5, pad=2,
                       W=W_init_linear, b=Constant(0.), nonlinearity=linear)
    return final
def __init__(self, incoming, num_units, rng, factorized=True, common_noise=False,
             sigma_0=0.4, use_mu_init=True, **kwargs):
    super(NoisyDenseLayer, self).__init__(incoming, num_units, **kwargs)
    if not common_noise and self.num_leading_axes != 1:
        raise NotImplementedError("Test use of theano.tensor.batched_dot")
    num_inputs = int(np.prod(self.input_shape[self.num_leading_axes:]))

    if use_mu_init:  # (override earlier W and b values, using num_inputs)
        val = np.sqrt(1. / num_inputs) if factorized else np.sqrt(3. / num_inputs)
        for param in [self.W, self.b]:
            param.set_value(floatX(get_rng().uniform(
                -val, val, param.get_value(borrow=True).shape)))

    # NOTE: paper quotes sigma_0 = 0.017 in case of not factorized
    sigma_0 = sigma_0 / np.sqrt(num_inputs) if factorized else sigma_0
    W_sigma = b_sigma = Constant(sigma_0)

    self.W_sigma = self.add_param(W_sigma, (num_inputs, num_units), name="W_sigma")
    if self.b is None:
        self.b_sigma = None
    else:
        self.b_sigma = self.add_param(b_sigma, (num_units,), name="b_sigma",
                                      regularizable=False)

    if common_noise:
        if factorized:
            self.eps_i = eps_i = rng.normal((num_inputs,))
            self.eps_j = eps_j = rng.normal((num_units,))
            self.W_epsilon = T.outer(f(eps_i), f(eps_j))
            self.b_epsilon = f(eps_j)
        else:
            self.W_epsilon = rng.normal((num_inputs, num_units))
            self.b_epsilon = rng.normal((num_units,))
    else:
        self.num_inputs = num_inputs
        self.num_units = num_units
        self.W_epsilon = None  # Must build later, when have input length
        self.b_epsilon = None
        self.eps_is, self.eps_js = list(), list()
        self.W_epsilons, self.b_epsilons = list(), list()
        self.rng = rng

    self.common_noise = common_noise
    self.factorized = factorized
    self.use_mu_init = use_mu_init
def create_dnn(input_vars, num_inputs, hidden_layer_size, num_outputs):
    network = InputLayer((None, None, num_inputs), input_vars)
    batch_size_theano, seqlen, _ = network.input_var.shape

    network = GaussianNoiseLayer(network, sigma=0.01)

    # Flatten (batch, time) so the dense layers operate per frame; note the
    # hard-coded 129 assumes num_inputs == 129.
    network = ReshapeLayer(network, (-1, 129))
    for i in range(1):
        network = DenseLayer(network, hidden_layer_size, W=GlorotUniform(), b=Constant(1.0),
                             nonlinearity=leaky_rectify)
        network = ReshapeLayer(network, (-1, hidden_layer_size))

    network = DenseLayer(network, num_outputs, nonlinearity=softmax)
    network = ReshapeLayer(network, (batch_size_theano, seqlen, num_outputs))
    return network
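# Hedged usage sketch: compile a deterministic prediction function for the
# network above. The hidden size and output count are illustrative; num_inputs
# is set to 129 to match the hard-coded reshape inside create_dnn.
import theano
import theano.tensor as T
from lasagne.layers import get_output

input_var = T.tensor3('inputs')  # (batch, time, features)
network = create_dnn(input_var, num_inputs=129, hidden_layer_size=256, num_outputs=40)
prediction = get_output(network, deterministic=True)
predict_fn = theano.function([input_var], prediction)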
def __init__(self, incoming, speaker_input_layer, num_speakers, psi, W=Constant(), **kwargs):
    incomings = [incoming, speaker_input_layer]
    super(LHUCLayer, self).__init__(incomings, **kwargs)

    m_batch, n_time_steps, n_features = self.input_shapes[0]
    self.num_speakers = num_speakers
    self.num_units = n_features
    self.W = self.add_param(W, (self.num_speakers, self.num_units), name='W_LHUC',
                            speaker_dependent=True)
    self.psi = psi