def test_tuple_shape(self, func, input_layer, ExpressionLayer):
    """Check an ExpressionLayer whose ``output_shape`` is a literal tuple.

    The reference input and reference result come from ``self.np_result``.
    The layer must report the reference result's shape for the input's
    shape, and its evaluated output must be numerically close to the
    reference result.
    """
    from lasagne.layers.helper import get_output

    X, expected = self.np_result(func, input_layer)
    target_shape = expected.shape

    layer = ExpressionLayer(input_layer, func, output_shape=target_shape)
    reported_shape = layer.get_output_shape_for(X.shape)
    assert reported_shape == target_shape

    # Evaluate the symbolic output on the concrete input.
    computed = get_output(layer, X).eval()
    assert np.allclose(computed, expected)
def test_callable_shape(self, func, input_layer, ExpressionLayer):
    """Check an ExpressionLayer whose ``output_shape`` is a callable.

    The callable derives the output shape by applying ``func`` to an
    uninitialised NumPy array of the requested input shape. The layer must
    report the same shape as the reference result from ``self.np_result``
    and produce numerically close values.
    """
    from lasagne.layers.helper import get_output

    X, expected = self.np_result(func, input_layer)

    def shape_from_func(input_shape):
        # Only the shape of the result matters, so the array contents
        # can stay uninitialised.
        placeholder = np.empty(shape=input_shape)
        return func(placeholder).shape

    layer = ExpressionLayer(input_layer, func, output_shape=shape_from_func)
    reported_shape = layer.get_output_shape_for(X.shape)
    assert reported_shape == expected.shape

    computed = get_output(layer, X).eval()
    assert np.allclose(computed, expected)
def test_nones_shape(self, func, input_layer_nones, ExpressionLayer):
    """Check shape reporting when the input shape contains ``None`` entries.

    ``None`` dimensions are replaced by 0 to build a concrete NumPy input;
    zero-sized (falsy) dimensions of ``func``'s result are mapped back to
    ``None`` to form the expected shape. The same expectation is then
    checked for three ways of specifying ``output_shape``: a tuple, a
    callable, and the string ``'auto'``.
    """
    input_shape = input_layer_nones.output_shape

    # Stand in 0 for every unknown dimension so NumPy can build an array.
    concrete_shape = tuple(
        dim if dim is not None else 0 for dim in input_shape)
    X = np.random.uniform(-1, 1, concrete_shape)
    expected_shape = tuple(dim or None for dim in func(X).shape)

    def constant_shape(input_shape):
        return expected_shape

    for shape_spec in (expected_shape, constant_shape, 'auto'):
        layer = ExpressionLayer(input_layer_nones,
                                func,
                                output_shape=shape_spec)
        assert layer.get_output_shape_for(input_shape) == expected_shape
def residual_block(
    l,
    batch_norm_alpha,
    batch_norm_epsilon,
    nonlinearity,
    survival_prob,
    add_after_nonlin,
    reduction_method,
    reduction_pool_mode,
    increase_units_factor=None,
    half_time=False,
    projection=False,
):
    """Stack a two-convolution residual block on top of layer ``l``.

    Parameters
    ----------
    l
        Incoming layer; ``l.output_shape[1]`` is read as the number of
        input filters.
    batch_norm_alpha, batch_norm_epsilon
        Forwarded as ``alpha`` / ``epsilon`` to every ``batch_norm`` call.
    nonlinearity
        Nonlinearity of the first convolution and, when
        ``add_after_nonlin`` is false, of the layer applied after the sum.
    survival_prob
        Number in [0, 1]. If it differs from 1 the result is wrapped in a
        ``RandomSwitchLayer`` together with the shortcut.
    add_after_nonlin
        If true, a ``NonlinearityLayer`` is applied to the second stack
        before the sum; otherwise it is applied to the sum.
    reduction_method
        ``'conv'`` or ``'pool'``; only consulted when ``half_time`` is set.
    reduction_pool_mode
        ``mode`` of the ``Pool2DLayer`` used for ``'pool'`` reduction.
    increase_units_factor
        Optional multiplier for the number of filters. The number of
        added filters must be even.
    half_time
        If true, a ``StrideReshapeLayer`` with ``n_stride=2`` is applied
        to both the first stack and the shortcut.
    projection
        When filters are increased: use a 1x1 convolution on the shortcut
        (true) or pad its channels (false).

    Returns
    -------
    The final layer of the block.
    """
    assert 0 <= survival_prob <= 1

    input_num_filters = l.output_shape[1]
    if increase_units_factor is None:
        out_num_filters = input_num_filters
    else:
        out_num_filters = int(input_num_filters * increase_units_factor)
        assert (out_num_filters - input_num_filters) % 2 == 0, (
            "Need even "
            "number of extra channels in order to be able to pad correctly")

    def with_batch_norm(layer):
        return batch_norm(layer, epsilon=batch_norm_epsilon,
                          alpha=batch_norm_alpha)

    def he_conv(incoming, filter_size, conv_nonlinearity):
        # Stride-1, same-padded convolution with He-normal weights,
        # followed by batch normalisation.
        conv = Conv2DLayer(incoming, num_filters=out_num_filters,
                           filter_size=filter_size, stride=(1, 1),
                           nonlinearity=conv_nonlinearity, pad='same',
                           W=lasagne.init.HeNormal(gain='relu'))
        return with_batch_norm(conv)

    if half_time and reduction_method != 'conv':
        assert half_time and reduction_method == 'pool'
        pooled = Pool2DLayer(l, pool_size=(3, 1), stride=(1, 1),
                             pad=(1, 0), mode=reduction_pool_mode)
        # 1x1 conv here, therefore the stride can be applied afterwards
        # without problems. Otherwise the stride would have to happen
        # before this point, with an extra condition later (reshape with
        # stride only for reduction method conv).
        stack_1 = he_conv(pooled, (1, 1), nonlinearity)
    else:
        stack_1 = he_conv(l, (3, 3), nonlinearity)

    if half_time:
        stack_1 = StrideReshapeLayer(stack_1, n_stride=2)
    stack_2 = he_conv(stack_1, (3, 3), None)

    # Shortcut connection. With half_time the shortcut is only reshaped,
    # which is fine for both the identity and the projection variant: a
    # 1x1 projection conv gives the same result before or after the
    # reshape (this would not hold for anything but a 1x1 conv).
    shortcut = StrideReshapeLayer(l, n_stride=2) if half_time else l

    if increase_units_factor is not None:
        if projection:
            # Projection shortcut, as option B in paper.
            shortcut = with_batch_norm(
                Conv2DLayer(shortcut, num_filters=out_num_filters,
                            filter_size=(1, 1), stride=(1, 1),
                            nonlinearity=None, pad='same', b=None))
        else:
            # Identity shortcut, as option A in paper: pad the channel
            # axis with half of the extra channels.
            half_extra_chans = (out_num_filters - input_num_filters) // 2
            shortcut = PadLayer(shortcut, [half_extra_chans, 0, 0],
                                batch_ndim=1)

    if add_after_nonlin:
        # NOTE(review): this NonlinearityLayer uses its default
        # nonlinearity rather than the `nonlinearity` argument -- confirm
        # that this is intended.
        block = ElemwiseSumLayer([NonlinearityLayer(stack_2), shortcut])
    else:
        block = NonlinearityLayer(ElemwiseSumLayer([stack_2, shortcut]),
                                  nonlinearity=nonlinearity)

    if survival_prob == 1:
        return block

    # Hack to make both branches broadcastable along the (empty) last
    # dimension; otherwise they are reported as being of different types,
    # e.g. shortcut: TensorType(False,False,False,True) vs. a plain 4d
    # tensor type for the block.
    def broadcast_last_dim(x):
        return T.addbroadcast(x, 3)

    shortcut = ExpressionLayer(shortcut, broadcast_last_dim)
    block = ExpressionLayer(block, broadcast_last_dim)
    return RandomSwitchLayer(block, shortcut, survival_prob)
def build_nmt_encoder_decoder(dim_word=1, n_embd=100, n_units=500,
                              n_proj=200, state=None, rev_state=None,
                              context_type=None, attention=False,
                              drop_p=None):
    """Build the layers of an RNN encoder-decoder and return them by name.

    Layers are stored in an ``OrderedDict`` in creation order; several
    steps consume "the most recently added layer", so the insertion order
    is significant.

    Parameters
    ----------
    dim_word
        ``input_size`` of both embedding layers; also forwarded to
        ``get_output_unit``.
    n_embd
        ``output_size`` of both embedding layers.
    n_units
        Unit count forwarded to ``get_rnn_unit``, used for the decoder
        init-state ``DenseLayer`` and for the ``ctx_input`` feature size.
    n_proj
        Unit count of the three projection ``DenseLayer`` s.
    state
        If ``None``, the decoder starts from the ``'init_state'`` layer.
        Otherwise ``state`` is used as the initial state and the context
        is read from a new ``InputLayer`` named ``'ctx_input'``.
    rev_state
        Accepted for interface compatibility; not used by this function.
    context_type
        ``'last'`` (slice index -1 along axis 1 of the encoder output) or
        ``'mean'`` (``mean_over_1_axis`` applied to the encoder output).
        NOTE(review): for any other value this function does not create
        ``enc['context2init']`` itself, so the lookup below only succeeds
        if ``get_rnn_unit`` supplied that key -- confirm.
    attention
        Forwarded to the decoder ``get_rnn_unit``. When true, the context
        is taken from the decoder layer's ``ctx`` attribute.
    drop_p
        If not ``None``, dropout layers with probability ``drop_p`` are
        inserted before the projections and before the output unit.

    Returns
    -------
    OrderedDict
        Mapping from layer name to layer, including whatever
        ``get_rnn_unit`` and ``get_output_unit`` contribute.
    """
    enc = OrderedDict()

    def last_layer():
        # ``enc.values()[-1]`` only works where ``values()`` returns a
        # list (Python 2); materialising the view works on 2 and 3.
        return list(enc.values())[-1]

    enc['input'] = InputLayer((None, None), name='input')
    enc_mask = enc['mask'] = InputLayer((None, None), name='mask')
    enc_rev_mask = enc['rev_mask'] = InputLayer((None, None),
                                                name='rev_mask')
    # Embed the token input explicitly: the most recently added layer at
    # this point is the 'rev_mask' input, not 'input'.
    enc['input_emb'] = EmbeddingLayer(enc['input'],
                                      input_size=dim_word,
                                      output_size=n_embd,
                                      name='input_emb')

    ### ENCODER PART ###
    # rnn encoder unit
    hid_init = Constant(0.)
    hid_init_rev = Constant(0.)
    encoder_unit = get_rnn_unit(last_layer(), enc_mask, enc_rev_mask,
                                hid_init, hid_init_rev, n_units,
                                prefix='encoder_')
    enc.update(encoder_unit)

    # context layer = decoder's initial state of shape
    # (batch_size, num_units)
    context = last_layer()
    if context_type == 'last':
        enc['context2init'] = SliceLayer(context, indices=-1, axis=1,
                                         name='last_encoder_context')
    elif context_type == 'mean':
        enc['context2init'] = ExpressionLayer(context, mean_over_1_axis,
                                              output_shape='auto',
                                              name='mean_encoder_context')

    ### DECODER PART ###
    W_init2proj, b_init2proj = GlorotUniform(), Constant(0.)
    enc['init_state'] = DenseLayer(enc['context2init'],
                                   num_units=n_units,
                                   W=W_init2proj, b=b_init2proj,
                                   nonlinearity=tanh,
                                   name='decoder_init_state')

    if state is None:
        init_state = enc['init_state']
        if not attention:
            # without attention the context is 2D, otherwise it stays 3D
            context = enc['context2init']
    else:
        init_state = state
        # (batch_size, nfeats)
        context = enc['context_input'] = InputLayer((None, n_units),
                                                    name='ctx_input')

    # (batch_size, valid ntsteps)
    enc['target'] = InputLayer((None, None), name='target')
    dec_mask = enc['target_mask'] = InputLayer((None, None),
                                               name='target_mask')
    # As for the encoder: embed the target tokens, not the mask layer
    # that happens to be the most recently added one.
    enc['target_emb'] = EmbeddingLayer(enc['target'],
                                       input_size=dim_word,
                                       output_size=n_embd,
                                       name='target_emb')
    prevdim = n_embd
    prev2rnn = last_layer()  # the target embedding

    decoder_unit = get_rnn_unit(prev2rnn, dec_mask, None, init_state,
                                None, n_units, prefix='decoder_',
                                context=context, attention=attention)
    enc.update(decoder_unit)

    if attention:
        ctxs = last_layer()
        ctxs_shape = ctxs.output_shape

        def get_ctx(x):
            # Ignores its input and exposes the decoder layer's ``ctx``
            # attribute.
            return ctxs.ctx

        context = enc['context'] = ExpressionLayer(ctxs,
                                                   function=get_ctx,
                                                   output_shape=ctxs_shape,
                                                   name='context')

    # reshape for feed-forward layer
    # 2D shapes of (batch_size * num_steps, num_units/num_feats)
    # NOTE(review): with attention enabled the most recently added layer
    # here is 'context', not the decoder output -- confirm this is the
    # intended input of 'rnn2proj'.
    enc['rnn2proj'] = rnn2proj = ReshapeLayer(last_layer(),
                                              (-1, n_units),
                                              name='flatten_rnn2proj')
    enc['prev2proj'] = prev2proj = ReshapeLayer(prev2rnn,
                                                (-1, prevdim),
                                                name='flatten_prev')

    # Branch on `attention` rather than on the layer type: the 'mean'
    # context is an ExpressionLayer too, but it is 2D and `ctxs_shape`
    # only exists in the attention case.
    if attention:
        ctx2proj = enc['ctx2proj'] = ReshapeLayer(context,
                                                  (-1, ctxs_shape[-1]),
                                                  name='flatten_ctxs')
    else:
        ctx2proj = context

    # load shared parameters
    W_rnn2proj, b_rnn2proj = GlorotUniform(), Constant(0.)
    W_prev2proj, b_prev2proj = GlorotUniform(), Constant(0.)
    W_ctx2proj, b_ctx2proj = GlorotUniform(), Constant(0.)

    # perturb the projection inputs by dropout
    if drop_p is not None:
        # DropoutLayer is configured through `p` (as in the
        # 'noise_output' layer below), not `sigma`.
        rnn2proj = enc['noise_rnn2proj'] = DropoutLayer(
            rnn2proj, p=drop_p, name='noise_rnn2proj')
        prev2proj = enc['drop_prev2proj'] = DropoutLayer(
            prev2proj, p=drop_p, name='drop_prev2proj')
        ctx2proj = enc['noise_ctx2proj'] = DropoutLayer(
            ctx2proj, p=drop_p, name='noise_ctx2proj')

    # project rnn
    enc['rnn_proj'] = DenseLayer(rnn2proj, num_units=n_proj,
                                 nonlinearity=linear,
                                 W=W_rnn2proj, b=b_rnn2proj,
                                 name='rnn_proj')
    # project raw targets
    enc['prev_proj'] = DenseLayer(prev2proj, num_units=n_proj,
                                  nonlinearity=linear,
                                  W=W_prev2proj, b=b_prev2proj,
                                  name='prev_proj')
    # project context
    enc['ctx_proj'] = DenseLayer(ctx2proj, num_units=n_proj,
                                 nonlinearity=linear,
                                 W=W_ctx2proj, b=b_ctx2proj,
                                 name='ctx_proj')

    # reshape back for merging
    n_batch = enc['input'].input_var.shape[0]
    rnn2merge = enc['rnn2merge'] = ReshapeLayer(enc['rnn_proj'],
                                                (n_batch, -1, n_proj),
                                                name='reshaped_rnn2proj')
    prev2merge = enc['prev2merge'] = ReshapeLayer(enc['prev_proj'],
                                                  (n_batch, -1, n_proj),
                                                  name='reshaped_prev')
    if attention:
        # NOTE(review): this layer reuses the name 'reshaped_prev' and is
        # not stored in `enc`, unlike the branch below -- confirm.
        ctx2merge = ReshapeLayer(enc['ctx_proj'],
                                 (n_batch, -1, n_proj),
                                 name='reshaped_prev')
    else:
        ctx2merge = enc['ctx2merge'] = DimshuffleLayer(
            enc['ctx_proj'], pattern=(0, 'x', 1), name='reshaped_context')

    # combine projections into shape (batch_size, n_steps, n_proj)
    enc['proj_merge'] = ElemwiseMergeLayer(
        [rnn2merge, prev2merge, ctx2merge],
        merge_function=tanh_add, name='proj_merge')

    # reshape for output regression projection
    enc['merge2proj'] = ReshapeLayer(last_layer(), (-1, n_proj),
                                     name='flatten_proj_merge')

    # perturb concatenated regressors by dropout
    if drop_p is not None:
        enc['noise_output'] = DropoutLayer(last_layer(), p=drop_p,
                                           name='noise_output')

    # regress on combined (perturbed) projections
    out = get_output_unit(enc['target'], last_layer(), dim_word)
    enc.update(out)  # update graph
    return enc