Beispiel #1
0
def build_baseline5_fan(input_var):
    """Build baseline 1 extended with the feature-aware normalization layer.

    VGG conv4_1 is used for feature extraction: the ``conv4_1`` output of
    ``get_features`` is upscaled by a factor of 8 and turned into a mask that
    is 1 where a channel equals the per-position maximum over axis 1 and 0
    elsewhere. The normalized input goes through a linear convolution
    (3 filters, filter size 1) and is then combined with the mask in a
    ``FeatureAwareNormLayer`` initialised with beta=128 and gamma=25.

    :param input_var: variable bound to the input layer, whose shape is
        ``(None, 3, tools.INP_PSIZE, tools.INP_PSIZE)``
    :return: tuple ``(output_layer, net)`` where ``net`` is an OrderedDict
        mapping layer names to layers
    """
    # TODO remove these imports + move relevant parts to layers.py once everything is
    # up and running
    import theano.tensor as T
    import numpy as np

    net = OrderedDict()

    # Input, standardization
    last = net['input'] = InputLayer(
        (None, 3, tools.INP_PSIZE, tools.INP_PSIZE), input_var=input_var)
    last = net['norm'] = ExpressionLayer(last, lambda x: normalize(x))

    # Feature branch: it hangs off the normalized input and does not advance
    # ``last``, so the "middle" convolution below still consumes net['norm'].
    net['features_s8'] = get_features(last)["conv4_1"]
    net['features'] = Upscale2DLayer(net["features_s8"], 8)
    net['mask'] = ExpressionLayer(
        net["features"], lambda x: 1. * T.eq(x, x.max(axis=1, keepdims=True)))

    last = net["middle"] = ConvLayer(last, 3, 1, nonlinearity=linear)
    last = net["fan"] = FeatureAwareNormLayer(
        (last, net['mask']),
        beta=nn.init.Constant(np.float32(128.)),
        gamma=nn.init.Constant(np.float32(25.)))

    return last, net
Beispiel #2
0
def build(myNet, idxSiam, verbose=True):
    """Register the keypoint bypass layers for branch ``idxSiam``.

    Nothing is learned here: the score and xyz inputs are fed straight from
    ``myNet.y[idxSiam]`` and ``myNet.pos[idxSiam]``. The layers are stored in
    ``myNet.layers[idxSiam]`` under the ``kp-*`` names; nothing is returned.

    :param myNet: network object exposing ``layers``, ``config``, ``y``, ``pos``
    :param idxSiam: index of the branch to populate
    :param verbose: not used by this function
    """
    branch = myNet.layers[idxSiam]

    def _rescale_score(x):
        # Reshape to a (batch_size, 1) column and apply x -> 2 * x - 1.
        return x.reshape([myNet.config.batch_size, 1]) * 2.0 - 1.0

    # -------------------------------------------------------------------------
    # Bypass for score map
    branch['kp-bypass-input-score'] = InputLayer(
        (myNet.config.batch_size, ),
        input_var=myNet.y[idxSiam],
        name='kp-bypass-input-score')

    # Both score-map layers are the same rescaling of the bypassed score.
    for layer_name in ('kp-scoremap-cut', 'kp-scoremap'):
        branch[layer_name] = ExpressionLayer(
            branch['kp-bypass-input-score'],
            _rescale_score,
            output_shape=[myNet.config.batch_size, 1],
            name=layer_name)

    # -------------------------------------------------------------------------
    # Bypass for xyz coordinates
    branch['kp-bypass-input-xyz'] = InputLayer(
        (myNet.config.batch_size, 3),
        input_var=myNet.pos[idxSiam],
        name='kp-bypass-input-xyz')

    def _passthrough(x):
        # Identity (an additive [0.5, 0.5, 1] offset used to be applied here
        # and is disabled).
        return x

    branch['kp-output'] = ExpressionLayer(
        branch['kp-bypass-input-xyz'],
        _passthrough,
        output_shape=[myNet.config.batch_size, 3],
        name='kp-output')
Beispiel #3
0
def get_model(input_images, input_position, input_mult, target_var):
    """Build the 3D residual convolutional network and its loss expressions.

    Architecture: an input of shape ``(None, 22, 30, 64, 64)``, one
    batch-normalised 3x3x3 convolution (16 filters, ReLU), six residual units
    (two such convolutions plus an element-wise sum with the unit's input),
    one more batch-normalised convolution and a final 3x3x3 convolution with
    22 filters and a sigmoid nonlinearity.

    :param input_images: variable bound to the input layer
    :param input_position: not used by this function (kept for the callers)
    :param input_mult: not used by this function (kept for the callers)
    :param target_var: target variable for the binary cross-entropy loss
    :return: tuple ``(test_prediction, prediction, loss, params)`` where
        ``test_prediction`` is the deterministic forward pass, ``prediction``
        the default forward pass, ``loss`` the mean binary cross-entropy of
        ``prediction`` and ``params`` the trainable parameters
    """
    num_residual_units = 6

    def _conv_bn(incoming):
        # 3x3x3 'same' convolution with 16 filters, ReLU, wrapped in batch norm.
        return batch_norm(Conv3DDNNLayer(incoming=incoming, num_filters=16,
                                         filter_size=(3, 3, 3),
                                         stride=(1, 1, 1), pad='same',
                                         nonlinearity=rectify))

    # input layer with unspecified batch size
    layer = InputLayer(shape=(None, 22, 30, 64, 64), input_var=input_images)

    # Stem convolution followed by the residual units.
    layer = _conv_bn(layer)
    for _ in range(num_residual_units):
        shortcut = layer
        layer = _conv_bn(_conv_bn(layer))
        layer = ElemwiseSumLayer([layer, shortcut])

    # Head: one more conv+BN, then a sigmoid convolution producing 22 channels.
    layer = _conv_bn(layer)
    layer_prediction = Conv3DDNNLayer(incoming=layer, num_filters=22,
                                      filter_size=(3, 3, 3), stride=(1, 1, 1),
                                      pad='same', nonlinearity=sigmoid)

    # Training-mode prediction and loss.
    prediction = get_output(layer_prediction)
    loss = binary_crossentropy(prediction, target_var).mean()

    # Trainable parameters; the update rule is chosen by the caller.
    params = get_all_params(layer_prediction, trainable=True)

    # Deterministic forward pass for validation/testing.
    test_prediction = get_output(layer_prediction, deterministic=True)

    return test_prediction, prediction, loss, params
Beispiel #4
0
 def geometric_mean(incoming):
     """Return a layer computing the normalised geometric mean of ``incoming``.

     Each member's output is passed through ``log(x + NCEnsemble.eps)``, the
     logs are averaged with weight ``1 / len(incoming)``, exponentiated, and
     finally divided by their sum over axis 1.

     :param incoming: sequence of layers to combine (must support ``len``)
     :return: an ``ExpressionLayer`` producing the renormalised mean
     """
     log_members = [
         ExpressionLayer(member, lambda x: T.log(x + NCEnsemble.eps))
         for member in incoming
     ]
     exp_out = ExpressionLayer(
         ElemwiseSumLayer(log_members, coeffs=1./len(incoming)),
         T.exp
     )
     # The normaliser is computed from the layer's own input, not from a graph
     # captured with get_output() at construction time. That way it follows
     # whatever settings (e.g. deterministic=True, substituted inputs) the
     # caller passes to get_output() later.
     return ExpressionLayer(
         exp_out, lambda x: x / T.sum(x, axis=1)[..., np.newaxis])
Beispiel #5
0
    def shortcut(self, incoming, residual, type=None):
        """Build the shortcut branch connecting ``incoming`` to ``residual``.

        Both arguments may be layers (their ``output_shape`` is used) or plain
        shape tuples. ``type`` falls back to ``self.type`` when falsy:

        * ``'C'``: always a projection.
        * ``'B'``: identity if the filter counts match, else a projection.
        * ``'A'``: identity if the filter counts match, else a strided
          subsampling (when spatial sizes differ) followed by channel padding.

        Any other type with differing filter counts yields ``None``.
        """
        kind = type or self.type
        src_shape = getattr(incoming, 'output_shape', incoming)
        dst_shape = getattr(residual, 'output_shape', residual)
        src_filters, dst_filters = src_shape[1], dst_shape[1]
        stride = tuple(src_shape[axis] // dst_shape[axis] for axis in (-2, -1))

        # Projection shortcuts: always for C, for B only when filters change.
        if kind == 'C' or (kind == 'B' and src_filters != dst_filters):
            return self.projection(incoming, dst_filters, stride=stride)

        # Identity shortcut whenever the number of filters is unchanged.
        if src_filters == dst_filters:
            return incoming

        if kind != 'A':
            return None

        # Type A: parameter-free shortcut, subsample spatially if needed...
        padded_input = incoming
        if not numpy.all(src_shape[2:] == dst_shape[2:]):
            step_rows, step_cols = stride
            padded_input = ExpressionLayer(
                incoming, lambda x: x[:, :, ::step_rows, ::step_cols],
                src_shape[:2] + dst_shape[2:])
        # ...then pad the channel axis by half the filter difference per side.
        half_gap = (dst_filters - src_filters) // 2
        return PadLayer(padded_input, [half_gap, 0, 0], batch_ndim=1)
Beispiel #6
0
    def residual_block(l, increase_dim=False, projection=False):
        """Build one residual block (two 3x3 conv+BN layers plus shortcut).

        :param l: incoming layer; its ``output_shape[1]`` gives the filter count
        :param increase_dim: if True, the first convolution uses stride 2 and
            the number of filters doubles
        :param projection: only used when ``increase_dim`` is True; selects a
            1x1 strided convolution shortcut (option B) over the
            subsample-and-pad identity shortcut (option A)
        :return: the ReLU layer applied to the sum of residual and shortcut
        """
        in_filters = l.output_shape[1]
        first_stride = (2, 2) if increase_dim else (1, 1)
        out_filters = in_filters * 2 if increase_dim else in_filters

        def _conv3x3_bn(incoming, stride, nonlinearity):
            return batch_norm(ConvLayer(incoming, num_filters=out_filters, filter_size=(3,3), stride=stride, nonlinearity=nonlinearity, pad='same', W=lasagne.init.HeNormal(gain='relu'), flip_filters=False))

        residual = _conv3x3_bn(l, first_stride, rectify)
        residual = _conv3x3_bn(residual, (1,1), None)

        # pick the shortcut branch
        if not increase_dim:
            skip = l
        elif projection:
            # projection shortcut, as option B in paper
            skip = batch_norm(ConvLayer(l, num_filters=out_filters, filter_size=(1,1), stride=(2,2), nonlinearity=None, pad='same', b=None, flip_filters=False))
        else:
            # identity shortcut, as option A in paper: drop every other
            # row/column, then zero-pad the channel axis on both sides
            subsampled = ExpressionLayer(l, lambda X: X[:, :, ::2, ::2], lambda s: (s[0], s[1], s[2]//2, s[3]//2))
            skip = PadLayer(subsampled, [out_filters//4,0,0], batch_ndim=1)

        return NonlinearityLayer(ElemwiseSumLayer([residual, skip]),nonlinearity=rectify)
Beispiel #7
0
def build_model(feadim,
                Nclass,
                kernel_size=3,
                border_mode='same',
                input_length=None,
                noise=(0.1, 0.2, 0.1)):
    """Build a pooling-only network ending in a per-step softmax.

    The input layer has shape ``(None, 1, feadim, input_length)``. It is
    max-pooled four times, rearranged by ``filter_merge``, average-pooled
    along axis 1 and mapped to ``Nclass + 1`` softmax units.

    :param feadim: size of the third input axis
    :param Nclass: number of classes; the output has ``Nclass + 1`` units
    :param kernel_size: not used by this function
    :param border_mode: not used by this function
    :param input_length: size of the last input axis (may be None)
    :param noise: not used by this function
    :return: the final dense layer
    """
    net = InputLayer(shape=(None, 1, feadim, input_length), name='input0')

    # Four max-pooling stages; the last two only pool along the first
    # spatial axis.
    pooling_plan = (
        ('pool0', (2, 2)),
        ('pool1', (2, 2)),
        ('pool2', (2, 1)),
        ('pool3', (2, 1)),
    )
    for layer_name, window in pooling_plan:
        net = MaxPool2DLayer(net, pool_size=window, name=layer_name)

    net = ExpressionLayer(net,
                          filter_merge,
                          output_shape=filter_merge_output_shape,
                          name='permute0')
    net = Pool1DLayer(net,
                      pool_size=2,
                      mode='average_exc_pad',
                      axis=1,
                      name='pool4')
    # num_leading_axes=2 keeps the first two axes and applies the dense
    # transform to the remaining ones.
    return DenseLayer(net,
                      num_units=Nclass + 1,
                      nonlinearity=softmax,
                      num_leading_axes=2,
                      name='dense0')
Beispiel #8
0
    def __init__(self, vocab, input_var=None):
        """Build the utterance encoder: input -> embedding -> masked LSTM.

        :param vocab: vocabulary object providing ``PAD_ix`` and ``n_tokens``
        :param input_var: optional variable bound to the input layer

        The layers are exposed as ``l_in``, ``l_mask``, ``l_emb`` and
        ``l_lstm``; ``output`` is an alias of ``l_lstm``.
        """

        def _is_real_token(tokens):
            # True wherever the token differs from the padding index.
            return T.neq(tokens, vocab.PAD_ix)

        # Token input with two unspecified axes.
        self.l_in = InputLayer(
            (None, None), input_var=input_var, name='utt input')
        self.l_mask = ExpressionLayer(
            self.l_in, _is_real_token, name='utt mask')

        self.l_emb = EmbeddingLayer(
            self.l_in, vocab.n_tokens, Config.EMB_SIZE, name="utt embedding")

        # Only the final LSTM state is returned; the mask is passed so that
        # padded positions are handled by the layer.
        self.l_lstm = LSTMLayer(
            self.l_emb,
            Config.N_LSTM_UNITS,
            name='encoder_lstm',
            grad_clipping=Config.LSTM_LAYER_GRAD_CLIP,
            mask_input=self.l_mask,
            only_return_final=True,
            peepholes=False)

        self.output = self.l_lstm
Beispiel #9
0
    def create_attention(self, gru_con, in_con_mask, condition, batch_size,
                         n_hidden_con, **kwargs):
        """Build an additive attention read-out over ``gru_con``.

        ``gru_con`` and ``condition`` are each projected to
        ``self.n_attention`` units, summed, passed through tanh and reduced to
        one score per step. The scores are normalised with a masked sequence
        softmax and used to weight ``gru_con``, which is then summed over
        axis 1.

        NOTE(review): the ``in_con_mask`` argument is never read; the mask is
        always taken from ``self.in_con_mask``. ``batch_size``,
        ``n_hidden_con`` and ``kwargs`` are unused as well. Confirm this is
        intended.
        """
        mask = self.in_con_mask

        # Project both sides to the attention space; the condition gets a
        # broadcastable axis in position 1 so it can be added to every step.
        context_proj = non_flattening_dense_layer(
            gru_con, mask, self.n_attention, nonlinearity=None)
        condition_proj = DenseLayer(
            condition, self.n_attention, nonlinearity=None)
        condition_proj = dimshuffle(condition_proj, (0, 'x', 1))

        energies = NonlinearityLayer(
            ElemwiseSumLayer([context_proj, condition_proj]), T.tanh)

        # One scalar score per step: project to a single unit, drop that axis.
        score_proj = non_flattening_dense_layer(
            energies, mask, 1, nonlinearity=None)
        scores = SliceLayer(score_proj, indices=0, axis=2)

        weights = SequenceSoftmax(scores, mask)

        # Weight the context by the attention distribution.
        weights_3d = ForgetSizeLayer(dimshuffle(weights, (0, 1, 'x')))
        weighted = ElemwiseMergeLayer([weights_3d, gru_con], T.mul)

        def _sum_over_steps(x):
            return T.sum(x, axis=1)

        def _drop_step_axis(s):
            return (s[0], ) + s[2:]

        return ExpressionLayer(weighted, _sum_over_steps, _drop_step_axis)
Beispiel #10
0
def build_baseline2_feats(input_var, nb_filter=96):
    """Build the baseline that maps the input to a feature space first.

    Two encoder stages (linear convolution with filter size 1, batch norm,
    ReLU), a linear "middle" convolution, two transposed convolutions that
    reuse the encoder weights, and a final batch norm initialised with
    beta=128 and gamma=25.

    :param input_var: variable bound to the input layer
    :param nb_filter: number of filters of the encoder and middle convolutions
    :return: tuple ``(output_layer, net)`` with ``net`` an OrderedDict of
        named layers
    """
    net = OrderedDict()

    def _encoder_stage(incoming, suffix):
        # conv -> batch norm -> ReLU, registered as conv/bn/relu + suffix
        conv = net["conv" + suffix] = ConvLayer(incoming,
                                                nb_filter,
                                                1,
                                                pad=0,
                                                flip_filters=False,
                                                nonlinearity=linear)
        normed = net["bn" + suffix] = BatchNormLayer(conv)
        activated = net["relu" + suffix] = NonlinearityLayer(
            normed, nonlinearity=rectify)
        return activated

    def _tied_deconv(incoming, encoder_conv):
        # Transposed convolution mirroring ``encoder_conv`` and sharing its W.
        return TransposedConv2DLayer(
            incoming,
            encoder_conv.input_shape[1],
            encoder_conv.filter_size,
            stride=encoder_conv.stride,
            crop=encoder_conv.pad,
            W=encoder_conv.W,
            flip_filters=not encoder_conv.flip_filters,
            nonlinearity=None)

    # Input, standardization
    net['input'] = InputLayer(
        (None, 3, tools.INP_PSIZE, tools.INP_PSIZE), input_var=input_var)
    net['norm'] = ExpressionLayer(net['input'], lambda x: normalize(x))

    # Pretrained Encoder as before
    encoded = _encoder_stage(net['norm'], "1_1")
    encoded = _encoder_stage(encoded, "1_2")

    # Modified Middle Part
    net["middle"] = ConvLayer(encoded, nb_filter, 1, nonlinearity=linear)

    # Decoder as before (mirrors the encoder in reverse order)
    net["deconv1_2"] = _tied_deconv(net["middle"], net["conv1_2"])
    net["deconv1_1"] = _tied_deconv(net["deconv1_2"], net["conv1_1"])

    net["bn"] = BatchNormLayer(net["deconv1_1"],
                               beta=nn.init.Constant(128.),
                               gamma=nn.init.Constant(25.))

    return net["bn"], net
    def build_model(self):
        """Assemble the agent: windowed observation memory, conv body, Q head.

        :return: tuple ``(resolver, agent)`` where ``resolver`` is the
            ``EpsilonGreedyResolver`` on top of the Q-value layer and
            ``agent`` the ``Agent`` wiring observation, memory, policy and
            resolver together
        """
        # reorder to [batch, color, x, y] so the convolution layers work
        # correctly, then downsample by 2x2 pooling
        frames = DimshuffleLayer(self.observation_layer, (0, 3, 1, 2))
        frames = Pool2DLayer(frames, pool_size=(2, 2))

        # memory: a window over the last ``window_size`` frames
        window_size = 5
        frame_shape = tuple(frames.output_shape[1:])
        prev_window = InputLayer((None, window_size) + frame_shape,
                                 name="previous window state")
        window_layer = WindowAugmentation(frames,
                                          prev_window,
                                          name="new window state")

        # maps the new memory state to the input that carries the old one
        memory_dict = {window_layer: prev_window}

        # pixel-wise maximum over the temporal window (to avoid flickering)
        collapsed_shape = (None, ) + window_layer.output_shape[2:]
        window_max = ExpressionLayer(window_layer,
                                     lambda a: a.max(axis=1),
                                     output_shape=collapsed_shape)

        # neural network body: two strided convolutions and one dense layer,
        # each wrapped in batch normalisation
        body = window_max
        conv_plan = ((16, (8, 8), (4, 4)), (32, (4, 4), (2, 2)))
        for n_filters, kernel, step in conv_plan:
            body = batch_norm(
                lasagne.layers.Conv2DLayer(body,
                                           num_filters=n_filters,
                                           filter_size=kernel,
                                           stride=step))
        body = batch_norm(lasagne.layers.DenseLayer(body, num_units=256))

        # q_eval: one linear output per action
        policy_layer = DenseLayer(body,
                                  num_units=self.n_actions,
                                  nonlinearity=lasagne.nonlinearities.linear,
                                  name="QEvaluator")

        # action picker on top of the Q-values
        resolver = EpsilonGreedyResolver(policy_layer, name="resolver")

        # all together
        agent = Agent(self.observation_layer, memory_dict, policy_layer,
                      resolver)

        return resolver, agent
def createXYZTCropLayer(input_layer_4d,
                        xyz_layer,
                        theta_layer,
                        max_scale,
                        out_width,
                        name=None):
    """Create a transformer layer cropping a rotated, scaled square patch.

    The xyz and theta layers are concatenated and converted into six affine
    parameters per sample, which drive a ``TransformerLayer`` producing an
    ``out_width`` x ``out_width`` output.

    :param input_layer_4d: layer to crop from; axis 3 of its output shape is
        used as the reference width
    :param xyz_layer: layer providing x, y offsets and z (a log2 scale)
    :param theta_layer: layer providing the rotation angle
    :param max_scale: divisor applied to the scale factor
    :param out_width: side length of the output patch
    :param name: optional name of the resulting layer
    :return: the ``TransformerLayer``
    """
    source_shape = get_output_shape(input_layer_4d)
    to_floatX = np.cast[floatX]

    # ratio to reduce to patch size from original
    patch_ratio = to_floatX(out_width) / to_floatX(source_shape[3])

    # merge xyz and t layers together to form xyzt
    pose_layer = ConcatLayer([xyz_layer, theta_layer])

    def _pose_to_affine(xyzt):
        # columns: x offset, y offset (both already between -1 and 1),
        # log2 scale and rotation angle
        shift_x = xyzt[:, 0]
        shift_y = xyzt[:, 1]
        log2_scale = xyzt[:, 2]
        angle = xyzt[:, 3]

        # compute the resize from the largest scale image
        zoom = (to_floatX(patch_ratio) * to_floatX(2.0)**log2_scale /
                to_floatX(max_scale))

        # row-major entries of the 2x3 affine matrix; (1 0 0 0 1 0) is the
        # identity transform
        entries = (
            zoom * T.cos(angle), -zoom * T.sin(angle), shift_x,
            zoom * T.sin(angle), zoom * T.cos(angle), shift_y,
        )
        # turn each entry into a column before concatenating along axis 1
        columns = [entry.flatten().dimshuffle(0, 'x') for entry in entries]
        return T.concatenate(columns, axis=1)

    affine_layer = ExpressionLayer(pose_layer,
                                   _pose_to_affine,
                                   output_shape=(source_shape[0], 6))

    # height and width of the crop are both ``out_width``
    return TransformerLayer(input_layer_4d,
                            affine_layer,
                            out_width,
                            out_width,
                            name=name)
Beispiel #13
0
def build_baseline1_small(input_var):
    """Build the simplest model: normalize, linear convolution, batch norm.

    :param input_var: variable bound to the input layer, whose shape is
        ``(None, 3, tools.INP_PSIZE, tools.INP_PSIZE)``
    :return: tuple ``(output_layer, net)`` with ``net`` an OrderedDict of
        named layers
    """
    net = OrderedDict()

    # Input, standardization
    net['input'] = InputLayer(
        (None, 3, tools.INP_PSIZE, tools.INP_PSIZE), input_var=input_var)
    net['norm'] = ExpressionLayer(net['input'], lambda x: normalize(x))

    # Linear convolution with 3 filters of size 1
    net["middle"] = ConvLayer(net['norm'], 3, 1, nonlinearity=linear)

    # Final batch norm, initialised with beta=128 and gamma=25
    net["bn"] = BatchNormLayer(net["middle"],
                               beta=nn.init.Constant(128.),
                               gamma=nn.init.Constant(25.))

    return net["bn"], net
Beispiel #14
0
def nn_upsample(upsample_in,
                num_styles=None,
                num_filters=None,
                filter_size=3,
                stride=1):
    """Upsample by 2x via element repetition, then apply a style conv block.

    :param upsample_in: incoming layer
    :param num_styles: forwarded to ``style_conv_block``
    :param num_filters: filters of the conv block; defaults to
        ``upsample_in.output_shape[1]`` when None
    :param filter_size: forwarded to ``style_conv_block``
    :param stride: forwarded to ``style_conv_block``
    :return: the layer returned by ``style_conv_block``
    """
    if num_filters is None:
        num_filters = upsample_in.output_shape[1]

    # Repeat every element twice along axes 2 and 3.
    nn_network = ExpressionLayer(upsample_in,
                                 lambda X: X.repeat(2, 2).repeat(2, 3),
                                 output_shape='auto')
    nn_network = style_conv_block(nn_network, num_styles, num_filters,
                                  filter_size, stride)

    return nn_network
Beispiel #15
0
    def init_nn_structure(self, seq_length, pred_len):
        """
        Inits network structure

        For every non-empty entry of ``self.feature_dict`` the input is sliced
        along axis 1 with that entry's indices and fed through two stacked
        LSTM layers; the final states of all branches are concatenated and
        mapped to ``pred_len`` linear outputs. The method also compiles the
        training, cost and forecast functions and stores them on ``self``.

        :param seq_length: number of features
        :type seq_length: int
        :param pred_len: number of predicted values (target dimensionality)
        :type pred_len: int
        :return: None
        """
        self.iteration = 0

        theano_input = T.tensor3()
        theano_output = T.matrix()

        from lasagne.layers import InputLayer, LSTMLayer, DenseLayer, ExpressionLayer, ConcatLayer
        from lasagne.nonlinearities import tanh

        model = {}
        model['input_layer'] = InputLayer((None, seq_length, 1), input_var=theano_input)

        lst_concat = []
        for i, (key, indices) in enumerate(self.feature_dict.items()):
            if indices is None or len(indices) == 0:
                continue
            branch = str(i)
            # Bind the indices as a default argument: the slice function only
            # runs when the graph is built after this loop, and a plain
            # closure over the loop variable would make every branch slice
            # with the indices of the last key.
            model['input_slice_' + branch] = ExpressionLayer(
                model['input_layer'],
                lambda X, indices=indices: X[:, indices, :])
            num_units = self.num_lstm_units_large if len(indices) > 10 else self.num_lstm_units_small
            model['hidden_layer_' + branch + '_1'] = LSTMLayer(
                model['input_slice_' + branch],
                num_units, grad_clipping=self.grad_clip, nonlinearity=tanh)
            model['hidden_layer_' + branch + '_2'] = LSTMLayer(
                model['hidden_layer_' + branch + '_1'],
                num_units, grad_clipping=self.grad_clip, nonlinearity=tanh,
                only_return_final=True)
            lst_concat.append(model['hidden_layer_' + branch + '_2'])
        model['concatenate_hidden'] = ConcatLayer(lst_concat, axis=1)
        model['output_layer'] = DenseLayer(model['concatenate_hidden'], pred_len, nonlinearity=None)

        model_output = lasagne.layers.get_output(model['output_layer'])
        params = lasagne.layers.get_all_params(model['output_layer'], trainable=True)

        # Mean squared error, optimised with Adam; the learning rate lives in
        # a shared variable so it can be changed after compilation.
        self.loss = lasagne.objectives.squared_error(model_output, theano_output).mean()
        self.lr = theano.shared(np.array(self.learning_rate, dtype='float32'))
        self.updates = lasagne.updates.adam(self.loss, params, learning_rate=self.lr)

        self.l_out = model['output_layer']
        self.trainT = theano.function([theano_input, theano_output], self.loss, updates=self.updates)
        self.compute_cost = theano.function([theano_input, theano_output], self.loss)
        self.forecast = theano.function([theano_input], model_output)
        
        '''
Beispiel #16
0
    def setup_transform_net(self, input_var=None):
        """Build the transformation network and store it in ``self.network``.

        Three style conv blocks, five residual blocks and two upsampling
        stages, followed by an output block chosen by ``self.net_type``:
        0 uses tanh scaled by 150, 1 uses sigmoid. For any other value no
        output block is added. The result is stored under the
        ``'transform_net'`` key.

        :param input_var: optional variable bound to the input layer
        """
        styles = self.num_styles
        net = InputLayer(shape=self.shape, input_var=input_var)

        # (num_filters, filter_size, stride) of the leading conv blocks
        for filters, size, step in ((32, 9, 1), (64, 3, 2), (128, 3, 2)):
            net = style_conv_block(net, styles, filters, size, step)

        for _ in range(5):
            net = residual_block(net, styles)

        for _ in range(2):
            net = nn_upsample(net, styles)

        if self.net_type == 0:
            net = style_conv_block(net, styles, 3, 9, 1, tanh)
            # scale the tanh output by 150; the shape is unchanged
            net = ExpressionLayer(net, lambda X: 150. * X, output_shape=None)
        elif self.net_type == 1:
            net = style_conv_block(net, styles, 3, 9, 1, sigmoid)

        self.network['transform_net'] = net
Beispiel #17
0
def build_sb_resnet_phase(prev_layer, n_out, count, stride):
    """Stack ``count`` bottleneck stick-breaking residual layers.

    The first layer uses ``stride`` and starts from a stick of ones with
    shape ``(batch, 1)``; the remaining ``count - 1`` layers use stride
    ``(1, 1)`` and continue from the previous remaining stick.

    :return: tuple ``(layer, (posterior_a, posterior_b, stick_lengths))``
        where the three inner layers concatenate, along axis 1, the
        ``kumar_a`` attributes, the ``kumar_b`` attributes and the remaining
        sticks of all layers in the phase
    """
    # Initial stick length is 1.
    initial_stick = ExpressionLayer(
        prev_layer,
        function=lambda X: T.ones((X.shape[0], 1)),
        output_shape=(None, 1))

    layer, current_stick = build_bottleneck_sb_residual_layer(
        prev_layer, n_out, stride, initial_stick)
    sticks = [current_stick]

    for _ in range(count - 1):
        layer, current_stick = build_bottleneck_sb_residual_layer(
            layer, n_out, stride=(1, 1), remaining_stick=current_stick)
        sticks.append(current_stick)

    # Compute posteriors
    posterior_a = ConcatLayer([s.kumar_a for s in sticks], axis=1)
    posterior_b = ConcatLayer([s.kumar_b for s in sticks], axis=1)
    stick_lengths = ConcatLayer(sticks, axis=1)

    return layer, (posterior_a, posterior_b, stick_lengths)
Beispiel #18
0
def build(myNet, idxSiam, verbose=True):
    """Register the orientation-estimation layers for branch ``idxSiam``.

    Three conv / ReLU / max-pool stages, two dense layers with GHH feature
    pooling (dropout after the first) and a final arctan2 expression. Branch 0
    creates fresh initialisers; every other branch reuses the ``W`` and ``b``
    of the corresponding layer in branch 0. Layers are stored in
    ``myNet.layers[idxSiam]`` under ``ori-*`` names; nothing is returned.

    The input layer ``myNet.layers[idxSiam]['ori-input']`` must already exist.

    :param myNet: network object exposing ``layers`` and ``config``
    :param idxSiam: index of the branch to populate
    :param verbose: not used by this function
    """
    INITIALIZATION_GAIN = 1.0
    branch = myNet.layers[idxSiam]

    def _init_pair(layer_name):
        # Fresh initialisers for branch 0, shared parameters otherwise.
        if idxSiam == 0:
            return HeNormal(gain=INITIALIZATION_GAIN), Constant(0.0)
        return (myNet.layers[0][layer_name].W,
                myNet.layers[0][layer_name].b)

    def _conv_stage(conv_name, act_name, pool_name, source_name,
                    num_filters, filter_size):
        # Linear convolution, then ReLU, then 2x max pooling.
        W_init, b_init = _init_pair(conv_name)
        branch[conv_name] = Conv2DLayer(
            branch[source_name],
            num_filters=num_filters,
            filter_size=filter_size,
            W=W_init,
            b=b_init,
            nonlinearity=None,
            flip_filters=False,
            name=conv_name,
        )
        branch[act_name] = NonlinearityLayer(
            branch[conv_name],
            nonlinearity=relu,
            name=act_name,
        )
        branch[pool_name] = MaxPool2DLayer(
            branch[act_name],
            pool_size=2,
            name=pool_name,
        )

    def _ghh_stage(dense_name, act_name, source_name, nu, ns, nm):
        # Linear dense layer with nu * ns * nm units, then GHH pooling.
        W_init, b_init = _init_pair(dense_name)
        branch[dense_name] = DenseLayer(
            branch[source_name],
            num_units=nu * ns * nm,
            W=W_init,
            b=b_init,
            nonlinearity=None,
            name=dense_name,
        )
        branch[act_name] = GHHFeaturePoolLayer(
            branch[dense_name],
            num_in_sum=ns,
            num_in_max=nm,
            max_strength=myNet.config.max_strength,
            name=act_name,
        )

    # -------------------------------------------------------------------------
    # 3x Convolution and Max Pooling layers, starting from the 2d cropped
    # patch in 'ori-input'
    _conv_stage('ori-c0', 'ori-c0a', 'ori-c0p', 'ori-input',
                num_filters=10, filter_size=5)
    _conv_stage('ori-c1', 'ori-c1a', 'ori-c1p', 'ori-c0p',
                num_filters=20, filter_size=5)
    _conv_stage('ori-c2', 'ori-c2a', 'ori-c2p', 'ori-c1p',
                num_filters=50, filter_size=3)

    # -------------------------------------------------------------------------
    # Fully Connected Layers

    # FC 3
    # NOTE(review): this reads the activation 'ori-c2a', not the pooled
    # 'ori-c2p', so 'ori-c2p' is registered but not consumed here. Kept as-is;
    # confirm this is intended.
    _ghh_stage('ori-f3', 'ori-f3a', 'ori-c2a', nu=100, ns=4, nm=4)
    # Dropout 3
    branch['ori-f3d'] = DropoutLayer(
        branch['ori-f3a'],
        p=0.3,
        name='ori-f3d',
    )

    # FC 4
    _ghh_stage('ori-f4', 'ori-f4a', 'ori-f3d', nu=2, ns=4, nm=4)

    # -------------------------------------------------------------------------
    # Arctan2 Layer: columns 0 and 1 of the FC 4 output go through
    # CT.custom_arctan2 and come out as a (batch_size, 1) column
    branch['ori-output'] = ExpressionLayer(
        branch['ori-f4a'],
        lambda x: CT.custom_arctan2(x[:, 0], x[:, 1]).flatten().dimshuffle(
            0, 'x'),
        output_shape=(myNet.config.batch_size, 1),
        name='ori-output',
    )
def build_generator_lstm(input_var, noise_size, cond_var=None, n_conds=0,
                         arch='lstm', with_BatchNorm=True, batch_size=None,
                         n_steps=None):
    """Build the generator network and return its last layer.

    Parameters
    ----------
    input_var
        Symbolic variable bound to the noise ``InputLayer``.  Its
        ``shape[0]`` is also used to restore the leading axis after the
        per-timestep softmax.
    noise_size
        Size of the last axis of the noise input; also the number of units
        of the dense layer applied before the condition is concatenated.
    cond_var
        Optional symbolic variable for the condition input.  When it is not
        ``None`` the condition is concatenated to the (projected) noise.
    n_conds
        Size of the last axis of the condition input.
    arch
        ``'lstm'`` builds the bidirectional LSTM generator.  ``2`` and ``3``
        are placeholders that raise.  Any other value adds no further
        layers, so the layer built so far is returned unchanged.
    with_BatchNorm
        Flag forwarded as second argument to the ``BatchNorm`` helper.
    batch_size, n_steps
        First two entries of the shapes given to the input layers.

    Returns
    -------
    The layer held in ``layer`` when the function ends.

    Raises
    ------
    NotImplementedError
        For ``arch == 2`` and ``arch == 3``.
    """
    from lasagne.layers import (
        InputLayer, DenseLayer, LSTMLayer, ReshapeLayer, DimshuffleLayer,
        concat, ExpressionLayer, NonlinearityLayer, DropoutLayer)

    from lasagne.init import Constant, HeNormal
    from lasagne.nonlinearities import rectify, softmax
    non_lin = rectify

    layer = InputLayer(
        shape=(batch_size, n_steps, noise_size), input_var=input_var)
    if cond_var is not None:
        # NOTE(review): unlike the dense layers below, this one is built
        # without num_leading_axes=2 before being concatenated with the
        # 3-D condition input -- confirm this is intended.
        layer = BatchNorm(DenseLayer(
            layer, noise_size, nonlinearity=non_lin), with_BatchNorm)
        layer = concat(
            [layer, InputLayer(shape=(batch_size, n_steps, n_conds),
                               input_var=cond_var)])
    if arch == 'lstm':
        # NOTE(review): this is the only call using lowercase `batch_norm`
        # (no with_BatchNorm flag); every other layer goes through the
        # `BatchNorm` helper -- confirm the difference is deliberate.
        layer = batch_norm(DenseLayer(layer, 1024, num_leading_axes=2))
        # recurrent layers for bidirectional network
        l_forward_noise = BatchNorm(LSTMLayer(
            layer, 512, learn_init=True, grad_clipping=100,
            only_return_final=False), with_BatchNorm)
        l_backward_noise = BatchNorm(LSTMLayer(
            layer, 512, learn_init=True, grad_clipping=100,
            only_return_final=False, backwards=True), with_BatchNorm)
        layer = concat([l_forward_noise, l_backward_noise], axis=2)
        # dense layers
        layer = BatchNorm(DenseLayer(
            layer, 1024, num_leading_axes=2), with_BatchNorm)
        layer = BatchNorm(DenseLayer(
            layer, 128, num_leading_axes=2), with_BatchNorm)
        # reshape to apply softmax per timestep
        layer = ReshapeLayer(layer, (-1, [2]))
        layer = NonlinearityLayer(layer, softmax)
        layer = ReshapeLayer(layer, (input_var.shape[0], -1, [1]))
        layer = DimshuffleLayer(layer, (0, 'x', 2, 1))
        # rescale the softmax output from [0, 1] to [-1, 1]
        layer = ExpressionLayer(layer, lambda X: X*2 - 1)
    elif arch == 1:
        # NOTE(review): this branch reads `params`, `gate_params` and
        # `cell_params`, none of which are defined in this function, and it
        # never assigns `l_out` to `layer`, so the network it builds is not
        # what gets returned.  It looks like code pasted from another
        # builder -- confirm before relying on arch == 1.
        # input layers
        l_in = InputLayer(
            shape=params['input_shape'], input_var=params['input_var'],
            name='g_in')
        l_noise = InputLayer(
            shape=params['noise_shape'], input_var=params['noise_var'],
            name='g_noise')
        l_cond = InputLayer(
            shape=params['cond_shape'], input_var=params['cond_var'],
            name='g_cond')
        l_mask = InputLayer(
            shape=params['mask_shape'], input_var=params['mask_var'],
            name='g_mask')

        # recurrent layers for bidirectional network
        l_forward_data = LSTMLayer(
            l_in, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False,
            nonlinearity=params['non_linearities'][0])
        l_forward_noise = LSTMLayer(
            l_noise, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False,
            nonlinearity=params['non_linearities'][1])

        l_backward_data = LSTMLayer(
            l_in, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False, backwards=True,
            nonlinearity=params['non_linearities'][0])
        l_backward_noise = LSTMLayer(
            l_noise, params['n_units'][0], mask_input=l_mask,
            ingate=gate_params, forgetgate=gate_params,
            cell=cell_params, outgate=gate_params,
            learn_init=True, grad_clipping=params['grad_clip'],
            only_return_final=False, backwards=True,
            nonlinearity=params['non_linearities'][1])

        # concatenate output of forward and backward layers
        l_lstm_concat = concat(
            [l_forward_data, l_forward_noise, l_backward_data,
             l_backward_noise], axis=2)

        # dense layer on output of data and noise lstms, w/dropout
        l_lstm_dense = DenseLayer(
            DropoutLayer(l_lstm_concat, p=0.5),
            num_units=params['n_units'][1], num_leading_axes=2,
            W=HeNormal(gain='relu'), b=Constant(0.1),
            nonlinearity=params['non_linearities'][2])

        # concatenate dense layer of lstms with condition
        l_lstm_cond_concat = concat(
            [l_lstm_dense, l_cond], axis=2)

        # dense layer with dense layer lstm and condition, w/dropout
        l_out = DenseLayer(
            DropoutLayer(l_lstm_cond_concat, p=0.5),
            num_units=params['n_units'][2],
            num_leading_axes=2,
            W=HeNormal(gain=1.0), b=Constant(0.1),
            nonlinearity=params['non_linearities'][3])
    elif arch == 2:
        raise NotImplementedError("arch 2 not implemented")
    elif arch == 3:
        # The message used to say "arch 2", which misreported the failing
        # architecture.
        raise NotImplementedError("arch 3 not implemented")

    print("Generator output:", layer.output_shape)
    return layer
Beispiel #20
0
def build_fan_reworked(input_var,
                       nb_filter=16,
                       input_size=(None, 3, tools.INP_PSIZE, tools.INP_PSIZE)):
    """Assemble the reworked FAN network.

    Layers are created in this order: input, normalisation, the feature
    encoder returned by ``get_features``, two conv/BN/ReLU encoder stages,
    three stages of batch norm (``beta=None, gamma=None``) followed by
    ``fan_module_improved`` on the ``conv4_4``, ``conv4_1`` and ``conv3_3``
    feature maps, a regular ``BatchNormLayer``, and two ``transpose``
    layers built from ``conv1_2`` and ``conv1_1``.

    Returns a tuple ``(output_layer, net)`` where ``net`` is an
    ``OrderedDict`` mapping names to the layers registered here.
    """
    net = OrderedDict()

    # Input followed by standardization
    net['input'] = InputLayer(input_size, input_var=input_var)
    net['norm'] = ExpressionLayer(net['input'], lambda x: normalize(x))
    last = net['norm']

    # Feature encoder: keep the three maps the FAN modules consume
    feats = get_features(last)
    for feat_key, source_key in (('features_s8_1', "conv4_4"),
                                 ('features_s8_2', "conv4_1"),
                                 ('features_s4', "conv3_3")):
        net[feat_key] = feats[source_key]

    # Pretrained encoder: two identical conv -> BN -> ReLU stages
    encoder_keys = (("conv1_1", "bn1_1", "relu1_1"),
                    ("conv1_2", "bn1_2", "relu1_2"))
    for conv_key, bn_key, relu_key in encoder_keys:
        net[conv_key] = ConvLayer(last, nb_filter, 1, pad=0,
                                  flip_filters=False, nonlinearity=linear)
        net[bn_key] = layers.NonUpdateBatchNormLayer(net[conv_key])
        net[relu_key] = NonlinearityLayer(net[bn_key], nonlinearity=rectify)
        last = net[relu_key]

    # Feature aggregation at multiple scales
    fan_stages = (("bn1", "s8_1", 'features_s8_1', 8),
                  ("bn2", "s8_2", 'features_s8_2', 8),
                  ("bn3", "s4", 'features_s4', 4))
    for bn_key, module_name, feat_key, factor in fan_stages:
        net[bn_key] = layers.NonUpdateBatchNormLayer(last,
                                                     beta=None,
                                                     gamma=None)
        last = fan_module_improved(net[bn_key],
                                   net,
                                   module_name,
                                   net[feat_key],
                                   nb_filter=nb_filter,
                                   scale=factor,
                                   upsampling_strategy="repeat")

    # unclear if Fixed, NonUpdate or Regular Layer will work best...
    net["bn4"] = BatchNormLayer(last)
    last = net["bn4"]

    # Decoder: mirror the encoder convolutions in reverse order
    for deconv_key, conv_key in (("deconv1_2", "conv1_2"),
                                 ("deconv1_1", "conv1_1")):
        net[deconv_key] = transpose(last, net[conv_key], nonlinearity=None)
        last = net[deconv_key]

    return last, net
def test_space_invaders(
    game_title='SpaceInvaders-v0',
    n_parallel_games=3,
    replay_seq_len=2,
):
    """End-to-end smoke run: build an agent, play, and train for 10 epochs.

    The function builds a small Lasagne network with a 3-frame window
    memory, wraps it in an ``Agent``, plays ``n_parallel_games`` sessions
    through a ``GamePool``, loads the sessions into a
    ``SessionPoolEnvironment`` and runs ten update steps on a loss that sums
    the element-wise objectives of ``qlearning``, ``sarsa``,
    ``qlearning_n_step`` (several ``n_steps`` values) and ``a2c_n_step``
    plus an L2 penalty.  Progress is printed; nothing is returned.

    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """

    # Temporary environment, used only to read the game's parameters.
    atari = gym.make(game_title)
    atari.reset()

    # Game Parameters
    n_actions = atari.action_space.n
    observation_shape = (None, ) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari
    # ##### Agent observations

    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # reshape to [batch, color, x, y] to allow for convolutional layers to work correctly
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states
    window_size = 3

    # prev state input
    prev_window = InputLayer(
        (None, window_size) + tuple(observation_reshape.output_shape[1:]),
        name="previous window state")

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")

    # maps each new memory state to the input layer holding its previous value
    memory_dict = {window: prev_window}

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None, ) +
                                 window.output_shape[2:])

    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(window_max, num_units=50, name='dense0')

    # Agent policy and action picking
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # extra heads so that the a2c objective below has a policy and a state value
    policy_eval = DenseLayer(nn,
                             num_units=n_actions,
                             nonlinearity=lasagne.nonlinearities.softmax,
                             name="a2c action probas")
    state_value_eval = DenseLayer(nn,
                                  num_units=1,
                                  nonlinearity=None,
                                  name="a2c state values")
    # resolver
    resolver = ProbabilisticResolver(policy_eval, name="resolver")

    # agent
    agent = Agent(observation_layer, memory_dict,
                  (q_eval, policy_eval, state_value_eval), resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """Return ``(action, memories)`` for an observation batch.

        ``prev_memories`` is either the string ``'zeros'`` (build float32
        zero arrays shaped after ``agent.agent_states``) or a sequence of
        previous memory arrays; in the default setup that is
        ``[prev window,]``.
        """
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [
                np.zeros((batch_size, ) + tuple(mem.output_shape[1:]),
                         dtype='float32') for mem in agent.agent_states
            ]
        res = applier_fun(np.array(observation), *prev_memories)
        # first output is the action, the remaining outputs are the new memories
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with

    pool = GamePool(game_title, n_parallel_games)

    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    # print the action names for the first 3 rows / 5 columns of the action log
    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """Play ``n_steps`` new steps and load them into ``env``.

        The previously loaded sessions are thrown away entirely for
        simplicity.
        """

        # memory states from before the interaction, copied into a new list
        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(
            step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor,
                          is_alive_tensor, preceding_memory_states)

    # load first sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.
    # ### Training via experience replay

    # get agent's Q-values, policy, etc obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, estimators = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )
    # same order as the estimator tuple passed to Agent above
    (q_values_sequence, policy_sequence, value_sequence) = estimators

    # Evaluating loss function

    scaled_reward_seq = env.rewards
    # For SpaceInvaders, however, not scaling rewards is at least working

    # all objectives below are accumulated into one element-wise loss
    elwise_mse_loss = 0.

    # 1-step algorithms
    for algo in qlearning, sarsa:
        elwise_mse_loss += algo.get_elementwise_objective(
            q_values_sequence,
            env.actions[0],
            scaled_reward_seq,
            env.is_alive,
            gamma_or_gammas=0.99,
        )
    # n-step q-learning, with n_steps below, equal to and above the session
    # length, plus None
    for n in (1, 3, replay_seq_len - 1, replay_seq_len, replay_seq_len + 1,
              None):
        elwise_mse_loss += qlearning_n_step.get_elementwise_objective(
            q_values_sequence,
            env.actions[0],
            scaled_reward_seq,
            env.is_alive,
            gamma_or_gammas=0.99,
            n_steps=n)

    # a2c n_step

    elwise_mse_loss += a2c_n_step.get_elementwise_objective(
        policy_sequence,
        value_sequence[:, :, 0],
        env.actions[0],
        scaled_reward_seq,
        env.is_alive,
        gamma_or_gammas=0.99,
        n_steps=3)

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights
    reg_l2 = regularize_network_params(resolver, l2) * 10**-4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions

    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward],
                                updates=updates)
    evaluation_fun = theano.function(
        [], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop

    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        # NOTE: this rebinds `loss` from the symbolic expression to the value
        # returned by train_fun; both functions are already compiled above.
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " %
              (epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
Beispiel #22
0
def build_big_fan(input_var,
                  nb_filter=96,
                  input_size=(None, 3, tools.INP_PSIZE, tools.INP_PSIZE)):
    """Assemble the "big" FAN network with four feature-aggregation stages.

    Layers are created in this order: input, normalisation, the feature
    encoder returned by ``get_features``, two conv/BN/ReLU encoder stages, a
    ``middle`` convolution, four stages of ``NonUpdateBatchNormLayer``
    followed by ``fan_module_simple`` (two on ``conv4_1`` with ``scale=8``,
    two on ``conv3_3`` with ``scale=4``), a final batch norm and two
    ``transpose`` layers built from ``conv1_2`` and ``conv1_1``.

    Parameters
    ----------
    input_var
        Symbolic variable bound to the input layer.
    nb_filter
        Number of filters passed to every convolution and FAN module.
    input_size
        Shape given to the ``InputLayer``.

    Returns
    -------
    ``(output_layer, net)`` where ``net`` is an ``OrderedDict`` mapping
    names to the layers registered here.
    """
    net = OrderedDict()

    # Input, standardization
    last = net['input'] = InputLayer(input_size, input_var=input_var)
    last = net['norm'] = ExpressionLayer(last, lambda x: normalize(x))

    # load feature encoder
    f = get_features(last)
    net['features_s8'] = f["conv4_1"]
    net['features_s4'] = f["conv3_3"]

    # Pretrained Encoder as before
    last = net["conv1_1"] = ConvLayer(last,
                                      nb_filter,
                                      1,
                                      pad=0,
                                      flip_filters=False,
                                      nonlinearity=linear)
    last = net["bn1_1"] = layers.NonUpdateBatchNormLayer(last)
    last = net["relu1_1"] = NonlinearityLayer(last, nonlinearity=rectify)
    last = net["conv1_2"] = ConvLayer(last,
                                      nb_filter,
                                      1,
                                      pad=0,
                                      flip_filters=False,
                                      nonlinearity=linear)
    last = net["bn1_2"] = layers.NonUpdateBatchNormLayer(last)
    last = net["relu1_2"] = NonlinearityLayer(last, nonlinearity=rectify)

    # Modified Middle Part
    last = net["middle"] = ConvLayer(last, nb_filter, 1, nonlinearity=linear)

    # feature aggregation at multiple scales
    # The second stage used to be stored under "bn1" as well, which silently
    # replaced the first batch-norm layer in `net`; it is now "bn2", matching
    # the bn1..bn5 numbering of the other stages.
    # NOTE(review): both s8 stages (and both s4 stages) still pass the same
    # name to fan_module_simple.  If that helper registers layers in `net`
    # under keys derived from the name, the later stage overwrites the
    # earlier one -- confirm against fan_module_simple.
    fan_stages = (("bn1", "s8", 'features_s8', 8),
                  ("bn2", "s8", 'features_s8', 8),
                  ("bn3", "s4", 'features_s4', 4),
                  ("bn4", "s4", 'features_s4', 4))
    for bn_key, module_name, feat_key, factor in fan_stages:
        last = net[bn_key] = layers.NonUpdateBatchNormLayer(last)
        last = fan_module_simple(last,
                                 net,
                                 module_name,
                                 net[feat_key],
                                 nb_filter=nb_filter,
                                 scale=factor)
    last = net["bn5"] = layers.NonUpdateBatchNormLayer(last)

    # Decoder as before
    last = net["deconv1_2"] = transpose(last,
                                        net["conv1_2"],
                                        nonlinearity=None)
    last = net["deconv1_1"] = transpose(last,
                                        net["conv1_1"],
                                        nonlinearity=None)

    return last, net
Beispiel #23
0
def architecture(input_var, input_shape, cfg):
    """Build the network described by ``cfg`` and return its output layer.

    The stages are, in order: optional filterbank, optional magnitude
    scaling, optional temporal difference, input normalisation, and then
    either a small dense network (``cfg['arch'] == 'dense:16'``) or a
    convolutional network (``'ismir2015'`` / ``'ismir2016'``) ending in a
    single sigmoid unit.

    Parameters
    ----------
    input_var
        Symbolic variable passed to the ``InputLayer``.
    input_shape
        Shape passed to the ``InputLayer``; ``input_shape[3]`` is read when
        the fixed mel filterbank is used.
    cfg
        Mapping of options.  Keys read here: ``filterbank``, ``magscale``,
        ``arch.timediff``, ``input_norm`` (optional, defaults to
        ``'batch'``), ``arch.batch_norm``, ``arch.convdrop``, ``arch``,
        ``arch.convmore``, ``arch.firstconv_zeromean`` (optional) and,
        depending on the choices made, ``sample_rate``, ``frame_len``,
        ``mel_bands``, ``mel_min``, ``mel_max``, ``pcen_fix_alpha``
        (optional) and ``blocklen``.

    Returns
    -------
    The last layer of the network.

    Raises
    ------
    ValueError
        For an unknown ``filterbank``, ``magscale``, ``input_norm``,
        ``arch.convdrop`` or ``arch`` value.
    """
    layer = InputLayer(input_shape, input_var)

    # filterbank, if any
    if cfg['filterbank'] == 'mel':
        import audio
        filterbank = audio.create_mel_filterbank(cfg['sample_rate'],
                                                 cfg['frame_len'],
                                                 cfg['mel_bands'],
                                                 cfg['mel_min'],
                                                 cfg['mel_max'])
        # keep only as many filterbank rows as the input's last axis has entries
        filterbank = filterbank[:input_shape[3]].astype(theano.config.floatX)
        # fixed (constant W, no bias, no nonlinearity) projection of the last axis
        layer = DenseLayer(layer,
                           num_units=cfg['mel_bands'],
                           num_leading_axes=-1,
                           W=T.constant(filterbank),
                           b=None,
                           nonlinearity=None)
    elif cfg['filterbank'] == 'mel_learn':
        layer = MelBankLayer(layer, cfg['sample_rate'], cfg['frame_len'],
                             cfg['mel_bands'], cfg['mel_min'], cfg['mel_max'])
    elif cfg['filterbank'] != 'none':
        raise ValueError("Unknown filterbank=%s" % cfg['filterbank'])

    # magnitude transformation, if any
    if cfg['magscale'] == 'log':
        # clamp at 1e-7 so the logarithm never sees zero
        layer = ExpressionLayer(layer, lambda x: T.log(T.maximum(1e-7, x)))
    elif cfg['magscale'] == 'log1p':
        layer = ExpressionLayer(layer, T.log1p)
    elif cfg['magscale'].startswith('log1p_learn'):
        # learnable log(1 + exp(a) * x), with given initial a (or default 0);
        # the initial a is whatever follows 'log1p_learn' in the option string
        a = float(cfg['magscale'][len('log1p_learn'):] or 0)
        a = T.exp(theano.shared(lasagne.utils.floatX(a)))
        layer = lasagne.layers.ScaleLayer(layer,
                                          scales=a,
                                          shared_axes=(0, 1, 2, 3))
        layer = ExpressionLayer(layer, T.log1p)
    elif cfg['magscale'].startswith('pow_learn'):
        # learnable x^sigmoid(a), with given initial a (or default 0)
        a = float(cfg['magscale'][len('pow_learn'):] or 0)
        a = T.nnet.sigmoid(theano.shared(lasagne.utils.floatX(a)))
        layer = PowLayer(layer, exponent=a)
    elif cfg['magscale'] == 'pcen':
        layer = PCENLayer(layer)
        if cfg.get('pcen_fix_alpha'):
            # drop the "trainable" tag so log_alpha is excluded from training
            layer.params[layer.log_alpha].remove("trainable")
    elif cfg['magscale'] == 'loudness_only':
        # cut away half a block length on the left and right
        layer = lasagne.layers.SliceLayer(layer,
                                          slice(cfg['blocklen'] // 2,
                                                -(cfg['blocklen'] // 2)),
                                          axis=2)
        # average over the frequencies and channels
        layer = lasagne.layers.ExpressionLayer(
            layer, lambda X: X.mean(axis=(1, 3), keepdims=True), lambda shp:
            (shp[0], 1, shp[2], 1))
    elif cfg['magscale'] != 'none':
        raise ValueError("Unknown magscale=%s" % cfg['magscale'])

    # temporal difference, if any
    if cfg['arch.timediff']:
        layer = TimeDiffLayer(layer, delta=cfg['arch.timediff'])

    # standardization per frequency band
    # ('input_norm' may be absent; the later branches are only reached when
    # the key exists, because a missing key selects 'batch')
    if cfg.get('input_norm', 'batch') == 'batch':
        layer = batch_norm_vanilla(layer, axes=(0, 2), beta=None, gamma=None)
    elif cfg['input_norm'] == 'instance':
        layer = lasagne.layers.StandardizationLayer(layer, axes=2)
    elif cfg['input_norm'] == 'none':
        pass
    else:
        raise ValueError("Unknown input_norm=%s" % cfg['input_norm'])

    # convolutional neural network
    # keyword arguments shared by every hidden conv/dense layer below
    kwargs = dict(nonlinearity=lasagne.nonlinearities.leaky_rectify,
                  W=lasagne.init.Orthogonal())
    # identity function when batch normalisation is switched off
    maybe_batch_norm = batch_norm if cfg['arch.batch_norm'] else lambda x: x
    # dropout variant applied between convolutions (identity for 'none')
    if cfg['arch.convdrop'] == 'independent':
        maybe_dropout = lambda x: dropout(x, 0.1)
    elif cfg['arch.convdrop'] == 'channels':
        maybe_dropout = lambda x: dropout(x, 0.1, shared_axes=(2, 3))
    elif cfg['arch.convdrop'] == 'bands':
        maybe_dropout = lambda x: dropout(x, 0.1, shared_axes=(1, 2))
    elif cfg['arch.convdrop'] == 'none':
        maybe_dropout = lambda x: x
    else:
        raise ValueError("Unknown arch.convdrop=%s" % cfg['arch.convdrop'])
    if cfg['arch'] == 'dense:16':
        # tiny baseline: one hidden dense layer, then the sigmoid output
        layer = DenseLayer(layer, 16, **kwargs)
        layer = DenseLayer(layer,
                           1,
                           nonlinearity=lasagne.nonlinearities.sigmoid,
                           W=lasagne.init.Orthogonal())
        return layer
    # multiplier applied to the number of filters of every convolution
    convmore = cfg['arch.convmore']
    layer = Conv2DLayer(layer, int(64 * convmore), 3, **kwargs)
    if cfg.get('arch.firstconv_zeromean', False) == 'params':
        # replace W by an expression with the mean over axes 2 and 3 removed
        layer.W = layer.W - T.mean(layer.W, axis=(2, 3), keepdims=True)
    layer = maybe_batch_norm(layer)
    layer = maybe_dropout(layer)
    layer = Conv2DLayer(layer, int(32 * convmore), 3, **kwargs)
    layer = maybe_batch_norm(layer)
    layer = MaxPool2DLayer(layer, 3)
    layer = maybe_dropout(layer)
    layer = Conv2DLayer(layer, int(128 * convmore), 3, **kwargs)
    layer = maybe_batch_norm(layer)
    layer = maybe_dropout(layer)
    layer = Conv2DLayer(layer, int(64 * convmore), 3, **kwargs)
    layer = maybe_batch_norm(layer)
    if cfg['arch'] == 'ismir2015':
        layer = MaxPool2DLayer(layer, 3)
    elif cfg['arch'] == 'ismir2016':
        layer = maybe_dropout(layer)
        # filter spans all but 3 positions of the last axis
        layer = Conv2DLayer(layer, int(128 * convmore),
                            (3, layer.output_shape[3] - 3), **kwargs)
        layer = maybe_batch_norm(layer)
        layer = MaxPool2DLayer(layer, (1, 4))
    else:
        raise ValueError('Unknown arch=%s' % cfg['arch'])
    # classifier head: two hidden dense layers and a sigmoid output unit,
    # each preceded by 50% dropout
    layer = DenseLayer(dropout(layer, 0.5), 256, **kwargs)
    layer = maybe_batch_norm(layer)
    layer = DenseLayer(dropout(layer, 0.5), 64, **kwargs)
    layer = maybe_batch_norm(layer)
    layer = DenseLayer(dropout(layer, 0.5),
                       1,
                       nonlinearity=lasagne.nonlinearities.sigmoid,
                       W=lasagne.init.Orthogonal())
    return layer
Beispiel #24
0
def build_baseline9_fan_fan_bilinear(input_var, nb_filter=96):
    """Assemble the baseline-9 network (FAN normalisation, bilinear upsampling).

    Layers are created in this order: input, normalisation, two feature
    encoders (one ``get_features`` call per feature map), a mask layer
    computed from the bilinearly upsampled ``conv4_1`` features, two
    conv/BN/ReLU encoder stages, a ``middle`` convolution, two stages of
    ``FeatureAwareNormLayer`` followed by ``fan_module_simple``, two
    ``transpose`` layers built from ``conv1_2`` and ``conv1_1``, and a final
    ``FeatureAwareNormLayer`` initialised with beta 128 and gamma 25.

    Returns a tuple ``(output_layer, net)`` where ``net`` is an
    ``OrderedDict`` mapping names to the layers registered here.
    """
    net = OrderedDict()

    import theano.tensor as T
    import numpy as np

    def channel_max_mask(x):
        # 1.0 where an entry equals the maximum along axis 1, else 0.0
        return 1. * T.eq(x, x.max(axis=1, keepdims=True))

    # Input followed by standardization
    net['input'] = InputLayer(
        (None, 3, tools.INP_PSIZE, tools.INP_PSIZE), input_var=input_var)
    net['norm'] = ExpressionLayer(net['input'], lambda x: normalize(x))
    last = net['norm']

    # Feature encoder; get_features is deliberately invoked once per map,
    # exactly as before
    net['features_s8'] = get_features(last)["conv4_1"]
    net['features_s4'] = get_features(last)["conv3_3"]
    upsampled_s8 = layers.upsample(net["features_s8"], 8, mode="bilinear")
    net['mask'] = ExpressionLayer(upsampled_s8, channel_max_mask)

    # Pretrained encoder: two identical conv -> BN -> ReLU stages
    encoder_keys = (("conv1_1", "bn1_1", "relu1_1"),
                    ("conv1_2", "bn1_2", "relu1_2"))
    for conv_key, bn_key, relu_key in encoder_keys:
        net[conv_key] = ConvLayer(last, nb_filter, 1, pad=0,
                                  flip_filters=False, nonlinearity=linear)
        net[bn_key] = BatchNormLayer(net[conv_key])
        net[relu_key] = NonlinearityLayer(net[bn_key], nonlinearity=rectify)
        last = net[relu_key]

    # Modified middle part
    net["middle"] = ConvLayer(last, nb_filter, 1, nonlinearity=linear)
    last = net["middle"]

    # Feature aggregation at multiple scales
    fan_stages = (("fan1", "s8", 'features_s8', 8),
                  ("fan2", "s4", 'features_s4', 4))
    for fan_key, module_name, feat_key, factor in fan_stages:
        net[fan_key] = FeatureAwareNormLayer((last, net['mask']))
        last = fan_module_simple(net[fan_key],
                                 net,
                                 module_name,
                                 net[feat_key],
                                 nb_filter=nb_filter,
                                 scale=factor,
                                 upsampling_strategy="bilinear")

    # Decoder: mirror the encoder convolutions in reverse order
    for deconv_key, conv_key in (("deconv1_2", "conv1_2"),
                                 ("deconv1_1", "conv1_1")):
        net[deconv_key] = transpose(last, net[conv_key], nonlinearity=None)
        last = net[deconv_key]

    # Output normalisation with fixed initial shift and scale
    net["fan"] = FeatureAwareNormLayer(
        (last, net['mask']),
        beta=nn.init.Constant(np.float32(128.)),
        gamma=nn.init.Constant(np.float32(25.)))

    return net["fan"], net
Beispiel #25
0
def build_finetuned2_fan(input_var,
                         nb_filter=96,
                         input_size=(None, 3, tools.INP_PSIZE,
                                     tools.INP_PSIZE),
                         weights="170123_runs/run_H.E.T._1485012575.4045253/3.npz"):
    """Assemble the fine-tuned FAN network and load stored weights into it.

    Layers are created in this order: input, normalisation, two feature
    encoders (one ``get_features`` call per feature map), two conv/BN/ReLU
    encoder stages, a ``middle`` convolution, two stages of
    ``NonUpdateBatchNormLayer`` followed by ``fan_module_simple``, two
    ``transpose`` layers built from ``conv1_2`` and ``conv1_1``, and a
    ``FixedBatchNormLayer``.  Finally ``tools.load_weights`` is called on
    the output layer.

    Parameters
    ----------
    input_var
        Symbolic variable bound to the input layer.
    nb_filter
        Number of filters passed to every convolution and FAN module.
    input_size
        Shape given to the ``InputLayer``.
    weights
        Path handed to ``tools.load_weights``.  Defaults to the run that
        used to be hard-coded here, so existing callers are unaffected.

    Returns
    -------
    ``(output_layer, net)`` where ``net`` is an ``OrderedDict`` mapping
    names to the layers registered here.
    """
    net = OrderedDict()

    # Input, standardization
    last = net['input'] = InputLayer(input_size, input_var=input_var)
    last = net['norm'] = ExpressionLayer(last, lambda x: normalize(x))

    # load feature encoder
    # TODO this is clearly a bug. only for compatibility reasons. remove once all weights are converted
    # (get_features is called twice, once per feature map)
    net['features_s8'] = get_features(last)["conv4_1"]
    net['features_s4'] = get_features(last)["conv3_3"]

    # Pretrained Encoder as before
    last = net["conv1_1"] = ConvLayer(last,
                                      nb_filter,
                                      1,
                                      pad=0,
                                      flip_filters=False,
                                      nonlinearity=linear)
    last = net["bn1_1"] = layers.NonUpdateBatchNormLayer(last)
    last = net["relu1_1"] = NonlinearityLayer(last, nonlinearity=rectify)
    last = net["conv1_2"] = ConvLayer(last,
                                      nb_filter,
                                      1,
                                      pad=0,
                                      flip_filters=False,
                                      nonlinearity=linear)
    last = net["bn1_2"] = layers.NonUpdateBatchNormLayer(last)
    last = net["relu1_2"] = NonlinearityLayer(last, nonlinearity=rectify)

    # Modified Middle Part
    last = net["middle"] = ConvLayer(last, nb_filter, 1, nonlinearity=linear)

    # feature aggregation at multiple scales
    last = net["bn1"] = layers.NonUpdateBatchNormLayer(last)
    last = fan_module_simple(last,
                             net,
                             "s8",
                             net['features_s8'],
                             nb_filter=nb_filter,
                             scale=8)
    last = net["bn2"] = layers.NonUpdateBatchNormLayer(last)
    last = fan_module_simple(last,
                             net,
                             "s4",
                             net['features_s4'],
                             nb_filter=nb_filter,
                             scale=4)

    # Decoder as before
    last = net["deconv1_2"] = transpose(last,
                                        net["conv1_2"],
                                        nonlinearity=None)
    last = net["deconv1_1"] = transpose(last,
                                        net["conv1_1"],
                                        nonlinearity=None)

    last = net["bn"] = layers.FixedBatchNormLayer(last)

    # Load the stored weights into the finished network; the return value
    # of load_weights was never used, so it is no longer kept.
    tools.load_weights(last, weights)

    return last, net
Beispiel #26
0
def build_baseline8_fan_bilinear(input_var, nb_filter=96):
    """Build the "baseline 8" network whose FAN modules upsample bilinearly.

    Layer sequence: input -> ``normalize`` expression -> two 1x1
    conv / batch-norm / ReLU encoder stages -> 1x1 "middle" conv -> two
    batch-norm + ``fan_module_simple`` stages (fed by the ``conv4_1`` features
    at scale 8 and the ``conv3_3`` features at scale 4) -> two layers built by
    ``transpose`` from the encoder convs -> final batch norm.

    :param input_var: symbolic variable handed to the ``InputLayer``
    :param nb_filter: filter count used for every conv layer and FAN module
    :return: ``(output_layer, net)``; ``net`` is an ``OrderedDict`` holding the
        layers registered here under their string keys
    """
    net = OrderedDict()
    side = tools.INP_PSIZE

    # Input and standardization.
    tip = net['input'] = InputLayer((None, 3, side, side),
                                    input_var=input_var)
    tip = net['norm'] = ExpressionLayer(tip, lambda batch: normalize(batch))

    # Feature encoder: one get_features() call per requested feature map.
    net['features_s8'] = get_features(tip)["conv4_1"]
    net['features_s4'] = get_features(tip)["conv3_3"]

    # Encoder: two identical conv -> batch norm -> ReLU stages.
    for stage in ("1_1", "1_2"):
        tip = net["conv" + stage] = ConvLayer(tip,
                                              nb_filter,
                                              1,
                                              pad=0,
                                              flip_filters=False,
                                              nonlinearity=linear)
        tip = net["bn" + stage] = BatchNormLayer(tip)
        tip = net["relu" + stage] = NonlinearityLayer(tip,
                                                      nonlinearity=rectify)

    # Modified middle part.
    tip = net["middle"] = ConvLayer(tip, nb_filter, 1, nonlinearity=linear)

    # Feature aggregation at multiple scales:
    # (batch-norm key, FAN tag, feature key, scale).
    fan_stages = (
        ("bn1", "s8", 'features_s8', 8),
        ("bn2", "s4", 'features_s4', 4),
    )
    for bn_key, tag, feature_key, factor in fan_stages:
        tip = net[bn_key] = BatchNormLayer(tip)
        tip = fan_module_simple(tip,
                                net,
                                tag,
                                net[feature_key],
                                nb_filter=nb_filter,
                                scale=factor,
                                upsampling_strategy="bilinear")

    # Decoder: undo the encoder convs in reverse order.
    for deconv_key, conv_key in (("deconv1_2", "conv1_2"),
                                 ("deconv1_1", "conv1_1")):
        tip = net[deconv_key] = transpose(tip,
                                          net[conv_key],
                                          nonlinearity=None)

    # Output batch norm initialised with shift 128 and scale 25.
    tip = net["bn"] = BatchNormLayer(tip,
                                     beta=nn.init.Constant(128.),
                                     gamma=nn.init.Constant(25.))

    return tip, net
Beispiel #27
0
def test_memory(
    game_title='SpaceInvaders-v0',
    n_parallel_games=3,
    replay_seq_len=2,
):
    """Smoke-test an agent that carries several kinds of memory at once.

    Builds one agent whose memory consists of a frame window, a stack, an RNN
    cell, a GRU cell, a GRU memory layer and two LSTM cells (with and without
    peepholes), plays a few steps in a pool of Gym games, and runs ten
    Q-learning updates on the recorded sessions. Nothing is asserted; the
    test passes when everything builds, compiles and runs.

    :param game_title: name of atari game in Gym
    :param n_parallel_games: how many games we run in parallel
    :param replay_seq_len: how long is one replay session from a batch
    """

    atari = gym.make(game_title)
    atari.reset()

    # Game parameters, read once from a throw-away environment instance
    n_actions = atari.action_space.n
    observation_shape = (None, ) + atari.observation_space.shape
    action_names = atari.get_action_meanings()
    del atari
    # ##### Agent observations

    # image observation at current tick goes here
    observation_layer = InputLayer(observation_shape, name="images input")

    # move axis 3 to position 1 (presumably [batch, x, y, color] ->
    # [batch, color, x, y] so convolutional layers work -- confirm)
    observation_reshape = DimshuffleLayer(observation_layer, (0, 3, 1, 2))

    # Agent memory states: maps each "new state" layer to the input layer
    # that receives the previous value of that state

    memory_dict = OrderedDict([])

    # --- Window ---
    window_size = 3

    # prev state input
    prev_window = InputLayer(
        (None, window_size) + tuple(observation_reshape.output_shape[1:]),
        name="previous window state")

    # our window
    window = WindowAugmentation(observation_reshape,
                                prev_window,
                                name="new window state")

    # pixel-wise maximum over the temporal window (to avoid flickering)
    window_max = ExpressionLayer(window,
                                 lambda a: a.max(axis=1),
                                 output_shape=(None, ) +
                                 window.output_shape[2:])

    memory_dict[window] = prev_window

    # --- Stack ---
    # NOTE(review): both dense layers below are named "prev_stack"; this looks
    # like a copy/paste leftover -- confirm whether distinct names were meant.
    stack_w, stack_h = 4, 5
    stack_inputs = DenseLayer(observation_reshape, stack_w, name="prev_stack")
    stack_controls = DenseLayer(observation_reshape,
                                3,
                                nonlinearity=lasagne.nonlinearities.softmax,
                                name="prev_stack")
    prev_stack = InputLayer((None, stack_h, stack_w),
                            name="previous stack state")
    stack = StackAugmentation(stack_inputs, prev_stack, stack_controls)
    memory_dict[stack] = prev_stack

    # first slice of the stack along the last axis (SliceLayer's default axis)
    stack_top = lasagne.layers.SliceLayer(stack, 0, 1)

    # --- RNN preset ---

    prev_rnn = InputLayer((None, 16), name="previous RNN state")
    new_rnn = RNNCell(prev_rnn, observation_reshape)
    memory_dict[new_rnn] = prev_rnn

    # --- GRU preset ---
    prev_gru = InputLayer((None, 16), name="previous GRUcell state")
    new_gru = GRUCell(prev_gru, observation_reshape)
    memory_dict[new_gru] = prev_gru

    # --- GRUMemoryLayer ---
    # NOTE(review): this input layer reuses the name "previous GRUcell state"
    # from the GRU preset above.
    prev_gru1 = InputLayer((None, 15), name="previous GRUcell state")
    new_gru1 = GRUMemoryLayer(15, observation_reshape, prev_gru1)
    memory_dict[new_gru1] = prev_gru1

    # --- LSTM with peepholes ---
    prev_lstm0_cell = InputLayer(
        (None, 13), name="previous LSTMCell hidden state [with peepholes]")

    prev_lstm0_out = InputLayer(
        (None, 13), name="previous LSTMCell output state [with peepholes]")

    # NOTE(review): named "newLSTM1" although the variables say lstm0.
    new_lstm0_cell, new_lstm0_out = LSTMCell(
        prev_lstm0_cell,
        prev_lstm0_out,
        input_or_inputs=observation_reshape,
        peepholes=True,
        name="newLSTM1 [with peepholes]")

    memory_dict[new_lstm0_cell] = prev_lstm0_cell
    memory_dict[new_lstm0_out] = prev_lstm0_out

    # --- LSTM without peepholes ---
    prev_lstm1_cell = InputLayer(
        (None, 14), name="previous LSTMCell hidden state [no peepholes]")

    prev_lstm1_out = InputLayer(
        (None, 14), name="previous LSTMCell output state [no peepholes]")

    new_lstm1_cell, new_lstm1_out = LSTMCell(
        prev_lstm1_cell,
        prev_lstm1_out,
        input_or_inputs=observation_reshape,
        peepholes=False,
        name="newLSTM1 [no peepholes]")

    memory_dict[new_lstm1_cell] = prev_lstm1_cell
    memory_dict[new_lstm1_out] = prev_lstm1_out

    # --- Concatenate everything ---

    # Debug output of the shapes about to be concatenated. Note that this
    # builds a separate flatten(window_max) layer from the one used below.
    for i in [flatten(window_max), stack_top, new_rnn, new_gru, new_gru1]:
        print(i.output_shape)
    all_memory = concat([
        flatten(window_max),
        stack_top,
        new_rnn,
        new_gru,
        new_gru1,
        new_lstm0_out,
        new_lstm1_out,
    ])

    # ##### Neural network body
    # you may use any other lasagne layers, including convolutions, batch_norms, maxout, etc

    # a simple lasagne network (try replacing with any other lasagne network and see what works best)
    nn = DenseLayer(all_memory, num_units=50, name='dense0')

    # Agent policy and action picking: one linear Q-value per action
    q_eval = DenseLayer(nn,
                        num_units=n_actions,
                        nonlinearity=lasagne.nonlinearities.linear,
                        name="QEvaluator")

    # resolver: picks actions from q_eval (EpsilonGreedyResolver, epsilon=0.1)
    resolver = EpsilonGreedyResolver(q_eval, epsilon=0.1, name="resolver")

    # agent
    agent = Agent(observation_layer, memory_dict, q_eval, resolver)

    # Since it's a single lasagne network, one can get its weights, output, etc
    weights = lasagne.layers.get_all_params(resolver, trainable=True)

    # Agent step function
    print('compiling react')
    applier_fun = agent.get_react_function()

    # a nice pythonic interface
    def step(observation, prev_memories='zeros', batch_size=n_parallel_games):
        """Return ``(action, memories)`` for a batch of observations.

        ``prev_memories`` holds one array per entry of ``agent.agent_states``.
        When left at the sentinel ``'zeros'``, float32 zero arrays of shape
        ``(batch_size,) + mem.output_shape[1:]`` are used instead.
        """
        # default to zeros
        if prev_memories == 'zeros':
            prev_memories = [
                np.zeros((batch_size, ) + tuple(mem.output_shape[1:]),
                         dtype='float32') for mem in agent.agent_states
            ]
        res = applier_fun(np.array(observation), *prev_memories)
        # first output is the action, the remaining ones are the new memories
        action = res[0]
        memories = res[1:]
        return action, memories

    # # Create and manage a pool of atari sessions to play with

    pool = GamePool(game_title, n_parallel_games)

    # play 50 ticks once to make sure interaction works end to end
    observation_log, action_log, reward_log, _, _, _ = pool.interact(step, 50)

    # action names for the top-left 3x5 corner of the action log
    print(np.array(action_names)[np.array(action_log)[:3, :5]])

    # # experience replay pool
    # Create an environment with all default parameters
    env = SessionPoolEnvironment(observations=observation_layer,
                                 actions=resolver,
                                 agent_memories=agent.agent_states)

    def update_pool(env, pool, n_steps=100):
        """Play ``n_steps`` ticks in ``pool`` and load the result into ``env``.

        The memory states the pool held before interacting are passed along
        to ``env.load_sessions`` as the sessions' preceding memory states.
        """

        # snapshot before interact() is called
        preceding_memory_states = list(pool.prev_memory_states)

        # get interaction sessions
        observation_tensor, action_tensor, reward_tensor, _, is_alive_tensor, _ = pool.interact(
            step, n_steps=n_steps)

        # load them into experience replay environment
        env.load_sessions(observation_tensor, action_tensor, reward_tensor,
                          is_alive_tensor, preceding_memory_states)

    # load first sessions
    update_pool(env, pool, replay_seq_len)

    # A more sophisticated way of training is to store a large pool of sessions and train on random batches of them.
    # ### Training via experience replay

    # get agent's Q-values obtained via experience replay
    _env_states, _observations, _memories, _imagined_actions, q_values_sequence = agent.get_sessions(
        env,
        session_length=replay_seq_len,
        batch_size=env.batch_size,
        optimize_experience_replay=True,
    )

    # Evaluating loss function

    # rewards are used unscaled
    scaled_reward_seq = env.rewards
    # For SpaceInvaders, however, not scaling rewards is at least working

    elwise_mse_loss = qlearning.get_elementwise_objective(
        q_values_sequence,
        env.actions[0],
        scaled_reward_seq,
        env.is_alive,
        gamma_or_gammas=0.99,
    )

    # compute mean over "alive" fragments
    mse_loss = elwise_mse_loss.sum() / env.is_alive.sum()

    # regularize network weights (L2 penalty scaled by 1e-4)
    reg_l2 = regularize_network_params(resolver, l2) * 10**-4

    loss = mse_loss + reg_l2

    # Compute weight updates
    updates = lasagne.updates.adadelta(loss, weights, learning_rate=0.01)

    # mean session reward
    mean_session_reward = env.rewards.sum(axis=1).mean()

    # # Compile train and evaluation functions
    # Both take no explicit inputs; only train_fun applies the weight updates.

    print('compiling')
    train_fun = theano.function([], [loss, mean_session_reward],
                                updates=updates)
    evaluation_fun = theano.function(
        [], [loss, mse_loss, reg_l2, mean_session_reward])
    print("I've compiled!")

    # # Training loop

    for epoch_counter in range(10):
        update_pool(env, pool, replay_seq_len)
        # NOTE(review): this rebinds `loss` from the symbolic expression to
        # the value returned by train_fun; harmless here because both
        # functions are already compiled.
        loss, avg_reward = train_fun()
        full_loss, q_loss, l2_penalty, avg_reward_current = evaluation_fun()

        print("epoch %i,loss %.5f, rewards: %.5f " %
              (epoch_counter, full_loss, avg_reward_current))
        print("rec %.3f reg %.3f" % (q_loss, l2_penalty))
Beispiel #28
0
def build_baseline3_vgg(input_var, nb_filter=64):
    """Build the "baseline 3" network with a VGG-initialised 3x3 encoder.

    The first four entries of ``'param values'`` in ``vgg16.pkl`` are used as
    the ``W``/``b`` of the two 3x3 encoder convolutions. After an average
    pooling by 2 and a 1x1 "middle" conv, two ``fan_module_simple`` stages
    consume the ``conv4_1`` and ``conv3_3`` feature maps; the decoder upscales
    by 2 and applies ``transpose`` against the encoder convs, followed by a
    final batch norm.

    :param input_var: symbolic variable handed to the ``InputLayer``
    :param nb_filter: filter count of the encoder and "middle" convolutions
        (the FAN modules are built with a fixed ``nb_filter=64``)
    :return: ``(output_layer, net)``; ``net`` is an ``OrderedDict`` holding the
        layers registered here under their string keys
    """
    net = OrderedDict()

    def get_weights(path):
        # NOTE: pickle.load must only ever see a trusted weights file.
        with open(path, "rb") as handle:
            params = pickle.load(handle, encoding="latin-1")['param values']
        return params[0], params[1], params[2], params[3]

    side = tools.INP_PSIZE

    # Input and standardization.
    tip = net['input'] = InputLayer((None, 3, side, side),
                                    input_var=input_var)
    tip = net['norm'] = ExpressionLayer(tip, lambda batch: normalize(batch))

    # Feature encoder: one get_features() call per requested feature map.
    net['features_s8'] = get_features(tip)["conv4_1"]
    net['features_s4'] = get_features(tip)["conv3_3"]

    # Pretrained encoder: two 3x3 conv + ReLU stages, then average pooling.
    W1, b1, W2, b2 = get_weights("vgg16.pkl")
    for stage, kernel, bias in (("1_1", W1, b1), ("1_2", W2, b2)):
        tip = net["conv" + stage] = ConvLayer(tip,
                                              nb_filter,
                                              3,
                                              pad=1,
                                              flip_filters=False,
                                              nonlinearity=linear,
                                              W=kernel,
                                              b=bias)
        tip = net["relu" + stage] = NonlinearityLayer(tip,
                                                      nonlinearity=rectify)
    tip = net["pool"] = PoolLayer(tip, 2, mode="average_exc_pad")

    # Modified middle part.
    tip = net["middle"] = ConvLayer(tip, nb_filter, 1, nonlinearity=linear)

    # Feature aggregation at multiple scales: (FAN tag, feature key, scale).
    for tag, feature_key, factor in (("s8", 'features_s8', 4),
                                     ("s4", 'features_s4', 2)):
        tip = fan_module_simple(tip,
                                net,
                                tag,
                                net[feature_key],
                                nb_filter=64,
                                scale=factor)

    # Decoder: undo the pooling, then the encoder convs in reverse order.
    tip = net["unpool"] = Upscale2DLayer(tip, 2)
    for deconv_key, conv_key in (("deconv1_2", "conv1_2"),
                                 ("deconv1_1", "conv1_1")):
        tip = net[deconv_key] = transpose(tip,
                                          net[conv_key],
                                          nonlinearity=None)

    # Output batch norm initialised with shift 128 and scale 25.
    tip = net["bn"] = BatchNormLayer(tip,
                                     beta=nn.init.Constant(128.),
                                     gamma=nn.init.Constant(25.))

    return tip, net
Beispiel #29
0
    def __init__(self, config):
        """Build the KDNet layer graph and compile its Theano functions.

        Entries read from ``config``:

        * ``'steps'`` -- number of levels; the last input axis has length
          ``2**steps`` and is halved at every level
        * ``'input_features'`` -- ``'no'`` selects 1 input channel, anything
          else selects 3
        * ``'n_f'`` -- per-level unit counts, indexed ``0..steps``
        * ``'num_classes'`` -- size of the softmax output
        * ``'l2'`` -- weight of the L2 penalty in the loss
        * ``'learning_rate'`` -- initial Adam learning rate

        Attributes set: ``clouds``, ``norms``, ``target`` (symbolic inputs),
        ``train_fun``, ``prob_fun`` (compiled functions) and ``KDNet`` (dict of
        all layers by name).

        Per level the activations are split into even ('l') and odd ('r')
        positions of the last axis. Each half goes through six
        ``SharedDotLayer`` branches (``X-``..``Z+``), each branch is multiplied
        element-wise by the matching ``SPTNormReshapeLayer`` output, and the
        twelve products are summed to form the next level's input. The 'r'
        branches reuse the ``W`` and ``b`` of the corresponding 'l' branch.
        """
        steps = config['steps']
        n_f = config['n_f']

        # Branch naming and construction order; the order is kept identical
        # to the hand-unrolled original so that parameter initialisation and
        # summation order do not change.
        sides = ('l', 'r')
        # (name suffix, sign argument, axis argument) for SPTNormReshapeLayer
        directions = (
            ('X-', '-', 0),
            ('Y-', '-', 1),
            ('Z-', '-', 2),
            ('X+', '+', 0),
            ('Y+', '+', 1),
            ('Z+', '+', 2),
        )

        self.clouds = T.tensor3(dtype='float32')
        self.norms = [T.tensor3(dtype='float32') for _ in range(steps)]
        self.target = T.vector(dtype='int64')

        KDNet = {}

        def masked_branches(level):
            """Return the twelve masked branch layers of ``level`` (l then r)."""
            return [
                KDNet['cloud{}_{}_{}_masked'.format(level, side, tag)]
                for side in sides for tag, _, _ in directions
            ]

        in_channels = 1 if config['input_features'] == 'no' else 3
        KDNet['input'] = InputLayer((None, in_channels, 2**steps),
                                    input_var=self.clouds)

        for i in range(steps):
            level = i + 1
            half = 2**(steps - 1 - i)  # last-axis length of each half

            # Normals: the 'r' variant is the network input, 'l' its negation.
            KDNet['norm{}_r'.format(level)] = InputLayer(
                (None, 3, half), input_var=self.norms[i])
            KDNet['norm{}_l'.format(level)] = ExpressionLayer(
                KDNet['norm{}_r'.format(level)], lambda X: -X)
            for side in sides:
                source = KDNet['norm{}_{}'.format(level, side)]
                for tag, sign, axis in directions:
                    KDNet['norm{}_{}_{}'.format(level, side, tag)] = \
                        SPTNormReshapeLayer(source, sign, axis, n_f[level])

            # Level input: the raw cloud for the first level, otherwise the
            # sum of the previous level's twelve masked branches.
            if i == 0:
                KDNet['cloud{}'.format(level)] = SharedDotLayer(
                    KDNet['input'], n_f[i])
            else:
                KDNet['cloud{}'.format(level)] = ElemwiseSumLayer(
                    masked_branches(i))
            KDNet['cloud{}_bn'.format(level)] = BatchNormDNNLayer(
                KDNet['cloud{}'.format(level)])
            KDNet['cloud{}_relu'.format(level)] = NonlinearityLayer(
                KDNet['cloud{}_bn'.format(level)], rectify)

            # Split into odd ('r') and even ('l') positions of the last axis.
            split_shape = (None, n_f[i], half)
            KDNet['cloud{}_r'.format(level)] = ExpressionLayer(
                KDNet['cloud{}_relu'.format(level)], lambda X: X[:, :, 1::2],
                split_shape)
            KDNet['cloud{}_l'.format(level)] = ExpressionLayer(
                KDNet['cloud{}_relu'.format(level)], lambda X: X[:, :, ::2],
                split_shape)

            # 'l' branches own the parameters ...
            for tag, _, _ in directions:
                KDNet['cloud{}_l_{}'.format(level, tag)] = SharedDotLayer(
                    KDNet['cloud{}_l'.format(level)], n_f[level])
            # ... and each 'r' branch reuses W and b of its 'l' counterpart.
            for tag, _, _ in directions:
                left = KDNet['cloud{}_l_{}'.format(level, tag)]
                KDNet['cloud{}_r_{}'.format(level, tag)] = SharedDotLayer(
                    KDNet['cloud{}_r'.format(level)],
                    n_f[level],
                    W=left.W,
                    b=left.b)

            # Multiply every branch by its matching norm layer.
            for side in sides:
                for tag, _, _ in directions:
                    branch = '{}_{}_{}'.format(level, side, tag)
                    KDNet['cloud{}_masked'.format(branch)] = \
                        ElemwiseMergeLayer([
                            KDNet['cloud' + branch],
                            KDNet['norm' + branch]
                        ], T.mul)

        # Classification head on top of the last level's masked branches.
        KDNet['cloud_fin'] = ElemwiseSumLayer(masked_branches(steps))
        KDNet['cloud_fin_bn'] = BatchNormDNNLayer(KDNet['cloud_fin'])
        KDNet['cloud_fin_relu'] = NonlinearityLayer(KDNet['cloud_fin_bn'],
                                                    rectify)
        KDNet['cloud_fin_reshape'] = ReshapeLayer(KDNet['cloud_fin_relu'],
                                                  (-1, n_f[-1]))
        KDNet['output'] = DenseLayer(KDNet['cloud_fin_reshape'],
                                     config['num_classes'],
                                     nonlinearity=softmax)

        prob = get_output(KDNet['output'])
        prob_det = get_output(KDNet['output'], deterministic=True)

        weights = get_all_params(KDNet['output'], trainable=True)
        l2_pen = regularize_network_params(KDNet['output'], l2)

        # Loss and accuracy are both built from the non-deterministic output.
        loss = categorical_crossentropy(
            prob, self.target).mean() + config['l2'] * l2_pen
        accuracy = categorical_accuracy(prob, self.target).mean()

        lr = theano.shared(np.float32(config['learning_rate']))
        updates = adam(loss, weights, learning_rate=lr)

        inputs = [self.clouds] + self.norms + [self.target]
        self.train_fun = theano.function(inputs, [loss, accuracy],
                                         updates=updates)
        # NOTE(review): prob_fun returns the training-mode loss next to the
        # deterministic probabilities -- confirm this mix is intended.
        self.prob_fun = theano.function(inputs, [loss, prob_det])

        self.KDNet = KDNet
Beispiel #30
0
    def residual_block(l, increase_dim=False, projection=False):
        """Build one residual block on top of layer ``l`` and return it.

        The main path is a ``DenseLayer`` applied over the trailing axis only,
        followed by two batch-normalised 3x3 convolutions. Its result is
        summed with a shortcut and passed through a rectifier.

        :param l: incoming 4D layer; ``output_shape[1]`` is taken as the
            filter count and ``output_shape[3]`` as the dense layer's size
        :param increase_dim: when true, the first convolution uses stride 2
            and the filter count is doubled
        :param projection: only used with ``increase_dim``; selects a strided
            1x1 convolution shortcut ("option B" in the original comments)
            instead of a subsampled, channel-padded identity ("option A")
        :return: the ``NonlinearityLayer`` that ends the block
        """
        in_filters = l.output_shape[1]
        if increase_dim:
            first_stride, out_filters = (2, 2), in_filters * 2
        else:
            first_stride, out_filters = (1, 1), in_filters

        # Dense transform over the last axis only (num_leading_axes=-1),
        # keeping that axis at its original length.
        l_l = DenseLayer(l,
                         num_units=l.output_shape[3],
                         num_leading_axes=-1,
                         nonlinearity=None)
        stack_left_1 = batch_norm(
            ConvLayer(l_l,
                      num_filters=out_filters,
                      filter_size=(3, 3),
                      stride=first_stride,
                      nonlinearity=rectify,
                      pad='same',
                      W=lasagne.init.HeNormal(gain='relu'),
                      flip_filters=False))
        stack_left_2 = batch_norm(
            ConvLayer(stack_left_1,
                      num_filters=out_filters,
                      filter_size=(3, 3),
                      stride=(1, 1),
                      nonlinearity=None,
                      pad='same',
                      W=lasagne.init.HeNormal(gain='relu'),
                      flip_filters=False))
        print("first stack: ", stack_left_2.output_shape)

        # Pick the shortcut branch that is added to the main path.
        if not increase_dim:
            print("l output shape: ", l.output_shape)
            shortcut = l
        elif projection:
            # projection shortcut, as option B in paper
            shortcut = batch_norm(
                ConvLayer(l,
                          num_filters=out_filters,
                          filter_size=(1, 1),
                          stride=(2, 2),
                          nonlinearity=None,
                          pad='same',
                          b=None,
                          flip_filters=False))
            print("projection shape: ", shortcut.output_shape)
        else:
            # identity shortcut, as option A in paper: keep every second
            # row/column. A ``::2`` slice of length n has ceil(n / 2) entries,
            # which covers all even/odd size combinations at once.
            identity = ExpressionLayer(
                l, lambda X: X[:, :, ::2, ::2], lambda s:
                (s[0], s[1], (s[2] + 1) // 2, (s[3] + 1) // 2))
            # Zero-pad the channel axis by out_filters // 4 on both sides so
            # the channel count matches the doubled main path.
            shortcut = PadLayer(identity, [out_filters // 4, 0, 0],
                                batch_ndim=1)
            print('------------------')
            print(stack_left_2.output_shape)
            print(identity.output_shape)
            print(shortcut.output_shape)

        return NonlinearityLayer(ElemwiseSumLayer([stack_left_2, shortcut]),
                                 nonlinearity=rectify)