Example #1
 def __init__(self,
              Period=init.Uniform((10,100)),
              Shift=init.Uniform( (0., 1000.)),
              On_End=init.Constant(0.05)):
     self.Period = Period
     self.Shift = Shift
     self.On_End = On_End
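
The keyword defaults above are Lasagne initializer specs, not arrays: calling a spec with a shape draws a concrete array, which a layer normally does internally via add_param(). A minimal sketch of that behaviour (the shapes below are arbitrary, not from the example):

    from lasagne import init

    period_init = init.Uniform((10, 100))    # uniform over [10, 100)
    shift_init = init.Uniform((0., 1000.))   # uniform over [0, 1000)
    on_end_init = init.Constant(0.05)        # every entry equals 0.05

    print(period_init((5,)))                 # 5 sampled periods
    print(shift_init((5,)))                  # 5 sampled shifts
    print(on_end_init((5,)))                 # array of 0.05s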
Example #2
 def __init__(
         self,
         W_in=init.Normal(0.1),
         W_hid=init.Normal(0.1),
         # W_cell=init.Normal(0.1),
         b=init.Constant(0.),
         a_g=init.Uniform(0.1),
         b_g_hid_to_hid=init.Uniform(0.1),
         b_g_in_to_hid=init.Uniform(0.1),
         nonlinearity=nonlinearities.sigmoid,
         learn_a_g=True,
         learn_b_g_in_to_hid=True,
         learn_b_g_hid_to_hid=True):
     # TODO: Make formulation with peepholes and W_cell
     self.W_in = W_in
     self.W_hid = W_hid
     # if W_cell is not None:
     #     self.W_cell = W_cell
     self.a_g = a_g
     if a_g is not None:
         self.learn_a_g = learn_a_g
     self.b_g_in_to_hid = b_g_in_to_hid
     if b_g_in_to_hid is not None:
         self.learn_b_g_in_to_hid = learn_b_g_in_to_hid
     self.b_g_hid_to_hid = b_g_hid_to_hid
     if b_g_hid_to_hid is not None:
         self.learn_b_g_hid_to_hid = learn_b_g_hid_to_hid
     self.b = b
     # For the nonlinearity, if None is supplied, use identity
     if nonlinearity is None:
         self.nonlinearity = nonlinearities.identity
     else:
         self.nonlinearity = nonlinearity
Example #3
    def __init__(self, incoming, latent, nonlinearity=None,
                 W=init.Uniform(),
                 b=init.Uniform(),
                 batch_size=512,
                 p=0.0,
                 **kwargs):
        super(AE, self).__init__(incoming, **kwargs)
        self.num_batch, self.num_units = self.input_shape
        if nonlinearity is None:
            self.nonlinearity = identity
        else:
            self.nonlinearity = nonlinearity

        self.n_hidden = latent
        self.x = incoming
        self.batch_size = batch_size
        #num_inputs = int(np.prod(self.input_shape[1:]))
        rng = np.random.RandomState(123)
        self.drng = rng
        self.rng = RandomStreams(rng.randint(2 ** 30))
        self.p = p

        initial_W = np.asarray(
            rng.uniform(
                    low=-4 * np.sqrt(6. / (self.n_hidden + self.num_units)),
                    high=4 * np.sqrt(6. / (self.n_hidden + self.num_units)),
                    size=(self.num_units, self.n_hidden)
            ),
            dtype=theano.config.floatX
        )

        #self.W = self.create_param(initial_W, (num_inputs, n_hidden), name="W")
        #self.bvis = self.create_param(bvis, (num_units,), name="bvis") if bvis is not None else None
        #self.bhid = self.create_param(bhid, (n_hidden,), name="bhid") if bhid is not None else None
        self.W = theano.shared(value=initial_W, name='W', borrow=True)

        bvis = theano.shared(
                value=np.zeros(
                    self.num_units,
                    dtype=theano.config.floatX
                ),
                borrow=True
            )

        bhid = theano.shared(
                value=np.zeros(
                    self.n_hidden,
                    dtype=theano.config.floatX
                ),
                name='b',
                borrow=True
            )

        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        self.W_prime = theano.shared(value=initial_W.T, name='W_T', borrow=True)
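
The explicit initial_W above uses the classic sigmoid-autoencoder scaling, uniform in ±4 * sqrt(6 / (n_hidden + num_units)), with the tied decoder weights W_prime set to its transpose. A small standalone check of that bound (the sizes are assumed, not taken from the example):

    import numpy as np

    num_units, n_hidden = 784, 256             # assumed visible/hidden sizes
    bound = 4 * np.sqrt(6. / (n_hidden + num_units))
    rng = np.random.RandomState(123)
    W0 = rng.uniform(low=-bound, high=bound, size=(num_units, n_hidden))
    print(round(bound, 3), W0.shape)           # ~0.304, (784, 256)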
Example #4
    def __init__(
        self,
        Period=init.Uniform((10, 100)),  # period initialization
        Shift=init.Uniform((0., 1000.)),  # phase (shift) initialization
        On_End=init.Constant(0.05)):  # fraction of the period the gate stays open

        self.Period = Period
        self.Shift = Shift
        self.On_End = On_End
Example #5
    def __init__(self,
                 env_spec,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=NL.rectify,
                 hidden_w_init=LI.HeUniform(),
                 hidden_b_init=LI.Constant(0.),
                 output_nonlinearity=NL.tanh,
                 output_w_init=LI.Uniform(-3e-3, 3e-3),
                 output_b_init=LI.Uniform(-3e-3, 3e-3),
                 bn=False):

        assert isinstance(env_spec.action_space, Box)

        Serializable.quick_init(self, locals())

        l_obs = L.InputLayer(shape=(None, env_spec.observation_space.flat_dim))

        l_hidden = l_obs
        if bn:
            l_hidden = batch_norm(l_hidden)

        for idx, size in enumerate(hidden_sizes):
            l_hidden = L.DenseLayer(
                l_hidden,
                num_units=size,
                W=hidden_w_init,
                b=hidden_b_init,
                nonlinearity=hidden_nonlinearity,
                name="h%d" % idx)
            if bn:
                l_hidden = batch_norm(l_hidden)

        l_output = L.DenseLayer(
            l_hidden,
            num_units=env_spec.action_space.flat_dim,
            W=output_w_init,
            b=output_b_init,
            nonlinearity=output_nonlinearity,
            name="output")

        # Note the deterministic=True argument. It makes sure that when getting
        # actions from single observations, we do not update params in the
        # batch normalization layers

        action_var = L.get_output(l_output, deterministic=True)
        self._output_layer = l_output

        self._f_actions = tensor_utils.compile_function([l_obs.input_var],
                                                        action_var)

        super(DeterministicMLPPolicy, self).__init__(env_spec)
        LasagnePowered.__init__(self, [l_output])
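
The same initialization pattern can be reproduced with plain Lasagne, without the rllab wrappers (Serializable, LasagnePowered, tensor_utils): HeUniform/Constant for the hidden layers and a small Uniform for the tanh output layer. A stripped-down sketch with assumed dimensions:

    import lasagne.layers as L
    import lasagne.nonlinearities as NL
    import lasagne.init as LI

    obs_dim, action_dim = 10, 3                      # assumed dimensions
    l_hidden = L.InputLayer(shape=(None, obs_dim))
    for idx, size in enumerate((32, 32)):
        l_hidden = L.DenseLayer(l_hidden, num_units=size, W=LI.HeUniform(),
                                b=LI.Constant(0.), nonlinearity=NL.rectify,
                                name="h%d" % idx)
    l_output = L.DenseLayer(l_hidden, num_units=action_dim,
                            W=LI.Uniform(-3e-3, 3e-3), b=LI.Uniform(-3e-3, 3e-3),
                            nonlinearity=NL.tanh, name="output")
    action_var = L.get_output(l_output, deterministic=True)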
Example #6
            def lstm_layer(input,
                           nunits,
                           return_final,
                           backwards=False,
                           name='LSTM'):
                ingate = Gate(W_in=init.Uniform(0.01),
                              W_hid=init.Uniform(0.01),
                              b=init.Constant(0.0))
                forgetgate = Gate(W_in=init.Uniform(0.01),
                                  W_hid=init.Uniform(0.01),
                                  b=init.Constant(5.0))
                cell = Gate(
                    W_cell=None,
                    nonlinearity=T.tanh,
                    W_in=init.Uniform(0.01),
                    W_hid=init.Uniform(0.01),
                )
                outgate = Gate(W_in=init.Uniform(0.01),
                               W_hid=init.Uniform(0.01),
                               b=init.Constant(0.0))

                lstm = LSTMLayer(input,
                                 num_units=nunits,
                                 backwards=backwards,
                                 peepholes=False,
                                 ingate=ingate,
                                 forgetgate=forgetgate,
                                 cell=cell,
                                 outgate=outgate,
                                 name=name,
                                 only_return_final=return_final,
                                 mask_input=mask)
                return lstm
Example #7
 def __init__(self,
              incoming,
              num_filters,
              filter_size,
              group=1,
              stride=(1, 1),
              border_mode="valid",
              untie_biases=False,
              W=init.Uniform(),
              b=init.Constant(0.),
              nonlinearity=nonlinearities.rectify,
              convolution=T.nnet.conv2d,
              **kwargs):
     self.group = group
     # Lasagne's Conv2DLayer has no `border_mode` argument, so it is passed on to the parent as `pad`
     super(CaffeConv2DLayer, self).__init__(incoming,
                                            num_filters,
                                            filter_size,
                                            stride=stride,
                                            pad=border_mode,
                                            untie_biases=untie_biases,
                                            W=W,
                                            b=b,
                                            nonlinearity=nonlinearity,
                                            convolution=convolution,
                                            **kwargs)
     self.border_mode = border_mode
Example #8
    def __init__(self,
                 incoming,
                 gamma=init.Uniform([0.95, 1.05]),
                 beta=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify,
                 epsilon=0.001,
                 **kwargs):
        super(BatchNormalizationLayer, self).__init__(incoming, **kwargs)
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.num_units = int(np.prod(self.input_shape[1:]))
        self.gamma = self.add_param(gamma, (self.num_units, ),
                                    name="BatchNormalizationLayer:gamma",
                                    trainable=True)
        self.beta = self.add_param(beta, (self.num_units, ),
                                   name="BatchNormalizationLayer:beta",
                                   trainable=True)
        self.epsilon = epsilon

        self.mean_inference = theano.shared(np.zeros(
            (1, self.num_units), dtype=theano.config.floatX),
                                            borrow=True,
                                            broadcastable=(True, False))
        self.mean_inference.name = "shared:mean-" + self.name  ####

        self.variance_inference = theano.shared(np.zeros(
            (1, self.num_units), dtype=theano.config.floatX),
                                                borrow=True,
                                                broadcastable=(True, False))
        self.variance_inference.name = "shared:variance-" + self.name  ####
Example #9
 def __init__(self,
              incoming,
              num_filters,
              filter_size,
              groups=1,
              strides=(1, 1),
              border_mode=None,
              untie_biases=False,
              W=init.Uniform(),
              b=init.Constant(0.),
              nonlinearity=nonlinearities.rectify,
              pad=None,
              dimshuffle=True,
              flip_filters=False,
              partial_sum=1,
              **kwargs):
     super(CaffeConv2DCCLayer, self).__init__(incoming,
                                              num_filters,
                                              filter_size,
                                              strides=strides,
                                              border_mode=border_mode,
                                              untie_biases=untie_biases,
                                              W=W,
                                              b=b,
                                              nonlinearity=nonlinearity,
                                              pad=pad,
                                              dimshuffle=dimshuffle,
                                              flip_filters=flip_filters,
                                              partial_sum=partial_sum,
                                              **kwargs)
     self.groups = groups
     self.filter_acts_op = FilterActs(numGroups=self.groups,
                                      stride=self.stride,
                                      partial_sum=self.partial_sum,
                                      pad=self.pad)
Example #10
 def __init__(self, input_layer, num_units, W=init.Uniform(), **kwargs):
     super(RecurrentSoftmaxLayer, self).__init__(input_layer)
     self.num_units = num_units
     self.num_time_steps = self.input_shape[1]
     self.num_features = self.input_shape[2]
     self.W = self.create_param(W, (self.num_features, self.num_units),
                                name="W")
Example #11
def ptb_lstm(input_var, vocabulary_size, hidden_size, seq_len, num_layers,
             dropout, batch_size):
    l_input = L.InputLayer(shape=(batch_size, seq_len), input_var=input_var)
    l_embed = L.EmbeddingLayer(l_input,
                               vocabulary_size,
                               hidden_size,
                               W=init.Uniform(1.0))
    l_lstms = []
    for i in range(num_layers):
        l_lstm = L.LSTMLayer(l_embed if i == 0 else l_lstms[-1],
                             hidden_size,
                             ingate=L.Gate(W_in=init.GlorotUniform(),
                                           W_hid=init.Orthogonal()),
                             forgetgate=L.Gate(W_in=init.GlorotUniform(),
                                               W_hid=init.Orthogonal(),
                                               b=init.Constant(1.0)),
                             cell=L.Gate(
                                 W_in=init.GlorotUniform(),
                                 W_hid=init.Orthogonal(),
                                 W_cell=None,
                                 nonlinearity=lasagne.nonlinearities.tanh),
                             outgate=L.Gate(W_in=init.GlorotUniform(),
                                            W_hid=init.Orthogonal()))
        l_lstms.append(l_lstm)
    l_drop = L.DropoutLayer(l_lstms[-1], dropout)
    l_out = L.DenseLayer(l_drop, num_units=vocabulary_size, num_leading_axes=2)
    l_out = L.ReshapeLayer(
        l_out,
        (l_out.output_shape[0] * l_out.output_shape[1], l_out.output_shape[2]))
    l_out = L.NonlinearityLayer(l_out,
                                nonlinearity=lasagne.nonlinearities.softmax)
    return l_out
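
A hedged usage sketch for ptb_lstm (every hyperparameter below is assumed, not taken from the original): build the graph from an integer token matrix and compile a prediction function.

    import theano
    import theano.tensor as T
    import lasagne.layers as L

    input_var = T.imatrix('tokens')                 # (batch_size, seq_len) token ids
    net = ptb_lstm(input_var, vocabulary_size=10000, hidden_size=200,
                   seq_len=35, num_layers=2, dropout=0.5, batch_size=20)
    probs = L.get_output(net, deterministic=True)   # (batch*seq, vocab) softmax
    predict_fn = theano.function([input_var], probs)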
Example #12
    def __init__(self, incoming, num_units, hidden_nonlinearity, name=None,
             W_init=SpectralRadius(density=0.2),
             hidden_init=LI.Constant(0.), hidden_init_trainable=True,
             Wi_init=LI.Uniform(0.5), leak_rate=0.5, **kwargs):
        
        if hidden_nonlinearity is None:
            hidden_nonlinearity = NL.identity

        L.Layer.__init__(self, incoming, name=name) # skip direct parent, we'll do all that init here

        input_shape = self.input_shape[2:]

        input_dim = ext.flatten_shape_dim(input_shape)
        # self._name = name
        # initial hidden state
        self.h0 = self.add_param(hidden_init, (num_units,), name="h0", trainable=hidden_init_trainable,
                                 regularizable=False)
        # Weights from input to hidden
        self.W_xh = self.add_param(Wi_init, (input_dim, num_units), name="W_xh", trainable=False, regularizable=False)
        
        # Recurrent weights
        self.W_hh = self.add_param(W_init, (num_units, num_units), name="W_hh", trainable=False, regularizable=False)
        self.leak_rate = leak_rate
        
        self.num_units = num_units
        self.nonlinearity = hidden_nonlinearity
Example #13
    def __init__(self,
                 Period=init.Uniform((10, 100)),
                 Shift=init.Uniform((0., 1000.)),
                 On_End=init.Constant(0.05),
                 Event_W=init.GlorotUniform(),
                 Event_b=init.Constant(0.),
                 out_W=init.GlorotUniform(),
                 out_b=init.Constant(0.)):

        self.Period = Period
        self.Shift = Shift
        self.On_End = On_End
        self.Event_W = Event_W
        self.Event_b = Event_b
        self.out_W = out_W
        self.out_b = out_b
Example #14
    def __init__(self,
                 incomming,
                 num_units,
                 seq_length,
                 W_scale=init.Uniform(),
                 W_time=init.Uniform(),
                 b_scale=init.Constant(0.),
                 b_time=init.Constant(0.),
                 nonlinearity_scale=nonlinearities.identity,
                 nonlinearity_time=nonlinearities.softmax,
                 **kwargs):
        """
        :parameters:
            - num_units : int
                Number of segments the layer can represent.

            - seq_length : int
                Number of segments the layer can represent.

            - nonlinearity_scale, nonlinearity_time : callable or None

            - W_scale, W_time, b_scale, b_time :
                Theano shared variable, numpy array or callable
        """
        super(PolygonOutputLayer, self).__init__(incomming, **kwargs)
        self.num_units = num_units
        self.seq_length = seq_length
        self.nonlinearity_scale = nonlinearity_scale
        self.nonlinearity_time = nonlinearity_time

        # params
        num_inputs = int(np.prod(self.input_shape[1:]))
        self.W_scale = self.create_param(W_scale, (num_inputs, num_units),
                                         name='W_scale')
        self.b_scale = (self.create_param(
            b_scale,
            (num_units, ), name='b_scale') if b_scale is not None else None)

        if num_units > 1:
            self.W_time = self.create_param(W_time, (num_inputs, num_units),
                                            name='W_time')
            self.b_time = (self.create_param(
                b_time,
                (num_units, ), name='b_time') if b_time is not None else None)
        else:
            self.W_time = None
            self.b_time = None
Example #15
 def _forward(self, inputX, hidden_units):
     rows, cols = inputX.shape
     layer = layers.InputLayer(shape=(rows, cols), input_var=self.X)
     layer = layers.DenseLayer(layer, num_units=hidden_units,
                               W=init.GlorotUniform(), b=init.Uniform(),
                               nonlinearity=nonlinearities.tanh)
     Hout = layers.get_output(layer)
     forwardfn = theano.function([self.X], Hout, allow_input_downcast=True)
     return forwardfn(inputX)
Example #16
def test_bilinear_group_conv(x_shape, u_shape, batch_size=2):
    X_var = T.tensor4('X')
    U_var = T.matrix('U')
    l_x = L.InputLayer(shape=(None, ) + x_shape, input_var=X_var, name='x')
    l_u = L.InputLayer(shape=(None, ) + u_shape, input_var=U_var, name='u')
    X = np.random.random((batch_size, ) + x_shape).astype(theano.config.floatX)
    U = np.random.random((batch_size, ) + u_shape).astype(theano.config.floatX)

    l_xu_outer = LT.OuterProductLayer([l_x, l_u])
    l_x_diff_pred = LT.GroupConv2DLayer(l_xu_outer,
                                        x_shape[0],
                                        filter_size=5,
                                        stride=1,
                                        pad='same',
                                        untie_biases=True,
                                        groups=x_shape[0],
                                        nonlinearity=None,
                                        W=init.Uniform(),
                                        b=init.Uniform())
    X_diff_pred_var = L.get_output(l_x_diff_pred)
    X_diff_pred_fn = theano.function([X_var, U_var], X_diff_pred_var)
    X_diff_pred = X_diff_pred_fn(X, U)

    u_dim, = u_shape
    l_x_convs = []
    for i in range(u_dim + 1):
        l_x_conv = LT.GroupConv2DLayer(
            l_x,
            x_shape[0],
            filter_size=5,
            stride=1,
            pad='same',
            untie_biases=True,
            groups=x_shape[0],
            nonlinearity=None,
            W=l_x_diff_pred.W.get_value()[:, i:i + 1],
            b=l_x_diff_pred.b.get_value() if i == u_dim else None)
        l_x_convs.append(l_x_conv)
    l_x_diff_pred_bw = LT.BatchwiseSumLayer(l_x_convs + [l_u])
    X_diff_pred_bw_var = L.get_output(l_x_diff_pred_bw)
    X_diff_pred_bw_fn = theano.function([X_var, U_var], X_diff_pred_bw_var)
    X_diff_pred_bw = X_diff_pred_bw_fn(X, U)

    assert np.allclose(X_diff_pred, X_diff_pred_bw, atol=1e-7)
Example #17
    def __init__(self, incomings, Ws=init.Uniform(), bs=init.Constant(0.),
                 nonlinearity=nonlinearities.sigmoid,
                 prob_func=nonlinearities.linear, **kwargs):
        super(GatedMultipleInputsLayer, self).__init__(incomings, **kwargs)
        num_out = self.input_shapes[0][1]
        # make gates
        self.Ws = [self.create_param(Ws, (num_out,num_out)) for i in range(len(incomings))]
        self.bs = [self.create_param(bs, (num_out,)) for i in range(len(incomings))]

        self.num_inputs = len(incomings)
        self.nonlinearity = nonlinearity
        self.prob_func = prob_func
Example #18
    def __init__(self, incoming, num_units, W=init.Uniform(), E=init.Uniform(),
                 b=init.Constant(0.), nonlinearity=nonlinearities.rectify,
                 **kwargs):
        super(NCAALayer, self).__init__(incoming, **kwargs)
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.num_units = num_units
        assert num_units % 2 == 0

        self.input_shape = incoming.get_output_shape()
        num_inputs = int(np.prod(self.input_shape[1:]))
        assert (num_inputs - 2 * team_features) % (2 * ppt) == 0

        self.W = self.create_param(W, ((num_inputs - 2 * team_features) / 2 / ppt, num_units / 2))
        self.E = self.create_param(E, (team_features, num_units / 2))
        self.b = (self.create_param(b, (num_units / 2,))
                  if b is not None else None)
Example #19
def createMLP(layers, s):
    l_in = lasagne.layers.InputLayer(shape=(None, s))
    prev_layer = l_in
    Ws = []
    for layer in layers:
        enc = lasagne.layers.DenseLayer(prev_layer,
                                        num_units=layer,
                                        nonlinearity=rectify,
                                        W=init.Uniform(0.001),
                                        b=None)
        Ws += [enc.W]
        drop = lasagne.layers.DropoutLayer(enc, p=0.0)
        prev_layer = drop
    idx = 1
    last_enc = prev_layer
    # apply the mask between the encoder and the decoder
    mask = lasagne.layers.InputLayer(shape=(None, layers[-1]))
    mask_layer = lasagne.layers.ElemwiseMergeLayer([prev_layer, mask],
                                                   merge_function=T.mul)
    prev_layer = mask_layer
    for layer in layers[-2::-1]:
        print(layer)
        dec = lasagne.layers.DenseLayer(prev_layer,
                                        num_units=layer,
                                        nonlinearity=rectify,
                                        W=Ws[-idx].T,
                                        b=None)
        idx += 1
        drop = lasagne.layers.DropoutLayer(dec, p=0.0)
        prev_layer = drop

    model = lasagne.layers.DenseLayer(prev_layer,
                                      num_units=s,
                                      nonlinearity=identity,
                                      W=Ws[0].T,
                                      b=None)

    x_sym = T.dmatrix()
    mask_sym = T.dmatrix()
    all_params = lasagne.layers.get_all_params(model)
    for i in all_params:
        print(i)
    output = lasagne.layers.get_output(model,
                                       inputs={
                                           l_in: x_sym,
                                           mask: mask_sym
                                       })
    loss_eval = lasagne.objectives.squared_error(output, x_sym).sum()
    loss_eval /= (2. * batch_size)
    updates = lasagne.updates.adam(loss_eval, all_params)

    return l_in, model, last_enc, theano.function([x_sym, mask_sym],
                                                  loss_eval,
                                                  updates=updates), mask
Example #20
    def __init__(self, incomings, V=init.Uniform(), **kwargs):

        assert len(incomings) == 4
        assert len(incomings[0].output_shape) == 3
        assert len(incomings[1].output_shape) == 3
        assert len(incomings[2].output_shape) == 2
        assert len(incomings[3].output_shape) == 2

        super(WeightedFeatureLayer, self).__init__(incomings, **kwargs)

        emb_size = incomings[0].output_shape[2]
        self.V = self.add_param(V, (emb_size, ), name="V")
Example #21
def test_group_conv(x_shape, num_filters, groups, batch_size=2):
    X_var = T.tensor4('X')
    l_x = L.InputLayer(shape=(None, ) + x_shape, input_var=X_var, name='x')
    X = np.random.random((batch_size, ) + x_shape).astype(theano.config.floatX)

    l_conv = LT.GroupConv2DLayer(l_x,
                                 num_filters,
                                 filter_size=3,
                                 stride=1,
                                 pad='same',
                                 untie_biases=True,
                                 groups=groups,
                                 nonlinearity=None,
                                 W=init.Uniform(),
                                 b=init.Uniform())
    conv_var = L.get_output(l_conv)
    conv_fn = theano.function([X_var], conv_var)
    tic()
    conv = conv_fn(X)
    toc("conv time for x_shape=%r, num_filters=%r, groups=%r, batch_size=%r\n\t"
        % (x_shape, num_filters, groups, batch_size))

    l_scan_conv = LT.ScanGroupConv2DLayer(l_x,
                                          num_filters,
                                          filter_size=3,
                                          stride=1,
                                          pad='same',
                                          untie_biases=True,
                                          groups=groups,
                                          nonlinearity=None,
                                          W=l_conv.W,
                                          b=l_conv.b)
    scan_conv_var = L.get_output(l_scan_conv)
    scan_conv_fn = theano.function([X_var], scan_conv_var)
    tic()
    scan_conv = scan_conv_fn(X)
    toc("scan_conv time for x_shape=%r, num_filters=%r, groups=%r, batch_size=%r\n\t"
        % (x_shape, num_filters, groups, batch_size))

    assert np.allclose(conv, scan_conv)
Example #22
    def __init__(self,
                 input_layer,
                 gamma=init.Uniform([0.95, 1.05]),
                 beta=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify,
                 epsilon=0.001,
                 **kwargs):
        super(BatchNormLayer, self).__init__(input_layer, **kwargs)
        self.additional_updates = None
        self.epsilon = epsilon
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        input_shape = input_layer.get_output_shape()

        if len(input_shape) == 2:  # in case of dense layer
            self.axis = (0)
            param_shape = (input_shape[-1])
            self.gamma = self.create_param(gamma, param_shape)
            self.beta = self.create_param(beta, param_shape)
            ema_shape = (1, input_shape[-1])
            ema_bc = (True, False)
        elif len(input_shape) == 4:  # in case of conv2d layer
            self.axis = (0, 2, 3)
            param_shape = (input_shape[1], 1, 1)

            # it has to be made broadcastable on the first axis
            self.gamma = theano.shared(utils.floatX(gamma(param_shape)),
                                       broadcastable=(False, True, True),
                                       borrow=True)
            self.beta = theano.shared(utils.floatX(beta(param_shape)),
                                      broadcastable=(False, True, True),
                                      borrow=True)
            ema_shape = (1, input_shape[1], 1, 1)
            ema_bc = (True, False, True, True)
        else:
            raise NotImplementedError

        self.mean_ema = theano.shared(np.zeros(ema_shape,
                                               dtype=theano.config.floatX),
                                      borrow=True,
                                      broadcastable=ema_bc)

        self.variance_ema = theano.shared(np.ones(ema_shape,
                                                  dtype=theano.config.floatX),
                                          borrow=True,
                                          broadcastable=ema_bc)

        self.batch_cnt = theano.shared(0)
Example #23
    def __init__(self, incoming, num_filters, filter_size, strides=(1, 1), border_mode=None, untie_biases=False,
                 W=init.Uniform(), b=init.Constant(0.), nonlinearity=nonlinearities.rectify, pad=None,
                 flip_filters=False, **kwargs):
        super(Conv2DDNNLayer, self).__init__(incoming, **kwargs)
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.num_filters = num_filters
        self.filter_size = filter_size
        if isinstance(strides, int):
            strides = (strides, strides)
        self.strides = strides
        self.untie_biases = untie_biases
        self.flip_filters = flip_filters

        if border_mode is not None and pad is not None:
            raise RuntimeError("You cannot specify both 'border_mode' and 'pad'. To avoid ambiguity, please specify only one of them.")
        elif border_mode is None and pad is None:
            # no option specified, default to valid mode
            self.pad = (0, 0)
            self.border_mode = 'valid'
        elif border_mode is not None:
            if border_mode == 'valid':
                self.pad = (0, 0)
                self.border_mode = 'valid'
            elif border_mode == 'full':
                self.pad = (self.filter_size[0] - 1, self.filter_size[1] - 1)
                self.border_mode = 'full'
            elif border_mode == 'same':
                # dnn_conv does not support same, so we just specify padding directly.
                # only works for odd filter size, but the even filter size case is probably not worth supporting.
                self.pad = ((self.filter_size[0] - 1) // 2, (self.filter_size[1] - 1) // 2)
                self.border_mode = None
            else:
                raise RuntimeError("Unsupported border_mode for Conv2DDNNLayer: %s" % border_mode)
        else:
            if isinstance(pad, int):
                pad = (pad, pad)
            self.pad = pad
            self.border_mode = None

        self.W = self.create_param(W, self.get_W_shape(), name="W")
        if b is None:
            self.b = None
        elif self.untie_biases:
            output_shape = self.get_output_shape()
            self.b = self.create_param(b, (num_filters, output_shape[2], output_shape[3]), name="b")
        else:
            self.b = self.create_param(b, (num_filters,), name="b")
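
The border_mode handling above reduces each mode to a fixed padding; with 5x5 filters, for example, the values work out as follows (a quick worked illustration, not part of the original code):

    filter_size = (5, 5)                                                # assumed odd filter size
    pad_valid = (0, 0)
    pad_full = (filter_size[0] - 1, filter_size[1] - 1)                 # (4, 4)
    pad_same = ((filter_size[0] - 1) // 2, (filter_size[1] - 1) // 2)   # (2, 2)
    print(pad_valid, pad_full, pad_same)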
Example #24
    def __init__(self,
                 incoming,
                 num_units,
                 n_hidden,
                 W=init.Uniform(),
                 bhid=init.Constant(0.),
                 bvis=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify,
                 **kwargs):
        super(AutoEncoder, self).__init__(incoming, **kwargs)
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.num_units = num_units
        self.n_hidden = n_hidden
        self.x = incoming

        #num_inputs = int(np.prod(self.input_shape[1:]))
        num_inputs = num_units
        rng = np.random.RandomState(123)
        self.rng = RandomStreams(rng.randint(2**30))

        initial_W = np.asarray(rng.uniform(
            low=-4 * np.sqrt(6. / (n_hidden + num_units)),
            high=4 * np.sqrt(6. / (n_hidden + num_units)),
            size=(num_units, n_hidden)),
                               dtype=theano.config.floatX)

        #self.W = self.create_param(initial_W, (num_inputs, n_hidden), name="W")
        #self.bvis = self.create_param(bvis, (num_units,), name="bvis") if bvis is not None else None
        #self.bhid = self.create_param(bhid, (n_hidden,), name="bhid") if bhid is not None else None
        self.W = theano.shared(value=initial_W, name='W', borrow=True)

        bvis = theano.shared(value=np.zeros(num_units,
                                            dtype=theano.config.floatX),
                             borrow=True)

        bhid = theano.shared(value=np.zeros(n_hidden,
                                            dtype=theano.config.floatX),
                             name='b',
                             borrow=True)

        # b corresponds to the bias of the hidden
        self.b = bhid
        # b_prime corresponds to the bias of the visible
        self.b_prime = bvis
        # tied weights, therefore W_prime is W transpose
        self.W_prime = self.W.T
Example #25
  def __init__(self, incomings, num_units, nonlinearity=nonlinearities.sigmoid,
               W=init.Uniform(), b = init.Constant(0.0), **kwargs):
    super(MergeDense, self).__init__(incomings=incomings, **kwargs)

    self.num_units = num_units

    self.input_shapes = [ inc.output_shape for inc in incomings ]

    self.weights = [
      self.get_weights(W, shape=input_shape, name='W%d' % i)
      for i, input_shape in enumerate(self.input_shapes)
    ]

    self.b = self.add_param(b, (self.num_units,), name="b", regularizable=False)

    self.nonlinearity = nonlinearity
Example #26
 def __init__(self,
              W_in=init.Orthogonal(0.1),
              W_hid=init.Orthogonal(0.1),
              W_cell=init.Uniform(0.1),
              b=init.Constant(0.),
              nonlinearity=nonlinearities.sigmoid):
     self.W_in = W_in
     self.W_hid = W_hid
     # Don't store a cell weight vector when cell is None
     if W_cell is not None:
         self.W_cell = W_cell
     self.b = b
     # For the nonlinearity, if None is supplied, use identity
     if nonlinearity is None:
         self.nonlinearity = nonlinearities.identity
     else:
         self.nonlinearity = nonlinearity
Example #27
        def lstm_layer(input,
                       nunits,
                       return_final,
                       backwards=False,
                       name='LSTM'):
            ingate = Gate(W_in=init.Uniform(0.01),
                          W_hid=init.Uniform(0.01),
                          b=init.Constant(0.0))
            forgetgate = Gate(W_in=init.Uniform(0.01),
                              W_hid=init.Uniform(0.01),
                              b=init.Constant(5.0))
            cell = Gate(
                W_cell=None,
                nonlinearity=T.tanh,
                W_in=init.Uniform(0.01),
                W_hid=init.Uniform(0.01),
            )
            outgate = Gate(W_in=init.Uniform(0.01),
                           W_hid=init.Uniform(0.01),
                           b=init.Constant(0.0))

            lstm = LSTMLayer(input,
                             num_units=nunits,
                             backwards=backwards,
                             peepholes=False,
                             ingate=ingate,
                             forgetgate=forgetgate,
                             cell=cell,
                             outgate=outgate,
                             name=name,
                             only_return_final=return_final)

             # note: `rec` below is constructed but never used; only the LSTM
             # layer is returned
             rec = RecurrentLayer(input,
                                 num_units=nunits,
                                 W_in_to_hid=init.GlorotNormal('relu'),
                                 W_hid_to_hid=init.GlorotNormal('relu'),
                                 backwards=backwards,
                                 nonlinearity=rectify,
                                 only_return_final=return_final,
                                 name=name)
            return lstm
Example #28
    def __init__(self,
                 incoming,
                 num_units,
                 W=init.Uniform(),
                 b=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify,
                 **kwargs):
        super(CaffeDenseLayer, self).__init__(incoming, **kwargs)
        if nonlinearity is None:
            self.nonlinearity = nonlinearities.identity
        else:
            self.nonlinearity = nonlinearity

        self.num_units = num_units

        num_inputs = int(np.prod(self.input_shape[1:]))

        self.W = self.create_param(W, (num_inputs, num_units), name="W")
        self.b = (self.create_param(b, (num_units, ), name="b")
                  if b is not None else None)
Example #29
 def __init__(self,
              incoming,
              num_filters,
              filter_size,
              groups=1,
              stride=(1, 1),
              border_mode=None,
              untie_biases=False,
              W=init.Uniform(),
              b=init.Constant(0.),
              nonlinearity=nonlinearities.rectify,
              pad=None,
              dimshuffle=True,
              flip_filters=False,
              partial_sum=1,
              **kwargs):
     super(CaffeConv2DCCLayer, self).__init__(incoming,
                                              num_filters,
                                              filter_size,
                                              stride=stride,
                                              untie_biases=untie_biases,
                                              W=W,
                                              b=b,
                                              nonlinearity=nonlinearity,
                                              pad=pad,
                                              dimshuffle=dimshuffle,
                                              flip_filters=flip_filters,
                                              partial_sum=partial_sum,
                                              **kwargs)
     self.groups = groups
     # the FilterActs in pylearn2 cannot accept tuple-type pad
     if isinstance(self.pad, int):
         self.pad = self.pad
     elif isinstance(self.pad, tuple):
         self.pad = self.pad[0]
     else:
         self.pad = 0
     self.filter_acts_op = FilterActs(numGroups=self.groups,
                                      stride=self.stride,
                                      partial_sum=self.partial_sum,
                                      pad=self.pad)
Example #30
    def __init__(self,
                 incoming,
                 num_units,
                 W_in_to_hid=init.Uniform(),
                 W_hid_to_hid=init.Uniform(),
                 a_g=init.Uniform(0.1),
                 b_g_hid_to_hid=init.Uniform(0.1),
                 b_g_in_to_hid=init.Uniform(0.1),
                 b=init.Constant(0.),
                 nonlinearity=nonlinearities.rectify,
                 hid_init=init.Constant(0.),
                 backwards=False,
                 learn_init=False,
                 learn_a_g=True,
                 learn_b_g_in_to_hid=True,
                 learn_b_g_hid_to_hid=True,
                 gradient_steps=-1,
                 grad_clipping=0,
                 unroll_scan=False,
                 precompute_input=True,
                 mask_input=None,
                 only_return_final=False,
                 **kwargs):

        if isinstance(incoming, tuple):
            input_shape = incoming
        else:
            input_shape = incoming.output_shape
        # Retrieve the supplied name, if it exists; otherwise use ''
        if 'name' in kwargs:
            basename = kwargs['name'] + '.'
            # Create a separate version of kwargs for the contained layers
            # which does not include 'name'
            layer_kwargs = dict(
                (key, arg) for key, arg in kwargs.items() if key != 'name')
        else:
            basename = ''
            layer_kwargs = kwargs
        # We will be passing the input at each time step to the dense layer,
        # so we need to remove the second dimension (the time dimension)
        in_to_hid = DenseLayer(InputLayer((None, ) + input_shape[2:]),
                               num_units,
                               W=W_in_to_hid,
                               b=None,
                               nonlinearity=None,
                               name=basename + 'input_to_hidden',
                               **layer_kwargs)
        # The hidden-to-hidden layer expects its inputs to have num_units
        # features because it recycles the previous hidden state
        hid_to_hid = DenseLayer(InputLayer((None, num_units)),
                                num_units,
                                W=W_hid_to_hid,
                                b=None,
                                nonlinearity=None,
                                name=basename + 'hidden_to_hidden',
                                **layer_kwargs)

        # Make child layer parameters intuitively accessible
        self.W_in_to_hid = in_to_hid.W
        self.W_hid_to_hid = hid_to_hid.W

        super(MIRecurrentLayer,
              self).__init__(incoming,
                             in_to_hid,
                             hid_to_hid,
                             a_g=a_g,
                             b_g_in_to_hid=b_g_in_to_hid,
                             b_g_hid_to_hid=b_g_hid_to_hid,
                             b=b,
                             nonlinearity=nonlinearity,
                             hid_init=hid_init,
                             backwards=backwards,
                             learn_init=learn_init,
                             learn_a_g=learn_a_g,
                             learn_b_g_in_to_hid=learn_b_g_in_to_hid,
                             gradient_steps=gradient_steps,
                             grad_clipping=grad_clipping,
                             unroll_scan=unroll_scan,
                             precompute_input=precompute_input,
                             mask_input=mask_input,
                             only_return_final=only_return_final,
                             **kwargs)