Beispiel #1
0
def normalize_mst_data(__mst_data, avg, std):
    """Standardize ``__mst_data`` with a given mean and standard deviation.

    ``avg`` and ``std`` are transposed, cast to ``fpX`` and padded with a
    leading and a trailing singleton axis before being stored in Theano
    shared variables.  The leading axis of each is then flagged as
    broadcastable.

    Returns a pair ``(normalized, [avg_var, std_var])``.  The two variables
    in the list are the re-patterned variables that appear in the returned
    expression, not the raw shared variables.
    """
    sample_axis_pattern = (True, False, False, False)

    def _to_shared(stat):
        # 2-d array -> transposed, with one singleton axis on each side
        return theano.shared(
            stat.T.astype(fpX)[np.newaxis, :, :, np.newaxis])

    avg_var = _to_shared(avg)
    std_var = _to_shared(std)
    # set the broadcastability of the sample axis
    avg_var = T.patternbroadcast(avg_var, sample_axis_pattern)
    std_var = T.patternbroadcast(std_var, sample_axis_pattern)

    normalized = (__mst_data - avg_var) / std_var
    return normalized, [avg_var, std_var]
Beispiel #2
0
    def grad(self, inp, grads):
        """Build the symbolic gradients with respect to both inputs.

        ``inp`` unpacks to ``(bottom, weights)`` and ``grads`` to the single
        output gradient ``top``.  Returns ``(d_bottom, d_weights)``.
        """
        bottom, weights = inp
        top, = grads
        op_args = (self.imshp, self.kshp, self.border_mode,
                   self.subsample, self.filter_flip)

        grad_wrt_inputs = AbstractConv2d_gradInputs(*op_args)
        d_bottom = grad_wrt_inputs(weights, top, bottom.shape[-2:])
        grad_wrt_weights = AbstractConv2d_gradWeights(*op_args)
        d_weights = grad_wrt_weights(bottom, top, weights.shape[-2:])

        # Force each gradient to carry the broadcastable pattern of its
        # input (the grad opts may fail to infer it), then let the input's
        # type filter the result so the gradient lives on the same device
        # as the corresponding input.
        d_bottom = bottom.type.filter_variable(
            patternbroadcast(d_bottom, bottom.broadcastable))
        d_weights = weights.type.filter_variable(
            patternbroadcast(d_weights, weights.broadcastable))
        return d_bottom, d_weights
Beispiel #3
0
    def apply(self, input_):
        """Batch-normalize ``input_`` using statistics of the current batch.

        Each statistic named in ``self.stats`` is computed from ``input_``;
        for every entry of ``self.roles`` a graph transform to the matching
        population statistic is registered and the batch statistic is tagged
        with its role.  Returns the result of Theano's
        ``batch_normalization``.
        """
        # Reduce over axis 0 plus every axis flagged in self.broadcastable
        # (shifted by one to account for axis 0).
        aggregate_axes = [0]
        aggregate_axes.extend(
            1 + i for i, flag in enumerate(self.broadcastable) if flag)

        # NOTE: batch_stats is deliberately local rather than stored on
        # self, because apply may be called multiple times.
        batch_stats = {}
        for stat in self.stats:
            reducer = getattr(input_, stat)
            batch_stats[stat] = reducer(axis=aggregate_axes, keepdims=True)

        for stat, role in self.roles.items():
            population_stat = T.patternbroadcast(
                self.population_stats[stat],
                [True] + self.broadcastable)
            # Adding zero ensures the replacement is a
            # TensorType(float32, row) just like the corresponding
            # batch_stat, rather than a CudaNdarray(float32, row).
            replacement = graph.ConstantTransform(0 + population_stat)
            graph.add_transform([batch_stats[stat]],
                                replacement,
                                reason="population_normalization")

            # make the batch statistics identifiable to get_updates()
            add_role(batch_stats[stat], role)
            batch_stats[stat] = self.annotated_statistic(batch_stats[stat])

        gamma = T.patternbroadcast(self.gamma, [True] + self.broadcastable)
        beta = T.patternbroadcast(self.beta, [True] + self.broadcastable)
        std = T.sqrt(batch_stats["var"] + self.epsilon)
        return theano.tensor.nnet.bn.batch_normalization(
            inputs=input_, gamma=gamma, beta=beta,
            mean=batch_stats["mean"], std=std)
Beispiel #4
0
def make_normalize_mst_data(_mst_data, nf, nv):
    """Build a standardization expression backed by fresh shared variables.

    Two shared variables of shape ``(1, nf, nv, 1)`` and dtype ``fpX`` are
    created, both filled with zeros.  The returned expression subtracts the
    first from ``_mst_data`` and divides by the second, with the leading
    axis of each flagged as broadcastable.

    Returns ``(expression, [avg_shared, std_shared])``.
    """
    # NOTE(review): the divisor starts out as all zeros, so presumably the
    # caller sets real values on the shared variables before evaluating.
    stat_shape = (1, nf, nv, 1)
    sample_axis_pattern = (True, False, False, False)

    avg_shared = theano.shared(np.zeros(shape=stat_shape, dtype=fpX))
    std_shared = theano.shared(np.zeros(shape=stat_shape, dtype=fpX))

    centered = _mst_data - T.patternbroadcast(avg_shared,
                                              sample_axis_pattern)
    normalized = centered / T.patternbroadcast(std_shared,
                                               sample_axis_pattern)
    return normalized, [avg_shared, std_shared]
Beispiel #5
0
    def grad(self, inp, grads):
        """Return the symbolic gradients ``(d_bottom, d_weights)``.

        ``inp`` holds ``(bottom, weights)``; ``grads`` holds the single
        gradient ``top`` flowing back from the output.
        """
        bottom, weights = inp
        top, = grads
        conv_params = (self.imshp, self.kshp, self.border_mode,
                       self.subsample, self.filter_flip)

        d_bottom = AbstractConv2d_gradInputs(*conv_params)(
            weights, top, bottom.shape[-2:])
        d_weights = AbstractConv2d_gradWeights(*conv_params)(
            bottom, top, weights.shape[-2:])

        # The grad opts may not be able to infer which dimensions are
        # broadcastable, so copy the pattern over from each input.  The
        # input's type then filters the gradient so that it lives on the
        # same device as that input.
        d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
        d_weights = patternbroadcast(d_weights, weights.broadcastable)
        d_bottom = bottom.type.filter_variable(d_bottom)
        d_weights = weights.type.filter_variable(d_weights)
        return d_bottom, d_weights
Beispiel #6
0
def filterbank(center, width, logsigma2, shape):
    """Build a row-normalized bank of Gaussian-shaped filters.

    ``shape`` must be ``(batch_size, window_size, image_size)``.
    ``center``, ``width`` and ``logsigma2`` are each reshaped to
    ``(batch_size, 1, 1)`` so they broadcast over the window and image
    axes.  The result is divided by its sum along axis 2, with the divisor
    clamped to at least ``1e-7``.
    """
    assert len(shape) == 3
    batch_size, window_size, image_size = shape

    # Window positions vary along axis 1, image positions along axis 2.
    window_pos = T.arange(window_size, dtype='float32')
    window_pos = T.patternbroadcast(
        window_pos.reshape((1, window_size, 1)), [True, False, True])
    image_pos = T.arange(image_size, dtype='float32')
    image_pos = T.patternbroadcast(
        image_pos.reshape((1, 1, image_size)), [True, True, False])

    # One scalar per batch entry, broadcast over the two trailing axes.
    per_sample = [False, True, True]
    center = T.patternbroadcast(center.reshape((batch_size, 1, 1)),
                                per_sample)
    width = T.patternbroadcast(width.reshape((batch_size, 1, 1)),
                               per_sample)
    logsigma2 = T.patternbroadcast(logsigma2.reshape((batch_size, 1, 1)),
                                   per_sample)

    relative_offset = window_pos / (window_size - 1) - 0.5
    mu = (image_size - 1) * ((1 + center) / 2 + width * relative_offset)

    squared_distance = (mu - image_pos) ** 2
    F = T.exp(-squared_distance / (2 * T.exp(logsigma2 / 2)))
    row_sums = T.sum(F, 2, keepdims=True)
    return F / T.maximum(row_sums, 1e-7)
Beispiel #7
0
    def grad(self, inp, grads):
        """Build the symbolic gradients with respect to the first two inputs.

        The first two entries of ``inp`` are ``(bottom, top)`` and ``grads``
        holds the single gradient ``weights``.  Returns
        ``(d_bottom, d_top, disconnected)`` where the last entry is a
        ``DisconnectedType`` variable.
        """
        bottom, top = inp[:2]
        weights, = grads
        conv_params = (self.imshp, self.kshp, self.border_mode,
                       self.subsample, self.filter_flip)

        grad_inputs_op = AbstractConv2d_gradInputs(*conv_params)
        d_bottom = grad_inputs_op(weights, top, bottom.shape[-2:])
        forward_op = AbstractConv2d(*conv_params)
        d_top = forward_op(bottom, weights)

        # Force each gradient to carry the broadcastable pattern of its
        # input (the grad opts may fail to infer it), then let the input's
        # type filter the result so the gradient lives on the same device
        # as the corresponding input.
        d_bottom = bottom.type.filter_variable(
            patternbroadcast(d_bottom, bottom.broadcastable))
        d_top = top.type.filter_variable(
            patternbroadcast(d_top, top.broadcastable))

        disconnected = theano.gradient.DisconnectedType()()
        return d_bottom, d_top, disconnected
Beispiel #8
0
    def grad(self, inp, grads):
        """Return ``(d_bottom, d_top, disconnected)`` for this op.

        ``bottom`` and ``top`` are the first two entries of ``inp``;
        ``weights`` is the single entry of ``grads``.  The third returned
        value is a ``DisconnectedType`` variable.
        """
        bottom, top = inp[:2]
        weights, = grads
        shared_args = (self.imshp, self.kshp, self.border_mode,
                       self.subsample, self.filter_flip)

        d_bottom = AbstractConv2d_gradInputs(*shared_args)(
            weights, top, bottom.shape[-2:])
        d_top = AbstractConv2d(*shared_args)(bottom, weights)

        # The grad opts may not be able to infer which dimensions are
        # broadcastable, so copy the pattern over from each input.  The
        # input's type then filters the gradient so that it lives on the
        # same device as that input.
        d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
        d_bottom = bottom.type.filter_variable(d_bottom)
        d_top = patternbroadcast(d_top, top.broadcastable)
        d_top = top.type.filter_variable(d_top)

        return (d_bottom, d_top, theano.gradient.DisconnectedType()())
Beispiel #9
0
def squeeze(x, axis):
    '''Remove a 1-dimension from the tensor at index "axis".

    # Arguments
        x: tensor
        axis: index of the dimension to drop; negative values count
            from the last dimension.

    # Returns
        A tensor with one dimension fewer than `x`, keeping the
        broadcastable flags of the remaining dimensions.
    '''
    ndim = x.type.ndim
    # Normalize negative indices: the slicing and the `i == axis`
    # comparison below only work for a non-negative axis.
    if axis < 0:
        axis += ndim
    broadcastable = x.broadcastable[:axis] + x.broadcastable[axis+1:]
    # Flag only the target axis as broadcastable so that T.squeeze drops
    # exactly that axis, then restore the flags of the others.
    x = T.patternbroadcast(x, [i == axis for i in range(ndim)])
    x = T.squeeze(x)
    x = T.patternbroadcast(x, broadcastable)
    return x
Beispiel #10
0
def squeeze(x, axis):
    '''Remove a 1-dimension from the tensor at index "axis".

    # Arguments
        x: tensor
        axis: index of the dimension to drop; negative values count
            from the last dimension.

    # Returns
        A tensor with one dimension fewer than `x`, keeping the
        broadcastable flags of the remaining dimensions.
    '''
    ndim = x.type.ndim
    # A negative axis would otherwise slice the pattern incorrectly and
    # never match any index in the comprehension below.
    if axis < 0:
        axis += ndim
    broadcastable = x.broadcastable[:axis] + x.broadcastable[axis + 1:]
    # Only the target axis is flagged broadcastable, so T.squeeze removes
    # just that one; the remaining flags are restored afterwards.
    x = T.patternbroadcast(x, [i == axis for i in range(ndim)])
    x = T.squeeze(x)
    x = T.patternbroadcast(x, broadcastable)
    return x
Beispiel #11
0
    def get_output_for(self, input, **kwargs):
        """Return a log mixture score of ``input`` for every model.

        Inputs with more than two dimensions are flattened to two.  The
        input is scaled by 10, compared against ``self._means`` using
        ``exp(self.sigma)`` as the per-dimension scale, and the component
        scores are combined using ``exp(self.weights)`` normalized along
        axis 1.  The returned expression has one row per input row and one
        column per entry of the first axis of the weights.
        """
        if input.ndim > 2:
            # Collapse every trailing axis into a single feature axis.
            input = input.flatten(2)

        inputData = input * 10
        inputData.name = 'inputData'

        # Insert two broadcastable axes after the first axis of the input.
        inputData_reshape = inputData.dimshuffle(0, 'x', 'x', 1)
        inputData_reshape.name = 'inputData_reshape'
        inputData_reshape = T.patternbroadcast(inputData_reshape, (False, True, True, False))
        # mean_reshape has dimension: (1, NumofClass, NumofComponent, p)
        mean_reshape = self._means.dimshuffle('x', 0, 1, 2)
        mean_reshape = T.patternbroadcast(mean_reshape, (True, False, False, False))
        mean_reshape.name = 'mean_reshape'

        # self.sigma is exponentiated before use, which keeps it positive.
        sigma = T.exp(self.sigma)
        sigma_reshape = sigma.dimshuffle('x', 0, 1, 2)
        sigma_reshape = T.patternbroadcast(sigma_reshape, (True, False, False, False))
        sigma_reshape.name = 'sigma_reshape'

        # Exponentiate the weights and normalize them so they sum to one
        # along axis 1.
        weights = T.exp(self.weights)
        weights_sum = T.sum(weights, axis=1)
        weights_sum = T.patternbroadcast(weights_sum.dimshuffle(0, 'x'), (False, True))
        weights = weights / weights_sum

        weights_reshape = weights.dimshuffle('x', 0, 1)
        weights_reshape = T.patternbroadcast(weights_reshape, (True, False, False))
        weights_reshape.name = 'weights_reshape'
        sigma_inverse_sqrt = T.sqrt(1.0 / sigma_reshape)
        sigma_inverse_sqrt.name = 'sigma_inverse_sqrt'

        # positive: squared scaled distance, summed over the last axis
        sqrtTemp = T.sqr((inputData_reshape - mean_reshape) * sigma_inverse_sqrt).sum(axis=3)

        # negative: 784 * log(sigma) ? sigma = 0.1 -> -1805, else positive.
        sigmaTemp = T.log(sigma_reshape).sum(axis=3)

        # positive: 28x28 dimension, then we have 784 * log(2\pi) = 1440
        dimTemp = T.ones((self.num_models, self.num_components), 'float32') * self.dim * T.log(2.0 * np.pi)

        logComponentOutput = - 1.0 / 2 * (sqrtTemp + sigmaTemp + dimTemp)
        logComponentOutput.name = 'logComponentOutput'
        logComponentSum = logComponentOutput + T.log(weights_reshape)
        logComponentSum.name = 'logComponentSum'

        # Subtract the per-row maximum before exponentiating and add it
        # back after the log.
        logComponentSum_max = logComponentSum.max(axis=2)
        logComponentSum_max_reshape = logComponentSum_max.dimshuffle(0, 1, 'x')
        componentSum_before = T.exp(logComponentSum - logComponentSum_max_reshape)
        componentSum_before_sum = componentSum_before.sum(axis=2)
        # NOTE(review): one is added to the shifted sum before the log,
        # which differs from a plain log-sum-exp; confirm this is intended.
        addLog = T.log(componentSum_before_sum + T.ones_like(componentSum_before_sum)) + logComponentSum_max
        return addLog
Beispiel #12
0
    def _train_fprop(self, state_below):
        """Normalize ``state_below`` with updated moving statistics.

        The batch mean and variance are taken over axis 0 for ``"fc"``
        layers and over axes ``(0, 2, 3)`` (keeping dims) for ``"conv"``
        layers.  ``self.moving_mean`` and ``self.moving_var`` are replaced
        by a blend of the batch statistic (weight ``self.mem``) and their
        previous value, and the blended values are used to normalize.
        """
        if self.layer_type == "fc":
            reduction = {"axis": 0}
        elif self.layer_type == "conv":
            reduction = {"axis": (0, 2, 3), "keepdims": True}

        batch_mean = state_below.mean(**reduction)
        batch_var = T.mean((state_below - batch_mean) ** 2, **reduction)

        keep = 1 - self.mem
        self.moving_mean = self.mem * batch_mean + keep * self.moving_mean
        self.moving_var = self.mem * batch_var + keep * self.moving_var

        normalized = (state_below - self.moving_mean) / T.sqrt(
            self.moving_var + self.epsilon)
        scale = T.patternbroadcast(self.gamma, self.broadcastable)
        shift = T.patternbroadcast(self.beta, self.broadcastable)
        return scale * normalized + shift
Beispiel #13
0
 def f(W_0, W_1):
     """Return a clone of ``b_cost`` with weights and inputs substituted.

     The ``W`` of the first two entries of ``b_layers`` is replaced by
     ``W_0`` and ``W_1`` (with every axis marked non-broadcastable), and
     ``b_x`` / ``y`` by the first ``batch_size`` rows of the training set.
     """
     index = 0
     batch = slice(index * batch_size, (index + 1) * batch_size)
     no_broadcast = (False, False, False, False)
     replacements = {
         b_layers[0].W: T.patternbroadcast(W_0, no_broadcast),
         b_layers[1].W: T.patternbroadcast(W_1, no_broadcast),
         b_x: train_set_x_b[batch],
         y: train_set_y[batch],
     }
     return theano.clone(b_cost, replacements)
Beispiel #14
0
    def _train_fprop(self, state_below):
        """Normalize ``state_below`` and refresh the moving statistics.

        For ``'fc'`` layers the statistics are reduced over axis 0; for
        ``'conv'`` layers over axes ``(0, 2, 3)`` with dims kept.  The
        moving mean and variance stored on ``self`` are overwritten with a
        ``self.mem``-weighted blend of the batch value and the old value,
        and the blended values are the ones used for normalization.
        """
        if self.layer_type == 'fc':
            stat_kwargs = dict(axis=0)
        elif self.layer_type == 'conv':
            stat_kwargs = dict(axis=(0, 2, 3), keepdims=True)

        miu = state_below.mean(**stat_kwargs)
        var = T.mean((state_below - miu)**2, **stat_kwargs)

        old_weight = 1 - self.mem
        self.moving_mean = self.mem * miu + old_weight * self.moving_mean
        self.moving_var = self.mem * var + old_weight * self.moving_var

        denominator = T.sqrt(self.moving_var + self.epsilon)
        Z = (state_below - self.moving_mean) / denominator
        return (T.patternbroadcast(self.gamma, self.broadcastable) * Z
                + T.patternbroadcast(self.beta, self.broadcastable))
Beispiel #15
0
    def compute_output(self, network, in_vw):
        """Create a shared ``alpha`` and register the rectified output.

        ``alpha`` has size 1 along every axis that is not a parameter axis
        and is used as the negative coefficient passed to
        ``treeano.utils.rectify``.  The result is registered on ``network``
        under the name ``"default"``; nothing is returned.
        """
        # gather hyperparameters
        initial_alpha = network.find_hyperparameter(["initial_alpha"], 0.25)

        # work out which axes carry their own parameter
        ndim = in_vw.ndim
        default_axes = [treeano.utils.nth_non_batch_axis(network, 0)]
        parameter_axes = treeano.utils.find_axes(
            network,
            ndim,
            positive_keys=["parameter_axes"],
            negative_keys=["non_parameter_axes"],
            positive_default=default_axes)
        broadcastable = tuple(
            axis not in parameter_axes for axis in range(ndim))
        shape = tuple(
            1 if is_bcast else size
            for is_bcast, size in zip(broadcastable, in_vw.shape))

        # create state
        alpha_vw = network.create_vw(
            "alpha",
            is_shared=True,
            shape=shape,
            tags={"parameter", "bias"},
            default_inits=[treeano.inits.ConstantInit(initial_alpha)],
        )
        alpha = T.patternbroadcast(alpha_vw.variable, broadcastable)

        # register the output
        rectified = treeano.utils.rectify(in_vw.variable,
                                          negative_coefficient=alpha)
        network.create_vw(
            "default",
            variable=rectified,
            shape=in_vw.shape,
            tags={"output"},
        )
Beispiel #16
0
def random_node(old):
    """Creates random node with shared params

    The shared ``mu`` starts at the test value of ``old`` and the shared
    ``rho`` at an array of ones with the same shape.  When ``old`` has at
    least one dimension, its broadcastable pattern is applied to both
    shared variables and to the sampled noise.

    Parameters
    ----------
    old : pm.FreeRV

    Returns
    -------
    tuple : (new node, shared mu, shared rho)
    """
    pattern = old.broadcastable
    has_dims = len(pattern) > 0
    # Only pass a broadcastable pattern when there is one to pass.
    shared_kwargs = {'broadcastable': pattern} if has_dims else {}

    rho = theano.shared(
        np.ones(old.tag.test_value.shape),
        name='{}_rho_shared'.format(old.name),
        **shared_kwargs)
    mu = theano.shared(
        old.tag.test_value,
        name='{}_mu_shared'.format(old.name),
        **shared_kwargs)

    e = tt_rng().normal(rho.shape)
    if has_dims:
        e = tt.patternbroadcast(e, pattern)
    return mu + rho2sd(rho) * e, mu, rho
Beispiel #17
0
def local_gpualloc(node):
    """Replace a host ``alloc`` node by a ``gpu_alloc`` when it pays off.

    The replacement is made when ``node.op`` is ``tensor.alloc`` and one of
    the following holds:

    * the value being allocated is produced by ``host_from_gpu``;
    * every client of the output is a ``gpu_from_host``;
    * every client of the output is a ``tensor.join`` whose inputs after
      the first are all produced by ``host_from_gpu`` or ``tensor.alloc``.

    Returns a one-element list with the replacement output, or ``None``
    when the node is left alone.
    """
    replace = False
    if node.op == tensor.alloc:
        clients = node.outputs[0].clients
        if node.inputs[0].owner and node.inputs[0].owner.op == host_from_gpu:
            replace = True
        elif all(c != 'output' and c.op == gpu_from_host
                 for c, idx in clients):
            replace = True
        elif all(c != 'output' and c.op == tensor.join and
                 all(i.owner and
                     i.owner.op in [host_from_gpu, tensor.alloc]
                     for i in c.inputs[1:])
                 for c, idx in clients):
            replace = True
    if replace:
        val = node.inputs[0]
        shp = node.inputs[1:]
        old_out = node.outputs[0]
        new_out = host_from_gpu(gpu_alloc(val, *shp))
        if new_out.type != old_out.type:
            assert new_out.type.ndim == old_out.type.ndim
            assert new_out.type.dtype == old_out.type.dtype
            # new_out may only be *more* broadcastable than old_out, never
            # less; otherwise the pattern could not be restored below.
            for b_old, b_new in zip(old_out.type.broadcastable,
                                    new_out.type.broadcastable):
                assert b_new or (not b_old)
            # Give the replacement the same broadcastable pattern as the
            # output it replaces so that the types match.
            new_out = tensor.patternbroadcast(new_out,
                                              old_out.broadcastable)

        return [new_out]
    def apply_dropout(self, input, const=0):
        """Multiply ``input`` by a random binary mask drawn with ``p=self.q``.

        When ``self.rescale`` is set, ``input`` is first divided by
        ``self.q``.  Axes listed in ``self.shared_axes`` get a mask of
        size 1 that is broadcast along that axis.  When ``const`` is
        non-zero, masked-out positions receive ``const`` instead of zero.
        """
        # Using theano constant to prevent upcasting
        one = T.constant(1)

        if self.rescale:
            input /= self.q

        # use nonsymbolic shape for dropout mask if possible
        mask_shape = self.input_shape
        if any(s is None for s in mask_shape):
            mask_shape = input.shape

        # respect shared axes: the mask has size 1 along each of them
        if self.shared_axes:
            shared_axes = tuple(a + input.ndim if a < 0 else a
                                for a in self.shared_axes)
            mask_shape = tuple(1 if position in shared_axes else size
                               for position, size in enumerate(mask_shape))

        mask = self._srng.binomial(mask_shape, p=self.q, dtype=input.dtype)
        if self.shared_axes:
            mask = T.patternbroadcast(
                mask, tuple(bool(s == 1) for s in mask_shape))

        masked = input * mask
        if const != 0:
            return masked + (const * (one - mask))
        return masked
Beispiel #19
0
 def __init__(
     self,
     data,
     batch_size=128,
     dtype=None,
     broadcastable=None,
     name="Minibatch",
     random_seed=42,
     update_shared_f=None,
     in_memory_size=None,
 ):
     """Wrap ``data`` in a shared variable and expose a random slice of it.

     ``data`` is converted with ``pm.smartfloatX`` when ``dtype`` is None,
     otherwise to ``dtype``.  The part selected by ``in_memory_size`` is
     stored in ``self.shared``; ``self.minibatch`` is that variable indexed
     by random slices built from ``batch_size`` and ``random_seed``.  When
     ``broadcastable`` is None, every axis is marked non-broadcastable.
     """
     data = (pm.smartfloatX(np.asarray(data)) if dtype is None
             else np.asarray(data, dtype))

     in_memory_slc = self.make_static_slices(in_memory_size)
     self.shared = theano.shared(data[in_memory_slc])
     self.update_shared_f = update_shared_f
     self.random_slc = self.make_random_slices(
         self.shared.shape, batch_size, random_seed)

     sliced = self.shared[self.random_slc]
     if broadcastable is None:
         broadcastable = (False, ) * sliced.ndim
     self.minibatch = tt.patternbroadcast(sliced, broadcastable)

     super().__init__(self.minibatch.type, None, None, name=name)
     # Make this variable the output of a view op applied to the minibatch.
     theano.Apply(theano.compile.view_op,
                  inputs=[self.minibatch],
                  outputs=[self])
     self.tag.test_value = copy(self.minibatch.tag.test_value)
Beispiel #20
0
    def grad(self, inp, grads):
        """Build the symbolic gradients with respect to the first two inputs.

        The first two entries of ``inp`` are ``(weights, top)`` and
        ``grads`` holds the single gradient ``bottom``.  Returns
        ``(d_weights, d_top, disconnected)`` where the last entry is a
        ``DisconnectedType`` variable.
        """
        weights, top = inp[:2]
        bottom, = grads
        conv_params = (self.imshp, self.kshp, self.border_mode,
                       self.subsample)

        grad_weights_op = AbstractConv2d_gradWeights(*conv_params)
        d_weights = grad_weights_op(bottom, top, weights.shape[-2:])
        forward_op = AbstractConv2d(*conv_params)
        d_top = forward_op(bottom, weights)

        # The grad opts may not be able to infer which dimensions are
        # broadcastable, so copy the pattern over from each input.
        d_weights = patternbroadcast(d_weights, weights.broadcastable)
        d_top = patternbroadcast(d_top, top.broadcastable)

        return (d_weights, d_top, theano.gradient.DisconnectedType()())
Beispiel #21
0
def local_gpualloc(node):
    """Move a host ``alloc`` to the GPU when its surroundings are on the GPU.

    Applies when ``node.op`` is ``tensor.alloc`` and either the allocated
    value comes from ``host_from_gpu``, or all clients of the output are
    ``gpu_from_host``, or all clients are ``tensor.join`` nodes whose
    inputs after the first come from ``host_from_gpu`` or ``tensor.alloc``.

    Returns ``[new_out]`` on replacement and ``None`` otherwise.
    """
    replace = False
    if node.op == tensor.alloc:
        output_clients = node.outputs[0].clients
        value_owner = node.inputs[0].owner
        if value_owner and value_owner.op == host_from_gpu:
            replace = True
        elif all(c != 'output' and c.op == gpu_from_host
                 for c, idx in output_clients):
            replace = True
        elif all(c != 'output' and c.op == tensor.join and
                 all(i.owner and
                     i.owner.op in [host_from_gpu, tensor.alloc]
                     for i in c.inputs[1:])
                 for c, idx in output_clients):
            replace = True
    if replace:
        val = node.inputs[0]
        shp = node.inputs[1:]
        old_out = node.outputs[0]
        new_out = host_from_gpu(gpu_alloc(val, *shp))
        if new_out.type != old_out.type:
            assert new_out.type.ndim == old_out.type.ndim
            assert new_out.type.dtype == old_out.type.dtype
            # Every dimension that was broadcastable in old_out must still
            # be broadcastable in new_out.
            for b_old, b_new in zip(old_out.type.broadcastable,
                                    new_out.type.broadcastable):
                assert b_new or (not b_old)
            # Restore the original broadcastable pattern so the
            # replacement has the same type as the old output.
            new_out = tensor.patternbroadcast(new_out,
                                              old_out.broadcastable)

        return [new_out]
Beispiel #22
0
def Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=None, h0s=None):
    """Run ``step_fn`` over ``inputs`` with ``theano.scan``.

    ``inputs`` and ``hidden_dims`` may each be a single item or a list.
    For every hidden state without a supplied initial value, a zero
    parameter named ``name + '.h0_' + str(i)`` is created and tiled to
    ``(inputs[0].shape[1], hidden_dims[i])``.  All initial states are
    marked non-broadcastable on every axis before being handed to scan.

    ``non_sequences`` defaults to an empty list.  The ``h0s`` list passed
    by the caller is copied, never modified.

    Returns the outputs of ``theano.scan``.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]

    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if non_sequences is None:
        non_sequences = []

    # Work on a private copy so the caller's list is left untouched.
    h0s = [None] * len(hidden_dims) if h0s is None else list(h0s)

    for i, hidden_dim in enumerate(hidden_dims):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                numpy.zeros((hidden_dim,), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dim)

        # Clear every broadcastable flag on the initial state.
        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    return outputs
Beispiel #23
0
    def __init__(self,
                 input_shape,
                 fantasy_particles=1,
                 n_cd=1,
                 reset_pps_int=-1,
                 **kwargs):
        """Set up the fantasy particles and their Gibbs-step callback.

        ``self.pps_shape`` is ``[fantasy_particles] + list(input_shape)``
        and ``self.pps`` is a shared variable of that shape filled with
        uniform samples from ``[0, 1)``.  The Gibbs step callback is
        registered ``n_cd`` times for ``Notifier.BATCH_FINISHED``.

        Raises ``NotImplementedError`` when entry 2 or 3 of
        ``self.pps_shape`` is ``None``.
        """
        CostCD.__init__(self, **kwargs)
        Persistent.__init__(self, reset_pps_int)

        self.pps_shape = [fantasy_particles] + list(input_shape)
        # Initialize fantasy particles.  Only entries 2 and 3 are checked;
        # slicing keeps the check safe for shapes with fewer than four
        # entries.
        if any(dim is None for dim in self.pps_shape[2:4]):
            raise NotImplementedError("PCD cannot yet deal with dynamic "
                                      "dimension lengths. Hint: Use fixed "
                                      "length training and dynamic length "
                                      "sampling.")
        else:
            self.pps = theano.shared(np.cast[fx](np.random.uniform(
                0, 1, self.pps_shape)),
                                     borrow=True)

        # Make the Gibbs step output carry the same broadcastable pattern
        # as the particles it is derived from.
        if self.pps.broadcastable != self.gibbs_step_(self.pps).broadcastable:
            rebroadcast = T.patternbroadcast(self.gibbs_step_(self.pps),
                                             self.pps.broadcastable)
        else:
            rebroadcast = self.gibbs_step_(self.pps)

        self.pps_gibbs_step = self.pps_gibbs_step_fun(rebroadcast)

        pps_input_step = partial(self.pps_gibbs_step, self.pps.get_value())
        for _ in range(n_cd):
            self.callback_add(pps_input_step, Notifier.BATCH_FINISHED)
Beispiel #24
0
def dropout(x, level, noise_shape=None, seed=None):
    '''Randomly zero entries of `x` and rescale what is kept.

    # Arguments
        x: tensor
        level: fraction of the entries in the tensor
            that will be set to 0.
        noise_shape: shape for randomly generated keep/drop flags,
            must be broadcastable to the shape of `x`
        seed: random seed to ensure determinism; drawn at random
            when not given.

    # Returns
        `x` multiplied by the keep/drop flags and divided by `1 - level`.
    '''
    if level < 0. or level >= 1:
        raise Exception('Dropout level must be in interval [0, 1[.')
    if seed is None:
        seed = np.random.randint(1, 10e6)

    rng = RandomStreams(seed=seed)
    retain_prob = 1. - level

    if noise_shape is not None:
        keep_flags = rng.binomial(noise_shape, p=retain_prob, dtype=x.dtype)
        # axes of size 1 in noise_shape are shared across `x`
        shared_axes = [dim == 1 for dim in noise_shape]
        keep_flags = T.patternbroadcast(keep_flags, shared_axes)
    else:
        keep_flags = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)

    x *= keep_flags
    x /= retain_prob
    return x
Beispiel #25
0
def Recurrent(name, hidden_dims, step_fn, inputs, non_sequences=None, h0s=None):
    """Scan ``step_fn`` over ``inputs``, creating initial states as needed.

    ``inputs`` and ``hidden_dims`` are wrapped in a list when they are not
    one already.  Each missing initial state is built from a zero
    parameter named ``name + '.h0_' + str(i)`` and allocated with shape
    ``(inputs[0].shape[1], hidden_dims[i])``.  Every initial state has all
    its broadcastable flags cleared before scanning.

    ``non_sequences`` defaults to an empty list, and ``h0s`` is copied so
    the caller's list is not modified.

    Returns the outputs of ``theano.scan``.
    """
    if not isinstance(inputs, list):
        inputs = [inputs]

    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    if non_sequences is None:
        non_sequences = []

    # Copy so that filling in defaults does not leak back to the caller.
    if h0s is None:
        h0s = [None] * len(hidden_dims)
    else:
        h0s = list(h0s)

    for i, hidden_dim in enumerate(hidden_dims):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                numpy.zeros((hidden_dim,), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dim)

        # Mark every axis of the initial state as non-broadcastable.
        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    return outputs
    def get_output_for(self, input, deterministic=False, **kwargs):
        """Apply a random binary mask to ``input``.

        ``input`` is returned unchanged when ``deterministic`` is true or
        ``self.p`` equals 0.  Otherwise a mask is drawn with probability
        ``1 - self.p``; when ``self.rescale`` is set, ``input`` is divided
        by that probability first.  Axes in ``self.shared_axes`` share one
        mask value.
        """
        if deterministic or self.p == 0:
            return input

        # Using theano constant to prevent upcasting
        one = T.constant(1, dtype='int8')
        retain_prob = one - self.p
        if self.rescale:
            input /= retain_prob

        # use nonsymbolic shape for dropout mask if possible
        mask_shape = self.input_shape
        if any(s is None for s in mask_shape):
            mask_shape = input.shape

        # respect shared axes: the mask has size 1 along each of them
        if self.shared_axes:
            shared_axes = tuple(a + input.ndim if a < 0 else a
                                for a in self.shared_axes)
            mask_shape = tuple(1 if position in shared_axes else size
                               for position, size in enumerate(mask_shape))

        mask = self._srng.binomial(mask_shape,
                                   p=retain_prob,
                                   dtype=input.dtype)
        if self.shared_axes:
            mask = T.patternbroadcast(
                mask, tuple(bool(s == 1) for s in mask_shape))
        return input * mask
Beispiel #27
0
    def get_output_for(self, input, deterministic=False, **kwargs):
        """Return ``input`` multiplied by a random binary mask.

        Nothing is done when ``deterministic`` is true or ``self.p`` is 0.
        The mask is sampled with probability ``1 - self.p``; with
        ``self.rescale`` set, ``input`` is divided by that probability
        beforehand.  The mask has size 1 along ``self.shared_axes`` and is
        broadcast along them.
        """
        if deterministic or self.p == 0:
            return input

        # Using theano constant to prevent upcasting
        one = T.constant(1)
        retain_prob = one - self.p
        if self.rescale:
            input /= retain_prob

        # use nonsymbolic shape for dropout mask if possible
        mask_shape = self.input_shape
        if any(s is None for s in mask_shape):
            mask_shape = input.shape

        # collapse the shared axes to size 1 in the mask shape
        if self.shared_axes:
            shared_axes = tuple(a + input.ndim if a < 0 else a
                                for a in self.shared_axes)
            mask_shape = tuple(1 if axis in shared_axes else length
                               for axis, length in enumerate(mask_shape))

        mask = self._srng.binomial(mask_shape, p=retain_prob,
                                   dtype=input.dtype)
        if self.shared_axes:
            bcast = tuple(bool(s == 1) for s in mask_shape)
            mask = T.patternbroadcast(mask, bcast)
        return input * mask
Beispiel #28
0
def Recurrence(processed_frames, h0, reset):
    """Run a stack of ``N_GRUS`` GRU layers over ``processed_frames``.

    processed_frames.shape: (batch size, n frames, DIM)
    h0.shape: (batch size, N_GRUS, DIM)
    reset.shape: ()
    output.shape: (batch size, n frames, DIM)

    When ``reset`` is true, the learned parameter ``'Recurrence.h0'``
    (tiled over the batch) is used as the initial state instead of ``h0``.
    Returns ``(output of the last GRU, last hidden state of every GRU
    stacked along axis 1)``.
    """
    learned_h0 = lib.param(
        'Recurrence.h0',
        numpy.zeros((N_GRUS, DIM), dtype=theano.config.floatX))
    learned_h0 = T.alloc(learned_h0, h0.shape[0], N_GRUS, DIM)
    # Clear every broadcastable flag on the tiled initial state.
    learned_h0 = T.patternbroadcast(learned_h0, [False] * learned_h0.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_h0, h0)

    # The first layer reads the frames; each later layer reads the output
    # of the layer before it.
    grus = [
        lib.ops.LowMemGRU('Recurrence.GRU0', DIM, DIM, processed_frames,
                          h0=h0[:, 0])
    ]
    for i in xrange(1, N_GRUS):
        layer_name = 'Recurrence.GRU' + str(i)
        grus.append(
            lib.ops.LowMemGRU(layer_name, DIM, DIM, grus[-1], h0=h0[:, i]))

    final_states = [layer_out[:, -1] for layer_out in grus]
    last_hidden = T.stack(final_states, axis=1)

    return (grus[-1], last_hidden)
Beispiel #29
0
Datei: dnn.py Projekt: c0g/Theano
 def local_conv_dnn_alternative(node):
     """Replace a ``GpuConv`` node by ``dnn_conv`` with a swapped direction.

     Nothing is returned when dnn is unavailable, when the node is not a
     ``GpuConv``, when the border mode is neither ``'full'`` nor
     ``'valid'``, or when the subsample is not ``(1, 1)``.  Otherwise a
     one-element list with the replacement is returned.
     """
     if not dnn_available():
         return
     if not isinstance(node.op, GpuConv):
         return

     conv_op = node.op
     border_mode = conv_op.border_mode
     subsample = conv_op.subsample
     if border_mode not in ['full', 'valid'] or subsample != (1, 1):
         return

     img, kern = node.inputs
     direction_hint = conv_op.direction_hint
     if border_mode == 'full':
         # for a full convolution, try using the forward pass instead
         # of the backward pass wrt. inputs
         direction_hint = 'forward!'
     elif border_mode == 'valid':
         # for a valid convolution, try using the backward pass wrt.
         # weights instead of the forward pass and vice versa
         direction_hint = ('forward' if direction_hint == 'bprop weights'
                           else 'bprop weights')

     rval = dnn_conv(img, kern,
                     border_mode=border_mode, subsample=subsample,
                     direction_hint=direction_hint)
     # Keep the broadcastable pattern of the output being replaced.
     replaced = node.outputs[0]
     if replaced.broadcastable != rval.broadcastable:
         rval = tensor.patternbroadcast(rval, replaced.type.broadcastable)
     return [rval]
Beispiel #30
0
def dropout(x, level, noise_shape=None, seed=None):
    '''Randomly zeroes entries of `x` and rescales what is kept.

    # Arguments
        x: tensor
        level: fraction of the entries in the tensor
            that will be set to 0.
        noise_shape: shape for randomly generated keep/drop flags,
            must be broadcastable to the shape of `x`
        seed: random seed to ensure determinism; drawn at random
            when omitted.

    # Returns
        `x` multiplied by a binomial keep mask and divided by the
        retain probability `1 - level`.

    # Raises
        Exception: if `level` lies outside the interval [0, 1[.
    '''
    if level < 0. or level >= 1:
        raise Exception('Dropout level must be in interval [0, 1[.')

    chosen_seed = np.random.randint(1, 10e6) if seed is None else seed
    stream = RandomStreams(seed=chosen_seed)
    keep_prob = 1. - level

    if noise_shape is not None:
        keep_mask = stream.binomial(noise_shape, p=keep_prob, dtype=x.dtype)
        # Axes of size 1 in the noise shape are shared across `x`.
        keep_mask = T.patternbroadcast(keep_mask,
                                       [dim == 1 for dim in noise_shape])
    else:
        keep_mask = stream.binomial(x.shape, p=keep_prob, dtype=x.dtype)

    x *= keep_mask
    x /= keep_prob
    return x
Beispiel #31
0
    def __init__(self, input, input_shape=None):
        """Build the layer output from 3x3 neighbourhoods taken with stride 2.

        Parameters
        ----------
        input : Layer or tensor
            When a ``Layer`` is given, its ``output`` is used as input, this
            layer is registered in ``Layer.linkstruct[input]`` and a missing
            ``input_shape`` is taken from ``input.output_shape``.
        input_shape : sequence of 4 ints, optional
            Shape of the input; indices 2 and 3 must be equal (square
            images only).
        """
        if isinstance(input, Layer):
            self.input = input.output
            Layer.linkstruct[input].append(self)
            if input_shape is None:
                input_shape = input.output_shape
        else:
            self.input = input
        self.input_shape = input_shape

        # Only square images are allowed.
        assert input_shape[2] == input_shape[3]

        batch, channels = input_shape[0], input_shape[1]
        height, width = input_shape[2], input_shape[3]

        # Pad one pixel on every side, filling the border with -INF.
        padded = CachedAlloc(dtypeX(-INF),
                             batch, channels, height + 2, width + 2)
        padded = T.set_subtensor(
            padded[:, :, 1:height + 1, 1:width + 1],
            self.input)

        # Floor division keeps the shape integral under true division too.
        self.output_shape = (batch, channels,
                             (height + 1) // 2, (width + 1) // 2)

        neighbourhood_mean = images2neibs(
            padded, (3, 3), (2, 2), 'ignore_borders').mean(axis=-1)
        self.output = T.patternbroadcast(
            neighbourhood_mean.reshape(self.output_shape), (False, ) * 4)
Beispiel #32
0
 def local_conv_dnn_alternative(node):
     """Try a cuDNN convolution with an alternative direction hint.

     Applies only when cuDNN is available and ``node.op`` is a ``GpuConv``
     with border mode ``'full'`` or ``'valid'`` and subsample ``(1, 1)``;
     otherwise returns ``None``.

     Returns ``[rval]`` where ``rval`` is the ``dnn_conv`` result, patched
     to the broadcastable pattern of the node's first output if needed.
     """
     if not dnn_available():
         return None
     if not isinstance(node.op, GpuConv):
         return None

     gpu_conv = node.op
     border_mode = gpu_conv.border_mode
     subsample = gpu_conv.subsample
     if border_mode not in ('full', 'valid') or subsample != (1, 1):
         return None

     img, kern = node.inputs
     previous_hint = gpu_conv.direction_hint
     if border_mode == 'full':
         # for a full convolution, try using the forward pass instead
         # of the backward pass wrt. inputs
         new_hint = 'forward!'
     elif previous_hint == 'bprop weights':
         # for a valid convolution, try using the backward pass wrt.
         # weights instead of the forward pass and vice versa
         new_hint = 'forward'
     else:
         new_hint = 'bprop weights'

     rval = dnn_conv(img,
                     kern,
                     border_mode=border_mode,
                     subsample=subsample,
                     direction_hint=new_hint)
     target = node.outputs[0]
     if target.broadcastable != rval.broadcastable:
         rval = tensor.patternbroadcast(rval, target.type.broadcastable)
     return [rval]
Beispiel #33
0
    def forward(self, inputtensor):
        """Apply dropout to the first element of ``inputtensor``.

        Returns ``inputtensor`` unchanged when the layer is deterministic
        or ``self.p`` is 0. Otherwise returns a one-element tuple with the
        masked (and, if ``self.rescale`` is set, rescaled) tensor.
        """
        if self.deterministic or self.p == 0:
            return inputtensor

        x = inputtensor[0]
        # Using theano constant to prevent upcasting
        keep_prob = T.constant(1) - self.p
        if self.rescale:
            x /= keep_prob

        # Build the mask shape, collapsing shared axes to size 1.
        mask_shape = x.shape
        bcast = None
        if self.shared_axes:
            positive_axes = tuple(axis if axis >= 0 else axis + x.ndim
                                  for axis in self.shared_axes)
            mask_shape = tuple(1 if idx in positive_axes else size
                               for idx, size in enumerate(mask_shape))
            bcast = tuple(bool(size == 1) for size in mask_shape)

        mask = self._srng.binomial(mask_shape,
                                   p=keep_prob,
                                   dtype=x.dtype)
        if bcast is not None:
            mask = T.patternbroadcast(mask, bcast)
        return (x * mask, )
Beispiel #34
0
    def grad(self, inp, grads):
        """Return the gradients with respect to ``bottom`` and ``weights``.

        ``inp`` is the pair ``(bottom, weights)`` and ``grads`` holds the
        single gradient ``top`` of the output.
        """
        bottom, weights = inp
        (top,) = grads
        conv_args = (self.imshp, self.kshp, self.border_mode,
                     self.subsample, self.filter_flip)

        grad_inputs_op = AbstractConv2d_gradInputs(*conv_args)
        d_bottom = grad_inputs_op(weights, top, bottom.shape[-2:])
        grad_weights_op = AbstractConv2d_gradWeights(*conv_args)
        d_weights = grad_weights_op(bottom, top, weights.shape[-2:])

        # Force each gradient to carry its input's broadcastable pattern,
        # even when the grad ops cannot infer it on their own.
        return (patternbroadcast(d_bottom, bottom.broadcastable),
                patternbroadcast(d_weights, weights.broadcastable))
Beispiel #35
0
    def createGradientFunctions(self):
        """Build the symbolic lower bound and compile the functions using it.

        Sets ``self.negKL``, ``self.f``, ``self.logLike``,
        ``self.gradientfunction``, ``self.lowerboundfunction`` and
        ``self.optimizer``. Reads ``self.dimTheta`` and ``self.m``.
        """
        # Symbolic placeholders: one double matrix and six double columns.
        X = T.dmatrices("X")
        mu, logSigma, u, v, f, R = T.dcols("mu", "logSigma", "u", "v", "f", "R")
        # mu and logSigma are rebound here to randomly initialised sharedX
        # values, replacing the symbolic columns created above.
        mu = sharedX( np.random.normal(10, 10, (self.dimTheta, 1)), name='mu') 
        logSigma = sharedX(np.random.uniform(0, 4, (self.dimTheta, 1)), name='logSigma')
        logLambd = sharedX(np.matrix(np.random.uniform(0, 10)),name='logLambd')
        # NOTE(review): this rebinding discards the sharedX value assigned on
        # the previous line; logLambd becomes a fresh symbolic double matrix
        # with both axes marked broadcastable.
        logLambd = T.patternbroadcast(T.dmatrix("logLambd"),[1,1])
        negKL = 0.5 * T.sum(1 + 2*logSigma - mu ** 2 - T.exp(logSigma) ** 2)
        # theta is mu shifted by exp(logSigma) times the column v.
        theta = mu+T.exp(logSigma)*v
        W=theta
        # First column of X is the target; the remaining columns are inputs.
        y=X[:,0]
        X_sim=X[:,1:]
        # Rebinds f (one of the dcols above) to the flattened prediction.
        f = (T.dot(X_sim,W)+u).flatten()
        
        gradvariables = [mu, logSigma, logLambd]
        
        
        logLike = T.sum(-(0.5 * np.log(2 * np.pi) + logLambd) - 0.5 * ((y-f)/(T.exp(logLambd)))**2)

        # Objective is (negKL + logLike) scaled by 1 / self.m.
        logp = (negKL + logLike)/self.m

        # Negated so that it can be handed to a minimiser below.
        optimizer = -logp
        
        # NOTE(review): mu and logSigma were rebound to sharedX(...) results
        # above yet are listed as explicit function inputs here and in
        # gradvariables; confirm th.function accepts them in that role.
        self.negKL = th.function([mu, logSigma], negKL, on_unused_input='ignore')
        self.f = th.function(gradvariables + [X,u,v], f, on_unused_input='ignore')
        self.logLike = th.function(gradvariables + [X, u, v], logLike,on_unused_input='ignore')
        # Gradients w.r.t. each of gradvariables, with logp appended as the
        # final output.
        derivatives = T.grad(logp,gradvariables)
        derivatives.append(logp)

        self.gradientfunction = th.function(gradvariables + [X, u, v], derivatives, on_unused_input='ignore')
        self.lowerboundfunction = th.function(gradvariables + [X, u, v], logp, on_unused_input='ignore')

        self.optimizer = BatchGradientDescent(objective=optimizer, params=gradvariables,inputs = [X,u,v],conjugate=True,max_iter=1)
Beispiel #36
0
def test_local_dimshuffle_subtensor():
    """Exercise ``local_dimshuffle_subtensor`` on four graph shapes."""

    dimshuffle_subtensor = out2in(local_dimshuffle_subtensor)

    def has_non_dimshuffle(nodes):
        # True when at least one entry is not a DimShuffle instance.
        return any([not isinstance(node, DimShuffle) for node in nodes])

    x = tensor.patternbroadcast(tensor.dtensor4("x"),
                                (False, True, False, False))
    i = tensor.iscalar("i")

    out = x[:, :, 10:30, ::i].dimshuffle(0, 2, 3)

    g = FunctionGraph([x, i], [out])
    dimshuffle_subtensor(g)
    assert has_non_dimshuffle(g.toposort())

    # Test dimshuffle remove dimensions the subtensor don't "see".
    x = tensor.tensor(broadcastable=(False, True, False), dtype="float64")
    out = x[i].dimshuffle(1)

    g = FunctionGraph([x, i], [out])
    dimshuffle_subtensor(g)
    assert has_non_dimshuffle(g.toposort())

    # Test dimshuffle remove dimensions the subtensor don't "see" but
    # have in between dimensions.
    x = tensor.tensor(broadcastable=(False, True, False, True),
                      dtype="float64")
    out = x[i].dimshuffle(1)

    f = theano.function([x, i], out)

    assert has_non_dimshuffle(f.maker.fgraph.toposort())
    assert f(np.random.rand(5, 1, 4, 1), 2).shape == (4, )

    # Test a corner case that had Theano return a bug.
    x = tensor.patternbroadcast(tensor.dtensor4("x"),
                                (False, True, False, False))

    reversed_slice = x[:, :, 0:3, ::-1].dimshuffle(0, 2, 3)
    result = reversed_slice.eval({x: np.ones((5, 1, 6, 7))})
    assert result.shape == (5, 3, 7)
Beispiel #37
0
 def _step(tensor):
     """Run the model for one step and return its output plus feedback.

     Tags ``tensor`` with a Keras shape and learning-phase flag, calls
     ``self.model`` on it, and copies every layer's ``final_states`` into
     its ``initial_state`` so the next step resumes from them.
     """
     tensor._keras_shape = (batch_size, 1, input_dim)
     tensor._uses_learning_phase = False  # TODO: should this be hard-coded?
     step_output = self.model(tensor)
     for layer in self.layers:
         layer.initial_state = layer.final_states
     # Give the output the same broadcastable pattern as the input.
     step_output = T.patternbroadcast(step_output, tensor.broadcastable)
     feedback = self.feedback_function(step_output)
     return step_output, feedback
Beispiel #38
0
def euclidean_distance_angles_biwi(y_true, y_pred):
    """Weighted Euclidean distance between predictions and targets.

    The difference ``y_pred - y_true`` is scaled by the three fixed
    weights ``3 * [0.2, 0.35, 0.45]`` (broadcast along axis 0) before the
    squared values are summed over the last axis and square-rooted.
    """
    axis_weights = 3 * np.array([0.2, 0.35, 0.45])
    shared_weights = theano.shared(np.expand_dims(axis_weights, axis=0))
    row_weights = T.patternbroadcast(shared_weights, (True, False))

    weighted_diff = (y_pred - y_true) * row_weights
    squared_sum = K.sum(K.square(weighted_diff), axis=-1, keepdims=True)
    return K.sqrt(squared_sum)
Beispiel #39
0
def Recurrent(
    name,
    hidden_dims,
    step_fn,
    inputs,
    non_sequences=None,
    h0s=None,
    reset=None
    ):
    """Scan ``step_fn`` over ``inputs`` with optional carried-over state.

    Parameters
    ----------
    name : str
        Prefix for the names of parameters and shared variables created.
    hidden_dims : int or list of int
        Size of each hidden state; a single value is wrapped in a list.
    step_fn : callable
        Step function handed to ``theano.scan``.
    inputs : tensor or list of tensors
        Sequences to scan over; a single value is wrapped in a list.
    non_sequences : list, optional
        Passed through to ``theano.scan``; defaults to an empty list.
    h0s : list, optional
        Initial states. Entries that are ``None`` are replaced by a
        zero-initialised parameter ``name + '.h0_<i>'`` allocated to
        ``(inputs[0].shape[1], hidden_dims[i])``. The caller's list is
        not modified.
    reset : scalar condition, optional
        When given, each initial state is chosen with ``ifelse`` between
        the provided ``h0`` (condition true) and a shared variable that
        is updated with the last scan output through ``default_update``.

    Returns
    -------
    The outputs returned by ``theano.scan``.
    """
    if non_sequences is None:
        non_sequences = []

    if not isinstance(inputs, list):
        inputs = [inputs]

    if not isinstance(hidden_dims, list):
        hidden_dims = [hidden_dims]

    # Work on a private copy so the caller's list is never overwritten.
    if h0s is None:
        h0s = [None] * len(hidden_dims)
    else:
        h0s = list(h0s)

    for i in xrange(len(hidden_dims)):
        if h0s[i] is None:
            h0_unbatched = lib.param(
                name + '.h0_' + str(i),
                np.zeros((hidden_dims[i],), dtype=theano.config.floatX)
            )
            num_batches = inputs[0].shape[1]
            h0s[i] = T.alloc(h0_unbatched, num_batches, hidden_dims[i])

        # Drop any broadcastable axes on the initial state.
        h0s[i] = T.patternbroadcast(h0s[i], [False] * h0s[i].ndim)

    if reset is not None:
        last_hiddens = []
        for i in xrange(len(h0s)):
            # The shape of last_hidden doesn't matter right now; we assume
            # it won't be used until we put something proper in it.
            last_hidden = theano.shared(
                np.zeros([1] * h0s[i].ndim, dtype=h0s[i].dtype),
                name=name + '.last_hidden_' + str(i)
            )
            last_hiddens.append(last_hidden)
            h0s[i] = theano.ifelse.ifelse(reset, h0s[i], last_hidden)

    outputs, _ = theano.scan(
        step_fn,
        sequences=inputs,
        outputs_info=h0s,
        non_sequences=non_sequences
    )

    if reset is not None:
        # With a single state, `outputs` is indexed directly rather than
        # per state.
        if len(last_hiddens) == 1:
            last_hiddens[0].default_update = outputs[-1]
        else:
            for i in xrange(len(last_hiddens)):
                last_hiddens[i].default_update = outputs[i][-1]

    return outputs
Beispiel #40
0
 def f(W_real):
     """Clone ``expr`` with ``W`` replaced by a non-broadcastable ``W_real``.

     ``W_real`` is rebroadcast so that none of its four axes is marked
     broadcastable before it is substituted for ``W``.
     """
     replacement = T.patternbroadcast(
         W_real,
         (False, False, False, False)
     )
     return theano.clone(expr, {W: replacement})
Beispiel #41
0
    def get_theano_variables(self, inputs=None, outputs=None):
        """
        Build the Theano graph of the pyfn driven by one flat input vector.

        Every symbolic input is replaced by a slice of a single vector
        named 'theta'. Returns the tuple ``(f_in, f_out, graph)``, where
        ``f_in`` and ``f_out`` come from ``self.finalize`` and ``graph`` is
        the cloned-graph mapping.

        Raises ValueError when more than one output is requested.
        """
        inputs = utils.as_seq(inputs, tuple)
        outputs = utils.as_seq(outputs, tuple)

        sym_inputs = ([self.get_symbolic(x) for x in inputs]
                      if inputs else self.s_inputs.values())
        sym_outputs = ([self.get_symbolic(x) for x in outputs]
                       if outputs else self.s_outputs.values())

        if len(sym_outputs) > 1:
            raise ValueError(
                'VectorArg functions should return a single output.')

        # pair each symbolic input with the real value it was created from
        s_memo = OrderedDict()
        sym_args = utils.flat_from_doc(sym_inputs)
        real_args = utils.flat_from_doc(self.all_init_args)

        # one flat symbolic vector, carved up into the individual args
        inputs_dtype = self.vector_from_args(self.all_init_args).dtype
        theano_input = tt.vector(name='theta', dtype=inputs_dtype)
        offset = 0
        for sym_arg, real_arg in zip(sym_args, real_args):
            if sym_arg.ndim > 0:
                piece = theano_input[offset: offset + real_arg.size]
                piece = piece.reshape(real_arg.shape)
            else:
                piece = theano_input[offset]
            s_memo[sym_arg] = tt.patternbroadcast(
                piece.astype(str(sym_arg.dtype)),
                broadcastable=sym_arg.broadcastable)
            offset += real_arg.size

        # clone the graph, substituting the vector slices for the inputs
        graph = theano.gof.graph.clone_get_equiv(
            theano.gof.graph.inputs(sym_outputs),
            sym_outputs,
            memo=s_memo.copy())

        # the single symbolic output in the cloned graph
        theano_outputs = graph[sym_outputs[0]]

        f_in, f_out = self.finalize(theano_input, theano_outputs, graph)

        return f_in, f_out, graph
Beispiel #42
0
def limit_param_norms(parameter_updater, param, max_norm, input_axes):
    '''
    Rewrites a parameter's update so its L2 norms never exceed max_norm.

    Norms are computed by summing over input_axes. These are typically the
    axes that get dotted with the input to the node (e.g. input_axes=[0]
    for Linear, input_axes=[1, 2, 3] for Conv2D).

    Parameters
    ----------

    parameter_updater: simplelearn.training.ParameterUpdater
      The parameter updater whose updates this will modify.

    param: theano shared variable

      The parameter being updated by parameter_updater. It must be a key
      of parameter_updater.update_pairs.

      (No way to get this from SgdParameterUpdater at present; it updates the
      parameter and its velocity, and there's no way to safely distinguish them
      in parameter_updates.update_pairs)

    max_norm: floating-point scalar
      The maximum L2 norm to be permitted for the parameters. Must be > 0.

    input_axes: Sequence
      A non-empty Sequence of ints in [0, param.ndim). The indices to sum
      over when computing the L2 norm of the updated params.
    '''

    assert_is_instance(parameter_updater, ParameterUpdater)
    assert_in(param, parameter_updater.update_pairs)

    assert_floating(max_norm)
    assert_greater(max_norm, 0.0)

    assert_greater(len(input_axes), 0)
    assert_all_integer(input_axes)
    assert_all_greater_equal(input_axes, 0)
    assert_all_less(input_axes, param.ndim)

    axes = numpy.asarray(input_axes)
    new_value = parameter_updater.update_pairs[param]

    squared_sum = T.sum(T.sqr(new_value), axis=axes, keepdims=True)
    norms = T.sqrt(squared_sum)
    clipped_norms = T.clip(norms, 0, max_norm)

    # The summed axes have size 1 (keepdims), so mark them broadcastable.
    reduced_axes = numpy.zeros(param.ndim, dtype=bool)
    reduced_axes[axes] = True
    # The small constant in the denominator avoids dividing by zero.
    rescale = T.patternbroadcast(clipped_norms / (1e-7 + norms),
                                 reduced_axes)

    parameter_updater.update_pairs[param] = new_value * rescale
Beispiel #43
0
def big_frame_level_rnn(input_sequences, h0, reset):
    """
    Run the big-frame GRU stack and its two linear read-outs.

    input_sequences.shape: (batch size, n big frames * BIG_FRAME_SIZE)
    h0.shape:              (batch size, N_BIG_GRUS, BIG_DIM)
    reset.shape:           ()
    output[0].shape:       (batch size, n frames, DIM)
    output[1].shape:       same as h0.shape
    output[2].shape:       (batch size, seq len, Q_LEVELS)

    When `reset` is true the learned parameter 'BigFrameLevel.h0' is used
    as initial state instead of `h0`.
    """

    initial_state = lib.param(
        'BigFrameLevel.h0',
        numpy.zeros((N_BIG_GRUS, BIG_DIM), dtype=theano.config.floatX))
    initial_state = T.alloc(initial_state, h0.shape[0], N_BIG_GRUS, BIG_DIM)
    initial_state = T.patternbroadcast(initial_state,
                                       [False] * initial_state.ndim)
    h0 = theano.ifelse.ifelse(reset, initial_state, h0)

    frame_shape = (input_sequences.shape[0],
                   input_sequences.shape[1] / BIG_FRAME_SIZE,
                   BIG_FRAME_SIZE)
    frames = input_sequences.reshape(frame_shape)

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = frames.astype('float32') / lib.floatX(Q_LEVELS / 2)
    frames = frames - lib.floatX(1)
    frames *= lib.floatX(2)

    # First GRU reads the frames; each later GRU reads the previous one.
    grus = [lib.ops.LowMemGRU('BigFrameLevel.GRU0',
                              BIG_FRAME_SIZE,
                              BIG_DIM,
                              frames,
                              h0=h0[:, 0])]
    for layer in xrange(1, N_BIG_GRUS):
        grus.append(lib.ops.LowMemGRU('BigFrameLevel.GRU' + str(layer),
                                      BIG_DIM,
                                      BIG_DIM,
                                      grus[-1],
                                      h0=h0[:, layer]))
    top_gru = grus[-1]

    output = lib.ops.Linear('BigFrameLevel.Output', BIG_DIM,
                            DIM * BIG_FRAME_SIZE / FRAME_SIZE, top_gru)
    output_shape = (output.shape[0],
                    output.shape[1] * BIG_FRAME_SIZE / FRAME_SIZE,
                    DIM)
    output = output.reshape(output_shape)

    # Collect the [:, -1] slice of every GRU output along a new axis 1.
    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    independent_preds = lib.ops.Linear('BigFrameLevel.IndependentPreds',
                                       BIG_DIM, Q_LEVELS * BIG_FRAME_SIZE,
                                       top_gru)
    preds_shape = (independent_preds.shape[0],
                   independent_preds.shape[1] * BIG_FRAME_SIZE,
                   Q_LEVELS)
    independent_preds = independent_preds.reshape(preds_shape)

    return (output, last_hidden, independent_preds)
def test_local_dimshuffle_subtensor():
    """Check ``local_dimshuffle_subtensor`` on several subtensor graphs."""

    dimshuffle_subtensor = out2in(local_dimshuffle_subtensor)

    def assert_some_non_dimshuffle(nodes):
        # Passes when at least one entry is not a DimShuffle instance.
        assert any([not isinstance(node, DimShuffle) for node in nodes])

    x = tensor.dtensor4('x')
    x = tensor.patternbroadcast(x, (False, True, False, False))
    i = tensor.iscalar('i')

    out = x[:, :, 10:30, ::i].dimshuffle(0, 2, 3)

    g = FunctionGraph([x, i], [out])
    dimshuffle_subtensor(g)
    assert_some_non_dimshuffle(g.toposort())

    # Test dimshuffle remove dimensions the subtensor don't "see".
    x = tensor.tensor(broadcastable=(False, True, False), dtype='float64')
    out = x[i].dimshuffle(1)

    g = FunctionGraph([x, i], [out])
    dimshuffle_subtensor(g)
    assert_some_non_dimshuffle(g.toposort())

    # Test dimshuffle remove dimensions the subtensor don't "see" but
    # have in between dimensions.
    x = tensor.tensor(broadcastable=(False, True, False, True),
                      dtype='float64')
    out = x[i].dimshuffle(1)

    f = theano.function([x, i], out)

    assert_some_non_dimshuffle(f.maker.fgraph.toposort())
    assert f(np.random.rand(5, 1, 4, 1), 2).shape == (4,)

    # Test a corner case that had Theano return a bug.
    x = tensor.dtensor4('x')
    x = tensor.patternbroadcast(x, (False, True, False, False))

    flipped = x[:, :, 0:3, ::-1].dimshuffle(0, 2, 3)
    value = flipped.eval({x: np.ones((5, 1, 6, 7))})
    assert value.shape == (5, 3, 7)
Beispiel #45
0
    def grad(self, inp, grads):
        """Return gradients for ``weights``, ``top`` and the shape input.

        The first two entries of ``inp`` are ``weights`` and ``top``;
        ``grads`` holds the single gradient ``bottom``. The third returned
        entry is a disconnected gradient.
        """
        weights, top = inp[:2]
        (bottom,) = grads
        conv_args = (self.imshp, self.kshp, self.border_mode, self.subsample)

        grad_weights_op = AbstractConv2d_gradWeights(*conv_args)
        d_weights = grad_weights_op(bottom, top, weights.shape[-2:])
        d_top = AbstractConv2d(*conv_args)(bottom, weights)

        # Force each gradient to carry its input's broadcastable pattern,
        # even when the grad ops cannot infer it on their own.
        d_weights = patternbroadcast(d_weights, weights.broadcastable)
        d_top = patternbroadcast(d_top, top.broadcastable)

        disconnected = theano.gradient.DisconnectedType()()
        return (d_weights, d_top, disconnected)
Beispiel #46
0
def local_gpuaalloc(node):
    """Return ``gpu_alloc`` of the node's inputs as a one-element tuple.

    The result is rebroadcast to the original output's pattern when the
    two differ, after asserting that no axis lost its broadcastability.
    """
    replacement = gpu_alloc(*node.inputs)
    old_pattern = node.outputs[0].broadcastable
    # We need to hide new broadcastable dimensions because
    # ReplaceValidate doesn't like when they change.
    if replacement.broadcastable != old_pattern:
        # but if a dim is suddenly not broadcastable anymore then that's a bug
        for was_bcast, is_bcast in zip(old_pattern,
                                       replacement.broadcastable):
            assert is_bcast or (not was_bcast)
        replacement = tensor.patternbroadcast(replacement, old_pattern)
    return (replacement,)
Beispiel #47
0
    def get_theano_variables(self, inputs=None, outputs=None):
        """
        Express the pyfn as a Theano graph taking one flat vector input.

        The symbolic inputs are swapped for consecutive slices of a vector
        called 'theta'. The return value is the tuple
        ``(f_in, f_out, graph)``: the two results of ``self.finalize`` and
        the mapping produced by cloning the graph.

        Raises ValueError if there is more than one symbolic output.
        """
        inputs = utils.as_seq(inputs, tuple)
        outputs = utils.as_seq(outputs, tuple)

        if not inputs:
            sym_inputs = self.s_inputs.values()
        else:
            sym_inputs = [self.get_symbolic(x) for x in inputs]

        if not outputs:
            sym_outputs = self.s_outputs.values()
        else:
            sym_outputs = [self.get_symbolic(x) for x in outputs]

        if len(sym_outputs) > 1:
            raise ValueError(
                'VectorArg functions should return a single output.')

        # flatten symbolic inputs and their initial values side by side
        s_memo = OrderedDict()
        sym_args = utils.flat_from_doc(sym_inputs)
        real_args = utils.flat_from_doc(self.all_init_args)

        # a single symbolic vector supplies the values of all the args
        inputs_dtype = self.vector_from_args(self.all_init_args).dtype
        theano_input = tt.vector(name='theta', dtype=inputs_dtype)
        start = 0
        for sa, ra in zip(sym_args, real_args):
            stop = start + ra.size
            if sa.ndim > 0:
                vector_arg = theano_input[start:stop].reshape(ra.shape)
            else:
                vector_arg = theano_input[start]
            typed_arg = vector_arg.astype(str(sa.dtype))
            s_memo[sa] = tt.patternbroadcast(typed_arg,
                                             broadcastable=sa.broadcastable)
            start = stop

        # clone the graph with the vector slices in place of the inputs
        graph = theano.gof.graph.clone_get_equiv(
            theano.gof.graph.inputs(sym_outputs),
            sym_outputs,
            memo=s_memo.copy())

        # look up the single output in the cloned graph
        theano_outputs = graph[sym_outputs[0]]

        f_in, f_out = self.finalize(theano_input, theano_outputs, graph)

        return f_in, f_out, graph
Beispiel #48
0
def big_frame_level_rnn(input_sequences, h0, reset):
    """
    Apply the stacked big-frame GRUs and both linear output heads.

    input_sequences.shape: (batch size, n big frames * BIG_FRAME_SIZE)
    h0.shape:              (batch size, N_BIG_GRUS, BIG_DIM)
    reset.shape:           ()
    output[0].shape:       (batch size, n frames, DIM)
    output[1].shape:       same as h0.shape
    output[2].shape:       (batch size, seq len, Q_LEVELS)

    The learned parameter 'BigFrameLevel.h0' replaces `h0` whenever
    `reset` is true.
    """

    learned_state = lib.param(
        'BigFrameLevel.h0',
        numpy.zeros((N_BIG_GRUS, BIG_DIM), dtype=theano.config.floatX)
    )
    learned_state = T.alloc(learned_state, h0.shape[0], N_BIG_GRUS, BIG_DIM)
    learned_state = T.patternbroadcast(learned_state,
                                       [False] * learned_state.ndim)
    h0 = theano.ifelse.ifelse(reset, learned_state, h0)

    frames = input_sequences.reshape((
        input_sequences.shape[0],
        input_sequences.shape[1] / BIG_FRAME_SIZE,
        BIG_FRAME_SIZE
    ))

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    half_levels = lib.floatX(Q_LEVELS/2)
    frames = (frames.astype('float32') / half_levels) - lib.floatX(1)
    frames *= lib.floatX(2)

    # Layer 0 consumes the frames, every later layer the one before it.
    grus = []
    for layer in xrange(N_BIG_GRUS):
        if layer == 0:
            gru = lib.ops.LowMemGRU('BigFrameLevel.GRU0', BIG_FRAME_SIZE,
                                    BIG_DIM, frames, h0=h0[:, 0])
        else:
            gru = lib.ops.LowMemGRU('BigFrameLevel.GRU'+str(layer), BIG_DIM,
                                    BIG_DIM, grus[-1], h0=h0[:, layer])
        grus.append(gru)
    if not grus:
        # Matches the original, which always builds GRU0.
        grus.append(lib.ops.LowMemGRU('BigFrameLevel.GRU0', BIG_FRAME_SIZE,
                                      BIG_DIM, frames, h0=h0[:, 0]))

    output = lib.ops.Linear(
        'BigFrameLevel.Output',
        BIG_DIM,
        DIM * BIG_FRAME_SIZE / FRAME_SIZE,
        grus[-1]
    )
    output = output.reshape((
        output.shape[0],
        output.shape[1] * BIG_FRAME_SIZE / FRAME_SIZE,
        DIM
    ))

    # Stack the [:, -1] slice of each GRU output along axis 1.
    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    independent_preds = lib.ops.Linear(
        'BigFrameLevel.IndependentPreds',
        BIG_DIM,
        Q_LEVELS * BIG_FRAME_SIZE,
        grus[-1]
    )
    independent_preds = independent_preds.reshape((
        independent_preds.shape[0],
        independent_preds.shape[1] * BIG_FRAME_SIZE,
        Q_LEVELS
    ))

    return (output, last_hidden, independent_preds)
    def make_functions(self):
        """Compile ``self.train_func`` and ``self.valid_func``.

        Before compiling, every update whose broadcastable pattern differs
        from its parameter's is rebroadcast to the parameter's pattern.
        Only the training function applies ``self.updates``.
        """
        mismatched = [(param, update)
                      for param, update in self.updates.items()
                      if param.broadcastable != update.broadcastable]
        for param, update in mismatched:
            self.updates[param] = T.patternbroadcast(update,
                                                     param.broadcastable)

        self.train_func = theano.function(inputs=self.inputs,
                                          outputs=self.train_outputs,
                                          updates=self.updates)
        self.valid_func = theano.function(inputs=self.inputs,
                                          outputs=self.valid_outputs)
Beispiel #50
0
def frame_level_rnn(input_sequences, other_input, h0, reset):
    """
    Run the frame-level GRU stack and its linear output layer.

    input_sequences.shape: (batch size, n frames * FRAME_SIZE)
    other_input.shape:     (batch size, n frames, DIM)
    h0.shape:              (batch size, N_GRUS, DIM)
    reset.shape:           ()
    output.shape:          (batch size, n frames * FRAME_SIZE, DIM)

    Returns the pair (output, last_hidden). When `reset` is true the
    learned parameter 'FrameLevel.h0' is used as initial state instead
    of `h0`.
    """

    initial_state = lib.param(
        'FrameLevel.h0', numpy.zeros((N_GRUS, DIM),
                                     dtype=theano.config.floatX))
    initial_state = T.alloc(initial_state, h0.shape[0], N_GRUS, DIM)
    initial_state = T.patternbroadcast(initial_state,
                                       [False] * initial_state.ndim)
    h0 = theano.ifelse.ifelse(reset, initial_state, h0)

    frame_shape = (input_sequences.shape[0],
                   input_sequences.shape[1] / FRAME_SIZE,
                   FRAME_SIZE)
    frames = input_sequences.reshape(frame_shape)

    # Rescale frames from ints in [0, Q_LEVELS) to floats in [-2, 2]
    # (a reasonable range to pass as inputs to the RNN)
    frames = frames.astype('float32') / lib.floatX(Q_LEVELS / 2)
    frames = frames - lib.floatX(1)
    frames *= lib.floatX(2)

    # Expand each frame to DIM features and add the conditioning input.
    expanded = lib.ops.Linear('FrameLevel.InputExpand', FRAME_SIZE, DIM,
                              frames)
    gru_input = expanded + other_input

    # First GRU reads gru_input; each later GRU reads the previous one.
    grus = [lib.ops.LowMemGRU('FrameLevel.GRU0',
                              DIM,
                              DIM,
                              gru_input,
                              h0=h0[:, 0])]
    for layer in xrange(1, N_GRUS):
        grus.append(lib.ops.LowMemGRU('FrameLevel.GRU' + str(layer),
                                      DIM,
                                      DIM,
                                      grus[-1],
                                      h0=h0[:, layer]))

    output = lib.ops.Linear('FrameLevel.Output',
                            DIM,
                            FRAME_SIZE * DIM,
                            grus[-1],
                            initialization='he')
    output_shape = (output.shape[0], output.shape[1] * FRAME_SIZE, DIM)
    output = output.reshape(output_shape)

    # Collect the [:, -1] slice of every GRU output along a new axis 1.
    last_hidden = T.stack([gru[:, -1] for gru in grus], axis=1)

    return (output, last_hidden)
Beispiel #51
0
def unflatten(flatarr, shapes, symb_arrs):
    """Split a flat array into consecutive pieces with the given shapes.

    Parameters
    ----------
    flatarr : flat array-like
        Source of the values; sliced and reshaped piece by piece.
    shapes : sequence of shapes
        Target shape of each piece, consumed in order. An empty shape
        ``()`` takes exactly one element.
    symb_arrs : sequence
        Reference variables, one per shape. A piece whose broadcastable
        pattern differs from its reference is rebroadcast to match it.

    Returns
    -------
    list
        The reshaped pieces, in the order of ``shapes``.
    """
    arrs = []
    offset = 0
    for shape, symb_arr in zip(shapes, symb_arrs):
        dims = list(shape)
        # np.prod([]) is the float 1.0, which is not a valid slice bound;
        # a zero-dimensional entry occupies exactly one element.
        size = np.prod(dims) if dims else 1
        arr = flatarr[offset:offset + size].reshape(shape)
        if arr.type.broadcastable != symb_arr.type.broadcastable:
            arr = TT.patternbroadcast(arr, symb_arr.type.broadcastable)
        arrs.append(arr)
        offset += size
    return arrs
Beispiel #52
0
    def grad(self, inp, grads):
        """Compute the gradients for the ``(bottom, weights)`` inputs.

        ``grads`` contains the single output gradient ``top``. Returns
        the pair ``(d_bottom, d_weights)``.
        """
        bottom, weights = inp
        (top,) = grads
        op_params = (self.imshp, self.kshp, self.border_mode,
                     self.subsample, self.filter_flip)

        d_bottom = AbstractConv2d_gradInputs(*op_params)(
            weights, top, bottom.shape[-2:])
        d_weights = AbstractConv2d_gradWeights(*op_params)(
            bottom, top, weights.shape[-2:])

        # Make sure that the broadcastable pattern of the inputs is used
        # for the gradients, even if the grad opts are not able to infer
        # that the dimensions are broadcastable.
        d_bottom = patternbroadcast(d_bottom, bottom.broadcastable)
        d_weights = patternbroadcast(d_weights, weights.broadcastable)
        return d_bottom, d_weights
Beispiel #53
0
    def apply(self, input_, application_call):
        """Add learned-scale noise to the input and record its KL term.

        Parameters
        ----------
        input_ : :class:`~tensor.TensorVariable`
            The input on which to apply the transformations
        application_call
            Receives the KL term as an auxiliary variable named 'nits'.

        Returns
        -------
        output : :class:`~tensor.TensorVariable`
            ``input_`` unchanged when not in training mode; otherwise
            ``input_ + noise_rate * exp(noise_level) * noise``.
        """

        # When not in training mode, turn off noise
        if not self._training_mode:
            return input_

        if self.tied_sigma:
            # One noise level per flattened unit, padded with two trailing
            # broadcastable axes.
            average = tensor.shape_padright(self.flatten.apply(input_), 2)
            noise_level = (self.prior_noise_level -
                    tensor.clip(self.mask.apply(average), -16, 16))
            noise_level = tensor.patternbroadcast(noise_level,
                    (False, False, True, True))
            noise_level = copy_and_tag_noise(
                    noise_level, self, LOG_SIGMA, 'log_sigma')
        else:
            average = input_
            noise_level = (self.prior_noise_level -
                    tensor.clip(self.mask.apply(input_), -16, 16))
            noise_level = copy_and_tag_noise(
                    noise_level, self, LOG_SIGMA, 'log_sigma')
        # Allow incomplete batches by just taking the noise that is needed
        if self.tied_noise:
            if self.noise_batch_size is not None:
                noise = self.parameters[0][:input_.shape[0], :]
            else:
                noise = self.theano_rng.normal(input_.shape[0:2])
            # Pad the two-axis noise itself with two trailing axes; the
            # previous code padded the constant 2 and dropped the noise.
            noise = tensor.shape_padright(noise, 2)
        else:
            if self.noise_batch_size is not None:
                noise = self.parameters[0][:input_.shape[0], :, :, :]
            else:
                noise = self.theano_rng.normal(input_.shape)
        kl = (
            self.prior_noise_level - noise_level
            + 0.5 * (
                tensor.exp(2 * noise_level)
                + (average - self.prior_mean) ** 2
                ) / tensor.exp(2 * self.prior_noise_level)
            - 0.5
            )
        application_call.add_auxiliary_variable(kl, roles=[NITS], name='nits')
        return input_ + self.noise_rate * tensor.exp(noise_level) * noise
Beispiel #54
0
Datei: ext.py Projekt: cknd/rllab
def unflatten_tensor_variables(flatarr, shapes, symb_arrs):
    """Split a flat tensor into consecutive pieces with the given shapes.

    Each piece is reshaped to the matching entry of ``shapes`` and, when
    its broadcastable pattern differs from the corresponding entry of
    ``symb_arrs``, rebroadcast to that entry's pattern. An empty shape
    ``()`` takes exactly one element. Returns the list of pieces.
    """
    import theano.tensor as TT
    import numpy as np
    arrs = []
    offset = 0
    for shape, symb_arr in zip(shapes, symb_arrs):
        dims = list(shape)
        # np.prod([]) is the float 1.0, which is not a valid slice bound;
        # a zero-dimensional entry occupies exactly one element.
        size = np.prod(dims) if dims else 1
        arr = flatarr[offset:offset + size].reshape(shape)
        if arr.type.broadcastable != symb_arr.type.broadcastable:
            arr = TT.patternbroadcast(arr, symb_arr.type.broadcastable)
        arrs.append(arr)
        offset += size
    return arrs
Beispiel #55
0
def gaussian(P,rows,cols,components):
	"""Build a symbolic bank of 2-D Gaussian bumps over a rows x cols grid.

	Random initial means, scales and rotation angles for ``components``
	Gaussians are stored on ``P`` as ``g_mean``, ``g_scale`` and
	``g_thetas``.  The returned expression is ``exp(-Z)``, where ``Z`` is the
	squared norm of each pixel-to-mean offset after it has been multiplied by
	``rotation(thetas) / scale``.

	NOTE(review): this relies on ``P`` handing back objects that
	``T.patternbroadcast`` accepts, and on ``rotation`` returning something
	that reshapes to (1, components, 2, 2) after the division by the
	scale -- confirm against their definitions.

	Returns a symbolic tensor with one axis over pixels and one over
	components.
	"""
	n_pixels = rows * cols

	# one (column index, row index) pair per grid cell, flattened to rows
	grid = np.dstack(np.meshgrid(np.arange(cols), np.arange(rows)))
	coords = theano.shared(
		np.asarray(grid.reshape(n_pixels, 2), dtype=np.float32)
	)

	# random initial parameters; the draw order (mean, scale, angle) is kept
	P.g_mean = np.random.rand(components, 2) * np.array([rows, cols])
	P.g_scale = 5 * np.random.rand(components, 2)
	P.g_thetas = 2 * np.pi * np.random.rand(components)

	# offset of every pixel from every component mean
	pixel_term = T.patternbroadcast(
		coords.reshape((n_pixels, 1, 2)), (False, True, False))
	mean_term = T.patternbroadcast(
		P.g_mean.reshape((1, components, 2)), (True, False, False))
	offsets = pixel_term - mean_term

	# per-component transform: rotation divided by the per-axis scale
	rot = rotation(P.g_thetas)
	scale = T.patternbroadcast(
		P.g_scale.reshape((components, 2, 1)), (False, False, True))
	transform = T.patternbroadcast(
		(rot / scale).reshape((1, components, 2, 2)),
		(True, False, False, False))

	# apply the transform to each offset by broadcasting and summing axis 3
	offsets_4d = T.patternbroadcast(
		offsets.reshape((n_pixels, components, 1, 2)),
		(False, False, True, False))
	decorrelated = T.sum(transform * offsets_4d, axis=3)

	sq_norm = T.sum(decorrelated ** 2, axis=2)
	return T.exp(-sq_norm)
Beispiel #56
0
    def compute_output(self, network, in_vw):
        """Create a shared bias and publish ``input + bias`` as the output.

        The bias has size 1 along every broadcastable axis and the input's
        size elsewhere.  The broadcast pattern comes from the
        ``broadcastable`` hyperparameter if set, otherwise from
        ``broadcastable_axes``, otherwise it defaults to the batch axis
        (when one is configured and lies within the input's dimensions).
        """
        init_groups = network.find_hyperparameters(["bias_inits", "inits"],
                                                   [])
        inits = list(toolz.concat(init_groups))

        # look up the broadcasting-related hyperparameters
        pattern = network.find_hyperparameter(["broadcastable"], None)
        axes = network.find_hyperparameter(["broadcastable_axes"], None)
        batch_axis = network.find_hyperparameter(["batch_axis"])

        # an explicit broadcastable pattern wins over broadcastable_axes
        if pattern is None:
            if axes is None:
                if batch_axis is None or batch_axis >= in_vw.ndim:
                    # no minibatch axis, or an input with too few
                    # dimensions to contain it: nothing to broadcast over
                    axes = []
                else:
                    # default: share the bias across the minibatch axis
                    axes = [batch_axis]
            pattern = [False] * in_vw.ndim
            for axis in axes:
                pattern[axis] = True

        assert len(pattern) == in_vw.ndim

        # broadcast axes collapse to length 1 in the stored parameter
        bias_shape = tuple(
            1 if flag else dim for flag, dim in zip(pattern, in_vw.shape))
        bias_vw = network.create_vw(
            name="bias",
            is_shared=True,
            shape=bias_shape,
            tags={"parameter", "bias"},
            inits=inits,
        )

        bias = bias_vw.variable
        # patternbroadcast seems to carry a small overhead, so it is only
        # applied when at least one axis actually broadcasts
        if any(pattern):
            bias = T.patternbroadcast(bias, pattern)

        network.create_vw(
            name="default",
            variable=(in_vw.variable + bias),
            shape=in_vw.shape,
            tags={"output"},
        )
def test_local_dimshuffle_subtensor():
    """Run ``local_dimshuffle_subtensor`` on a sliced, dimshuffled 4-D tensor.

    The input has its second axis marked broadcastable; the graph slices it
    and then drops that axis with ``dimshuffle(0, 2, 3)``.
    """
    optimizer = out2in(local_dimshuffle_subtensor)

    bcast_x = tensor.patternbroadcast(tensor.tensor4('x'),
                                      (False, True, False, False))
    step = tensor.iscalar('i')

    sliced = bcast_x[:, :, 10:30, ::step]
    out = sliced.dimshuffle(0, 2, 3)

    graph = FunctionGraph([bcast_x, step], [out])
    optimizer(graph)

    # NOTE(review): this passes as soon as one toposort entry is not a
    # DimShuffle instance, and it inspects the entries themselves rather
    # than any attribute of them -- confirm this is the intended check.
    assert any(not isinstance(node, DimShuffle)
               for node in graph.toposort())
Beispiel #58
0
    def test_broadcast(self):
        """Compile and run updates whose broadcast pattern was rewritten.

        The test passes when both compiled functions build and execute
        without raising; no output values are checked.
        """
        # Test that we can rebroadcast
        data = numpy.random.rand(10, 10).astype('float32')
        # NOTE(review): f32sc is defined outside this block; presumably it
        # builds a float32 shared variable -- confirm.
        output_var = f32sc(name="output", value=data)

        # Case 1: the summed value gets two new axes via dimshuffle('x', 'x')
        # and is passed through unbroadcast on axes 0 and 1 before being
        # used as the update for output_var.
        up = tensor.unbroadcast(output_var.sum().dimshuffle('x', 'x'), 0, 1)
        output_func = theano.function(inputs=[], outputs=[],
                                      updates=[(output_var, up)])
        output_func()

        # Case 2: same value, but its broadcast pattern is set to
        # output_var's own pattern with patternbroadcast.
        up = tensor.patternbroadcast(output_var.sum().dimshuffle('x', 'x'),
                                     output_var.type.broadcastable)
        output_func = theano.function(inputs=[], outputs=[],
                                      updates=[(output_var, up)])
        output_func()