Example #1
def pickling_variable(v, target=None):
    """ This function only apply for trainable parameters
    Warning
    -------
    This pickling method won't save "auxiliary_variables" and "updates"
    tag of variables
    """
    # load variable
    if isinstance(v, str):
        try:
            # if a path was given, read the pickled bytes from the file
            if os.path.exists(v):
                v = open(v, 'rb').read()
        except:
            pass
        name, value, dtype, roles = cPickle.loads(v)
        v = variable(value, dtype=dtype, name=name, target=target)
        for i in roles:
            add_role(v, i)
        return v
    elif is_trainable_variable(v):
        name = v.name if ':' not in v.name else v.name.split(':')[0]
        value = get_value(v)
        dtype = v.dtype.as_numpy_dtype if hasattr(
            v.dtype, 'as_numpy_dtype') else v.dtype
        # ====== shape and roles ====== #
        roles = getattr(v.tag, 'roles', [])
        return cPickle.dumps([name, value, dtype, roles],
                             protocol=cPickle.HIGHEST_PROTOCOL)
    else:
        raise Exception('Variable must be given in string form or as a '
                        'trainable variable (i.e. SharedVariable in Theano)')
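
For orientation, a minimal round-trip sketch of the payload format this helper produces (names and values below are illustrative, not taken from the snippet):

import cPickle
import numpy as np

# the pickled blob is just [name, value, dtype, roles]; feeding such a string
# back into pickling_variable() rebuilds the variable and re-attaches its roles
blob = cPickle.dumps(['W_hidden', np.zeros((4, 4), dtype='float32'),
                      np.float32, []],
                     protocol=cPickle.HIGHEST_PROTOCOL)
name, value, dtype, roles = cPickle.loads(blob)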
Example #2
    def get_mean_logsigma(self, x):
        b_mean = 0. if not hasattr(self, 'b_mean') else self.b_mean
        b_logsigma = 0. if not hasattr(self, 'b_logsigma') else self.b_logsigma
        mean = self.activation(K.dot(x, self.W_mean) + b_mean)
        logsigma = self.activation(K.dot(x, self.W_logsigma) + b_logsigma)
        mean.name = 'variational_mean'
        logsigma.name = 'variational_logsigma'
        add_role(mean, VARIATIONAL_MEAN)
        add_role(logsigma, VARIATIONAL_LOGSIGMA)
        return mean, logsigma
Example #3
def rnn(input_dim,
        hidden_dim,
        W_init=glorot_uniform,
        b_init=constant(0.),
        bidirectional=False,
        one_vector=False,
        return_variable=True,
        name=None):
    """ Fast initalize all Standard RNN weights

    Parameters
    ----------
    one_vector: bool
        if True, all the weights are flattened and concatenated into 1 big vector
    return_variable: bool
        if False, only return the numpy arrays (no backend variables are created)
    bidirectional: bool
        if True, return parameters for both forward and backward RNN

    Return
    ------
    [W_i, b_wi, R_h, b_wh]

    """
    if name is None: name = uuid()

    def init():
        W_i = W_init((input_dim, hidden_dim))
        b_wi = b_init((hidden_dim))
        R_h = W_init((hidden_dim, hidden_dim))
        b_wh = b_init((hidden_dim))
        return [W_i, b_wi, R_h, b_wh]

    params = init() + init() if bidirectional else init()
    roles = [WEIGHT, BIAS]
    if one_vector:
        params = [np.concatenate([p.flatten() for p in params])]
        roles = [PARAMETER]
    # names
    if one_vector:
        names = [name + '_rnn']
    else:
        names = ["_W_i", "_b_wi", "_R_h", "_b_wh"]
        if bidirectional:
            names = [i + '_fw' for i in names] + [i + '_bw' for i in names]
        names = [name + i for i in names]
    # create variable or not
    if return_variable:
        params = [variable(p, name=n) for p, n in zip(params, names)]
        for i, p in enumerate(params):
            add_role(p, roles[i % 2])
    return params if len(params) > 1 else params[0]
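
A hedged usage sketch of the initializer above (argument values are illustrative; per the code, the default call returns the four weights as backend variables, while one_vector=True returns a single flattened vector):

# illustrative call only: dimensions and names are made up
W_i, b_wi, R_h, b_wh = rnn(input_dim=128, hidden_dim=256, name='encoder_rnn')

# with one_vector=True everything is flattened into one PARAMETER-tagged vector
theta = rnn(input_dim=128, hidden_dim=256, one_vector=True, name='encoder_rnn')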
Example #4
    def set_inputs(self, *inputs):
        self._input_info = []
        self._inputs = []
        for i in inputs:
            if not K.is_placeholder(i):
                raise ValueError('Only placeholder inputs are accepted.')
            name, dtype, shape = i.name, i.dtype, K.get_shape(i)
            self._input_info.append([name, dtype, shape])
            self._inputs.append(i)
        # ====== Try to check if the inputs match the Ops ====== #
        try:
            # call this to initialize the parameters and get
            # estimated output shape (we assume training and deploying
            # mode get the same shape).
            for i in self._inputs:
                add_role(i, TRAINING)
            self._y_train = self._seq_ops(*self._inputs)

            for i in self._inputs:
                add_role(i, DEPLOYING)
            self._y_pred = self._seq_ops(*self._inputs)

            # create default output
            if len(self._output_info) == 0:
                shape = K.get_shape(self._y_train)
                self._outputs = [
                    K.placeholder(shape=shape,
                                  dtype=self._y_train.dtype,
                                  name='output1')
                ]
                self._output_info = [('output1', self._y_train.dtype, shape)]

            # reset all functions
            for i, j in self._functions.items():
                del self._functions[i]
                del j
            self._functions = {}
        except Exception as e:
            warnings.warn('Inputs do not match the Ops requirements, '
                          'error: ' + str(e))
            self._input_info = []
            self._inputs = []
Example #5
    def test_computational_graph2(self):
        np.random.seed(1208)

        X = K.variable(np.zeros((8, 12)), name='X')
        Y = K.variable(np.random.rand(12, 8), name='Y')
        Z = K.placeholder(shape=(8, 8), name='Z')
        a = K.dot(X, Y)
        add_role(a, AUXILIARY)
        add_updates(a, X, X + 12)
        a = a + Z
        g1 = K.ComputationGraph(a)

        self.assertEqual(len(g1.trainable_variables), 2)
        self.assertEqual(len(g1.placeholders), 1)
        self.assertEqual(len(g1.updates), 1)
        self.assertEqual(len(g1.auxiliary_variables), 1)

        f = K.function(Z, [a] + g1.auxiliary_variables)

        output = f(np.random.rand(8, 8))
        self.assertEqual(repr(np.sum(output[0]))[:5], "32.20")
        self.assertEqual(np.sum(output[1]), 0)
        self.assertEqual(np.unique(K.eval(X)).tolist(), [12.])
Example #6
def lstm(input_dim,
         hidden_dim,
         W_init=glorot_uniform,
         b_init=constant(0.),
         bidirectional=False,
         one_vector=False,
         return_variable=True,
         name=None):
    """ Fast initalize all Standard LSTM weights (without peephole connection)

    Parameters
    ----------
    one_vector: bool
        if True, all the weights are flattened and concatenated into 1 big vector
    return_variable: bool
        if False, only return the numpy arrays (no backend variables are created)
    bidirectional: bool
        if True, return parameters for both forward and backward RNN

    Return
    ------
    [W_i, b_wi, W_f, b_wf, W_c, b_wc, W_o, b_wo,
     R_i, b_ri, R_f, b_rf, R_c, b_rc, R_o, b_ro]

    """
    if name is None: name = uuid()

    def init():
        # input to hidden
        W_i = W_init((input_dim, hidden_dim))
        b_wi = b_init((hidden_dim))
        W_f = W_init((input_dim, hidden_dim))
        b_wf = b_init((hidden_dim))
        W_c = W_init((input_dim, hidden_dim))
        b_wc = b_init((hidden_dim))
        W_o = W_init((input_dim, hidden_dim))
        b_wo = b_init((hidden_dim))
        # hidden to hidden
        R_i = W_init((hidden_dim, hidden_dim))
        b_ri = b_init((hidden_dim))
        R_f = W_init((hidden_dim, hidden_dim))
        b_rf = b_init((hidden_dim))
        R_c = W_init((hidden_dim, hidden_dim))
        b_rc = b_init((hidden_dim))
        R_o = W_init((hidden_dim, hidden_dim))
        b_ro = b_init((hidden_dim))
        return [
            W_i, b_wi, W_f, b_wf, W_c, b_wc, W_o, b_wo, R_i, b_ri, R_f, b_rf,
            R_c, b_rc, R_o, b_ro
        ]

    params = init() + init() if bidirectional else init()
    roles = [WEIGHT, BIAS]
    if one_vector:
        params = [np.concatenate([p.flatten() for p in params])]
        roles = [PARAMETER]
    # names
    if one_vector:
        names = [name + '_lstm']
    else:
        names = [
            "_W_i", "_b_wi", "_W_f", "_b_wf", "_W_c", "_b_wc", "_W_o", "_b_wo",
            "_R_i", "_b_ri", "_R_f", "_b_rf", "_R_c", "_b_rc", "_R_o", "_b_ro"
        ]
        if bidirectional:
            names = [i + '_fw' for i in names] + [i + '_bw' for i in names]
        names = [name + i for i in names]
    # create variable or not
    if return_variable:
        params = [variable(p, name=n) for p, n in zip(params, names)]
        for i, p in enumerate(params):
            add_role(p, roles[i % 2])
    return params if len(params) > 1 else params[0]
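
As with the RNN initializer, a hedged usage sketch (values are illustrative):

# returns the 16 weights/biases listed in the docstring; bidirectional=True
# doubles that to 32 (forward + backward), each tagged WEIGHT or BIAS
lstm_params = lstm(input_dim=128, hidden_dim=256, bidirectional=True,
                   name='encoder_lstm')
assert len(lstm_params) == 32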
Example #7
    def create_params(self, spec, shape, name, nnops, roles=[], nb_params=1):
        """
        Parameters
        ----------
        spec: variable, numpy.ndarray, function
            specification for initializing the weights
        shape: tuple, list
            expected shape for given variable
        name: str
            name for the variable
        nnops: NNOps
            parent operator of this parameters
        roles: odin.basic.VariableRole
            categories of this variable
        nb_params: int
            number of parameters that are horizontally stacked into the
            given `shape` (e.g. nb_params=2 creates 2 parameters with the
            given `shape` and horizontally stacks them into 1 parameter)
            * NOT supported when `spec` is a variable.
        """
        if not isinstance(roles, (tuple, list)):
            roles = [roles]
        if not isinstance(nnops, NNOps):
            raise Exception('nnops must be instance of odin.nnet.base.NNOps')

        shape = tuple(shape)  # convert to tuple if needed
        if any(d <= 0 for d in shape):
            raise ValueError(
                ("Cannot create param with a non-positive shape dimension. "
                 "Tried to create param with shape=%r, name=%r") %
                (shape, name))

        # ====== create parameters ====== #
        spec = as_tuple(spec, nb_params)
        spec = [_initialize_param(name, s, shape) for s in spec]
        # check shape returned
        shape = list(set([i[-1] for i in spec]))
        if len(shape) > 1:
            raise Exception(
                'shapes are inconsistent among all given "spec", the '
                'created shapes are: %s' % str(shape))
        shape = shape[0]
        # check spec returned
        spec = [i[0] for i in spec]
        if isinstance(spec[0], np.ndarray):
            with K.variable_scope(nnops.name):
                spec = np.concatenate(spec, axis=-1)
                shape = spec.shape
                spec = K.variable(spec, name=name)
        elif K.is_trainable_variable(spec[0]):
            if nb_params > 1:
                with K.variable_scope(nnops.name):
                    spec = np.concatenate([K.get_value(i) for i in spec],
                                          axis=-1)
                    shape = spec.shape
                    spec = K.variable(spec, name=name)
            else:
                spec = spec[0]
        elif K.is_variable(spec[0]):
            shape = (shape[0] * nb_params,) if len(shape) == 1 \
                else shape[:-1] + (shape[-1] * nb_params,)
            spec = K.concatenate(spec, axis=-1)
        # ====== assign annotations ====== #
        # only add role for trainable variables
        for i in roles:
            if isinstance(i, VariableRole) and K.is_trainable_variable(spec):
                add_role(spec, i)
        # return actual variable or expression
        # override other parameters with same name
        self._variables[name] = spec
        # set parameter attribute for NNOps
        setattr(nnops, name, spec)
        return spec
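
To illustrate the nb_params behaviour described in the docstring, a self-contained numpy sketch (not the library code):

import numpy as np

# two parameters of shape (4, 3) stacked along the last axis give one (4, 6)
# parameter, mirroring what create_params() does for ndarray specs with nb_params=2
p1, p2 = np.random.rand(4, 3), np.random.rand(4, 3)
stacked = np.concatenate([p1, p2], axis=-1)
assert stacked.shape == (4, 6)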
Example #8
def randrectify(x, lower=0.3, upper=0.8, shared_axes='auto'):
    """ This function is adpated from Lasagne
    Original work Copyright (c) 2014-2015 lasagne contributors
    All rights reserved.
    LICENSE: https://github.com/Lasagne/Lasagne/blob/master/LICENSE

    Applies a randomized leaky rectify activation to x.

    The randomized leaky rectifier was first proposed and used in the Kaggle
    NDSB Competition, and later evaluated in [1]_. Compared to the standard
    leaky rectifier :func:`leaky_rectify`, it has a randomly sampled slope
    for negative input during training, and a fixed slope during evaluation.

    Equation for the randomized rectifier linear unit during training:
    :math:`\\varphi(x) = \\max((\\sim U(lower, upper)) \\cdot x, x)`

    During evaluation, the factor is fixed to the arithmetic mean of `lower`
    and `upper`.

    Parameters
    ----------
    lower : Theano shared variable, expression, or constant
        The lower bound for the randomly chosen slopes.

    upper : Theano shared variable, expression, or constant
        The upper bound for the randomly chosen slopes.

    shared_axes : 'auto', 'all', int or tuple of int
        The axes along which the random slopes of the rectifier units are
        going to be shared. If ``'auto'`` (the default), share over all axes
        except for the second - this will share the random slope over the
        minibatch dimension for dense layers, and additionally over all
        spatial dimensions for convolutional layers. If ``'all'``, share over
        all axes, thus using a single random slope.

    References
    ----------
    .. [1] Bing Xu, Naiyan Wang et al. (2015):
       Empirical Evaluation of Rectified Activations in Convolutional Network,
       http://arxiv.org/abs/1505.00853
    """
    input_shape = get_shape(x)
    # ====== check lower and upper ====== #
    if is_trainable_variable(lower):
        add_role(lower, ACTIVATION_PARAMETER)
        lower.name = 'lower'
    if is_trainable_variable(upper):
        add_role(upper, ACTIVATION_PARAMETER)
        upper.name = 'upper'
    if not is_variable(lower > upper) and lower > upper:
        raise ValueError("Upper bound for Randomized Rectifier needs "
                         "to be higher than lower bound.")
    # ====== check shared_axes ====== #
    if shared_axes == 'auto':
        shared_axes = (0, ) + tuple(range(2, len(input_shape)))
    elif shared_axes == 'all':
        shared_axes = tuple(range(len(input_shape)))
    elif isinstance(shared_axes, int):
        shared_axes = (shared_axes, )
    else:
        shared_axes = tuple(shared_axes)
    # ====== main logic ====== #
    if not is_training() or upper == lower:
        x = relu(x, (upper + lower) / 2.0)
    else:  # Training mode
        shape = list(input_shape)
        if builtins.any(s is None for s in shape):
            shape = list(x.shape)
        for ax in shared_axes:
            shape[ax] = 1

        rnd = random_uniform(tuple(shape), low=lower, high=upper, dtype=FLOATX)
        rnd = addbroadcast(rnd, *shared_axes)
        x = relu(x, rnd)
    add_shape(x, input_shape)
    return x
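
A self-contained numpy sketch of the behaviour described in the docstring (purely illustrative; the real implementation above works on symbolic backend tensors):

import numpy as np

def rand_rectify_np(x, lower=0.3, upper=0.8, training=True):
    # training: negative inputs are scaled by a slope drawn from U(lower, upper);
    # evaluation: the slope is fixed to the arithmetic mean of the bounds
    slope = np.random.uniform(lower, upper, size=x.shape) if training \
        else (lower + upper) / 2.0
    return np.where(x >= 0, x, slope * x)

print(rand_rectify_np(np.array([-2.0, -0.5, 0.0, 1.5]), training=False))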