Example #1
 def step(x_t,y_t,h_tm1,Wx,Wh,bh,Wy,by,lr,switch):
     h_t = relu(T.dot(x_t,Wx)+T.dot(h_tm1,Wh)+bh)
     yo_t = relu(T.dot(h_t,Wy)+by)
     
     updates = OrderedDict()
     
     # Train the RNN: backprop (loss + DNI output)
     loss = T.mean(T.square(yo_t-y_t))
     dni_out = self.dni.output(h_t)
     for param in self.params:
         dlossdparam = T.grad(loss,param)
         dniJ = T.Lop(h_t,param,dni_out,disconnected_inputs='ignore')
         updates[param] = param-lr*T.switch(T.gt(switch,0),
                                            dlossdparam+dniJ,
                                            dlossdparam)
                     
     # Update the DNI (from the last step)
     # re-calculate the DNI prediction from the last step
     # note: can't be passed through scan or T.grad won't work
     dni_out_old = self.dni.output(h_tm1)
     # dni_target: current loss backprop'ed + new dni backprop'ed
     dni_target = T.grad(loss,h_tm1) \
                  +T.Lop(h_t,h_tm1,dni_out)
     dni_error = T.sum(T.square(dni_out_old-dni_target))
     for param in self.dni.params:
         gparam = T.grad(dni_error,param)
         updates[param] = param-lr*gparam
     
     return [h_t,loss,dni_error],updates
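A hedged sketch of the DNI term used above, reduced to one input and one made-up weight matrix (Wx_demo): T.Lop(h_t, param, dni_out) is the synthetic gradient backpropagated through one step, i.e. T.grad of the inner product dni_out . h_t with dni_out held constant.

# Illustrative only; Wx_demo and the toy graph are assumptions, not the model above.
import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
dni_out = T.vector('dni_out')
Wx = theano.shared(np.ones((3, 3), dtype=theano.config.floatX), name='Wx_demo')

h_t = T.nnet.relu(T.dot(x, Wx))

dniJ_lop = T.Lop(h_t, Wx, dni_out)
dniJ_ref = T.grad(T.sum(dni_out * h_t), Wx, consider_constant=[dni_out])

f = theano.function([x, dni_out], [dniJ_lop, dniJ_ref])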
Example #2
    def test_multiple_outputs(self):
        m = tensor.matrix('m')
        v = tensor.vector('v')
        m_ = tensor.matrix('m_')
        v_ = tensor.vector('v_')

        mval = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
        vval = self.rng.uniform(size=(7, )).astype(theano.config.floatX)
        m_val = self.rng.uniform(size=(3, 7)).astype(theano.config.floatX)
        v_val = self.rng.uniform(size=(7, )).astype(theano.config.floatX)

        rop_out1 = tensor.Rop([m, v, m + v], [m, v], [m_, v_])
        assert isinstance(rop_out1, list)
        assert len(rop_out1) == 3
        rop_out2 = tensor.Rop((m, v, m + v), [m, v], [m_, v_])
        assert isinstance(rop_out2, tuple)
        assert len(rop_out2) == 3
        lop_out1 = tensor.Lop([m, v, m + v], (m, v), [m_, v_])
        assert isinstance(lop_out1, tuple)
        assert len(lop_out1) == 2
        lop_out2 = tensor.Lop((m, v, m + v), [m, v], [m_, v_])
        assert isinstance(lop_out2, list)
        assert len(lop_out2) == 2

        all_outs = []
        for o in rop_out1, rop_out2, lop_out1, lop_out2:
            all_outs.extend(o)
        f = theano.function([m, v, m_, v_], all_outs)
        f(mval, vval, m_val, v_val)
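A minimal, self-contained sketch (not taken from any of the projects above) of the identity these tests rely on: Lop(y, x, v) is the vector-Jacobian product v . dy/dx, so on a differentiable graph it matches T.grad of the scalar surrogate (v * y).sum().

# Hedged sketch: verify T.Lop against T.grad on a toy elementwise graph.
import numpy as np
import theano
import theano.tensor as T

x = T.vector('x')
v = T.vector('v')                      # must have the shape of y
y = T.tanh(x) * 3                      # any differentiable graph

vJ_lop = T.Lop(y, x, v)                # v . dy/dx without building the Jacobian
vJ_ref = T.grad((v * y).sum(), x)      # same quantity via a scalar surrogate

f = theano.function([x, v], [vJ_lop, vJ_ref])
a, b = f(np.random.rand(5).astype(theano.config.floatX),
         np.random.rand(5).astype(theano.config.floatX))
assert np.allclose(a, b)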
Example #3
            def Gv_step(*gv_args):
                idx = TT.cast(gv_args[0], 'int32')
                nw_inps = [x[idx * options['cbs']: \
                             (idx + 1) * options['cbs']] for x in
                           loc_inputs]
                replace = dict(zip(model.inputs, nw_inps))
                nw_outs = safe_clone(model.gc_outs, replace)
                final_results = dict(
                    zip(model.params, [None] * len(model.params)))
                for nw_out, out_operator in zip(nw_outs,
                                                model.gc_outs_operator):
                    loc_params = [
                        x for x in model.params
                        if x in theano.gof.graph.inputs([nw_out])
                    ]
                    loc_args = [
                        x for x, y in zip(args, model.params)
                        if y in theano.gof.graph.inputs([nw_out])
                    ]
                    if out_operator == 'softmax':
                        factor = const(options['cbs']) * (nw_out + eps)
                    elif out_operator == 'sigmoid':
                        factor = const(
                            options['cbs'])  # * nw_out * (1 - nw_out)
                    else:
                        factor = const(options['cbs'])
                    if out_operator != 'sigmoid':
                        loc_Gvs = TT.Lop(nw_out, loc_params,
                                     TT.Rop(nw_out, loc_params, loc_args) /\
                                     factor)
                    else:
                        tnwout = TT.nnet.sigmoid(nw_out)
                        loc_Gvs = TT.Lop(nw_out, loc_params,
                                         TT.Rop(nw_out, loc_params,
                                                loc_args) *\
                                         tnwout * (1 - tnwout)/ factor)

                    for lp, lgv in zip(loc_params, loc_Gvs):
                        if final_results[lp] is None:
                            final_results[lp] = lgv
                        else:
                            final_results[lp] += lgv

                Gvs = [
                    ogv + final_results[param]
                    for (ogv, param) in zip(gv_args[1:], model.params)
                ]
                return [gv_args[0] + const(1)] + Gvs

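                # NOTE: the code below is unreachable; it is an alternative Gv
                # computation left in place after the return above.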
                nw_cost, nw_preactiv_out = safe_clone(
                    [model.train_cost, model.preactiv_out], replace)
                nw_gvs = TT.Lop(
                    nw_preactiv_out, model.params,
                    TT.Rop(TT.grad(nw_cost, nw_preactiv_out), model.params,
                           args))

                Gvs = [ogv + ngv for (ogv, ngv) in zip(gv_args[1:], nw_gvs)]
                return [gv_args[0] + const(1)] + Gvs
Example #4
def hypergrad(params_ele,
              params_hyper,
              dvalid_dtheta,
              loss_ele,
              loss_hyper,
              loss_ele_penalty=0.):
    """ Function defining the hypergradients: gradients of validation cost
        with respect to various hyperparameters.     
    
        The function is separating penalty hyperparameters 
        (which is assumed to depend only on w) from noise and other hyperparameters,
        due to otherwise dependancy errors in the Lop operator.
        
        Inputs: 
        
        paramsT1, paramsT2 :: T1 and T2 parameters
        c1, c2 :: cross-entropy on training and validation set
        p1, p2 :: penalty terms on training and validation set (p2 assumed 0)
        
    """
    # initializations
    reg_penalty, reg_noise, grad_penalty, grad_noise, w, dvalid_dw = [], [], [], [], [], []

    # separate different types of parameters
    for regular in params_hyper:
        reg_type, _ = regular.name.split('_')
        if reg_type in penalty_list:
            reg_penalty += [regular]
        elif reg_type in noise_list:
            reg_noise += [regular]
        else:
            print 'Hypergrad not implemented for ', reg_type

    # separate weight parameters and gradients
    for (param, grad) in zip(params_ele, dvalid_dtheta):
        paramType, _ = param.name.split('_')
        if paramType == 'W':
            w += [param]
            dvalid_dw += [grad]

    # hyper-gradients
    if reg_penalty:
        dpenalty_dw = T.grad(loss_ele_penalty, w)
        dpenalty_dw = [-grad for grad in dpenalty_dw]
        grad_penalty = T.Lop(dpenalty_dw, reg_penalty, dvalid_dw)
    if reg_noise:
        dele_dtheta = T.grad(loss_ele, params_ele)
        dele_dtheta = [-grad for grad in dele_dtheta]
        grad_noise = T.Lop(dele_dtheta, reg_noise, dvalid_dtheta)

    # outputs
    params_hyper = reg_penalty + reg_noise
    dvalid_dgamma = grad_penalty + grad_noise

    return params_hyper, dvalid_dgamma
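A hedged sketch of the chain rule this function relies on, with a single weight vector and one hypothetical L2 penalty hyperparameter (W_demo and lam_demo are made-up names): the hypergradient is the validation gradient contracted, via T.Lop, with the Jacobian of the negated penalty gradient with respect to the hyperparameter.

# Illustrative only; not the authors' full model.
import numpy as np
import theano
import theano.tensor as T

w = theano.shared(np.ones(3, dtype=theano.config.floatX), name='W_demo')
lam = theano.shared(np.asarray(0.1, dtype=theano.config.floatX), name='lam_demo')

penalty = lam * T.sum(w ** 2)              # training penalty, depends only on w and lam
dpenalty_dw = [-g for g in T.grad(penalty, [w])]
dvalid_dw = [2 * w]                        # stand-in for the validation gradient

# dC_valid/dlam = dvalid_dw . d(-dpenalty/dw)/dlam, the same T.Lop call as above
grad_penalty = T.Lop(dpenalty_dw, [lam], dvalid_dw)
print(theano.function([], grad_penalty)())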
Example #5
def hypergrad(paramsT1, paramsT2, gradC2T1, c1, c2, p1=0., p2=0.):
    ''' Function defining the hypergradients: gradients of the validation cost
        with respect to various hyperparameters.

        The function separates penalty hyperparameters (which are assumed to
        depend only on W) from noise and other hyperparameters, because handling
        them together leads to dependency errors in the Lop operator.

        Inputs:

        paramsT1, paramsT2 :: T1 and T2 parameters
        gradC2T1 :: gradient of the validation cost C2 w.r.t. the T1 parameters
        c1, c2 :: cross-entropy on training and validation set
        p1, p2 :: penalty terms on training and validation set (p2 assumed 0)

    '''
    # initializations
    rglrzPenal = []
    rglrzNoiz = []
    gradPenal = []
    gradNoiz = []
    W = []
    gradC2W = []

    # separate different types of parameters
    for rglrz in paramsT2:
        rglrzType, _ = rglrz.name.split('_')
        if rglrzType in penalList:
            rglrzPenal += [rglrz]
        elif rglrzType in noizList:
            rglrzNoiz += [rglrz]
        else:
            print 'Hypergrad not implemented for ', rglrzType

    # separate weight parameters and gradients
    for (param, grad) in zip(paramsT1, gradC2T1):
        paramType, _ = param.name.split('_')
        if paramType == 'W':
            W += [param]
            gradC2W += [grad]

    # hyper-gradients
    if rglrzPenal != []:
        gradPW = T.grad(p1, W)
        gradPW = [-grad for grad in gradPW]
        gradPenal = T.Lop(gradPW, rglrzPenal, gradC2W)
    if rglrzNoiz != []:
        gradE1T1 = T.grad(c1, paramsT1)
        gradE1T1 = [-grad for grad in gradE1T1]
        gradNoiz = T.Lop(gradE1T1, rglrzNoiz, gradC2T1)

    # outputs
    paramsT2 = rglrzPenal + rglrzNoiz
    gradC2T2 = gradPenal + gradNoiz

    return paramsT2, gradC2T2
Example #6
    def Gvs(self, *args):
        # Contribution of hid_sig
        nw_args1 = TT.Lop(
            self.hid_sig, self.params,
            TT.Rop(self.hid_sig, self.params, args) /
            ((1 - self.hid_sig) * self.hid_sig * self.mbs))
        nw_args2 = TT.Lop(
            self.hid_sftmax, self.params,
            TT.Rop(self.hid_sftmax, self.params, args) /
            (self.hid_sftmax * self.mbs))

        return [x + y for x, y in zip(nw_args1, nw_args2)]
Example #7
def reinforce_no_baseline(params, policy, cost, lr, regularising_cost=None):
    """
    return reinforce updates
    @policy and @cost should be of shape (minibatch_size, 1)
    @policy should be the probability of the sampled actions
    """
    log_pol = T.log(policy)
    if regularising_cost is None:
        return [(i, i - lr * gi) for i, gi in zip(
            params, T.Lop(f=log_pol, wrt=params, eval_points=cost))]
    else:
        return [(i, i - lr * (gi + gr)) for i, gi, gr in zip(
            params, T.Lop(f=log_pol, wrt=params, eval_points=cost),
            T.grad(regularising_cost, params))]
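A hedged sketch of the identity this update exploits (toy Bernoulli policy, made-up names): T.Lop(f=log_pol, wrt=params, eval_points=cost) accumulates cost_i * d log pi_i / d theta over the minibatch, i.e. the score-function gradient without a baseline.

# Illustrative only; 'theta' and the toy policy below are assumptions.
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')                           # features of the sampled actions
theta = T.vector('theta')
cost = T.vector('cost')                     # one return per sampled action

policy = T.nnet.sigmoid(T.dot(x, theta))    # probability of the sampled action
log_pol = T.log(policy)

g_lop = T.Lop(f=log_pol, wrt=theta, eval_points=cost)
g_ref = T.grad(T.sum(cost * log_pol), theta)    # cost does not depend on theta

f = theano.function([x, theta, cost], [g_lop, g_ref])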
Example #8
 def compute_Gv(*args):
     (hid_sig, hid_sftmax) = self.get_hiddens()
     nw_args1 = TT.Lop(
         hid_sig, self.params,
         TT.Rop(hid_sig, self.params, args) /
         ((1 - hid_sig) * hid_sig * self.batchsize))
     nw_args2 = TT.Lop(
         hid_sftmax, self.params,
         TT.Rop(hid_sftmax, self.params, args) /
         (hid_sftmax * self.batchsize))
     fin_vals = [x + y for x, y in zip(nw_args1, nw_args2)]
     new_vals = safe_clone(fin_vals, [self.X, self.Y],
                           [self.loc_x, self.loc_y])
     return new_vals, {}
Example #9
 def setup(self, bottom, top):
     input = T.tensor4("input")
     v = T.matrix("v")
     result = T.sum(input, axis=(2, 3))
     result_g = T.Lop(result, input, v)
     self.f = theano.function([input], result)
     self.b = theano.function([input, v], result_g)
Example #10
            def Gv_step(*gv_args):
                idx = TT.cast(gv_args[0], 'int32')
                nw_inps = [x[idx * options['cbs']: \
                             (idx + 1) * options['cbs']] for x in
                           loc_inputs]
                replace = dict(zip(model.inputs, nw_inps))
                nw_outs = safe_clone(model.outs, replace)
                final_results = dict(zip(model.params, [None] * len(model.params)))
                for nw_out, out_operator in zip(nw_outs, model.outs_operator):
                    loc_params = [x for x in model.params
                                  if x in theano.gof.graph.inputs([nw_out])]
                    loc_args = [x for x, y in zip(cgv, model.params)
                                if y in theano.gof.graph.inputs([nw_out])]
                    if out_operator == 'softmax':
                        factor = const(options['cbs']) * nw_out
                    elif out_operator == 'sigmoid':
                        factor = const(options['cbs']) * nw_out * (1 - nw_out)
                    else:
                        factor = const(options['cbs'])

                    loc_Gvs = TT.Lop(nw_out, loc_params,
                                     TT.Rop(nw_out, loc_params, loc_args) /\
                                     factor)

                    for lp, lgv in zip(loc_params, loc_Gvs):
                        if final_results[lp] is None:
                            final_results[lp] = lgv
                        else:
                            final_results[lp] += lgv

                Gvs = [ogv + final_results[param]
                       for (ogv, param) in zip(gv_args[1:], model.params)]
                return [gv_args[0] + const(1)] + Gvs
Example #11
def create_esgd_updates(updates, params, gparams, gsums, xsums, lr, eps, gamma,
                        momentum):
    has_momentum = momentum.get_value() > 0.0
    samples = [
        default_mrng.normal(size=p.shape,
                            avg=0,
                            std=1,
                            dtype=theano.config.floatX) for p in params
    ]
    HVs = T.Lop(gparams, params, samples)

    i = theano.shared(np.float64(0.0).astype(theano.config.floatX))
    i_t = i + 1.0
    omg_t = 1.0 - gamma**i_t
    for p, g, m, D, Hv in zip(params, gparams, gsums, xsums, HVs):
        if is_subtensor_op(p):
            raise Exception("ESGD subtensor update not implemented!")
        else:
            D_t = D * gamma + T.sqr(Hv) * (1.0 - gamma)
            if has_momentum:
                m_t = m * momentum + g
                updates[m] = m_t
            else:
                m_t = g
            g_t = m_t / (T.sqrt(D_t / omg_t + eps))
            updates[D] = D_t
            updates[p] = p - lr * g_t
    updates[i] = i_t
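A minimal sketch (single parameter vector, toy cost; not the full ESGD update) of the trick behind T.Lop(gparams, params, samples): the Jacobian of the gradient is the Hessian, so Lop of the gradient gives Hessian-vector products with the sampled Gaussian vectors.

# Hedged sketch: Lop applied to a gradient yields a Hessian-vector product.
import numpy as np
import theano
import theano.tensor as T

w = T.vector('w')
v = T.vector('v')
cost = T.sum(T.exp(w) + w ** 2)        # any twice-differentiable scalar cost

g = T.grad(cost, w)
Hv_lop = T.Lop(g, w, v)                # v . dg/dw, equals H v (H is symmetric)
Hv_ref = T.grad(T.sum(g * v), w, consider_constant=[v])

f = theano.function([w, v], [Hv_lop, Hv_ref])
a, b = f(np.random.rand(4).astype(theano.config.floatX),
         np.random.rand(4).astype(theano.config.floatX))
assert np.allclose(a, b)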
Example #12
            def compute_Ax(x):

                # There are three ways to compute the Fisher-vector product:

                # 1. https://github.com/joschu/modular_rl/blob/master/modular_rl/trpo.py#L54
                # Use theano.gradient.disconnected_grad and call theano.tensor.grad() twice.
                # WARNING: In our case (with the attention mechanism) it is extremely slow.

                # 2. http://deeplearning.net/software/theano/tutorial/gradients.html#hessian-times-a-vector
                # Use only theano.tensor.Rop, but you will need to calculate the fixed_output outside
                # of the compiled function, because disconnected_grad will not work with Rop.

                # 3. https://github.com/pascanur/natgrad/blob/master/model_convMNIST_standard.py
                # Rop divided by the output, because the metric F is based on the gradient of log(output).
                # Here we also split the vector of parameters. Not checked, but it may be
                # faster than supplying a few vectors to minresQLP.

                xs = []
                offset = 0
                for p in params:
                    shape = p.get_value().shape
                    size = np.prod(shape)
                    xs.append(x[offset:offset + size].reshape(shape))
                    offset += size

                jvp = T.Rop(new_output, params, xs) / (
                    new_output * self.batch_size * self.history + TINY)
                fvp = T.Lop(new_output, params, jvp)
                fvp = T.concatenate([g.flatten() for g in fvp])

                return [fvp], {}
Example #13
    def __init__(self, t_cost, t_traj_info, t_inputs, params, reg=1e-5):
        t_new_params = [
            _np2theano(p.name, p.get_value(borrow=True)) for p in params
        ]

        t_mean = t_traj_info['act_mean']
        t_mean = t_mean.reshape((-1, t_mean.shape[-1]))
        t_logstd = t_traj_info['act_logstd']
        t_logstd = t_logstd.reshape((-1, t_logstd.shape[-1]))
        t_new_mean = t_traj_info['new_act_mean']
        t_new_mean = t_new_mean.reshape((-1, t_new_mean.shape[-1]))
        t_new_logstd = t_traj_info['new_act_logstd']
        t_new_logstd = t_new_logstd.reshape((-1, t_new_logstd.shape[-1]))

        print 'Compiling cost function ... ',
        s = time()
        self.cost = theano.function(inputs=t_inputs,
                                    outputs=t_cost,
                                    on_unused_input='ignore')
        print 'finished in %f seconds' % (time() - s)

        print 'Building cost grad function ... ',
        s = time()
        _t_cost_grad = T.grad(-t_cost, wrt=params)
        print 'finished in %f seconds' % (time() - s)

        print 'Compiling cost grad function ... ',
        s = time()
        self._cost_grad = theano.function(inputs=t_inputs,
                                          outputs=[t_cost] + _t_cost_grad,
                                          on_unused_input='ignore')
        print 'finished in %f seconds' % (time() - s)

        print 'Building Hx function ... ',
        s = time()
        mu = T.concatenate([t_new_mean, t_new_logstd], axis=-1)
        Jx = sum([T.Rop(mu, p, x) for (p, x) in zip(params, t_new_params)])
        M = T.tile(T.eye(2), (mu.shape[0], 1, 1))
        Jx = Jx.reshape((Jx.shape[0], Jx.shape[1], 1))
        Jx = T.tile(Jx, (1, 1, Jx.shape[1]))
        MJx = Jx
        JMJx = [
            T.Lop(MJx, p, x, disconnected_inputs='ignore')
            for (p, x) in zip(params, t_new_params)
        ]
        Hx = [h + reg * p for (h, p) in zip(JMJx, t_new_params)]
        print 'finished in %f seconds' % (time() - s)

        # TODO: Use mask to handle  different lengths.

        print 'Compiling Hx function ...',
        s = time()
        self._constraint_Hx = theano.function(inputs=t_inputs + t_new_params,
                                              outputs=Hx,
                                              on_unused_input='ignore')

        self.constraint_Hx = lambda inputs, params: self._constraint_Hx(*(
            inputs + params))
        print 'finished in %f seconds' % (time() - s)
Example #14
 def mean_weighted_grad(weights, loss):
     # Lop to the rescue! Here I was calling T.jacobian and trying to
     # broadcast things and elementwise-multiply through the resulting lists,
     # when a function already existed to do all of that for me...
     return T.Lop(loss,
                  params,
                  weights / T.cast(weights.shape[0], 'float32'),
                  disconnected_inputs='ignore')
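A hedged sketch of what mean_weighted_grad computes, assuming loss is a per-example vector: Lop(loss, params, weights / N) equals the gradient of mean(weights * loss), without materialising the Jacobian that T.jacobian would build.

# Illustrative only; the toy per-example loss below is an assumption.
import numpy as np
import theano
import theano.tensor as T

w = T.vector('w')
weights = T.vector('weights')
loss = w ** 2                           # one loss entry per "example"

n = T.cast(weights.shape[0], 'float32')
g_lop = T.Lop(loss, w, weights / n)
g_ref = T.grad(T.mean(weights * loss), w)

f = theano.function([w, weights], [g_lop, g_ref])
a, b = f(np.arange(3, dtype=theano.config.floatX),
         np.ones(3, dtype=theano.config.floatX))
assert np.allclose(a, b)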
Example #15
    def check_rop_lop(self, y, out_shape):
        """
        As check_mat_rop_lop, except the input is self.x which is a
        vector. The output is still a vector.
        """
        # TEST ROP
        vx = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)
        vv = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)

        yv = tensor.Rop(y, self.x, self.v)
        rop_f = function([self.x, self.v], yv, on_unused_input="ignore")
        J, _ = theano.scan(
            lambda i, y, x: tensor.grad(y[i], x),
            sequences=tensor.arange(y.shape[0]),
            non_sequences=[y, self.x],
        )
        sy = tensor.dot(J, self.v)

        scan_f = function([self.x, self.v], sy, on_unused_input="ignore")

        v1 = rop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), "ROP mismatch: %s %s" % (v1, v2)

        known_fail = False
        try:
            tensor.Rop(theano.clone(y, replace={self.x: break_op(self.x)}),
                       self.x, self.v)
        except ValueError:
            known_fail = True

        # TEST LOP

        vx = np.asarray(self.rng.uniform(size=self.in_shape),
                        theano.config.floatX)
        vv = np.asarray(self.rng.uniform(size=out_shape), theano.config.floatX)

        yv = tensor.Lop(y, self.x, self.v)
        lop_f = function([self.x, self.v], yv, on_unused_input="ignore")
        J, _ = theano.scan(
            lambda i, y, x: tensor.grad(y[i], x),
            sequences=tensor.arange(y.shape[0]),
            non_sequences=[y, self.x],
        )
        sy = tensor.dot(self.v, J)

        scan_f = function([self.x, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), "LOP mismatch: %s %s" % (v1, v2)

        if known_fail:
            pytest.skip("Rop does not handle non-differentiable inputs "
                        "correctly. Bug exposed by fixing Add.grad method.")
Example #16
 def setup(self, bottom, top):
     import theano.tensor as T
     import theano
     x = T.dvector('x')
     v = T.dvector('v')
     y = x * 2
     yg = T.Lop(y, x, v)
     self.f = theano.function([x], y)
     self.b = theano.function([x, v], yg, on_unused_input='warn')
Example #17
def test_rop_lop():
    mx = tensor.matrix('mx')
    mv = tensor.matrix('mv')
    v = tensor.vector('v')
    y = matrix_inverse(mx).sum(axis=0)

    yv = tensor.Rop(y, mx, mv)
    yv2 = tensor.Rop_via_Lop(y, mx, mv)
    rop_f = function([mx, mv], [yv, yv2])

    sy, _ = theano.scan(lambda i, y, x, v: (tensor.grad(y[i], x) * v).sum(),
                        sequences=tensor.arange(y.shape[0]),
                        non_sequences=[y, mx, mv])
    scan_f = function([mx, mv], sy)

    rng = np.random.RandomState(utt.fetch_seed())
    vx = np.asarray(rng.randn(4, 4), theano.config.floatX)
    vv = np.asarray(rng.randn(4, 4), theano.config.floatX)

    v1 = scan_f(vx, vv)
    v2, v3 = rop_f(vx, vv)

    assert _allclose(v2, v1), ('Rop mismatch: %s %s' % (v2, v1))
    assert _allclose(v3, v1), ('Rop_via_Lop mismatch: %s %s' % (v3, v1))

    raised = False
    try:
        tensor.Rop(theano.clone(y, replace={mx: break_op(mx)}), mx, mv)
    except ValueError:
        raised = True
    if not raised:
        raise Exception(('Op did not raise an error even though the function'
                         ' is not differentiable'))

    try:
        tensor.Rop_via_Lop(theano.clone(y, replace={mx: break_op(mx)}), mx, mv)
    except theano.gradient.NullTypeGradError:
        raised = True
    except theano.gradient.DisconnectedInputError:
        raised = True

    if not raised:
        raise Exception((
            'Rop_via_Lop for Op did not raise an error even though the function'
            ' is not differentiable'))

    vv = np.asarray(rng.uniform(size=(4, )), theano.config.floatX)
    yv = tensor.Lop(y, mx, v)
    lop_f = function([mx, v], yv)

    sy = tensor.grad((v * y).sum(), mx)
    scan_f = function([mx, v], sy)

    v1 = lop_f(vx, vv)
    v2 = scan_f(vx, vv)
    assert _allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
Example #18
    def check_rop_lop(self, y, out_shape):
        """
        As check_mat_rop_lop, except the input is self.x which is a
        vector. The output is still a vector.

        """
        # TEST ROP
        vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)
        vv = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)

        yv = tensor.Rop(y, self.x, self.v)
        rop_f = function([self.x, self.v], yv, on_unused_input='ignore')
        J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                           sequences=tensor.arange(y.shape[0]),
                           non_sequences=[y, self.x])
        sy = tensor.dot(J, self.v)

        scan_f = function([self.x, self.v], sy, on_unused_input='ignore')

        v1 = rop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert numpy.allclose(v1, v2), ('ROP mismatch: %s %s' % (v1, v2))
        known_fail = False
        try:
            self.check_nondiff_rop(
                theano.clone(y, replace={self.x: break_op(self.x)}))
        except AssertionError:
            known_fail = True

        # TEST LOP

        vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)
        vv = numpy.asarray(self.rng.uniform(size=out_shape),
                           theano.config.floatX)

        yv = tensor.Lop(y, self.x, self.v)
        lop_f = function([self.x, self.v], yv, on_unused_input='ignore')
        J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                           sequences=tensor.arange(y.shape[0]),
                           non_sequences=[y, self.x])
        sy = tensor.dot(self.v, J)

        scan_f = function([self.x, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert numpy.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))

        if known_fail:
            raise KnownFailureTest(
                "Rop doesn't handle non-differentiable "
                "inputs correctly. Bug exposed by fixing Add.grad"
                " method.")
Example #19
    def __init__(self, p, inputs, s, costs):
        # useful data for reshaping
        self.shapes = [i.get_value().shape for i in p]
        self.sizes = map(np.prod, self.shapes)
        self.positions = np.cumsum([0] + self.sizes)[:-1]

        self.p = p
        self.inputs = inputs
        self.s = s
        self.costs = costs

        g = T.grad(costs[0], p)
        g = map(T.as_tensor_variable, g)  # for CudaNdarray
        self.f_gc = theano.function(inputs, g + costs)  # gradient computation
        self.f_cost = theano.function(inputs, costs)  # quick cost evaluation

        symbolic_types = T.scalar, T.vector, T.matrix, T.tensor3, T.tensor4

        coefficient = T.scalar()  # this is lambda*mu

        # this computes the product Gv = J'HJv (G is the Gauss-Newton matrix)
        v = [symbolic_types[len(i)]() for i in self.shapes]
        Jv = T.Rop(s, p, v)
        HJv = T.grad(T.sum(T.grad(costs[0], s) * Jv),
                     s,
                     consider_constant=[Jv])
        Gv = T.grad(T.sum(HJv * s), p, consider_constant=[HJv, Jv])
        Gv = map(T.as_tensor_variable, Gv)  # for CudaNdarray
        self.function_Gv = theano.function(inputs + v + [coefficient],
                                           Gv,
                                           givens={},
                                           on_unused_input='ignore')
        # compute J'sqrt(diag(H))v for jacobi preconditioner
        r = T.matrix()
        sqrt_Hv = T.sqrt(T.grad(T.sum(T.grad(costs[0], s)), s)) * r
        J_sqrt_Hv = T.Lop(s, p, sqrt_Hv)
        J_sqrt_Hv = map(T.as_tensor_variable, J_sqrt_Hv)  # for CudaNdarray

        self.function_J_sqrt_Hv = theano.function(inputs + [r],
                                                  J_sqrt_Hv,
                                                  givens={},
                                                  on_unused_input='ignore')
        # compute Hv
        dp = T.grad(costs[0], p)
        total = 0
        for dp_, v_ in zip(dp, v):
            total += T.sum(dp_ * v_)

        Hv = T.grad(total, p)
        Hv = map(T.as_tensor_variable, Hv)  # for CudaNdarray
        self.function_Hv = theano.function(inputs + v + [coefficient],
                                           Hv,
                                           on_unused_input='ignore')
Example #20
    def check_mat_rop_lop(self, y, out_shape):
        """
        Test the Rop/Lop when input is a matrix and the output is a vector

        :param y: the output variable of the op applied to self.mx
        :param out_shape: Used to generate a random tensor
                          corresponding to the evaluation point of the Rop
                          (i.e. the tensor with which you multiply the
                          Jacobian). It should be a tuple of ints.

        If the Op has more than 1 input, one of them must be mx, while
        others must be shared variables / constants. We will test only
        against the input self.mx, so you must call
        check_mat_rop_lop/check_rop_lop for the other inputs.

        We expect all inputs/outputs have dtype floatX.

        If you want to test an Op with an output matrix, add a sum
        after the Op you want to test.
        """
        vx = np.asarray(self.rng.uniform(size=self.mat_in_shape),
                        theano.config.floatX)
        vv = np.asarray(self.rng.uniform(size=self.mat_in_shape),
                        theano.config.floatX)
        yv = tensor.Rop(y, self.mx, self.mv)
        yv2 = tensor.Rop_via_Lop(y, self.mx, self.mv)
        rop_f = function([self.mx, self.mv], [yv, yv2],
                         on_unused_input='ignore')
        sy, _ = theano.scan(lambda i, y, x, v:
                            (tensor.grad(y[i], x) * v).sum(),
                            sequences=tensor.arange(y.shape[0]),
                            non_sequences=[y, self.mx, self.mv])
        scan_f = function([self.mx, self.mv], sy, on_unused_input='ignore')

        v1, v2 = rop_f(vx, vv)
        v3 = scan_f(vx, vv)

        assert np.allclose(v1, v3), ('ROP mismatch: %s %s' % (v1, v3))
        assert np.allclose(v2, v3), ('ROP_VIA_LOP mismatch: %s %s' % (v2, v3))

        self.check_nondiff_rop(
            theano.clone(y, replace={self.mx: break_op(self.mx)}))

        vv = np.asarray(self.rng.uniform(size=out_shape), theano.config.floatX)
        yv = tensor.Lop(y, self.mx, self.v)
        lop_f = function([self.mx, self.v], yv)

        sy = tensor.grad((self.v * y).sum(), self.mx)
        scan_f = function([self.mx, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert np.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
Example #21
 def _get_updates_for(self, param, grad):
     D_tm1 = shared_like(param, 'D_ewma')
     v = self.rng.normal(param.shape)
     if self.hv_method == 'rop':
         Hv = TT.Rop(grad, param, v)
     if self.hv_method == 'lop':
         Hv = TT.Lop(grad, param, v)
     if self.hv_method == 'grad':
         Hv = TT.grad(TT.sum(grad * v), param)
     D_t = self.ewma * D_tm1 + (1 - self.ewma) * Hv * Hv
     den = TT.sqrt(D_t) + self.epsilon
     yield D_tm1, D_t
     yield param, param - grad * self.learning_rate / den
Example #22
    def setup(self, bottom, top):
        weights = T.matrix("weights")
        weights_bc = weights.dimshuffle((0, 1, "x", "x"))
        feats = T.tensor4("weights")
        v = T.tensor3("v")

        dot = weights_bc * feats
        result = T.sum(dot, axis=1)

        g_w, g_f = T.Lop(result, [weights, feats], v)
        self.f = theano.function([weights, feats], result)
        self.b_w = theano.function([weights, feats, v], g_w)
        self.b_f = theano.function([weights, feats, v], g_f)
Example #23
    def setup(self, bottom, top):
        small_size = bottom[0].shape[1]
        small = T.matrix("small")
        big = T.tensor4("big")
        v = T.tensor4("v")
        small_bc = small.dimshuffle(0, 1, "x", "x")
        small_bc = T.addbroadcast(small_bc, 0)
        result = big + small_bc

        g_small, g_big = T.Lop(result, [small, big], v)
        self.f = theano.function([small, big], result)
        self.b_small = theano.function([v], g_small)
        self.b_big = theano.function([v], g_big)
Example #24
            def step(x_t, y_t, h_tmT, Wx, Wh, bh, Wy, by, lr, switch):

                # manually build the graph for the inner loop...
                # passing correct h_tm1 is impossible in nested scans
                yo_t = []
                h_tm1 = h_tmT
                for t in range(self.steps):
                    h_t = relu(T.dot(x_t[t], Wx) + T.dot(h_tm1, Wh) + bh)
                    yo_t.append(relu(T.dot(h_t, Wy) + by))
                    h_tm1 = h_t

                updates = OrderedDict()

                # Train the RNN: backprop (loss + DNI output)
                loss = T.mean(T.square(yo_t - y_t))
                dni_out = self.dni.output(h_t)
                for param in self.params:
                    dlossdparam = T.grad(loss, param)
                    dniJ = T.Lop(h_t,
                                 param,
                                 dni_out,
                                 disconnected_inputs='ignore')
                    updates[param] = param - lr * T.switch(
                        T.gt(switch, 0), dlossdparam + dniJ, dlossdparam)

                # Update the DNI (from the last step)
                # re-calculate the DNI prediction from the last step
                # note: can't be passed through scan or T.grad won't work
                dni_out_old = self.dni.output(h_tmT)
                # dni_target: current loss backprop'ed + new dni backprop'ed
                dni_target = T.grad(loss,h_tmT) \
                             +T.Lop(h_t,h_tmT,dni_out)
                dni_error = T.sum(T.square(dni_out_old - dni_target))
                for param in self.dni.params:
                    gparam = T.grad(dni_error, param)
                    updates[param] = param - lr * gparam

                return [h_t, loss, dni_error], updates
Example #25
 def gauss_vect_mult(v):
     """
     Multiply a vector by the Gauss-Newton matrix JHJ',
       where J is the Jacobian of the output with respect to the params
       and H is the Hessian of the cost with respect to the output.
       H should be diagonal and positive.
     Also adds the ridge term.
     """
     Jv = T.Rop(output, params, v)
     HJv = T.Rop(T.grad(opt_cost, output), output, Jv)
     JHJv = T.Lop(output, params, HJv)
     if not isinstance(JHJv, list):
         JHJv = [JHJv]
     JHJv = [a + ridge * b for a, b in zip(JHJv, v)]
     return JHJv
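A hedged sketch of the Gauss-Newton pattern used above, reduced to one parameter vector and a quadratic cost so that H is the identity; the variable names are illustrative.

# With J = d(output)/d(params): Gv = J' H J v, built from an Rop (forward)
# pass followed by an Lop (reverse) pass.
import numpy as np
import theano
import theano.tensor as T

w = T.vector('w')
v = T.vector('v')
output = T.tanh(w)                      # model output as a function of the params
cost = 0.5 * T.sum(output ** 2)         # quadratic cost => H = identity

Jv = T.Rop(output, w, v)                # forward-mode: J v
HJv = Jv                                # H = I here; in general scale by the output Hessian
Gv = T.Lop(output, w, HJv)              # reverse-mode: J' (H J v)

f = theano.function([w, v], Gv)
print(f(np.zeros(3, dtype=theano.config.floatX),
        np.ones(3, dtype=theano.config.floatX)))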
Example #26
 def setup(self, bottom, top):
     attention = T.tensor4("attention")
     input = T.tensor4("input")
     v = T.matrix("v")
     attention_bc = T.addbroadcast(attention, 1)
     attended = T.mul(input, attention_bc)
     result = T.sum(attended, axis=(2, 3))
     result_g_attention, result_g_input = T.Lop(result, [attention, input],
                                                v)
     self.f = theano.function([attention, input], result)
     self.b_attention = theano.function([attention, input, v],
                                        result_g_attention)
     self.b_input = theano.function([attention, input, v],
                                     result_g_input)
Example #27
    def parse_args(self, bottom, top):
        function_str = self.pythonargs[0]
        top_shape = self.pythonargs[1]

        old_function_str = self.function_str
        old_top_shape = self.top_shape
        self.function_str = function_str
        self.top_shape = top_shape
        if function_str != old_function_str or len(top_shape) != len(
                old_top_shape):
            if old_function_str != '':
                print(
                    'TheanoGPU function string different from cache: recompiling'
                )
            import theano.tensor as T
            import theano
            from theano.sandbox.cuda.basic_ops import gpu_from_host
            x = []
            for i in range(len(bottom)):
                if len(bottom[i].shape) == 1:
                    x.append(T.vector('x%d' % i))
                if len(bottom[i].shape) == 2:
                    x.append(T.matrix('x%d' % i))
                if len(bottom[i].shape) == 3:
                    x.append(T.tensor3('x%d' % i))
                if len(bottom[i].shape) == 4:
                    x.append(T.tensor4('x%d' % i))

            y = eval(function_str)
            self.f = theano.function(x,
                                     gpu_from_host(y),
                                     on_unused_input='ignore')

            if len(self.top_shape) == 1:
                v = T.vector('v')
            elif len(self.top_shape) == 2:
                v = T.matrix('v')
            elif len(self.top_shape) == 3:
                v = T.tensor3('v')
            elif len(self.top_shape) == 4:
                v = T.tensor4('v')
            self.b = []
            for i in range(len(bottom)):
                yg = T.Lop(y, x[i], v)
                self.b.append(
                    theano.function(x + [v],
                                    gpu_from_host(yg),
                                    on_unused_input='ignore'))
Example #28
    def check_rop_lop(self, y, out_shape):
        """
        As check_mat_rop_lop, except the input is self.x which is a
        vector. The output is still a vector.

        """
        # TEST ROP
        vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)
        vv = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)

        yv = tensor.Rop(y, self.x, self.v)
        rop_f = function([self.x, self.v], yv, on_unused_input='ignore')
        J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                           sequences=tensor.arange(y.shape[0]),
                           non_sequences=[y, self.x])
        sy = tensor.dot(J, self.v)

        scan_f = function([self.x, self.v], sy, on_unused_input='ignore')

        v1 = rop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert numpy.allclose(v1, v2), ('ROP mismatch: %s %s' % (v1, v2))
        self.check_nondiff_rop(
            theano.clone(y, replace={self.x: break_op(self.x)}))

        # TEST LOP

        vx = numpy.asarray(self.rng.uniform(size=self.in_shape),
                           theano.config.floatX)
        vv = numpy.asarray(self.rng.uniform(size=out_shape),
                           theano.config.floatX)

        yv = tensor.Lop(y, self.x, self.v)
        lop_f = function([self.x, self.v], yv, on_unused_input='ignore')
        J, _ = theano.scan(lambda i, y, x: tensor.grad(y[i], x),
                           sequences=tensor.arange(y.shape[0]),
                           non_sequences=[y, self.x])
        sy = tensor.dot(self.v, J)

        scan_f = function([self.x, self.v], sy)

        v1 = lop_f(vx, vv)
        v2 = scan_f(vx, vv)
        assert numpy.allclose(v1, v2), ('LOP mismatch: %s %s' % (v1, v2))
Example #29
def compute_Lx(energies, params, deltas):
    # expectations and derivatives are commutative.
    cenergies = energies - T.mean(energies)
    Minv = T.cast(1. / energies.shape[0], floatX)

    rhs_terms = []
    for param_j, delta_j in zip(params, deltas):
        rhs_term = T.Rop(cenergies, param_j, delta_j)
        rhs_terms += [rhs_term]

    Lx_terms = []
    for param_i in params:
        Lx_term = 0
        for rhs in rhs_terms:
            Lx_term += Minv * T.Lop(cenergies, param_i, rhs)
        Lx_terms += [Lx_term]
    return Lx_terms
Example #30
                def Gv_step(*gv_args):
                    idx = TT.cast(gv_args[0], 'int32')
                    nw_inps = [x[idx * options['cbs']: \
                                 (idx + 1) * options['cbs']] for x in
                               loc_inputs]
                    replace = dict(zip(model.inputs, nw_inps))
                    nw_cost, nw_preactiv_out = safe_clone(
                        [model.train_cost, model.preactiv_out], replace)
                    nw_gvs = TT.Lop(
                        nw_preactiv_out, model.params,
                        TT.Rop(TT.grad(nw_cost, nw_preactiv_out), model.params,
                               cgv))

                    Gvs = [
                        ogv + ngv for (ogv, ngv) in zip(gv_args[1:], nw_gvs)
                    ]
                    return [gv_args[0] + const(1)] + Gvs