def build_theano_functions(self, bottom, top):
    # building Theano functions
    from caffe_helper.theano_util import init_theano
    init_theano()
    import theano as tn
    import theano.tensor as T
    p = np.float32(self.p_)
    axis = self.axis_
    if axis is None:
        axis = tuple(range(1, len(bottom[0].shape)))
    # blob to CudaNdArray
    # Forward pass
    Tensor = T.TensorType('float32', [False] * len(bottom[0].shape))
    s_x = Tensor('x')    # bottom data
    s_dz = Tensor('dz')  # top diff
    s_z = s_x * ((s_x ** p).sum(axis, keepdims=True) ** (np.float32(-1. / p)))
    # See http://goo.gl/wIVRsP for `tn.Out(x, borrow=True)`
    self.f_forward = tn.function([s_x], tn.Out(s_z, borrow=True))
    # Backward pass
    s_l = (s_dz * s_z).sum()
    s_grad = tn.grad(s_l, wrt=s_x)
    self.f_backward = tn.function([s_x, s_dz], tn.Out(s_grad, borrow=True))
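# For reference, a plain-numpy sketch of the forward expression built
# symbolically above (illustrative only, not part of the original layer;
# assumes a 2-D blob normalized over axis=1 with p=2):
#     z = x * (sum(x**p over axis)) ** (-1/p)
import numpy as np

x = np.random.rand(4, 3).astype(np.float32)
p = np.float32(2.0)
z = x * (x ** p).sum(axis=1, keepdims=True) ** np.float32(-1.0 / p)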
def test_bug_2009_07_17_borrowed_output():
    # Regression test for a bug where output was borrowed by mistake.
    a = theano.tensor.dmatrix()
    b = theano.tensor.dmatrix()
    # The output should *NOT* be borrowed.
    g = theano.function([a, b],
                        theano.Out(theano.tensor.dot(a, b), borrow=False))
    x = np.zeros((1, 2))
    y = np.ones((2, 5))
    z = g(x, y)
    print(z)         # Should be zero.
    x.fill(1)
    print(g(x, y))   # Should be non-zero.
    print(z)         # Should still be zero.
    assert np.linalg.norm(z) == 0

    # The code above was supposed to fail when it was written (or, more
    # accurately, on the next revision, i.e. when it was merged with the
    # rest of the code, i.e. on revision cac9c9e9f08e).
    # However, for some reason, it does not fail anymore when at this
    # revision.
    # Thus, a new test (below) was added that exhibits the same issue. Note
    # that it may better be moved into the test_nnet.py test file if it turns
    # out the bug was caused by 'crossentropy_softmax_argmax_1hot_with_bias',
    # and was not a more general issue.
    test_output_activation_no_bias = theano.tensor.dmatrix()
    test_b2 = theano.tensor.dvector()
    test_target = theano.tensor.ivector()
    nll_softmax_argmax = (
        crossentropy_softmax_argmax_1hot_with_bias(
            test_output_activation_no_bias,
            test_b2,
            test_target))
    output = nll_softmax_argmax[1]
    g = theano.function([test_output_activation_no_bias, test_b2, test_target],
                        theano.Out(output, borrow=False))

    a = np.zeros((1, 5))
    b = np.ones(5)
    c = np.zeros(1, dtype=np.int32)

    z = g(a, b, c)
    z_backup = copy.copy(z)
    id_z = id(z)
    print(('Output z after first call: %s' % (z, )))
    a[0, 0] = 1
    id_other = id(g(a, b, c))
    print(('Output z after second call: %s' % (z, )))
    # Ensure that calling the function again returns a pointer towards a new
    # array.
    assert id_z != id_other
    # Just to be 100% sure, ensure that z was not altered.
    assert (z == z_backup).all()
def test_aliasing_3(self):
    import theano, theano.tensor

    x = theano.tensor.matrix()
    y = 2 * x
    f = theano.function([theano.In(x, borrow=True)],
                        theano.Out(y, borrow=True))
def test_dnn_conv_desc_merge():
    if not dnn.dnn_available(test_ctx_name):
        raise SkipTest(dnn.dnn_available.msg)
    kern_shp = T.as_tensor_variable(
        numpy.asarray([3, 1, 2, 2]).astype('int64'))
    desc1 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2),
                               conv_mode='conv')(kern_shp)
    desc2 = dnn.GpuDnnConvDesc(border_mode='full', subsample=(1, 1),
                               conv_mode='cross')(kern_shp)
    # CDataType is not DeepCopyable so this will crash if we don't use
    # borrow=True
    f = theano.function([], [theano.Out(desc1, borrow=True),
                             theano.Out(desc2, borrow=True)])

    d1, d2 = f()

    # This will be the case if they are merged, which would be bad.
    assert d1 != d2
def cpu_expr_to_gpu(expr, unsafe=False):
    """Given a CPU expr return the same expression for the GPU.

    If unsafe is set to True, subsequent function calls evaluating the
    expression might return arrays pointing at the same memory region.
    """
    expr = T.cast(expr, 'float32')
    return theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(expr),
                      borrow=unsafe)
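# The `unsafe`/`borrow` trade-off above can be illustrated on the CPU too.
# This is a minimal sketch, assuming only that Theano is installed (none of
# these names come from the surrounding sources): with borrow=True the array
# returned by one call may be reused as the output buffer of the next call,
# so it must be copied if its value is needed after the function is called
# again. Whether aliasing actually occurs depends on the op and linker.
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
f = theano.function([x], theano.Out(2 * x, borrow=True))

a = f(np.ones((2, 2), dtype=theano.config.floatX))
safe = a.copy()  # keep a private copy before calling f() again
b = f(np.zeros((2, 2), dtype=theano.config.floatX))
# `a` may now alias Theano's internal buffer; `safe` still holds the
# result of the first call.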
def test_dnn_conv_desc_merge():
    if not cuda.dnn.dnn_available():
        raise SkipTest(cuda.dnn.dnn_available.msg)
    img_shp = T.as_tensor_variable(
        numpy.asarray([2, 1, 8, 8]).astype('int64'))
    kern_shp = T.as_tensor_variable(
        numpy.asarray([3, 1, 2, 2]).astype('int64'))
    desc1 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2),
                               conv_mode='conv')(img_shp, kern_shp)
    desc2 = dnn.GpuDnnConvDesc(border_mode='full', subsample=(1, 1),
                               conv_mode='cross')(img_shp, kern_shp)
    # CDataType is not DeepCopyable so this will crash if we don't use
    # borrow=True
    f = theano.function(
        [], [theano.Out(desc1, borrow=True),
             theano.Out(desc2, borrow=True)],
        mode=mode_with_gpu)

    d1, d2 = f()

    # This will be the case if they are merged, which would be bad.
    assert d1 != d2

    desc1v2 = dnn.GpuDnnConvDesc(border_mode='valid', subsample=(2, 2),
                                 conv_mode='conv')(img_shp, kern_shp)
    f = theano.function(
        [], [theano.Out(desc1, borrow=True),
             theano.Out(desc1v2, borrow=True)],
        mode=mode_with_gpu)
    assert len([n for n in f.maker.fgraph.apply_nodes
                if isinstance(n.op, dnn.GpuDnnConvDesc)]) == 1

    # CDATA values don't compare equal even if they represent the same
    # object, so we can't use DebugMode with them.
    if theano.config.mode not in ["DebugMode", "DEBUG_MODE"]:
        d1, d2 = f()
        # They won't be equal if they aren't merged.
        assert d1 == d2
def pairwise_theano_tensor_prepare(dtype):
    X = TT.matrix(dtype=str(dtype))
    dists = TT.sqrt(TT.sum(TT.sqr(X[:, None, :] - X), axis=2))
    name = 'pairwise_theano_broadcast_' + dtype
    rval = theano.function([X],
                           theano.Out(dists, borrow=True),
                           allow_input_downcast=True,
                           name=name)
    rval.__name__ = name
    return rval
def t_binomial(mean, size, const_size, var_input, input, steps, rtol):
    R = MRG_RandomStreams(234, use_cuda=False)
    u = R.binomial(size=size, p=mean)
    f = theano.function(var_input, u, mode=mode)
    out = f(*input)

    # Increase the number of steps if sizes implies only a few samples
    if numpy.prod(const_size) < 10:
        steps_ = steps * 100
    else:
        steps_ = steps
    basictest(f, steps_, const_size, prefix='mrg cpu',
              inputs=input, allow_01=True,
              target_avg=mean, mean_rtol=rtol)

    if mode != 'FAST_COMPILE' and cuda_available:
        R = MRG_RandomStreams(234, use_cuda=True)
        u = R.binomial(size=size, p=mean, dtype='float32')
        # well, it's really that this test w GPU doesn't make sense otw
        assert u.dtype == 'float32'
        f = theano.function(
            var_input,
            theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(u),
                       borrow=True),
            mode=mode_with_gpu)
        gpu_out = numpy.asarray(f(*input))
        basictest(f, steps_, const_size, prefix='mrg gpu',
                  inputs=input, allow_01=True,
                  target_avg=mean, mean_rtol=rtol)
        numpy.testing.assert_array_almost_equal(out, gpu_out, decimal=6)

    RR = theano.tensor.shared_randomstreams.RandomStreams(234)
    uu = RR.binomial(size=size, p=mean)
    ff = theano.function(var_input, uu, mode=mode)
    # It's not our problem if numpy generates 0 or 1
    basictest(ff, steps_, const_size, prefix='numpy',
              allow_01=True, inputs=input,
              target_avg=mean, mean_rtol=rtol)
def pairwise_theano_blas_prepare(dtype):
    X = TT.matrix(dtype=str(dtype))
    X_norm_2 = (X ** 2).sum(axis=1)
    dists = TT.sqrt(2 * X_norm_2 - TT.dot(X, X.T))
    name = 'pairwise_theano_blas_' + dtype
    rval = theano.function([X],
                           theano.Out(dists, borrow=True),
                           allow_input_downcast=True,
                           name=name)
    rval.__name__ = name
    return rval
def test_loading_and_saving_1(self):
    import cPickle
    import theano, theano.tensor

    x = theano.tensor.matrix()
    y = 2 * x
    my_obj = theano.function([theano.In(x, borrow=True)],
                             theano.Out(y, borrow=True))

    mode_instance = theano.compile.mode.get_mode(None)
    if not isinstance(mode_instance, theano.compile.debugmode.DebugMode):
        # Here, we work in a temporary directory in order not to clutter
        # the Theano repository. Code relative to creating that dir and
        # removing it afterwards should _not_ be backported to the tutorial.
        from tempfile import mkdtemp
        origdir = os.getcwd()
        tmpdir = None
        try:
            tmpdir = mkdtemp()
            os.chdir(tmpdir)

            f = open('obj.save', 'wb')
            cPickle.dump(my_obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()

            f = open('obj.save', 'rb')
            loaded_obj = cPickle.load(f)
            f.close()

            obj1 = my_obj
            obj2 = my_obj
            obj3 = my_obj

            f = open('objects.save', 'wb')
            for obj in [obj1, obj2, obj3]:
                cPickle.dump(obj, f, protocol=cPickle.HIGHEST_PROTOCOL)
            f.close()

            f = open('objects.save', 'rb')
            loaded_objects = []
            for i in range(3):
                loaded_objects.append(cPickle.load(f))
            f.close()
        finally:
            # Get back to the original dir, and delete the temporary one.
            os.chdir(origdir)
            if tmpdir is not None:
                shutil.rmtree(tmpdir)
def MFmanual(V, T, r, l, gamma, iterations, P=None, Q=None, H=None):
    """
    Parameters:
        V : As many rows as documents
        T : As many rows as documents
    """
    V = V.T
    T = T.T
    rng = np.random
    n = np.size(V, 1)
    td = np.size(T, 0)
    vd = np.size(V, 0)
    if P is None:
        P = rng.random((vd, r)).astype(theano.config.floatX)
    if Q is None:
        Q = rng.random((td, r)).astype(theano.config.floatX)
    if H is None:
        H = rng.random((r, n)).astype(theano.config.floatX)

    tV = theano.shared(V.astype(theano.config.floatX), name="V")
    tT = theano.shared(T.astype(theano.config.floatX), name="T")
    tH = theano.shared(H, name="H")
    tQ = theano.shared(Q, name="Q")
    tP = theano.shared(P, name="P")
    tLambda = Th.scalar(name="l")
    tGamma = Th.scalar(name="gamma")

    tEV = (1.0 / 2.0) * ((tV - Th.dot(tP, tH)) ** 2).sum()
    tET = (1.0 / 2.0) * ((tT - Th.dot(tQ, tH)) ** 2).sum()
    tReg = (1.0 / 2.0) * tLambda * (((tP ** 2).sum()) + ((tQ ** 2).sum()) +
                                    ((tH ** 2).sum()))
    tCost = tEV + tET + tReg

    gP = -1.0 * (Th.dot(tV, tH.T) - Th.dot(tP, Th.dot(tH, tH.T)) - tLambda * tP)
    gQ = -1.0 * (Th.dot(tT, tH.T) - Th.dot(tQ, Th.dot(tH, tH.T)) - tLambda * tQ)
    gH = -1.0 * (Th.dot(tP.T, tV) - Th.dot(tP.T, Th.dot(tP, tH)) +
                 Th.dot(tQ.T, tT) - Th.dot(tQ.T, Th.dot(tQ, tH)) - tLambda * tH)

    train = theano.function(
        inputs=[tGamma, tLambda],
        outputs=[theano.Out(tCost, borrow=True)],
        updates={tP: tP - tGamma * gP,
                 tQ: tQ - tGamma * gQ,
                 tH: tH - tGamma * gH},
        name="train")

    for i in range(0, iterations):
        print train(np.asarray(gamma, dtype=theano.config.floatX),
                    np.asarray(l, dtype=theano.config.floatX))

    return tP.get_value(), tQ.get_value(), tH.get_value()
def setup(self, bottom, top):
    self.reshape(bottom, top)
    from caffe_helper.theano_util import init_theano
    init_theano()
    import theano as tn
    import theano.tensor as T
    shape1 = bottom[0].shape  # prediction
    shape2 = bottom[1].shape  # label
    s_p = T.TensorType('float32', [False] * len(shape1))('p')
    s_t = T.TensorType('float32', [False] * len(shape2))('t')
    # Forward pass
    FLTMIN = np.finfo(np.float32).tiny
    s_l = -T.mean(
        T.log(T.maximum(FLTMIN, s_p.flatten(2)))[T.arange(s_t.shape[0]),
                                                 T.cast(s_t, 'int32')])
    self.f_forward = tn.function([s_p, s_t], tn.Out(s_l, borrow=True))
    # Backward pass
    s_dz = T.fscalar('dz')
    sg_p = tn.grad(s_dz * s_l, wrt=s_p)
    self.f_backward = tn.function([s_p, s_t, s_dz], tn.Out(sg_p, borrow=True))
def test_csc_correct_output_faster_than_scipy(self):
    sparse_dtype = 'float64'
    dense_dtype = 'float64'

    a = SparseType('csc', dtype=sparse_dtype)()
    b = tensor.matrix(dtype=dense_dtype)
    d = theano.dot(a, b)
    f = theano.function([a, b], theano.Out(d, borrow=True))

    for M, N, K, nnz in [(4, 3, 2, 3),
                         (40, 30, 20, 3),
                         (40, 30, 20, 30),
                         (400, 3000, 200, 6000),
                         ]:
        spmat = sp.csc_matrix(random_lil((M, N), sparse_dtype, nnz))
        mat = numpy.asarray(numpy.random.randn(N, K), dense_dtype)
        theano_times = []
        scipy_times = []
        for i in xrange(5):
            t0 = time.time()
            theano_result = f(spmat, mat)
            t1 = time.time()
            scipy_result = spmat * mat
            t2 = time.time()

            theano_times.append(t1 - t0)
            scipy_times.append(t2 - t1)

        theano_time = numpy.min(theano_times)
        scipy_time = numpy.min(scipy_times)

        speedup = scipy_time / theano_time
        print scipy_times
        print theano_times
        print('M=%(M)s N=%(N)s K=%(K)s nnz=%(nnz)s theano_time'
              '=%(theano_time)s speedup=%(speedup)s') % locals()

        # fail if Theano is slower than scipy by more than a certain amount
        overhead_tol = 0.003  # seconds overall
        overhead_rtol = 1.2   # times as long

        self.assertTrue(numpy.allclose(theano_result, scipy_result))
        if not theano.config.mode in ["DebugMode", "DEBUG_MODE"]:
            self.assertFalse(
                theano_time > overhead_rtol * scipy_time + overhead_tol)
def test_Gpujoin_inplace():
    """Test Gpujoin to work inplace.

    This function tests the case when several elements are passed to the
    Gpujoin function but all except one of them are empty. In this case
    Gpujoin should work inplace and the output should be the view of the
    non-empty element.
    """
    s = T.lscalar()
    data = numpy.array([3, 4, 5], dtype=theano.config.floatX)
    x = gpuarray_shared_constructor(data, borrow=True)
    z = T.zeros((s,))

    join = GpuJoin(view=0)
    c = join(0, x, z)

    f = theano.function([s], theano.Out(c, borrow=True))
    assert x.get_value(borrow=True, return_internal_type=True) is f(0)
    assert numpy.allclose(f(0), [3, 4, 5])
def test_Gpujoin_inplace():
    # Test Gpujoin to work inplace.
    #
    # This function tests the case when several elements are passed to the
    # Gpujoin function but all except one of them are empty. In this case
    # Gpujoin should work inplace and the output should be the view of the
    # non-empty element.
    s = tt.lscalar()
    data = np.array([3, 4, 5], dtype=theano.config.floatX)
    x = gpuarray_shared_constructor(data, borrow=True)
    z = tt.zeros((s,))

    join = GpuJoin(view=0)
    c = join(0, x, z)

    f = theano.function([s], theano.Out(c, borrow=True))
    if not isinstance(mode_with_gpu, theano.compile.DebugMode):
        assert x.get_value(borrow=True, return_internal_type=True) is f(0)
    assert np.allclose(f(0), [3, 4, 5])
def function(self, additional_tags=None, gpu=False):
    mode = theano.compile.mode.get_default_mode()
    inputs = self.inputs
    output = self.outputs[0]
    inputs = [Var.var for Var in inputs]
    output = output.var
    assert len(self.outputs) == 1, "Multiple output assumption fails"
    if gpu:
        inputs, cpu_inputs = zip(*map(cpu_var_to_gpu_var, inputs))
        output = self.op(*cpu_inputs)
        output = theano.sandbox.cuda.basic_ops.gpu_from_host(output)
        output = theano.Out(output, borrow=True)
    else:
        mode = mode.excluding('gpu')
    if additional_tags:
        mode = mode.including(additional_tags)

    return theano.function(inputs, output, mode=mode, name='test')
def __init__(self, tt_input, tt_output, updates=None, name='Unnamed Function',
             borrow_inp=False, borrow_out=False, profile_execution=False):
    self.name = name
    self.func = None
    self.profile = profile_execution
    self.last_exec_time = None
    self.updates = updates
    if borrow_inp:
        tt_input = [theano.In(x, borrow=True) for x in tt_input]

    self.tt_input = tt_input
    self.single_return = False
    if not isinstance(tt_output, (list, tuple)):
        tt_output = [tt_output, ]
        self.single_return = True
    if borrow_out:
        tt_output = [theano.Out(x, borrow=True) for x in tt_output]

    self.tt_output = tt_output
def learning_function(self):
    """
    Get the learning function.
    :param func:
    :return:
    """
    network_updates = list(self.network.updates) + list(self.network.training_updates)
    learning_updates = list(self._learning_updates())
    update_list = network_updates + learning_updates

    logging.info("network updates: %s" % " ".join(map(str, [x[0] for x in network_updates])))
    logging.info("learning updates: %s" % " ".join(map(str, [x[0] for x in learning_updates])))

    variables = self.network.input_variables + self.network.target_variables
    givens = None
    return theano.function(
        variables,
        map(lambda v: theano.Out(v, borrow=True), self.training_variables),
        updates=update_list, allow_input_downcast=True,
        mode=self.config.get("theano_mode", None),
        givens=givens)
def function(self, additional_tags=None, gpu=False):
    # inputs = [inp.clone() for inp in self.apply.inputs]
    # output = self.apply.op(inputs)
    # env = theano.FunctionGraph(inputs, [output])
    mode = theano.compile.mode.get_default_mode()
    inputs = self.apply.inputs
    output = self.apply.outputs[0]
    assert len(self.apply.outputs) == 1, "Multiple output assumption fails"
    if gpu:
        inputs, cpu_inputs = zip(*map(cpu_var_to_gpu_var, inputs))
        output = self.apply.op(*cpu_inputs)
        output = theano.sandbox.cuda.basic_ops.gpu_from_host(output)
        output = theano.Out(output, borrow=True)
    else:
        mode = mode.excluding('gpu')
    if additional_tags:
        mode = mode.including(additional_tags)

    return theano.function(inputs, output, mode=mode)
def test_dot_sparse_sparse(self):
    # test dot for 2 input sparse matrix
    sparse_dtype = 'float64'
    sp_mat = {'csc': sp.csc_matrix,
              'csr': sp.csr_matrix}

    for sparse_format_a in ['csc', 'csr']:
        for sparse_format_b in ['csc', 'csr']:
            a = SparseType(sparse_format_a, dtype=sparse_dtype)()
            b = SparseType(sparse_format_b, dtype=sparse_dtype)()
            d = theano.dot(a, b)
            f = theano.function([a, b], theano.Out(d, borrow=True))
            topo = f.maker.env.toposort()
            for M, N, K, nnz in [(4, 3, 2, 3),
                                 (40, 30, 20, 3),
                                 (40, 30, 20, 30),
                                 (400, 3000, 200, 6000),
                                 ]:
                a_val = sp_mat[sparse_format_a](
                    random_lil((M, N), sparse_dtype, nnz))
                b_val = sp_mat[sparse_format_b](
                    random_lil((N, K), sparse_dtype, nnz))
                f(a_val, b_val)
def __init__(self, network, config=None, method=None):
    if method:
        logging.info("changing optimization method to '%s'" % method)
        if not config:
            config = TrainerConfig()
        elif isinstance(config, dict):
            config = TrainerConfig(config)
        config.method = method

    super(GeneralNeuralTrainer, self).__init__(network, config)

    logging.info('compiling %s learning function', self.__class__.__name__)

    network_updates = list(network.updates) + list(network.training_updates)
    learning_updates = list(self.learning_updates())
    update_list = network_updates + learning_updates

    logging.info("network updates: %s" % " ".join(map(str, [x[0] for x in network_updates])))
    logging.info("learning updates: %s" % " ".join(map(str, [x[0] for x in learning_updates])))

    if False and config.data_transmitter:
        variables = [config.data_transmitter.get_iterator()]
        givens = config.data_transmitter.get_givens()
    else:
        variables = network.input_variables + network.target_variables
        givens = None

    self.learning_func = theano.function(
        variables,
        map(lambda v: theano.Out(v, borrow=True), self.training_variables),
        updates=update_list, allow_input_downcast=True,
        mode=self.config.get("theano_mode", None),
        givens=givens)
def test_binomial():
    # TODO: test size=None, ndim=X
    # TODO: test size=X, ndim!=X.ndim
    # TODO: test random seed in legal value(!=0 and other)
    # TODO: test sample_size not a multiple of guessed #streams
    # TODO: test size=Var, with shape that change from call to call
    # we test size in a tuple of int and a tensor.shape.
    # we test the param p with int.

    if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
            mode == 'Mode' and config.linker in ['py']):
        sample_size = (10, 50)
        steps = 50
        rtol = 0.02
    else:
        sample_size = (500, 50)
        steps = int(1e3)
        rtol = 0.01

    x = tensor.matrix()
    v = tensor.vector()
    for mean in [0.1, 0.5]:
        for size, const_size, var_input, input in [
                (sample_size, sample_size, [], []),
                (x.shape, sample_size, [x],
                 [numpy.zeros(sample_size, dtype=config.floatX)]),
                ((x.shape[0], sample_size[1]), sample_size, [x],
                 [numpy.zeros(sample_size, dtype=config.floatX)]),
                # test empty size (scalar)
                ((), (), [], []),
                ]:

            # print ''
            # print 'ON CPU with size=(%s) and mean(%d):' % (str(size), mean)
            R = MRG_RandomStreams(234, use_cuda=False)
            # Note: we specify `nstreams` to avoid a warning.
            u = R.binomial(size=size, p=mean,
                           nstreams=rng_mrg.guess_n_streams(size, warn=False))
            f = theano.function(var_input, u, mode=mode)
            # theano.printing.debugprint(f)
            out = f(*input)
            # print 'random?[:10]\n', out[0, 0:10]
            # print 'random?[-1,-10:]\n', out[-1, -10:]

            # Increase the number of steps if sizes implies only a few samples
            if numpy.prod(const_size) < 10:
                steps_ = steps * 100
            else:
                steps_ = steps
            basictest(f, steps_, const_size, prefix='mrg cpu',
                      inputs=input, allow_01=True,
                      target_avg=mean, mean_rtol=rtol)

            if mode != 'FAST_COMPILE' and cuda_available:
                # print ''
                # print 'ON GPU with size=(%s) and mean(%d):' % (str(size), mean)
                R = MRG_RandomStreams(234, use_cuda=True)
                u = R.binomial(size=size, p=mean, dtype='float32',
                               nstreams=rng_mrg.guess_n_streams(size,
                                                                warn=False))
                # well, it's really that this test w GPU doesn't make sense otw
                assert u.dtype == 'float32'
                f = theano.function(
                    var_input,
                    theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(u),
                               borrow=True),
                    mode=mode_with_gpu)
                # theano.printing.debugprint(f)
                gpu_out = numpy.asarray(f(*input))
                # print 'random?[:10]\n', gpu_out[0, 0:10]
                # print 'random?[-1,-10:]\n', gpu_out[-1, -10:]
                basictest(f, steps_, const_size, prefix='mrg gpu',
                          inputs=input, allow_01=True,
                          target_avg=mean, mean_rtol=rtol)
                numpy.testing.assert_array_almost_equal(out, gpu_out,
                                                        decimal=6)

            # print ''
            # print 'ON CPU w NUMPY with size=(%s) and mean(%d):' % (str(size),
            #                                                        mean)
            RR = theano.tensor.shared_randomstreams.RandomStreams(234)
            uu = RR.binomial(size=size, p=mean)
            ff = theano.function(var_input, uu, mode=mode)
            # It's not our problem if numpy generates 0 or 1
            basictest(ff, steps_, const_size, prefix='numpy',
                      allow_01=True, inputs=input,
                      target_avg=mean, mean_rtol=rtol)
def _o(s):
    return tn.Out(s, borrow=True)
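# Hypothetical usage sketch for the `_o` helper above (assuming `tn` is
# `theano` imported at module level, as in the layer code earlier in this
# listing); it simply shortens wrapping an output in tn.Out(..., borrow=True):
import numpy as np
import theano as tn
import theano.tensor as T

s_x = T.vector('x')
f = tn.function([s_x], _o(2 * s_x))  # same as tn.Out(2 * s_x, borrow=True)
print(f(np.arange(3, dtype=tn.config.floatX)))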
def train_model(self, lr_scheme, initial_learning_rate=0.01, min_lr=0.00001,
                learning_rate_decay=0.05, constant_steps=None, L1_reg=0.0000,
                L2_reg=0.0000, lr_global=False, n_epochs=100, momentum_term=0.9):
    logger.info("\n" + "\n".join(
        ["\t%s : " % key + str(locals()[key]) for key in
         ["lr_scheme", "lr_global", "min_lr", "initial_learning_rate",
          "learning_rate_decay", "L1_reg", "L2_reg", "n_epochs"]]))

    cost = self.model.negative_log_likelihood(self.y) \
        + L2_reg * self.model.L2 #\
#        + L1_reg * self.model.L1

    self.learning_rate = theano.shared(np.float32(initial_learning_rate))
    if constant_steps == None:
        self.constant_steps = np.inf
    else:
        self.constant_steps = constant_steps
    self.lr_scheme = lr_scheme

    def gen_updates_sgd():
        gparams = [theano.grad(cost, param) for param in self.model.params]
        updates = []
        for param_i, grad_i, n_in in zip(self.model.params, gparams, self.model.n_ins):
            if "embeddings" not in str(param_i):
                updates.append((param_i, param_i - self.learning_rate/n_in * grad_i))
            else:
                updates.append((param_i, param_i - self.learning_rate * grad_i))
        return updates

    def gen_updates_sgd_global():
        gparams = [theano.grad(cost, param) for param in self.model.params]
        updates = []
        for param_i, grad_i in zip(self.model.params, gparams):
            updates.append((param_i, param_i - self.learning_rate * grad_i))
        return updates

#    def gen_updates_regular_momentum(loss, all_parameters, learning_rate, momentum, weight_decay):
#        all_grads = [theano.grad(loss, param) for param in all_parameters]
#        updates = []
#        for param_i, grad_i in zip(all_parameters, all_grads):
#            mparam_i = theano.shared(param_i.get_value()*0.)
#            v = momentum * mparam_i - weight_decay * learning_rate * param_i - learning_rate * grad_i
#            updates.append((mparam_i, v))
#            updates.append((param_i, param_i + v))
#        return updates
#
#    def gen_updates_own_momentum():
#        agparams = [theano.shared(value=np.zeros(p.get_value().shape, dtype=theano.config.floatX), name='ag_'+p.name) \
#            for p in self.model.params]  # averaged gradients
#        gparams = []  # gradients
#        for pid, param in enumerate(self.model.params):
#            gparam = T.grad(cost, param)
#            gparams.append(gparam)
#        updates = []
#        for param, gparam, agparam, n_in in zip(self.model.params, gparams, agparams, self.model.n_ins):
#            updates.append((agparam, np.float32(1-momentum_term)*agparam + np.float32(momentum_term)*gparam))
#            if lr_global:
#                updates.append((param, param - self.learning_rate/n_in * (np.float32(1-momentum_term)*agparam + np.float32(momentum_term)*gparam)))
#            else:
#                updates.append((param, param - self.learning_rate * (np.float32(1-momentum_term)*agparam + np.float32(momentum_term)*gparam)))
#        return updates

    if lr_global:
        updates = gen_updates_sgd_global()
    else:
        updates = gen_updates_sgd()

    train_model = theano.function(
        inputs=[self.index, self.permutation],
        outputs=theano.Out(cost, borrow=True),
        updates=updates,
        givens={
            self.x: self.train_set_x[self.permutation[self.index * self.batch_size:(self.index + 1) * self.batch_size]],
            self.y: self.train_set_y[self.permutation[self.index * self.batch_size:(self.index + 1) * self.batch_size]]},
        name="train_model")

    #==========================================================================
    # train model
    #==========================================================================
    theano.printing.pydotprint(train_model)
    logger.info('... training')

    min_valid_cost = np.inf
    best_epoch = 0
    test_score = 0.
    start_time = time.clock()

    epoch = 0
    self.trainingscosts = []
    self.validationcosts = []
    training_costs = [10]

    while epoch <= n_epochs:
        self.trainingscosts.append(np.mean(training_costs))
        validation_costs = [self.validation_cost(i) for i in xrange(self.n_valid_batches)]
        self.validationcosts.append(np.mean(validation_costs))
        self.monitor_update()
        if self.validationcosts[-1] < min_valid_cost:
            min_valid_cost = self.validationcosts[-1]
            best_epoch = epoch
            self.test_error(epoch)
        if epoch % 25 == 0:
            pickle.dump(self.model,
                        open(os.path.join(self.modeldir, 'model%i.pck' % epoch), 'wb'),
                        protocol=pickle.HIGHEST_PROTOCOL)
            hidden_values = [self.visualize_hidden(i)
                             for i in np.random.randint(0, self.n_valid_batches, 30)]
            image = np.vstack(hidden_values)
            binary_image = (image > 0.999) | (image < -0.999)
            plt.imshow(binary_image, cmap=plt.cm.get_cmap('gray'), interpolation='nearest')
            plt.savefig(os.path.join(self.modeldir, 'binary_hidden%i.png' % epoch))
            plt.clf()
            test_predictions = [self.predictions(i) for i in xrange(self.n_test_batches)]
            np.save(os.path.join(self.modeldir, "predictions.npy"), test_predictions)
            generate_output(self.modeldir, modelnumber=epoch,
                            predictions=np.array(test_predictions))

        training_costs = []
        perm = np.random.permutation(self.train_set_size).astype(np.int32)
        for minibatch_index in xrange(self.n_train_batches):
            training_costs.append(train_model(minibatch_index, perm))

        if epoch > 0:
            if self.lr_scheme != "constant":
                if self.lr_scheme == "continuous" and epoch > self.constant_steps:
                    self.learning_rate.set_value(np.float32(
                        initial_learning_rate * (1 + learning_rate_decay * self.constant_steps) /
                        (1 + learning_rate_decay * max(epoch, self.constant_steps))))
                elif ((self.validationcosts[-1] - self.validationcosts[-2]) > 0 and
                        (self.validationcosts[-1] - np.min(self.validationcosts)) > 0.01 and
                        np.argmin(self.validationcosts) != (len(self.validationcosts) - 2)) or \
                        (((self.trainingscosts[-1] - self.trainingscosts[-2]) > 0) and
                         (np.argmin(self.trainingscosts) != (len(self.trainingscosts) - 2))):
                    if self.lr_scheme == "stepwise":
                        self.learning_rate.set_value(np.float32(self.learning_rate.get_value() / 3))
                    elif self.lr_scheme == "continuous":
                        self.constant_steps = epoch - 1
                        self.learning_rate.set_value(np.float32(
                            initial_learning_rate * (1 + learning_rate_decay * self.constant_steps) /
                            (1 + learning_rate_decay * max(epoch, self.constant_steps))))
                if self.learning_rate.get_value() < min_lr:
                    self.learning_rate.set_value(np.float32(min_lr))
                    self.lr_scheme = "constant"
        epoch = epoch + 1

    end_time = time.clock()
    logger.info(('Optimization complete. Best validation score of %f %% '
                 'obtained at epoch %i, with test performance %f %%') %
                (min_valid_cost, best_epoch, test_score * 100.))
    logger.info('The code for file ' + os.path.split(__file__)[1] +
                ' ran for %.2fm' % ((end_time - start_time) / 60.))
    self.monitor_update()

    test_predictions = [self.predictions(i) for i in xrange(self.n_test_batches)]
    generate_output(self.modeldir, predictions=np.array(test_predictions))
# updates_ada=lasagne.updates.adagrad(objective,get_all_params(l_out),learning_rate=.03)
update = theano.function([l_in.input_var, target], [loss],
                         updates=updates_other, allow_input_downcast=True)
update_hid = theano.function([l_in.input_var, target], [loss],
                             updates=updates_hid, allow_input_downcast=True)
# update_ada=theano.function([l_in.input_var,target],[loss],updates=updates_ada,allow_input_downcast=True)
update_scal = theano.function([l_in.input_var, target], [],
                              updates=updates_scal, allow_input_downcast=True)
check = theano.function([l_in.input_var, target], [loss_smoo],
                        allow_input_downcast=True)
predict = theano.function(
    [l_in.input_var],
    [theano.Out(get_output(l_out, deterministic=True), borrow=True)],
    allow_input_downcast=True)

for fno in xrange(12, 1000):
    reset()
    # diagn=theano.function([l_in.input_var,train_indices,valid_indices,target],[train_loss,valid_loss],allow_input_downcast=True,name="jill")
    filename = "a_fitted_nnet_t_" + str(fno) + ".nc"
    assert not os.path.exists(filename)
    f = netcdf.netcdf_file(filename, "w")
    f.createDimension("train", train_X.shape[0])
    f.createDimension("valid", valid_X.shape[0])
    f.createDimension("test", test_X.shape[0])
    f.createDimension("preds", 37)
    v_train = f.createVariable("train", np.float, ("train", "preds"))
    v_valid = f.createVariable("valid", np.float, ("valid", "preds"))
def test_uniform():
    # TODO: test param low, high
    # TODO: test size=None
    # TODO: test ndim!=size.ndim
    # TODO: test bad seed
    # TODO: test size=Var, with shape that change from call to call
    if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
            mode == 'Mode' and config.linker in ['py']):
        sample_size = (10, 100)
        steps = 50
    else:
        sample_size = (500, 50)
        steps = int(1e3)

    x = tensor.matrix()
    for size, const_size, var_input, input in [
            (sample_size, sample_size, [], []),
            (x.shape, sample_size, [x],
             [numpy.zeros(sample_size, dtype=config.floatX)]),
            ((x.shape[0], sample_size[1]), sample_size, [x],
             [numpy.zeros(sample_size, dtype=config.floatX)]),
            # test empty size (scalar)
            ((), (), [], []),
            ]:

        #### TEST CPU IMPLEMENTATION ####
        # The python and C implementation are tested with DebugMode
        # print ''
        # print 'ON CPU with size=(%s):' % str(size)
        x = tensor.matrix()
        R = MRG_RandomStreams(234, use_cuda=False)
        # Note: we specify `nstreams` to avoid a warning.
        # TODO Look for all occurrences of `guess_n_streams` and `30 * 256`
        # for such situations: it would be better to instead filter the
        # warning using the warning module.
        u = R.uniform(size=size,
                      nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, u, mode=mode)
        assert any([isinstance(node.op, theano.sandbox.rng_mrg.mrg_uniform)
                    for node in f.maker.fgraph.toposort()])
        # theano.printing.debugprint(f)
        cpu_out = f(*input)

        # print 'CPU: random?[:10], random?[-10:]'
        # print cpu_out[0, 0:10]
        # print cpu_out[-1, -10:]

        # Increase the number of steps if sizes implies only a few samples
        if numpy.prod(const_size) < 10:
            steps_ = steps * 100
        else:
            steps_ = steps
        basictest(f, steps_, const_size, prefix='mrg cpu', inputs=input)

        if mode != 'FAST_COMPILE' and cuda_available:
            # print ''
            # print 'ON GPU with size=(%s):' % str(size)
            R = MRG_RandomStreams(234, use_cuda=True)
            u = R.uniform(size=size, dtype='float32',
                          nstreams=rng_mrg.guess_n_streams(size, warn=False))
            # well, it's really that this test w GPU doesn't make sense otw
            assert u.dtype == 'float32'
            f = theano.function(
                var_input,
                theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(u),
                           borrow=True),
                mode=mode_with_gpu)
            assert any([isinstance(node.op,
                                   theano.sandbox.rng_mrg.GPU_mrg_uniform)
                        for node in f.maker.fgraph.toposort()])
            # theano.printing.debugprint(f)
            gpu_out = numpy.asarray(f(*input))

            # print 'GPU: random?[:10], random?[-10:]'
            # print gpu_out[0, 0:10]
            # print gpu_out[-1, -10:]
            basictest(f, steps_, const_size, prefix='mrg gpu', inputs=input)
            numpy.testing.assert_array_almost_equal(cpu_out, gpu_out,
                                                    decimal=6)

        # print ''
        # print 'ON CPU w Numpy with size=(%s):' % str(size)
        RR = theano.tensor.shared_randomstreams.RandomStreams(234)
        uu = RR.uniform(size=size)
        ff = theano.function(var_input, uu, mode=mode)
        # It's not our problem if numpy generates 0 or 1
        basictest(ff, steps_, const_size, prefix='numpy', allow_01=True,
                  inputs=input)
def main():
    # Turn these knobs if you wish to work with larger/smaller data
    img_dims = (500, 500)
    fsize = 2
    n_channels = 3

    # Create a random image
    img = np.asarray(np.random.rand(*((n_channels, ) + img_dims)),
                     dtype=th.config.floatX)
    img = np.arange(n_channels * img_dims[0] * img_dims[1],
                    dtype=th.config.floatX).reshape(n_channels, *img_dims)

    # Adapt the code to use the CPU/GPU. In the GPU case, do NOT transfer the
    # results back to memory.
    wrap = ((lambda x: x) if th.config.device == "cpu" else
            (lambda x: th.Out(th.sandbox.cuda.basic_ops.gpu_from_host(x),
                              borrow=True)))

    # Convolution method
    x = th.shared(img)
    f = th.function(inputs=[],
                    outputs=wrap(im_to_col(x, fsize, n_channels=n_channels)),
                    name='im_to_col')

    # Time the convolution method
    tic = time.time()
    out_conv = f()
    conv_time = time.time() - tic
    print("Convolution-based method: {0}".format(conv_time))

    # Time the neighbors method
    neighs = N.NeighbourhoodsFromImages(1, (fsize, fsize), strides=(1, 1),
                                        ignore_border=True)(x)
    f = th.function([], outputs=wrap(neighs), name='old neighs')
    tic = time.time()
    out_old = f()
    neigh_time = time.time() - tic
    print("Neighbors-based method: {0}".format(neigh_time))

    # Time the new neighbours method ignore border
    neighs = N.images2neibs(x.dimshuffle('x', 0, 1, 2), (fsize, fsize), (1, 1),
                            mode='ignore_borders')
    f = th.function([], outputs=wrap(neighs), name='new neighs ignore border')
    tic = time.time()
    out_new = f()
    neigh_time = time.time() - tic
    print("New Neighbors-based ignore border method: {0}".format(neigh_time))

    # Time the new neighbours method
    neighs = N.images2neibs(x.dimshuffle('x', 0, 1, 2), (fsize, fsize), (1, 1),
                            mode='valid')
    f = th.function([], outputs=wrap(neighs), name='new neighs valid')
    tic = time.time()
    out_new = f()
    neigh_time = time.time() - tic
    print("New Neighbors-based valid method: {0}".format(neigh_time))

    # Print speedup results
    if conv_time < neigh_time:
        print("Conv faster than neigh. Speedup: {0}x".format(neigh_time / conv_time))
    else:
        print("Neigh faster than conv. Speedup: {0}x".format(conv_time / neigh_time))
def test_normal0():
    steps = 50
    std = 2.
    if (mode in ['DEBUG_MODE', 'DebugMode', 'FAST_COMPILE'] or
            mode == 'Mode' and config.linker in ['py']):
        sample_size = (25, 30)
        default_rtol = .02
    else:
        sample_size = (999, 50)
        default_rtol = .01
    sample_size_odd = (sample_size[0], sample_size[1] - 1)
    x = tensor.matrix()

    for size, const_size, var_input, input, avg, rtol, std_tol in [
            (sample_size, sample_size, [], [], -5., default_rtol,
             default_rtol),
            (x.shape, sample_size, [x],
             [numpy.zeros(sample_size, dtype=config.floatX)],
             -5., default_rtol, default_rtol),
            ((x.shape[0], sample_size[1]), sample_size, [x],
             [numpy.zeros(sample_size, dtype=config.floatX)],
             -5., default_rtol, default_rtol),
            # test odd value
            (sample_size_odd, sample_size_odd, [], [], -5., default_rtol,
             default_rtol),
            # test odd value
            (x.shape, sample_size_odd, [x],
             [numpy.zeros(sample_size_odd, dtype=config.floatX)],
             -5., default_rtol, default_rtol),
            (sample_size, sample_size, [], [],
             numpy.arange(numpy.prod(sample_size),
                          dtype='float32').reshape(sample_size),
             10. * std / numpy.sqrt(steps), default_rtol),
            # test empty size (scalar)
            ((), (), [], [], -5., default_rtol, 0.02),
            # test with few samples at the same time
            ((1, ), (1, ), [], [], -5., default_rtol, 0.02),
            ((2, ), (2, ), [], [], -5., default_rtol, 0.02),
            ((3, ), (3, ), [], [], -5., default_rtol, 0.02),
            ]:
        # print ''
        # print 'ON CPU:'

        R = MRG_RandomStreams(234, use_cuda=False)
        # Note: we specify `nstreams` to avoid a warning.
        n = R.normal(size=size, avg=avg, std=std,
                     nstreams=rng_mrg.guess_n_streams(size, warn=False))
        f = theano.function(var_input, n, mode=mode)
        # theano.printing.debugprint(f)
        out = f(*input)
        # print 'random?[:10]\n', out[0, 0:10]

        # Increase the number of steps if size implies only a few samples
        if numpy.prod(const_size) < 10:
            steps_ = steps * 50
        else:
            steps_ = steps
        basictest(f, steps_, const_size, target_avg=avg, target_std=std,
                  prefix='mrg ', allow_01=True, inputs=input,
                  mean_rtol=rtol, std_tol=std_tol)

        sys.stdout.flush()

        if mode != 'FAST_COMPILE' and cuda_available:
            # print ''
            # print 'ON GPU:'
            R = MRG_RandomStreams(234, use_cuda=True)
            n = R.normal(size=size, avg=avg, std=std, dtype='float32',
                         nstreams=rng_mrg.guess_n_streams(size, warn=False))
            # well, it's really that this test w GPU doesn't make sense otw
            assert n.dtype == 'float32'
            f = theano.function(
                var_input,
                theano.Out(theano.sandbox.cuda.basic_ops.gpu_from_host(n),
                           borrow=True),
                mode=mode_with_gpu)
            # theano.printing.debugprint(f)
            sys.stdout.flush()
            gpu_out = numpy.asarray(f(*input))
            # print 'random?[:10]\n', gpu_out[0, 0:10]
            # print '----'
            sys.stdout.flush()
            basictest(f, steps_, const_size, target_avg=avg, target_std=std,
                      prefix='gpu mrg ', allow_01=True, inputs=input,
                      mean_rtol=rtol, std_tol=std_tol)
            # Need to allow some rounding error as their is float
            # computation that are done on the gpu vs cpu
            assert numpy.allclose(out, gpu_out, rtol=5e-6, atol=5e-6)

        # print ''
        # print 'ON CPU w NUMPY:'
        RR = theano.tensor.shared_randomstreams.RandomStreams(234)
        nn = RR.normal(size=size, avg=avg, std=std)
        ff = theano.function(var_input, nn)

        basictest(ff, steps_, const_size, target_avg=avg, target_std=std,
                  prefix='numpy ', allow_01=True, inputs=input,
                  mean_rtol=rtol)
def multi(grads, params, other_contexts):
    inputs = theano.gof.graph.inputs(grads)
    inputs = [
        inp for inp in inputs
        if not (isinstance(inp, T.Constant) or inp in params)
    ]

    symbolic_params = [_to_symbolic_var(p) for p in params]

    all_context_grads = []
    for ctx_i, context in enumerate(other_contexts):
        sharded_inputs = [
            theano.gpuarray.basic_ops.gpu_contiguous(
                inp[ctx_i::len(other_contexts) + 1])
            for inp in inputs
        ]
        xfer_inputs = [
            theano.gpuarray.as_gpuarray_variable(inp, context)
            for inp in sharded_inputs
        ]
        xfer_params = [
            theano.gpuarray.as_gpuarray_variable(sp, context)
            for sp in symbolic_params
        ]
        replacements = {
            x: xfer_x
            for x, xfer_x in zip(params + inputs, xfer_params + xfer_inputs)
        }

        # For whatever reason, theano.clone likes to make its own copies of the
        # replacement nodes we give it, so we need to dig into its generated
        # graph to grab the copies it made.
        for var in (xfer_params + xfer_inputs):
            var.name = str(uuid.uuid4())

        context_grad_graphs = [
            theano.clone(g, replace=replacements)
            for g in grads
        ]

        new_inputs = []
        for var in (xfer_params + xfer_inputs):
            for g in context_grad_graphs:
                matches = search(g, lambda x: x.name == var.name)
                if len(matches):
                    new_inputs.append(matches[0])
                    break
        if len(new_inputs) != len(xfer_params + xfer_inputs):
            raise Exception()

        grads_fn = theano.function(new_inputs, [
            theano.Out(g.transfer(context), borrow=True)
            for g in context_grad_graphs
        ])

        context_grads_op = ContextGradsOp(grads_fn, context, len(params))

        context_grads = context_grads_op(*(params + sharded_inputs))
        if not (isinstance(context_grads, list) or
                isinstance(context_grads, tuple)):
            context_grads = [context_grads]

        all_context_grads.append(context_grads)

    # context -> grad to grad -> context
    all_context_grads = zip(*all_context_grads)
    for i in xrange(len(all_context_grads)):
        all_context_grads[i] = [g.transfer(None) for g in all_context_grads[i]]

    # # Also schedule work on the main GPU
    # for i in xrange(len(all_context_grads)):
    #     sharded_inputs = [
    #         inp[len(other_contexts)::len(other_contexts)+1]
    #         for inp in inputs
    #     ]
    #     all_context_grads[i].append(
    #         theano.clone(
    #             grads[i],
    #             replace={inp: si for inp, si in zip(inputs, sharded_inputs)}
    #         )
    #     )

    avg_grads = [
        reduce(lambda a, b: a + b, gs) / float(len(gs))
        for gs in all_context_grads
    ]

    return avg_grads