Example 1
    def grad(self, inputs, output_gradients):
        C, d, WShape, B = inputs
        dLdA, = output_gradients

        z = T.zeros_like(C[0, 0, 0, 0, :])
        dLdC = convTransp3D(dLdA, z, d, B, C.shape[1:4])
        dLdd = None  # not differentiable, since d is not continuous
        dLdWShape = None  # not differentiable, since WShape is not continuous
        dLdB = conv3D(C, dLdA, T.zeros_like(B[0, 0, 0, 0, :]), d)

        return [dLdC, dLdd, dLdWShape, dLdB]
Example 2
    def grad(self, inputs, output_gradients):
        C, d, WShape, B = inputs
        dLdA, = output_gradients

        z = T.zeros_like(C[0, 0, 0, 0, :])
        dLdC = convTransp3D(dLdA, z, d, B, C.shape[1:4])
        # d actually does affect the outputs, so it's not disconnected
        dLdd = grad_undefined(self, 1, d)
        # The shape of the weights doesn't affect the output elements
        dLdWShape = DisconnectedType()()
        dLdB = conv3D(C, dLdA, T.zeros_like(B[0, 0, 0, 0, :]), d)

        return [dLdC, dLdd, dLdWShape, dLdB]
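When an Op returns `DisconnectedType()()` for an input, Theano also expects the Op's `connection_pattern` method to report that input as disconnected so `theano.grad` can skip it. A minimal sketch for the four-input, one-output Op above; the method name and return format are standard Theano, while the specific pattern is inferred from the comments in this example:

    def connection_pattern(self, node):
        # One row per input (C, d, WShape, B), one column per output.
        # True means the input can affect that output's values.
        return [[True],   # C
                [True],   # d affects the outputs, though its grad is undefined
                [False],  # WShape only fixes the output shape
                [True]]   # B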
Example 3
    def grad(self, inputs, output_gradients):
        W, b, d, H, RShape = inputs
        dCdR, = output_gradients
        dCdH = conv3D(dCdR, W, T.zeros_like(H[0, 0, 0, 0, :]), d)
        WShape = W.shape
        dCdW = convGrad3D(dCdR, d, WShape, H)
        dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
        dCdd = None  # not differentiable, since d is not continuous
        dCdRShape = None  # not differentiable, since RShape is not continuous

        if 'name' in dir(dCdR) and dCdR.name is not None:
            dCdR_name = dCdR.name
        else:
            dCdR_name = 'anon'

        if 'name' in dir(H) and H.name is not None:
            H_name = H.name
        else:
            H_name = 'anon'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon'

        dCdW.name = ('ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name)
        dCdb.name = ('ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name + ',b=' + b_name)
        dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name

        return [dCdW, dCdb, dCdd, dCdH, dCdRShape]
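The bias gradient above is just the output gradient summed over every axis except the channel axis. A quick NumPy check of that reduction, assuming the (batch, row, column, time, channel) layout these Ops use (toy shapes, chosen only for illustration):

import numpy as np

dCdR = np.random.randn(2, 3, 3, 3, 5)  # (batch, row, column, time, channel)
dCdb = dCdR.sum(axis=(0, 1, 2, 3))     # one gradient entry per channel
assert dCdb.shape == (5,)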
Example 4
    def make_rule(self, local_particle, local_acc_updates, global_particle):
        """Make Downpour rule.

        All particles, along with the global particle, start from the same
        position. Under this rule, each local particle executes descent
        normally, but its parameter updates are accumulated (e.g. as a moving
        average) into a shared variable. Every N iterations, the local
        accumulated updates are summed across workers and applied to the
        global particle, and each local particle restarts from the global
        particle's position.

        Parameters
        ----------
        local_particle : {:ref:`theano.compile.SharedVariable`,
                          list of :ref:`theano.compile.SharedVariable`}
           A particle's position in parameter space doing local SGD.
        local_acc_updates : {:ref:`theano.compile.SharedVariable`,
                             list of :ref:`theano.compile.SharedVariable`}
           Shared variable accumulating local parameter updates.
        global_particle : {:ref:`theano.compile.SharedVariable`,
                           list of :ref:`theano.compile.SharedVariable`}
           A particle whose position is updated only by the Downpour process and
           resets position of local particles.

        .. seealso:: Notes on :meth:`GlobalDynamics.make_rule`

        """
        import theano
        from theano.tensor import basic
        if isinstance(local_particle, theano.compile.SharedVariable):
            local_particle = [local_particle]
        if isinstance(local_acc_updates, theano.compile.SharedVariable):
            local_acc_updates = [local_acc_updates]
        if isinstance(global_particle, theano.compile.SharedVariable):
            global_particle = [global_particle]

        new_global = []
        new_local = []
        new_acc_updates = []
        for lp, lau, gp in zip(local_particle, local_acc_updates,
                               global_particle):
            global_acc_updates = AllReduceSum(lau, inplace=True)
            if self.average:
                global_acc_updates /= self.worker.global_size
            new_global.append(gp + global_acc_updates)
            new_local.append(new_global[-1])
            new_acc_updates.append(basic.zeros_like(lau))

        updates = list(zip(local_particle, new_local)) + \
            list(zip(local_acc_updates, new_acc_updates)) + \
            list(zip(global_particle, new_global))

        self._fn = theano.function([], [],
                                   updates=updates,
                                   accept_inplace=True)
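A hypothetical usage sketch for this rule, assuming an instance `downpour` of the class that defines `make_rule` and plain Theano shared variables; `AllReduceSum` and `self.worker` belong to the surrounding framework and are not reproduced here:

import numpy as np
import theano

# All three particles start from the same position in parameter space.
init = np.zeros(10, dtype=theano.config.floatX)
local = theano.shared(init.copy(), name='local_particle')
acc = theano.shared(init.copy(), name='local_acc_updates')
global_ = theano.shared(init.copy(), name='global_particle')

downpour.make_rule(local, acc, global_)
# Run local SGD, accumulating each update into `acc`; every N iterations
# the framework invokes the compiled rule to sync all particles.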
Example 5
    def grad(self, inputs, output_gradients):
        V, W, b, d = inputs
        dCdH, = output_gradients
        # make all of these ops support broadcasting of scalar b to vector b
        # and replace the zeros_like in all their grads
        # print dCdH.broadcastable
        # print "dCdH.broadcastable"
        # quit(-1)
        # dCdH = printing.Print("dCdH = ",["shape"])

        # Make sure the broadcasting pattern of the gradient is the same
        # as the initial variable
        dCdV = theano.tensor.nnet.convTransp3D(
            W, T.zeros_like(V[0, 0, 0, 0, :]), d, dCdH, V.shape[1:4])
        dCdV = T.patternbroadcast(dCdV, V.broadcastable)
        WShape = W.shape
        dCdW = theano.tensor.nnet.convGrad3D(V, d, WShape, dCdH)
        dCdW = T.patternbroadcast(dCdW, W.broadcastable)
        dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
        dCdb = T.patternbroadcast(dCdb, b.broadcastable)
        dCdd = grad_undefined(
            self, 3, inputs[3],
            "The gradient of Conv3D with respect to the convolution"
            " stride is undefined because Conv3D is only defined for"
            " integer strides.")

        if 'name' in dir(dCdH) and dCdH.name is not None:
            dCdH_name = dCdH.name
        else:
            dCdH_name = 'anon_dCdH'

        if 'name' in dir(V) and V.name is not None:
            V_name = V.name
        else:
            V_name = 'anon_V'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon_W'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon_b'

        dCdV.name = 'Conv3D_dCdV(dCdH=' + dCdH_name + ',V=' + V_name + ')'
        dCdW.name = ('Conv3D_dCdW(dCdH=' + dCdH_name + ',V=' + V_name + ',W=' +
                     W_name + ')')
        dCdb.name = ('Conv3D_dCdb(dCdH=' + dCdH_name + ',V=' + V_name + ',W=' +
                     W_name + ',b=' + b_name + ')')

        return [dCdV, dCdW, dCdb, dCdd]
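`T.patternbroadcast` appears here because `theano.grad` requires each returned gradient to carry the same broadcastable pattern as the corresponding input, and the conv Ops do not necessarily preserve it. A minimal, self-contained illustration of the call itself (variable names are ours):

import theano.tensor as T

g = T.matrix('g')                               # broadcastable == (False, False)
g_fixed = T.patternbroadcast(g, (True, False))  # mark the first dim broadcastable
print(g_fixed.broadcastable)                    # (True, False)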
Example 6
    def grad(self, inputs, output_gradients):
        V, W, b, d = inputs
        dCdH, = output_gradients
        # make all of these ops support broadcasting of scalar b to vector b
        # and replace the zeros_like in all their grads
        # print dCdH.broadcastable
        # print "dCdH.broadcastable"
        # quit(-1)
        # dCdH = printing.Print("dCdH = ", ["shape"])

        # Make sure the broadcasting pattern of the gradient is the same
        # as the initial variable
        dCdV = ConvTransp3D.convTransp3D(W, T.zeros_like(V[0, 0, 0, 0, :]),
                                         d, dCdH, V.shape[1:4])
        dCdV = T.patternbroadcast(dCdV, V.broadcastable)
        WShape = W.shape
        dCdW = ConvGrad3D.convGrad3D(V, d, WShape, dCdH)
        dCdW = T.patternbroadcast(dCdW, W.broadcastable)
        dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
        dCdb = T.patternbroadcast(dCdb, b.broadcastable)
        dCdd = None  # not differentiable, since d is not continuous

        if 'name' in dir(dCdH) and dCdH.name is not None:
            dCdH_name = dCdH.name
        else:
            dCdH_name = 'anon'

        if 'name' in dir(V) and V.name is not None:
            V_name = V.name
        else:
            V_name = 'anon'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon'

        dCdV.name = 'Conv3D_dCdV.dCdH=' + dCdH_name + ',V=' + V_name
        dCdW.name = ('Conv3D_dCdW.dCdH=' + dCdH_name + ',V=' + V_name +
                     ',W=' + W_name)
        dCdb.name = ('Conv3D_dCdb.dCdH=' + dCdH_name + ',V=' + V_name +
                     ',W=' + W_name + ',b=' + b_name)

        return [dCdV, dCdW, dCdb, dCdd]
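The four name-lookup blocks repeat one pattern; a sketch of how they could be condensed (the helper `_var_name` is ours, not part of the original source):

def _var_name(var, default='anon'):
    # getattr covers objects without a `name` attribute; `or` covers name=None.
    return getattr(var, 'name', None) or default

dCdH_name = _var_name(dCdH)
V_name = _var_name(V)
W_name = _var_name(W)
b_name = _var_name(b)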
Example 7
    def grad(self, inputs, output_gradients):
        V, W, b, d = inputs
        dCdH, = output_gradients
        # make all of these ops support broadcasting of scalar b to vector b
        # and replace the zeros_like in all their grads
        # print dCdH.broadcastable
        # print "dCdH.broadcastable"
        # quit(-1)
        # dCdH = printing.Print("dCdH = ", ["shape"])

        dCdV = ConvTransp3D.convTransp3D(W, T.zeros_like(V[0, 0, 0, 0, :]), d,
                                         dCdH, V.shape[1:4])
        WShape = W.shape
        dCdW = ConvGrad3D.convGrad3D(V, d, WShape, dCdH)
        dCdb = T.sum(dCdH, axis=(0, 1, 2, 3))
        dCdd = None  # not differentiable, since d is not continuous

        if 'name' in dir(dCdH) and dCdH.name is not None:
            dCdH_name = dCdH.name
        else:
            dCdH_name = 'anon'

        if 'name' in dir(V) and V.name is not None:
            V_name = V.name
        else:
            V_name = 'anon'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon'

        dCdV.name = 'Conv3D_dCdV.dCdH=' + dCdH_name + ',V=' + V_name
        dCdW.name = ('Conv3D_dCdW.dCdH=' + dCdH_name + ',V=' + V_name +
                     ',W=' + W_name)
        dCdb.name = ('Conv3D_dCdb.dCdH=' + dCdH_name + ',V=' + V_name +
                     ',W=' + W_name + ',b=' + b_name)

        return [dCdV, dCdW, dCdb, dCdd]
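When a grad implementation like this is modified, Theano's finite-difference checker is the usual safety net. A hedged sketch using the public `theano.tensor.nnet.conv3D` wrapper with toy shapes in the same (batch, row, column, time, channel) layout; tolerances and the stride convention may need adjusting:

import numpy as np
import theano
import theano.tensor as T

rng = np.random.RandomState(42)
V_val = rng.randn(1, 4, 4, 4, 2).astype(theano.config.floatX)  # input video
W_val = rng.randn(3, 2, 2, 2, 2).astype(theano.config.floatX)  # filters
b_val = np.zeros(3, dtype=theano.config.floatX)                # per-filter bias

# Check dC/dV, dC/dW and dC/db against finite differences; the stride d is
# held fixed because its gradient is undefined.
theano.gradient.verify_grad(
    lambda V, W, b: theano.tensor.nnet.conv3D(V, W, b, d=(1, 1, 1)),
    [V_val, W_val, b_val], rng=rng)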
Example 8
    def grad(self, inputs, output_gradients):
        W, b, d, H, RShape = inputs
        dCdR, = output_gradients
        dCdH = theano.tensor.nnet.conv3D(dCdR, W,
                                         T.zeros_like(H[0, 0, 0, 0, :]), d)
        WShape = W.shape
        dCdW = theano.tensor.nnet.convGrad3D(dCdR, d, WShape, H)
        dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
        # not differentiable, since d affects the output elements
        dCdd = grad_undefined(self, 2, d)
        # disconnected, since RShape just determines the output shape
        dCdRShape = DisconnectedType()()

        if 'name' in dir(dCdR) and dCdR.name is not None:
            dCdR_name = dCdR.name
        else:
            dCdR_name = 'anon_dCdR'

        if 'name' in dir(H) and H.name is not None:
            H_name = H.name
        else:
            H_name = 'anon_H'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon_W'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon_b'

        dCdW.name = ('ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name)
        dCdb.name = ('ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name + ',b=' + b_name)
        dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name

        return [dCdW, dCdb, dCdd, dCdH, dCdRShape]
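Because `RShape` is disconnected, asking `theano.grad` for its gradient raises `DisconnectedInputError` by default; the `disconnected_inputs` flag relaxes that. A self-contained illustration with scalars rather than the conv Op itself:

import theano
import theano.tensor as T

x = T.scalar('x')
y = T.scalar('y')
cost = x ** 2  # cost never uses y, so y is disconnected
g = T.grad(cost, y, disconnected_inputs='ignore')  # zero grad instead of an error
f = theano.function([x, y], g, on_unused_input='ignore')
print(f(3.0, 4.0))  # 0.0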
Example 9
    def grad(self, inputs, output_gradients):
        W, b, d, H, RShape = inputs
        dCdR, = output_gradients
        dCdH = conv3D(dCdR, W, T.zeros_like(H[0, 0, 0, 0, :]), d)
        WShape = W.shape
        dCdW = convGrad3D(dCdR, d, WShape, H)
        dCdb = T.sum(dCdR, axis=(0, 1, 2, 3))
        dCdd = None  # not differentiable, since d is not continuous
        dCdRShape = None  # not differentiable, since RShape is not continuous

        if 'name' in dir(dCdR) and dCdR.name is not None:
            dCdR_name = dCdR.name
        else:
            dCdR_name = 'anon'

        if 'name' in dir(H) and H.name is not None:
            H_name = H.name
        else:
            H_name = 'anon'

        if 'name' in dir(W) and W.name is not None:
            W_name = W.name
        else:
            W_name = 'anon'

        if 'name' in dir(b) and b.name is not None:
            b_name = b.name
        else:
            b_name = 'anon'

        dCdW.name = ('ConvTransp3D_dCdW.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name)
        dCdb.name = ('ConvTransp3D_dCdb.H=' + H_name + ',dCdR=' + dCdR_name +
                     ',W=' + W_name + ',b=' + b_name)
        dCdH.name = 'ConvTransp3D_dCdH.H=' + H_name + ',dCdR=' + dCdR_name

        return [dCdW, dCdb, dCdd, dCdH, dCdRShape]
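For reference, the `None` returns in this older version correspond to the explicit idioms used in Example 8; a sketch of the drop-in replacement lines for the method above (both helpers live in `theano.gradient`):

from theano.gradient import DisconnectedType, grad_undefined

# d affects the output values but only takes integer values,
# so its gradient is undefined rather than disconnected.
dCdd = grad_undefined(self, 2, d)
# RShape only selects the output shape, never the output values.
dCdRShape = DisconnectedType()()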