Example #1
    def eval(self, **kwargs):

        want_clear = kwargs.pop('clear', True)
        want_bprop_inputs = kwargs.pop('want_bprop_inputs', False)
        bprop_inputs_loss = kwargs.pop('bprop_inputs_loss', None)
        self.clear()

        globals.flags.push("want_bprop_inputs", want_bprop_inputs)
        globals.flags.push("bprop_mode", want_bprop_inputs)

        # For each keyword, set the corresponding plug's value
        for key, val in kwargs.items():
            getattr(self, key).fpval = val

        # Pull the final value of each output plug (e.g. the loss) for this minibatch
        result = {p.name: p.fpval for p in self.oplugs}

        # If needed, also pull the backprop'd input deltas and include them in the result
        if want_bprop_inputs or bprop_inputs_loss:
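            # Note: pulling backprop'd inputs assumes a bprop_inputs_loss was also
            # supplied; the "Z" branch below dereferences it directly.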
            # First set up special backprop values: "Z" (the prediction) backpropagates a negative value
            # All other output plugs (e.g. costs) backpropagate zero.
            for p in self.oplugs:
                if p.name == "Z":
                    bprop_inputs_loss.batchmean = False  # Disable scaling gradient by minibatch size
                    bprop_inputs_loss.Z.fpval = result["Z"]
                    bprop_inputs_loss.Y.fpval = sm.zeros_like(result["Z"])
                    #bprop_inputs_loss.Y.fpval = -1.*sm.ones_like(result["Z"])
                    p._bpval = bprop_inputs_loss.Z.bpval
                    result['Zmask'] = bprop_inputs_loss.Zmask._fpval

                    # Only backprop gradient of target #0, not the other targets
                    if p._bpval.shape[1] > 1:
                        p._bpval[:, 1:] = sm.zeros_like(p._bpval[:, 1:])
                    #p._bpval = -result["Z"]
                    #p._bpval = -sm.ones_like(result["Z"])
                else:
                    p._bpval = sm.zeros((0, 0))

            # Now backpropagate to each input, and store the result
            if want_bprop_inputs:
                result.update({
                    "d" + p.name: p.bpval
                    for p in self.iplugs if p.name in kwargs
                })

        globals.flags.pop("want_bprop_inputs")
        globals.flags.pop("bprop_mode")

        # Clear all stored values in the dependency graph, effectively resetting it
        if want_clear:
            self.clear()
        for key in kwargs:
            getattr(self, key).fpval = plug_null

        return result
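
Worth noting: the masking step above zeroes every gradient column except target #0 before backpropagating to the inputs. A minimal NumPy sketch of that one step (the array and shapes here are hypothetical stand-ins, not the smat API):

    import numpy as np

    bpval = np.random.randn(4, 3)   # toy gradient: 4 examples, 3 targets

    # Keep only the gradient of target #0, zeroing the remaining columns,
    # mirroring the p._bpval[:, 1:] assignment above.
    if bpval.shape[1] > 1:
        bpval[:, 1:] = 0.0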
Example #2
    def _bprop(self, X, R, dZ):
        # Backpropagate the pooled gradient dZ through the max-pooling regions R,
        # using the argmax indices self.I saved during the forward pass.
        dX = sm.zeros_like(X)
        kangaroo_smat.poolrgn_bprop(dX, R, dZ, self.I, ptype="max")
        self.I = None  # Free the saved indices; they are only needed once
        return (dX, None)
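
A rough NumPy analogue of this backward pass for a single flat pooling region; `kangaroo_smat.poolrgn_bprop` and the saved index matrix `self.I` are library-specific, so everything below is an illustrative stand-in:

    import numpy as np

    X  = np.random.randn(8)    # inputs of one pooling region
    I  = int(np.argmax(X))     # index saved by the forward max pool
    dZ = 1.7                   # incoming gradient for the pooled output

    dX = np.zeros_like(X)      # gradient buffer, as in sm.zeros_like(X)
    dX[I] = dZ                 # only the argmax element receives gradient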
Example #3
    def _bprop(self, X, R, dZ):
        # Average pooling needs no saved indices from the forward pass,
        # so None is passed in their place.
        dX = sm.zeros_like(X)
        kangaroo_smat.poolrgn_bprop(dX, R, dZ, None, ptype="avg")
        return (dX, None)
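
For comparison, average pooling spreads the incoming gradient uniformly over the region, which is why no indices are required. A hypothetical NumPy equivalent for one region:

    import numpy as np

    X  = np.random.randn(8)            # inputs of one pooling region
    dZ = 1.7                           # incoming gradient for the pooled output

    dX = np.full_like(X, dZ / X.size)  # every element gets an equal share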
Example #4
    def _train_setup(self, trainable_plugs, cost):

        # For each trainable plug, figure out how many weights it needs.
        sizes   = [np.prod(p.shape) * cost.ninst for p in trainable_plugs]
        offsets = np.asarray(np.cumsum([0] + sizes), np.uint32)

        # Allocate giant contiguous arrays for P, dP, and mP
        P  = sm.zeros((offsets[-1],1))
        dP = sm.zeros_like(P)
        mP = sm.zeros_like(P)

        # Per-instance learn rates / momentum rates go here.
        # Giant contiguous array maps to the same indices as in P, dP, mP
        drate = sm.zeros_like(P)
        mrate = sm.zeros_like(P)

        trnodes = []

        # For each plug, create a trainable node that is bound to a chunk of our
        # P (parameter) and dP (gradient) vectors, where the node can read and
        # update its weights in place.
        for i, tplug in enumerate(trainable_plugs):

            # Grow the actual shape of the trainable parameters, using the
            # axis specified by the trainable plug.
            shape = list(tplug.shape)
            shape[tplug.inst_axis] *= tplug.node.ninst

            # Allocate a new trainable node, and connect it to the plug
            trnode = trainable(P[offsets[i]:offsets[i+1]].reshape(tuple(shape)),
                              dP[offsets[i]:offsets[i+1]].reshape(tuple(shape)))
            trnode >> tplug
            trnodes.append(trnode)

            # Assign instance-specific learning rates and momentum rates
            # to each corresponding element in the giant drate/mrate vectors
            if tplug.inst_axis == 0:
                k = np.prod(tplug.shape)
            else:
                k = tplug.shape[1]
            dratevec = drate[offsets[i]:offsets[i+1]]
            mratevec = mrate[offsets[i]:offsets[i+1]]
            _ext.madd_bcast(sm.ones_like(dratevec), self.rate, k, dratevec)
            _ext.madd_bcast(sm.ones_like(mratevec), self.momentum, k, mratevec)

            # Also initialize elements of P based on the trainable plug's initialization scale,
            # which can be different for each individual instance
            Pvec = P[offsets[i]:offsets[i+1]]
            initval = tplug.origin().node.init
            if isinstance(initval, np.ndarray) and initval.ndim == 3:
                # Specific initialization of individual filters
                Pvec[:] = sm.asarray(np.require(np.rollaxis(initval, 1), requirements="C").reshape((-1, 1)))
            else:
                # Random initialization
                _ext.madd_bcast(sm.randn(Pvec.shape[0], Pvec.shape[1]),
                                initval, k, Pvec)

            if hasattr(tplug.origin().node, 'init_mu'):
                initmu_val = tplug.origin().node.init_mu
                if isinstance(initmu_val, list):
                    # Specific initialization of individual bias elements
                    initmu_val = np.tile(initmu_val, tplug.origin().node.ninst)
                    Pvec[:] = sm.asarray(initmu_val).reshape(Pvec.shape)
                else:
                    _ext.madd_bcast(sm.ones_like(Pvec),
                                    tplug.origin().node.init_mu, k, Pvec)  # Add shift

        return (P, dP, mP, drate, mrate, trnodes)
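
The core trick in this setup is that every trainable node is bound to a reshaped slice of one contiguous parameter vector, so the optimizer can update all weights with flat vector arithmetic. A small NumPy sketch of that packing scheme (the shapes are made up for illustration):

    import numpy as np

    shapes  = [(3, 4), (4, 2)]                 # hypothetical per-plug shapes
    sizes   = [int(np.prod(s)) for s in shapes]
    offsets = np.cumsum([0] + sizes)

    P = np.zeros((offsets[-1], 1))             # one contiguous parameter vector

    # Reshaped slices of a contiguous array are views, so writing through W0
    # also updates the corresponding chunk of P (and vice versa).
    W0 = P[offsets[0]:offsets[1]].reshape(shapes[0])
    W0[:] = 1.0
    assert P[:sizes[0]].sum() == sizes[0]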