Ejemplo n.º 1
0
 def recordDefaults(learner,gradAccum,Y,P,**kw):
     """Record the standard status fields: identification, loss, and timing."""
     # Remember how many examples this gradient batch covered.
     gradAccum.counter['n'] = mutil.numRows(Y)
     statusMsg = Tracer.identification(learner,kw)
     statusMsg = statusMsg + Tracer.loss(learner,Y,P,kw)
     statusMsg = statusMsg + Tracer.timing(learner,kw)
     Tracer._record(gradAccum, statusMsg)
Ejemplo n.º 2
0
 def defaultPlusAcc(learner,gradAccum,Y,P,**kw):
     """Announce the default status fields plus an accuracy figure."""
     # Remember how many examples this gradient batch covered.
     gradAccum.counter['n'] = mutil.numRows(Y)
     statusMsg = Tracer.identification(learner,kw)
     statusMsg = statusMsg + Tracer.loss(learner,Y,P,kw)
     statusMsg = statusMsg + Tracer.accuracy(learner,Y,P,kw)
     statusMsg = statusMsg + Tracer.timing(learner,kw)
     Tracer._announce(gradAccum, statusMsg)
Ejemplo n.º 3
0
 def shuffle(self):
     """Randomly permute the rows of every mode, keeping X and Y aligned."""
     for mode in self.xDict:
         perm = NP.arange(mutil.numRows(self.xDict[mode]))
         NR.shuffle(perm)
         # Apply the SAME permutation to X and Y so each label stays
         # paired with its example.
         for d in (self.xDict, self.yDict):
             d[mode] = mutil.shuffleRows(d[mode], perm)
Ejemplo n.º 4
0
 def train(self,mode,X,Y):
     """Run self.epochs rounds of gradient descent on one mode's data."""
     trainStartTime = time.time()
     for epoch in range(self.epochs):
         epochStart = time.time()
         # numRows is recomputed per epoch, matching the regularizer's
         # per-pass bookkeeping.
         numExamples = mutil.numRows(X)
         tracerArgs = {'i':epoch,'startTime':epochStart}
         grads = self.crossEntropyGrad(mode,X,Y,tracerArgs=tracerArgs)
         self.regularizer.regularizeParams(self.prog,numExamples)
         self.applyUpdate(grads,self.rate)
Ejemplo n.º 5
0
 def datasetCrossEntropy(goldDset,predictedDset,perExample=True):
     """Total cross entropy over every learnable mode of a dataset.

     When perExample is true, each mode's contribution is divided by
     that mode's number of rows."""
     total = 0.0
     for mode in goldDset.modesToLearn():
         assert predictedDset.hasMode(mode), "CrossEntropy: Mode '%s' not available in predictedDset" % mode
         gold = goldDset.getY(mode)
         pred = predictedDset.getY(mode)
         xent = Learner.crossEntropy(gold,pred,perExample=False)
         if perExample:
             total += xent/mutil.numRows(gold)
         else:
             total += xent
     return total
Ejemplo n.º 6
0
 def datasetAccuracy(goldDset,predictedDset):
     """Row-weighted mean accuracy across all learnable modes, vs gold labels."""
     num = 0.0
     denom = 0.0
     for mode in goldDset.modesToLearn():
         assert predictedDset.hasMode(mode), "Accuracy: Mode '%s' not available in predictedDset" % mode
         gold = goldDset.getY(mode)
         pred = predictedDset.getY(mode)
         rows = mutil.numRows(gold)
         # Weight each mode's accuracy by its example count.
         num += rows * Learner.accuracy(gold,pred)
         denom += rows
     # Guard against an empty dataset (no modes / no rows).
     return num/denom if denom != 0 else 0
Ejemplo n.º 7
0
 def accuracy(Y,P):
     """Fraction of rows where the argmax of P lands on the gold label in Y."""
     #TODO surely there's a better way of doing this
     def onehotArgmax(row):
         # Zero everywhere except a 1.0 at the row's argmax.
         onehot = NP.zeros_like(row)
         onehot[row.argmax()] = 1.0
         return onehot
     numExamples = mutil.numRows(P)
     hits = 0.0
     for r in range(numExamples):
         predRow = P.getrow(r)
         goldRow = Y.getrow(r)
         hardPred = mutil.mapData(onehotArgmax,predRow)
         # Contributes the gold mass sitting on the predicted argmax.
         hits += goldRow.multiply(hardPred).sum()
     return hits/numExamples
Ejemplo n.º 8
0
 def train(self,dset):
     """Train for self.epochs passes, doing one full-batch update per mode
     per epoch, and report per-epoch counters through self.epochTracer."""
     trainStartTime = time.time()
     modes = dset.modesToLearn()
     numModes = len(modes)
     for i in range(self.epochs):
         startTime = time.time()
         # Accumulates per-mode counters so the epoch tracer can report totals.
         epochCounter = GradAccumulator.counter()
         for j,mode in enumerate(dset.modesToLearn()):
             n = mutil.numRows(dset.getX(mode))
             # Context passed through to the tracer for status messages.
             args = {'i':i,'startTime':startTime,'mode':str(mode)}
             try:
                 paramGrads = self.crossEntropyGrad(mode,dset.getX(mode),dset.getY(mode),tracerArgs=args)
                 self.regularizer.regularizeParams(self.prog,n)
                 self.applyUpdate(paramGrads,self.rate)
                 GradAccumulator.accumToCounter(epochCounter,paramGrads.counter)
             except:
                 # Best-effort: tag the failure with epoch/mode context,
                 # then re-raise so the error still propagates.
                 print "Unexpected error at %s:" % str(args), sys.exc_info()[:2]
                 raise
         self.epochTracer(self,epochCounter,i=i,startTime=trainStartTime)
Ejemplo n.º 9
0
    def train(self,dset):
        """Train for self.epochs passes over minibatches of size
        self.miniBatchSize, reporting per-epoch counters via self.epochTracer."""
        trainStartTime = time.time()
        modes = dset.modesToLearn()
        n = len(modes)
        for i in range(self.epochs):
            startTime = time.time()
            # Accumulates per-batch counters for the epoch-level tracer.
            epochCounter = GradAccumulator.counter()
            # k counts minibatches within the epoch (for tracer context).
            k = 0
            for (mode,X,Y) in dset.minibatchIterator(batchSize=self.miniBatchSize):
                # NOTE: n is rebound here to the batch's row count,
                # shadowing the mode count computed above.
                n = mutil.numRows(X)
                k = k+1
                args = {'i':i,'k':k,'startTime':startTime,'mode':mode}
                try:
                    paramGrads = self.crossEntropyGrad(mode,X,Y,tracerArgs=args)
                    self.regularizer.regularizeParams(self.prog,n)
                    self.applyUpdate(paramGrads,self.rate)
                    GradAccumulator.accumToCounter(epochCounter,paramGrads.counter)
                except:
                    # Best-effort: log batch context, then re-raise.
                    print "Unexpected error at %s:" % str(args), sys.exc_info()[:2]
                    raise

            self.epochTracer(self,epochCounter,i=i,startTime=trainStartTime)
Ejemplo n.º 10
0
 def minibatchIterator(self, batchSize=100, shuffleFirst=True):
     """Yield (mode, X', Y') minibatches of up to batchSize rows each,
     drawn without replacement, with the mode order randomized across
     the whole pass."""
     if shuffleFirst:
         # randomize example order within every mode first
         self.shuffle()
     modeList = self.modesToLearn()
     # Build one schedule entry per batch, labelled by its mode's index.
     batchOwners = {}
     for idx, mode in enumerate(modeList):
         nBatches = int(
             math.ceil(mutil.numRows(self.getX(mode)) / float(batchSize)))
         batchOwners[mode] = NP.ones(nBatches, dtype='int') * idx
     schedule = NP.concatenate(batchOwners.values())
     NR.shuffle(schedule)
     # Walk the schedule, consuming batchSize rows of the owning mode
     # each time a batch for that mode comes up.
     cursor = [0] * len(modeList)
     for idx in schedule:
         mode = modeList[idx]
         start = cursor[idx]
         cursor[idx] += batchSize
         yield (mode,
                mutil.selectRows(self.getX(mode), start, start + batchSize),
                mutil.selectRows(self.getY(mode), start, start + batchSize))
Ejemplo n.º 11
0
 def crossEntropy(Y,P,perExample=False):
     """Cross entropy of predictions P against labels Y, optionally per row."""
     total = -(Y.multiply(mutil.mapData(NP.log,P)).sum())
     if perExample:
         return total/mutil.numRows(Y)
     return total
Ejemplo n.º 12
0
 def cheap(learner,gradAccum,Y,P,**kw):
     """Announce only the cheap status fields: identification and timing."""
     # Remember how many examples this gradient batch covered.
     gradAccum.counter['n'] = mutil.numRows(Y)
     statusMsg = Tracer.identification(learner,kw) + Tracer.timing(learner,kw)
     Tracer._announce(gradAccum, statusMsg)
Ejemplo n.º 13
0
 def silent(learner,gradAccum,Y,P,**kw):
     """Tracer that records the example count but produces no output."""
     gradAccum.counter['n'] = mutil.numRows(Y)
Ejemplo n.º 14
0
def _doBackpropTask(task):
    """Worker-side backprop: unpack one task tuple and compute its gradients.

    Returns (numExamples, paramGrads), using the module-global workerLearner.
    """
    mode, X, Y, tracerArgs = task
    grads = workerLearner.crossEntropyGrad(mode, X, Y, tracerArgs=tracerArgs)
    return (mutil.numRows(X), grads)
Ejemplo n.º 15
0
 def totalNumExamples(self, miniBatches):
     """Total number of examples across all the (mode, X, Y) minibatches."""
     total = 0
     for _, X, _ in miniBatches:
         total += mutil.numRows(X)
     return total
Ejemplo n.º 16
0
 def _doEval(self, db, values, pad):
     """Return an all-zeros matrix with one row per row of the first input,
     typed self.outputType."""
     numRows = mutil.numRows(values[0])
     return db.zeros(numRows, self.outputType)