def evalxc(self,xc,input):
    inferenceFun = xc.inferenceFunction('predict/io')
    rawPred = inferenceFun(input)
    # trim small numbers to zero
    pred = mutil.mapData(lambda d:np.clip((d - 1e-5),0.00,9999.99), rawPred)
    pred.eliminate_zeros()
    return pred
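# A minimal standalone sketch (not the class method above) of the trimming pattern
# used in evalxc: shift every stored value down by a small threshold, clip at zero,
# then drop the explicit zeros from the sparse matrix. The 1e-5 threshold and the
# direct scipy.sparse usage here are assumptions for illustration only.
import numpy as np
import scipy.sparse as ss

def trimSmallValues(m,threshold=1e-5):
    """Return a copy of csr_matrix m with entries at or below threshold removed."""
    trimmed = m.copy()
    trimmed.data = np.clip(trimmed.data - threshold, 0.0, None)
    trimmed.eliminate_zeros()
    return trimmed

mat = ss.csr_matrix(np.array([[0.5, 1e-6, 0.0, 0.2]]))
print trimSmallValues(mat).toarray()   # approximately [[0.49999  0.  0.  0.19999]]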
def _setFeatureWeightsForTypelessDB(self,epsilon=1.0):
    """Initialize each feature used in the feature part of a rule, i.e.,
    for all rules annotated by "{foo(F):...}", declare 'foo/1' to be a
    parameter, and initialize it to something plausible.  The 'something
    plausible' is based on looking at how the variables defining foo are
    defined, eg for something like "p(X,Y):- ... {posWeight(F):hasWord(X,F)}"
    a constant sparse vector with non-zero weights for all second arguments
    of hasWord will be used to initialize posWeight.  The constant will be
    epsilon.
    """
    for paramName,domainModes in self.paramDomains.items():
        # we also need to infer a type for the parameter....
        def typeOfWeights(mode):
            for i in range(mode.arity):
                if mode.isInput(i):
                    return self.db.schema.getArgType(mode.functor,mode.arity,i)
            assert False
        weights = self.db.matrixPreimage(domainModes[0])
        weightType = typeOfWeights(domainModes[0])
        for mode in domainModes[1:]:
            weights = weights + self.db.matrixPreimage(mode)
            assert typeOfWeights(mode)==weightType,\
                'feature weights have incompatible types: derived from %s and %s' % (mode,domainModes[0])
        weights = weights * 1.0/len(domainModes)
        weights = mutil.mapData(lambda d:np.clip(d,0.0,1.0), weights)
        self.db.setParameter(paramName,1,weights*epsilon)
    for (paramName,arity) in self.getParamList():
        if not self.db.parameterIsInitialized(paramName,arity):
            logging.warn("Parameter %s could not be set automatically",paramName)
    logging.debug('total parameter size: %d', self.db.parameterSize())
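# A plain-numpy sketch of the initialization described in the docstring above:
# each mode contributes an indicator vector over candidate feature values (the
# analogue of db.matrixPreimage), the vectors are averaged, clipped to [0,1],
# and scaled by epsilon. The two made-up "preimage" vectors are illustrations only.
import numpy as np

preimages = [np.array([1.,1.,0.,0.]), np.array([0.,1.,1.,0.])]
weights = sum(preimages) / float(len(preimages))
weights = np.clip(weights, 0.0, 1.0)
epsilon = 1.0
print weights * epsilon    # [ 0.5  1.   0.5  0. ]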
def setRuleWeights(self,weights=None,epsilon=1.0,ruleIdPred=None):
    """Set the db predicate 'weighted/1' as a parameter, and initialize it
    to the given vector.  If no vector 'weights' is given, default to a
    constant vector of epsilon for each rule.  'weighted/1' is the default
    parameter used to weight rule-ids features, e.g., "r" in
    p(X,Y):-... {r}.  You can also specify the ruleIds with the name of a
    unary db relation that holds all the rule ids.
    """
    if len(self.ruleIds)==0:
        pass
    elif ruleIdPred is not None:
        # TODO check this stuff and add type inference!
        assert (ruleIdPred,1) in self.db.matEncoding,\
            'there is no unary predicate called %s' % ruleIdPred
        self.db.markAsParameter("weighted",1)
        self.db.setParameter("weighted",1,self.db.vector(declare.asMode('%s(o)' % ruleIdPred)) * epsilon)
    else:
        assert self.db.isTypeless(),\
            'cannot setRuleWeights for db with declared types unless ruleIdPred is given'
        self.db.markAsParameter("weighted",1)
        if weights is None:
            weights = self.db.onehot(self.ruleIds[0])
            for rid in self.ruleIds[1:]:
                weights = weights + self.db.onehot(rid)
            weights = mutil.mapData(lambda d:np.clip(d,0.0,1.0), weights)
        self.db.setParameter("weighted",1,weights*epsilon)
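# A small sketch, independent of the db API above, of how the default rule-weight
# vector is built: sum one one-hot vector per rule id, clip to [0,1] so repeated
# ids cannot exceed one, and scale by epsilon. The three-rule dimension and the
# duplicated id in the example call are made up.
import numpy as np

def defaultRuleWeights(numRules,ruleIndices,epsilon=1.0):
    weights = np.zeros(numRules)
    for j in ruleIndices:
        onehot = np.zeros(numRules)       # analogue of db.onehot(rid)
        onehot[j] = 1.0
        weights = weights + onehot
    weights = np.clip(weights, 0.0, 1.0)  # guard against duplicated rule ids
    return weights * epsilon

print defaultRuleWeights(3, [0, 2, 2])    # [ 1.  0.  1.]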
def applyUpdate(self,paramGrads,rate):
    """Add each gradient to the appropriate param, after scaling by rate,
    and clip negative parameters to zero.
    """
    paramGrads.fitParameterShapes()
    for (functor,arity),delta in paramGrads.items():
        m0 = self.prog.db.getParameter(functor,arity)
        m1 = m0 + rate * delta
        m2 = mutil.mapData(lambda d:NP.clip(d,0.0,NP.finfo('float32').max), m1)
        self.prog.db.setParameter(functor,arity,m2)
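# A dense-numpy sketch of the update rule above: add rate*delta to the parameter
# and clip negative entries to zero so parameters stay nonnegative. The parameter,
# gradient, and rate values are made up for illustration.
import numpy as np

param = np.array([0.2, 0.05, 1.0])
delta = np.array([0.5, -1.0, 0.1])
rate = 0.1
updated = np.clip(param + rate * delta, 0.0, np.finfo('float32').max)
print updated    # [ 0.25  0.    1.01]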
def meanUpdate(self,functor,arity,delta,n,totalN=0):
    #clip the delta vector to avoid exploding gradients
    delta = mutil.mapData(lambda d:NP.clip(d,conf.minGradient,conf.maxGradient), delta)
    if arity==1:
        #for a parameter that is a row-vector, we have one
        #gradient per example and we will take the mean
        compensation = 1.0 if totalN==0 else float(n)/totalN
        return mutil.mean(delta)*compensation
    else:
        #for a parameter that is a matrix, we have one gradient for the whole matrix
        compensation = (1.0/n) if totalN==0 else (1.0/totalN)
        return delta*compensation
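# A worked check, in plain numpy outside the learner, of the compensation factor
# above for row-vector parameters: averaging each minibatch and weighting by
# n/totalN gives the same result as averaging over all examples at once. The
# per-example gradients and batch sizes are made up.
import numpy as np

grads = np.array([[1.0],[2.0],[3.0],[4.0],[5.0]])   # one gradient row per example
batches = [grads[:2], grads[2:]]
totalN = sum(len(b) for b in batches)

perBatch = sum(b.mean(axis=0) * (float(len(b))/totalN) for b in batches)
overall = grads.mean(axis=0)
print perBatch, overall    # both [ 3.]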
def accuracy(Y,P):
    """Evaluate accuracy of predictions P versus labels Y."""
    #TODO surely there's a better way of doing this
    def allZerosButArgmax(d):
        result = NP.zeros_like(d)
        result[d.argmax()] = 1.0
        return result
    n = mutil.numRows(P)
    ok = 0.0
    for i in range(n):
        pi = P.getrow(i)
        yi = Y.getrow(i)
        ti = mutil.mapData(allZerosButArgmax,pi)
        ok += yi.multiply(ti).sum()
    return ok/n
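# A small dense-numpy sketch of what accuracy computes above: put a 1 at the argmax
# of each prediction row and take its product with the label row, so one-hot labels
# contribute 1 exactly when the predicted argmax is the labeled class. The toy
# Y and P below are made up.
import numpy as np

Y = np.array([[0,1,0],[1,0,0]])
P = np.array([[0.1,0.7,0.2],[0.2,0.3,0.5]])
hits = sum(Y[i, P[i].argmax()] for i in range(len(Y)))
print hits / float(len(Y))     # 0.5: first row correct, second row not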
def mapData(self,mapFun):
    """Apply some function to every gradient in the accumulator, returning
    a new GradAccumulator with the results."""
    result = GradAccumulator()
    for k,m in self.items():
        result.accum(k, mutil.mapData(mapFun,m))
    return result
def crossEntropy(Y,P,perExample=False):
    """Compute cross entropy of some predictions P relative to some labels Y."""
    logP = mutil.mapData(NP.log,P)
    result = -(Y.multiply(logP).sum())
    return result/mutil.numRows(Y) if perExample else result
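# Cross entropy as computed above is -sum_{i,j} Y[i,j]*log(P[i,j]), optionally
# divided by the number of rows. A dense-numpy check on made-up Y and P:
import numpy as np

Y = np.array([[0.,1.],[1.,0.]])
P = np.array([[0.25,0.75],[0.5,0.5]])
total = -(Y * np.log(P)).sum()
print total                     # -(log 0.75 + log 0.5), about 0.981
print total / Y.shape[0]        # per-example version, about 0.490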
def train(self, dset):
    modes = dset.modesToLearn()
    trainStartTime = time.time()
    sumSquareGrads = learn.GradAccumulator()
    for i in range(self.epochs):
        logging.info("starting epoch %d" % i)
        startTime = time.time()
        #generate the tasks
        miniBatches = list(dset.minibatchIterator(batchSize=self.miniBatchSize))
        bpInputs = map(lambda (k, b): ParallelFixedRateGDLearner.miniBatchToTask(b, i, k, startTime),
                       enumerate(miniBatches))
        totalN = self.totalNumExamples(miniBatches)
        #generate gradients - in parallel
        bpOutputs = self.pool.map(_doBackpropTask, bpInputs)
        # accumulate to sumSquareGrads
        totalGradient = learn.GradAccumulator()
        for (n, paramGrads) in bpOutputs:
            for (functor, arity), grad in paramGrads.items():
                totalGradient.accum((functor, arity),
                                    self.meanUpdate(functor, arity, grad, n, totalN))
        sumSquareGrads = sumSquareGrads.addedTo(totalGradient.mapData(NP.square))
        #compute gradient-specific rate
        ratePerParam = sumSquareGrads.mapData(lambda d: d + 1e-1).mapData(NP.sqrt).mapData(NP.reciprocal)
        # scale down totalGradient by per-feature weight
        for (functor, arity), grad in totalGradient.items():
            totalGradient[(functor, arity)] = grad.multiply(ratePerParam[(functor, arity)])
        self.regularizer.regularizeParams(self.prog, totalN)
        for (functor, arity) in self.prog.db.paramList:
            m = self.prog.db.getParameter(functor, arity)
            print 'reg', functor, '/', arity, 'm shape', m.shape
            if (functor, arity) in totalGradient.keys():
                print 'vs totalGradient shape', totalGradient[(functor, arity)].shape
            else:
                print 'not in totalGradient'
        #cannot use process gradients because I've already scaled them down,
        # need to just add and clip
        for (functor, arity), grad in totalGradient.items():
            m0 = self.prog.db.getParameter(functor, arity)
            m1 = m0 + self.rate * grad
            m = mutil.mapData(lambda d: NP.clip(d, 0.0, NP.finfo('float32').max), m1)
            self.prog.db.setParameter(functor, arity, m)
        # send params to workers
        self.broadcastParameters()
        # status updates
        epochCounter = learn.GradAccumulator.mergeCounters(
            map(lambda (n, grads): grads.counter, bpOutputs))
        self.epochTracer(self, epochCounter, i=i, startTime=trainStartTime)
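# A condensed plain-numpy sketch of the AdaGrad-style scaling used in train():
# accumulate squared gradients across epochs and divide each new gradient by
# sqrt(accumulated + 0.1), so entries with a history of large gradients get
# smaller steps. The gradient values are made up.
import numpy as np

sumSquare = np.zeros(3)
for g in [np.array([1.0, 0.0, 0.5]), np.array([2.0, 0.0, 0.5])]:
    sumSquare += np.square(g)
    rate = np.reciprocal(np.sqrt(sumSquare + 1e-1))
    print g * rate    # second pass shrinks more where sumSquare has grown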
def _doBackprop(self, delta, gradAccum, pad):
    # d/dx log(1+x) = 1/(1+x), so scale the incoming delta elementwise
    # by the reciprocal of (inner output + 1) before backpropagating
    newDelta = mutil.mapData(lambda d: numpy.reciprocal(d + 1), self.inner).multiply(delta)
    return self.fun.backprop(newDelta, gradAccum)
def _doEval(self, db, values, pad):
    self.inner = self.fun.eval(db, values, pad)
    # clip negative values to zero, then apply log(1+x) elementwise
    return mutil.mapData(lambda d: numpy.log1p(d.clip(0, None)), self.inner)
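# A quick numeric check, in plain numpy, that the eval/backprop pair above
# implements y = log(1+x) with gradient dy/dx = 1/(1+x): the analytic derivative
# matches a finite-difference estimate. The sample points are made up.
import numpy as np

x = np.array([0.0, 0.5, 2.0])
eps = 1e-6
analytic = np.reciprocal(x + 1)
numeric = (np.log1p(x + eps) - np.log1p(x)) / eps
print analytic
print numeric      # agrees with analytic to about 1e-6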