def updateData(self, data):
    """Run the inference engine on one batch.

    Collects device pointers for all inputs, allocates the output tensor(s)
    for the current batch size, appends their pointers, and enqueues the
    whole binding list on the engine.
    """
    inputs = data if isinstance(data, list) else [data]
    batchsize = inputs[0].shape[0]
    bindings = [tensor.ptr for tensor in inputs]

    if isinstance(self.outshape, list):
        # Multiple outputs: one buffer per declared shape (batch dim replaced).
        self.data = [
            gpuarray.empty((batchsize, ) + shape[1:], dtype=np.float32, allocator=memPool)
            for shape in self.outshape
        ]
        bindings.extend(out.ptr for out in self.data)
    else:
        self.data = gpuarray.empty((batchsize, ) + self.outshape[1:], dtype=np.float32, allocator=memPool)
        bindings.append(self.data.ptr)

    self.engine.enqueue(batchsize, bindings)
def updateData(self, data):
    """Allocate output buffers and run inference through the engine.

    Inputs and outputs are passed to the engine as name -> (pointer, nbytes)
    dictionaries, with names generated positionally.
    """
    if isinstance(self.outshape, list):
        self.data = [gpuarray.empty(shape, dtype=np.float32, allocator=memPool) for shape in self.outshape]
    else:
        self.data = gpuarray.empty(self.outshape, dtype=np.float32, allocator=memPool)

    intensors = data if isinstance(data, list) else [data]
    outtensors = self.data if isinstance(self.data, list) else [self.data]

    inputs = {"data_%s" % i: (tensor.ptr, tensor.nbytes) for i, tensor in enumerate(intensors)}
    outputs = {"outdata_%s" % i: (tensor.ptr, tensor.nbytes) for i, tensor in enumerate(outtensors)}

    self.engine.infer(outputs, inputs)
def __init__(self):
    """Initialize error-tracking state for a cost module."""
    # Scalar (0-d) device buffers: one for accumulating error across batches,
    # one for the per-call error written by the kernels.
    self.accumErr = gpuarray.empty((), dtype=np.float32)
    self.devErr = gpuarray.empty((), dtype=np.float32)

    # Host-side bookkeeping, filled in during training/validation.
    self.error = None
    self.valError = None
    self.grad = None
    self.batchsize = None
    self.numOfSamples = None

    # Marks cached error as stale until the accumulator is reset below.
    self.dirty = True
    self.resetAccumulator()
def updateData(self, data):
    """Forward pass of the noise-injection module.

    In train mode, samples a noise tensor (uniform or gaussian, per
    self.type/self.params) and combines it with the input additively or
    multiplicatively (per self.mode). In test mode the input passes through.
    """
    if self.train:
        # The RNG fill presumably requires an even element count — pad by one
        # when data.size is odd, then trim back below. TODO confirm against rng.
        size = data.size if data.size % 2 == 0 else data.size + 1
        rands = gpuarray.empty((size, ), dtype=np.float32, allocator=memPool)

        if self.type == NoiseType.uniform:
            a, b = self.params
            fillUniform(rands, a, b, self.rng)
        elif self.type == NoiseType.gaussian:
            mean, sigma = self.params
            fillNormal(rands, mean, sigma, self.rng)
        else:
            raise NotImplementedError(self.type)

        # Noise is generated in float32; cast to the data dtype if they differ,
        # then trim the padding and match the input shape.
        self.rands = rands if data.dtype == np.float32 else rands.astype(
            data.dtype)
        self.rands = self.rands[:data.size].reshape(data.shape)

        if self.inplace:
            self.data = data
        else:
            if self.slice is not None:
                # Sliced injection: copy first so elements outside the slice survive.
                self.data = copy(None, data)
            else:
                self.data = gpuarray.empty(data.shape, dtype=data.dtype, allocator=memPool)

        if self.mode == InjectMode.add:
            self.data = data + rands (restricted to slice when set)
        elif self.mode == InjectMode.mul:
            mulKer(data.dtype)(self.data, data, self.rands, slice=self.slice)
        else:
            raise NotImplementedError(self.mode)
    else:
        # Inference: noise is disabled, pass input through unchanged.
        self.data = data
def activateNeurons(self, neurons):
    """Stochastically activate RBM neurons in place using uniform noise."""
    noise = gpuarray.empty(neurons.shape, dtype=np.float32, allocator=memPool)
    self.rng.fillUniform(noise)
    # Kernel reads and writes the same buffer: sampling happens in place.
    rbmKer(neurons, neurons, noise)
def updateData(self, data):
    """Apply the activation function to the input, in place when allowed."""
    if self.inplace:
        outdata = data
    else:
        outdata = gpuarray.empty(data.shape, dtype=data.dtype, allocator=memPool)

    self.data = outdata
    self.actFunc(data.dtype)(outdata, data, *self.actArgs, slice=self.slc)
def updateGrad(self, grad):
    """Backward pass of the recurrent layer.

    When only the final output(s) were returned (getSequences is False), the
    incoming gradient is expanded to the full sequence length with zeros at
    the steps that produced no output, then the RNN backward-data pass runs.
    """
    if self.getSequences:
        # Full sequence was returned, so grad already covers every timestep.
        fullgrad = grad
    else:
        seqlen = self.fulldata.shape[0]
        if self.direction == DirectionMode.uni:
            # Unidirectional: only the last timestep was emitted.
            fullgrad = gpuarray.empty((seqlen, ) + grad.shape, dtype=grad.dtype, allocator=memPool)
            fullgrad[:seqlen - 1].fill(0.0)
            fullgrad[seqlen - 1].set(grad)
        else:
            # Bidirectional: the forward pass output sits at the last step in
            # the first half of the hidden axis, the backward pass output at
            # step 0 in the second half.
            fwdgrad, bwdgrad = grad
            batchsize, hsize = fwdgrad.shape[0], 2 * self.hsize
            fullgrad = gpuarray.zeros((seqlen, batchsize, hsize), dtype=fwdgrad.dtype, allocator=memPool)
            fullgrad[0, :, bwdgrad.shape[1]:].set(bwdgrad)
            fullgrad[-1, :, :fwdgrad.shape[1]].set(fwdgrad)

    self.grad, self.reserve = backwardDataRnn(fullgrad, self.fulldata, self.W, self.reserve, self.descRnn)
def calcGrad(self, pred, target):
    """Gradient of the L1 loss, normalized by the total number of target elements."""
    norm = 1.0 / np.prod(target.shape)
    grad = gpuarray.empty(pred.shape, dtype=np.float32, allocator=memPool)
    l1gradKer(grad, pred, target, norm)
    return grad
def updateGrad(self, grad):
    """Backward pass of the (optionally weighted) reduction along self.axis.

    The incoming gradient is reshaped to (pre, 1, post) and broadcast back
    over the reduced axis — through the weight vector when useWeights is set,
    through a vector of ones otherwise. When weights are used, the gradient
    with respect to the weights is computed as well.

    Fix: the temporary vector of ones is now allocated from the shared
    memory pool (allocator=memPool), consistent with every other temporary
    allocation in this module.
    """
    preAxis, postAxis = int(np.prod(grad.shape[:self.axis])), int(np.prod(grad.shape[self.axis:]))
    outgrad = grad.reshape(preAxis, 1, postAxis)

    wgrad = None
    if self.useWeights:
        v = self.v.reshape(preAxis, self.axisSize, 1)
        datagrad = BlasGroup.mulTensorBatch(v, outgrad, formatA="gbp", formatB="gbp", formatOut="gbp")
        wgrad = BlasGroup.mulTensorOnVecGroup(self.inData[0], grad, formatT="gbp")
    else:
        # Unweighted reduce: broadcast via a ones-vector (fill returns the array).
        ones = gpuarray.empty(shape=(1, self.axisSize, 1), dtype=np.float32, allocator=memPool).fill(1.0)
        datagrad = BlasGroup.mulTensorBatch(ones, outgrad, formatA="gbp", formatB="gbp", formatOut="gbp")

    # Re-insert the reduced axis into the gradient shape.
    datagrad = datagrad.reshape(*grad.shape[:self.axis], self.axisSize, *grad.shape[self.axis:])
    self.grad = [datagrad, wgrad] if self.useWeights else datagrad
def updateGrad(self, grad):
    """Apply the activation derivative to the incoming gradient, in place when allowed."""
    if self.inplace:
        outgrad = grad
    else:
        outgrad = gpuarray.empty(grad.shape, dtype=grad.dtype, allocator=memPool)

    self.grad = outgrad
    # Derivative kernel needs the forward output (self.data) as well.
    self.actFuncDer(grad.dtype)(outgrad, grad, self.data, *self.actArgs, slice=self.slc)
def updateData(self, data):
    """Elementwise product of all input tensors."""
    product = gpuarray.empty(data[0].shape, dtype=np.float32, allocator=memPool)
    product.fill(1.0)

    # Multiply each input into the running product.
    for tensor in data:
        mulKer(tensor.dtype)(product, tensor, product)

    self.data = product
def calcVal(self, pred, target):
    """Validation value of the smooth-L1 loss (the gradient buffer is discarded)."""
    fullnorm = 1.0 / np.prod(target.shape)
    diff = gpuarray.empty(pred.shape, dtype=np.float32, allocator=memPool)
    devErr = gpuarray.zeros((), dtype=np.float32, allocator=memPool)

    smoothL1Ker(pred, target, devErr, diff, fullnorm, fullnorm)
    return devErr.get()
def updateGrad(self, grad):
    """Cast the gradient back to the input dtype, or pass it through untouched."""
    if self.intype == self.outtype:
        self.grad = grad
        return

    self.grad = gpuarray.empty(grad.shape, dtype=self.intype, allocator=memPool)
    self.gradKer(self.grad, grad)
def updateData(self, data):
    """Cast the input to the output dtype, or pass it through untouched."""
    if self.intype == self.outtype:
        self.data = data
        return

    self.data = gpuarray.empty(data.shape, dtype=self.outtype, allocator=memPool)
    self.dataKer(self.data, data)
def updateData(self, data):
    """Forward pass of 2d (per-feature-map) dropout.

    In train mode samples one random integer per (batch, map) pair and lets
    the kernel zero whole maps based on the keep threshold; in test mode the
    input passes through unchanged.
    """
    if self.train:
        if self.inplace:
            self.data = data
        else:
            if self.slice is not None:
                # Sliced dropout: copy first so regions outside the slice survive.
                self.data = copy(None, data)
            else:
                self.data = gpuarray.empty(data.shape, dtype=data.dtype, allocator=memPool)

        batchsize, maps, height, width = data.shape
        self.mapsize = height * width

        # Random-value width matches the data dtype (32-bit for fp32, 16-bit for fp16).
        parttype = {
            np.float32: np.uint32,
            np.float16: np.uint16
        }[data.dtype.type]
        intsize = np.dtype(np.uint32).itemsize
        itemsize = np.dtype(parttype).itemsize

        # Round the byte count up to whole 32-bit words so the uint32 view
        # used for RNG filling below is valid.
        nbytes = (batchsize * maps * itemsize + intsize - 1) // intsize * intsize
        self.rands = gpuarray.empty((nbytes // itemsize, ), dtype=parttype, allocator=memPool)
        self.rng.fillInteger(self.rands.view(np.uint32))

        p = 1.0 - self.p
        # Integer keep-threshold derived from the keep probability p; the
        # kernel compares each map's random value against it.
        self.partition = int(p * np.iinfo(parttype).max)
        dropout2dKer(data.dtype)(self.data, data, self.rands, self.partition, p, self.mapsize, slice=self.slice)
    else:
        # Inference: dropout disabled.
        self.data = data
def updateGrad(self, grad):
    """Sum all incoming gradients into a single tensor."""
    template = grad[0]
    total = gpuarray.empty(template.shape, dtype=template.dtype, allocator=memPool)
    total.fill(0)

    self.grad = total
    for gr in grad:
        Blas.toVectorAddVector(total.ravel(), gr.ravel())
def calcGrad(self, pred, target):
    """Gradient of the smooth-L1 loss; also accumulates the error into self.devErr."""
    fullnorm = 1.0 / np.prod(target.shape)
    norm = 1.0 / np.prod(target.shape[1:])

    grad = gpuarray.empty(pred.shape, dtype=np.float32, allocator=memPool)
    self.devErr.fill(0.0)

    smoothL1Ker(pred, target, self.devErr, grad, norm, fullnorm)
    return grad
def updateData(self, data):
    """Elementwise sum of all input tensors."""
    template = data[0]
    total = gpuarray.empty(template.shape, dtype=template.dtype, allocator=memPool)
    total.fill(0)

    self.data = total
    for tensor in data:
        Blas.toVectorAddVector(total.ravel(), tensor.ravel())
def __init__(self, nd, inmaps, outmaps, size, stride=1, pad=0, dilation=1, wscale=1.0, useBias=True,
             name=None, initscheme=None, empty=False, groups=1):
    """N-dimensional convolution-style module.

    :param nd: number of spatial dimensions.
    :param inmaps: number of input feature maps.
    :param outmaps: number of output feature maps (before grouping).
    :param size: kernel size (scalar, repeated over nd).
    :param stride, pad, dilation: scalar or per-dimension settings.
    :param wscale: scale applied by the weight-initialization scheme.
    :param empty: when True, skip weight allocation (caller fills vars later).
    :param groups: grouped convolution factor; must divide inmaps and outmaps.
    :raises ModuleError: when inmaps or outmaps is not divisible by groups.
    """
    super().__init__(name)
    # Scalar settings are expanded to one value per spatial dimension.
    self.stride = self.repeat(stride, nd)
    self.pad = self.repeat(pad, nd)
    self.dilation = self.repeat(dilation, nd)
    self.useBias = useBias
    self.groups = groups

    # Backend algorithm choices, filled by installDefaultAlgos.
    self.fwdAlgo, self.bwdFilterAlgo, self.bwdDataAlgo = None, None, None
    self.installDefaultAlgos()

    if inmaps % groups != 0 or outmaps % groups != 0:
        raise ModuleError(
            "Number of input and output maps must be divisible by number of groups "
            "(%d inmaps, %d outmaps, %d groups)" % (inmaps, outmaps, groups))

    # Per-group output maps determine the weight/bias shapes below.
    outmaps //= groups

    self.W = None
    self.b = None
    if empty:
        return

    # NOTE(review): weight shape is (inmaps, outmaps, ...) — transposed-conv
    # layout rather than the usual (outmaps, inmaps, ...); confirm against the
    # forward pass.
    Wshape = (inmaps, outmaps, *self.repeat(size, nd))
    W = self.createTensorWithScheme(initscheme, Wshape, wscale, self.calcNeuronsNumber(Wshape, True))
    self.setVar(
        "W",
        Variable(
            gpuarray.empty(Wshape, dtype=self.calctype
                           ) if W is None else gpuarray.to_gpu(W)))
    if useBias:
        # Bias broadcasts over batch and spatial dimensions.
        bshape = (1, outmaps) + self.repeat(1, nd)
        self.setVar("b", Variable(gpuarray.zeros(bshape, dtype=self.calctype)))
def calcGrad(self, scores, labels):
    """Hinge-loss gradient; accumulates the error into self.devErr."""
    if Config.verifyData:
        self.verifyLabels(labels)

    samples, classes = scores.shape[0], scores.shape[1]
    grad = gpuarray.empty(scores.shape, dtype=np.float32, allocator=memPool)

    self.devErr.fill(0.0)
    hingeKer(scores, labels, self.devErr, grad, samples, classes)
    return grad
def calcVal(self, scores, labels):
    """Hinge-loss validation value, averaged over the batch."""
    if Config.verifyData:
        self.verifyLabels(labels)

    samples, classes = scores.shape[0], scores.shape[1]
    diff = gpuarray.empty(scores.shape, dtype=np.float32, allocator=memPool)
    devErr = gpuarray.zeros((), dtype=np.float32, allocator=memPool)

    hingeKer(scores, labels, devErr, diff, samples, classes)
    return devErr.get() / samples
def setupW(self, insize, outsize, initscheme, wscale):
    """Create and register the weight variable, honoring transposition and grouping."""
    if not self.useW:
        return

    if self.transpW:
        asize, bsize = outsize, insize
    else:
        asize, bsize = insize, outsize

    # Grouped weights only in full-group mode; otherwise a single group.
    groups = 1 if self.wmode != GroupMode.full else self.groups
    Wshape = (groups, asize, bsize)

    W = self.createTensorWithScheme(initscheme, Wshape, wscale,
                                    self.calcNeuronsNumber(Wshape, self.transpW))
    if W is None:
        W = gpuarray.empty(Wshape, dtype=np.float32)
    else:
        W = gpuarray.to_gpu(W)

    self.setVar("W", Variable(W))
def updateGrad(self, grad):
    """Backward pass of the penalty term: L1 via a dedicated kernel, L2 as a scaled vector op."""
    # Penalty weight is averaged over the batch.
    scale = self.weight / grad.shape[0]

    if self.mode == PenaltyMode.l1:
        self.grad = gpuarray.empty(grad.shape, dtype=grad.dtype, allocator=memPool)
        l1penaltyKer(self.grad, grad, self.data, scale)
    elif self.mode == PenaltyMode.l2:
        outgrad = Blas.addVectorToVector(grad.ravel(), self.data.ravel(), alpha=1.0, beta=-scale)
        self.grad = outgrad.reshape(grad.shape)
    else:
        raise NotImplementedError(self.mode)
def updateGrad(self, grad):
    """Backward pass of 2d dropout, reusing the mask sampled during the forward pass."""
    if not self.train:
        # Inference mode: gradient passes through unchanged.
        self.grad = grad
        return

    if self.inplace:
        self.grad = grad
    elif self.slice is not None:
        # Sliced dropout: copy so regions outside the slice survive.
        self.grad = copy(None, grad)
    else:
        self.grad = gpuarray.empty(grad.shape, dtype=grad.dtype, allocator=memPool)

    dropout2dKer(grad.dtype)(self.grad, grad, self.rands, self.partition, 1.0 - self.p, self.mapsize)
def updateData(self, data):
    """1d padding forward pass: constant fill or reflection."""
    if self.mode == PadMode.reflect:
        self.data = Pad.reflectpad1d(data, self.pad)
    elif self.mode == PadMode.constant:
        lpad, rpad = self.pad
        outsize = data.shape[2] + lpad + rpad

        padded = gpuarray.empty(data.shape[:2] + (outsize, ), dtype=np.float32, allocator=memPool)
        padded.fill(self.fillValue)
        # Copy the input into the interior, leaving the fill value at the edges.
        padded[:, :, lpad:outsize - rpad] = data

        self.data = padded
    else:
        raise NotImplementedError(self.mode)
def updateData(self, data):
    """2d padding forward pass: constant fill or reflection."""
    if self.mode == PadMode.reflect:
        self.data = Pad.reflectpad2d(data, self.pad)
    elif self.mode == PadMode.constant:
        upad, bpad, lpad, rpad = self.pad
        outh = data.shape[2] + upad + bpad
        outw = data.shape[3] + lpad + rpad

        padded = gpuarray.empty(data.shape[:2] + (outh, outw), dtype=np.float32, allocator=memPool)
        padded.fill(self.fillValue)
        # Copy the input into the interior, leaving the fill value at the borders.
        padded[:, :, upad:outh - bpad, lpad:outw - rpad] = data

        self.data = padded
    else:
        raise NotImplementedError(self.mode)
def updateGrad(self, grad):
    """Backward pass of noise injection: mul mode scales by the stored noise, add passes through."""
    if self.mode == InjectMode.add:
        # Additive noise has unit derivative — copy unless operating in place.
        self.grad = grad if self.inplace else copy(None, grad)
    elif self.mode == InjectMode.mul:
        if self.inplace:
            self.grad = grad
        elif self.slice is not None:
            # Sliced injection: copy so elements outside the slice survive.
            self.grad = copy(None, grad)
        else:
            self.grad = gpuarray.empty(grad.shape, dtype=grad.dtype, allocator=memPool)

        mulKer(grad.dtype)(self.grad, grad, self.rands, slice=self.slice)
    else:
        raise NotImplementedError(self.mode)
def calcVal(self, pred, target):
    """KL-divergence validation value between softmax(pred) and target.

    The target is also softmax-normalized when self.normTarget is set; the
    gradient buffer required by the kernel is discarded.
    """
    shape = pred.shape
    softmax = softmaxNd(pred.reshape(shape[0], int(np.prod(shape[1:])), 1, 1))

    if self.normTarget:
        # Note: the final normalization below uses the target batch size here.
        shape = target.shape
        target = softmaxNd(target.reshape(shape[0], int(np.prod(shape[1:])), 1, 1))

    gradBuffer = gpuarray.empty(pred.shape, dtype=np.float32, allocator=memPool)
    klDivergence = getAccuracyKernel("klDivergence")
    error = klDivergence(softmax, target, gradBuffer, 1.0 / softmax.shape[0], allocator=memPool)

    return error.get() / shape[0]
def __init__(self, insize, outsize, wscale=1.0, useBias=True, initscheme=None, name=None, empty=False,
             transpose=False):
    """Fully-connected (linear) module.

    :param insize: input feature count.
    :param outsize: output feature count.
    :param wscale: scale applied by the weight-initialization scheme.
    :param transpose: store W as (outsize, insize) and b as (insize, ) instead
        of the default (insize, outsize) / (outsize, ) layout.
    :param empty: when True, skip weight allocation (caller fills vars later).
    """
    super().__init__(name)
    # Capture the exact constructor arguments; must run before any locals are
    # introduced, since locals() is snapshotted here.
    self.registerBlueprint(locals())

    self.transpose = transpose
    self.useBias = useBias

    self.W = None
    self.b = None
    if empty:
        return

    # Weight/bias shapes swap roles under transposition.
    Wshape, bshape = ((outsize, insize), (insize, )) if transpose else ((insize, outsize), (outsize, ))
    W = self.createTensorWithScheme(initscheme, Wshape, wscale, factorShape=Wshape)
    self.setVar(
        "W",
        Variable(
            gpuarray.empty(Wshape, dtype=self.calctype
                           ) if W is None else gpuarray.to_gpu(W)))
    if useBias:
        self.setVar("b", Variable(gpuarray.zeros(bshape, dtype=self.calctype)))
def updateData(self, data):
    """Multi-head self-attention forward pass (BERT-style).

    :param data: (hiddenStates, attentionMask) pair; the mask is added to the
        raw attention scores before the softmax.
    """
    hiddenStates, attentionMask = data

    # Project the input into query/key/value spaces via submodules.
    mixedQueryLayer = self.modules['query'](hiddenStates)
    mixedKeyLayer = self.modules['key'](hiddenStates)
    mixedValueLayer = self.modules['value'](hiddenStates)

    # self.transpose presumably rearranges to (batch, heads, seq, headSize) —
    # TODO confirm; the unpacking below assumes a 4-axis result.
    queryLayer = self.transpose(mixedQueryLayer)
    keyLayer = self.transpose(mixedKeyLayer)
    valueLayer = self.transpose(mixedValueLayer)
    batchsize, maps, h, w = queryLayer.shape

    swap = SwapAxes(axis1=2, axis2=1)
    swap.calcMode(self.calctype)

    # Batched Q @ K^T over batch*heads groups -> (batch, heads, h, h) scores.
    A = queryLayer.reshape((batchsize * maps, h, w))
    B = swap(keyLayer.reshape((batchsize * maps, h, w)))
    attentionScores = mulTensorBatch(A, B, formatA="gbp", formatB="gbp", formatOut="gbp")
    attentionScores = attentionScores.reshape((batchsize, maps, h, h))

    # Scale by 1/sqrt(headSize) using a constant-filled tensor and the mul module.
    a = gpuarray.empty(attentionScores.shape, self.calctype).fill(1/math.sqrt(self.attentionHeadSize))
    attentionScores = self.modules['mul']([attentionScores, a])
    attentionScores = attentionScores + attentionMask

    softmax = SoftMax()
    softmax.calcMode(self.calctype)
    swap2 = SwapAxes(axis1=1, axis2=3)
    swap2.calcMode(self.calctype)
    # Axes are swapped around the softmax, presumably so it normalizes over
    # the intended (key) axis — TODO confirm SoftMax's axis convention.
    attentionProbs = swap2(softmax(swap2(attentionScores)))

    # Batched probs @ V, then restore layout and merge heads into allHeadSize.
    contextLayer = mulTensorBatch(attentionProbs.reshape((batchsize * maps, h, h)),
                                  valueLayer.reshape((batchsize * maps, h, w)),
                                  formatA="gbp", formatB="gbp", formatOut="gbp")
    contextLayer = swap(contextLayer.reshape((batchsize, maps, h, w))).reshape((batchsize, h, self.allHeadSize))
    self.data = contextLayer