def update_stats(self, batch):
    vmin = cp.dev_tensor_float(batch.shape[0])
    vmax = cp.dev_tensor_float(batch.shape[0])
    mean = cp.dev_tensor_float(batch.shape[0])
    mean2 = cp.dev_tensor_float(batch.shape[0])
    for m in [mean, mean2]:
        cp.fill(m, 0)
    cp.reduce_to_col(mean, batch)
    cp.reduce_to_col(mean2, batch, cp.reduce_functor.ADD_SQUARED)
    cp.reduce_to_col(vmin, batch, cp.reduce_functor.MIN)
    cp.reduce_to_col(vmax, batch, cp.reduce_functor.MAX)
    if "N" in self.__dict__:
        self.N += batch.shape[1]
        cp.apply_binary_functor(self.mean, mean, cp.binary_functor.ADD)
        cp.apply_binary_functor(self.mean2, mean2, cp.binary_functor.ADD)
        cp.apply_binary_functor(self.min, vmin, cp.binary_functor.MIN)
        cp.apply_binary_functor(self.max, vmax, cp.binary_functor.MAX)
        mean.dealloc()
        mean2.dealloc()
        vmin.dealloc()
        vmax.dealloc()
    else:
        self.N = batch.shape[1]
        self.mean = mean
        self.mean2 = mean2
        self.min = vmin
        self.max = vmax
def allocUpdateMatrix(self):
    self.w_tmp = cp.dev_tensor_float_cm(self.mat.shape)
    cp.fill(self.w_tmp, 0)
    self.blo_tmp = cp.dev_tensor_float(len(self.bias_lo))
    self.bhi_tmp = cp.dev_tensor_float(len(self.bias_hi))
    cp.fill(self.blo_tmp, 0)
    cp.fill(self.bhi_tmp, 0)
def load(self, prefix, postfix):
    fn = os.path.join(prefix, "weights-%s.npy" % postfix)
    if os.path.exists(fn):
        self.mat.dealloc()
        self.mat = cp.dev_tensor_float_cm(np.load(fn))
        self.bias_lo.dealloc()
        self.bias_hi.dealloc()
        self.bias_lo = cp.dev_tensor_float(np.load(os.path.join(prefix, "bias-lo-%s.npy" % postfix)))
        self.bias_hi = cp.dev_tensor_float(np.load(os.path.join(prefix, "bias-hi-%s.npy" % postfix)))
def test_pairwise_euclidean_dist():
    from scipy.spatial.distance import cdist
    x = np.random.uniform(0, 1, (20, 10))
    y = np.random.uniform(0, 1, (30, 10))
    x_ = cp.dev_tensor_float(x)
    y_ = cp.dev_tensor_float(y)
    dists = cp.dev_tensor_float([x_.shape[0], y_.shape[0]])
    cp.pairwise_distance_l2(dists, x_, y_)
    numpy_dist = cdist(x, y)
    ok_(np.linalg.norm(numpy_dist - dists.np) < 1e-3)
def load_weights(self, path):
    print "loading weights from ", path
    if not os.path.exists(os.path.join(path, "weights-0-%s.npy" % self.cfg.postfix)):
        print "Could not open weights."
        sys.exit(1)
    self.w_ = np.load(os.path.join(path, "weights-0-%s.npy" % self.cfg.postfix))
    self.bias_lo = cp.dev_tensor_float(np.load(os.path.join(path, "bias-lo-0-%s.npy" % self.cfg.postfix)).reshape(-1, 1))
    self.bias_hi = cp.dev_tensor_float(np.load(os.path.join(path, "bias-hi-0-%s.npy" % self.cfg.postfix)).reshape(-1, 1))
    self.w = cp.dev_tensor_float_cm(self.w_.copy("F"))
    self.num_vis = self.w_.shape[0]
    self.num_hids = self.w_.shape[1]
    print "Number of hidden units: ", self.num_hids
def numerator(self, mbp, batchsize):
    sid = 0
    actv = cp.dev_tensor_float_cm([self.weight.shape[0], batchsize])
    acth = cp.dev_tensor_float_cm([self.weight.shape[1], batchsize])
    row = cp.dev_tensor_float([batchsize])
    cp.fill(acth, 0.0)
    cp.fill(actv, 0.0)
    cp.fill(row, 0)
    print "Numerator: ",
    L = []
    try:
        while True:
            mbp.getMiniBatch(batchsize, actv, sid)
            mbp.forgetOriginalData()
            sid += 1
            L.append(self.partialsumV(actv, acth, row))
            sys.stdout.write(".")
            sys.stdout.flush()
    except minibatch_provider.MiniBatchProviderEmpty:
        print "done."
    for m in [actv, acth, row]:
        m.dealloc()
    return math.fsum(L) / len(L)
def denominator(self, batchsize):
    acth = cp.dev_tensor_float_cm([self.weight.shape[1], batchsize])
    actv = cp.dev_tensor_float_cm([self.weight.shape[0], batchsize])
    row = cp.dev_tensor_float([batchsize])
    cp.fill(acth, 0.0)
    cp.fill(actv, 0.0)
    cp.fill(row, 0.0)
    n = acth.shape[0]
    nmax = 2 ** n
    if nmax % batchsize != 0:
        print "Error: 2**n=%d must be divisible by batchsize=%d!" % (nmax, batchsize)
        sys.exit(1)
    L = []
    widgets = ["Denominator: ", Percentage(), " ", Bar(marker=RotatingMarker()), " ", ETA()]
    pbar = ProgressBar(widgets=widgets, maxval=nmax)
    for i in xrange(0, nmax, acth.shape[1]):
        cp.set_binary_sequence(acth, i)
        L.append(self.partialsum(acth, actv, row))
        if (i / acth.shape[1]) % 100 == 0:
            pbar.update(i)
    pbar.finish()
    for m in [actv, acth, row]:
        m.dealloc()
    return math.fsum(L)
def get_distance_matrix(self, test):
    t = cp.dev_tensor_float_cm(test)
    assert t.shape[1] == self.data.shape[1]
    tsq = cp.dev_tensor_float(t.shape[0])
    cp.reduce_to_col(tsq, t, cp.reduce_functor.ADD_SQUARED)
    p = cp.dev_tensor_float_cm([self.data.shape[0], t.shape[0]])
    # squared distances via ||a - b||^2 = ||a||^2 - 2*a.b + ||b||^2
    cp.prod(p, self.data, t, 'n', 't', -2, 0)
    cp.matrix_plus_col(p, self.dsq)
    cp.matrix_plus_row(p, tsq)
    return p
def determine_partial_sum(bs, fs, img, in_ch, out_ch, repeat=5, verbose=True):
    """Returns the best performing partial_sum parameter for Alex' convolutions.

    bs     -- batch size
    fs     -- filter size
    img    -- length/height of a square image
    in_ch  -- number of input maps
    out_ch -- number of output maps
    """
    import timeit
    res = {}
    N = 1
    t_dst = cp.dev_tensor_float(np.zeros((in_ch, fs * fs, out_ch)))
    t_delta = cp.dev_tensor_float(np.zeros((out_ch, img, img, bs)))
    t_input = cp.dev_tensor_float(np.zeros((in_ch, img, img, bs)))
    for ps in range(img * img + 1):
        if (ps == 0) or ((img * img) % ps == 0):
            def conv():
                cp.d_conv2d_dfilt(t_dst, t_delta, t_input, -fs / 2 - 1, 1, 1, ps)
            f = conv
            try:
                t = timeit.Timer(stmt=f)
                total = t.repeat(number=N, repeat=repeat)
                res[ps] = np.array(total) / N
                if verbose:
                    print (" ps {:>5d}: min {:>1.5f}, avg {:>1.5f}, max {:>1.5f}"
                           .format(ps, res[ps].min(), np.average(res[ps]), res[ps].max()))
            except Exception as inst:
                if verbose:
                    print " ps {:>5d}: throws exception {:s}".format(ps, inst.args)
    res_ser = pd.Series(res)
    avg = [np.average(x) for x in res_ser]
    idx = np.argmin(avg)
    opt = res_ser.index[idx]
    print " optimal partial_sum:", opt
    print " using", np.min(avg), "s per call. Worst case", np.max(avg) / np.min(avg), "times slower."
    return opt
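# A hypothetical call of determine_partial_sum() above. The concrete sizes
# (batch size 64, 9x9 filters, 32x32 maps, 16 input / 32 output channels) are
# invented for illustration only, and the sketch assumes cuv_python was built
# with the convolution routine cp.d_conv2d_dfilt available.
opt_ps = determine_partial_sum(bs=64, fs=9, img=32, in_ch=16, out_ch=32, repeat=3)
print "chosen partial_sum:", opt_ps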
def kmeans(dataset, num_clusters, iters):
    # initialize clusters randomly
    rand_indices = np.random.randint(0, dataset.shape[0], num_clusters)
    clusters = dataset[rand_indices, :]

    # push initial clusters and dataset to device
    dataset_dev = cp.dev_tensor_float(dataset)
    clusters_dev = cp.dev_tensor_float(clusters)

    # allocate matrices for calculations (so we don't need to allocate in the loop)
    dists = cp.dev_tensor_float([dataset_dev.shape[0], num_clusters])
    nearest = cp.dev_tensor_uint(dataset_dev.shape[0])

    # main loop
    for i in xrange(iters):
        # compute pairwise distances
        cp.pdist2(dists, dataset_dev, clusters_dev)
        # find the closest cluster for every data point
        cp.reduce_to_col(nearest, dists, cp.reduce_functor.ARGMIN)
        # update cluster centers (this is a special purpose function for kmeans)
        cp.compute_clusters(clusters_dev, dataset_dev, nearest)
    return [clusters_dev.np, nearest.np]
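# A minimal usage sketch for the kmeans() example above, assuming a CUDA
# device is already initialized and cuv_python is imported as cp. The data
# shape (1000 samples x 16 features), the cluster count and the iteration
# count are made up for illustration only.
import numpy as np

data = np.random.uniform(0, 1, (1000, 16)).astype("float32")
centers, assignment = kmeans(data, num_clusters=8, iters=20)
print "cluster centers:", centers.shape       # (8, 16) with the shapes above
print "first assignments:", assignment[:10]   # index of the nearest center per sample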
def delta_outputSoftMax(self, calculated, correct):
    derivative = calculated.copy()
    cp.apply_scalar_functor(derivative, cp.scalar_functor.EXP)
    sums = cp.dev_tensor_float(calculated.shape[1])
    cp.fill(sums, 0)
    cp.reduce_to_row(sums, derivative, cp.reduce_functor.ADD)
    cp.apply_scalar_functor(sums, cp.scalar_functor.ADD, 0.1 / derivative.shape[0])
    rv = cp.transposed_view(derivative)
    cp.matrix_divide_col(rv, sums)
    cp.apply_binary_functor(derivative, correct, cp.binary_functor.AXPBY, -1., 1.)
    sums.dealloc()
    return derivative
def __init__(self, source_layer, target_layer):
    """Constructor

    @param source_layer reference to previous neuron layer.
    @param target_layer reference to next neuron layer.
    """
    self.source = source_layer
    self.target = target_layer
    dim1 = self.target.activations.shape[0]
    dim2 = self.source.activations.shape[0]
    self.weight = cp.get_filled_matrix(dim1, dim2, 0.0)
    cp.fill_rnd_uniform(self.weight)
    self.weight -= 0.5
    self.weight /= 10.0
    self.bias = cp.dev_tensor_float(dim1)
    cp.fill(self.bias, 0)
def __init__(self, data, data_l, k):
    self.k = k
    self.data = cp.dev_tensor_float_cm(data)
    self.data_l = data_l
    self.dsq = cp.dev_tensor_float(self.data.shape[0])
    cp.reduce_to_col(self.dsq, self.data, cp.reduce_functor.ADD_SQUARED)
import cuv_python as cp
import numpy as np

h = np.zeros((1, 256))                                   # create numpy matrix
d = cp.dev_tensor_float(h)                               # constructs by copying numpy_array

h2 = np.zeros((1, 256)).copy("F")                        # create numpy matrix
d2 = cp.dev_tensor_float_cm(h2)                          # creates dev_tensor_float_cm (column-major float) object

cp.fill(d, 1)                                            # terse form
cp.apply_nullary_functor(d, cp.nullary_functor.FILL, 1)  # verbose form

h = d.np                                                 # pull and convert to numpy
assert np.sum(h) == 256
assert cp.sum(d) == 256

d.dealloc()                                              # explicitly deallocate memory (optional)
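# A small follow-up sketch that combines calls appearing in the snippets above
# (dev_tensor_float_cm, fill, reduce_to_col, apply_binary_functor). Shapes and
# values are chosen for illustration only; it assumes cuv_python is importable
# as cp and a CUDA device is available.
import cuv_python as cp
import numpy as np

host = np.arange(32, dtype="float32").reshape(4, 8).copy("F")    # column-major host matrix
m = cp.dev_tensor_float_cm(host)                                 # copy to the device

rowsum = cp.dev_tensor_float(m.shape[0])                         # one entry per row
cp.fill(rowsum, 0)
cp.reduce_to_col(rowsum, m, cp.reduce_functor.ADD)               # sum over columns -> one value per row

rowsq = cp.dev_tensor_float(m.shape[0])
cp.reduce_to_col(rowsq, m, cp.reduce_functor.ADD_SQUARED)        # sum of squares per row

cp.apply_binary_functor(rowsum, rowsq, cp.binary_functor.ADD)    # element-wise: rowsum += rowsq
print rowsum.np                                                  # pull result back to numpy

for t in [m, rowsum, rowsq]:
    t.dealloc()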
def allocBias(self, layer1, layer2):
    self.bias_lo = cp.dev_tensor_float(layer1.size)
    self.bias_hi = cp.dev_tensor_float(layer2.size)
    cp.fill(self.bias_lo, 0)
    cp.fill(self.bias_hi, 0)
def createFilled(self, matList, dim1, dim2, value):
    if dim2 == 1:
        matList.append(cp.dev_tensor_float([dim1]))
    else:
        matList.append(cp.dev_tensor_float_cm([dim1, dim2]))
    cp.fill(matList[-1], value)
def createCopyFilled(self, matList, someMat, value):
    if len(someMat.shape) < 2 or someMat.shape[1] == 1:
        matList.append(cp.dev_tensor_float(someMat.shape))
    else:
        matList.append(cp.dev_tensor_float_cm(someMat.shape))
    cp.fill(matList[-1], value)
# for converting cuv tensors to numpy and back
import numpy as np
import pylinreg
import gtk

lr = pylinreg.linear_regression(100, 20, 10)
try:
    import xdot
    # Show the loss using GraphViz / xdot.py.
    # You can also react to clicks by subclassing DotWindow.
    W = xdot.DotWindow()
    W.set_dotcode(lr.loss.dot())
    W.connect('destroy', gtk.main_quit)
    gtk.main()
except:
    print "make sure xdot is installed!"

import cuv_python as cp
# print some data stored in the inputs
lr.Y.data = cp.dev_tensor_float(np.ones(lr.Y.data.shape).astype("float32"))
lr.W.data = cp.dev_tensor_float(np.random.uniform(size=lr.W.data.shape).astype("float32"))
print lr.Y.data.np[:5, :5]
print lr.W.data.np[:5, :5]
def testNpyToTensorTrans(self):
    """ convert a numpy matrix to a tensor (transposed) """
    n = np.arange(np.prod(self.shape)).reshape(self.shape).copy("F")
    t = cp.dev_tensor_float(n.astype("float32"))
    self.cmp3d_inv(t, n)
def testTensorToNpyCmTrans(self):
    """ convert a tensor to a numpy matrix (column major, transposed) """
    t = cp.dev_tensor_float(self.shape)
    cp.sequence(t)
    n = t.np
    self.cmp3d(t, n)
def testTensorToNpy(self):
    """ convert a tensor to a numpy matrix """
    t = cp.dev_tensor_float(self.shape)
    cp.sequence(t)
    n = t.np
    self.cmp3d(t, n)
def __init__(self, weight, bv, bh):
    self.weight = cp.dev_tensor_float_cm(weight)
    self.bv = cp.dev_tensor_float(bv)
    self.bh = cp.dev_tensor_float(bh)
def testNpyToTensor(self):
    """ convert a numpy matrix to a tensor """
    n = np.arange(np.prod(self.shape)).reshape(self.shape)
    t = cp.dev_tensor_float(n.astype("float32"))
    self.cmp3d(t, n)