def __init__(self, ic=None, nthr=256):
    """Allocate host-side buffers and build the CUDA ``step`` module.

    Reads ``self.dataset``, ``self.vel``, ``self.dt``, ``self.n``,
    ``self.nsv``, ``self.exc`` and ``self.gsc``, which are presumably
    provided by the base class -- TODO confirm.

    Parameters
    ----------
    ic : array-like or None
        Initial state.  When None, the state is filled with 0.1 and has
        shape ``(self.n, self.nsv, nthr)``.
    nthr : int
        Length of the trailing axis of ``hist``/``state`` and of the
        ``exc``/``gsc`` vectors.
    """
    super(CudaSim, self).__init__()
    self.nthr = nthr

    # Delays expressed in integration steps; the history buffer must hold
    # one more step than the longest delay.
    self.idel = (self.dataset.distances / self.vel / self.dt).astype(int32)
    self.horizon = self.idel.max() + 1
    self.hist = zeros((self.horizon, self.n, self.nthr)) + 0.1
    self.conn = self.dataset.weights

    # Explicit None test: ``ic or default`` raises ValueError for any
    # multi-element ndarray because its truth value is ambiguous.
    if ic is not None:
        self.state = ic
    else:
        self.state = 0.1 * ones((self.n, self.nsv, self.nthr))

    # Broadcast the scalar parameters to one value per thread; gsc is
    # additionally divided by the number of nodes.
    self.exc = self.exc * ones((self.nthr, ))
    self.gsc = self.gsc * ones((self.nthr, )) / self.n

    # NOTE: this sets an attribute on the genc.RPointer class itself, so it
    # affects every later use of genc in the process, not just this object.
    genc.RPointer.gpu = True
    src = genc.module(
        genc.model(dt=self.dt, noise=False, gpu=True, **genc.fhn),
        genc.step(self.n, len(genc.fhn['eqns']), model=genc.fhn['name'],
                  noise=False, gpu=True),
        genc.wrap(self.horizon, gpu=True),
        gpu=True)
    self.mod = cuda.srcmod(src, ['step'], debug=False)
def __init__(self, ic=None):
    """Allocate C-contiguous buffers and build the compiled ``step`` module.

    Reads ``self.dataset``, ``self.vel``, ``self.dt``, ``self.n``,
    ``self.exc`` and ``self.gsc``, which are presumably provided by the
    base class -- TODO confirm.

    Parameters
    ----------
    ic : array-like or None
        Initial state.  When None, the state is filled with 0.1 and has
        shape ``(self.n, 2)``.

    After construction, ``self.p_<name>`` holds a ``ctypes.c_void_p`` to the
    data of ``self.<name>`` for each of idel, conn, hist, state, gsc, exc.
    """
    super(CeeSim, self).__init__()
    C = ascontiguousarray

    # Delays expressed in integration steps; the history buffer must hold
    # one more step than the longest delay.
    idel = (self.dataset.distances / self.vel / self.dt).astype(int64)
    self.idel = C(idel)
    self.horizon = self.idel.max() + 1
    self.conn = C(self.dataset.weights)

    # Scalars are wrapped in one-element arrays so they can be handed over
    # by pointer; gsc is additionally divided by the number of nodes.
    self.gsc = C(array([self.gsc])) / self.n
    self.exc = C(array([self.exc]))
    self.hist = C(zeros((self.horizon, self.n)) + 0.1)

    # Explicit None test: ``ic or default`` raises ValueError for any
    # multi-element ndarray because its truth value is ambiguous.
    if ic is not None:
        initial_state = ic
    else:
        initial_state = 0.1 * ones((self.n, 2))
    self.state = C(initial_state)

    src = genc.module(
        genc.model(dt=self.dt, **genc.fhn),
        genc.step(self.n, len(genc.fhn['eqns']), model=genc.fhn['name']),
        genc.wrap(self.horizon))
    self.mod = ccpp.srcmod(src, ['step'], debug=False)

    # Cache raw pointers to each buffer.  They refer to the arrays bound
    # right now: rebinding e.g. self.hist later leaves p_hist stale.
    for arr_name in ['idel', 'conn', 'hist', 'state', 'gsc', 'exc']:
        arr = getattr(self, arr_name)
        ptr = arr.ctypes.data_as(ctypes.c_void_p)
        setattr(self, 'p_%s' % (arr_name, ), ptr)
def gpu(gsc, exc, vel, dt, dataset, tf=1500, ds=80, model="fhn_euler", cvar=0,
        kblock=128, ublock=1024, cat=concatenate):
    """Run a parameter sweep on the GPU, one thread per (gsc, exc) pair.

    Parameters
    ----------
    gsc, exc : 1-D sequences
        Per-thread parameter values.  ``len(gsc)`` sets the number of
        threads; both are zero-padded by the same amount before upload.
    vel, dt : float
        Used with ``dataset.distances`` to compute delays in steps.
    dataset : object
        Must expose ``weights`` (square, first axis = number of nodes) and
        ``distances``.
    tf : number
        End of the time grid ``r_[0:tf:dt]``.
    ds : int
        Keep one sample every ``ds`` steps.
    model : str
        Key into ``model_nsvs`` giving the number of state variables.  The
        generated kernel itself is always built from ``gen.fhn``.
    cvar : int
        Index of the state variable copied into the last history slot.
    kblock : int
        Threads per block for the ``step`` kernel.
    ublock : int
        Unused by the active code path; only the disabled ``update`` kernel
        launch referenced it.  Kept for interface compatibility.
    cat : callable
        Concatenation function used for padding.

    Returns
    -------
    ndarray of float32, shape ``(len(gsc), 1 + len(ts) // ds, n, nsv)``.
    """
    ts = r_[0:tf:dt]
    n = dataset.weights.shape[0]
    nsv = model_nsvs[model]
    nthr = len(gsc)

    # Round the thread count up to the next multiple of kblock so that
    # (nthr // kblock) blocks of kblock threads cover exactly nthr columns.
    # Padding by ``nthr % ublock`` (the remainder) does not achieve that:
    # e.g. 1025 threads would become 1026 columns but only 8 * 128 threads.
    npad = -nthr % kblock
    nthr += npad

    idel = (dataset.distances / vel / dt).astype(int32)
    horizon = idel.max() + 1
    # hist is drawn before X to keep the random stream order stable.
    hist = random.uniform(low=-1., high=1., size=(horizon, n, nthr))
    conn = dataset.weights
    X = random.uniform(low=-2, high=2, size=(n, nsv, nthr))
    # Floor division keeps the shape integral on Python 3 as well.
    Xs = empty((1 + len(ts) // ds, n, nsv, nthr), dtype=float32)
    hist[-1, ...] = X[:, cvar, :]

    # Disabled alternative that compiled a hand-written kernel:
    # mod = srcmod('parsweep.cu', ['kernel', 'update'],
    #              horizon=idel.max()+1, dt=dt, ds=ds, n=n, cvar=cvar,
    #              model=model, nsv=nsv)

    # todo, delay eval, spec exec context ONCE
    # NOTE: sets a class-level flag on gen.RPointer (process-wide effect).
    gen.RPointer.gpu = True
    src = gen.module(
        gen.model(dt=dt, noise=False, gpu=True, **gen.fhn),
        gen.step(n, len(gen.fhn['eqns']), model=gen.fhn['name'],
                 noise=False, gpu=True),
        gen.wrap(horizon, gpu=True),
        gpu=True)
    mod = srcmod(src, ['step'], debug=False)

    with arrays_on_gpu(_timed=False, _memdebug=True,
                       idel=idel.astype(int32),
                       hist=hist.astype(float32),
                       conn=conn.astype(float32),
                       X=X.astype(float32),
                       exc=cat((exc, zeros((npad, )))).astype(float32),
                       gsc=cat((gsc, zeros((npad, )))).astype(float32)) as g:

        Xs[0, ...] = g.X.get()

        for step, _t in enumerate(ts):
            mod.step(int32(step), g.idel, g.hist, g.conn, g.X, g.gsc, g.exc,
                     block=(kblock, 1, 1), grid=(nthr // kblock, 1))

            # maybe separate again for perf & to be sure no thread mixing...
            # mod.update(int32(step), g.hist, g.X,
            #            block=(ublock if nthr>=ublock else nthr, 1, 1),
            #            grid=(nthr/ublock if nthr/ublock > 0 else 1, 1))

            # Slot 0 holds the initial state, so sample k lands in slot k+1;
            # the bound check skips a final sample that would not fit.
            slot = 1 + step // ds
            if step % ds == 0 and slot < len(Xs):
                Xs[slot, ...] = g.X.get()

    # Move the thread axis to the front and drop the padding threads.
    Xs = rollaxis(Xs, 3)
    return Xs[:-npad] if npad else Xs
def gpu(gsc, exc, vel, dt, dataset, tf=1500, ds=80, model="fhn_euler", cvar=0,
        kblock=128, ublock=1024, cat=concatenate):
    """Run a parameter sweep on the GPU, one thread per (gsc, exc) pair.

    Parameters
    ----------
    gsc, exc : 1-D sequences
        Per-thread parameter values.  ``len(gsc)`` sets the number of
        threads; both are zero-padded by the same amount before upload.
    vel, dt : float
        Used with ``dataset.distances`` to compute delays in steps.
    dataset : object
        Must expose ``weights`` (square, first axis = number of nodes) and
        ``distances``.
    tf : number
        End of the time grid ``r_[0:tf:dt]``.
    ds : int
        Keep one sample every ``ds`` steps.
    model : str
        Key into ``model_nsvs`` giving the number of state variables.  The
        generated kernel itself is always built from ``gen.fhn``.
    cvar : int
        Index of the state variable copied into the last history slot.
    kblock : int
        Threads per block for the ``step`` kernel.
    ublock : int
        Unused by the active code path; only the disabled ``update`` kernel
        launch referenced it.  Kept for interface compatibility.
    cat : callable
        Concatenation function used for padding.

    Returns
    -------
    ndarray of float32, shape ``(len(gsc), 1 + len(ts) // ds, n, nsv)``.
    """
    ts = r_[0:tf:dt]
    n = dataset.weights.shape[0]
    nsv = model_nsvs[model]
    nthr = len(gsc)

    # Round the thread count up to the next multiple of kblock so that
    # (nthr // kblock) blocks of kblock threads cover exactly nthr columns.
    # Padding by ``nthr % ublock`` (the remainder) does not achieve that:
    # e.g. 1025 threads would become 1026 columns but only 8 * 128 threads.
    npad = -nthr % kblock
    nthr += npad

    idel = (dataset.distances / vel / dt).astype(int32)
    horizon = idel.max() + 1
    # hist is drawn before X to keep the random stream order stable.
    hist = random.uniform(low=-1., high=1., size=(horizon, n, nthr))
    conn = dataset.weights
    X = random.uniform(low=-2, high=2, size=(n, nsv, nthr))
    # Floor division keeps the shape integral on Python 3 as well.
    Xs = empty((1 + len(ts) // ds, n, nsv, nthr), dtype=float32)
    hist[-1, ...] = X[:, cvar, :]

    # Disabled alternative that compiled a hand-written kernel:
    # mod = srcmod('parsweep.cu', ['kernel', 'update'],
    #              horizon=idel.max()+1, dt=dt, ds=ds, n=n, cvar=cvar,
    #              model=model, nsv=nsv)

    # todo, delay eval, spec exec context ONCE
    # NOTE: sets a class-level flag on gen.RPointer (process-wide effect).
    gen.RPointer.gpu = True
    src = gen.module(
        gen.model(dt=dt, noise=False, gpu=True, **gen.fhn),
        gen.step(n, len(gen.fhn['eqns']), model=gen.fhn['name'],
                 noise=False, gpu=True),
        gen.wrap(horizon, gpu=True),
        gpu=True)
    mod = srcmod(src, ['step'], debug=False)

    with arrays_on_gpu(_timed=False, _memdebug=True,
                       idel=idel.astype(int32),
                       hist=hist.astype(float32),
                       conn=conn.astype(float32),
                       X=X.astype(float32),
                       exc=cat((exc, zeros((npad, )))).astype(float32),
                       gsc=cat((gsc, zeros((npad, )))).astype(float32)) as g:

        Xs[0, ...] = g.X.get()

        for step, _t in enumerate(ts):
            mod.step(int32(step), g.idel, g.hist, g.conn, g.X, g.gsc, g.exc,
                     block=(kblock, 1, 1), grid=(nthr // kblock, 1))

            # maybe separate again for perf & to be sure no thread mixing...
            # mod.update(int32(step), g.hist, g.X,
            #            block=(ublock if nthr>=ublock else nthr, 1, 1),
            #            grid=(nthr/ublock if nthr/ublock > 0 else 1, 1))

            # Slot 0 holds the initial state, so sample k lands in slot k+1;
            # the bound check skips a final sample that would not fit.
            slot = 1 + step // ds
            if step % ds == 0 and slot < len(Xs):
                Xs[slot, ...] = g.X.get()

    # Move the thread axis to the front and drop the padding threads.
    Xs = rollaxis(Xs, 3)
    return Xs[:-npad] if npad else Xs