def test_subsolvers_L2(rng, logger):
    pytest.importorskip('scipy', minversion='0.11')  # version for lsmr

    ref_solver = lstsq.Cholesky()
    solvers = [
        lstsq.Conjgrad(), lstsq.BlockConjgrad(),
        lstsq.ConjgradScipy(), lstsq.LSMRScipy()]

    A, B = get_system(m=2000, n=1000, d=10, rng=rng)
    sigma = 0.1 * A.max()

    with Timer() as t0:
        x0, _ = ref_solver(A, B, sigma)

    xs = np.zeros((len(solvers),) + x0.shape)
    for i, solver in enumerate(solvers):
        with Timer() as t:
            xs[i], info = solver(A, B, sigma)
        logger.info('solver: %r', solver)
        logger.info('duration: %0.3f', t.duration)
        logger.info('duration relative to reference solver: %0.2f',
                    t.duration / t0.duration)
        logger.info('info: %s', info)

    for solver, x in zip(solvers, xs):
        assert np.allclose(x0, x, atol=1e-5, rtol=1e-3), (
            "Solver %s" % type(solver).__name__)
def test_subsolvers_L2(rng, allclose):
    pytest.importorskip("scipy", minversion="0.11")  # version for lsmr

    ref_solver = lstsq.Cholesky()
    solvers = [
        lstsq.Conjgrad(),
        lstsq.BlockConjgrad(),
        lstsq.ConjgradScipy(),
        lstsq.LSMRScipy(),
    ]

    A, B = get_system(m=2000, n=1000, d=10, rng=rng)
    sigma = 0.1 * A.max()

    with Timer() as t0:
        x0, _ = ref_solver(A, B, sigma)

    xs = np.zeros((len(solvers),) + x0.shape)
    for i, solver in enumerate(solvers):
        with Timer() as t:
            xs[i], info = solver(A, B, sigma)
        logging.info("solver: %r", solver)
        logging.info("duration: %0.3f", t.duration)
        logging.info(
            "duration relative to reference solver: %0.2f",
            t.duration / t0.duration,
        )
        logging.info("info: %s", info)

    for solver, x in zip(solvers, xs):
        assert allclose(x0, x, atol=1e-5, rtol=1e-3), (
            f"Solver {type(solver).__name__}")
def test_reset(ctx, rng):
    # Yshapes = [(100,), (10, 17), (3, 3)]
    Yshapes = [(1000000,), (1000, 1700), (3, 3)]

    values = rng.uniform(size=len(Yshapes)).astype(np.float32)

    queue = cl.CommandQueue(ctx)
    clY = CLRA(queue, RA([np.zeros(shape) for shape in Yshapes]))
    clvalues = to_device(queue, values)

    plan = plan_reset(queue, clY, clvalues)
    with Timer() as t:
        plan()
    print(t.duration)

    # with Timer() as t:
    #     for i in range(len(clY)):
    #         cl.enqueue_fill_buffer(
    #             queue, clY.cl_buf.data, values[i],
    #             clY.starts[i], clY.shape0s[i] * clY.shape1s[i])
    #     queue.finish()
    # print(t.duration)

    for y, v in zip(clY, values):
        assert np.all(y == v)
def test_eval_points(Simulator, nl_nodirect, plt, seed, rng, logger):
    n = 100
    d = 5
    filter = 0.08

    eval_points = np.logspace(np.log10(300), np.log10(5000), 11)
    eval_points = np.round(eval_points).astype("int")
    max_points = eval_points.max()
    n_trials = 1

    rmses = np.nan * np.zeros((len(eval_points), n_trials))
    for j in range(n_trials):
        points = rng.normal(size=(max_points, d))
        points *= (rng.uniform(size=max_points) / norm(points, axis=-1))[:, None]

        rng_j = np.random.RandomState(348 + j)
        seed = 903824 + j

        # generate random input in unit hypersphere
        x = rng_j.normal(size=d)
        x *= rng_j.uniform() / norm(x)

        for i, n_points in enumerate(eval_points):
            model = nengo.Network(seed=seed)
            with model:
                model.config[nengo.Ensemble].neuron_type = nl_nodirect()
                u = nengo.Node(output=x)
                a = nengo.Ensemble(
                    n * d, dimensions=d, eval_points=points[:n_points])
                nengo.Connection(u, a, synapse=0)
                up = nengo.Probe(u)
                ap = nengo.Probe(a)

            with Timer() as timer:
                sim = Simulator(model)
            sim.run(10 * filter)
            sim.close()

            t = sim.trange()
            xt = nengo.Lowpass(filter).filtfilt(sim.data[up], dt=sim.dt)
            yt = nengo.Lowpass(filter).filtfilt(sim.data[ap], dt=sim.dt)
            t0 = 5 * filter
            t1 = 7 * filter
            tmask = (t > t0) & (t < t1)

            rmses[i, j] = rms(yt[tmask] - xt[tmask])

            logger.info("trial %d", j)
            logger.info("  n_points: %d", n_points)
            logger.info("  duration: %0.3f s", timer.duration)

    # subtract out mean for each model
    rmses_norm = rmses - rmses.mean(0, keepdims=True)

    mean = rmses_norm.mean(1)
    low = rmses_norm.min(1)
    high = rmses_norm.max(1)
    plt.semilogx(eval_points, mean, "k-")
    plt.semilogx(eval_points, high, "r-")
    plt.semilogx(eval_points, low, "b-")
    plt.xlim([eval_points[0], eval_points[-1]])
    plt.xticks(eval_points, eval_points)
def test_large(Simulator, seed, allclose):
    """Test with a lot of big probes. Can also be used for speed."""
    n = 10

    def input_fn(t):
        return list(range(1, 10))

    model = nengo.Network(label="test_large_probes", seed=seed)
    with model:
        probes = []
        for i in range(n):
            xi = nengo.Node(label=f"x{i}", output=input_fn)
            probes.append(nengo.Probe(xi, "output"))

    with Simulator(model) as sim:
        simtime = 2.483

        with Timer() as timer:
            sim.run(simtime)
        logging.info(
            "Ran %d probes for %f sec simtime in %0.3f sec",
            n, simtime, timer.duration,
        )

        t = sim.trange()
        x = np.asarray([input_fn(ti) for ti in t])
        for p in probes:
            y = sim.data[p]
            assert allclose(y[1:], x[:-1])  # 1-step delay
def test_subsolvers_L1(rng):
    pytest.importorskip("sklearn")

    A, B = get_system(m=2000, n=1000, d=10, rng=rng)
    l1 = 1e-4

    with Timer() as t:
        LstsqL1(l1=l1, l2=0)(A, B, rng=rng)
    logging.info("duration: %0.3f", t.duration)
def test_subsolvers_L1(rng, logger):
    pytest.importorskip('sklearn')

    A, B = get_system(m=2000, n=1000, d=10, rng=rng)
    l1 = 1e-4

    with Timer() as t:
        LstsqL1(l1=l1, l2=0)(A, B, rng=rng)
    logger.info('duration: %0.3f', t.duration)
def fit_svm(x, y, feature):
    # Note: `self`, `result`, `log`, and `test_svm` come from the enclosing
    # scope; this is a helper closure, not a standalone function.
    svm = LinearSVC(random_state=self.seed)
    with Timer() as t:
        svm.fit(x, y)
    log("SVM fitting for %ss done in %.3f seconds"
        % (feature.upper(), t.duration))
    setattr(result, "%s_fit_time" % feature, t.duration)
    _, _, acc = test_svm(svm, x, y, "Training")
    setattr(result, "%s_train_acc" % feature, acc)
    return svm
def test_lif_speed(ctx, rng, heterogeneous):
    """Test the speed of the LIF nonlinearity.

    heterogeneous: if True, use a wide range of population sizes.
    """
    dt = 1e-3
    ref = 2e-3
    tau = 20e-3
    amp = 1.0

    n_iters = 10
    if heterogeneous:
        n_neurons = [1.0e5] * 50 + [1e3] * 5000
    else:
        n_neurons = [1.1e5] * 50
    n_neurons = list(map(int, n_neurons))

    J = RA([rng.randn(n) for n in n_neurons], dtype=np.float32)
    V = RA([rng.uniform(low=0, high=1, size=n) for n in n_neurons],
           dtype=np.float32)
    W = RA(
        [rng.uniform(low=-10 * dt, high=10 * dt, size=n) for n in n_neurons],
        dtype=np.float32,
    )
    OS = RA([np.zeros(n) for n in n_neurons], dtype=np.float32)

    queue = cl.CommandQueue(
        ctx, properties=cl.command_queue_properties.PROFILING_ENABLE)
    clJ = CLRA(queue, J)
    clV = CLRA(queue, V)
    clW = CLRA(queue, W)
    clOS = CLRA(queue, OS)

    for i, blockify in enumerate([False, True]):
        plan = plan_lif(
            queue, dt, clJ, clV, clW, clOS, ref, tau, amp, blockify=blockify)

        with Timer() as timer:
            for _ in range(n_iters):
                plan()

        print("plan %d: blockify = %s, dur = %0.3f"
              % (i, blockify, timer.duration))
def test_conv2d(local, Simulator, rng):
    f = 4
    c = 2
    ni, nj = 30, 32
    si, sj = 5, 3
    # f = 64
    # c = 64
    # ni, nj = 32, 32
    # si, sj = 11, 11

    si2 = (si - 1) // 2
    sj2 = (sj - 1) // 2

    fshape = (f, ni, nj, c, si, sj) if local else (f, c, si, sj)
    filters = rng.uniform(-1, 1, size=fshape)
    biases = rng.uniform(-1, 1, size=f)
    image = rng.uniform(-1, 1, size=(c, ni, nj))

    model = nengo.Network()
    with model:
        u = nengo.Node(image.ravel())
        v = nengo.Node(Conv2d((c, ni, nj), filters, biases, padding=(si2, sj2)))
        nengo.Connection(u, v, synapse=None)
        vp = nengo.Probe(v)

    with Simulator(model) as sim:
        with Timer() as timer:
            sim.step()
    print("Conv2d(local=%s): %0.3e" % (local, timer.duration))

    # --- check result
    result = np.zeros((f, ni, nj))
    for i in range(ni):
        for j in range(nj):
            i0, i1 = i - si2, i + si2 + 1
            j0, j1 = j - sj2, j + sj2 + 1
            sli = slice(max(-i0, 0), min(ni + si - i1, si))
            slj = slice(max(-j0, 0), min(nj + sj - j1, sj))
            w = (filters[:, i, j, :, sli, slj] if local
                 else filters[:, :, sli, slj])
            xij = image[:, max(i0, 0):min(i1, ni), max(j0, 0):min(j1, nj)]
            result[:, i, j] += np.dot(xij.ravel(), w.reshape(f, -1).T)
    result += biases.reshape(-1, 1, 1)

    y = sim.data[vp][-1].reshape((f, ni, nj))
    assert np.allclose(result, y, rtol=1e-3, atol=1e-6)
def test_subsolvers_L1(rng, allclose):
    pytest.importorskip("sklearn")

    A, B = get_system(m=2000, n=1000, d=10, rng=rng)
    l1 = 1e-4

    with Timer() as t:
        x, info = LstsqL1(l1=l1, l2=0)(A, B, rng=rng)
    logging.info("duration: %0.3f", t.duration)

    Ax = np.dot(A, x)
    assert rms(Ax - B) < 2e-2
    assert allclose(Ax, B, atol=0.2, record_rmse=False)
    assert np.max(info["rmses"]) < 3e-2
def test_linearfilter(ctx, n_per_kind, rng):
    kinds = (
        nengo.synapses.LinearFilter((2.,), (1.,), analog=False),
        nengo.synapses.Lowpass(0.005),
        nengo.synapses.Alpha(0.005),
    )
    assert len(n_per_kind) == len(kinds)
    kinds_n = [(kind, n) for kind, n in zip(kinds, n_per_kind) if n > 0]

    dt = 0.001
    steps = [kind.make_step((n,), (n,), dt, None, dtype=np.float32)
             for kind, n in kinds_n]
    A = RA([step.den for step in steps])
    B = RA([step.num for step in steps])

    X = RA([rng.normal(size=n) for kind, n in kinds_n])
    Y = RA([np.zeros(n) for kind, n in kinds_n])
    Xbuf = RA([np.zeros(shape) for shape in zip(B.sizes, X.sizes)])
    Ybuf = RA([np.zeros(shape) for shape in zip(A.sizes, Y.sizes)])

    queue = cl.CommandQueue(ctx)
    clA = CLRA(queue, A)
    clB = CLRA(queue, B)
    clX = CLRA(queue, X)
    clY = CLRA(queue, Y)
    clXbuf = CLRA(queue, Xbuf)
    clYbuf = CLRA(queue, Ybuf)

    n_calls = 3
    plans = plan_linearfilter(queue, clX, clY, clA, clB, clXbuf, clYbuf)
    with Timer() as timer:
        for _ in range(n_calls):
            for plan in plans:
                plan()
    print(timer.duration)

    for i, (kind, n) in enumerate(kinds_n):
        n = min(n, 100)
        step = kind.make_step((n, 1), (n, 1), dt, None, dtype=np.float32)
        x = X[i][:n]
        y = np.zeros_like(x)
        for _ in range(n_calls):
            y[:] = step(0, x)

        z = clY[i][:n]
        assert np.allclose(z, y, atol=1e-7, rtol=1e-5), kind
def _get_feature(self, feature, audio, result=None, n_frames=None):
    labels = sorted(list(audio))

    with Timer() as t:
        if feature == 'mfcc':
            # Default to zscoring for MFCCs
            zscore = True if self.zscore is None else self.zscore
            x = mfccs(self.model, audio, zscore)
        elif feature == 'ncc':
            # Default to not zscoring for NCCs
            zscore = False if self.zscore is None else self.zscore
            x = nccs(self.model, audio, zscore, self.seed, self.upsample)
        else:
            raise ValueError("Possible features: 'mfcc', 'ncc'")
    log("%ss generated in %.3f seconds" % (feature.upper(), t.duration))

    if result is not None:
        setattr(result, "%s_time" % feature, t.duration)
    if n_frames is None:
        n_frames = max(max(xx.shape[0] for xx in x[l]) for l in audio)
    x = normalize(x, n_frames)
    return np.vstack([np.vstack(x[l]) for l in labels])
def test_speed(ctx, rng):
    try:
        import pyopencl_blas
    except ImportError:
        pyopencl_blas = None

    # enable_out_of_order = (
    #     cl.command_queue_properties.OUT_OF_ORDER_EXEC_MODE_ENABLE)

    k = 300
    # k = 100
    # k = 32
    # k = 16
    ms = [rng.randint(100, 1000) for i in range(k)]
    ns = [rng.randint(100, 1000) for i in range(k)]
    # ms = [4096 for i in range(k)]
    # ns = [4096 for i in range(k)]

    aa = [rng.uniform(-1, 1, size=(m, n)).astype('float32')
          for m, n in zip(ms, ns)]
    xx = [rng.uniform(-1, 1, size=n).astype('float32') for n in ns]
    yy = [rng.uniform(-1, 1, size=m).astype('float32') for m in ms]
    ajs = [np.int32(i) for i in range(k)]
    xjs = [np.int32(i) for i in range(k)]
    # ajs = [rng.randint(k, size=p) for i in range(k)]
    # xjs = [rng.randint(k, size=p) for i in range(k)]

    # alpha = 0.5
    # beta = 0.1
    alpha = 1.0
    beta = 1.0

    # -- prepare initial conditions on device
    queue = cl.CommandQueue(ctx)
    # queue = cl.CommandQueue(ctx, properties=enable_out_of_order)
    clA = CLRA.from_arrays(queue, aa)
    clX = CLRA.from_arrays(queue, xx)
    clY = CLRA.from_arrays(queue, yy)
    A_js = RA(ajs, dtype=np.int32)
    X_js = RA(xjs, dtype=np.int32)

    # -- run cl computation
    prog = plan_ragged_gather_gemv(
        queue, alpha, clA, A_js, clX, X_js, beta, clY)
    plans = prog.choose_plans()

    print('')
    print('-' * 5 + ' Plans ' + '-' * 45)
    for plan in plans:
        print(plan)

    with Timer() as timer:
        for plan in plans:
            plan()
    print("nengo_ocl: %0.3f" % timer.duration)

    # -- speed test in ocl blas
    if pyopencl_blas:
        pyopencl_blas.setup()

        def array(a):
            cla = cl.array.Array(queue, a.shape, a.dtype)
            cla.set(a)
            return cla

        clAs = [array(a) for a in aa]
        clXs = [array(x.ravel()) for x in xx]
        clYs = [array(y.ravel()) for y in yy]

        queues = [cl.CommandQueue(ctx) for _ in range(k)]
        # queues = [cl.CommandQueue(ctx, properties=enable_out_of_order)
        #           for _ in range(k)]

        queue.finish()
        with Timer() as timer:
            if 0:  # use a single queue
                for A, X, Y in zip(clAs, clXs, clYs):
                    pyopencl_blas.gemv(queue, A, X, Y)
                queue.finish()
            else:  # use multiple parallel queues
                events = []
                for i, (A, X, Y) in enumerate(zip(clAs, clXs, clYs)):
                    q = queues[i % len(queues)]
                    e = pyopencl_blas.gemv(q, A, X, Y)
                    events.append(e)
                for q in queues:
                    q.flush()
                cl.wait_for_events(events)

        print("clBLAS: %0.3f" % timer.duration)
def optimize(model, dg):
    """Optimizes the operator graph by merging operators.

    This reduces the number of iterators to iterate over in slow Python code
    (as opposed to fast C code). The resulting merged operators will also
    operate on larger chunks of sequential memory, making better use of CPU
    caching and prefetching.

    The optimization algorithm has worst case complexity :math:`O(n^2 + e)`,
    where :math:`n` is the number of operators and :math:`e` is the number
    of edges in the dependency graph. In practice the run time will be much
    better because not all :math:`n^2` pairwise combinations of operators
    will be evaluated. A grouping depending on the operator type and view
    bases is done with dictionaries. This grouping can be done in amortized
    linear time and reduces the actual worst-case runtime of the
    optimization algorithm to :math:`O(gm^2 + e)`, where :math:`g` is the
    number of groups and :math:`m` is the number of elements in a group.
    Moreover, information about memory alignment will be used to cut the
    inner loop short in many cases and gives a runtime much closer to
    linear in most cases.

    Note that this function modifies both ``model`` and ``dg``.

    Parameters
    ----------
    model : `nengo.builder.Model`
        Builder output to optimize.
    dg : dict
        Dict of the form ``{a: {b, c}}`` where ``b`` and ``c`` depend on
        ``a``, specifying the operator dependency graph of the model.
    """
    logger.debug("Optimizing model...")

    # We try first to merge operators with views only, as these have a fixed
    # order for the memory alignment, whereas operators without views could
    # be merged in a random order. Merging the views of operators will
    # propagate requirements in the memory ordering via the other associated
    # signals of the operator to other operators.
    #
    # Once no more operators with views can be merged, we try to merge
    # operators without views and then try again merging views (because each
    # operator merge might generate new views).

    single_pass = OpMergePass(dg)

    n_initial_ops = len(dg)
    cum_duration = 0.0
    before, after = None, None
    only_merge_ops_with_view = True
    while only_merge_ops_with_view or after < before:
        only_merge_ops_with_view = before is None or before != after
        before = len(single_pass.dg.forward)
        with Timer() as t:
            single_pass(only_merge_ops_with_view)
        after = len(single_pass.dg.forward)
        logger.debug(
            "[%s]: Reduced %i to %i operators in %fs.",
            "views" if only_merge_ops_with_view else "non-views",
            before,
            after,
            t.duration,
        )

        # Prevent the optimizer from running too long if we hit diminishing
        # returns. Note that we don't break if there was no reduction at all,
        # because in that case we want to toggle only_merge_ops_with_view,
        # which might still yield some significant reduction.
        cum_duration += t.duration
        mean_reduction_rate = float(n_initial_ops - after) / cum_duration
        last_reduction_rate = float(before - after) / t.duration
        threshold = 0.01
        scaled_rate = threshold * mean_reduction_rate
        if 0.0 < last_reduction_rate < scaled_rate:  # pragma: no cover
            logger.debug(
                "Operator reduction rate fell below threshold of %.3f. "
                "Stopping optimizer.",
                threshold,
            )
            break

    # Update model signals
    for sigdict in model.sig.values():
        for name in sigdict:
            while sigdict[name] in single_pass.sig_replacements:
                sigdict[name] = single_pass.sig_replacements[sigdict[name]]

    # Reinitialize the model's operator list
    del model.operators[:]
    for op in dg:
        model.add_op(op)
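# A hedged usage sketch for optimize(), not taken from the function above.
# The Model.build call and the operator_dependency_graph helper reflect the
# Nengo builder API as I understand it and are assumptions here; some Nengo
# versions spell the helper `operator_depencency_graph`.
import nengo
from nengo.builder import Model
from nengo.utils.simulator import operator_dependency_graph  # assumed name

with nengo.Network() as net:
    a = nengo.Ensemble(50, dimensions=1)
    b = nengo.Ensemble(50, dimensions=1)
    nengo.Connection(a, b)

model = Model(dt=0.001)
model.build(net)
dg = operator_dependency_graph(model.operators)
optimize(model, dg)  # merges operators in place, modifying model and dg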
def test_timer():
    with Timer() as timer:
        for i in range(1000):
            2 + 2
    assert timer.duration > 0.0
    assert timer.duration < 1.0  # Pretty bad worst case
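# A minimal sketch of the Timer context manager these snippets rely on. The
# real class lives in nengo.utils.stdlib and may differ in detail; this
# version only shows the interface used above (`duration` after exit).
import timeit


class Timer:
    def __enter__(self):
        self.start = timeit.default_timer()
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.end = timeit.default_timer()
        self.duration = self.end - self.start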
def __init__(self, network, dt=0.001, seed=None, model=None,
             planner=greedy_planner):

    with Timer() as nengo_timer:
        if model is None:
            self.model = Model(
                dt=float(dt),
                label="%s, dt=%f" % (network, dt),
                decoder_cache=get_default_decoder_cache())
        else:
            self.model = model

        if network is not None:
            # Build the network into the model
            self.model.build(network)

    logger.info("Nengo build in %0.3f s", nengo_timer.duration)

    # --- set seed
    seed = np.random.randint(npext.maxint) if seed is None else seed
    self.seed = seed
    self.rng = np.random.RandomState(self.seed)

    self._step = Signal(np.array(0.0, dtype=np.float64), name='step')
    self._time = Signal(np.array(0.0, dtype=np.float64), name='time')

    # --- operators
    with Timer() as planner_timer:
        operators = list(self.model.operators)

        # convert DotInc, Reset, Copy, and ProdUpdate to MultiProdUpdate
        operators = list(map(MultiProdUpdate.convert_to, operators))
        operators = MultiProdUpdate.compress(operators)

        # plan the order of operations, combining where appropriate
        op_groups = planner(operators)
        assert len([typ for typ, _ in op_groups if typ is Reset]) < 2, (
            "All resets not planned together")

        # add time operator after planning, to ensure it goes first
        time_op = TimeUpdate(self._step, self._time)
        operators.insert(0, time_op)
        op_groups.insert(0, (type(time_op), [time_op]))

        self.operators = operators
        self.op_groups = op_groups

    logger.info("Planning in %0.3f s", planner_timer.duration)

    with Timer() as signals_timer:
        # Initialize signals
        all_signals = signals_from_operators(operators)
        all_bases = stable_unique([sig.base for sig in all_signals])

        sigdict = SignalDict()  # map from Signal.base -> ndarray
        for op in operators:
            op.init_signals(sigdict)

        # Add built states to the probe dictionary
        self._probe_outputs = self.model.params

        # Provide a nicer interface to probe outputs
        self.data = ProbeDict(self._probe_outputs)

        self.all_data = RaggedArray(
            [sigdict[sb] for sb in all_bases],
            [getattr(sb, 'name', '') for sb in all_bases],
            dtype=np.float32)

        builder = ViewBuilder(all_bases, self.all_data)
        self._AX_views = {}
        self._YYB_views = {}
        for op_type, op_list in op_groups:
            self.setup_views(builder, op_type, op_list)
        for probe in self.model.probes:
            builder.append_view(self.model.sig[probe]['in'])
        builder.add_views_to(self.all_data)

        self.all_bases = all_bases
        self.sidx = builder.sidx
        self._prep_all_data()

    logger.info("Signals in %0.3f s", signals_timer.duration)

    # --- create list of plans
    with Timer() as plans_timer:
        self._plan = []
        for op_type, op_list in op_groups:
            self._plan.extend(self.plan_op_group(op_type, op_list))
        self._plan.extend(self.plan_probes())

    logger.info("Plans in %0.3f s", plans_timer.duration)

    self.n_steps = 0
def __init__(self, *args, **kwargs):
    # `old_f` is the unwrapped Simulator.__init__, captured from the
    # enclosing scope; time it and record the build time on the class.
    with Timer() as t:
        old_f(self, *args, **kwargs)
    Simulator.build_time = t.duration
def test_linearfilter(ctx, n_per_kind, rng):
    kinds = (
        nengo.synapses.LinearFilter((2.0,), (1.0,), analog=False),
        nengo.synapses.Lowpass(0.005),
        nengo.synapses.Alpha(0.005),
    )
    assert len(n_per_kind) == len(kinds)
    kinds_n = [(kind, n) for kind, n in zip(kinds, n_per_kind) if n > 0]

    dt = 0.001
    steps = []
    for kind, n in kinds_n:
        state = kind.make_state((n,), (n,), dt, dtype=np.float32)
        step = kind.make_step((n,), (n,), dt, rng=None, state=state)
        steps.append(step)

    # Nengo 3 uses state-space filters. For now, convert back to transfer
    # functions. Getting rid of this conversion would require a new
    # plan_linearfilter.
    dens = []
    nums = []
    for f in steps:
        if type(f).__name__ == "NoX":  # special case for a feedthrough
            den = np.array([1.0])
            num = f.D
        else:
            num, den = ss2tf(f.A, f.B, f.C, f.D)

        # This preprocessing is copied out of
        # nengo2.8/synapses.LinearFilter.make_step
        num = num.flatten()
        assert den[0] == 1.0
        num = num[1:] if num[0] == 0 else num
        den = den[1:]  # drop first element (equal to 1)
        num, den = num.astype(np.float32), den.astype(np.float32)

        dens.append(den)
        nums.append(num)

    A = RA(dens)
    B = RA(nums)

    X = RA([rng.normal(size=n) for kind, n in kinds_n])
    Y = RA([np.zeros(n) for kind, n in kinds_n])
    Xbuf = RA([np.zeros(shape) for shape in zip(B.sizes, X.sizes)])
    Ybuf = RA([np.zeros(shape) for shape in zip(A.sizes, Y.sizes)])

    queue = cl.CommandQueue(ctx)
    clA = CLRA(queue, A)
    clB = CLRA(queue, B)
    clX = CLRA(queue, X)
    clY = CLRA(queue, Y)
    clXbuf = CLRA(queue, Xbuf)
    clYbuf = CLRA(queue, Ybuf)

    n_calls = 3
    plans = plan_linearfilter(queue, clX, clY, clA, clB, clXbuf, clYbuf)
    with Timer() as timer:
        for _ in range(n_calls):
            for plan in plans:
                plan()
    print(timer.duration)

    for i, (kind, n) in enumerate(kinds_n):
        n = min(n, 100)
        state = kind.make_state((n,), (n,), dt, dtype=np.float32)
        step = kind.make_step((n,), (n,), dt, rng=None, state=state)
        x = X[i][:n].T
        y = np.zeros_like(x)
        for _ in range(n_calls):
            y[:] = step(0, x)

        z = clY[i][:n].T
        assert np.allclose(z, y, atol=1e-7, rtol=1e-5), kind
def run(self, *args, **kwargs):
    # `old_f` is the unwrapped Simulator.run, captured from the enclosing
    # scope; time it and record the run time on the class.
    with Timer() as t:
        old_f(self, *args, **kwargs)
    Simulator.run_time = t.duration
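# A hedged sketch of how the two timed wrappers above might be installed by
# monkeypatching a Simulator class. The function name and the `old_init`/
# `old_run` closures are illustrative assumptions; only the wrapper bodies
# mirror the snippets above.
def patch_simulator_timers(Simulator):
    old_init, old_run = Simulator.__init__, Simulator.run

    def __init__(self, *args, **kwargs):
        with Timer() as t:
            old_init(self, *args, **kwargs)
        Simulator.build_time = t.duration

    def run(self, *args, **kwargs):
        with Timer() as t:
            old_run(self, *args, **kwargs)
        Simulator.run_time = t.duration

    Simulator.__init__, Simulator.run = __init__, run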