def bench_eigvals():
    numpy_eigvals = nl.eigvals
    scipy_eigvals = sl.eigvals
    print()
    print(' Finding matrix eigenvalues')
    print(' ==================================')
    print(' | contiguous | non-contiguous ')
    print('----------------------------------------------')
    print(' size | scipy | numpy | scipy | numpy ')
    for size, repeat in [(20, 150), (100, 7), (200, 2)]:
        repeat *= 1
        print('%5s' % size, end=' ')
        sys.stdout.flush()
        a = random([size, size])
        print('| %6.2f ' % measure('scipy_eigvals(a)', repeat), end=' ')
        sys.stdout.flush()
        print('| %6.2f ' % measure('numpy_eigvals(a)', repeat), end=' ')
        sys.stdout.flush()
        a = a[-1::-1, -1::-1]  # turn into a non-contiguous array
        assert_(not a.flags['CONTIGUOUS'])
        print('| %6.2f ' % measure('scipy_eigvals(a)', repeat), end=' ')
        sys.stdout.flush()
        print('| %6.2f ' % measure('numpy_eigvals(a)', repeat), end=' ')
        sys.stdout.flush()
        print(' (secs for %s calls)' % (repeat))
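# The `measure` used throughout these benchmarks is assumed to be
# numpy.testing.measure(code_str, times), which compiles the code string
# and executes it `times` times in the caller's namespace, returning the
# elapsed time in seconds.  A minimal stand-in sketch (name is ours) for
# environments where it is unavailable:
import timeit


def measure_sketch(code_str, times=1, namespace=None):
    # Time `times` executions of `code_str` against an explicit namespace
    # (numpy's version instead inspects the caller's frame).
    timer = timeit.Timer(stmt=code_str, globals=namespace or globals())
    return timer.timeit(number=times)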
def bench_load_trk():
    rng = np.random.RandomState(42)
    dtype = 'float32'
    NB_STREAMLINES = 5000
    NB_POINTS = 1000
    points = [rng.rand(NB_POINTS, 3).astype(dtype)
              for i in range(NB_STREAMLINES)]
    scalars = [rng.rand(NB_POINTS, 10).astype(dtype)
               for i in range(NB_STREAMLINES)]
    repeat = 10

    with InTemporaryDirectory():
        trk_file = "tmp.trk"
        tractogram = Tractogram(points, affine_to_rasmm=np.eye(4))
        TrkFile(tractogram).save(trk_file)

        streamlines_old = [d[0] - 0.5
                           for d in tv.read(trk_file, points_space="rasmm")[0]]
        mtime_old = measure('tv.read(trk_file, points_space="rasmm")', repeat)
        print("Old: Loaded {:,} streamlines in {:6.2f}".format(NB_STREAMLINES,
                                                               mtime_old))

        trk = nib.streamlines.load(trk_file, lazy_load=False)
        streamlines_new = trk.streamlines
        mtime_new = measure('nib.streamlines.load(trk_file, lazy_load=False)',
                            repeat)
        # Format specs fixed: the original used {:6.2} (no 'f') here.
        print("\nNew: Loaded {:,} streamlines in {:6.2f}".format(NB_STREAMLINES,
                                                                 mtime_new))
        print("Speedup of {:.2f}".format(mtime_old / mtime_new))
        for s1, s2 in zip(streamlines_new, streamlines_old):
            assert_array_equal(s1, s2)

    # Points and scalars
    with InTemporaryDirectory():
        trk_file = "tmp.trk"
        tractogram = Tractogram(points,
                                data_per_point={'scalars': scalars},
                                affine_to_rasmm=np.eye(4))
        TrkFile(tractogram).save(trk_file)

        streamlines_old = [d[0] - 0.5
                           for d in tv.read(trk_file, points_space="rasmm")[0]]
        scalars_old = [d[1]
                       for d in tv.read(trk_file, points_space="rasmm")[0]]
        mtime_old = measure('tv.read(trk_file, points_space="rasmm")', repeat)
        msg = "Old: Loaded {:,} streamlines with scalars in {:6.2f}"
        print(msg.format(NB_STREAMLINES, mtime_old))

        trk = nib.streamlines.load(trk_file, lazy_load=False)
        scalars_new = trk.tractogram.data_per_point['scalars']
        mtime_new = measure('nib.streamlines.load(trk_file, lazy_load=False)',
                            repeat)
        msg = "New: Loaded {:,} streamlines with scalars in {:6.2f}"
        print(msg.format(NB_STREAMLINES, mtime_new))
        print("Speedup of {:.2f}".format(mtime_old / mtime_new))
        for s1, s2 in zip(scalars_new, scalars_old):
            assert_array_equal(s1, s2)
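# Minimal round-trip sketch of the nibabel.streamlines API exercised
# above (file name is illustrative, not from the benchmark).  The -0.5
# applied to the old `tv.read` results above appears to compensate for a
# half-voxel difference in point-space conventions between the old
# trackvis reader and the new streamlines API.
def _trk_roundtrip_sketch():
    import numpy as np
    import nibabel as nib
    from nibabel.streamlines import Tractogram

    streamlines = [np.random.rand(10, 3).astype('float32') for _ in range(5)]
    tractogram = Tractogram(streamlines, affine_to_rasmm=np.eye(4))
    nib.streamlines.save(tractogram, 'example.trk')
    return nib.streamlines.load('example.trk', lazy_load=False).streamlines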
def bench_svd():
    numpy_svd = nl.svd
    scipy_svd = sl.svd
    print()
    print(' Finding the SVD decomposition')
    print(' ==================================')
    print(' | contiguous | non-contiguous ')
    print('----------------------------------------------')
    print(' size | scipy | numpy | scipy | numpy ')
    for size, repeat in [(20, 150), (100, 7), (200, 2)]:
        repeat *= 1
        print('%5s' % size, end=' ')
        sys.stdout.flush()
        a = random([size, size])
        print('| %6.2f ' % measure('scipy_svd(a)', repeat), end=' ')
        sys.stdout.flush()
        print('| %6.2f ' % measure('numpy_svd(a)', repeat), end=' ')
        sys.stdout.flush()
        a = a[-1::-1, -1::-1]  # turn into a non-contiguous array
        assert_(not a.flags['CONTIGUOUS'])
        print('| %6.2f ' % measure('scipy_svd(a)', repeat), end=' ')
        sys.stdout.flush()
        print('| %6.2f ' % measure('numpy_svd(a)', repeat), end=' ')
        sys.stdout.flush()
        print(' (secs for %s calls)' % (repeat))
def bench_set_number_of_points():
    repeat = 1
    nb_points_per_streamline = 100
    nb_points = 42
    nb_streamlines = int(1e4)
    streamlines = [np.random.rand(nb_points_per_streamline, 3).astype("float32")
                   for i in range(nb_streamlines)]

    print("Timing set_number_of_points() in Cython"
          " ({0} streamlines)".format(nb_streamlines))
    cython_time = measure("set_number_of_points(streamlines, nb_points)",
                          repeat)
    print("Cython time: {0:.3}sec".format(cython_time))
    del streamlines

    streamlines = [np.random.rand(nb_points_per_streamline, 3).astype("float32")
                   for i in range(nb_streamlines)]
    python_time = measure("[set_number_of_points_python(s, nb_points)"
                          " for s in streamlines]", repeat)
    print("Python time: {0:.2}sec".format(python_time))
    print("Speed up of {0}x".format(python_time / cython_time))
    del streamlines
def bench_load_save():
    rng = np.random.RandomState(20111001)
    repeat = 4
    img_shape = (128, 128, 64)
    arr = rng.normal(size=img_shape)
    img = Nifti1Image(arr, np.eye(4))
    sio = BytesIO()
    img.file_map['image'].fileobj = sio
    hdr = img.get_header()
    sys.stdout.flush()
    print("\nImage load save")
    print("----------------")
    hdr.set_data_dtype(np.float32)
    mtime = measure('img.to_file_map()', repeat)
    print('%30s %6.2f' % ('Save float64 to float32', mtime))
    mtime = measure('img.from_file_map(img.file_map)', repeat)
    print('%30s %6.2f' % ('Load from float32', mtime))
    hdr.set_data_dtype(np.int16)
    mtime = measure('img.to_file_map()', repeat)
    print('%30s %6.2f' % ('Save float64 to int16', mtime))
    mtime = measure('img.from_file_map(img.file_map)', repeat)
    print('%30s %6.2f' % ('Load from int16', mtime))
    # high was -1000 in the original, which makes the range degenerate;
    # 1000 matches the equivalent benchmarks elsewhere in this file.
    arr = np.random.random_integers(low=-1000, high=1000, size=img_shape)
    arr = arr.astype(np.int16)
    img = Nifti1Image(arr, np.eye(4))
    sio = BytesIO()
    img.file_map['image'].fileobj = sio
    hdr = img.get_header()
    hdr.set_data_dtype(np.float32)
    mtime = measure('img.to_file_map()', repeat)
    print('%30s %6.2f' % ('Save Int16 to float32', mtime))
    sys.stdout.flush()
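# Sketch of the in-memory round-trip trick used above: pointing the
# image's file_map at a BytesIO lets to_file_map()/from_file_map() run
# without touching disk.  This is the same nibabel pattern as in the
# benchmark, isolated for clarity.
def _nifti_inmemory_roundtrip_sketch():
    import numpy as np
    from io import BytesIO
    from nibabel import Nifti1Image

    img = Nifti1Image(np.zeros((4, 4, 4)), np.eye(4))
    img.file_map['image'].fileobj = BytesIO()
    img.to_file_map()                                # serialize to the buffer
    return Nifti1Image.from_file_map(img.file_map)   # deserialize from it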
def bench_random(self):
    numpy_det = nl.det
    scipy_det = sl.det
    print()
    print(' Finding matrix determinant')
    print(' ==================================')
    print(' | contiguous | non-contiguous ')
    print('----------------------------------------------')
    print(' size | scipy | numpy | scipy | numpy ')
    for size, repeat in [(20, 1000), (100, 150), (500, 2), (1000, 1)][:-1]:
        repeat *= 2
        print('%5s' % size, end=' ')
        sys.stdout.flush()
        a = random([size, size])
        print('| %6.2f ' % measure('scipy_det(a)', repeat), end=' ')
        sys.stdout.flush()
        print('| %6.2f ' % measure('numpy_det(a)', repeat), end=' ')
        sys.stdout.flush()
        a = a[-1::-1, -1::-1]  # turn into a non-contiguous array
        assert_(not a.flags['CONTIGUOUS'])
        print('| %6.2f ' % measure('scipy_det(a)', repeat), end=' ')
        sys.stdout.flush()
        print('| %6.2f ' % measure('numpy_det(a)', repeat), end=' ')
        sys.stdout.flush()
        print(' (secs for %s calls)' % (repeat))
def bench_set_number_of_points():
    repeat = 5
    # nb_points and the local `streamlines` bindings were missing in the
    # original, so the measured expressions would raise NameError; they
    # are reconstructed here from the parallel bench_length() below.
    nb_points = 42
    nb_streamlines = DATA['nb_streamlines']
    streamlines = DATA['streamlines']

    msg = "Timing set_number_of_points() with {0:,} streamlines."
    print(msg.format(nb_streamlines * repeat))
    cython_time = measure("set_number_of_points(streamlines, nb_points)",
                          repeat)
    print("Cython time: {0:.3f} sec".format(cython_time))

    python_time = measure("[set_number_of_points_python(s, nb_points)"
                          " for s in streamlines]", repeat)
    print("Python time: {0:.2f} sec".format(python_time))
    print("Speed up of {0:.2f}x".format(python_time / cython_time))

    # Make sure it produces the same results.
    assert_array_almost_equal([set_number_of_points_python(s)
                               for s in DATA["streamlines"]],
                              set_number_of_points(DATA["streamlines"]))

    streamlines = DATA['streamlines_arrseq']
    cython_time_arrseq = measure("set_number_of_points(streamlines, nb_points)",
                                 repeat)
    print("Cython time (ArrSeq): {0:.3f} sec".format(cython_time_arrseq))
    print("Speed up of {0:.2f}x".format(python_time / cython_time_arrseq))

    # Make sure it produces the same results.
    assert_array_equal(set_number_of_points(DATA["streamlines"]),
                       set_number_of_points(DATA["streamlines_arrseq"]))
def test_fprop_faster(self):
    seed = 1234
    repeat = 100

    lstm = LSTM(input_size=DATA['features_size'],
                hidden_sizes=[DATA['hidden_size']])
    lstm.initialize(initer.UniformInitializer(seed))

    lstm2 = LSTMFaster(input_size=DATA['features_size'],
                       hidden_sizes=[DATA['hidden_size']])
    # Make sure the weights are the same (order: Wi, Wo, Wf, Wm).
    lstm2.layers_lstm[0].W.set_value(
        np.concatenate([lstm.layers_lstm[0].Wi.get_value(),
                        lstm.layers_lstm[0].Wo.get_value(),
                        lstm.layers_lstm[0].Wf.get_value(),
                        lstm.layers_lstm[0].Wm.get_value()], axis=1))
    lstm2.layers_lstm[0].U.set_value(
        np.concatenate([lstm.layers_lstm[0].Ui.get_value(),
                        lstm.layers_lstm[0].Uo.get_value(),
                        lstm.layers_lstm[0].Uf.get_value(),
                        lstm.layers_lstm[0].Um.get_value()], axis=1))

    input = T.tensor3('input')
    input.tag.test_value = DATA['batch']
    fprop = theano.function([input], lstm.get_output(input))
    fprop2 = theano.function([input], lstm2.get_output(input))

    fprop_time = measure("out = fprop(DATA['batch'])", repeat)
    print("fprop time: {:.2f} sec.".format(fprop_time))
    fprop2_time = measure("out = fprop2(DATA['batch'])", repeat)
    print("fprop faster time: {:.2f} sec.".format(fprop2_time))
    print("Speedup: {:.2f}x".format(fprop_time / fprop2_time))

    out = fprop(DATA['batch'])
    out2 = fprop2(DATA['batch'])
    assert_array_equal(out, out2)
def bench_length():
    repeat = 10
    nb_streamlines = DATA['nb_streamlines']
    streamlines = DATA["streamlines"]  # Streamlines as a list of ndarrays.
    print("Timing length() with {0:,} streamlines.".format(nb_streamlines))

    python_time = measure("[length_python(s) for s in streamlines]", repeat)
    print("Python time: {0:.2}sec".format(python_time))

    cython_time = measure("length(streamlines)", repeat)
    print("Cython time: {0:.3}sec".format(cython_time))
    print("Speed up of {0:.2f}x".format(python_time / cython_time))

    # Make sure it produces the same results.
    assert_array_equal([length_python(s) for s in DATA["streamlines"]],
                       length(DATA["streamlines"]))

    streamlines = DATA['streamlines_arrseq']
    cython_time_arrseq = measure("length(streamlines)", repeat)
    print("Cython time (ArrSeq): {0:.3}sec".format(cython_time_arrseq))
    print("Speed up of {0:.2f}x".format(python_time / cython_time_arrseq))

    # Make sure it produces the same results.
    assert_array_equal(length(DATA["streamlines"]),
                       length(DATA["streamlines_arrseq"]))
def bench_vec_val_vect():
    # nosetests -s --match '(?:^|[\\b_\\.//-])[Bb]ench'
    repeat = 100
    shape = (100, 100)
    evecs, evals = randn(*(shape + (3, 3))), randn(*(shape + (3,)))
    etime = measure("np.einsum('...ij,...j,...kj->...ik', evecs, evals, evecs)",
                    repeat)
    vtime = measure("vec_val_vect(evecs, evals)", repeat)
    print("einsum %4.2f; vec_val_vect %4.2f" % (etime, vtime))
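# Sanity-check sketch for the einsum expression benchmarked above: per
# leading index it should equal evecs @ diag(evals) @ evecs.T, which is
# the operation vec_val_vect implements.  Not run automatically; call by
# hand if wanted.
def _check_vec_val_vect_identity():
    evecs, evals = randn(2, 2, 3, 3), randn(2, 2, 3)
    out = np.einsum('...ij,...j,...kj->...ik', evecs, evals, evecs)
    ref = evecs[0, 0] @ np.diag(evals[0, 0]) @ evecs[0, 0].T
    assert np.allclose(out[0, 0], ref)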
def bench_quickbundles():
    dtype = "float32"
    repeat = 10
    nb_points = 18

    streams, hdr = nib.trackvis.read(get_data('fornix'))
    fornix = [s[0].astype(dtype) for s in streams]
    fornix = streamline_utils.set_number_of_points(fornix, nb_points)

    # Create eight copies of the fornix to be clustered (one in each octant).
    streamlines = []
    streamlines += [s + np.array([100, 100, 100], dtype) for s in fornix]
    streamlines += [s + np.array([100, -100, 100], dtype) for s in fornix]
    streamlines += [s + np.array([100, 100, -100], dtype) for s in fornix]
    streamlines += [s + np.array([100, -100, -100], dtype) for s in fornix]
    streamlines += [s + np.array([-100, 100, 100], dtype) for s in fornix]
    streamlines += [s + np.array([-100, -100, 100], dtype) for s in fornix]
    streamlines += [s + np.array([-100, 100, -100], dtype) for s in fornix]
    streamlines += [s + np.array([-100, -100, -100], dtype) for s in fornix]

    # The expected number of clusters of the fornix using threshold=10 is 4.
    threshold = 10.
    expected_nb_clusters = 4 * 8

    print("Timing QuickBundles 1.0 vs. 2.0")

    qb = QB_Old(streamlines, threshold, pts=None)
    qb1_time = measure("QB_Old(streamlines, threshold, nb_points)", repeat)
    print("QuickBundles time: {0:.4}sec".format(qb1_time))
    assert_equal(qb.total_clusters, expected_nb_clusters)
    sizes1 = [qb.partitions()[i]['N'] for i in range(qb.total_clusters)]
    indices1 = [qb.partitions()[i]['indices']
                for i in range(qb.total_clusters)]

    qb2 = QB_New(threshold)
    qb2_time = measure("clusters = qb2.cluster(streamlines)", repeat)
    print("QuickBundles2 time: {0:.4}sec".format(qb2_time))
    print("Speed up of {0}x".format(qb1_time / qb2_time))
    clusters = qb2.cluster(streamlines)
    # list(...) is needed under Python 3, where map() is a lazy iterator.
    sizes2 = list(map(len, clusters))
    indices2 = [c.indices for c in clusters]
    assert_equal(len(clusters), expected_nb_clusters)
    assert_array_equal(sizes2, sizes1)
    assert_arrays_equal(indices2, indices1)

    qb = QB_New(threshold, metric=MDFpy())
    qb3_time = measure("clusters = qb.cluster(streamlines)", repeat)
    print("QuickBundles2_python time: {0:.4}sec".format(qb3_time))
    print("Speed up of {0}x".format(qb1_time / qb3_time))
    clusters = qb.cluster(streamlines)
    sizes3 = list(map(len, clusters))
    indices3 = [c.indices for c in clusters]
    assert_equal(len(clusters), expected_nb_clusters)
    assert_array_equal(sizes3, sizes1)
    assert_arrays_equal(indices3, indices1)
def bench_quickbundles():
    dtype = "float32"
    repeat = 10
    nb_points = 12

    streams, hdr = nib.trackvis.read(get_fnames('fornix'))
    fornix = [s[0].astype(dtype) for s in streams]
    fornix = streamline_utils.set_number_of_points(fornix, nb_points)

    # Create eight copies of the fornix to be clustered (one in each octant).
    streamlines = []
    streamlines += [s + np.array([100, 100, 100], dtype) for s in fornix]
    streamlines += [s + np.array([100, -100, 100], dtype) for s in fornix]
    streamlines += [s + np.array([100, 100, -100], dtype) for s in fornix]
    streamlines += [s + np.array([100, -100, -100], dtype) for s in fornix]
    streamlines += [s + np.array([-100, 100, 100], dtype) for s in fornix]
    streamlines += [s + np.array([-100, -100, 100], dtype) for s in fornix]
    streamlines += [s + np.array([-100, 100, -100], dtype) for s in fornix]
    streamlines += [s + np.array([-100, -100, -100], dtype) for s in fornix]

    # The expected number of clusters of the fornix using threshold=10 is 4.
    threshold = 10.
    expected_nb_clusters = 4 * 8

    print("Timing QuickBundles 1.0 vs. 2.0")

    qb = QB_Old(streamlines, threshold, pts=None)
    qb1_time = measure("QB_Old(streamlines, threshold, nb_points)", repeat)
    print("QuickBundles time: {0:.4}sec".format(qb1_time))
    assert_equal(qb.total_clusters, expected_nb_clusters)
    sizes1 = [qb.partitions()[i]['N'] for i in range(qb.total_clusters)]
    indices1 = [qb.partitions()[i]['indices']
                for i in range(qb.total_clusters)]

    qb2 = QB_New(threshold)
    qb2_time = measure("clusters = qb2.cluster(streamlines)", repeat)
    print("QuickBundles2 time: {0:.4}sec".format(qb2_time))
    print("Speed up of {0}x".format(qb1_time / qb2_time))
    clusters = qb2.cluster(streamlines)
    sizes2 = map(len, clusters)
    indices2 = map(lambda c: c.indices, clusters)
    assert_equal(len(clusters), expected_nb_clusters)
    assert_array_equal(list(sizes2), sizes1)
    assert_arrays_equal(indices2, indices1)

    qb = QB_New(threshold, metric=MDFpy())
    qb3_time = measure("clusters = qb.cluster(streamlines)", repeat)
    print("QuickBundles2_python time: {0:.4}sec".format(qb3_time))
    print("Speed up of {0}x".format(qb1_time / qb3_time))
    clusters = qb.cluster(streamlines)
    sizes3 = map(len, clusters)
    indices3 = map(lambda c: c.indices, clusters)
    assert_equal(len(clusters), expected_nb_clusters)
    assert_array_equal(list(sizes3), sizes1)
    assert_arrays_equal(indices3, indices1)
def test_fprop_faster(self):
    activation = "tanh"
    seed = 1234
    repeat = 1000

    layer = LayerLSTM(input_size=DATA['features_size'],
                      hidden_size=DATA['hidden_size'],
                      activation=activation)
    layer.initialize(initer.UniformInitializer(seed))

    layer_fast = LayerLSTMFast(input_size=DATA['features_size'],
                               hidden_size=DATA['hidden_size'],
                               activation=activation)
    # Make sure the weights are the same (order: Wi, Wo, Wf, Wm).
    layer_fast.W.set_value(np.concatenate([layer.Wi.get_value(),
                                           layer.Wo.get_value(),
                                           layer.Wf.get_value(),
                                           layer.Wm.get_value()], axis=1))
    layer_fast.U.set_value(np.concatenate([layer.Ui.get_value(),
                                           layer.Uo.get_value(),
                                           layer.Uf.get_value(),
                                           layer.Um.get_value()], axis=1))

    input = T.matrix('input')
    input.tag.test_value = DATA['batch_one_step']
    last_h = sharedX(DATA['state_h'])
    last_m = sharedX(DATA['state_m'])
    fprop = theano.function([input], layer.fprop(input, last_h, last_m))
    fprop_faster = theano.function([input],
                                   layer_fast.fprop(input, last_h, last_m))

    fprop_time = measure("h, m = fprop(DATA['batch_one_step'])", repeat)
    fprop_faster_time = measure("h, m = fprop_faster(DATA['batch_one_step'])",
                                repeat)
    print("fprop time: {:.2f} sec.".format(fprop_time))
    print("fprop faster time: {:.2f} sec.".format(fprop_faster_time))
    print("Speedup: {:.2f}x".format(fprop_time / fprop_faster_time))

    for i in range(DATA['seq_len']):
        h1, m1 = fprop(DATA['batch'][:, i, :])
        h2, m2 = fprop_faster(DATA['batch'][:, i, :])
        assert_array_equal(h1, h2)
        assert_array_equal(m1, m2)
def bench_length():
    repeat = 1000
    streamline = np.random.rand(1000, 3)

    print("Timing length() in Cython")
    cython_time = measure("length(streamline)", repeat)
    print("Cython time: {0:.2}sec".format(cython_time))

    python_time = measure("length_python(streamline)", repeat)
    print("Python time: {0:.2}sec".format(python_time))
    print("Speed up of {0}x".format(python_time / cython_time))
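# `length_python` is not defined in this file; a plausible pure-Python
# reference (sum of Euclidean segment lengths, the quantity the Cython
# length() computes) would look like this sketch:
def length_python_sketch(xyz):
    diffs = xyz[1:] - xyz[:-1]  # per-segment displacement vectors
    return np.sqrt((diffs * diffs).sum(axis=1)).sum()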
def bench_resample():
    repeat = 1000
    nb_points = 42
    streamline = np.random.rand(1000, 3)

    print("Timing set_number_of_points() in Cython")
    cython_time = measure("set_number_of_points(streamline, nb_points)",
                          repeat)
    print("Cython time: {0:.2}sec".format(cython_time))

    python_time = measure("set_number_of_points_python(streamline, nb_points)",
                          repeat)
    print("Python time: {0:.2}sec".format(python_time))
    print("Speed up of {0}x".format(python_time / cython_time))
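# Likewise a plausible sketch of `set_number_of_points_python`: arc-length
# parameterize the streamline, then linearly interpolate `n` evenly
# spaced points along it (assumed semantics, matching the Cython version).
def set_number_of_points_python_sketch(xyz, n):
    seg = np.sqrt(((xyz[1:] - xyz[:-1]) ** 2).sum(axis=1))
    s = np.insert(np.cumsum(seg), 0, 0.0)  # cumulative arc length
    t = np.linspace(0, s[-1], n)           # target arc-length positions
    return np.vstack([np.interp(t, s, xyz[:, i]) for i in range(3)]).T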
def bench_bounding_box():
    vol = np.zeros((100, 100, 100))

    vol[0, 0, 0] = 1
    times = 100
    time = measure("bounding_box(vol)", times) / times
    print("Bounding_box on a sparse volume: {}".format(time))

    vol[:] = 10
    times = 1
    time = measure("bounding_box(vol)", times) / times
    print("Bounding_box on a dense volume: {}".format(time))
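# `bounding_box` is assumed to return the extent of the nonzero voxels;
# one plausible reference sketch (the exact return convention of the
# benchmarked function may differ):
def bounding_box_sketch(vol):
    coords = np.argwhere(vol)  # indices of nonzero voxels
    return coords.min(axis=0), coords.max(axis=0) + 1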
def bench_local_maxima():
    repeat = 10000
    vertices, faces = default_sphere.vertices, default_sphere.faces
    # odf and edges were undefined before the first measure() in the
    # original; reconstructed here from the sibling benchmark below.
    odf = abs(vertices.sum(-1))
    edges = unique_edges(faces)
    print('Timing peak finding')
    timed0 = measure("local_maxima(odf, edges)", repeat)
    print('Actual sphere: %0.2f' % timed0)

    # Create an artificial odf with a few peaks
    odf = np.zeros(len(vertices))
    odf[1] = 1.
    odf[143] = 143.
    odf[505] = 505.
    timed1 = measure("local_maxima(odf, edges)", repeat)
    print('Few-peak sphere: %0.2f' % timed1)
def bench_quick_squash():
    # nosetests -s --match '(?:^|[\\b_\\.//-])[Bb]ench'
    repeat = 10
    shape = (300, 200)
    arrs = np.zeros(shape, dtype=object)
    scalars = np.zeros(shape, dtype=object)
    for ijk in ndindex(arrs.shape):
        arrs[ijk] = np.ones((3, 5))
        scalars[ijk] = np.float32(0)
    print('\nSquashing benchmarks')
    for name, objs in (
            ('floats', np.zeros(shape, float).astype(object)),
            ('ints', np.zeros(shape, int).astype(object)),
            ('arrays', arrs),
            ('scalars', scalars)):
        print(name)
        timed0 = measure("quick_squash(objs)", repeat)
        timed1 = measure("old_squash(objs)", repeat)
        print("fast %4.2f; slow %4.2f" % (timed0, timed1))
        objs[50, 50] = None
        timed0 = measure("quick_squash(objs)", repeat)
        timed1 = measure("old_squash(objs)", repeat)
        print("With None: fast %4.2f; slow %4.2f" % (timed0, timed1))
        msk = objs != np.array(None)
        timed0 = measure("quick_squash(objs, msk)", repeat)
        timed1 = measure("old_squash(objs, msk)", repeat)
        print("With mask: fast %4.2f; slow %4.2f" % (timed0, timed1))
        objs[50, 50] = np.float32(0)
        timed0 = measure("quick_squash(objs, msk)", repeat)
        timed1 = measure("old_squash(objs, msk)", repeat)
        print("Other dtype: fast %4.2f; slow %4.2f" % (timed0, timed1))
def bench_local_maxima():
    repeat = 10000
    sphere = get_sphere('symmetric724')
    vertices, faces = sphere.vertices, sphere.faces
    odf = abs(vertices.sum(-1))
    edges = unique_edges(faces)
    print('Timing peak finding')
    timed0 = measure("local_maxima(odf, edges)", repeat)
    print('Actual sphere: %0.2f' % timed0)

    # Create an artificial odf with a few peaks
    odf = np.zeros(len(vertices))
    odf[1] = 1.
    odf[143] = 143.
    odf[505] = 505.
    timed1 = measure("local_maxima(odf, edges)", repeat)
    print('Few-peak sphere: %0.2f' % timed1)
def bench_length():
    repeat = 1
    nb_points_per_streamline = 100
    nb_streamlines = int(1e5)
    streamlines = [np.random.rand(nb_points_per_streamline, 3).astype("float32")
                   for i in range(nb_streamlines)]

    print("Timing length() in Cython ({0} streamlines)".format(nb_streamlines))
    cython_time = measure("length(streamlines)", repeat)
    print("Cython time: {0:.3}sec".format(cython_time))
    del streamlines

    streamlines = [np.random.rand(nb_points_per_streamline, 3).astype("float32")
                   for i in range(nb_streamlines)]
    python_time = measure("[length_python(s) for s in streamlines]", repeat)
    print("Python time: {0:.2}sec".format(python_time))
    print("Speed up of {0}x".format(python_time / cython_time))
    del streamlines
def bench_zhang():
    print('Zhang min')
    print('=' * 10)
    # ref_time = measure('tm.most_similar_track_zhang(tracks300)')
    # print('reference time: %f' % ref_time)
    opt_time = measure('pf.most_similar_track_zhang(tracks300)')
    print('optimized time: %f' % opt_time)
    print()
def bench_array_to_file():
    rng = np.random.RandomState(20111001)
    repeat = 10
    img_shape = (128, 128, 64, 10)
    arr = rng.normal(size=img_shape)
    sys.stdout.flush()
    print_git_title("\nArray to file")
    mtime = measure('array_to_file(arr, BytesIO(), np.float32)', repeat)
    print('%30s %6.2f' % ('Save float64 to float32', mtime))
    mtime = measure('array_to_file(arr, BytesIO(), np.int16)', repeat)
    print('%30s %6.2f' % ('Save float64 to int16', mtime))
    # Set a lot of NaNs to check timing
    arr[:, :, :, 1] = np.nan
    mtime = measure('array_to_file(arr, BytesIO(), np.float32)', repeat)
    print('%30s %6.2f' % ('Save float64 to float32, NaNs', mtime))
    mtime = measure('array_to_file(arr, BytesIO(), np.int16)', repeat)
    print('%30s %6.2f' % ('Save float64 to int16, NaNs', mtime))
    # Set a lot of infs to check timing
    arr[:, :, :, 1] = np.inf
    mtime = measure('array_to_file(arr, BytesIO(), np.float32)', repeat)
    print('%30s %6.2f' % ('Save float64 to float32, infs', mtime))
    mtime = measure('array_to_file(arr, BytesIO(), np.int16)', repeat)
    print('%30s %6.2f' % ('Save float64 to int16, infs', mtime))
    # Int16 input, float output
    arr = np.random.random_integers(low=-1000, high=1000, size=img_shape)
    arr = arr.astype(np.int16)
    mtime = measure('array_to_file(arr, BytesIO(), np.float32)', repeat)
    print('%30s %6.2f' % ('Save Int16 to float32', mtime))
    sys.stdout.flush()
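# Round-trip sketch for the nibabel.volumeutils function benchmarked
# above; array_from_file is assumed to be the matching reader (check the
# nibabel docs for the exact signatures before relying on this).
def _array_to_file_roundtrip_sketch():
    from io import BytesIO
    from nibabel.volumeutils import array_to_file, array_from_file

    data = np.arange(24, dtype=np.float64).reshape(2, 3, 4)
    buf = BytesIO()
    array_to_file(data, buf, np.float32)  # cast to float32 and write
    buf.seek(0)
    return array_from_file(data.shape, np.float32, buf)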
def bench_compress_streamlines():
    repeat = 10
    fname = get_fnames('fornix')
    fornix = load_tractogram(fname, 'same',
                             bbox_valid_check=False).streamlines
    streamlines = Streamlines(fornix)

    print("Timing compress_streamlines() in Cython"
          " ({0} streamlines)".format(len(streamlines)))
    cython_time = measure("compress_streamlines(streamlines)", repeat)
    print("Cython time: {0:.3}sec".format(cython_time))
    del streamlines

    streamlines = Streamlines(fornix)
    # list(...) forces evaluation; under Python 3 a bare map() is lazy,
    # which would make the measured time meaningless.
    python_time = measure("list(map(compress_streamlines_python, streamlines))",
                          repeat)
    print("Python time: {0:.2}sec".format(python_time))
    print("Speed up of {0}x".format(python_time / cython_time))
    del streamlines
def bench_mdl_traj():
    t = np.concatenate(tracks300)
    # t = tracks300[0]
    print('MDL traj')
    print('=' * 10)
    opt_time = measure('pf.approximate_mdl_trajectory(t)')
    # opt_time = measure('tm.approximate_trajectory_partitioning(t)')
    # opt_time = measure('tm.minimum_description_length_unpartitoned(t)')
    print('optimized time: %f' % opt_time)
    print()
def bench_compress_streamlines():
    repeat = 10
    fname = get_fnames('fornix')
    streams, hdr = tv.read(fname)
    streamlines = [i[0] for i in streams]

    print("Timing compress_streamlines() in Cython"
          " ({0} streamlines)".format(len(streamlines)))
    cython_time = measure("compress_streamlines(streamlines)", repeat)
    print("Cython time: {0:.3}sec".format(cython_time))
    del streamlines

    streams, hdr = tv.read(fname)
    streamlines = [i[0] for i in streams]
    # list(...) forces evaluation; under Python 3 a bare map() is lazy,
    # which would make the measured time meaningless.
    python_time = measure("list(map(compress_streamlines_python, streamlines))",
                          repeat)
    print("Python time: {0:.2}sec".format(python_time))
    print("Speed up of {0}x".format(python_time / cython_time))
    del streamlines
def bench_finite_range():
    rng = np.random.RandomState(20111001)
    repeat = 10
    img_shape = (128, 128, 64, 10)
    arr = rng.normal(size=img_shape)
    sys.stdout.flush()
    print_git_title("\nFinite range")
    mtime = measure('finite_range(arr)', repeat)
    print('%30s %6.2f' % ('float64 all finite', mtime))
    arr[:, :, :, 1] = np.nan
    mtime = measure('finite_range(arr)', repeat)
    print('%30s %6.2f' % ('float64 many NaNs', mtime))
    arr[:, :, :, 1] = np.inf
    mtime = measure('finite_range(arr)', repeat)
    print('%30s %6.2f' % ('float64 many infs', mtime))
    # Int16 input, float output
    arr = np.random.random_integers(low=-1000, high=1000, size=img_shape)
    arr = arr.astype(np.int16)
    mtime = measure('finite_range(arr)', repeat)
    print('%30s %6.2f' % ('int16', mtime))
    sys.stdout.flush()
def bench_csdeconv(center=(50, 40, 40), width=12):
    img, gtab, labels_img = read_stanford_labels()
    data = img.get_data()
    labels = labels_img.get_data()
    shape = labels.shape
    mask = np.in1d(labels, [1, 2])
    mask.shape = shape

    a, b, c = center
    hw = width // 2
    idx = (slice(a - hw, a + hw), slice(b - hw, b + hw),
           slice(c - hw, c + hw))

    data_small = data[idx].copy()
    mask_small = mask[idx].copy()
    voxels = mask_small.sum()

    cmd = "model.fit(data_small, mask_small)"
    print("== Benchmarking CSD fit on %d voxels ==" % voxels)
    msg = "SH order - %d, gradient directions - %d :: %g sec"

    # Basic case
    sh_order = 8
    model = ConstrainedSphericalDeconvModel(gtab, None, sh_order=sh_order)
    time = npt.measure(cmd)
    print(msg % (sh_order, num_grad(gtab), time))

    # Smaller data set
    data_small = data_small[..., :75].copy()
    gtab = GradientTable(gtab.gradients[:75])
    model = ConstrainedSphericalDeconvModel(gtab, None, sh_order=sh_order)
    time = npt.measure(cmd)
    print(msg % (sh_order, num_grad(gtab), time))

    # Super resolution
    sh_order = 12
    model = ConstrainedSphericalDeconvModel(gtab, None, sh_order=sh_order)
    time = npt.measure(cmd)
    print(msg % (sh_order, num_grad(gtab), time))
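# `num_grad` is not defined in this file; given how it is used above, a
# plausible sketch counts the non-b0 gradient directions of a dipy
# GradientTable (hypothetical helper, not the benchmark's own code):
def num_grad_sketch(gtab):
    return int((~gtab.b0s_mask).sum())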
def bench_random(self):
    numpy_solve = nl.solve
    scipy_solve = sl.solve
    print()
    print(' Solving system of linear equations')
    print(' ==================================')
    print(' | contiguous | non-contiguous ')
    print('----------------------------------------------')
    print(' size | scipy | numpy | scipy | numpy ')
    for size, repeat in [(20, 1000), (100, 150), (500, 2), (1000, 1)][:-1]:
        repeat *= 2
        print('%5s' % size, end=' ')
        sys.stdout.flush()
        a = random([size, size])
        # larger diagonal ensures non-singularity:
        for i in range(size):
            a[i, i] = 10 * (.1 + a[i, i])
        b = random([size])
        print('| %6.2f ' % measure('scipy_solve(a,b)', repeat), end=' ')
        sys.stdout.flush()
        print('| %6.2f ' % measure('numpy_solve(a,b)', repeat), end=' ')
        sys.stdout.flush()
        a = a[-1::-1, -1::-1]  # turn into a non-contiguous array
        assert_(not a.flags['CONTIGUOUS'])
        print('| %6.2f ' % measure('scipy_solve(a,b)', repeat), end=' ')
        sys.stdout.flush()
        print('| %6.2f ' % measure('numpy_solve(a,b)', repeat), end=' ')
        sys.stdout.flush()
        print(' (secs for %s calls)' % (repeat))
def run_print(test_list):
    print()
    print(" Integrating sum(x**2) -- MISER Monte Carlo")
    print(" ==========================================")
    print()
    print(" ndims | npoints | nprocs | time ")
    print(" ------------------------------- ")
    for ndims, npoints, nprocs, repeat in test_list:
        print(" {ndims:5} | {npoints:7} | {nprocs:6} |".format(
            ndims=ndims, npoints=npoints, nprocs=nprocs), end="")
        xl = [0.] * ndims
        xu = [1.] * ndims
        time = measure(
            "mcmiser(lambda x: sum(x**2),{npoints},{xl},{xu},nprocs={nprocs})"
            .format(npoints=npoints, xl=str(xl), xu=str(xu),
                    nprocs=str(nprocs)), repeat)
        print(" {time:.2f} (seconds for {ncalls} calls)".format(
            time=time, ncalls=repeat))
def bench_random(self):
    numpy_inv = nl.inv
    scipy_inv = sl.inv
    print()
    print(' Finding matrix inverse')
    print(' ==================================')
    print(' | contiguous | non-contiguous ')
    print('----------------------------------------------')
    print(' size | scipy | numpy | scipy | numpy')
    for size, repeat in [(20, 1000), (100, 150), (500, 2), (1000, 1)][:-1]:
        repeat *= 2
        print('%5s' % size, end=' ')
        sys.stdout.flush()
        a = random([size, size])
        # large diagonal ensures non-singularity:
        for i in range(size):
            a[i, i] = 10 * (.1 + a[i, i])
        print('| %6.2f ' % measure('scipy_inv(a)', repeat), end=' ')
        sys.stdout.flush()
        print('| %6.2f ' % measure('numpy_inv(a)', repeat), end=' ')
        sys.stdout.flush()
        a = a[-1::-1, -1::-1]  # turn into a non-contiguous array
        assert_(not a.flags['CONTIGUOUS'])
        print('| %6.2f ' % measure('scipy_inv(a)', repeat), end=' ')
        sys.stdout.flush()
        print('| %6.2f ' % measure('numpy_inv(a)', repeat), end=' ')
        sys.stdout.flush()
        print(' (secs for %s calls)' % (repeat))
def run_print(test_list):
    print()
    print(" Integrating exp(-sum(x**2))*sum(x**2) w. importance sampling")
    print(" ============================================================")
    print()
    print(" ndims | npoints | nprocs | time ")
    print(" ------------------------------- ")
    for ndims, npoints, nprocs, repeat in test_list:
        print(" {ndims:5} | {npoints:7} | {nprocs:6} |".format(
            ndims=ndims, npoints=npoints, nprocs=nprocs), end="")
        mean = "np.zeros(({ndims},))".format(ndims=ndims)
        cov = "np.eye({ndims}) / np.sqrt(2.)".format(ndims=ndims)
        time = measure(
            "mcimport(lambda x: sum(x**2),{npoints},"
            "lambda size: multivariate_normal({mean},{cov},size),"
            "nprocs={nprocs})".format(
                npoints=npoints, mean=mean, cov=cov, nprocs=str(nprocs)),
            repeat)
        print(" {time:.2f} (seconds for {ncalls} calls)".format(
            time=time, ncalls=repeat))
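# Example invocation (hypothetical sizes, not from the benchmark): each
# tuple in `test_list` is (ndims, npoints, nprocs, repeat).
if __name__ == '__main__':
    run_print([(1, 10 ** 5, 1, 3),
               (2, 10 ** 5, 2, 3)])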
def test_fprop(self):
    activation = "tanh"
    seed = 1234
    repeat = 1000

    layer = LayerLSTM(input_size=DATA['features_size'],
                      hidden_size=DATA['hidden_size'],
                      activation=activation)
    layer.initialize(initer.UniformInitializer(seed))

    # input = T.tensor3('input')
    input = T.matrix('input')
    input.tag.test_value = DATA['batch_one_step']
    last_h = sharedX(DATA['state_h'])
    last_m = sharedX(DATA['state_m'])
    fprop = theano.function([input],
                            layer.fprop_faster(input, last_h, last_m))

    fprop_time = measure("h, m = fprop(DATA['batch_one_step'])", repeat)
    print("fprop time: {:.2f} sec.".format(fprop_time))
    h, m = fprop(DATA['batch_one_step'])
def bench_load_save():
    rng = np.random.RandomState(20111001)
    repeat = 10
    img_shape = (128, 128, 64, 10)
    arr = rng.normal(size=img_shape)
    img = Nifti1Image(arr, np.eye(4))
    sio = BytesIO()
    img.file_map['image'].fileobj = sio
    hdr = img.header
    sys.stdout.flush()
    print()
    print_git_title("Image load save")
    hdr.set_data_dtype(np.float32)
    mtime = measure('sio.truncate(0); img.to_file_map()', repeat)
    print('%30s %6.2f' % ('Save float64 to float32', mtime))
    mtime = measure('img.from_file_map(img.file_map)', repeat)
    print('%30s %6.2f' % ('Load from float32', mtime))
    hdr.set_data_dtype(np.int16)
    mtime = measure('sio.truncate(0); img.to_file_map()', repeat)
    print('%30s %6.2f' % ('Save float64 to int16', mtime))
    mtime = measure('img.from_file_map(img.file_map)', repeat)
    print('%30s %6.2f' % ('Load from int16', mtime))
    # Set a lot of NaNs to check timing
    arr[:, :, :20] = np.nan
    mtime = measure('sio.truncate(0); img.to_file_map()', repeat)
    print('%30s %6.2f' % ('Save float64 to int16, NaNs', mtime))
    mtime = measure('img.from_file_map(img.file_map)', repeat)
    print('%30s %6.2f' % ('Load from int16, NaNs', mtime))
    # Int16 input, float output
    arr = np.random.random_integers(low=-1000, high=1000, size=img_shape)
    arr = arr.astype(np.int16)
    img = Nifti1Image(arr, np.eye(4))
    sio = BytesIO()
    img.file_map['image'].fileobj = sio
    hdr = img.header
    hdr.set_data_dtype(np.float32)
    mtime = measure('sio.truncate(0); img.to_file_map()', repeat)
    print('%30s %6.2f' % ('Save Int16 to float32', mtime))
    sys.stdout.flush()