# vbench suite: miscellaneous micro-benchmarks.
# Each Benchmark receives the statement to time and a setup snippet
# (source text) that is prefixed with `common_setup`.
from vbench.benchmark import Benchmark
from datetime import datetime

common_setup = """from pandas_vb_common import *
"""

#----------------------------------------------------------------------
# cache_readonly

# The setup defines a class with one `cache_readonly` property and creates
# an instance; the timed statement is the attribute access `obj.prop`.
setup = common_setup + """
from pandas.util.decorators import cache_readonly

class Foo:

    @cache_readonly
    def prop(self):
        return 5
obj = Foo()
"""
misc_cache_readonly = Benchmark("obj.prop", setup, name="misc_cache_readonly",
                                ncalls=2000000)
'key2' : key2}) df_key1 = DataFrame(np.random.randn(len(level1), 4), index=level1, columns=['A', 'B', 'C', 'D']) df_key2 = DataFrame(np.random.randn(len(level2), 4), index=level2, columns=['A', 'B', 'C', 'D']) df_shuf = df.reindex(df.index[shuf]) """ #---------------------------------------------------------------------- # DataFrame joins on key join_dataframe_index_single_key_small = \ Benchmark("df.join(df_key1, on='key1')", setup, name='join_dataframe_index_single_key_small') join_dataframe_index_single_key_bigger = \ Benchmark("df.join(df_key2, on='key2')", setup, name='join_dataframe_index_single_key_bigger') join_dataframe_index_single_key_bigger_sort = \ Benchmark("df_shuf.join(df_key2, on='key2', sort=True)", setup, name='join_dataframe_index_single_key_bigger_sort', start_date=datetime(2012, 2, 5)) join_dataframe_index_multi = \ Benchmark("df.join(df_multi, on=['key1', 'key2'])", setup, name='join_dataframe_index_multi', start_date=datetime(2011, 10, 20))
common_setup = """from pandas_vb_common import * """ #---------------------------------------------------------------------- # intersection, union setup = common_setup + """ rng = DatetimeIndex(start='1/1/2000', periods=10000, freq=datetools.Minute()) if rng.dtype == object: rng = rng.view(Index) else: rng = rng.asobject rng2 = rng[:-1] """ index_datetime_intersection = Benchmark("rng.intersection(rng2)", setup) index_datetime_union = Benchmark("rng.union(rng2)", setup) setup = common_setup + """ rng = date_range('1/1/2000', periods=10000, freq='T') rng2 = rng[:-1] """ datetime_index_intersection = Benchmark("rng.intersection(rng2)", setup, start_date=datetime(2013, 9, 27)) datetime_index_union = Benchmark("rng.union(rng2)", setup, start_date=datetime(2013, 9, 27)) # integers setup = common_setup + """ N = 1000000
SECTION = 'Indexing and scalar value access' common_setup = """from pandas_vb_common import * """ #---------------------------------------------------------------------- # Series.__getitem__, get_value, __getitem__(slice) setup = common_setup + """ tm.N = 1000 ts = tm.makeTimeSeries() dt = ts.index[500] """ statement = "ts[dt]" bm_getitem = Benchmark(statement, setup, ncalls=100000, name='time_series_getitem_scalar') setup = common_setup + """ index = tm.makeStringIndex(1000) s = Series(np.random.rand(1000), index=index) idx = index[100] """ statement = "s.get_value(idx)" bm_get_value = Benchmark(statement, setup, name='series_get_value', start_date=datetime(2011, 11, 12)) setup = common_setup + """ index = tm.makeStringIndex(1000000) s = Series(np.random.rand(1000000), index=index)
#----------------------------------------------------------------------
# cache_readonly

# The setup defines a class with one `cache_readonly` property and creates
# an instance; the timed statement is the attribute access `obj.prop`.
# `common_setup`, `Benchmark` and `datetime` come from earlier in the module.
setup = common_setup + """
from pandas.util.decorators import cache_readonly

class Foo:

    @cache_readonly
    def prop(self):
        return 5
obj = Foo()
"""
misc_cache_readonly = Benchmark("obj.prop", setup, name="misc_cache_readonly",
                                ncalls=2000000)

#----------------------------------------------------------------------
# match

# `all` is the 1000 unique strings each repeated 10 times; the timed
# statement matches it back against the unique values.
setup = common_setup + """
uniques = tm.makeStringIndex(1000).values
all = uniques.repeat(10)
"""

match_strings = Benchmark("match(all, uniques)", setup,
                          start_date=datetime(2012, 5, 12))
# vbench suite: DataFrame reindexing benchmarks.
# Each Benchmark receives the statement to time and a setup snippet
# (source text) that is prefixed with `common_setup`.
from vbench.benchmark import Benchmark
from datetime import datetime

common_setup = """from pandas_vb_common import *
"""

#----------------------------------------------------------------------
# DataFrame reindex columns

# 10000 x 30 frame with integer row and column labels; the timed statement
# selects columns 1..4 through reindex.
setup = common_setup + """
df = DataFrame(index=range(10000), data=np.random.rand(10000,30),
               columns=range(30))
"""
statement = "df.reindex(columns=df.columns[1:5])"

reindex_frame_columns = Benchmark(statement, setup,
                                  name='dataframe_reindex_columns')

#----------------------------------------------------------------------

# Frame indexed by a 10000-period DateRange plus one string column; the timed
# statement reindexes the rows to every second label of that range.
setup = common_setup + """
rng = DateRange('1/1/1970', periods=10000, offset=datetools.Minute())
df = DataFrame(np.random.rand(10000, 10), index=rng,
               columns=range(10))
df['foo'] = 'bar'
rng2 = Index(rng[::2])
"""
statement = "df.reindex(rng2)"
reindex_frame_daterange = Benchmark(statement, setup,
                                    name='dataframe_reindex_daterange')

#----------------------------------------------------------------------
from vbench.benchmark import Benchmark from datetime import datetime common_setup = """from pandas_vb_common import * """ #---------------------------------------------------------------------- # nanops setup = common_setup + """ s = Series(np.random.randn(100000), index=np.arange(100000)) s[::2] = np.nan """ stat_ops_series_std = Benchmark("s.std()", setup) #---------------------------------------------------------------------- # ops by level setup = common_setup + """ index = MultiIndex(levels=[np.arange(10), np.arange(100), np.arange(100)], labels=[np.arange(10).repeat(10000), np.tile(np.arange(100).repeat(100), 10), np.tile(np.tile(np.arange(100), 100), 10)]) random.shuffle(index.values) df = DataFrame(np.random.randn(len(index), 4), index=index) df_level = DataFrame(np.random.randn(100, 4), index=index.levels[1]) """ stat_ops_level_frame_sum = \ Benchmark("df.sum(level=1)", setup,
# Constructor benchmarks built from one shared random 5000 x 50 frame.
# `common_setup`, `Benchmark` and `datetime` come from earlier in the module.
setup = common_setup + """
N, K = 5000, 50
index = tm.makeStringIndex(N)
columns = tm.makeStringIndex(K)
frame = DataFrame(np.random.randn(N, K), index=index, columns=columns)

try:
    data = frame.to_dict()
except:
    data = frame.toDict()

some_dict = data.values()[0]
dict_list = [dict(zip(columns, row)) for row in frame.values]
"""

# `data` is the dict-of-dicts form of the frame (to_dict, falling back to
# the toDict spelling when the first call fails).
frame_ctor_nested_dict = Benchmark("DataFrame(data)", setup)

# From JSON-like stuff
frame_ctor_list_of_dict = Benchmark("DataFrame(dict_list)", setup,
                                    start_date=datetime(2011, 12, 20))

# `some_dict` is the first inner dict of `data`.
series_ctor_from_dict = Benchmark("Series(some_dict)", setup)

# nested dict, integer indexes, regression described in #621
setup = common_setup + """
data = dict((i,dict((j,float(j)) for j in xrange(100))) for i in xrange(2000))
"""
frame_ctor_nested_dict_int64 = Benchmark("DataFrame(data)", setup)

# dynamically generate benchmarks for every offset
#
# vbench suite: Index set-operation benchmarks.
# Each Benchmark receives the statement to time and a setup snippet
# (source text) that is prefixed with `common_setup`.
from vbench.benchmark import Benchmark
from datetime import datetime

SECTION = "Index / MultiIndex objects"

common_setup = """from pandas_vb_common import *
"""

#----------------------------------------------------------------------
# intersection, union

# `rng` is a 10000-period minute DateRange viewed as a plain Index;
# `rng2` is the same index without its last element.
setup = common_setup + """
rng = DateRange('1/1/2000', periods=10000, offset=datetools.Minute())
rng = rng.view(Index)
rng2 = rng[:-1]
"""

index_datetime_intersection = Benchmark("rng.intersection(rng2)", setup,
                                        name='index_datetime_intersection')
index_datetime_union = Benchmark("rng.union(rng2)", setup,
                                 name='index_datetime_union')
# vbench suite: Series alignment benchmark.
# The Benchmark receives the statement to time and a setup snippet
# (source text) that is prefixed with `common_setup`.
from vbench.benchmark import Benchmark
from datetime import datetime

common_setup = """from pandas_vb_common import *
"""

#----------------------------------------------------------------------
# data alignment

# Two Series of `sz` random values, each indexed by a sorted random sample of
# the same int64 stamp array; the timed statement adds them, which has to
# align the two differing indexes.
# The `#` line inside the snippet must stay on its own line: when the snippet
# is collapsed onto one line it comments out everything that follows it.
setup = common_setup + """n = 1000000
# indices = Index([rands(10) for _ in xrange(n)])
def sample(values, k):
    sampler = np.random.permutation(len(values))
    return values.take(sampler[:k])
sz = 500000
rng = np.arange(0, 10000000000000, 10000000)
stamps = np.datetime64(datetime.now()).view('i8') + rng
idx1 = np.sort(sample(stamps, sz))
idx2 = np.sort(sample(stamps, sz))
ts1 = Series(np.random.randn(sz), idx1)
ts2 = Series(np.random.randn(sz), idx2)
"""
stmt = "ts1 + ts2"
series_align_int64_index = Benchmark(stmt, setup,
                                     start_date=datetime(2010, 6, 1),
                                     logy=True)
clfswh_d = dict([(x.descr, x) for x in clfswh[:]]) seed(1) """ #---------------------------------------------------------------------- # classifiers setup = common_setup + """ """ # TODO: is it possible to have smth like 'continuation' where # two steps are ran in the same env but separately timed? vb_clfs_binary_train = [] vb_clfs_binary_predict = [] for clf in clfswh['binary']: for nf in (2, 1000): # for ds with just few and lots of features clf_train_str = 'clfswh_d[%r].train(vb_ds0_l2[:,:%d])' % (clf.descr, nf) vb_clfs_binary_train.append( Benchmark(clf_train_str, setup=setup, name='%s.train(vb_ds0_l2[:,:%d])' % (clf.descr, nf))) # and predict on the trailing features of the dataset vb_clfs_binary_predict.append( Benchmark('clfswh_d[%r].predict(vb_ds0_l2[:,-%d:])' % (clf.descr, nf), setup=setup + "\n" + clf_train_str, name='%s.predict(vb_ds0_l2[:,-%d:])' % (clf.descr, nf)))
# Panel.from_dict benchmarks over 100 three-column frames.
# `common_setup`, `Benchmark` and `datetime` come from earlier in the module.
START_DATE = datetime(2011, 6, 1)

# Variant 1: the index array `dr` is built once and shared by every frame.
setup_same_index = common_setup + """
# create 100 dataframes with the same index
dr = np.asarray(DatetimeIndex(datetime(1990,1,1), datetime(2012,1,1)))
data_frames = {}
for x in xrange(100):
    df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr),
                    "c": [2]*len(dr)}, index=dr)
    data_frames[x] = df
"""

panel_from_dict_same_index = \
    Benchmark("Panel.from_dict(data_frames)",
              setup_same_index, name='panel_from_dict_same_index',
              start_date=START_DATE, repeat=1, logy=True)

# Variant 2: an equal index array is rebuilt inside the loop, so each frame
# gets its own index object.
setup_equiv_indexes = common_setup + """
data_frames = {}
for x in xrange(100):
    dr = np.asarray(DatetimeIndex(datetime(1990,1,1), datetime(2012,1,1)))
    df = DataFrame({"a": [0]*len(dr), "b": [1]*len(dr),
                    "c": [2]*len(dr)}, index=dr)
    data_frames[x] = df
"""

panel_from_dict_equiv_indexes = \
    Benchmark("Panel.from_dict(data_frames)",
              setup_equiv_indexes, name='panel_from_dict_equiv_indexes',
              start_date=START_DATE, repeat=1, logy=True)
# Scalar-access benchmarks on Series.
# `Benchmark` and `datetime` come from imports earlier in the module.
common_setup = """from pandas_vb_common import *
"""

#----------------------------------------------------------------------
# Series.__getitem__, get_value

# Time series of length 1000; the timed statement looks up the label found
# at position 500.
setup = common_setup + """
tm.N = 1000
ts = tm.makeTimeSeries()
dt = ts.index[500]
"""
statement = "ts[dt]"

bm_getitem = Benchmark(statement, setup, ncalls=100000,
                       name='series_getitem_scalar')

# Series keyed by 1000 random 10-character strings; the timed statement
# fetches one value by label through get_value.
setup = common_setup + """
index = [tm.rands(10) for _ in xrange(1000)]
s = Series(np.random.rand(1000), index=index)
idx = index[100]
"""
statement = "s.get_value(idx)"
bm_df_getitem3 = Benchmark(statement, setup,
                           name='series_get_value',
                           start_date=datetime(2011, 11, 12))

#----------------------------------------------------------------------
# DataFrame __getitem__
# coding: utf8

# vbench benchmark for buhmm inference.
# The encoding comment above must sit on a line of its own: fused with the
# code, it turns the whole module into a single comment.
from vbench.benchmark import Benchmark

# Setup: build the `Even` machine from cmpy, set its start node to 'A' and
# request 1e6 symbols from it as the input data.
setup = """
from __future__ import division

import buhmm
import cmpy

m = cmpy.machines.Even()
m.set_start_node('A')
data = m.symbols(1e6)
"""

# Timed statement: construct buhmm.Infer from the machine and the data.
code = """
x = buhmm.Infer(m, data)
"""

bm1 = Benchmark(code, setup, name='infer')
C = Connection(G, H, structure='%(structure)s' ) C.connect_full(G, H, weight=1) net = Network(G, H, C) net.prepare() net.run(defaultclock.dt) """ statement = "net.run(1 * second)" # Code generation was introduced here start_codegen = datetime(2010, 2, 6) # Sparse matrices bench_sparse = Benchmark(statement, common_setup + python_only_setup + \ setup_template % {'neurons' : 10, 'structure' : 'sparse'}, name='sparse connection matrix (10x10)') bench_sparse100 = Benchmark(statement, common_setup + python_only_setup + \ setup_template % {'neurons' : 100, 'structure' : 'sparse'}, name='sparse connection matrix (100x100)') bench_sparse100w = Benchmark(statement, common_setup + weave_setup + \ setup_template % {'neurons' : 100, 'structure' : 'sparse'}, name='sparse connection matrix (100x100) with weave') bench_sparse100wc = Benchmark(statement, common_setup + weave_setup + codegen_setup +\
# Index set-operation benchmarks.
# `Benchmark` and `datetime` come from imports earlier in the module.
common_setup = """from pandas_vb_common import *
"""

#----------------------------------------------------------------------
# intersection, union

# The snippet branches on `rng.dtype`: an object-dtype range is viewed as a
# plain Index, otherwise its `asobject` form is used. `rng2` drops the last
# element.
setup = common_setup + """
rng = DateRange('1/1/2000', periods=10000, offset=datetools.Minute())
if rng.dtype == object:
    rng = rng.view(Index)
else:
    rng = rng.asobject
rng2 = rng[:-1]
"""

index_datetime_intersection = Benchmark("rng.intersection(rng2)", setup)
index_datetime_union = Benchmark("rng.union(rng2)", setup)

# integers
# `left` and `right` are two independent random halves of arange(N).
setup = common_setup + """
N = 1000000
options = np.arange(N)

left = Index(options.take(np.random.permutation(N)[:N // 2]))
right = Index(options.take(np.random.permutation(N)[:N // 2]))
"""

index_int64_union = Benchmark('left.union(right)', setup,
                              start_date=datetime(2011, 1, 1))
common_setup = """from pandas_vb_common import * """ #---------------------------------------------------------------------- # Series.__getitem__, get_value, __getitem__(slice) setup = common_setup + """ tm.N = 1000 ts = tm.makeTimeSeries() dt = ts.index[500] """ statement = "ts[dt]" bm_getitem = Benchmark(statement, setup, ncalls=100000, name='series_getitem_scalar') setup = common_setup + """ index = [tm.rands(10) for _ in xrange(1000)] s = Series(np.random.rand(1000), index=index) idx = index[100] """ statement = "s.get_value(idx)" bm_df_getitem3 = Benchmark(statement, setup, name='series_get_value', start_date=datetime(2011, 11, 12)) setup = common_setup + """ index = tm.makeStringIndex(1000000)
'data2' : np.random.randn(100000), 'key1' : key1, 'key2' : key2}) df_key1 = DataFrame(np.random.randn(len(level1), 4), index=level1, columns=['A', 'B', 'C', 'D']) df_key2 = DataFrame(np.random.randn(len(level2), 4), index=level2, columns=['A', 'B', 'C', 'D']) """ #---------------------------------------------------------------------- # DataFrame joins on key join_dataframe_index_single_key_small = \ Benchmark("df.join(df_key1, on='key1')", setup, name='join_dataframe_index_single_key_small') join_dataframe_index_single_key_bigger = \ Benchmark("df.join(df_key2, on='key2')", setup, name='join_dataframe_index_single_key_bigger') join_dataframe_index_multi = \ Benchmark("df.join(df_multi, on=['key1', 'key2'])", setup, name='join_dataframe_index_multi', start_date=datetime(2011, 10, 20)) #---------------------------------------------------------------------- # DataFrame joins on index
""" SECTION = 'Binary ops' #---------------------------------------------------------------------- # binary ops #---------------------------------------------------------------------- # add setup = common_setup + """ df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) """ frame_add = \ Benchmark("df + df2", setup, name='frame_add', start_date=datetime(2012, 1, 1)) setup = common_setup + """ import pandas.core.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_numexpr_threads(1) """ frame_add_st = \ Benchmark("df + df2", setup, name='frame_add_st',cleanup="expr.set_numexpr_threads()", start_date=datetime(2013, 2, 26)) setup = common_setup + """ import pandas.core.expressions as expr df = DataFrame(np.random.randn(20000, 100))
# vbench suite: DataFrame attribute get/set benchmarks.
# Each Benchmark receives the statement to time and a setup snippet
# (source text) that is prefixed with `common_setup`.
from vbench.benchmark import Benchmark

common_setup = """from pandas_vb_common import *
"""

#----------------------------------------------------------------------
# DataFrame.index / columns property lookup time

# Small 10 x 6 frame; `cur_index` keeps its index so the setattr benchmark
# can assign it back.
setup = common_setup + """
df = DataFrame(np.random.randn(10, 6))
cur_index = df.index
"""
stmt = "foo = df.index"

getattr_dataframe_index = Benchmark(stmt, setup,
                                    name="getattr_dataframe_index")

stmt = "df.index = cur_index"
setattr_dataframe_index = Benchmark(stmt, setup,
                                    name="setattr_dataframe_index")
# vbench suite: DataFrame reindexing benchmarks.
# Each Benchmark receives the statement to time and a setup snippet
# (source text) that is prefixed with `common_setup`.
from vbench.benchmark import Benchmark
from datetime import datetime

common_setup = """from pandas_vb_common import *
"""

#----------------------------------------------------------------------
# DataFrame reindex columns

# 10000 x 30 frame with integer row and column labels; the timed statement
# selects columns 1..4 through reindex.
setup = common_setup + """
df = DataFrame(index=range(10000), data=np.random.rand(10000,30),
               columns=range(30))
"""
statement = "df.reindex(columns=df.columns[1:5])"

frame_reindex_columns = Benchmark(statement, setup)

#----------------------------------------------------------------------

# Frame indexed by a 10000-period DateRange plus one string column; the timed
# statement reindexes the rows to every second label of that range.
setup = common_setup + """
rng = DateRange('1/1/1970', periods=10000, offset=datetools.Minute())
df = DataFrame(np.random.rand(10000, 10), index=rng,
               columns=range(10))
df['foo'] = 'bar'
rng2 = Index(rng[::2])
"""
statement = "df.reindex(rng2)"
dataframe_reindex = Benchmark(statement, setup)

#----------------------------------------------------------------------
# multiindex reindexing
# Constructor benchmarks built from one shared random 5000 x 50 frame whose
# row and column labels are random 10-character strings.
# `common_setup`, `Benchmark` and `datetime` come from earlier in the module.
setup = common_setup + """
N, K = 5000, 50
index = [rands(10) for _ in xrange(N)]
columns = [rands(10) for _ in xrange(K)]
frame = DataFrame(np.random.randn(N, K), index=index, columns=columns)

try:
    data = frame.to_dict()
except:
    data = frame.toDict()

some_dict = data.values()[0]
dict_list = [dict(zip(columns, row)) for row in frame.values]
"""

# `data` is the dict-of-dicts form of the frame (to_dict, falling back to
# the toDict spelling when the first call fails).
frame_ctor_nested_dict = Benchmark("DataFrame(data)", setup)

# From JSON-like stuff
frame_ctor_list_of_dict = Benchmark("DataFrame(dict_list)", setup,
                                    start_date=datetime(2011, 12, 20))

# `some_dict` is the first inner dict of `data`.
series_ctor_from_dict = Benchmark("Series(some_dict)", setup)

# nested dict, integer indexes, regression described in #621
setup = common_setup + """
data = dict((i,dict((j,float(j)) for j in xrange(100))) for i in xrange(2000))
"""
frame_ctor_nested_dict_int64 = Benchmark("DataFrame(data)", setup)
setup = common_setup + """ import pandas.computation.expressions as expr expr.set_numexpr_threads(1) """ SECTION = 'Eval' #---------------------------------------------------------------------- # binary ops #---------------------------------------------------------------------- # add eval_frame_add_all_threads = \ Benchmark("pd.eval('df + df2 + df3 + df4')", common_setup, name='eval_frame_add_all_threads', start_date=datetime(2013, 7, 21)) eval_frame_add_one_thread = \ Benchmark("pd.eval('df + df2 + df3 + df4')", setup, name='eval_frame_add_one_thread', start_date=datetime(2013, 7, 26)) eval_frame_add_python = \ Benchmark("pd.eval('df + df2 + df3 + df4', engine='python')", common_setup, name='eval_frame_add_python', start_date=datetime(2013, 7, 21)) eval_frame_add_python_one_thread = \ Benchmark("pd.eval('df + df2 + df3 + df4', engine='python')", setup,
""" SECTION = 'Binary ops' #---------------------------------------------------------------------- # binary ops #---------------------------------------------------------------------- # add setup = common_setup + """ df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) """ frame_add = \ Benchmark("df + df2", setup, name='frame_add', start_date=datetime(2012, 1, 1)) setup = common_setup + """ import pandas.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100)) df2 = DataFrame(np.random.randn(20000, 100)) expr.set_numexpr_threads(1) """ frame_add_st = \ Benchmark("df + df2", setup, name='frame_add_st',cleanup="expr.set_numexpr_threads()", start_date=datetime(2013, 2, 26)) setup = common_setup + """ import pandas.computation.expressions as expr df = DataFrame(np.random.randn(20000, 100))
N = 50000 rng = np.asarray(date_range('1/1/2000', periods=N, freq='T')) # rng2 = np.asarray(rng).astype('M8[ns]').astype('i8') series = {} for i in range(1, K + 1): data = np.random.randn(N)[:-i] this_rng = rng[:-i] data[100:] = np.nan series[i] = SparseSeries(data, index=this_rng) """ stmt = "SparseDataFrame(series)" bm_sparse1 = Benchmark(stmt, setup, name="sparse_series_to_frame", start_date=datetime(2011, 6, 1)) setup = common_setup + """ from pandas.core.sparse import SparseDataFrame """ stmt = "SparseDataFrame(columns=np.arange(100), index=np.arange(1000))" sparse_constructor = Benchmark(stmt, setup, name="sparse_frame_constructor", start_date=datetime(2012, 6, 1))
# vbench suite: Series / DataFrame constructor benchmarks.
# Each Benchmark receives the statement to time and a setup snippet
# (source text) that is prefixed with `common_setup`.
from vbench.benchmark import Benchmark
from datetime import datetime

common_setup = """from pandas_vb_common import *
"""

#----------------------------------------------------------------------
# Series constructors

# 100 random values plus a prebuilt integer Index, so the timed statement
# only covers the Series construction itself.
setup = common_setup + """
data = np.random.randn(100)
index = Index(np.arange(100))
"""

series_constructor_ndarray = \
    Benchmark("Series(data, index=index)", setup=setup,
              name='series_constructor_ndarray')

# 100 x 100 random array wrapped directly in a DataFrame.
setup = common_setup + """
arr = np.random.randn(100, 100)
"""

frame_constructor_ndarray = \
    Benchmark("DataFrame(arr)", setup=setup,
              name='frame_constructor_ndarray')
# vbench suite: time-series plotting benchmark.
# The Benchmark receives the statement to time and a setup snippet
# (source text) that is prefixed with `common_setup`.
from vbench.benchmark import Benchmark
from datetime import datetime

# The shared setup falls back to a local `date_range` built on DatetimeIndex
# when `pandas` does not export one (the import raises ImportError).
common_setup = """from pandas_vb_common import *

try:
    from pandas import date_range
except ImportError:
    def date_range(start=None, end=None, periods=None, freq=None):
        return DatetimeIndex(start, end, periods=periods, offset=freq)

"""

#-----------------------------------------------------------------------------
# Timeseries plotting

# 2000 x 5 random frame on a date_range index; the timed statement is a
# plain `df.plot()`.
setup = common_setup + """
N = 2000
M = 5
df = DataFrame(np.random.randn(N,M), index=date_range('1/1/1975', periods=N))
"""

plot_timeseries_period = Benchmark("df.plot()", setup=setup,
                                   name='plot_timeseries_period')