def test(verbose=False, heavy=False):
    """test(verbose=False, heavy=False)

    Run all the tests in the test suite and exit the process with the
    result.

    If `verbose` is set, the test suite will emit messages with full
    verbosity (not recommended unless you are looking into a certain
    problem).

    If `heavy` is set, the test suite will be run in *heavy* mode (you
    should be careful with this because it can take a lot of time and
    resources from your computer).

    This function does not return: it calls ``sys.exit`` with status 0
    on success and 1 on failure.
    """
    bcolz.print_versions()
    print_heavy(heavy)
    # Temporarily override the module-wide test flags; they are restored
    # in the `finally` clause even though sys.exit raises SystemExit.
    oldverbose, common.verbose = common.verbose, verbose
    oldheavy, common.heavy = common.heavy, heavy
    try:
        ret = unittest.TextTestRunner().run(suite())
        # Idiomatic negation instead of the original `== False`
        # comparison; exit status is 0 (success) or 1 (failure).
        sys.exit(not ret.wasSuccessful())
    finally:
        common.verbose = oldverbose
        common.heavy = oldheavy  # there are pretty young heavies, too ;)
def test(verbose=False, heavy=False):
    """test(verbose=False, heavy=False)

    Run all the tests in the test suite and return the result object.

    If `verbose` is set, the test suite will emit messages with full
    verbosity (not recommended unless you are looking into a certain
    problem).

    If `heavy` is set, the test suite will be run in *heavy* mode (you
    should be careful with this because it can take a lot of time and
    resources from your computer).
    """
    bcolz.print_versions()
    print_heavy(heavy)
    # Remember the current global flags so they can be put back after
    # the run, then install the caller's choices.
    saved_verbose = common.verbose
    saved_heavy = common.heavy
    common.verbose = verbose
    common.heavy = heavy
    try:
        runner = unittest.TextTestRunner()
        return runner.run(suite())
    finally:
        common.verbose = saved_verbose
        common.heavy = saved_heavy  # there are pretty young heavies, too ;)
def _init_ctable(self, path):
    """Create an empty on-disk ctable rooted at *path*.

    The table is created in ``mode='w'`` with one empty ``uint32``
    column per name in ``self._bcolz_fields``, flushed to disk, and
    then passed through ``self._init_attr`` for attribute setup.

    Parameters
    ----------
    path : string
        The path to rootdir of the new ctable.

    Returns
    -------
    The ctable returned by ``self._init_attr``.
    """
    # Make sure the parent directory exists before bcolz writes into it.
    bcolz_dir = os.path.dirname(path)
    print('bcolz_dir', bcolz_dir)
    if not os.path.exists(bcolz_dir):
        os.makedirs(bcolz_dir)
    print('path', path)
    initial_array = np.empty(0, np.uint32)
    # Configure bcolz: use `Num` compression threads per detected core.
    # NOTE(review): `Num` is defined elsewhere in this module — confirm.
    bcolz.set_nthreads(Num * bcolz.detect_number_of_cores())
    # Print all the versions of packages that bcolz relies on.
    bcolz.print_versions()
    # cparams defaults are cparams(clevel=5, shuffle=1, cname='lz4',
    # quantize=0); here only the compression level is raised to the
    # maximum (clevel must satisfy 0 <= clevel < 10).
    params = bcolz.cparams(clevel=9)
    # One empty column per field name; deriving the count from
    # `_bcolz_fields` (instead of hard-coding seven entries) keeps the
    # column list and the name list in sync.
    table = bcolz.ctable(
        rootdir=path,
        columns=[initial_array] * len(self._bcolz_fields),
        names=self._bcolz_fields,
        mode='w',
        cparams=params
    )
    print('cparams', table.cparams)
    # Flush data in internal buffers to disk so the empty table is
    # persisted before attributes are initialized.
    table.flush()
    table = self._init_attr(table, path)
    # table.attrs['metadata'] = self._init_metadata(path)
    return table
# Benchmark based on Greg Redas's previous work: # http://www.gregreda.com/2013/10/26/using-pandas-on-the-movielens-dataset/ # The original MovieLens datasets are over here: # http://www.grouplens.org/datasets/movielens from time import time import os.path import numpy as np import bcolz import pandas as pd bcolz.print_versions() dset = 'ml-100k' fuser = os.path.join(dset, 'u.user') fdata = os.path.join(dset, 'u.data') fitem = os.path.join(dset, 'u.item') bcolz.defaults.cparams['cname'] = 'lz4' bcolz.defaults.cparams['clevel'] = 1 # pass in column names for each CSV u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code'] users = pd.read_csv(fuser, sep='|', names=u_cols) r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp'] ratings = pd.read_csv(fdata, sep='\t', names=r_cols) # the movies file contains columns indicating the movie's genres # let's only load the first five columns of the file with usecols m_cols = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url']
# Benchmark based on Greg Redas's previous work: # http://www.gregreda.com/2013/10/26/using-pandas-on-the-movielens-dataset/ # The original MovieLens datasets are over here: # http://www.grouplens.org/datasets/movielens from time import time import os.path import numpy as np import bcolz import pandas as pd bcolz.print_versions() dset = 'ml-100k' fuser = os.path.join(dset, 'u.user') fdata = os.path.join(dset, 'u.data') fitem = os.path.join(dset, 'u.item') bcolz.defaults.cparams['cname'] = 'lz4' bcolz.defaults.cparams['clevel'] = 1 # pass in column names for each CSV u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code'] users = pd.read_csv(fuser, sep='|', names=u_cols) r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp'] ratings = pd.read_csv(fdata, sep='\t', names=r_cols) # the movies file contains columns indicating the movie's genres # let's only load the first five columns of the file with usecols m_cols = [