예제 #1
0
파일: all.py 프로젝트: Blosc/bcolz
def test(verbose=False, heavy=False):
    """
    test(verbose=False, heavy=False)

    Run all the tests in the test suite, then exit the interpreter.

    If `verbose` is set, the test suite will emit messages with full
    verbosity (not recommended unless you are looking into a certain
    problem).

    If `heavy` is set, the test suite will be run in *heavy* mode (you
    should be careful with this because it can take a lot of time and
    resources from your computer).

    This function does not return: it always raises ``SystemExit`` through
    ``sys.exit()``.  The exit status is falsy (0) when every test passed and
    truthy (1) otherwise.  ``common.verbose`` and ``common.heavy`` are
    restored to their previous values before the exit propagates.
    """
    bcolz.print_versions()
    print_heavy(heavy)

    # Temporarily override the shared test settings; the previous values
    # are put back in the ``finally`` clause below.
    oldverbose, common.verbose = common.verbose, verbose
    oldheavy, common.heavy = common.heavy, heavy
    try:
        ret = unittest.TextTestRunner().run(suite())
        # Truthy exit status signals failure to the calling process.
        sys.exit(not ret.wasSuccessful())
    finally:
        # Runs even though sys.exit() raises SystemExit.
        common.verbose = oldverbose
        common.heavy = oldheavy
예제 #2
0
def test(verbose=False, heavy=False):
    """
    test(verbose=False, heavy=False)

    Execute the complete test suite and return the runner's result object.

    `verbose` turns on full-verbosity messages from the test suite; leave
    it off unless you are investigating a specific problem.

    `heavy` runs the suite in *heavy* mode, which can consume a lot of
    time and machine resources, so enable it with care.

    While the suite runs, ``common.verbose`` and ``common.heavy`` hold the
    values passed in; their previous values are restored afterwards, even
    when the run raises.
    """
    bcolz.print_versions()
    print_heavy(heavy)

    # Remember the current shared settings, then install the requested ones.
    saved_verbose = common.verbose
    common.verbose = verbose
    saved_heavy = common.heavy
    common.heavy = heavy
    try:
        runner = unittest.TextTestRunner()
        return runner.run(suite())
    finally:
        # Always put the shared settings back the way they were.
        common.verbose = saved_verbose
        common.heavy = saved_heavy
예제 #3
0
    def _init_ctable(self, path):
        """
        Create an empty on-disk ctable rooted at `path`.

        The parent directory of `path` is created when it does not exist
        yet.  The table is built from seven empty ``uint32`` columns named
        after ``self._bcolz_fields``, with ``mode='w'`` and compression
        level 9.  It is flushed to disk and then handed to
        ``self._init_attr``.

        Flushing matters in persistence mode: without it, modifications
        made through ``__setitem__()`` / ``append()`` may be partly lost.

        Parameters
        ----------
        path : string
            The path to rootdir of the new ctable.

        Returns
        -------
        The value returned by ``self._init_attr(table, path)``.
        """
        bcolz_dir = os.path.dirname(path)
        print('bcolz_dir', bcolz_dir)
        # dirname() is '' for a bare relative name; os.makedirs('') would
        # raise, and the current directory needs no creation anyway.
        if bcolz_dir and not os.path.exists(bcolz_dir):
            # exist_ok covers the directory appearing between the check
            # above and this call.
            os.makedirs(bcolz_dir, exist_ok=True)
            print('path', path)
        initial_array = np.empty(0, np.uint32)
        # Configure bcolz: thread count is Num times the detected core count.
        bcolz.set_nthreads(Num * bcolz.detect_number_of_cores())
        # Print all the versions of packages that bcolz relies on.
        bcolz.print_versions()
        # bcolz.cparams reference:
        #   clevel   : int (0 <= clevel < 10), the compression level.
        #   shuffle  : int, the shuffle filter to activate.  Allowed values
        #              are bcolz.NOSHUFFLE (0), bcolz.SHUFFLE (1) and
        #              bcolz.BITSHUFFLE (2).  The default is bcolz.SHUFFLE.
        #   cname    : string ('blosclz', 'lz4', 'lz4hc', 'snappy', 'zlib',
        #              'zstd'), the compressor to use inside Blosc.
        #   quantize : int (number of significant digits).  Quantizes data
        #              to improve (lossy) compression, using
        #              np.around(scale*data)/scale where scale is 2**bits
        #              and bits is derived from the quantize value (e.g.
        #              quantize=1 gives bits=4).  0 disables quantization.
        #   default  : cparams(clevel=5, shuffle=1, cname='lz4', quantize=0)
        params = bcolz.cparams(clevel=9)
        table = bcolz.ctable(
            rootdir=path,
            # Seven columns, all referring to the same empty uint32 array.
            columns=[initial_array] * 7,
            names=self._bcolz_fields,
            mode='w',
            cparams=params,
        )
        print('cparams', table.cparams)
        table.flush()
        table = self._init_attr(table, path)
        # table.attrs['metadata'] = self._init_metadata(path)
        return table
예제 #4
0
# Benchmark based on Greg Reda's previous work:
# http://www.gregreda.com/2013/10/26/using-pandas-on-the-movielens-dataset/
# The original MovieLens datasets are over here:
# http://www.grouplens.org/datasets/movielens

from time import time
import os.path
import numpy as np
import bcolz
import pandas as pd

# Print the versions of bcolz and of the packages it relies on.
bcolz.print_versions()

# Relative paths to the MovieLens 100k files (users, ratings, movie items),
# all located inside the `dset` directory.
dset = 'ml-100k'
fuser = os.path.join(dset, 'u.user')
fdata = os.path.join(dset, 'u.data')
fitem = os.path.join(dset, 'u.item')

# Change the bcolz default compression parameters: lz4 codec at level 1.
bcolz.defaults.cparams['cname'] = 'lz4'
bcolz.defaults.cparams['clevel'] = 1

# pass in column names for each CSV
# The users file is '|'-separated; column names are supplied via `names`.
u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv(fuser, sep='|', names=u_cols)

# The ratings file is tab-separated; column names are supplied via `names`.
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv(fdata, sep='\t', names=r_cols)

# the movies file contains columns indicating the movie's genres
# let's only load the first five columns of the file with usecols
# (these are the names for those five columns)
m_cols = ['movie_id', 'title', 'release_date', 'video_release_date', 'imdb_url']
예제 #5
0
# Benchmark based on Greg Reda's previous work:
# http://www.gregreda.com/2013/10/26/using-pandas-on-the-movielens-dataset/
# The original MovieLens datasets are over here:
# http://www.grouplens.org/datasets/movielens

from time import time
import os.path
import numpy as np
import bcolz
import pandas as pd

# Print the versions of bcolz and of the packages it relies on.
bcolz.print_versions()

# Relative paths to the MovieLens 100k files (users, ratings, movie items),
# all located inside the `dset` directory.
dset = 'ml-100k'
fuser = os.path.join(dset, 'u.user')
fdata = os.path.join(dset, 'u.data')
fitem = os.path.join(dset, 'u.item')

# Change the bcolz default compression parameters: lz4 codec at level 1.
bcolz.defaults.cparams['cname'] = 'lz4'
bcolz.defaults.cparams['clevel'] = 1

# pass in column names for each CSV
# The users file is '|'-separated; column names are supplied via `names`.
u_cols = ['user_id', 'age', 'sex', 'occupation', 'zip_code']
users = pd.read_csv(fuser, sep='|', names=u_cols)

# The ratings file is tab-separated; column names are supplied via `names`.
r_cols = ['user_id', 'movie_id', 'rating', 'unix_timestamp']
ratings = pd.read_csv(fdata, sep='\t', names=r_cols)

# the movies file contains columns indicating the movie's genres
# let's only load the first five columns of the file with usecols
m_cols = [