Example #1
def test_memory_numpy(tmpdir, mmap_mode):
    " Test memory with a function with numpy arrays."
    accumulator = list()

    def n(l=None):
        accumulator.append(1)
        return l

    memory = Memory(location=tmpdir.strpath, mmap_mode=mmap_mode, verbose=0)
    cached_n = memory.cache(n)

    rnd = np.random.RandomState(0)
    for i in range(3):
        a = rnd.random_sample((10, 10))
        for _ in range(3):
            assert np.all(cached_n(a) == a)
            assert len(accumulator) == i + 1
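These tests pin down the core contract: joblib hashes the arguments, runs the function once per distinct input, and replays the persisted result on later calls. A minimal standalone sketch of that pattern (the cache path './joblib_cache' is an arbitrary example, not taken from the tests):

import numpy as np
from joblib import Memory

memory = Memory(location='./joblib_cache', mmap_mode='r', verbose=0)

@memory.cache
def square(a):
    print('computing...')  # printed only on a cache miss
    return a ** 2

x = np.arange(10.0)
square(x)  # first call: computes and persists the result
square(x)  # replayed from disk; with mmap_mode='r' it comes back read-only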
Example #2
def test_memory_ignore(tmpdir):
    " Test the ignore feature of memory "
    memory = Memory(location=tmpdir.strpath, verbose=0)
    accumulator = list()

    @memory.cache(ignore=['y'])
    def z(x, y=1):
        accumulator.append(1)

    assert z.ignore == ['y']

    z(0, y=1)
    assert len(accumulator) == 1
    z(0, y=1)
    assert len(accumulator) == 1
    z(0, y=2)
    assert len(accumulator) == 1
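The ignore list keeps selected arguments out of the cache key, which is the right tool for knobs that do not change the result. A hedged sketch of the idiom (function and values are illustrative only):

from joblib import Memory

memory = Memory(location='./joblib_cache', verbose=0)

@memory.cache(ignore=['verbose'])
def fit(x, verbose=False):
    if verbose:
        print('fitting', x)
    return x * 2

fit(3, verbose=True)   # computed and cached under the key for x=3
fit(3, verbose=False)  # cache hit: 'verbose' never enters the key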
Example #3
def test_partial_decoration():
    "Check cache may be called with kwargs before decorating"
    memory = Memory(cachedir=env['dir'], verbose=0)

    test_values = [
        (['x'], 100, 'r'),
        ([], 10, None),
    ]
    for ignore, verbose, mmap_mode in test_values:

        @memory.cache(ignore=ignore, verbose=verbose, mmap_mode=mmap_mode)
        def z(x):
            pass

        yield nose.tools.assert_equal, z.ignore, ignore
        yield nose.tools.assert_equal, z._verbose, verbose
        yield nose.tools.assert_equal, z.mmap_mode, mmap_mode
Example #4
def test_cached_function_race_condition_when_persisting_output(tmpdir, capfd):
    # Test race condition where multiple processes are writing into
    # the same output.pkl. See
    # https://github.com/joblib/joblib/issues/490 for more details.
    memory = Memory(location=tmpdir.strpath)
    func_cached = memory.cache(fast_func_with_complex_output)

    Parallel(n_jobs=2)(delayed(func_cached)() for i in range(3))

    stdout, stderr = capfd.readouterr()

    # Checking both stdout and stderr (ongoing PR #434 may change
    # logging destination) to make sure there is no exception while
    # loading the results
    exception_msg = 'Exception while loading results'
    assert exception_msg not in stdout
    assert exception_msg not in stderr
Example #5
def test_memory_ignore():
    " Test the ignore feature of memory "
    memory = Memory(cachedir=env['dir'], verbose=0)
    accumulator = list()

    @memory.cache(ignore=['y'])
    def z(x, y=1):
        accumulator.append(1)

    yield nose.tools.assert_equal, z.ignore, ['y']

    z(0, y=1)
    yield nose.tools.assert_equal, len(accumulator), 1
    z(0, y=1)
    yield nose.tools.assert_equal, len(accumulator), 1
    z(0, y=2)
    yield nose.tools.assert_equal, len(accumulator), 1
Example #6
def test_memory_recomputes_after_an_error_while_loading_results(
        tmpdir, monkeypatch):
    memory = Memory(location=tmpdir.strpath)

    def func(arg):
        # This makes sure that the timestamps returned by two calls of
        # func are different. This is needed on Windows, where the
        # time.time resolution may not be fine enough.
        time.sleep(0.01)
        return arg, time.time()

    cached_func = memory.cache(func)
    input_arg = 'arg'
    arg, timestamp = cached_func(input_arg)

    # Make sure the function is correctly cached
    assert arg == input_arg

    # Corrupting output.pkl to make sure that an error happens when
    # loading the cached result
    corrupt_single_cache_item(memory)

    # Make sure that corrupting the file causes recomputation and that
    # a warning is issued.
    recorded_warnings = monkeypatch_cached_func_warn(cached_func, monkeypatch)
    recomputed_arg, recomputed_timestamp = cached_func(arg)
    assert len(recorded_warnings) == 1
    exception_msg = 'Exception while loading results'
    assert exception_msg in recorded_warnings[0]
    assert recomputed_arg == arg
    assert recomputed_timestamp > timestamp

    # Corrupting output.pkl to make sure that an error happens when
    # loading the cached result
    corrupt_single_cache_item(memory)
    reference = cached_func.call_and_shelve(arg)
    try:
        reference.get()
        raise AssertionError(
            "It should normally not be possible to load a corrupted"
            " MemorizedResult"
        )
    except KeyError as e:
        message = "is corrupted"
        assert message in str(e.args)
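The call_and_shelve API used at the end of this test returns a MemorizedResult handle instead of the value itself; .get() loads the value from the store and raises a KeyError when the cached item is missing or corrupted. A minimal sketch (names are illustrative):

from joblib import Memory

memory = Memory(location='./joblib_cache', verbose=0)

@memory.cache
def add_one(x):
    return x + 1

ref = add_one.call_and_shelve(41)  # MemorizedResult handle, not the value
assert ref.get() == 42             # value is loaded from the store
ref.clear()                        # drop just this cached item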
Example #7
def _setup_toy_cache(tmpdir, num_inputs=10):
    memory = Memory(cachedir=tmpdir.strpath, verbose=0)

    @memory.cache()
    def get_1000_bytes(arg):
        return 'a' * 1000

    inputs = list(range(num_inputs))
    for arg in inputs:
        get_1000_bytes(arg)

    hash_dirnames = [get_1000_bytes._get_output_dir(arg)[0] for arg in inputs]

    full_hashdirs = [
        os.path.join(get_1000_bytes.cachedir, dirname)
        for dirname in hash_dirnames
    ]
    return memory, full_hashdirs, get_1000_bytes
Example #8
def test_memory_args_as_kwargs(tmpdir):
    """Non-regression test against 0.12.0 changes.

    https://github.com/joblib/joblib/pull/751
    """
    memory = Memory(location=tmpdir.strpath, verbose=0)

    @memory.cache
    def plus_one(a):
        return a + 1

    # It's possible to call a positional arg as a kwarg.
    assert plus_one(1) == 2
    assert plus_one(a=1) == 2

    # However, a positional argument that joblib hadn't seen
    # before would cause a failure if it was passed as a kwarg.
    assert plus_one(a=2) == 3
Example #9
def test_memory_in_memory_function_code_change():
    _function_to_cache.__code__ = _sum.__code__

    mem = Memory(cachedir=env['dir'], verbose=0)
    f = mem.cache(_function_to_cache)

    nose.tools.assert_equal(f(1, 2), 3)
    nose.tools.assert_equal(f(1, 2), 3)

    with warnings.catch_warnings(record=True):
        # ignore name collision warnings
        warnings.simplefilter("always")

        # Check that inline function modification triggers a cache invalidation

        _function_to_cache.__code__ = _product.__code__
        nose.tools.assert_equal(f(1, 2), 2)
        nose.tools.assert_equal(f(1, 2), 2)
Example #10
def _setup_toy_cache(tmpdir, num_inputs=10):
    memory = Memory(location=tmpdir.strpath, verbose=0)

    @memory.cache()
    def get_1000_bytes(arg):
        return 'a' * 1000

    inputs = list(range(num_inputs))
    for arg in inputs:
        get_1000_bytes(arg)

    func_id = _build_func_identifier(get_1000_bytes)
    hash_dirnames = [get_1000_bytes._get_output_identifiers(arg)[1]
                     for arg in inputs]

    full_hashdirs = [os.path.join(get_1000_bytes.store_backend.location,
                                  func_id, dirname)
                     for dirname in hash_dirnames]
    return memory, full_hashdirs, get_1000_bytes
Example #11
def test_memory_numpy_check_mmap_mode(tmpdir):
    """Check that mmap_mode is respected even at the first call"""

    memory = Memory(location=tmpdir.strpath, mmap_mode='r', verbose=0)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    assert isinstance(c, np.memmap)
    assert c.mode == 'r'

    assert isinstance(b, np.memmap)
    assert b.mode == 'r'
Example #12
def test_memory_exception():
    """ Smoketest the exception handling of Memory.
    """
    memory = Memory(cachedir=env['dir'], verbose=0)

    class MyException(Exception):
        pass

    @memory.cache
    def h(exc=0):
        if exc:
            raise MyException

    # Call once, to initialise the cache
    h()

    for _ in range(3):
        # Call 3 times, to be sure that the Exception is always raised
        yield nose.tools.assert_raises, MyException, h, 1
Example #13
def test_memory_numpy_check_mmap_mode():
    """Check that mmap_mode is respected even at the first call"""

    memory = Memory(cachedir=env['dir'], mmap_mode='r', verbose=0)
    memory.clear(warn=False)

    @memory.cache()
    def twice(a):
        return a * 2

    a = np.ones(3)

    b = twice(a)
    c = twice(a)

    nose.tools.assert_true(isinstance(c, np.memmap))
    nose.tools.assert_equal(c.mode, 'r')

    nose.tools.assert_true(isinstance(b, np.memmap))
    nose.tools.assert_equal(b.mode, 'r')
Example #14
def test_memory_numpy():
    " Test memory with a function with numpy arrays."
    # Check with memmapping and without.
    for mmap_mode in (None, 'r'):
        accumulator = list()

        def n(l=None):
            accumulator.append(1)
            return l

        memory = Memory(cachedir=env['dir'], mmap_mode=mmap_mode, verbose=0)
        memory.clear(warn=False)
        cached_n = memory.cache(n)

        rnd = np.random.RandomState(0)
        for i in range(3):
            a = rnd.random_sample((10, 10))
            for _ in range(3):
                yield nose.tools.assert_true, np.all(cached_n(a) == a)
                yield nose.tools.assert_equal, len(accumulator), i + 1
Example #15
def test_memory_exception(tmpdir):
    """ Smoketest the exception handling of Memory.
    """
    memory = Memory(location=tmpdir.strpath, verbose=0)

    class MyException(Exception):
        pass

    @memory.cache
    def h(exc=0):
        if exc:
            raise MyException

    # Call once, to initialise the cache
    h()

    for _ in range(3):
        # Call 3 times, to be sure that the Exception is always raised
        with raises(MyException):
            h(1)
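Note that joblib does not memoize failed calls: a call that raises stores nothing, so each of the three h(1) calls above re-executes the function and the exception propagates every time.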
Example #16
def test_memorized_result_pickle(tmpdir):
    # Verify a MemoryResult object can be pickled/depickled. Non regression
    # test introduced following issue
    # https://github.com/joblib/joblib/issues/747

    memory = Memory(location=tmpdir.strpath)

    @memory.cache
    def g(x):
        return x**2

    memorized_result = g.call_and_shelve(4)
    memorized_result_pickle = pickle.dumps(memorized_result)
    memorized_result_loads = pickle.loads(memorized_result_pickle)

    assert memorized_result.store_backend.location == \
        memorized_result_loads.store_backend.location
    assert memorized_result.func == memorized_result_loads.func
    assert memorized_result.args_id == memorized_result_loads.args_id
    assert str(memorized_result) == str(memorized_result_loads)
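The point of making MemorizedResult picklable is that the handle is cheap to serialize: it carries the store location and the arguments id, not the payload, so it can be shipped to another process that shares the cache. A hedged round-trip sketch:

import pickle
from joblib import Memory

memory = Memory(location='./joblib_cache', verbose=0)

@memory.cache
def g(x):
    return x ** 2

ref = g.call_and_shelve(4)
blob = pickle.dumps(ref)       # small: the handle, not the result
restored = pickle.loads(blob)  # e.g. in a worker sharing the cache dir
assert restored.get() == 16    # value is re-read from the store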
Example #17
def test_memory_recomputes_after_an_error_while_loading_results(
        tmpdir, monkeypatch):
    memory = Memory(location=tmpdir.strpath)

    def func(arg):
        # This makes sure that the timestamps returned by two calls of
        # func are different. This is needed on Windows, where the
        # time.time resolution may not be fine enough.
        time.sleep(0.01)
        return arg, time.time()

    cached_func = memory.cache(func)
    input_arg = 'arg'
    arg, timestamp = cached_func(input_arg)

    # Make sure the function is correctly cached
    assert arg == input_arg

    # Corrupting output.pkl to make sure that an error happens when
    # loading the cached result
    single_cache_item, = memory.store_backend.get_items()
    output_filename = os.path.join(single_cache_item.path, 'output.pkl')
    with open(output_filename, 'w') as f:
        f.write('garbage')

    recorded_warnings = []

    def append_to_record(item):
        recorded_warnings.append(item)

    # Make sure that corrupting the file causes recomputation and that
    # a warning is issued. Need monkeypatch because pytest does not
    # capture stdlib logging output (see
    # https://github.com/pytest-dev/pytest/issues/2079)
    monkeypatch.setattr(cached_func, 'warn', append_to_record)
    recomputed_arg, recomputed_timestamp = cached_func(arg)
    assert len(recorded_warnings) == 1
    exception_msg = 'Exception while loading results'
    assert exception_msg in recorded_warnings[0]
    assert recomputed_arg == arg
    assert recomputed_timestamp > timestamp
Example #18
def test_memory_numpy_check_mmap_mode_async(tmpdir, monkeypatch):
    """Check that mmap_mode is respected even at the first call"""

    memory = Memory(location=tmpdir.strpath, mmap_mode='r', verbose=0)

    @memory.cache()
    @asyncio.coroutine
    def twice(a):
        return a * 2

    @asyncio.coroutine
    def main():
        a = np.ones(3)

        b = yield from twice(a)
        c = yield from twice(a)

        assert isinstance(c, np.memmap)
        assert c.mode == 'r'

        assert isinstance(b, np.memmap)
        assert b.mode == 'r'

        # Corrupt the file. Deleting the b and c mmaps first is
        # necessary to be able to edit the file.
        del b
        del c
        corrupt_single_cache_item(memory)

        # Make sure that corrupting the file causes recomputation and that
        # a warning is issued.
        recorded_warnings = monkeypatch_cached_func_warn(twice, monkeypatch)
        d = yield from twice(a)
        assert len(recorded_warnings) == 1
        exception_msg = 'Exception while loading results'
        assert exception_msg in recorded_warnings[0]
        # Asserts that the recomputation returns a mmap
        assert isinstance(d, np.memmap)
        assert d.mode == 'r'

    asyncio.get_event_loop().run_until_complete(main())
Example #19
def test_cached_function_race_condition_when_persisting_output_2(
        tmpdir, capfd):
    # Test race condition in first attempt at solving
    # https://github.com/joblib/joblib/issues/490. The race condition
    # was due to the delay between seeing the cache directory created
    # (interpreted as the result being cached) and the output.pkl being
    # pickled.
    memory = Memory(location=tmpdir.strpath)
    func_cached = memory.cache(fast_func_with_conditional_complex_output)

    Parallel(n_jobs=2)(delayed(func_cached)(True if i % 2 == 0 else False)
                       for i in range(3))

    stdout, stderr = capfd.readouterr()

    # Checking both stdout and stderr (ongoing PR #434 may change
    # logging destination) to make sure there is no exception while
    # loading the results
    exception_msg = 'Exception while loading results'
    assert exception_msg not in stdout
    assert exception_msg not in stderr
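Both race-condition tests follow the same recipe: run a cheap cached function from several workers at once and check that no 'Exception while loading results' appears. A minimal sketch of the setup, with expensive as a hypothetical stand-in for the complex-output helpers used above:

from joblib import Memory, Parallel, delayed

memory = Memory(location='./joblib_cache', verbose=0)

def expensive(flag):
    # stand-in for a function with a large, slow-to-pickle output
    return [flag] * 1000

cached = memory.cache(expensive)

# Two workers may race to persist the same item; the cache must stay
# consistent and every call must still return the correct value.
results = Parallel(n_jobs=2)(delayed(cached)(i % 2 == 0) for i in range(4))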
Example #20
def _setup_temporary_cache_folder(num_inputs=10):
    # Use separate cache dir to avoid side-effects from other tests
    # that do not use _setup_temporary_cache_folder
    mem = Memory(cachedir=os.path.join(env['dir'], 'separate_cache'),
                 verbose=0)

    @mem.cache()
    def get_1000_bytes(arg):
        return 'a' * 1000

    inputs = list(range(num_inputs))
    for arg in inputs:
        get_1000_bytes(arg)

    hash_dirnames = [get_1000_bytes._get_output_dir(arg)[0] for arg in inputs]

    full_hashdirs = [
        os.path.join(get_1000_bytes.cachedir, dirname)
        for dirname in hash_dirnames
    ]
    return mem, full_hashdirs, get_1000_bytes
Example #21
def main():
    logging.basicConfig(
        format="[%(asctime)s] %(levelname)s %(threadName)s: %(message)s",
        level=logging.INFO)

    cli_parser = ArgumentParser()
    cli_parser.add_argument("-c", "--conf", type=str, required=True)
    cli_parser.add_argument("-d", "--dataset", type=str, required=True)
    cli_parser.add_argument("-r", "--repeats", type=int, default=15)
    cli_parser.add_argument("-p", "--parallel", type=int, default=1)
    cli_parser.add_argument("--cache", type=str, default="cache")
    args = cli_parser.parse_args()

    parser = ArgumentParser()
    parser.add_argument("--lr", type=float, default=0.01)
    parser.add_argument("--fraction", type=float, default=0.001)
    parser.add_argument("--epochs", type=int, default=10)
    with open(args.conf, "rt") as f:
        configs = [
            parser.parse_args(line.strip().split(" "))
            for line in f.readlines()
        ]

    with TaskExecutor(max_workers=args.parallel,
                      memory=Memory(args.cache, compress=6)):
        results = [
            evaluate_config(args.dataset, conf.lr, conf.fraction, conf.epochs,
                            args.repeats) for conf in configs
        ]
    results = [r.result for r in results]

    for config, result in sorted(zip(configs, results),
                                 key=lambda e: e[1]['conf'][0],
                                 reverse=True):
        logging.info(
            f"{args.dataset} baseline: {result['mean']:.5f} +/- {result['std']:.5f} => {result['conf'][0]:.5f}  (lr = {result['lr']})"
        )
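In the script above, Memory(args.cache, compress=6) takes the cache directory as its first positional argument, and compress (True or an integer level) compresses stored pickles, trading CPU for disk space. A minimal illustration with a hypothetical path:

from joblib import Memory

memory = Memory('./compressed_cache', compress=6, verbose=0)

@memory.cache
def big_blob(n):
    return b'x' * n

big_blob(1_000_000)  # result is compressed before being written to disk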
Example #22
def test_memory_warning_lambda_collisions():
    # Check that multiple uses of lambdas will trigger collision warnings
    memory = Memory(cachedir=env['dir'], verbose=0)
    # For isolation with other tests
    memory.clear()
    a = lambda x: x
    a = memory.cache(a)
    b = lambda x: x + 1
    b = memory.cache(b)

    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        # This is a temporary workaround until we get rid of
        # inspect.getargspec, see
        # https://github.com/joblib/joblib/issues/247
        warnings.simplefilter("ignore", DeprecationWarning)
        nose.tools.assert_equal(0, a(0))
        nose.tools.assert_equal(2, b(1))
        nose.tools.assert_equal(1, a(1))

    # In recent Python versions we can retrieve the code of lambdas, so
    # no exception is raised; only warnings are emitted.
    nose.tools.assert_equal(len(w), 4)
Example #23
def test_memory_arg_lambda():
    " Test memory with a lambda argument."

    memory = Memory(cachedir=env['dir'], verbose=0)
    memory.clear(warn=False)

    accum = {'value': 0}

    @memory.cache()
    def run_func(func):
        accum['value'] += 1
        return func()

    lambda_1 = lambda: 1
    lambda_2 = lambda: 2

    a = run_func(lambda_1)
    b = run_func(lambda_1)
    c = run_func(lambda_2)

    nose.tools.assert_equal(a, 1)
    nose.tools.assert_equal(b, 1)
    nose.tools.assert_equal(c, 2)
    nose.tools.assert_equal(accum['value'], 2)
Example #24
def test_memory_name_collision():
    " Check that name collisions with functions will raise warnings"
    memory = Memory(cachedir=env['dir'], verbose=0)

    @memory.cache
    def name_collision(x):
        """ A first function called name_collision
        """
        return x

    a = name_collision

    @memory.cache
    def name_collision(x):
        """ A second function called name_collision
        """
        return x

    b = name_collision

    if not hasattr(warnings, 'catch_warnings'):
        # catch_warnings is new in Python 2.6
        return

    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        # This is a temporary workaround until we get rid of
        # inspect.getargspec, see
        # https://github.com/joblib/joblib/issues/247
        warnings.simplefilter("ignore", DeprecationWarning)
        a(1)
        b(1)

        yield nose.tools.assert_equal, len(w), 1
        yield nose.tools.assert_true, "collision" in str(w[-1].message)
Example #25
def test_memory_warning_collision_detection():
    # Check that collisions impossible to detect will raise appropriate
    # warnings.
    memory = Memory(cachedir=env['dir'], verbose=0)
    # For isolation with other tests
    memory.clear()
    a1 = eval('lambda x: x')
    a1 = memory.cache(a1)
    b1 = eval('lambda x: x+1')
    b1 = memory.cache(b1)

    with warnings.catch_warnings(record=True) as w:
        # Cause all warnings to always be triggered.
        warnings.simplefilter("always")
        # This is a temporary workaround until we get rid of
        # inspect.getargspec, see
        # https://github.com/joblib/joblib/issues/247
        warnings.simplefilter("ignore", DeprecationWarning)
        a1(1)
        b1(1)
        a1(0)

        assert len(w) == 2
        assert "cannot detect" in str(w[-1].message).lower()
Example #26
import numpy
import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc

from joblib.memory import Memory

rc('font', **{'family': 'sans-serif', 'sans-serif': ['Helvetica']})
## for Palatino and other serif fonts use:
#rc('font',**{'family':'serif','serif':['Palatino']})
rc('text', usetex=True)
#matplotlib.rcParams['text.latex.preamble']=[r"\usepackage{siunitx}\sisetup{detect-weight=true, detect-family=true}"]
matplotlib.rcParams['text.latex.preamble'] = [
    r"\usepackage{bm}\usepackage{siunitx}\sisetup{detect-weight=true, detect-family=true}"
]

memory = Memory(cachedir="plotsperformance", verbose=0, compress=9)


def loadtest(folder):
    result = []
    for fold in range(10):
        result.append(
            numpy.load(folder + "/fold_" + str(fold) + "_test.npz")['arr_0'])
    # return numpy.ravel(numpy.array(result))
    return result


@memory.cache
def loadLL(folder):
    result = []
    for fold in range(10):
Example #27
def test():
    n_topics = 10

    with open("acceptedoralpaperstext.txt") as f:
        content = []
        ids = []

        for line in f.readlines():
            cols = line.split("\t")
            content.append(cols[1].strip())
            ids.append(cols[0].strip())

    tf_vectorizer = CountVectorizer(max_df=0.95, min_df=2,
                                    max_features=100,
                                    stop_words='english')

    # print(content)
    bow = tf_vectorizer.fit_transform(content)

    feature_names = tf_vectorizer.get_feature_names()

    lda = LatentDirichletAllocation(n_topics=n_topics, max_iter=2000,
                                    # learning_method='online',
                                    # learning_offset=50.,
                                    random_state=0)
    topics = lda.fit_transform(bow, bow)

    print(print_top_words(lda, feature_names, 10))
    print(topics)
    print(bow.shape)

    f = numpy.array(feature_names)

    data = numpy.array(bow.todense())

    featureTypes = ["discrete"] * data.shape[1]

    domains = []
    for i, ft in enumerate(featureTypes):
        domain = numpy.unique(data[:, i])
        # print(i, ft, domain)
        domains.append(domain)

    memory = Memory(cachedir="/tmp/spntopics", verbose=0, compress=9)

    @memory.cache
    def learn(data, min_instances_slice, feature_names, domains, featureTypes):
        spn = SPN.LearnStructure(data, featureTypes=featureTypes,
                                 row_split_method=Splitting.KmeansRows(),
                                 col_split_method=Splitting.RDCTest(threshold=0.1,
                                                                    linear=True),
                                 featureNames=feature_names,
                                 domains=domains,
                                 min_instances_slice=min_instances_slice)
        return spn

    print(data.shape)
    print(type(data))
    # 0/0
    spn = learn(data, 5, f, domains, featureTypes)

    spn.root.validate()
    
    prodNodes = spn.get_nodes_by_type(ProductNode)

    for pn in prodNodes:
        leaves = pn.get_leaves()
        words = set()
        for leaf in leaves:
            # assuming pwl node:
            _x = numpy.argmax(leaf.y_range)
            max_x = leaf.x_range[_x]
            if max_x < 1.0:
                continue
            
            words.add(feature_names[leaf.featureIdx])
        # ll = pn.eval()
        if len(words) < 4:
            continue
        
        print(pn.rows, words)
Example #28
def test_clear_memory_with_none_location():
    memory = Memory(location=None)
    memory.clear()
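Clearing works at three granularities. A hedged sketch, assuming a writable cache directory:

from joblib import Memory

memory = Memory(location='./joblib_cache', verbose=0)

@memory.cache
def f(x):
    return x

ref = f.call_and_shelve(1)
ref.clear()      # drop this single cached call
f.clear()        # drop every cached result of f
memory.clear()   # wipe the whole cache (a no-op when location is None)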
Example #29
def test_memory_func_with_signature(tmpdir):
    memory = Memory(location=tmpdir.strpath, verbose=0)
    func_cached = memory.cache(func_with_signature)

    assert func_cached(1, 2.) == 3.
Example #30
def test_memory_default_store_backend():
    # Test that an unknown backend raises a TypeError instead of silently
    # falling back to the FileSystemStoreBackend.
    with raises(TypeError) as excinfo:
        Memory(location='/tmp/joblib', backend='unknown')
    excinfo.match(r"Unknown location*")
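The TypeError is raised because 'unknown' is not a registered backend name. Recent joblib versions expose register_store_backend for plugging in custom storage; the sketch below assumes that API and uses a do-nothing subclass of the (private) FileSystemStoreBackend purely for illustration:

from joblib import Memory, register_store_backend
from joblib._store_backends import FileSystemStoreBackend

class InstrumentedBackend(FileSystemStoreBackend):
    """Hypothetical backend that behaves exactly like the filesystem one."""

register_store_backend('instrumented', InstrumentedBackend)
memory = Memory(location='/tmp/joblib_demo', backend='instrumented',
                verbose=0)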