Example #1
def check_memory(memory):
    """Check that ``memory`` is joblib.Memory-like.

    joblib.Memory-like means that ``memory`` can be converted into a
    joblib.Memory instance (typically a str denoting the ``location``)
    or has the same interface (has a ``cache`` method).

    Parameters
    ----------
    memory : None, str or object with the joblib.Memory interface

    Returns
    -------
    memory : object with the joblib.Memory interface

    Raises
    ------
    ValueError
        If ``memory`` is not joblib.Memory-like.
    """

    if memory is None or isinstance(memory, str):
        if LooseVersion(joblib.__version__) < '0.12':
            memory = joblib.Memory(cachedir=memory, verbose=0)
        else:
            memory = joblib.Memory(location=memory, verbose=0)
    elif not hasattr(memory, 'cache'):
        raise ValueError("'memory' should be None, a string or have the same"
                         " interface as joblib.Memory."
                         " Got memory='{}' instead.".format(memory))
    return memory
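
A minimal usage sketch for check_memory, assuming joblib is importable; the path and function below are illustrative. All three accepted forms come back as an object exposing a cache method:

import joblib

def slow_sum(values):                       # illustrative function to memoize
    return sum(values)

mem = check_memory(None)                    # transparent Memory, nothing cached
mem = check_memory('/tmp/sklearn_cache')    # a string becomes the cache location
mem = check_memory(joblib.Memory(location=None, verbose=0))  # passed through as-is
cached_sum = mem.cache(slow_sum)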
Example #2
def test_make_pipeline_memory():
    cachedir = mkdtemp()
    if LooseVersion(joblib.__version__) < LooseVersion('0.12'):
        # Deal with change of API in joblib
        memory = joblib.Memory(cachedir=cachedir, verbose=10)
    else:
        memory = joblib.Memory(location=cachedir, verbose=10)
    pipeline = make_pipeline(DummyTransf(), SVC(), memory=memory)
    assert pipeline.memory is memory
    pipeline = make_pipeline(DummyTransf(), SVC())
    assert pipeline.memory is None
    assert len(pipeline) == 2

    shutil.rmtree(cachedir)
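
The pattern this test exercises, as a hedged sketch (StandardScaler stands in for the test's DummyTransf; joblib >= 0.12 assumed for the location keyword): a Memory handed to make_pipeline caches fitted transformers, so an identical re-fit reloads them from disk.

from tempfile import mkdtemp
import shutil
import joblib
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

cachedir = mkdtemp()
memory = joblib.Memory(location=cachedir, verbose=0)
pipe = make_pipeline(StandardScaler(), SVC(), memory=memory)
# pipe.fit(X, y)   # first fit runs StandardScaler and caches the result
# pipe.fit(X, y)   # an identical fit loads the cached transformer instead
shutil.rmtree(cachedir)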
Example #3
    def __init__(self, cache_dir):
        # TODO TdR 10/08/16: change url to post-processed data source.
        self.url = 'https://dds.cr.usgs.gov/srtm/version2_1/SRTM3/'
        self.file_listing = None

        cache_engine = joblib.Memory(cachedir=cache_dir, verbose=0)
        self.cached_crawl_page = cache_engine.cache(self.crawl_page)
Example #4
def test_compile_string():
    import stan_utility.cache
    with tempfile.TemporaryDirectory() as cachedir:
        print("using cachedir:", cachedir)
        stan_utility.cache.path = cachedir
        stan_utility.cache.mem = joblib.Memory(cachedir, verbose=False)
        
        import stan_utility

        with open(os.path.join(os.path.dirname(__file__), 'test.stan')) as f:
            model_code = f.read()
        model = stan_utility.compile_model_code(model_code, model_name="mytest")
        data = dict(
            mean=1,
            unused=np.random.normal(size=(4,42)),
        )
        if os.path.exists("mytest_fitfit.hdf5"):
            os.unlink("mytest_fitfit.hdf5")
        samples = stan_utility.sample_model(model, data, outprefix="mytest_fit", chains=2, iter=346)
        assert os.path.exists("mytest_fitfit.hdf5")
        os.unlink("mytest_fitfit.hdf5")

        if os.path.exists("mytest_fit_corner.pdf"):
            os.unlink("mytest_fit_corner.pdf")
        stan_utility.plot_corner(samples, outprefix="mytest_fit")
        assert os.path.exists("mytest_fit_corner.pdf")
        os.unlink("mytest_fit_corner.pdf")
        
        flat_samples = stan_utility.get_flat_posterior(samples)
        assert set(flat_samples.keys()) == {"x", "y"}, flat_samples.keys()
        assert flat_samples['x'].shape == (346,), flat_samples['x'].shape
        assert flat_samples['y'].shape == (346, 10), flat_samples['y'].shape
Example #5
def test_compile_file():
    import stan_utility.cache
    with tempfile.TemporaryDirectory() as cachedir:
        print("using cachedir:", cachedir)
        stan_utility.cache.path = cachedir
        stan_utility.cache.mem = joblib.Memory(cachedir, verbose=False)
        
        import stan_utility

        model = stan_utility.compile_model(os.path.join(os.path.dirname(__file__), 'test.stan'))
        data = dict(
            mean=1,
            unused=np.random.normal(size=(4,42)),
        )
        stan_utility.sample_model(model, data, chains=2)

        files = os.listdir(stan_utility.cache.get_path())
        assert "joblib" in files
        assert any(f for f in files if f.startswith("cached-") and f.endswith('.pkl')), files
        assert len(files) > 1, files

        stan_utility.cache.clear()

        files = os.listdir(stan_utility.cache.get_path())
        assert files == ["joblib"], files
Example #6
def set_cachedir(cachedir=None, verbose=0):
    """Set root directory for the joblib cache.

    :Parameters:
     cachedir
         the cache directory name; if ``None``, a temporary directory
         is created using `TemporaryDirectory`
     verbose
         an integer number, controls the verbosity of the cache
         (default is 0, i.e., not verbose)
    """

    global _cachedir
    global _cacheobj
    global _cached_methods
    global _memory

    if cachedir is None:
        _cacheobj = TemporaryDirectory(prefix='mdp-joblib-cache.')
        cachedir = _cacheobj.name

    # only reset if the directory changes
    if cachedir != _cachedir:
        _cachedir = cachedir
        _memory = joblib.Memory(cachedir, verbose=verbose)
        # reset cached methods
        _cached_methods.clear()
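
A hedged usage sketch; _memory is the module-level joblib.Memory set above, and slow_identity is purely illustrative:

set_cachedir()                    # cache in a fresh TemporaryDirectory
set_cachedir('/tmp/mdp-cache')    # changing the directory resets cached methods

def slow_identity(x):             # hypothetical stand-in for a real computation
    return x

cached = _memory.cache(slow_identity)
cached(42)                        # computed once, then loaded from disk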
Example #7
    def __init__(self, basedir='.cache'):
        if not basedir:
            raise ValueError('`basedir` is empty')
        location = str(Path(__file__).resolve().parent.parent / basedir)
        mem = joblib.Memory(location=location, verbose=logging.DEBUG)
        self.load_svmlight_file = mem.cache(
            sklearn.datasets.load_svmlight_file)
Example #8
def cached(*args, **kargs):
    import joblib as jb
    from .. import CACHE
    memo = getattr(cached, 'memo', None)
    if memo is None:
        cached.memo = memo = jb.Memory(CACHE, verbose=0)
    return memo.cache(*args, **kargs)
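
A usage sketch for the cached helper, assuming the package-level CACHE constant names a writable directory; the Memory object is built lazily on first call and stashed on the function itself:

@cached
def expensive(x):
    return x ** 2

expensive(10)   # computed and written under CACHE
expensive(10)   # loaded back from disk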
Example #9
def generate_tsdiffana_thumbnail(image_files,
                                 sessions,
                                 subject_id,
                                 output_dir,
                                 results_gallery=None,
                                 tooltips=None):
    """Generate tsdiffana thumbnails

    Parameters
    ----------
    image_files: string or list of strings
        path (4D case) or list of paths (3D case) of images under inspection

    output_dir: string
        dir to which all output will be written

    subject_id: string
        id of subject under inspection

    sessions: list
        list of session ids, one per element of image_files

    results_gallery: ResultsGallery instance (optional)
        gallery to which thumbnails will be committed

    """
    # plot figures
    qa_cache_dir = os.path.join(output_dir, "QA")
    if not os.path.exists(qa_cache_dir):
        os.makedirs(qa_cache_dir)
    qa_mem = joblib.Memory(cachedir=qa_cache_dir, verbose=5)
    results = qa_mem.cache(multi_session_time_slice_diffs)(image_files)
    axes = plot_tsdiffs(results, use_same_figure=False)
    figures = [ax.get_figure() for ax in axes]
    output_filename_template = os.path.join(output_dir,
                                            "tsdiffana_plot_{0}.png")
    output_filenames = [
        output_filename_template.format(i) for i in range(len(figures))
    ]
    for fig, output_filename in zip(figures, output_filenames):
        fig.savefig(output_filename, bbox_inches="tight", dpi=200)
        pl.close(fig)

    if tooltips is None:
        tooltips = [None] * len(output_filenames)

    # create thumbnails
    thumbnails = []
    for output_filename, tooltip in zip(output_filenames, tooltips):
        thumbnail = Thumbnail(tooltip=tooltip)
        thumbnail.a = a(href=os.path.basename(output_filename))
        thumbnail.img = img(src=os.path.basename(output_filename),
                            height="250px",
                            width="600px")
        thumbnail.description = "tsdiffana ({0} sessions)".format(
            len(sessions))
        thumbnails.append(thumbnail)
    if results_gallery:
        results_gallery.commit_thumbnails(thumbnails)
    return thumbnails
Example #10
    def __init__(self, grid, shell_model, cache_dir=None):
        # final distance r
        r = grid.r_spherical

        # initial distance r0
        r0 = shell_model.initial_radius(r)

        # dr/dr0
        dr_dr0 = shell_model.dr_dr0(r, r0)

        # initial Lagrangian mesh
        x0 = r0 * grid.x / r
        y0 = r0 * grid.y / r
        z0 = r0 * grid.z / r

        # Computes and stores the components of the Jacobian matrix
        self.dx_dx0 = dr_dr0 * (x0 / r0)**2 + r / r0 * (1 - (x0 / r0)**2)
        self.dy_dy0 = dr_dr0 * (y0 / r0)**2 + r / r0 * (1 - (y0 / r0)**2)
        self.dz_dz0 = dr_dr0 * (z0 / r0)**2 + r / r0 * (1 - (z0 / r0)**2)

        self.dx_dy0 = (dr_dr0 - r / r0) * x0 * y0 / r0**2
        self.dy_dx0 = self.dx_dy0

        self.dx_dz0 = (dr_dr0 - r / r0) * x0 * z0 / r0**2
        self.dz_dx0 = self.dx_dz0

        self.dy_dz0 = (dr_dr0 - r / r0) * y0 * z0 / r0**2
        self.dz_dy0 = self.dy_dz0

        self._inv_J = None

        if cache_dir is not None:
            self._mem = joblib.Memory(cache_dir, verbose=0)
        else:
            self._mem = None
Example #11
def hdb_predict(data, cl_size):
    cl = hdbscan.HDBSCAN(
        min_samples=1,
        min_cluster_size=cl_size,
        core_dist_n_jobs=threads,
        memory=joblib.Memory(location=".DAJIN_temp/clustering", verbose=0),
    )
    return cl.fit_predict(data) + 1
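
For context, a hedged calling sketch; threads is a module-level global in the original, and joblib creates the .DAJIN_temp/clustering directory on demand:

import numpy as np

threads = 4                              # global assumed by hdb_predict
data = np.random.rand(200, 2)
labels = hdb_predict(data, cl_size=10)   # labels start at 1 because of the +1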
Example #12
    def __init__(self, paths_name, cache=True):
        self.paths_name = paths_name
        self.paths = self._get_paths()
        if not on_cloud and cache:
            memory = joblib.Memory(cachedir=self.cache_dir)
            self.get_le_features = memory.cache(self.get_le_features)
            self.get_features = memory.cache(self.get_features)
        self.is_log = False
Example #13
def init_data(data, client_options=None, plugin=None, cache_waveforms=False):
    """Return appropriate get_waveforms function

    See example configuration file for a description of the options"""
    if client_options is None:
        client_options = {}
    is_webservice = data in ('arclink', 'fdsn', 'seishub')
    if is_webservice:
        webservice_module = import_module('obspy.%s' % data)
        Client = getattr(webservice_module, 'Client')
        client = Client(**client_options)
        if data == 'fdsn':
            get_waveforms_orig = client.get_waveforms
        else:
            get_waveforms_orig = client.getWaveform

        def get_waveforms(event=None, **args):
            return get_waveforms_orig(**args)
    elif data == 'plugin':
        modulename, funcname = plugin.split(':')
        get_waveforms = load_func(modulename.strip(), funcname.strip())
    else:
        from obspy import read
        stream = read(data)

        def get_waveforms(network,
                          station,
                          location,
                          channel,
                          starttime,
                          endtime,
                          event=None):
            st = stream.select(network=network,
                               station=station,
                               location=location,
                               channel=channel)
            st = st.slice(starttime, endtime)
            return st

    def wrapper(**kwargs):
        try:
            return get_waveforms(**kwargs)
        except (KeyboardInterrupt, SystemExit):
            raise
        except Exception as ex:
            seedid = '.'.join((kwargs['network'], kwargs['station'],
                               kwargs['location'], kwargs['channel']))
            msg = 'channel %s: error while retrieving data: %s'
            log.debug(msg, seedid, ex)

    use_cache = cache_waveforms and (is_webservice or data == 'plugin')
    if use_cache and joblib:
        log.info('use waveform cache in %s', cache_waveforms)
        memory = joblib.Memory(cachedir=cache_waveforms, verbose=0)
        return memory.cache(wrapper)
    elif use_cache:
        log.warning('install joblib to use cache_waveforms option')
    return wrapper
Example #14
    def __init__(self, user_settings):
        self.tokenKey = user_settings.quandl_token
        self.user_settings = user_settings
        quandl.ApiConfig.api_key = self.tokenKey

        self.temp_dir = getTempPath(userSettings=user_settings)
        self.cacher = joblib.Memory(self.temp_dir, compress=9, verbose=0)

        pass
Example #15
def hdb_cl_num(cl_size):
    cl = hdbscan.HDBSCAN(
        min_samples=1,
        min_cluster_size=cl_size,
        core_dist_n_jobs=threads,
        memory=joblib.Memory(location=".DAJIN_temp/clustering", verbose=0),
    )
    tmp = cl.fit_predict(pc_score)
    return len(np.unique(tmp))
Example #16
    def __init__(self, user_settings):

        self.user_settings = user_settings
        self.financial_downloader = gm.FinancialsDownloader()
        # self.key_ratios_downloader = gm.KeyRatiosDownloader()
        self.temp_dir = getTempPath(userSettings=user_settings)
        self.cacher = joblib.Memory(self.temp_dir)

        # kr = gm.KeyRatiosDownloader()
        pass
Example #17
    def _memoize(self, func, verbose=0):
        '''
        helper method for memory cache.
        '''
        if not hasattr(self, '_mymem'):
            self._mymem = joblib.Memory(cachedir=self.cachedir)

        memoized_func = self._mymem.cache(func, verbose=verbose)
        memoized_func.__doc__ = func.__doc__

        return memoized_func
Example #18
def setup_cachedir(cachedir, mmap_mode=None, bytes_limit=None):
    """This function injects a joblib.Memory object in the cache() function
    (in a thread-specific slot of its 'memories' attribute)."""
    if not hasattr(cache, 'memories'):
        cache.memories = {}

    memory = joblib.Memory(location=cachedir,
                           verbose=0,
                           mmap_mode=mmap_mode,
                           bytes_limit=bytes_limit)
    cache.memories[current_thread().name] = memory
    return memory
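
A hedged sketch of the companion cache() function the docstring alludes to; only the thread-keyed 'memories' attribute is confirmed by the source, the rest is an assumption:

from threading import current_thread

def cache(func):
    # look up the Memory registered for the current thread and wrap func with it
    memory = cache.memories[current_thread().name]
    return memory.cache(func)

def slow_double(x):                  # illustrative function to memoize
    return 2 * x

setup_cachedir('/tmp/thread-cache')  # illustrative location
cached_double = cache(slow_double)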
Example #19
def test_data_cache():
    # First, get the raw features
    crema_input = crema.pre.CQTensor()
    data = crema.data.make_task_data(TEST_FILE, TEST_JAMS, [], crema_input)

    # Then create a cache
    cache = joblib.Memory(cachedir='./crema_cache/', verbose=0)
    data2 = crema.data.make_task_data(TEST_FILE, TEST_JAMS, [], crema_input, cache=cache)
    data3 = crema.data.make_task_data(TEST_FILE, TEST_JAMS, [], crema_input, cache=cache)

    assert np.all(data['input_cqtensor'] == data2['input_cqtensor'])
    assert np.all(data2['input_cqtensor'] == data3['input_cqtensor'])
Example #20
def memory(name) -> joblib.Memory:
    """
    Return the joblib's Memory object with the given name.
    """
    if isinstance(name, joblib.Memory):
        return name

    path = user_path() / "cache" / name
    path.mkdir(parents=True, exist_ok=True)
    opts = CACHE_OPTIONS.get(name, {})
    opts.setdefault("verbose", 0)
    return joblib.Memory(path, **opts)
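
A usage sketch, assuming user_path() resolves to a writable per-user directory and CACHE_OPTIONS is the module-level options table:

def fetch(x):                 # hypothetical function to memoize
    return x * 2

mem = memory('downloads')     # creates <user_path>/cache/downloads if missing
cached_fetch = mem.cache(fetch)
mem2 = memory(mem)            # an existing joblib.Memory is returned unchanged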
Example #21
    def __init__(self, user_settings):
        self.user_settings = user_settings
        self.vectorDao = VectorDao(self.user_settings)
        self.vectorizedDataService = VectorOnlineService(self.user_settings)
        self.vectorizedDataService.parallelDownloadInstruments = False
        if self.threads > 1:
            self.vectorizedDataService.parallelDownloadRatios = False
        else:
            self.vectorizedDataService.parallelDownloadRatios = False

        self.temp_dir = getTempPath(userSettings=user_settings)
        self.cacher = joblib.Memory(self.temp_dir, compress=9, verbose=0)
Example #22
def resample_img(input_img_filename,
                 new_vox_dims, output_filename=None):
    """
    Resamples an image to a new resolution

    Parameters
    ----------
    input_img_filename: string
        path to image to be resampled

    new_vox_dims: list or tuple of positive floats
        new vox dimensions to which the image is to be resampled

    output_filename: string (optional)
        where output image will be written

    Returns
    -------
    output_filename: string
        where the resampled img has been written

    """

    try:
        from nilearn.image import resample_img as ni_resample_img
    except ImportError:
        raise RuntimeError(
            "nilearn not found on your system; can't do resampling!")

    # sanity
    if output_filename is None:
        output_filename = os.path.join(
            os.path.dirname(input_img_filename),
            "resample_" + os.path.basename(input_img_filename))

    # prepare for smart-caching
    output_dir = os.path.dirname(output_filename)
    cache_dir = os.path.join(output_dir, "resample_img_cache")
    if not os.path.exists(cache_dir):
        os.makedirs(cache_dir)
    mem = joblib.Memory(cachedir=cache_dir, verbose=5)

    # resample input img to new resolution
    resampled_img = mem.cache(ni_resample_img)(
        input_img_filename,
        target_affine=np.diag(new_vox_dims))

    # save resampled img
    nibabel.save(resampled_img, output_filename)

    return output_filename
Example #23
def set_cache_dir(location=None, compress=True, verbose=0, **kwargs):
    """
	Set up a cache directory for use with requests.

	Parameters
	----------
	location: str or None or False
		The path of the base directory to use as a data store
		or None or False.  If None, a default directory is created
		using appdirs.user_cache_dir.
		If False is given, no caching is done and
		the Memory object is completely transparent.

	compress: boolean, or integer, optional
		Whether to zip the stored data on disk. If an integer is
		given, it should be between 1 and 9, and sets the amount
		of compression.

	verbose: int, optional
		Verbosity flag, controls the debug messages that are issued
		as functions are evaluated.

	bytes_limit: int, optional
		Limit in bytes of the size of the cache.

	"""
    global memory, cache_dir

    if location is None:
        location = appdirs.user_cache_dir('cached_requests')

    if location is False:
        location = None

    memory = joblib.Memory(location,
                           compress=compress,
                           verbose=verbose,
                           **kwargs)

    make_cache = (
        (requests, 'get'),
        (requests, 'post'),
    )

    for module, func_name in make_cache:
        try:
            func = getattr(module, f"_{func_name}_orig")
        except AttributeError:
            func = getattr(module, func_name)
            setattr(module, f"_{func_name}_orig", func)
        setattr(module, func_name, memory.cache(func))
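
A usage sketch: once set_cache_dir has run, requests.get and requests.post are transparently disk-cached, and the originals stay reachable through the _get_orig/_post_orig attributes the loop stores:

import requests

set_cache_dir()                            # default appdirs location
r1 = requests.get('https://example.com')   # network hit, response cached
r2 = requests.get('https://example.com')   # served from the joblib cache
set_cache_dir(location=False)              # the Memory becomes fully transparent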
Example #24
def adult_data_demo():
    xtrain, ytrain, xtest, ytest = load_adult_income_data(
        test_size=0.2, random_state=1337
    )

    # feature selection options
    fs_dict = adult_data_feature_selectors()

    # modelers
    model_dict = adult_data_modellers()

    # create a pipeline from the above
    cachedir = tempfile.mkdtemp()
    memory = joblib.Memory(cachedir, verbose=0)
    pipelines = [
        sklearn.pipeline.Pipeline(
            steps=[
                (fsname, fs),
                (modelname, model)
            ],
            memory=memory,
        )
        for (fsname, fs) in fs_dict.items()
        for (modelname, model) in model_dict.items()
    ]

    # cross validation
    df_scores = cross_validate_scores(pipelines, xtrain, ytrain)

    # select the pair with the best overall negative log loss
    fs, m = df_scores.groupby(['fs', 'm']).mean().test_neg_log_loss.idxmax()
    p_best = [
        p for p in pipelines
        if fs in p.named_steps
        and m in p.named_steps
    ][0]

    # re-fit this model to the *entire* train data (it has only ever been fitted
    # to bootstrapped sub-samples)
    p_best.fit(xtrain, ytrain)

    # get ccr values on test data
    df_ccr = get_ccr_df(p_best, xtest, ytest)
    ccr_fig = make_ccr_plot(df_ccr)

    # clean up our pipeline memory
    shutil.rmtree(cachedir)

    return p_best, df_scores, df_ccr, ccr_fig
Example #25
def setup_preprocessing(tokenizer, use_idf, ngram):
    memory = joblib.Memory(cache_dir, verbose=0)
    clf = ExtraTreesClassifier(max_depth=10,
                               n_estimators=2000,
                               random_state=RANDOM_SEED,
                               n_jobs=N_CORES)

    return Pipeline(
        [
            ("vect", CountVectorizer(tokenizer=tokenizer, ngram_range=ngram)),
            ("tfidf", TfidfTransformer(use_idf=use_idf)),
            ("selectfrommodel", SelectFromModel(clf)),
        ],
        memory=memory,
    )
Example #26
    def __init__(self, cachedir='tmp', verbose=1):
        """
        Parameters
        ----------
        cachedir: str
            Name of directory where objects are stored in files.
        verbose: bool, int
            Let joblib and this class speak when storing files
            to disk.
        """
        import joblib
        self.memory = joblib.Memory(cachedir=cachedir, verbose=verbose)
        self.verbose = verbose
        self.retrieve = self.memory.cache(self.retrieve, ignore=['data'])
        self.save = self.retrieve
Example #27
def cache_to_disk(fn=None, *, cachedir=_DEFAULT_FN_CACHE_PATH, **kwargs):
    """Cache this function to disk, using joblib.
    """
    mem = joblib.Memory(cachedir=cachedir, verbose=0, **kwargs)

    # bare decorator
    if fn:
        return mem.cache(fn)

    # decorator called with kwargs
    else:

        def cache_to_disk_decorator(fn):
            return mem.cache(fn)

        return cache_to_disk_decorator
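
Both decorator forms, as a short sketch; extra keyword arguments are forwarded to joblib.Memory:

@cache_to_disk
def slow_square(x):
    return x ** 2

@cache_to_disk(compress=3)    # kwargs reach joblib.Memory
def slow_cube(x):
    return x ** 3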
Example #28
    def __init__(self, steps: list = [], cache_dir=None):
        '''Helper function that just calls build_graph_recur with an empty graph
        Params
        ------
        steps: list
            a list of ModuleSet instances
        cache_dir: str, default=None
            The directory to use as data store by joblib. If None, won't do
            caching.

        Returns
        -------
        G: nx.DiGraph
        '''
        self.steps = steps
        # set up the cache
        self.memory = joblib.Memory(location=cache_dir)
Example #29
def solver_scaled(I, dt, C, T):
    """
    Solve 1D wave equation in dimensionless form.
    """
    # Make a hash of the arguments
    import inspect, hashlib
    data = inspect.getsource(I) + '_' + str(dt) + '_' + \
           str(C) + '_' + str(T)
    # Not fool proof: if x0 changes value, I source is the same...
    hashed_input = hashlib.sha1(data.encode()).hexdigest()

    cachedir = 'tmp_%s' % hashed_input
    is_computed = os.path.isdir(cachedir)

    import joblib
    memory = joblib.Memory(cachedir=cachedir, verbose=1)

    def retrieve(name, data=None):
        print('joblib save of', name)
        return data

    retrieve = memory.cache(retrieve, ignore=['data'])
    save = retrieve

    def action(u, x, t, n):
        if n == 0:
            save('x', x)
            save('t', t)
        save('u%d' % n, u)

    if is_computed:
        print('No need to compute the numerical solution')
        return retrieve
    else:
        print('Computing the numerical solution')
        solver_unscaled(I=I,
                        V=0,
                        f=0,
                        c=1,
                        L=1,
                        dt=dt,
                        C=C,
                        T=T,
                        user_action=action)
        return retrieve
Example #30
def slice_snapshot(
    snapshot: md.Trajectory,
    project_dir: str,
    run: int,
    cache_dir: Optional[str],
) -> Dict[str, md.Trajectory]:
    """
    Slice snapshot to specified state in-place

    .. TODO ::

       The htf.npz file is very slow to load.
       Replace this with a JSON file containing relevant ligand indices only

    Parameters
    ----------
    snapshot : mdtraj.Trajectory
       Snapshot to slice
    project_dir : str
       Path to project directory (e.g. '/home/server/server2/projects/13422')
    run : int
       Run (e.g. '0')
    cache_dir : str or None
       If specified, cache relevant parts of "htf.npz" file in a local directory of this name

    Returns
    -------
    sliced_snapshot : dict of str : mdtraj.Trajectory
      sliced_snapshot[x] where x is one of ['protein', 'old_ligand', 'new_ligand', 'old_complex', 'new_complex']

    """

    get_stored_atom_indices_cached = (
        get_stored_atom_indices if cache_dir is None else joblib.Memory(
            cachedir=cache_dir, verbose=0).cache(get_stored_atom_indices))

    stored_atom_indices = get_stored_atom_indices_cached(project_dir, run)

    sliced_snapshot = dict()
    for key, atom_indices in stored_atom_indices.items():
        sliced_snapshot[key] = md.Trajectory(
            snapshot.xyz[:, atom_indices, :],
            snapshot.topology.subset(atom_indices))

    return sliced_snapshot
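
One caveat worth noting: the cachedir= keyword used above was renamed to location= in joblib 0.12 (the rename Examples #1 and #2 guard against) and later removed, so on a current joblib the cached branch would read, as a sketch:

get_stored_atom_indices_cached = (
    get_stored_atom_indices if cache_dir is None else joblib.Memory(
        location=cache_dir, verbose=0).cache(get_stored_atom_indices))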