Example #1
def test_get_cache_items(tmpdir):
    """Test cache items listing."""
    def func(arg):
        """Dummy function."""
        return arg

    register_hdfs_store_backend()

    mem = Memory(location=tmpdir.strpath,
                 host=__namenode__,
                 backend='hdfs',
                 user='******',
                 verbose=100,
                 compress=False)
    assert not mem.store.get_cache_items()

    cached_func = mem.cache(func)
    for arg in ["test1", "test2", "test3"]:
        cached_func(arg)

    # three items were cached above, one per distinct argument
    assert len(mem.store.get_cache_items()) == 3

    mem.clear()
    assert not mem.store.get_cache_items()
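
For reference, the cache/populate/clear cycle exercised by this test can be reproduced without a custom store backend. The following is a minimal sketch using joblib's default local-filesystem backend; the path and function are illustrative, not part of the test above.

from joblib import Memory

mem = Memory(location="/tmp/joblib_demo", verbose=0)

@mem.cache
def square(x):
    return x * x

square(3)              # computed and written to the cache
square(3)              # served from the cache
mem.clear(warn=False)  # empties the cache directory without a warning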
Example #2
def test_clear_cache(capsys, tmpdir):
    """Check clearing the cache."""
    def func(arg):
        """Dummy function."""
        print("executing function")
        return arg

    register_s3fs_store_backend()

    mem = Memory(location=tmpdir.strpath,
                 backend='s3',
                 verbose=0,
                 backend_options=dict(bucket="test"))
    cached_func = mem.cache(func)
    cached_func("test")

    out, _ = capsys.readouterr()
    assert out == "executing function\n"

    mem.clear()

    cached_func("test")
    out, _ = capsys.readouterr()
    assert out == "executing function\n"

    mem.clear()
    print(mem.store_backend.location)
    assert not os.listdir(mem.store_backend.location)
Example #3
def run_transform(X, y, transform, cv_outer=LeaveOneOut(), n_alphas=1000):

    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import GridSearchCV
    from joblib import Memory
    from sklearn.compose import TransformedTargetRegressor
    from sklearn.preprocessing import PowerTransformer

    # Find alpha range
    alphas = find_alpha_range(X, y, n_alphas=n_alphas)

    list_y_pred = []
    list_y_true = []
    list_models = []

    for train_index, test_index in tqdm(cv_outer.split(X)):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        list_y_true.append(y_test)

        cv_inner = StratifiedKFoldReg(n_splits=5, shuffle=True, random_state=0)

        tmpfolder = mkdtemp()
        memory = Memory(location=tmpfolder)

        pip = make_pipeline(VarianceThreshold(),
                            PCA(),
                            Ridge(max_iter=1e6),
                            memory=memory)

        grid = GridSearchCV(pip,
                            param_grid={'ridge__alpha': alphas},
                            cv=cv_inner,
                            n_jobs=-1,
                            scoring="neg_mean_squared_error")

        regr_trans = TransformedTargetRegressor(
            regressor=grid, transformer=PowerTransformer(method=transform))

        regr_trans.fit(X_train, y_train)
        list_models.append(regr_trans)

        y_pred = regr_trans.predict(X_test)
        list_y_pred.append(y_pred)

        memory.clear(warn=False)
        shutil.rmtree(tmpfolder)

    y_pred = np.concatenate(list_y_pred)
    y_true = np.concatenate(list_y_true)

    return y_pred, y_true, list_models
Example #4
def run(X,
        y,
        cv_outer=StratifiedKFoldReg(n_splits=10, shuffle=True,
                                    random_state=10),
        n_alphas=1000):

    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import Lasso
    from sklearn.model_selection import GridSearchCV
    from joblib import Memory

    # Find alpha range
    alphas = find_alpha_range(X, y, n_alphas=n_alphas)

    list_y_pred = []
    list_y_true = []
    list_models = []

    for train_index, test_index in tqdm(cv_outer.split(X, y)):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]

        list_y_true.append(y_test)

        cv_inner = StratifiedKFoldReg(n_splits=5, shuffle=True, random_state=0)

        tmpfolder = mkdtemp()
        memory = Memory(location=tmpfolder, verbose=0)

        pip = make_pipeline(VarianceThreshold(),
                            PCA(),
                            Lasso(max_iter=1e6),
                            memory=memory)

        grid = GridSearchCV(pip,
                            param_grid={'lasso__alpha': alphas},
                            cv=cv_inner,
                            n_jobs=-1,
                            scoring="neg_mean_squared_error")

        grid.fit(X_train, y_train)
        list_models.append(grid)

        y_pred = grid.predict(X_test)
        list_y_pred.append(y_pred)

        memory.clear(warn=False)
        shutil.rmtree(tmpfolder)

    y_pred = np.concatenate(list_y_pred)
    y_true = np.concatenate(list_y_true)

    return y_pred, y_true, list_models
Example #5
class NeoCache:
    def __init__(self, context, cache_dir=None):
        self.registry = {}
        self.context = context
        if cache_dir:
            self.cache_dir = cache_dir
        else:
            self.cache_dir = os.path.join(HERE, context)
        self.memory = Memory(self.cache_dir, verbose=0)

    def register(self):
        """
        Returns a decorator. The decorator ensures that the function name is
        registered and that the function is handled as a MemorizedFunc.
        """
        def decorator(func):
            if len(inspect.signature(func).parameters) > 0:
                raise TypeError(
                    'The signature of \'{}\' contains input arguments. '
                    'NeoCache only supports registering functions without '
                    'any input arguments.'.format(func.__name__))
            func = self.memory.cache(func)
            self.registry[func.__name__] = func
            return func

        return decorator

    def clear_cache(self):
        """
        Clears the cache by deleting all the files in the cache directory.
        """
        self.memory.clear()

    def update(self):
        """
        Clears the cache and executes all of the registered functions, so that
        the cache contains the latest return values.
        """
        self.clear_cache()
        for f in self.registry.values():
            f()
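
A hypothetical usage sketch of NeoCache follows (the context string and cache directory are illustrative): only zero-argument functions may be registered, and update() re-populates the cache from scratch.

cache = NeoCache(context="daily_report", cache_dir="/tmp/neocache_demo")

@cache.register()
def latest_summary():      # zero-argument functions only
    return {"rows": 42}

latest_summary()           # first call is computed and cached by joblib
cache.update()             # clears the cache and re-runs every registered function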
Example #6
def test_clear_cache(tmpdir):
    """Test clearing cache."""
    def func(arg):
        """Dummy function."""
        print("executing function")
        return arg

    register_hdfs_store_backend()

    mem = Memory(location=tmpdir.strpath,
                 host=__namenode__,
                 backend='hdfs',
                 user='******',
                 verbose=100,
                 compress=False)
    cached_func = mem.cache(func)
    cached_func("test")

    mem.clear()

    assert not mem.store.object_exists(mem.store.cachedir)
Example #7
class CacheManager(object):
    """The librosa cache manager class wraps joblib.Memory
    with a __call__ attribute, so that it may act as a function.

    Additionally, it provides a caching level filter, so that
    different functions can be cached or not depending on the user's
    preference for speed vs. storage usage.
    """
    def __init__(self, *args, **kwargs):

        level = kwargs.pop("level", 10)

        # Initialize the memory object
        self.memory = Memory(*args, **kwargs)
        # The level parameter controls which data we cache
        # smaller numbers mean less caching
        self.level = level

    def __call__(self, level):
        """Example usage:

        @cache(level=2)
        def semi_important_function(some_arguments):
            ...
        """
        def wrapper(function):
            """Decorator function.  Adds an input/output cache to
            the specified function."""
            if self.memory.location is not None and self.level >= level:
                return _decorator_apply(self.memory.cache, function)

            else:
                return function

        return wrapper

    def clear(self, *args, **kwargs):
        return self.memory.clear(*args, **kwargs)

    def eval(self, *args, **kwargs):
        return self.memory.eval(*args, **kwargs)

    def format(self, *args, **kwargs):
        return self.memory.format(*args, **kwargs)

    def reduce_size(self, *args, **kwargs):
        return self.memory.reduce_size(*args, **kwargs)

    def warn(self, *args, **kwargs):
        return self.memory.warn(*args, **kwargs)
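
A hypothetical usage sketch of this CacheManager (it assumes the _decorator_apply helper from the surrounding librosa module): functions decorated at a level at or below the manager's level are cached, provided a cache location is set.

cache = CacheManager("/tmp/librosa_cache_demo", verbose=0, level=20)

@cache(level=10)
def expensive_transform(x):
    return x ** 2

expensive_transform(4)     # cached: a location is set and 10 <= cache.level
cache.clear(warn=False)    # delegates to joblib.Memory.clear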
Example #8
def test_get_items(tmpdir):
    """Test cache items listing."""
    def func(arg):
        """Dummy function."""
        return arg

    register_s3fs_store_backend()

    mem = Memory(location=tmpdir.strpath,
                 backend='s3',
                 verbose=0,
                 backend_options=dict(bucket="test"))
    assert not mem.store_backend.get_items()

    cached_func = mem.cache(func)
    for arg in ["test1", "test2", "test3"]:
        cached_func(arg)

    # get_items always returns an empty list for the moment
    assert not mem.store_backend.get_items()

    mem.clear()
    assert not mem.store_backend.get_items()
Example #9
        labels = []

        for value in train_label_list:
            images = os.listdir(directory + "/" + value)
            for image in images:
                ig = cv2.imread(directory + "/" + value + "/" + image, 0)
                features.append(ig)
                labels.append(value)

        # Extract the hog features
        list_hog_fd = []
        for feature in features:
            fd = hog(feature,
                     orientations=9,
                     pixels_per_cell=(i, i),
                     cells_per_block=(j, j),
                     visualize=False)
            list_hog_fd.append(fd)
        hog_features = np.array(list_hog_fd, 'float64')
        print("Count of digits in dataset", Counter(labels))
        # Create a linear SVM object
        clf = LinearSVC()
        # Perform the training
        clf.fit(hog_features, labels)
        # Save the classifier
        joblib.dump(clf,
                    "test_ppc" + str(i) + "_cpb" + str(j) + ".pkl",
                    compress=3)
        mem = Memory("./cachedir", verbose=0)
        mem.clear(warn=False)
os.system('hogRunner.py')
Example #10
    def pipeline_grid(self, x_train, y_train):
        # Make pipeline
        location = 'cachedir'
        memory = Memory(location=location, verbose=10)
        pipe = Pipeline(
            [('reduce_dim', PCA()),
             ('feature_selection', SelectKBest(f_classif)),
             ('classify', LogisticRegression(solver='saga', penalty='l1'))],
            memory=memory)

        # In[6]:

        # Set parameters according to user inputs
        # PCA parameters
        max_components = 0.99
        min_components = 0.3
        number_pc = 10
        range_dimreduction = np.linspace(min_components, max_components,
                                         number_pc).reshape(number_pc, )

        # ANOVA parameters
        pca = PCA(n_components=min_components)
        pca.fit(X=x_train)
        min_number_anova = pca.n_components_
        pca = PCA(n_components=max_components)
        pca.fit(X=x_train)
        max_number_anova = pca.n_components_
        number_anova = 3
        range_feature_selection = np.arange(min_number_anova, max_number_anova,
                                            10)

        # Classifier parameters
        max_l1_ratio = 1
        min_l1_ratio = 0.5
        number_l1_ratio = 2
        range_l1_ratio = np.linspace(min_l1_ratio, max_l1_ratio,
                                     number_l1_ratio).reshape(
                                         number_l1_ratio, )

        # Overall grid search settings
        param_grid = [
            {
                'reduce_dim__n_components': range_dimreduction,
                'feature_selection__k': range_feature_selection,
                'classify__l1_ratio': [max_l1_ratio],
            },
        ]

        # In[ ]:

        # Train
        grid = GridSearchCV(pipe, n_jobs=-1, param_grid=param_grid)
        grid.fit(x_train, y_train)

        # In[8]:
        # Delete the temporary cache before exiting
        memory.clear(warn=False)
        rmtree(location)

        # In[9]:
        return grid
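
The pattern above (a scikit-learn Pipeline backed by a joblib Memory, cleared and removed after the grid search) can be reduced to the following self-contained sketch; the dataset and hyperparameter grid are illustrative only.

from tempfile import mkdtemp
from shutil import rmtree
from joblib import Memory
from sklearn.datasets import load_iris
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV
from sklearn.pipeline import Pipeline

location = mkdtemp()
memory = Memory(location=location, verbose=0)
pipe = Pipeline([('reduce_dim', PCA()),
                 ('classify', LogisticRegression(max_iter=1000))],
                memory=memory)

X, y = load_iris(return_X_y=True)
grid = GridSearchCV(pipe, param_grid={'reduce_dim__n_components': [2, 3]}, cv=3)
grid.fit(X, y)            # transformer fits are memoized across parameter settings

memory.clear(warn=False)  # drop the cached transformer fits
rmtree(location)          # remove the temporary cache directory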
Example #11
File: track.py Project: dPys/PyNets
def track_ensemble(target_samples,
                   atlas_data_wm_gm_int,
                   labels_im_file,
                   recon_path,
                   sphere,
                   traversal,
                   curv_thr_list,
                   step_list,
                   track_type,
                   maxcrossing,
                   roi_neighborhood_tol,
                   min_length,
                   waymask,
                   B0_mask,
                   t1w2dwi,
                   gm_in_dwi,
                   vent_csf_in_dwi,
                   wm_in_dwi,
                   tiss_class,
                   BACKEND='threading'):
    """
    Perform native-space ensemble tractography, restricted to a vector of ROI
    masks.

    Parameters
    ----------
    target_samples : int
        Total number of streamline samples specified to generate streams.
    atlas_data_wm_gm_int : str
        File path to Nifti1Image in T1w-warped native diffusion space,
        restricted to wm-gm interface.
    parcels : list
        List of 3D boolean numpy arrays of atlas parcellation ROI masks from a
        Nifti1Image in T1w-warped native diffusion space.
    recon_path : str
        File path to diffusion reconstruction model.
    tiss_classifier : str
        Tissue classification method.
    sphere : obj
        DiPy object for modeling diffusion directions on a sphere.
    traversal : str
        The statistical approach to tracking. Options are: det (deterministic),
        closest (clos), and prob (probabilistic).
    curv_thr_list : list
        List of integer curvature thresholds used to perform ensemble tracking.
    step_list : list
        List of float step-sizes used to perform ensemble tracking.
    track_type : str
        Tracking algorithm used (e.g. 'local' or 'particle').
    maxcrossing : int
        Maximum number of diffusion directions that can be assumed per voxel
        while tracking.
    roi_neighborhood_tol : float
        Distance (in the units of the streamlines, usually mm). If any
        coordinate in the streamline is within this distance from the center
        of any voxel in the ROI, the filtering criterion is set to True for
        this streamline, otherwise False. Defaults to the distance between
        the center of each voxel and the corner of the voxel.
    min_length : int
        Minimum fiber length threshold in mm.
    waymask_data : ndarray
        Tractography constraint mask array in native diffusion space.
    B0_mask_data : ndarray
        B0 brain mask data.
    n_seeds_per_iter : int
        Number of seeds from which to initiate tracking for each unique
        ensemble combination. By default this is set to 250.
    max_length : int
        Maximum number of steps to restrict tracking.
    pft_back_tracking_dist : float
        Distance in mm to back track before starting the particle filtering
        tractography. The total particle filtering tractography distance is
        equal to back_tracking_dist + front_tracking_dist. By default this is
        set to 2 mm.
    pft_front_tracking_dist : float
        Distance in mm to run the particle filtering tractography after the
        back track distance. The total particle filtering tractography
        distance is equal to back_tracking_dist + front_tracking_dist. By
        default this is set to 1 mm.
    particle_count : int
        Number of particles to use in the particle filter.
    min_separation_angle : float
        The minimum angle between directions [0, 90].

    Returns
    -------
    streamlines : ArraySequence
        DiPy list/array-like object of streamline points from tractography.

    References
    ----------
    .. [1] Takemura, H., Caiafa, C. F., Wandell, B. A., & Pestilli, F. (2016).
      Ensemble Tractography. PLoS Computational Biology.
      https://doi.org/10.1371/journal.pcbi.1004692
    """
    import os
    import gc
    import time
    import warnings
    import tempfile
    from joblib import Parallel, delayed, Memory
    import itertools
    import pickle5 as pickle
    from pynets.dmri.track import run_tracking
    from colorama import Fore, Style
    from pynets.dmri.utils import generate_sl
    from nibabel.streamlines.array_sequence import concatenate, ArraySequence
    from pynets.core.utils import save_3d_to_4d
    from nilearn.masking import intersect_masks
    from nilearn.image import math_img
    from pynets.core.utils import load_runconfig
    from dipy.tracking import utils

    warnings.filterwarnings("ignore")

    pickle.HIGHEST_PROTOCOL = 5
    joblib_dir = tempfile.mkdtemp()
    os.makedirs(joblib_dir, exist_ok=True)

    hardcoded_params = load_runconfig()
    nthreads = hardcoded_params["omp_threads"][0]
    os.environ['MKL_NUM_THREADS'] = str(nthreads)
    os.environ['OPENBLAS_NUM_THREADS'] = str(nthreads)
    n_seeds_per_iter = \
        hardcoded_params['tracking']["n_seeds_per_iter"][0]
    max_length = \
        hardcoded_params['tracking']["max_length"][0]
    pft_back_tracking_dist = \
        hardcoded_params['tracking']["pft_back_tracking_dist"][0]
    pft_front_tracking_dist = \
        hardcoded_params['tracking']["pft_front_tracking_dist"][0]
    particle_count = \
        hardcoded_params['tracking']["particle_count"][0]
    min_separation_angle = \
        hardcoded_params['tracking']["min_separation_angle"][0]
    min_streams = \
        hardcoded_params['tracking']["min_streams"][0]
    seeding_mask_thr = hardcoded_params['tracking']["seeding_mask_thr"][0]
    timeout = hardcoded_params['tracking']["track_timeout"][0]

    all_combs = list(itertools.product(step_list, curv_thr_list))

    # Construct seeding mask
    seeding_mask = f"{os.path.dirname(labels_im_file)}/seeding_mask.nii.gz"
    if waymask is not None and os.path.isfile(waymask):
        waymask_img = math_img(f"img > {seeding_mask_thr}",
                               img=nib.load(waymask))
        waymask_img.to_filename(waymask)
        atlas_data_wm_gm_int_img = intersect_masks(
            [
                waymask_img,
                math_img("img > 0.001", img=nib.load(atlas_data_wm_gm_int)),
                math_img("img > 0.001", img=nib.load(labels_im_file))
            ],
            threshold=1,
            connected=False,
        )
        nib.save(atlas_data_wm_gm_int_img, seeding_mask)
    else:
        atlas_data_wm_gm_int_img = intersect_masks(
            [
                math_img("img > 0.001", img=nib.load(atlas_data_wm_gm_int)),
                math_img("img > 0.001", img=nib.load(labels_im_file))
            ],
            threshold=1,
            connected=False,
        )
        nib.save(atlas_data_wm_gm_int_img, seeding_mask)

    tissues4d = save_3d_to_4d([
        B0_mask, labels_im_file, seeding_mask, t1w2dwi, gm_in_dwi,
        vent_csf_in_dwi, wm_in_dwi
    ])

    # Commence Ensemble Tractography
    start = time.time()
    stream_counter = 0

    all_streams = []
    ix = 0

    memory = Memory(location=joblib_dir, mmap_mode='r+', verbose=0)
    os.chdir(f"{memory.location}/joblib")

    @memory.cache
    def load_recon_data(recon_path):
        import h5py
        with h5py.File(recon_path, 'r') as hf:
            recon_data = hf['reconstruction'][:].astype('float32')
        return recon_data

    recon_shelved = load_recon_data.call_and_shelve(recon_path)

    @memory.cache
    def load_tissue_data(tissues4d):
        return nib.load(tissues4d)

    tissue_shelved = load_tissue_data.call_and_shelve(tissues4d)

    try:
        while float(stream_counter) < float(target_samples) and \
                float(ix) < 0.50*float(len(all_combs)):
            with Parallel(n_jobs=nthreads,
                          backend=BACKEND,
                          mmap_mode='r+',
                          verbose=0) as parallel:

                out_streams = parallel(
                    delayed(run_tracking)
                    (i, recon_shelved, n_seeds_per_iter, traversal,
                     maxcrossing, max_length, pft_back_tracking_dist,
                     pft_front_tracking_dist, particle_count,
                     roi_neighborhood_tol, min_length, track_type,
                     min_separation_angle, sphere, tiss_class, tissue_shelved)
                    for i in all_combs)

                out_streams = list(filter(None, out_streams))

                if len(out_streams) > 1:
                    out_streams = concatenate(out_streams, axis=0)
                else:
                    continue

                if waymask is not None and os.path.isfile(waymask):
                    try:
                        out_streams = out_streams[utils.near_roi(
                            out_streams,
                            np.eye(4),
                            np.asarray(
                                nib.load(waymask).dataobj).astype("bool"),
                            tol=int(round(roi_neighborhood_tol * 0.50, 1)),
                            mode="all")]
                    except BaseException:
                        print(f"\n{Fore.RED}No streamlines generated in "
                              f"waymask vacinity\n")
                        print(Style.RESET_ALL)
                        return None

                if len(out_streams) < min_streams:
                    ix += 1
                    print(f"\n{Fore.YELLOW}Fewer than {min_streams} "
                          f"streamlines tracked "
                          f"on last iteration...\n")
                    print(Style.RESET_ALL)
                    if ix > 5:
                        print(f"\n{Fore.RED}No streamlines generated\n")
                        print(Style.RESET_ALL)
                        return None
                    continue
                else:
                    ix -= 1

                stream_counter += len(out_streams)
                all_streams.extend([generate_sl(i) for i in out_streams])
                del out_streams

                print("%s%s%s%s" % (
                    "\nCumulative Streamline Count: ",
                    Fore.CYAN,
                    stream_counter,
                    "\n",
                ))
                gc.collect()
                print(Style.RESET_ALL)

                if time.time() - start > timeout:
                    print(f"\n{Fore.RED}Warning: Tractography timed "
                          f"out: {time.time() - start}")
                    print(Style.RESET_ALL)
                    memory.clear(warn=False)
                    return None

    except RuntimeError as e:
        print(f"\n{Fore.RED}Error: Tracking failed due to:\n{e}\n")
        print(Style.RESET_ALL)
        memory.clear(warn=False)
        return None

    print("Tracking Complete: ", str(time.time() - start))

    memory.clear(warn=False)

    del parallel, all_combs
    gc.collect()

    if stream_counter != 0:
        print('Generating final ...')
        return ArraySequence([ArraySequence(i) for i in all_streams])
    else:
        print(f"\n{Fore.RED}No streamlines generated!")
        print(Style.RESET_ALL)
        return None
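
The call_and_shelve pattern used above can be illustrated in isolation. This is a minimal sketch (a random array stands in for the reconstruction data): the cached call returns a lightweight MemorizedResult, and each worker materializes the data with .get() only when it needs it.

from joblib import Memory, Parallel, delayed
import numpy as np

memory = Memory(location="/tmp/shelve_demo", verbose=0)

@memory.cache
def load_big_array(seed):
    rng = np.random.default_rng(seed)
    return rng.standard_normal((1000, 1000))

shelved = load_big_array.call_and_shelve(0)   # reference to the cached result

def column_sum(shelved_ref, col):
    return shelved_ref.get()[:, col].sum()    # data loaded lazily inside the worker

sums = Parallel(n_jobs=2)(delayed(column_sum)(shelved, c) for c in range(4))
memory.clear(warn=False)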
Example #12
    def pipeline_grid(self, x_train, y_train):
        # Make pipeline

        location = 'cachedir'
        memory = Memory(location=location, verbose=10)
        pipe = Pipeline(steps=[
                ('reduce_dim', 'passthrough'),
                ('feature_selection', 'passthrough'),
                ('estimator', 'passthrough'),
            ], 
            memory=memory
        )

        # Feature reduction parameters
        range_dimreduction = np.linspace(self.min_components, self.max_components, self.number_pc).reshape(self.number_pc,)
        
        # Feature selection parameters
        print("Identifing components after PCA...\n")
        pca = PCA(n_components=self.min_components)
        pca.fit(X=x_train)
        min_number_anova = pca.n_components_
        pca = PCA(n_components=self.max_components)
        pca.fit(X=x_train)
        max_number_anova = pca.n_components_
        range_feature_selection = np.arange(min_number_anova, max_number_anova, self.feature_selection_step)
        
        # Set parameters of gridCV
        print("Setting parameters of gridCV...\n")
        param_grid = [
            {   'reduce_dim':[PCA(iterated_power=7)],
                'reduce_dim__n_components': range_dimreduction,
                'feature_selection':[RFE(estimator=SVC())],
                'feature_selection__n_features_to_select': range_feature_selection,
                'estimator':[SVC()],
                'estimator__kernel': ['rbf', 'linear'],
                'estimator__C': self.range_C,
                'estimator__gamma': self.range_gamma,
                
            }, 
            {   'reduce_dim':[PCA(iterated_power=7)],
                'reduce_dim__n_components': range_dimreduction,
                'feature_selection':[SelectKBest(f_classif)],
                'feature_selection__k': range_feature_selection,
                'estimator':[LogisticRegression()],
                'estimator__penalty': ['l1', 'l2'],
            }, 
        ]
        
        iteration_num = (
            len(range_dimreduction) * len(range_feature_selection) * 2 * len(self.range_C) * len(self.range_gamma) + 
            len(range_dimreduction) * len(range_feature_selection) * 2
        )
        
        # Train
        cv = StratifiedKFold(n_splits=self.k)
        if self.search_strategy == 'grid':
            model = GridSearchCV(
                pipe, n_jobs=self.n_jobs, param_grid=param_grid, cv=cv, 
                scoring = make_scorer(accuracy_score), refit=True
            )
            print(f"GridSearchCV fitting (about {iteration_num} times iteration)...\n")

        elif self.search_strategy == 'random':
            model = RandomizedSearchCV(
                pipe, n_jobs=self.n_jobs, param_distributions=param_grid, cv=cv, 
                scoring = make_scorer(accuracy_score), refit=True, n_iter=self.n_iter_of_randomedsearch,
            )
        
            print(f"RandomizedSearchCV fitting (about {iteration_num} times iteration)...\n")
        else:
            print(f"Please specify which search strategy!\n")
            return

        model.fit(x_train, y_train)

        # Delete the temporary cache before exiting
        memory.clear(warn=False)
        rmtree(location)

        # In[9]:
        return model
Example #13
"""Test sent_tokenizer."""
from pathlib import Path
from joblib import Memory
from hlm_texts import hlm_en
from hlm_texts import sent_tokenizer

memory = Memory(location=Path("~/joblib_cache").expanduser())
# cache location for sent_tokenizer: Path("~/joblib_cache").expanduser()
memory.clear(warn=False)  # clear the cache without a warning


def test_sent_tokenizer():
    """test_sent_tokenizer."""
    para_list = hlm_en.splitlines()[:12]
    assert len(sent_tokenizer(para_list, 'en')) == 19

    assert len(sent_tokenizer("\n".join(para_list), 'en')) == 19
Example #14
class CacheManager(object):
    '''The librosa cache manager class wraps joblib.Memory
    with a __call__ attribute, so that it may act as a function.

    Additionally, it provides a caching level filter, so that
    different functions can be cached or not depending on the user's
    preference for speed vs. storage usage.
    '''

    def __init__(self, *args, **kwargs):

        level = kwargs.pop('level', 10)

        # Initialize the memory object
        self.memory = Memory(*args, **kwargs)
        # The level parameter controls which data we cache
        # smaller numbers mean less caching
        self.level = level

    def __call__(self, level):
        '''Example usage:

        @cache(level=2)
        def semi_important_function(some_arguments):
            ...
        '''
        def wrapper(function):
            '''Decorator function.  Adds an input/output cache to
            the specified function.'''

            from decorator import FunctionMaker

            def decorator_apply(dec, func):
                """Decorate a function by preserving the signature even if dec
                is not a signature-preserving decorator.

                This recipe is derived from
                http://micheles.googlecode.com/hg/decorator/documentation.html#id14
                """

                return FunctionMaker.create(
                    func, 'return decorated(%(signature)s)',
                    dict(decorated=dec(func)), __wrapped__=func)

            if self.memory.location is not None and self.level >= level:
                return decorator_apply(self.memory.cache, function)

            else:
                return function
        return wrapper

    def clear(self, *args, **kwargs):
        return self.memory.clear(*args, **kwargs)

    def eval(self, *args, **kwargs):
        return self.memory.eval(*args, **kwargs)

    def format(self, *args, **kwargs):
        return self.memory.format(*args, **kwargs)

    def reduce_size(self, *args, **kwargs):
        return self.memory.reduce_size(*args, **kwargs)

    def warn(self, *args, **kwargs):
        return self.memory.warn(*args, **kwargs)
Example #15
import argparse
import datetime, subprocess

from joblib import Memory

parser = argparse.ArgumentParser(description='')
parser.add_argument('-c', '--clearCache', action='store_true', default=False)
parser.add_argument('-d', '--date', type=str, nargs='+', default=[])
parser.add_argument('-s', '--startHour', type=int, default=0)
parser.add_argument('-e', '--endHour', type=int, default=24)
parser.add_argument('-H', '--histogram', action='store_true', default=False)
parser.add_argument('-t', '--timeseries', action='store_true', default=False)
parser.add_argument('-S', '--summary', action='store_true', default=False)
parser.add_argument('-m', '--map', action='store_true', default=False)
parser.add_argument('--day', type=str, nargs='+', default=day_names)
args = parser.parse_args()

memory = Memory(location='./cron-parse-cachedir', verbose=0)
if args.clearCache: memory.clear()

EPOCH, MAX_CHARGING_TIME = 5, 95  # Minutes


def parse_timestamp(line):
    elems = line.split(' ')
    if len(elems) > 1:  # Old version: Linux date
        year, month, date, time_ = elems[-1], elems[1], elems[2], elems[3]
        month = month_name_to_num.index(month)
        assert month != -1
        hour, minute = time_.split(':')[:2]
    else:  # New version: Python date
        year, month, date, hour, minute = line.split('-')
    return (int(year), int(month), int(date), int(hour), int(minute))
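
The Memory object above is presumably meant to memoize the heavy per-day log parsing; a hypothetical sketch of that usage follows (parse_day and its path handling are illustrative, not part of the original script).

@memory.cache
def parse_day(path):
    # expensive pass over one day's log file, cached on disk;
    # running the script with -c/--clearCache forces a re-parse
    with open(path) as fh:
        return [parse_timestamp(line) for line in fh if line.strip()]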
Example #16
    def fit(self, X, y=None, groups=None, **fit_params):
        """
        Fit Nested CV with all sets of parameters.
        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array-like of shape (n_samples, n_output) or (n_samples,), optional
            Target relative to X for classification or regression;
            None for unsupervised learning.
        groups : array-like, with shape (n_samples,), optional
            Group labels for the samples used while splitting the dataset into
            train/test set. Only used in conjunction with a "Group" :term:`cv`
            instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).
        **fit_params : dict of string -> object
            Parameters passed to the ``fit`` method of the estimator
        Returns
        -------
            The values are not returned directly; they are accessible from the class object itself.
            You should be able to access:
            outer_pred
                 A dictionary to access the train indexes, the test indexes and the model  of each outer loop
                 for further post-processing. Keys are respectively train, test and model with values being
                 lists of length outer_cv.get_n_splits().
            outer_results
                A dictionary to access the outer test scores, the best inner scores, the best inner parameters (and
                outer_train_scores if return_train_score == True). Keys are respectively outer_test_score,
                best_inner_score, best_inner_params (and outer_train_score) with values being lists of length
                outer_cv.get_n_splits().
            inner_results
                A list of dictionary of length outer_cv.get_n_splits().
                Each dictionary having params, mean_test_score, std_test_score (and mean_train_score, std_train_score
                if return_train_score == True) as keys and values being the list of params or associated results
                over the inner loops.
            best_estimator_
                Model when refit on the whole dataset with hyperparameter optimized by GridSearch CV.
                Available only if refit == True.
        """
        X, y = self._check_X_Y(X, y)

        self._check_pipeline_dic(self.pipeline_dic)
        self.model = self._get_pipeline(self.pipeline_dic)
        self.params_grid = self._get_parameters_grid(self.params_dic)

        outer_cv = check_cv(self.outer_cv, y, is_classifier(
            self.model[-1]))  # Last element of pipeline = estimator
        inner_cv = check_cv(self.inner_cv, y, is_classifier(
            self.model[-1]))  # Last element of pipeline = estimator

        if not isinstance(self.randomized_search, bool):
            raise TypeError('randomized_search argument must be a boolean')

        self.outer_pred = {
            'train': [],
            'test': [],
            'model': [],
            'predict_train': [],
            'predict_test': []
        }
        if hasattr(self.model[-1], 'predict_proba'):
            self.outer_pred.update({
                'predict_proba_train': [],
                'predict_proba_test': []
            })
        if hasattr(self.model[-1], 'decision_function'):
            self.outer_pred.update({
                'decision_function_train': [],
                'decision_function_test': []
            })

        self.outer_results = {
            'outer_test_score': [],
            'best_inner_score': [],
            'best_inner_params': []
        }
        self.inner_results = []
        if self.return_train_score:
            self.outer_results.update({'outer_train_score': []})

        # From sklearn.model_selection._search.BaseSearchCV
        self.scorers, self.multimetric_ = _check_multimetric_scoring(
            self.model, scoring=self.metric)
        if self.multimetric_:
            if callable(self.refit_inner):
                raise ValueError(
                    'If inner loops use multimetric scoring and the user wants to refit according to a '
                    'callable, the latter must be passed in a dictionary {score: callable} with score '
                    'being the score name with which the score on different sets will be calculated'
                )
            if self.refit_inner is not False and (
                    not isinstance(self.refit_inner, str) or
                    # This will work for both dict / list (tuple)
                    self.refit_inner not in self.scorers):
                if isinstance(self.refit_inner, Mapping):
                    if len(self.refit_inner.keys()) > 1:
                        raise ValueError(
                            'refit_inner dict must have only one key, got %d' %
                            len(self.refit_inner.keys()))
                    self.refit_metric = list(self.refit_inner.keys())[0]
                    self.refit_inner = self.refit_inner[self.refit_metric]
                else:
                    raise ValueError(
                        "For multi-metric scoring, the parameter "
                        "refit must be set to a scorer key or a "
                        "dict with scorer key and callable value to refit an estimator with the "
                        "best parameter setting on the whole "
                        "data and make the best_* attributes "
                        "available for that metric. If this is "
                        "not needed, refit should be set to "
                        "False explicitly. %r was passed." % self.refit_inner)
            else:
                self.refit_metric = self.refit_inner
        else:
            self.refit_metric = 'score'
            if self.refit_inner is True:
                self.refit_inner = 'score'

        for k_outer, (train_outer_index, test_outer_index) in enumerate(
                outer_cv.split(X, y, groups)):
            if self.verbose > 1:
                print('\n-----------------\n{0}/{1} <-- Current outer fold'.
                      format(k_outer + 1, outer_cv.get_n_splits()))
            X_train_outer, X_test_outer = X[train_outer_index], X[
                test_outer_index]
            y_train_outer, y_test_outer = y[train_outer_index], y[
                test_outer_index]
            with tempfile.TemporaryDirectory() as location:
                memory = Memory(location=location, verbose=0)
                inner_model = clone(self.model)
                inner_model.set_params(memory=memory)
                if self.randomized_search:
                    pipeline_inner = RandomizedSearchCV(
                        inner_model,
                        self.params_grid,
                        scoring=self.scorers,
                        n_jobs=self.n_jobs,
                        cv=inner_cv,
                        n_iter=self.randomized_search_iter,
                        return_train_score=self.return_train_score,
                        verbose=self.verbose - 1,
                        pre_dispatch=self.pre_dispatch,
                        refit=self.refit_inner,
                        random_state=self.random_state)
                else:
                    pipeline_inner = GridSearchCV(
                        inner_model,
                        self.params_grid,
                        scoring=self.scorers,
                        n_jobs=self.n_jobs,
                        cv=inner_cv,
                        return_train_score=self.return_train_score,
                        verbose=self.verbose - 1,
                        pre_dispatch=self.pre_dispatch,
                        refit=self.refit_inner)
                pipeline_inner.fit(X_train_outer,
                                   y_train_outer,
                                   groups=groups,
                                   **fit_params)
                self.inner_results.append({
                    'params':
                    pipeline_inner.cv_results_['params'],
                    'mean_test_score':
                    pipeline_inner.cv_results_['mean_test_%s' %
                                               self.refit_metric],
                    'std_test_score':
                    pipeline_inner.cv_results_['std_test_%s' %
                                               self.refit_metric]
                })
                if self.return_train_score:
                    self.inner_results[-1].update({
                        'mean_train_score':
                        pipeline_inner.cv_results_['mean_train_%s' %
                                                   self.refit_metric],
                        'std_train_score':
                        pipeline_inner.cv_results_['std_train_%s' %
                                                   self.refit_metric]
                    })
                if self.verbose > 2:
                    for params_dict in pipeline_inner.cv_results_['params']:
                        mean_test_score = pipeline_inner.cv_results_[
                            'mean_test_%s' % self.refit_metric]
                        index_params_dic = pipeline_inner.cv_results_[
                            'params'].index(params_dict)
                        print('\t\t Params: {0}, Mean inner score: {1}'.format(
                            params_dict, mean_test_score[index_params_dic]))
                self.outer_results['best_inner_score'].append(
                    pipeline_inner.cv_results_['mean_test_%s' %
                                               self.refit_metric][
                                                   pipeline_inner.best_index_]
                )  # Because best_score doesn't exist if refit_inner is a callable
                self.outer_results['best_inner_params'].append(
                    pipeline_inner.best_params_)
                if self.return_train_score:
                    self.outer_results['outer_train_score'].append(
                        self.scorers[self.refit_metric](
                            pipeline_inner.best_estimator_, X_train_outer,
                            y_train_outer))
                self.outer_results['outer_test_score'].append(
                    self.scorers[self.refit_metric](
                        pipeline_inner.best_estimator_, X_test_outer,
                        y_test_outer))
                if self.verbose > 1:
                    print(
                        '\nResults for outer fold:\nBest inner parameters was: {0}'
                        .format(self.outer_results['best_inner_params'][-1]))
                    print('Outer score: {0}'.format(
                        self.outer_results['outer_test_score'][-1]))
                    print('Inner score: {0}'.format(
                        self.outer_results['best_inner_score'][-1]))
                self.outer_pred['train'].append(train_outer_index)
                self.outer_pred['test'].append(test_outer_index)
                self.outer_pred['model'].append(pipeline_inner.best_estimator_)
                self.outer_pred['predict_train'].append(
                    pipeline_inner.best_estimator_.predict(X_train_outer))
                self.outer_pred['predict_test'].append(
                    pipeline_inner.best_estimator_.predict(X_test_outer))
                if hasattr(pipeline_inner.best_estimator_[-1],
                           'predict_proba'):
                    self.outer_pred['predict_proba_train'].append(
                        pipeline_inner.best_estimator_.predict_proba(
                            X_train_outer))
                    self.outer_pred['predict_proba_test'].append(
                        pipeline_inner.best_estimator_.predict_proba(
                            X_test_outer))
                if hasattr(pipeline_inner.best_estimator_[-1],
                           'decision_function'):
                    self.outer_pred['decision_function_train'].append(
                        pipeline_inner.best_estimator_.decision_function(
                            X_train_outer))
                    self.outer_pred['decision_function_test'].append(
                        pipeline_inner.best_estimator_.decision_function(
                            X_test_outer))
                memory.clear(warn=False)
        if self.verbose > 0:
            print('\nOverall outer score (mean +/- std): {0} +/- {1}'.format(
                np.mean(self.outer_results['outer_test_score']),
                np.std(self.outer_results['outer_test_score'])))
            print('Best params by outer fold:')
            for i, params_dict in enumerate(
                    self.outer_results['best_inner_params']):
                print('\t Outer fold {0}: {1}'.format(i + 1, params_dict))
            print('\n')

        # Store the only scorer not as a dict for single metric evaluation
        self.scorer_ = self.scorers if self.multimetric_ else self.scorers[
            'score']

        # If refit is True Hyperparameter optimization on whole dataset and fit with best params
        if self.refit_outer:
            print('=== Refit ===')
            location = 'cachedir'
            memory = Memory(location=location, verbose=0)
            final_model = clone(self.model)
            final_model.set_params(memory=memory)
            pipeline_refit = GridSearchCV(
                final_model,
                self.params_grid,
                scoring=self.scorers[self.refit_metric],
                n_jobs=self.n_jobs,
                cv=outer_cv,
                verbose=self.verbose - 1)
            pipeline_refit.fit(X, y, groups=groups, **fit_params)
            self.best_estimator_ = pipeline_refit.best_estimator_
            memory.clear(warn=False)
            rmtree(location)
Example #17
class MSCOAL(SCOAL):
    
    def __init__(self, 
                estimator=LinearRegression(), 
                max_split = 10,
                validation_size=0.2,
                random_state=42,
                n_jobs=1,
                cache=False,
                matrix='sparse',
                verbose=False):
        
        self.estimator = estimator
        self.max_split = max_split
        self.validation_size=validation_size
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.cache=cache
        self.matrix=matrix
        self.verbose = verbose
        self.is_regressor = is_regressor(estimator)
    
    def _split_row_clusters(self,data,coclusters,models,n_jobs):
        row_clusters, col_clusters = coclusters
        n_row_clusters, n_col_clusters  = np.unique(row_clusters).size, np.unique(col_clusters).size
        results = self._compute_clusterwise(data,coclusters,models,self._score_rows,n_jobs)
        scores = np.zeros((row_clusters.size,n_row_clusters))
        for i in range(n_row_clusters):
            for j in range(n_col_clusters):
                scores[:,i] += results[i][j] 
        cluster_to_split = scores.mean(axis=0).argmax()
        rows = np.where(row_clusters==cluster_to_split)[0]
        rows_scores = scores[row_clusters==cluster_to_split,cluster_to_split]
        rows = rows[np.argsort(rows_scores)]
        rows_scores = np.sort(rows_scores)
        rows1 = np.array_split(rows[rows_scores==0],2)[1]
        rows2 = np.array_split(rows[rows_scores>0],2)[1]
        rows = np.concatenate((rows1,rows2))
        new_row_clusters = row_clusters
        new_row_clusters[rows] = n_row_clusters

        return new_row_clusters, col_clusters
    
    def _split_col_clusters(self,data,coclusters,models,n_jobs):
        row_clusters, col_clusters = coclusters
        n_row_clusters, n_col_clusters  = np.unique(row_clusters).size, np.unique(col_clusters).size
        results = self._compute_clusterwise(data,coclusters,models,self._score_cols,n_jobs)
        scores = np.zeros((col_clusters.size,n_col_clusters))
        for i in range(n_row_clusters):
            for j in range(n_col_clusters):
                scores[:,j] += results[i][j] 
        cluster_to_split = scores.mean(axis=0).argmax()
        cols = np.where(col_clusters==cluster_to_split)[0]
        cols_scores = scores[col_clusters==cluster_to_split,cluster_to_split]
        cols = cols[np.argsort(cols_scores)]
        cols_scores = np.sort(cols_scores)
        cols1 = np.array_split(cols[cols_scores==0],2)[1]
        cols2 = np.array_split(cols[cols_scores>0],2)[1]
        cols = np.concatenate((cols1,cols2))
        new_col_clusters = col_clusters
        new_col_clusters[cols] = n_col_clusters

        return row_clusters, new_col_clusters

    def _print_status(self,iter_count,score,delta_score,n_row_clusters,n_col_clusters,elapsed_time):
        if iter_count==0:
            print('|'.join(x.ljust(15) for x in [
                    'split',' score','delta score','n row clusters', 'n col clusters', 'elapsed time (s)']))

        print('|'.join(x.ljust(15) for x in ['%i' % iter_count,'%.4f' % score,'%.4f' % delta_score,'%i' % n_row_clusters,'%i'  % n_col_clusters,'%i' % elapsed_time]))


    def _converge_mscoal(self,train_data,valid_data,coclusters,models,max_split=100,n_jobs=1,verbose=False):
        split_count=0 
        elapsed_time = 0
        score = np.nan
        delta_score=np.nan
        converged = False
        start = time.time()

        if coclusters is None:
            n_row_clusters, n_col_clusters = 1, 1
            coclusters = self._initialize_coclusters(n_row_clusters,n_col_clusters)
        else:
            row_clusters, col_clusters = coclusters
            n_row_clusters, n_col_clusters  = np.unique(row_clusters).size, np.unique(col_clusters).size

        if models is None:
            models = self._initialize_models(coclusters)

        coclusters,models = self._converge_scoal(train_data,coclusters,models,n_jobs=n_jobs,verbose=False)
        scores = self._score_coclusters(valid_data,coclusters,models,n_jobs)
        score = np.sum(scores)/int(self.n_values*self.validation_size)
        
        if verbose:
                self._print_status(split_count,score,delta_score,n_row_clusters,n_col_clusters,elapsed_time)

        converged = split_count >= max_split

        while not converged:

            row_clusters_changed = False
            col_clusters_changed = False
            delta_score = 0

            new_coclusters = deepcopy(coclusters)
            new_coclusters = self._split_row_clusters(valid_data,new_coclusters,models,n_jobs)
            new_models = self._initialize_models(new_coclusters)
            checked = np.all(self._check_coclusters(train_data,new_coclusters,models,n_jobs=1))
            if checked:
                new_coclusters,new_models = self._converge_scoal(train_data,new_coclusters,new_models,n_jobs=n_jobs,verbose=False)
                scores = self._score_coclusters(valid_data,new_coclusters,new_models,n_jobs)
                new_score = np.sum(scores)/int(self.n_values*self.validation_size)
                new_delta_score = score - new_score
                if new_delta_score>0:
                    n_row_clusters+=1
                    coclusters =  new_coclusters
                    models = new_models
                    row_clusters_changed = True
                    delta_score += new_delta_score
                    score = new_score

            new_coclusters = deepcopy(coclusters)
            new_coclusters = self._split_col_clusters(valid_data,new_coclusters,models,n_jobs)
            new_models = self._initialize_models(new_coclusters)
            checked = np.all(self._check_coclusters(train_data,new_coclusters,models,n_jobs=1))
            if checked:
                new_coclusters,new_models = self._converge_scoal(train_data,new_coclusters,new_models,n_jobs=n_jobs,verbose=False)
                scores = self._score_coclusters(valid_data,new_coclusters,new_models,n_jobs)
                new_score = np.sum(scores)/int(self.n_values*self.validation_size)
                new_delta_score = score - new_score
                if new_delta_score>0:
                    n_col_clusters+=1
                    coclusters  = new_coclusters
                    models = new_models
                    col_clusters_changed = True
                    delta_score += new_delta_score
                    score = new_score

            converged = (not row_clusters_changed and not col_clusters_changed) or split_count >= max_split
            split_count+=1
            elapsed_time = time.time() - start
            if verbose:
                self._print_status(split_count,score,delta_score,n_row_clusters,n_col_clusters,elapsed_time)
        
        train_matrix, row_features, col_features = train_data
        valid_matrix, _, _ = valid_data
        if self.matrix=='dense':
            train_matrix[np.where(np.invert(np.isnan(valid_matrix)))] = valid_matrix[np.where(np.invert(np.isnan(valid_matrix)))]
        else:
            train_matrix = np.vstack((train_matrix,valid_matrix))
        train_data = (train_matrix,row_features,col_features)

        coclusters, models = self._converge_scoal(train_data,coclusters,models,n_jobs=n_jobs,verbose=False)

        return coclusters,models

    
    def fit(self,target,row_features,col_features,coclusters=None):
        np.random.seed(self.random_state) 
        
        self.n_rows, self.n_cols, self.n_values = row_features.shape[0], col_features.shape[0], target.shape[0]
        self.n_row_features, self.n_col_features  = row_features.shape[1], col_features.shape[1]        
        
        valid = np.full(self.n_values,False)
        valid[:int(self.n_values*self.validation_size)] = True
        np.random.shuffle(valid)
        valid_target = target[valid]
        train_target = target[~valid] 
        del target

        if self.matrix=='dense':
            valid_matrix = np.zeros((self.n_rows, self.n_cols))*np.nan
            valid_matrix[valid_target[:,0].astype(int),valid_target[:,1].astype(int)] = valid_target[:,2]
  
            train_matrix = np.zeros((self.n_rows, self.n_cols))*np.nan
            train_matrix[train_target[:,0].astype(int),train_target[:,1].astype(int)] = train_target[:,2]      
        else:
            valid_matrix = valid_target  
            train_matrix = train_target 
        del train_target
        del valid_target     
        
        valid_data = (valid_matrix,row_features,col_features)
        train_data = (train_matrix,row_features,col_features)

        if self.cache:
            self.memory = Memory('./pyscoal-cache')
            self._cached_fit = self.memory.cache(self._cached_fit, ignore=['self','model','X','y'])
        
        self.coclusters,self.models = self._converge_mscoal(train_data,valid_data,coclusters,None,self.max_split,self.n_jobs,self.verbose)
        row_clusters, col_clusters = self.coclusters
        self.n_row_clusters, self.n_col_clusters  = np.unique(row_clusters).size, np.unique(col_clusters).size

        if self.cache:
            self.memory.clear(warn=False)
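
The ignore=['self', 'model', 'X', 'y'] argument used above excludes those arguments from the hash that identifies a cached result. A minimal sketch of the same mechanism follows (function and location are illustrative).

from joblib import Memory

memory = Memory(location="/tmp/ignore_demo", verbose=0)

@memory.cache(ignore=['verbose'])
def slow_add(a, b, verbose=False):
    if verbose:
        print("computing", a, b)
    return a + b

slow_add(1, 2, verbose=True)    # computed (and prints)
slow_add(1, 2, verbose=False)   # cache hit: 'verbose' is ignored in the key
memory.clear(warn=False)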
Example #18
    def _inner_sklearn(self, X, y):
        """ Perform hyperparameter tuning with cross-validation for each outer fold (using sklearn library). 
        Each time, save best parameters and associated best estimator (not refitted !).
        
        Parameters
        ----------
        X : array-like, shape (n_samples, n_features)
            Training data set.
            
        y : array-like, shape (n_samples, n_output)
            Target relative to X for classification or regression.
        """

        if self.caching:
            location = 'cachedir'
            memory = Memory(location=location, verbose=0, mmap_mode='r+')
            try:
                cv_estimator = clone(self.estimator).set_params(memory=memory)
            except ValueError as e:
                print(e)
                warnings.warn(
                    "Caching is only available with an estimator built with sklearn.pipeline.Pipeline (see https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html for more details). This argument was ignored by default."
                )
                cv_estimator = clone(self.estimator)
            else:
                if self.n_jobs_inner != 1:
                    warnings.warn(
                        "A combined use of joblib.Memory (function caching) and joblib.Parallel should be considered with care since it may lead to some failures. In particular, we noticed some errors when dealing with custom transformers in the pipeline."
                    )
        else:
            cv_estimator = clone(self.estimator)

        if self.randomized:
            inner = RandomizedSearchCV(estimator=cv_estimator,
                                       param_distributions=self.params,
                                       scoring=self.scoring_inner,
                                       cv=self.cv_inner,
                                       n_jobs=self.n_jobs_inner,
                                       refit=False,
                                       verbose=self.verbose)
        else:
            inner = GridSearchCV(estimator=cv_estimator,
                                 param_grid=self.params,
                                 scoring=self.scoring_inner,
                                 cv=self.cv_inner,
                                 n_jobs=self.n_jobs_inner,
                                 refit=False,
                                 verbose=self.verbose)

        count = 0
        for train, _ in self.cv_outer_.split(X, y):
            X_train, y_train = X[train, :], y[train]
            inner.fit(X_train, y_train)
            self.inner_results_['out fold ' + str(count)] = inner.cv_results_
            self.best_params_.append(inner.best_params_)
            self.best_estimators_.append(
                clone(clone(self.estimator).set_params(**inner.best_params_)))
            if self.caching:
                memory.clear(warn=False)
                rmtree(location)
            count += 1
        return self
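# A minimal sketch (assumptions, not from the original project) of the kind of
# estimator the caching branch above expects: only a sklearn Pipeline exposes a
# ``memory`` parameter, so ``set_params(memory=...)`` succeeds for it, while a
# bare estimator raises the ValueError that triggers the warning in
# _inner_sklearn.
from joblib import Memory
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

memory = Memory(location='cachedir', verbose=0)
pipe = Pipeline([('scale', StandardScaler()), ('clf', SVC())])
pipe.set_params(memory=memory)    # fitted transformers will be cached
# SVC().set_params(memory=memory) # would raise ValueError: not a Pipeline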
Example #19
0
def run(cfg,
        solver: ISolver,
        dataprovider: IDataProvider,
        aug: IAug,
        review_augmented_sample=False,
        review_train=True):
    # Check folder path
    if not os.path.exists(cfg.data_dir):
        logging.error('No such data folder: {}'.format(cfg.data_dir))
        exit(-1)

    # Prepare data and split to train/test subsets
    data_dir, ids_train, ids_test = get_data(cfg, test_size=cfg.test_aspect)

    # Manage caching data access
    cache_folder = './cache'
    memory = Memory(cache_folder, verbose=0)
    memory.clear(warn=False)
    data_reader = memory.cache(
        read_sample) if memory is not None else read_sample

    if review_augmented_sample:
        matplotlib.use('TkAgg')  # Enable interactive mode

        # Specify params according to which subset to review
        ids, augm = (ids_train, aug.get_training_augmentation(cfg)) if review_train else \
            (ids_test, aug.get_validation_augmentation(cfg, cfg.minimize_train_aug))

        # Let's look at the augmented data we have
        dataset = dataprovider(
            data_reader,
            data_dir,
            ids,
            ((0, 0), (None, None)),
            cfg,
            min_mask_ratio=cfg.min_mask_ratio,
            augmentation=augm,
            prep_getter=None  # skip preparation so we can see the actual augmentation of the data
        )
        logging.info('Dataset length: {}'.format(len(dataset)))

        for i in range(150):
            dataset.show(i)

        return

    # ****************************************************************************************************************
    # Create model
    # ****************************************************************************************************************
    # Dataset for train images
    train_dataset = dataprovider(data_reader,
                                 data_dir,
                                 ids_train, ((0, 0), (None, None)),
                                 cfg,
                                 min_mask_ratio=cfg.min_mask_ratio,
                                 augmentation=aug.get_training_augmentation(
                                     cfg, cfg.minimize_train_aug),
                                 prep_getter=solver.get_prep_getter())
    # Dataset for validation images
    valid_dataset = dataprovider(data_reader,
                                 data_dir,
                                 ids_test, ((0, 0), (None, None)),
                                 cfg,
                                 min_mask_ratio=cfg.min_mask_ratio,
                                 augmentation=aug.get_validation_augmentation(
                                     cfg, cfg.minimize_train_aug),
                                 prep_getter=solver.get_prep_getter())

    train_dataloader = Dataloder(train_dataset,
                                 batch_size=cfg.batch_size,
                                 shuffle=True)
    valid_dataloader = Dataloder(valid_dataset, batch_size=1, shuffle=False)

    # Inform general samples info
    train_batch = train_dataloader[0]
    logging.info('Train X: {},{},{},{}'.format(train_batch[0].shape,
                                               train_batch[0].dtype,
                                               np.min(train_batch[0]),
                                               np.max(train_batch[0])))
    logging.info('Train Y: {},{},{},{}'.format(train_batch[1].shape,
                                               train_batch[1].dtype,
                                               np.min(train_batch[1]),
                                               np.max(train_batch[1])))
    logging.info('Train Batch size multiplier: {}'.format(
        cfg.batch_size_multiplier))
    logging.info('Train Samples Nb: {}'.format(len(train_dataset)))
    class_weights = None
    if hasattr(train_dataset, 'mask_uniq_values_nb'):
        if train_dataset.mask_uniq_values_nb is not None and cfg.apply_class_weights:
            mask_min_nb = np.min(train_dataset.mask_uniq_values_nb)
            if mask_min_nb > 0:
                class_weights = (train_dataset.mask_uniq_values_nb /
                                 mask_min_nb)**-1
    #
    val_batch = valid_dataloader[0]
    logging.info('Validate Samples Nb: {}'.format(len(valid_dataset)))
    logging.info('Val X: {},{},{},{}'.format(val_batch[0].shape,
                                             val_batch[0].dtype,
                                             np.min(val_batch[0]),
                                             np.max(val_batch[0])))
    logging.info('Val Y: {},{},{},{}'.format(val_batch[1].shape,
                                             val_batch[1].dtype,
                                             np.min(val_batch[1]),
                                             np.max(val_batch[1])))
    if train_batch[0].shape[1] != val_batch[0].shape[1] or train_batch[
            0].shape[2] != val_batch[0].shape[2]:
        logging.info(
            'Note that the sample height/width in the train subset differs from that in the validation subset. '
            'This may affect cross-comparison of metrics')

    model, weights_path, metrics = solver.build(compile_model=True,
                                                class_weights=class_weights)

    logging.info('Storing configuration...')
    with open(os.path.join(cfg.solution_dir, 'configuration.json'),
              'w',
              newline=os.linesep) as f:
        json.dump(dict({'cfg': dict(cfg)}), f, default=json_def_encoder)

    # Get monitoring metric
    monitoring_metric_name, monitoring_metric_mode = solver.monitoring_metric()

    # Define callbacks for learning rate scheduling and best checkpoints saving
    callbacks = [
        # Save best result
        keras.callbacks.ModelCheckpoint(weights_path,
                                        monitor=monitoring_metric_name,
                                        save_weights_only=True,
                                        save_best_only=True,
                                        mode=monitoring_metric_mode,
                                        verbose=1),
        # Save the latest result
        keras.callbacks.ModelCheckpoint('{}_last.h5'.format(
            os.path.join(os.path.dirname(weights_path),
                         os.path.splitext(os.path.basename(weights_path))[0])),
                                        monitor=monitoring_metric_name,
                                        save_weights_only=True,
                                        save_best_only=False,
                                        mode='auto',
                                        verbose=0),

        # Adam optimizer SHOULD not control LR
        # keras.callbacks.ReduceLROnPlateau(verbose=1, patience=10, factor=0.2)
        #
        # keras.callbacks.EarlyStopping(monitor='val_mean_iou',
        #                              min_delta=0.01,
        #                              patience=40,
        #                              verbose=0, mode='max')
        PlotLosses(imgfile='{}.png'.format(
            os.path.join(os.path.dirname(weights_path),
                         os.path.splitext(os.path.basename(weights_path))[0])),
                   figsize=(12, 4 * (1 + len(metrics)))
                   )  # PNG-files processed in Windows & Ubuntu
    ]

    if hasattr(cfg, 'callbacks'):
        callbacks = callbacks + cfg.callbacks

    matplotlib.use('Agg')  # Disable Tcl/Tk because it sometimes crashes training!
    # train model
    model.fit_generator(
        train_dataloader,
        steps_per_epoch=len(train_dataloader),
        epochs=cfg.epochs,
        callbacks=callbacks,
        validation_data=valid_dataloader,
        validation_steps=len(valid_dataloader),
    )
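# A minimal sketch of the data-access caching pattern used in run() above; the
# real read_sample signature is not shown in this excerpt, so the stand-in
# below is an assumption.
from joblib import Memory

memory = Memory('./cache', verbose=0)
memory.clear(warn=False)              # start from an empty cache, as run() does


def read_sample(data_dir, sample_id):
    # stand-in for the project's expensive image/mask loading
    return {'dir': data_dir, 'id': sample_id}


data_reader = memory.cache(read_sample)
data_reader('./data', 'img_001')      # computed and written to ./cache
data_reader('./data', 'img_001')      # loaded back from the cache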
Example #20
0
class Genome(object):
    """
    Object for a genome file with some utility functions for genome analysis.

    Params:
    --------
    filename: `str`

    Returns:
    --------

    Examples:
    --------

    """
    def __init__(
            self,
            filename,
            exclude=None,
            exclude_contig=['tig', 'Un', 'Sy', 'scaffold', 'ctg', 'Pt', 'Mt'],
            mem_cache='.'):
        check_file_exists(filename)
        self.filename = filename
        self.exclude = listify(exclude)
        self.exclude_contig = listify(exclude_contig)
        self.getChrSizes()
        self.idx2label = dict(
            (i, chrom) for i, chrom in enumerate(self.chromLabels))
        self.label2idx = dict(
            (chrom, i) for i, chrom in enumerate(self.chromLabels))

        self.mem_cache = mem_cache
        self.memory = Memory(mem_cache, verbose=0)
        self.getGCBin = self.memory.cache(self._getGCBin)

    @property
    def handle(self):

        if self.filename[-3:] == ".gz":
            self._handle = gzip.open(self.filename, 'rt')
        else:
            self._handle = open(self.filename, 'r')

        return self._handle

    @property
    def seqs(self):
        """
        A list of sequences (one per retained chromosome).
        """
        if not hasattr(self, '_seqs'):
            self._seqs = []
            fa = SeqIO.parse(self.handle, 'fasta')
            for record in fa:
                if self.exclude:
                    if record.id in self.exclude:
                        continue
                if self.exclude_contig:
                    for contig in self.exclude_contig:
                        if contig in record.id:
                            break
                    else:
                        self._seqs.append(record.seq)
                else:
                    self._seqs.append(record.seq)
        return self._seqs

    @property
    def chromLabels(self):
        if not hasattr(self, '_chromLabels'):
            self._chromLabels = []
            import pyfaidx
            fa = pyfaidx.Fasta(self.filename)
            for record in fa:
                if self.exclude:
                    if record.name in self.exclude:
                        continue
                if self.exclude_contig:
                    for contig in self.exclude_contig:
                        if contig in record.name:
                            break
                    else:
                        self._chromLabels.append(record.name)
                else:
                    self._chromLabels.append(record.name)
        return self._chromLabels

    @property
    def chroms(self):
        return list(range(len(self.chromLabels)))

    @property
    def chromCount(self):
        return len(self.chroms)

    def getChrSizes(self):
        """
        Calculate the length of chromosome.
        """
        self.chromSizes = np.array(
            [len(self.seqs[i]) for i in range(self.chromCount)])
        return self.chromSizes

    def makeWindows(self, window):
        """
        make chromosome window

        Params:
        --------
        window: `int` window of chromosome

        Returns:
        --------
        out: `list` a list of  windows:

        Examples:
        ---------
        >>> makeWindows(10000)
        [('Chr1', 0, 100000) ...]
        """
        self.window = window
        if not hasattr(self, 'windows'):
            self.windows = OrderedDict()
            for idx, size in enumerate(self.chromSizes):
                temp = []
                chrom = self.idx2label[idx]
                for i in range(0, size + 1, window):
                    temp.append((i, i + window))
                else:
                    if temp[-1][1] > size:
                        temp[-1] = (temp[-1][0], size)
                self.windows[chrom] = temp
            self.chromBins = list(map(len, self.windows.values()))
            self.chromStartBins = np.r_[0, np.cumsum(self.chromBins[:-1])]
            self.chromEndBins = np.cumsum(self.chromBins)
            self.numBins = self.chromEndBins[-1]
            self.chromBinsDict = OrderedDict(
                zip(self.windows.keys(),
                    tuple(zip(self.chromStartBins, self.chromEndBins))))

            logging.debug('Successful makewindow')
        return self.windows

    def getGapBase(self, chrom, start, end):
        """
        Calculate the percentage of gap bases (N/n) in a region.
        """
        seq = self.seqs[chrom][start:end]
        if len(seq) == 0:
            return 0.0
        else:
            gap = seq.count('N') + seq.count('n')
            percent = 100.0 * gap / float(len(seq))

            return percent

    def getGC(self, chrom, start, end, correct=True):
        """
        Calculate the GC content of a sequence.
        """
        seq = self.seqs[chrom][start:end]

        gc = SeqUtils.GC(seq)

        gap = self.getGapBase(chrom, start, end) if correct \
            else 0.0

        if gap == 100.0:
            return -1.0
        else:
            corrected_gc = gc * 100.0 / (100.0 - gap)
            #logging.debug('Calculated GC content in {}:{}-{}'.format(
            #        chrom, start, end))
            return corrected_gc

    def _getGCBin(self, window, chr=[], correct=True, thread=24):
        """
        Calculate GC content of a series of windows, and return a OrderedDict

        Params:
        --------
        window: `int` window of bin
        chr: `list` default: `[]`
        thread: `int` thread of parallel running default: `24`
        Returns:
        --------
        out: `list` and gc store in array-like

        Examples:
        --------
        >>> getGCbin(1000000)
        [[0.5, 0.2, 0.5 ...], ...]

        """
        self.gcBin = []
        chroms = listify(chr) if chr else self.chromLabels
        _chromsidx = [self.label2idx[i] for i in chroms]
        """
        def subgc(chrom):
            chromWindow = int(self.chromSizes[chrom] // self.window) + 1
            _gc = np.ones(chromWindow, dtype=np.float)
            for i in range(chromWindow):
                _gc[i] = self.getGC(chrom,  i*self.window, 
                                    (i+1)*self.window, correct=correct)
                
            return _gc
        res = Parallel(thread)(delayed(subgc)(args)
                for args in _chromsidx)
        """
        for chrom in _chromsidx:
            chromWindow = int(self.chromSizes[chrom] // self.window) + 1
            self.gcBin.append(np.ones(chromWindow, dtype=float))
            # index the freshly appended array so this also works when `chr`
            # restricts the calculation to a subset of chromosomes
            gc = self.gcBin[-1]
            for i in range(chromWindow - 1):
                gc[i] = self.getGC(chrom,
                                   i * self.window,
                                   (i + 1) * self.window,
                                   correct=correct)
            # last (possibly partial) window
            gc[chromWindow - 1] = self.getGC(chrom,
                                             (chromWindow - 1) * self.window,
                                             chromWindow * self.window,
                                             correct=correct)

        logging.debug('Successful getGCBin')
        return self.gcBin

    def clearCache(self):
        """
        Clear the joblib Memory cache data stored under `self.mem_cache`.
        """
        if hasattr(self, 'memory'):
            self.memory.clear()
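# A hypothetical usage sketch of the Genome class above (the FASTA path and the
# window size are assumptions). getGCBin is the Memory-cached wrapper around
# _getGCBin created in __init__, so re-running the same analysis can pick the GC
# values up from the joblib cache directory instead of recomputing them.
genome = Genome('genome.fasta', mem_cache='.')
genome.makeWindows(1000000)           # per-chromosome (start, end) windows
gc_values = genome.getGCBin(1000000)  # computed on the first run, then cached
genome.clearCache()                   # drop the cached results when done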
Example #21
0
        costly_compute = memory.cache(_costly_compute_cached)
        return costly_compute(data, self.column)


transformer = Algorithm()
start = time.time()
data_trans = transformer.transform(data)
end = time.time()

print('\nThe function took {:.2f} s to compute.'.format(end - start))
print('\nThe transformed data are:\n {}'.format(data_trans))

###############################################################################

start = time.time()
data_trans = transformer.transform(data)
end = time.time()

print('\nThe function took {:.2f} s to compute.'.format(end - start))
print('\nThe transformed data are:\n {}'.format(data_trans))

###############################################################################
# As expected, the second call to the ``transform`` method loads the results
# that have been cached.

###############################################################################
# Clean up cache directory
###############################################################################

memory.clear(warn=False)
Example #22
0
from scipy.stats.distributions import uniform

import mne
from mne.utils import logger, ProgressBar

from sklearn.base import BaseEstimator
from sklearn.grid_search import RandomizedSearchCV
from sklearn.cross_validation import KFold

from joblib import Memory
from pandas import DataFrame

from .utils import clean_by_interp, interpolate_bads

mem = Memory(cachedir='cachedir')
mem.clear()


def grid_search(epochs, n_interpolates, consensus_percs, prefix, n_folds=3):
    """Grid search to find optimal values of n_interpolate and consensus_perc.

    Parameters
    ----------
    epochs : instance of mne.Epochs
        The epochs object for which bad epochs must be found.
    n_interpolates : array
        The number of sensors to interpolate.
    consensus_percs : array
        The percentage of channels to be interpolated.
    n_folds : int
        Number of folds for cross-validation.
Example #23
0
def create_api_blueprint(config):
    blueprint = Blueprint('api', __name__)

    data_dir = os.path.abspath(
        config.get('data', 'data_root', fallback='.data'))
    cache_dir = os.path.join(data_dir, 'server-cache')

    search_config = parse_search_config(config)
    client_config = dict(config['client']) if 'client' in config else {}

    memory = Memory(cachedir=cache_dir, verbose=0)
    LOGGER.debug("cache directory: %s", cache_dir)
    memory.clear(warn=False)

    db: Database = connect_configured_database(autocommit=True)

    load_recommender = get_recommend_reviewer_factory(db, config)

    recommend_reviewers: _ReloadableRecommendReviewers = (
        ReloadableRecommendReviewers(load_recommender))

    def get_search_type():
        return request.args.get('search_type', DEFAULT_SEARCH_TYPE)

    def user_has_role_by_email(email, role) -> bool:
        with db.begin():
            return recommend_reviewers.user_has_role_by_email(email=email,
                                                              role=role)

    api_auth = ApiAuth(config,
                       client_config,
                       search_config=search_config,
                       user_has_role_by_email=user_has_role_by_email,
                       get_search_type=get_search_type)

    @blueprint.route("/")
    def _api_root() -> Response:
        return jsonify({
            'links': {
                'recommend-reviewers': url_for('api._recommend_reviewers_api'),
                'subject-areas': url_for('api._subject_areas_api'),
                'keywords': url_for('api._keywords_api'),
                'config': url_for('api._config_api')
            }
        })

    @memory.cache
    def recommend_reviewers_as_json(**kwargs) -> Response:
        with db.begin():
            return jsonify(recommend_reviewers.recommend(**kwargs))

    @blueprint.route("/recommend-reviewers")
    @api_auth.wrap_search
    def _recommend_reviewers_api(**_) -> Response:
        manuscript_no = request.args.get('manuscript_no')
        subject_area = request.args.get('subject_area')
        keywords = request.args.get('keywords')
        abstract = request.args.get('abstract')
        limit = request.args.get('limit')

        search_type = get_search_type()
        search_params = search_config.get(search_type)
        if search_params is None:
            raise BadRequest('unknown search type - %s' % search_type)
        role = search_params.get('filter_by_role')
        recommend_relationship_types = search_params.get(
            'recommend_relationship_types')
        recommend_stage_names = search_params.get('recommend_stage_names')

        if limit is None:
            limit = search_params.get('default_limit', DEFAULT_LIMIT)
        else:
            limit = int(limit)
        if not manuscript_no and keywords is None:
            raise BadRequest('keywords parameter required')
        return recommend_reviewers_as_json(
            manuscript_no=manuscript_no,
            subject_area=subject_area,
            keywords=keywords,
            abstract=abstract,
            role=role,
            recommend_relationship_types=recommend_relationship_types,
            recommend_stage_names=recommend_stage_names,
            limit=limit)

    @blueprint.route("/manuscript/version/<path:version_id>")
    @api_auth
    def _get_manuscript_details(version_id, **_) -> Response:
        manuscript_details = recommend_reviewers.get_manuscript_details(
            version_id)
        if not manuscript_details:
            raise NotFound()
        return jsonify(manuscript_details)

    @blueprint.route("/subject-areas")
    def _subject_areas_api() -> Response:
        with db.begin():
            return jsonify(list(recommend_reviewers.get_all_subject_areas()))

    @blueprint.route("/keywords")
    def _keywords_api() -> Response:
        with db.begin():
            return jsonify(list(recommend_reviewers.get_all_keywords()))

    @blueprint.route("/config")
    def _config_api() -> Response:
        return jsonify(client_config)

    @blueprint.route("/search-types")
    @api_auth
    def _search_types_api(email=None) -> Response:
        with db.begin():
            if email is None or api_auth.is_staff_email(email):
                LOGGER.debug(
                    'email is None or staff email, not filtering search types')
                allowed_search_config = search_config
            else:
                roles = set(
                    recommend_reviewers.get_user_roles_by_email(email)) | {''}
                allowed_search_config = {
                    search_type: search_params
                    for search_type, search_params in search_config.items()
                    if search_params.get('required_role', '') in roles
                }
                LOGGER.debug(
                    'roles, email=%s, roles=%s, filtered_search_types=%s',
                    email, roles, allowed_search_config.keys())
            search_types_response = [{
                'search_type':
                search_type,
                'title':
                search_config[search_type].get('title', search_type)
            } for search_type in sorted(allowed_search_config.keys())]
            return jsonify(search_types_response)

    @blueprint.teardown_request
    def _remove_session(exc=None):
        try:
            LOGGER.debug('teardown, exc=%s', exc)
            db.remove_local()
        except Exception as e:  # pylint: disable=W0703
            LOGGER.warning('failed to remove session due to %s', e, exc_info=e)

    def reload_api():
        recommend_reviewers.reload()
        recommend_reviewers_as_json.clear()
        api_auth.reload()

    return blueprint, reload_api
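# A minimal sketch (separate from the blueprint above) of the two cache-clearing
# granularities it relies on: Memory.clear() wipes everything under the cache
# location, while calling .clear() on an individual cached function only drops
# that function's entries, as reload_api() does for recommend_reviewers_as_json.
from joblib import Memory

memory = Memory('./server-cache-demo', verbose=0)


@memory.cache
def lookup(key):
    return key.upper()


lookup('a')                # populate the cache
lookup.clear(warn=False)   # clear only this function's cached results
memory.clear(warn=False)   # clear the whole cache directory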
Example #24
0
    {
        'reduce_dim__n_components': N_FEATUREDIM_OPTIONS,
        'feature_selection__k': N_FEATURES_OPTIONS,
        'classify__C': C_OPTIONS,
        'classify__l1_ratio': l1_ratio,
    },
]

reducer_labels = ['PCA', 'KBest(chi2)']

grid = GridSearchCV(pipe, n_jobs=1, param_grid=param_grid)
X, y = load_digits(return_X_y=True)
grid.fit(X, y)

# Delete the temporary cache before exiting
memory.clear(warn=False)
rmtree(location)

mean_scores = np.array(grid.cv_results_['mean_test_score'])
# scores are in the order of param_grid iteration, which is alphabetical
mean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))
# select score for best C
mean_scores = mean_scores.max(axis=0)
bar_offsets = (np.arange(len(N_FEATURES_OPTIONS)) *
               (len(reducer_labels) + 1) + .5)

plt.figure()
COLORS = 'bgrcmyk'
for i, (label, reducer_scores) in enumerate(zip(reducer_labels, mean_scores)):
    plt.bar(bar_offsets + i, reducer_scores, label=label, color=COLORS[i])
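# The ``memory``/``location`` setup is not part of the excerpt above; a sketch
# of what it presumably looks like, following the usual scikit-learn pipeline
# caching pattern. The concrete steps and classifier are assumptions chosen to
# match the parameter names in param_grid ('reduce_dim__n_components',
# 'feature_selection__k', 'classify__C', 'classify__l1_ratio'):
from tempfile import mkdtemp
from shutil import rmtree  # rmtree(location) is called at the end of the excerpt
from joblib import Memory
from sklearn.pipeline import Pipeline
from sklearn.decomposition import PCA
from sklearn.feature_selection import SelectKBest
from sklearn.linear_model import LogisticRegression

location = mkdtemp()
memory = Memory(location=location, verbose=0)
pipe = Pipeline(
    [('reduce_dim', PCA()),
     ('feature_selection', SelectKBest()),
     ('classify', LogisticRegression(penalty='elasticnet', solver='saga'))],
    memory=memory)  # fitted transformers are cached between grid-search fits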
Example #25
0
File: Kernel.py  Project: mtoqeerpk/zephyr
class SeisFDFDKernel(object):

    # source array ref

    # receiver array ref

    mesh = None
    freq = None
    Solver = lambda: None

    def __init__(self, systemConfig, **kwargs):

        if systemConfig.get('cache', False):
            try:
                from tempfile import mkdtemp
                from joblib import Memory
            except ImportError:
                pass
            else:
                if 'cacheDir' in systemConfig:
                    cacheDir = systemConfig['cacheDir']
                    try:
                        os.makedirs(cacheDir)
                    except OSError as e:
                        if e.errno == errno.EEXIST and os.path.isdir(cacheDir):
                            pass
                        else:
                            raise
                else:
                    cacheDir = mkdtemp()

                self._mem = Memory(cachedir=cacheDir, verbose=0)

                # Cache outputs of these methods
                self.forward = self._mem.cache(self.forward)
                self.backprop = self._mem.cache(self.backprop)

        hx = [(systemConfig['dx'], systemConfig['nx'] - 1)]
        hz = [(systemConfig['dz'], systemConfig['nz'] - 1)]
        self.mesh = SimPEG.Mesh.TensorMesh([hx, hz], '00')

        self.mesh.ireg = systemConfig.get('ireg', DEFAULT_IREG)
        self.mesh.freeSurf = systemConfig.get('freeSurf',
                                              DEFAULT_FREESURF_BOUNDS)

        initMap = {
            #   Argument        Rename to Property
            'c': 'cR',
            'Q': None,
            'rho': None,
            'nPML': None,
            'freeSurf': None,
            'freq': None,
            'ky': None,
            'kyweight': None,
            'Solver': None,
            'dx': None,
            'dz': None,
            'dtype': None,
        }

        for key in initMap.keys():
            if key in systemConfig:
                if initMap[key] is None:
                    setattr(self, key, systemConfig[key])
                else:
                    setattr(self, initMap[key], systemConfig[key])

    def __del__(self):
        if hasattr(self, '_mem'):
            self._mem.clear()
            cacheDir = self._mem.cachedir
            del self._mem
            shutil.rmtree(cacheDir)

    # Model properties

    @property
    def c(self):
        return self.cR + self.cI

    @c.setter
    def c(self, value):
        self._cR = value.real
        self._cI = value.imag
        self._invalidateMatrix()

    @property
    def rho(self):
        if getattr(self, '_rho', None) is None:
            self._rho = 310 * self.c**0.25
        return self._rho

    @rho.setter
    def rho(self, value):
        self._rho = value
        self._invalidateMatrix()

    @property
    def Q(self):
        if getattr(self, '_Q', None) is None:
            self._Q = numpy.inf
        return self._Q

    @Q.setter
    def Q(self, value):
        self._Q = value
        self._invalidateMatrix()

    @property
    def cR(self):
        return self._cR

    @cR.setter
    def cR(self, value):
        self._cR = value
        self._invalidateMatrix()

    @property
    def cI(self):
        if self.Q is numpy.inf:
            return 0
        else:
            return 1j * self.cR / (2 * self.Q)

    @cI.setter
    def cI(self, value):
        if (value == 0).all():
            self._Q = numpy.inf
        else:
            self._Q = 1j * self.cR / (2 * value)
        self._invalidateMatrix()

    # Modelling properties

    @property
    def nPML(self):
        if getattr(self, '_nPML', None) is None:
            self._nPML = DEFAULT_PML_SIZE
        return self._nPML

    @nPML.setter
    def nPML(self, value):
        self._nPML = value
        self._invalidateMatrix()

    @property
    def ky(self):
        if getattr(self, '_ky', None) is None:
            self._ky = 0.
        return self._ky

    @ky.setter
    def ky(self, value):
        self._ky = value
        self._invalidateMatrix()

    @property
    def kyweight(self):
        if getattr(self, '_kyweight', None) is None:
            self._kyweight = 1.
        return self._kyweight

    @kyweight.setter
    def kyweight(self, value):
        self._kyweight = value
        self._invalidateMatrix()

    # Clever matrix setup properties

    @property
    def Solver(self):
        if getattr(self, '_Solver', None) is None:
            self._Solver = DEFAULT_SOLVER
        return self._Solver

    @Solver.setter
    def Solver(self, value):
        self._Solver = value

    @property
    def A(self):
        if getattr(self, '_A', None) is None:
            self._A = self._initHelmholtzNinePoint()
        return self._A

    @property
    def Ainv(self):
        if getattr(self, '_Ainv', None) is None:
            self._mfact()
        return self._Ainv

    def _invalidateMatrix(self):
        if getattr(self, '_A', None) is not None:
            del (self._A)
        if getattr(self, '_Ainv', None) is not None:
            del (self._Ainv)
        if getattr(self, '_mem', None) is not None:
            self._mem.clear()

    @property
    def dtypeReal(self):
        if self.dtype == 'float':
            return numpy.float32
        elif self.dtype == 'double':
            return numpy.float64
        else:
            raise NotImplementedError('Unknown dtype: %s' % self.dtype)

    @property
    def dtypeComplex(self):
        if self.dtype == 'float':
            return numpy.complex64
        elif self.dtype == 'double':
            return numpy.complex128
        else:
            raise NotImplementedError('Unknown dtype: %s' % self.dtype)

    @property
    def dtype(self):
        return getattr(self, '_dtype', DEFAULT_DTYPE)

    @dtype.setter
    def dtype(self, value):
        # Currently this doesn't work because all the solvers assume doubles
        # if value in ['float', 'double']:
        if value in ['double']:
            self._dtype = value
        else:
            raise NotImplementedError('Unknown dtype: %s' % value)

    # ------------------------------------------------------------------------
    # Matrix setup

    def _mfact(self):
        self._Ainv = self.Solver(self.A)

    def _initHelmholtzNinePoint(self):
        """
        An attempt to reproduce the finite-difference stencil and the
        general behaviour of OMEGA by Pratt et al. The stencil is a 9-point
        second-order version based on work by a number of people in the mid-90s
        including Ivan Stekl. The boundary conditions are based on the PML
        implementation by Steve Roecker in fdfdpml.f.
        """

        # Set up SimPEG mesh
        dims = (self.mesh.nNy, self.mesh.nNx)
        # mAve = self.mesh.aveN2CC

        # c = (mAve.T * self.c.ravel()).reshape(dims)
        # rho = (mAve.T * self.rho.ravel()).reshape(dims)

        c = self.c
        rho = self.rho

        # fast --> slow is x --> y --> z as Fortran

        # Set up physical properties in matrices with padding
        omega = 2 * numpy.pi * self.freq
        cPad = numpy.pad(c, pad_width=1, mode='edge')
        rhoPad = numpy.pad(rho, pad_width=1, mode='edge')

        aky = 2 * numpy.pi * self.ky

        # Model parameter M
        K = ((omega**2 / cPad**2) - aky**2) / rhoPad

        # Horizontal, vertical and diagonal geometry terms
        dx = self.mesh.hx[0]
        dz = self.mesh.hy[0]
        dxx = dx**2
        dzz = dz**2
        dxz = dx * dz
        dd = numpy.sqrt(dxz)

        # PML decay terms
        # NB: Arrays are padded later, but 'c' in these lines
        #     comes from the original (un-padded) version

        nPML = self.nPML

        pmldx = dx * (nPML - 1)
        pmldz = dz * (nPML - 1)
        pmlr = 1e-3
        pmlfx = 3.0 * numpy.log(1 / pmlr) / (2 * pmldx**3)
        pmlfz = 3.0 * numpy.log(1 / pmlr) / (2 * pmldz**3)

        dpmlx = numpy.zeros(dims, dtype=self.dtypeComplex)
        dpmlz = numpy.zeros(dims, dtype=self.dtypeComplex)
        isnx = numpy.zeros(dims, dtype=self.dtypeReal)
        isnz = numpy.zeros(dims, dtype=self.dtypeReal)

        # Only enable PML if the free surface isn't set

        freeSurf = self.mesh.freeSurf

        if freeSurf[0]:
            isnz[-nPML:, :] = -1  # Top

        if freeSurf[1]:
            isnx[:, -nPML:] = -1  # Right Side

        if freeSurf[2]:
            isnz[:nPML, :] = 1  # Bottom

        if freeSurf[3]:
            isnx[:, :nPML] = 1  # Left side

        dpmlx[:, :nPML] = (numpy.arange(nPML, 0, -1) * dx).reshape((1, nPML))
        dpmlx[:, -nPML:] = (numpy.arange(1, nPML + 1, 1) * dx).reshape(
            (1, nPML))
        dnx = pmlfx * c * dpmlx**2
        ddnx = 2 * pmlfx * c * dpmlx
        denx = dnx + 1j * omega
        r1x = 1j * omega / denx
        r1xsq = r1x**2
        r2x = isnx * r1xsq * ddnx / denx

        dpmlz[:nPML, :] = (numpy.arange(nPML, 0, -1) * dz).reshape((nPML, 1))
        dpmlz[-nPML:, :] = (numpy.arange(1, nPML + 1, 1) * dz).reshape(
            (nPML, 1))
        dnz = pmlfz * c * dpmlz**2
        ddnz = 2 * pmlfz * c * dpmlz
        denz = dnz + 1j * omega
        r1z = 1j * omega / denz
        r1zsq = r1z**2
        r2z = isnz * r1zsq * ddnz / denz

        # Visual key for finite-difference terms
        # (per Pratt and Worthington, 1990)
        #
        #   This         Original
        # AF FF CF  vs.  AD DD CD
        # AA BE CC  vs.  AA BE CC
        # AD DD CD  vs.  AF FF CF

        # Set of keys to index the dictionaries
        keys = ['AD', 'DD', 'CD', 'AA', 'BE', 'CC', 'AF', 'FF', 'CF']

        # Diagonal offsets for the sparse matrix formation
        offsets = {
            'AD': (-1) * dims[1] + (-1),
            'DD': (-1) * dims[1] + (0),
            'CD': (-1) * dims[1] + (+1),
            'AA': (0) * dims[1] + (-1),
            'BE': (0) * dims[1] + (0),
            'CC': (0) * dims[1] + (+1),
            'AF': (+1) * dims[1] + (-1),
            'FF': (+1) * dims[1] + (0),
            'CF': (+1) * dims[1] + (+1),
        }

        # Buoyancies
        bMM = 1. / rhoPad[0:-2, 0:-2]  # bottom left
        bME = 1. / rhoPad[0:-2, 1:-1]  # bottom centre
        bMP = 1. / rhoPad[0:-2, 2:]  # bottom right
        bEM = 1. / rhoPad[1:-1, 0:-2]  # middle left
        bEE = 1. / rhoPad[1:-1, 1:-1]  # middle centre
        bEP = 1. / rhoPad[1:-1, 2:]  # middle right
        bPM = 1. / rhoPad[2:, 0:-2]  # top    left
        bPE = 1. / rhoPad[2:, 1:-1]  # top    centre
        bPP = 1. / rhoPad[2:, 2:]  # top    right

        # Initialize averaged buoyancies on most of the grid
        bMM = (bEE + bMM) / 2  # a2
        bME = (bEE + bME) / 2  # d1
        bMP = (bEE + bMP) / 2  # d2
        bEM = (bEE + bEM) / 2  # a1
        # ... middle
        bEP = (bEE + bEP) / 2  # c1
        bPM = (bEE + bPM) / 2  # f2
        bPE = (bEE + bPE) / 2  # f1
        bPP = (bEE + bPP) / 2  # c2

        # Reset the buoyancies on the outside edges
        # bMM[ 0, :] = bEE[ 0, :]
        # bMM[ :, 0] = bEE[ :, 0]
        # bME[ 0, :] = bEE[ 0, :]
        # bMP[ 0, :] = bEE[ 0, :]
        # bMP[ :,-1] = bEE[ :,-1]
        # bEM[ :, 0] = bEE[ :, 0]
        # bEP[ :,-1] = bEE[ :,-1]
        # bPM[-1, :] = bEE[-1, :]
        # bPM[ :, 0] = bEE[ :, 0]
        # bPE[-1, :] = bEE[-1, :]
        # bPP[-1, :] = bEE[-1, :]
        # bPP[ :,-1] = bEE[ :,-1]

        # K = omega^2/(c^2 . rho)
        kMM = K[0:-2, 0:-2]  # bottom left
        kME = K[0:-2, 1:-1]  # bottom centre
        kMP = K[0:-2, 2:]  # bottom centre
        kEM = K[1:-1, 0:-2]  # middle left
        kEE = K[1:-1, 1:-1]  # middle centre
        kEP = K[1:-1, 2:]  # middle right
        kPM = K[2:, 0:-2]  # top    left
        kPE = K[2:, 1:-1]  # top    centre
        kPP = K[2:, 2:]  # top    right

        # 9-point fd star
        acoef = 0.5461
        bcoef = 0.4539
        ccoef = 0.6248
        dcoef = 0.09381
        ecoef = 0.000001297

        # 5-point fd star
        # acoef = 1.0
        # bcoef = 0.0
        # ecoef = 0.0

        # NB: bPM and bMP here are switched relative to S. Roecker's version
        #     in OMEGA. This is because the labelling herein is always ?ZX.

        diagonals = {
            'AD':
            ecoef * kMM + bcoef * bMM * ((r1zsq + r1xsq) / (4 * dxz) -
                                         (r2z + r2x) / (4 * dd)),
            'DD':
            dcoef * kME + acoef * bME * (r1zsq / dz - r2z / 2) / dz + bcoef *
            (r1zsq - r1xsq) * (bMP + bMM) / (4 * dxz),
            'CD':
            ecoef * kMP + bcoef * bMP * ((r1zsq + r1xsq) / (4 * dxz) -
                                         (r2z + r2x) / (4 * dd)),
            'AA':
            dcoef * kEM + acoef * bEM * (r1xsq / dx - r2x / 2) / dx + bcoef *
            (r1xsq - r1zsq) * (bPM + bMM) / (4 * dxz),
            'BE':
            ccoef * kEE + acoef * (r2x * (bEM - bEP) / (2 * dx) + r2z *
                                   (bME - bPE) / (2 * dz) - r1xsq *
                                   (bEM + bEP) / dxx - r1zsq *
                                   (bME + bPE) / dzz) + bcoef *
            (((r2x + r2z) * (bMM - bPP) + (r2z - r2x) * (bMP - bPM)) /
             (4 * dd) - (r1xsq + r1zsq) * (bMM + bPP + bPM + bMP) / (4 * dxz)),
            'CC':
            dcoef * kEP + acoef * bEP * (r1xsq / dx + r2x / 2) / dx + bcoef *
            (r1xsq - r1zsq) * (bMP + bPP) / (4 * dxz),
            'AF':
            ecoef * kPM + bcoef * bPM * ((r1zsq + r1xsq) / (4 * dxz) -
                                         (r2z + r2x) / (4 * dd)),
            'FF':
            dcoef * kPE + acoef * bPE * (r1zsq / dz - r2z / 2) / dz + bcoef *
            (r1zsq - r1xsq) * (bPM + bPP) / (4 * dxz),
            'CF':
            ecoef * kPP + bcoef * bPP * ((r1zsq + r1xsq) / (4 * dxz) -
                                         (r2z + r2x) / (4 * dd)),
        }

        diagonals['AD'] = diagonals['AD'].ravel()[dims[1] + 1:]
        diagonals['DD'] = diagonals['DD'].ravel()[dims[1]:]
        diagonals['CD'] = diagonals['CD'].ravel()[dims[1] - 1:]
        diagonals['AA'] = diagonals['AA'].ravel()[1:]
        diagonals['BE'] = diagonals['BE'].ravel()[:]
        diagonals['CC'] = diagonals['CC'].ravel()[:-1]
        diagonals['AF'] = diagonals['AF'].ravel()[:-dims[1] + 1]
        diagonals['FF'] = diagonals['FF'].ravel()[:-dims[1]]
        diagonals['CF'] = diagonals['CF'].ravel()[:-dims[1] - 1]

        # self._setupBoundary(diagonals, freeSurf)
        if any(freeSurf):
            raise NotImplementedError('Free surface not implemented!')

        # for key in diagonals.keys():
        #     print('%s:\t%d\t%d'%(key, diagonals[key].size, offsets[key]))

        diagonals = [diagonals[key] for key in keys]
        offsets = [offsets[key] for key in keys]

        A = scipy.sparse.diags(
            diagonals,
            offsets,
            shape=(self.mesh.nN, self.mesh.nN),
            format='csr',
            dtype=self.dtypeComplex
        )  #, shape=(self.mesh.nN, self.mesh.nN))#, self.mesh.nN, self.mesh.nN, format='csr')

        return A

    # def _setupBoundary(self, diagonals, freeSurf):
    #     """
    #     Function to set up boundary regions for the Seismic FDFD problem
    #     using the 9-point finite-difference stencil from OMEGA/FULLWV.
    #     """

    #     keys = diagonals.keys()
    #     pickDiag = lambda x: -1. if freeSurf[x] else 1.

    #     # Left
    #     for key in keys:
    #         if key is 'BE':
    #             diagonals[key][:,0] = pickDiag(3)
    #         else:
    #             diagonals[key][:,0] = 0.

    #     # Right
    #     for key in keys:
    #         if key is 'BE':
    #             diagonals[key][:,-1] = pickDiag(1)
    #         else:
    #             diagonals[key][:,-1] = 0.

    #     # Bottom
    #     for key in keys:
    #         if key is 'BE':
    #             diagonals[key][0,:] = pickDiag(2)
    #         else:
    #             diagonals[key][0,:] = 0.

    #     # Top
    #     for key in keys:
    #         if key is 'BE':
    #             diagonals[key][-1,:] = pickDiag(0)
    #         else:
    #             diagonals[key][-1,:] = 0.

    # ------------------------------------------------------------------------
    # Externally-callable functions

    def clear(self):
        self._invalidateMatrix()

    # What about @caching decorators?
    def forward(self, src, dOnly=True):

        q = self.kyweight * src.getq(self.mesh)
        u = self.Ainv * q

        d = numpy.array(
            [numpy.dot(P, u) for P in src.getP(self.mesh, self.ky)]).ravel()

        if dOnly:
            return d
        else:
            return u, d

    def backprop(self, src, dresid=1.):

        qr = self.kyweight * src.getqback(self.mesh, dresid, self.ky)
        u = self.Ainv * qr

        return u
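# A minimal toy sketch (names are not from zephyr) of the caching pattern used
# in __init__ above: instance methods are re-bound to their Memory-cached
# wrappers, and the cache is cleared whenever the model is invalidated.
from tempfile import mkdtemp
import shutil
from joblib import Memory


class ToySolver:
    def __init__(self):
        self._mem = Memory(mkdtemp(), verbose=0)
        # from here on, self.solve refers to the cached wrapper
        self.solve = self._mem.cache(self.solve)

    def solve(self, rhs):
        return [2 * v for v in rhs]    # stand-in for an expensive solve

    def invalidate(self):
        self._mem.clear(warn=False)    # drop stale results, like _invalidateMatrix


solver = ToySolver()
print(solver.solve([1, 2, 3]))   # result is computed and stored on disk
solver.invalidate()              # wipe cached results before the model changes
shutil.rmtree(solver._mem.location)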
Example #26
0
@memory.cache
def spell(word, count=10, dict_words=None):
    dict_words = load_words() if dict_words is None else dict_words
    return sorted(dict_words, key=lambda dw: levenshtein(word, dw))[:count]


if __name__ == '__main__':
    from argparse import ArgumentParser

    parser = ArgumentParser(description='spell checker')
    parser.add_argument('word', help='word to check', nargs='?')
    parser.add_argument('--count',
                        type=int,
                        default=10,
                        help='number of words to return')
    parser.add_argument('--clear-cache',
                        help='clear cache',
                        action='store_true',
                        default=False)
    args = parser.parse_args()

    if args.clear_cache:
        memory.clear()
        raise SystemExit

    if not args.word:
        raise SystemExit('no word given')

    for word in spell(args.word, args.count):
        print(word)
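# The module-level setup is not part of the excerpt above; a sketch of what it
# presumably looks like (the cache directory name, dictionary path, and helper
# implementations are assumptions):
from joblib import Memory

memory = Memory('.spell-cache', verbose=0)


def load_words(path='/usr/share/dict/words'):
    # stand-in for the project's dictionary loader
    with open(path) as fh:
        return [line.strip() for line in fh]


def levenshtein(a, b):
    # classic dynamic-programming edit distance (one of many equivalent forms)
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        curr = [i]
        for j, cb in enumerate(b, 1):
            curr.append(min(prev[j] + 1,                 # deletion
                            curr[j - 1] + 1,             # insertion
                            prev[j - 1] + (ca != cb)))   # substitution
        prev = curr
    return prev[-1]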
Example #27
0
File: Kernel.py  Project: bsmithyman/zephyr
class SeisFDFDKernel(object):

    # source array ref

    # receiver array ref

    mesh = None
    freq = None
    Solver = lambda: None


    def __init__(self, systemConfig, **kwargs):

        if systemConfig.get('cache', False):
            try:
                from tempfile import mkdtemp
                from joblib import Memory
            except ImportError:
                pass
            else:
                if 'cacheDir' in systemConfig:
                    cacheDir = systemConfig['cacheDir']
                    try:
                        os.makedirs(cacheDir)
                    except OSError as e:
                        if e.errno == errno.EEXIST and os.path.isdir(cacheDir):
                            pass
                        else:
                            raise
                else:
                    cacheDir = mkdtemp()

                self._mem = Memory(cachedir=cacheDir, verbose=0)

                # Cache outputs of these methods
                self.forward = self._mem.cache(self.forward)
                self.backprop = self._mem.cache(self.backprop)

        hx = [(systemConfig['dx'], systemConfig['nx']-1)]
        hz = [(systemConfig['dz'], systemConfig['nz']-1)]
        self.mesh = SimPEG.Mesh.TensorMesh([hx, hz], '00')

        self.mesh.ireg = systemConfig.get('ireg', DEFAULT_IREG)
        self.mesh.freeSurf = systemConfig.get('freeSurf', DEFAULT_FREESURF_BOUNDS)

        initMap = {
        #   Argument        Rename to Property
            'c':            'cR',
            'Q':            None,
            'rho':          None,
            'nPML':         None,
            'freeSurf':     None,
            'freq':         None,
            'ky':           None,
            'kyweight':     None,
            'Solver':       None,
            'dx':           None,
            'dz':           None,
            'dtype':        None,
        }

        for key in initMap.keys():
            if key in systemConfig:
                if initMap[key] is None:
                    setattr(self, key, systemConfig[key])
                else:
                    setattr(self, initMap[key], systemConfig[key])

    def __del__(self):
        if hasattr(self, '_mem'):
            self._mem.clear()
            cacheDir = self._mem.cachedir
            del self._mem
            shutil.rmtree(cacheDir)


    # Model properties

    @property
    def c(self):
        return self.cR + self.cI
    @c.setter
    def c(self, value):
        self._cR = value.real
        self._cI = value.imag
        self._invalidateMatrix()

    @property
    def rho(self):
        if getattr(self, '_rho', None) is None:
            self._rho = 310 * self.c**0.25
        return self._rho
    @rho.setter
    def rho(self, value):
        self._rho = value
        self._invalidateMatrix()

    @property
    def Q(self):
        if getattr(self, '_Q', None) is None:
            self._Q = numpy.inf
        return self._Q
    @Q.setter
    def Q(self, value):
        self._Q = value
        self._invalidateMatrix()

    @property
    def cR(self):
        return self._cR
    @cR.setter
    def cR(self, value):
        self._cR = value
        self._invalidateMatrix()
    
    @property
    def cI(self):
        if self.Q is numpy.inf:
            return 0
        else:
            return 1j * self.cR / (2*self.Q)
    @cI.setter
    def cI(self, value):
        if (value == 0).all():
            self._Q = numpy.inf
        else:
            self._Q = 1j * self.cR / (2*value)
        self._invalidateMatrix()

    # Modelling properties

    @property
    def nPML(self):
        if getattr(self, '_nPML', None) is None:
            self._nPML = DEFAULT_PML_SIZE
        return self._nPML
    @nPML.setter
    def nPML(self, value):
        self._nPML = value
        self._invalidateMatrix()

    @property
    def ky(self):
        if getattr(self, '_ky', None) is None:
            self._ky = 0.
        return self._ky
    @ky.setter
    def ky(self, value):
        self._ky = value
        self._invalidateMatrix()

    @property
    def kyweight(self):
        if getattr(self, '_kyweight', None) is None:
            self._kyweight = 1.
        return self._kyweight
    @kyweight.setter
    def kyweight(self, value):
        self._kyweight = value
        self._invalidateMatrix()

    # Clever matrix setup properties

    @property
    def Solver(self):
        if getattr(self, '_Solver', None) is None:
            self._Solver = DEFAULT_SOLVER
        return self._Solver
    @Solver.setter
    def Solver(self, value):
        self._Solver = value

    @property
    def A(self):
        if getattr(self, '_A', None) is None:
            self._A = self._initHelmholtzNinePoint()
        return self._A

    @property
    def Ainv(self):
        if getattr(self, '_Ainv', None) is None:
            self._mfact()
        return self._Ainv

    def _invalidateMatrix(self):
        if getattr(self, '_A', None) is not None:
            del(self._A)
        if getattr(self, '_Ainv', None) is not None:
            del(self._Ainv)
        if getattr(self, '_mem', None) is not None:
            self._mem.clear()

    @property
    def dtypeReal(self):
        if self.dtype == 'float':
            return numpy.float32
        elif self.dtype == 'double':
            return numpy.float64
        else:
            raise NotImplementedError('Unknown dtype: %s'%self.dtype)

    @property
    def dtypeComplex(self):
        if self.dtype == 'float':
            return numpy.complex64
        elif self.dtype == 'double':
            return numpy.complex128
        else:
            raise NotImplementedError('Unknown dtype: %s'%self.dtype)

    @property
    def dtype(self):
        return getattr(self, '_dtype', DEFAULT_DTYPE)
    @dtype.setter
    def dtype(self, value):
        # Currently this doesn't work because all the solvers assume doubles
        # if value in ['float', 'double']:
        if value in ['double']:
            self._dtype = value
        else:
            raise NotImplementedError('Unknown dtype: %s'%value)

    # ------------------------------------------------------------------------
    # Matrix setup

    def _mfact(self):
        self._Ainv = self.Solver(self.A)

    def _initHelmholtzNinePoint(self):
        """
        An attempt to reproduce the finite-difference stencil and the
        general behaviour of OMEGA by Pratt et al. The stencil is a 9-point
        second-order version based on work by a number of people in the mid-90s
        including Ivan Stekl. The boundary conditions are based on the PML
        implementation by Steve Roecker in fdfdpml.f.
        """

        # Set up SimPEG mesh
        dims = (self.mesh.nNy, self.mesh.nNx)
        # mAve = self.mesh.aveN2CC

        # c = (mAve.T * self.c.ravel()).reshape(dims)
        # rho = (mAve.T * self.rho.ravel()).reshape(dims)

        c = self.c
        rho = self.rho

        # fast --> slow is x --> y --> z as Fortran

        # Set up physical properties in matrices with padding
        omega   = 2 * numpy.pi * self.freq 
        cPad    = numpy.pad(c, pad_width=1, mode='edge')
        rhoPad  = numpy.pad(rho, pad_width=1, mode='edge')

        aky = 2*numpy.pi*self.ky

        # Model parameter M
        K = ((omega**2 / cPad**2) - aky**2) / rhoPad

        # Horizontal, vertical and diagonal geometry terms
        dx  = self.mesh.hx[0]
        dz  = self.mesh.hy[0]
        dxx = dx**2
        dzz = dz**2
        dxz = dx*dz
        dd  = numpy.sqrt(dxz)

        # PML decay terms
        # NB: Arrays are padded later, but 'c' in these lines
        #     comes from the original (un-padded) version

        nPML    = self.nPML

        pmldx   = dx*(nPML - 1)
        pmldz   = dz*(nPML - 1)
        pmlr    = 1e-3
        pmlfx   = 3.0 * numpy.log(1/pmlr)/(2*pmldx**3)
        pmlfz   = 3.0 * numpy.log(1/pmlr)/(2*pmldz**3)

        dpmlx   = numpy.zeros(dims, dtype=self.dtypeComplex)
        dpmlz   = numpy.zeros(dims, dtype=self.dtypeComplex)
        isnx    = numpy.zeros(dims, dtype=self.dtypeReal)
        isnz    = numpy.zeros(dims, dtype=self.dtypeReal)

        # Only enable PML if the free surface isn't set

        freeSurf = self.mesh.freeSurf

        if freeSurf[0]:    
            isnz[-nPML:,:] = -1 # Top

        if freeSurf[1]:
            isnx[:,-nPML:] = -1 # Right Side

        if freeSurf[2]:
            isnz[:nPML,:] = 1 # Bottom

        if freeSurf[3]:
            isnx[:,:nPML] = 1 # Left side

        dpmlx[:,:nPML] = (numpy.arange(nPML, 0, -1)*dx).reshape((1,nPML))
        dpmlx[:,-nPML:] = (numpy.arange(1, nPML+1, 1)*dx).reshape((1,nPML))
        dnx     = pmlfx*c*dpmlx**2
        ddnx    = 2*pmlfx*c*dpmlx
        denx    = dnx + 1j*omega
        r1x     = 1j*omega / denx
        r1xsq   = r1x**2
        r2x     = isnx*r1xsq*ddnx/denx

        dpmlz[:nPML,:] = (numpy.arange(nPML, 0, -1)*dz).reshape((nPML,1))
        dpmlz[-nPML:,:] = (numpy.arange(1, nPML+1, 1)*dz).reshape((nPML,1))
        dnz     = pmlfz*c*dpmlz**2
        ddnz    = 2*pmlfz*c*dpmlz
        denz    = dnz + 1j*omega
        r1z     = 1j*omega / denz
        r1zsq   = r1z**2
        r2z     = isnz*r1zsq*ddnz/denz

        # Visual key for finite-difference terms
        # (per Pratt and Worthington, 1990)
        #
        #   This         Original
        # AF FF CF  vs.  AD DD CD
        # AA BE CC  vs.  AA BE CC
        # AD DD CD  vs.  AF FF CF

        # Set of keys to index the dictionaries
        keys = ['AD', 'DD', 'CD', 'AA', 'BE', 'CC', 'AF', 'FF', 'CF']

        # Diagonal offsets for the sparse matrix formation
        offsets = {
            'AD':   (-1) * dims[1] + (-1), 
            'DD':   (-1) * dims[1] + ( 0),
            'CD':   (-1) * dims[1] + (+1),
            'AA':   ( 0) * dims[1] + (-1),
            'BE':   ( 0) * dims[1] + ( 0),
            'CC':   ( 0) * dims[1] + (+1),
            'AF':   (+1) * dims[1] + (-1),
            'FF':   (+1) * dims[1] + ( 0),
            'CF':   (+1) * dims[1] + (+1),
        }

        # Buoyancies
        bMM = 1. / rhoPad[0:-2,0:-2] # bottom left
        bME = 1. / rhoPad[0:-2,1:-1] # bottom centre
        bMP = 1. / rhoPad[0:-2,2:  ] # bottom right
        bEM = 1. / rhoPad[1:-1,0:-2] # middle left
        bEE = 1. / rhoPad[1:-1,1:-1] # middle centre
        bEP = 1. / rhoPad[1:-1,2:  ] # middle right
        bPM = 1. / rhoPad[2:  ,0:-2] # top    left
        bPE = 1. / rhoPad[2:  ,1:-1] # top    centre
        bPP = 1. / rhoPad[2:  ,2:  ] # top    right

        # Initialize averaged buoyancies on most of the grid
        bMM = (bEE + bMM) / 2 # a2
        bME = (bEE + bME) / 2 # d1
        bMP = (bEE + bMP) / 2 # d2
        bEM = (bEE + bEM) / 2 # a1
        # ... middle
        bEP = (bEE + bEP) / 2 # c1
        bPM = (bEE + bPM) / 2 # f2
        bPE = (bEE + bPE) / 2 # f1
        bPP = (bEE + bPP) / 2 # c2

        # Reset the buoyancies on the outside edges
        # bMM[ 0, :] = bEE[ 0, :]
        # bMM[ :, 0] = bEE[ :, 0]
        # bME[ 0, :] = bEE[ 0, :]
        # bMP[ 0, :] = bEE[ 0, :]
        # bMP[ :,-1] = bEE[ :,-1]
        # bEM[ :, 0] = bEE[ :, 0]
        # bEP[ :,-1] = bEE[ :,-1]
        # bPM[-1, :] = bEE[-1, :]
        # bPM[ :, 0] = bEE[ :, 0]
        # bPE[-1, :] = bEE[-1, :]
        # bPP[-1, :] = bEE[-1, :]
        # bPP[ :,-1] = bEE[ :,-1]

        # K = omega^2/(c^2 . rho)
        kMM = K[0:-2,0:-2] # bottom left
        kME = K[0:-2,1:-1] # bottom centre
        kMP = K[0:-2,2:  ] # bottom centre
        kEM = K[1:-1,0:-2] # middle left
        kEE = K[1:-1,1:-1] # middle centre
        kEP = K[1:-1,2:  ] # middle right
        kPM = K[2:  ,0:-2] # top    left
        kPE = K[2:  ,1:-1] # top    centre
        kPP = K[2:  ,2:  ] # top    right

        # 9-point fd star
        acoef   = 0.5461
        bcoef   = 0.4539
        ccoef   = 0.6248
        dcoef   = 0.09381
        ecoef   = 0.000001297

        # 5-point fd star
        # acoef = 1.0
        # bcoef = 0.0
        # ecoef = 0.0

        # NB: bPM and bMP here are switched relative to S. Roecker's version
        #     in OMEGA. This is because the labelling herein is always ?ZX.

        diagonals = {
            'AD':   ecoef*kMM
                    + bcoef*bMM*((r1zsq+r1xsq)/(4*dxz) - (r2z+r2x)/(4*dd)),
            'DD':   dcoef*kME
                    + acoef*bME*(r1zsq/dz - r2z/2)/dz
                    + bcoef*(r1zsq-r1xsq)*(bMP+bMM)/(4*dxz),
            'CD':   ecoef*kMP
                    + bcoef*bMP*((r1zsq+r1xsq)/(4*dxz) - (r2z+r2x)/(4*dd)),
            'AA':   dcoef*kEM
                    + acoef*bEM*(r1xsq/dx - r2x/2)/dx
                    + bcoef*(r1xsq-r1zsq)*(bPM+bMM)/(4*dxz),
            'BE':   ccoef*kEE
                    + acoef*(r2x*(bEM-bEP)/(2*dx) + r2z*(bME-bPE)/(2*dz) - r1xsq*(bEM+bEP)/dxx - r1zsq*(bME+bPE)/dzz)
                    + bcoef*(((r2x+r2z)*(bMM-bPP) + (r2z-r2x)*(bMP-bPM))/(4*dd) - (r1xsq+r1zsq)*(bMM+bPP+bPM+bMP)/(4*dxz)),
            'CC':   dcoef*kEP
                    + acoef*bEP*(r1xsq/dx + r2x/2)/dx
                    + bcoef*(r1xsq-r1zsq)*(bMP+bPP)/(4*dxz),
            'AF':   ecoef*kPM
                    + bcoef*bPM*((r1zsq+r1xsq)/(4*dxz) - (r2z+r2x)/(4*dd)),
            'FF':   dcoef*kPE
                    + acoef*bPE*(r1zsq/dz - r2z/2)/dz
                    + bcoef*(r1zsq-r1xsq)*(bPM+bPP)/(4*dxz),
            'CF':   ecoef*kPP
                    + bcoef*bPP*((r1zsq+r1xsq)/(4*dxz) - (r2z+r2x)/(4*dd)),
        }

        diagonals['AD'] = diagonals['AD'].ravel()[dims[1]+1:          ]
        diagonals['DD'] = diagonals['DD'].ravel()[dims[1]  :          ]
        diagonals['CD'] = diagonals['CD'].ravel()[dims[1]-1:          ]
        diagonals['AA'] = diagonals['AA'].ravel()[        1:          ]
        diagonals['BE'] = diagonals['BE'].ravel()[         :          ]
        diagonals['CC'] = diagonals['CC'].ravel()[         :-1        ]
        diagonals['AF'] = diagonals['AF'].ravel()[         :-dims[1]+1]
        diagonals['FF'] = diagonals['FF'].ravel()[         :-dims[1]  ]
        diagonals['CF'] = diagonals['CF'].ravel()[         :-dims[1]-1]

        # self._setupBoundary(diagonals, freeSurf)
        if any(freeSurf):
            raise NotImplementedError('Free surface not implemented!')

        # for key in diagonals.keys():
        #     print('%s:\t%d\t%d'%(key, diagonals[key].size, offsets[key]))

        diagonals = [diagonals[key] for key in keys]
        offsets = [offsets[key] for key in keys]

        A = scipy.sparse.diags(diagonals, offsets, shape=(self.mesh.nN, self.mesh.nN), format='csr', dtype=self.dtypeComplex)#, shape=(self.mesh.nN, self.mesh.nN))#, self.mesh.nN, self.mesh.nN, format='csr')

        return A

    # def _setupBoundary(self, diagonals, freeSurf):
    #     """
    #     Function to set up boundary regions for the Seismic FDFD problem
    #     using the 9-point finite-difference stencil from OMEGA/FULLWV.
    #     """

    #     keys = diagonals.keys()
    #     pickDiag = lambda x: -1. if freeSurf[x] else 1.

    #     # Left
    #     for key in keys:
    #         if key is 'BE':
    #             diagonals[key][:,0] = pickDiag(3)
    #         else:
    #             diagonals[key][:,0] = 0.

    #     # Right
    #     for key in keys:
    #         if key is 'BE':
    #             diagonals[key][:,-1] = pickDiag(1)
    #         else:
    #             diagonals[key][:,-1] = 0.

    #     # Bottom
    #     for key in keys:
    #         if key is 'BE':
    #             diagonals[key][0,:] = pickDiag(2)
    #         else:
    #             diagonals[key][0,:] = 0.

    #     # Top
    #     for key in keys:
    #         if key is 'BE':
    #             diagonals[key][-1,:] = pickDiag(0)
    #         else:
    #             diagonals[key][-1,:] = 0.

    # ------------------------------------------------------------------------
    # Externally-callable functions

    def clear(self):
        self._invalidateMatrix()
    
    # What about @caching decorators?
    def forward(self, src, dOnly=True):

        q = self.kyweight * src.getq(self.mesh)
        u = self.Ainv * q

        d = numpy.array([numpy.dot(P,u) for P in src.getP(self.mesh, self.ky)]).ravel()

        if dOnly:
            return d
        else:
            return u, d

    def backprop(self, src, dresid=1.):

        qr = self.kyweight * src.getqback(self.mesh, dresid, self.ky)
        u = self.Ainv * qr

        return u
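
A minimal, self-contained sketch (not from the original project) of the sparse-assembly idea used above: each of the nine stencil positions becomes one diagonal of the system matrix, offset by k*dims[1] + j in row-major node ordering. The 3x4 grid and unit weights below are assumptions chosen purely for illustration.

import numpy
import scipy.sparse

nz, nx = 3, 4                  # toy grid: nz*nx nodes, row-major ordering
n = nz * nx
offsets = [-nx - 1, -nx, -nx + 1, -1, 0, 1, nx - 1, nx, nx + 1]

# One weight array per stencil position; each has length n - |offset|,
# matching the ravel()[...] slices used by the kernel above.
weights = [numpy.ones(n - abs(off)) for off in offsets]

A = scipy.sparse.diags(weights, offsets, shape=(n, n), format='csr')
print(A.shape, A.nnz)          # (12, 12) with nine banded diagonals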
Example #28
0
class CacheManager(object):
    '''The librosa cache manager class wraps joblib.Memory
    with a __call__ attribute, so that it may act as a function.

    Additionally, it provides a caching level filter, so that
    different functions can be cached or not depending on the user's
    preference for speed vs. storage usage.
    '''
    def __init__(self, *args, **kwargs):

        level = kwargs.pop('level', 10)

        # Initialize the memory object
        self.memory = Memory(*args, **kwargs)
        # The level parameter controls which data we cache
        # smaller numbers mean less caching
        self.level = level

    def __call__(self, level):
        '''Example usage:

        @cache(level=2)
        def semi_important_function(some_arguments):
            ...
        '''
        def wrapper(function):
            '''Decorator function.  Adds an input/output cache to
            the specified function.'''

            from decorator import FunctionMaker

            def decorator_apply(dec, func):
                """Decorate a function by preserving the signature even if dec
                is not a signature-preserving decorator.

                This recipe is derived from
                http://micheles.googlecode.com/hg/decorator/documentation.html#id14
                """

                return FunctionMaker.create(func,
                                            'return decorated(%(signature)s)',
                                            dict(decorated=dec(func)),
                                            __wrapped__=func)

            if self.memory.location is not None and self.level >= level:
                return decorator_apply(self.memory.cache, function)

            else:
                return function

        return wrapper

    def clear(self, *args, **kwargs):
        return self.memory.clear(*args, **kwargs)

    def eval(self, *args, **kwargs):
        return self.memory.eval(*args, **kwargs)

    def format(self, *args, **kwargs):
        return self.memory.format(*args, **kwargs)

    def reduce_size(self, *args, **kwargs):
        return self.memory.reduce_size(*args, **kwargs)

    def warn(self, *args, **kwargs):
        return self.memory.warn(*args, **kwargs)
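
A hypothetical usage sketch of the level-filtered cache documented in the CacheManager docstring above. The directory, verbosity, and level values are assumptions, and the sketch assumes the CacheManager class above (and its decorator-package dependency) is importable.

import tempfile

cache = CacheManager(tempfile.mkdtemp(), verbose=0, level=10)

@cache(level=10)            # level <= manager level: wrapped by joblib Memory
def cheap_to_store(x):
    return x * 2

@cache(level=40)            # level > manager level: returned undecorated
def too_big_to_cache(x):
    return x * 2

print(cheap_to_store(3), too_big_to_cache(3))
cache.clear(warn=False)     # drop everything under the cache directory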
Example #29
0
    parser.add_option("-c",
                      "--clear",
                      dest="clear",
                      action='store_true',
                      help="if True, clear the cache.",
                      default=False)
    parser.add_option("-b",
                      "--backend",
                      dest="backend",
                      help="backend for parsing (selenium | requests)",
                      default='requests')
    options, args = parser.parse_args()
    backend, clear = options.backend, options.clear

    if clear:
        mem.clear()

    random.seed()
    gen_date = time.strftime("%B %d, %Y")
    url_tails = ['1521584321377182930', '12188330066413208874']
    papers = [
        'MEG and EEG data analysis with MNE-Python',
        'MNE software for processing MEG and EEG data'
    ]

    publications = list()
    for url_tail, paper in zip(url_tails, papers):
        titles, authors, links = get_citing_articles(
            'https://scholar.google.co.in/scholar?cites=%s' % url_tail,
            backend=backend)
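
The fragment above relies on a module-level mem object and a get_citing_articles helper defined elsewhere in the script. A hedged sketch of how that wiring could look with joblib; the cache location, signature, and body below are assumptions, not the original code.

from joblib import Memory

mem = Memory(location='.scholar_cache', verbose=0)

@mem.cache
def get_citing_articles(url, backend='requests'):
    # Placeholder: fetch and parse the Google Scholar citations page here,
    # with the expensive download memoized on disk by joblib.
    titles, authors, links = [], [], []
    return titles, authors, links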
Example #30
0
File: Kernel.py Project: rowanc1/zephyr
class SeisFDFDKernel(object):

    # source array ref

    # receiver array ref

    mesh = None
    freq = None
    Solver = lambda: None

    def __init__(self, systemConfig, locator=None, **kwargs):

        if locator is not None:
            self._locator = locator
        else:
            self._locator = SeisLocator25D(systemConfig['geom'])

        if systemConfig.get('cache', False):
            try:
                from tempfile import mkdtemp
                from joblib import Memory
            except ImportError:
                pass
            else:
                if 'cacheDir' in systemConfig:
                    cacheDir = systemConfig['cacheDir']
                    try:
                        os.makedirs(cacheDir)
                    except OSError as e:
                        if e.errno == errno.EEXIST and os.path.isdir(cacheDir):
                            pass
                        else:
                            raise
                else:
                    cacheDir = mkdtemp()

                self._mem = Memory(location=cacheDir, verbose=0)

                # Cache outputs of these methods
                self.forward = self._mem.cache(self.forward)
                self.backprop = self._mem.cache(self.backprop)

        hx = [(systemConfig['dx'], systemConfig['nx'])]
        hz = [(systemConfig['dz'], systemConfig['nz'])]
        self.mesh = SimPEG.Mesh.TensorMesh([hx, hz], '00')

        initMap = {
            #   Argument        Rename to Property
            'c': 'cR',
            'Q': None,
            'rho': None,
            'nPML': None,
            'freeSurf': None,
            'freq': None,
            'ky': None,
            'kyweight': None,
            'Solver': None,
            'ireg': None,
            'dx': None,
            'dz': None,
        }

        for key in initMap.keys():
            if key in systemConfig:
                if initMap[key] is None:
                    setattr(self, key, systemConfig[key])
                else:
                    setattr(self, initMap[key], systemConfig[key])

    def __del__(self):
        if hasattr(self, '_mem'):
            self._mem.clear()
            cacheDir = self._mem.location
            del self._mem
            shutil.rmtree(cacheDir)

    # Model properties

    @property
    def c(self):
        return self.cR + self.cI

    @c.setter
    def c(self, value):
        self._cR = value.real
        self._cI = value.imag
        self._invalidateMatrix()

    @property
    def rho(self):
        if getattr(self, '_rho', None) is None:
            self._rho = 310 * self.c**0.25
        return self._rho

    @rho.setter
    def rho(self, value):
        self._rho = value
        self._invalidateMatrix()

    @property
    def Q(self):
        if getattr(self, '_Q', None) is None:
            self._Q = numpy.inf
        return self._Q

    @Q.setter
    def Q(self, value):
        self._Q = value
        self._invalidateMatrix()

    @property
    def cR(self):
        return self._cR

    @cR.setter
    def cR(self, value):
        self._cR = value
        self._invalidateMatrix()

    @property
    def cI(self):
        if self.Q is numpy.inf:
            return 0
        else:
            return 1j * self.cR / (2 * self.Q)

    @cI.setter
    def cI(self, value):
        if (value == 0).all():
            self._Q = numpy.inf
        else:
            self._Q = 1j * self.cR / (2 * value)
        self._invalidateMatrix()

    # Modelling properties

    @property
    def nPML(self):
        if getattr(self, '_nPML', None) is None:
            self._nPML = DEFAULT_PML_SIZE
        return self._nPML

    @nPML.setter
    def nPML(self, value):
        self._nPML = value
        self._invalidateMatrix()

    @property
    def freeSurf(self):
        if getattr(self, '_freeSurf', None) is None:
            self._freeSurf = DEFAULT_FREESURF_BOUNDS
        return self._freeSurf

    @freeSurf.setter
    def freeSurf(self, value):
        self._freeSurf = value
        self._invalidateMatrix()

    @property
    def ky(self):
        if getattr(self, '_ky', None) is None:
            self._ky = 0.
        return self._ky

    @ky.setter
    def ky(self, value):
        self._ky = value
        self._invalidateMatrix()

    @property
    def ireg(self):
        if getattr(self, '_ireg', None) is None:
            self._ireg = DEFAULT_IREG
        return self._ireg

    @ireg.setter
    def ireg(self, value):
        self._ireg = value

    # Clever matrix setup properties

    @property
    def Solver(self):
        if getattr(self, '_Solver', None) is None:
            self._Solver = SimPEG.SolverWrapD(DEFAULT_SOLVER)
        return self._Solver

    @Solver.setter
    def Solver(self, value):
        self._Solver = value

    @property
    def A(self):
        if getattr(self, '_A', None) is None:
            self._A = self._initHelmholtzNinePoint()
        return self._A

    @property
    def Ainv(self):
        if getattr(self, '_Ainv', None) is None:
            self._mfact()
        return self._Ainv

    def _invalidateMatrix(self):
        if getattr(self, '_A', None) is not None:
            del (self._A)
        if getattr(self, '_Ainv', None) is not None:
            del (self._Ainv)
        if getattr(self, '_mem', None) is not None:
            self._mem.clear()

    # ------------------------------------------------------------------------
    # Matrix setup

    def _mfact(self):
        self._Ainv = self.Solver(self.A)

    def _initHelmholtzNinePoint(self):
        """
        An attempt to reproduce the finite-difference stencil and the
        general behaviour of OMEGA by Pratt et al. The stencil is a 9-point
        second-order version based on work by a number of people in the mid-90s
        including Ivan Stekl. The boundary conditions are based on the PML
        implementation by Steve Roecker in fdfdpml.f.
        """

        # Set up SimPEG mesh
        dims = (self.mesh.nNy, self.mesh.nNx)
        mAve = self.mesh.aveN2CC

        c = (mAve.T * self.c.ravel()).reshape(dims)
        rho = (mAve.T * self.rho.ravel()).reshape(dims)

        # fast --> slow is x --> y --> z as Fortran

        # Set up physical properties in matrices with padding
        omega = 2 * numpy.pi * self.freq
        cPad = numpy.pad(c, pad_width=1, mode='edge')
        rhoPad = numpy.pad(rho, pad_width=1, mode='edge')

        aky = 2 * numpy.pi * self.ky

        # Model parameter K
        K = ((omega**2 / cPad**2) - aky**2) / rhoPad

        # Horizontal, vertical and diagonal geometry terms
        dx = self.mesh.hx[0]
        dz = self.mesh.hy[0]
        dxx = dx**2
        dzz = dz**2
        dxz = dx * dz
        dd = numpy.sqrt(dxz)

        # PML decay terms
        # NB: Arrays are padded later, but 'c' in these lines
        #     comes from the original (un-padded) version

        nPML = self.nPML

        pmldx = dx * (nPML - 1)
        pmldz = dz * (nPML - 1)
        pmlr = 1e-3
        pmlfx = 3.0 * numpy.log(1 / pmlr) / (2 * pmldx**3)
        pmlfz = 3.0 * numpy.log(1 / pmlr) / (2 * pmldz**3)

        dpmlx = numpy.zeros(dims, dtype=numpy.complex128)
        dpmlz = numpy.zeros(dims, dtype=numpy.complex128)
        isnx = numpy.zeros(dims, dtype=numpy.float64)
        isnz = numpy.zeros(dims, dtype=numpy.float64)

        # Only enable PML if the free surface isn't set

        freeSurf = self.freeSurf

        if freeSurf[0]:
            isnz[-nPML:, :] = -1  # Top

        if freeSurf[1]:
            isnx[:, -nPML:] = -1  # Right Side

        if freeSurf[2]:
            isnz[:nPML, :] = 1  # Bottom

        if freeSurf[3]:
            isnx[:, :nPML] = 1  # Left side

        dpmlx[:, :nPML] = (numpy.arange(nPML, 0, -1) * dx).reshape((1, nPML))
        dpmlx[:, -nPML:] = (numpy.arange(1, nPML + 1, 1) * dx).reshape(
            (1, nPML))
        dnx = pmlfx * c * dpmlx**2
        ddnx = 2 * pmlfx * c * dpmlx
        denx = dnx + 1j * omega
        r1x = 1j * omega / denx
        r1xsq = r1x**2
        r2x = isnx * r1xsq * ddnx / denx

        dpmlz[:nPML, :] = (numpy.arange(nPML, 0, -1) * dz).reshape((nPML, 1))
        dpmlz[-nPML:, :] = (numpy.arange(1, nPML + 1, 1) * dz).reshape(
            (nPML, 1))
        dnz = pmlfz * c * dpmlz**2
        ddnz = 2 * pmlfz * c * dpmlz
        denz = dnz + 1j * omega
        r1z = 1j * omega / denz
        r1zsq = r1z**2
        r2z = isnz * r1zsq * ddnz / denz

        # Visual key for finite-difference terms
        # (per Pratt and Worthington, 1990)
        #
        #   This         Original
        # AF FF CF  vs.  AD DD CD
        # AA BE CC  vs.  AA BE CC
        # AD DD CD  vs.  AF FF CF

        # Set of keys to index the dictionaries
        keys = ['AD', 'DD', 'CD', 'AA', 'BE', 'CC', 'AF', 'FF', 'CF']

        # Diagonal offsets for the sparse matrix formation
        offsets = {
            'AD': (-1) * dims[1] + (-1),
            'DD': (-1) * dims[1] + (0),
            'CD': (-1) * dims[1] + (+1),
            'AA': (0) * dims[1] + (-1),
            'BE': (0) * dims[1] + (0),
            'CC': (0) * dims[1] + (+1),
            'AF': (+1) * dims[1] + (-1),
            'FF': (+1) * dims[1] + (0),
            'CF': (+1) * dims[1] + (+1),
        }

        # Buoyancies
        bMM = 1. / rhoPad[0:-2, 0:-2]  # bottom left
        bME = 1. / rhoPad[0:-2, 1:-1]  # bottom centre
        bMP = 1. / rhoPad[0:-2, 2:]  # bottom right
        bEM = 1. / rhoPad[1:-1, 0:-2]  # middle left
        bEE = 1. / rhoPad[1:-1, 1:-1]  # middle centre
        bEP = 1. / rhoPad[1:-1, 2:]  # middle right
        bPM = 1. / rhoPad[2:, 0:-2]  # top    left
        bPE = 1. / rhoPad[2:, 1:-1]  # top    centre
        bPP = 1. / rhoPad[2:, 2:]  # top    right

        # Initialize averaged buoyancies on most of the grid
        bMM = (bEE + bMM) / 2  # a2
        bME = (bEE + bME) / 2  # d1
        bMP = (bEE + bMP) / 2  # d2
        bEM = (bEE + bEM) / 2  # a1
        # ... middle
        bEP = (bEE + bEP) / 2  # c1
        bPM = (bEE + bPM) / 2  # f2
        bPE = (bEE + bPE) / 2  # f1
        bPP = (bEE + bPP) / 2  # c2

        # Reset the buoyancies on the outside edges
        bMM[0, :] = bEE[0, :]
        bMM[:, 0] = bEE[:, 0]
        bME[0, :] = bEE[0, :]
        bMP[0, :] = bEE[0, :]
        bMP[:, -1] = bEE[:, -1]
        bEM[:, 0] = bEE[:, 0]
        bEP[:, -1] = bEE[:, -1]
        bPM[-1, :] = bEE[-1, :]
        bPM[:, 0] = bEE[:, 0]
        bPE[-1, :] = bEE[-1, :]
        bPP[-1, :] = bEE[-1, :]
        bPP[:, -1] = bEE[:, -1]

        # K = (omega^2/c^2 - ky^2) / rho
        kMM = K[0:-2, 0:-2]  # bottom left
        kME = K[0:-2, 1:-1]  # bottom centre
        kMP = K[0:-2, 2:]  # bottom right
        kEM = K[1:-1, 0:-2]  # middle left
        kEE = K[1:-1, 1:-1]  # middle centre
        kEP = K[1:-1, 2:]  # middle right
        kPM = K[2:, 0:-2]  # top    left
        kPE = K[2:, 1:-1]  # top    centre
        kPP = K[2:, 2:]  # top    right

        # 9-point fd star
        acoef = 0.5461
        bcoef = 0.4539
        ccoef = 0.6248
        dcoef = 0.09381
        ecoef = 0.000001297

        # 5-point fd star
        # acoef = 1.0
        # bcoef = 0.0
        # ecoef = 0.0

        # NB: bPM and bMP here are switched relative to S. Roecker's version
        #     in OMEGA. This is because the labelling herein is always ?ZX.

        diagonals = {
            'AD':
            ecoef * kMM + bcoef * bMM * ((r1zsq + r1xsq) / (4 * dxz) -
                                         (r2z + r2x) / (4 * dd)),
            'DD':
            dcoef * kME + acoef * bME * (r1zsq / dz - r2z / 2) / dz + bcoef *
            (r1zsq - r1xsq) * (bMP + bMM) / (4 * dxz),
            'CD':
            ecoef * kMP + bcoef * bMP * ((r1zsq + r1xsq) / (4 * dxz) -
                                         (r2z + r2x) / (4 * dd)),
            'AA':
            dcoef * kEM + acoef * bEM * (r1xsq / dx - r2x / 2) / dx + bcoef *
            (r1xsq - r1zsq) * (bPM + bMM) / (4 * dxz),
            'BE':
            ccoef * kEE + acoef * (r2x * (bEM - bEP) / (2 * dx) + r2z *
                                   (bME - bPE) / (2 * dz) - r1xsq *
                                   (bEM + bEP) / dxx - r1zsq *
                                   (bME + bPE) / dzz) + bcoef *
            (((r2x + r2z) * (bMM - bPP) + (r2z - r2x) * (bMP - bPM)) /
             (4 * dd) - (r1xsq + r1zsq) * (bMM + bPP + bPM + bMP) / (4 * dxz)),
            'CC':
            dcoef * kEP + acoef * bEP * (r1xsq / dx + r2x / 2) / dx + bcoef *
            (r1xsq - r1zsq) * (bMP + bPP) / (4 * dxz),
            'AF':
            ecoef * kPM + bcoef * bPM * ((r1zsq + r1xsq) / (4 * dxz) -
                                         (r2z + r2x) / (4 * dd)),
            'FF':
            dcoef * kPE + acoef * bPE * (r1zsq / dz - r2z / 2) / dz + bcoef *
            (r1zsq - r1xsq) * (bPM + bPP) / (4 * dxz),
            'CF':
            ecoef * kPP + bcoef * bPP * ((r1zsq + r1xsq) / (4 * dxz) -
                                         (r2z + r2x) / (4 * dd)),
        }

        self._setupBoundary(diagonals, freeSurf)

        diagonals = numpy.array([diagonals[key].ravel() for key in keys])
        offsets = [offsets[key] for key in keys]

        A = scipy.sparse.spdiags(diagonals,
                                 offsets,
                                 self.mesh.nN,
                                 self.mesh.nN,
                                 format='csr')

        return A

    def _setupBoundary(self, diagonals, freeSurf):
        """
        Function to set up boundary regions for the Seismic FDFD problem
        using the 9-point finite-difference stencil from OMEGA/FULLWV.
        """

        keys = diagonals.keys()
        pickDiag = lambda x: -1. if freeSurf[x] else 1.

        # Left
        for key in keys:
            if key == 'BE':
                diagonals[key][:, 0] = pickDiag(3)
            else:
                diagonals[key][:, 0] = 0.

        # Right
        for key in keys:
            if key == 'BE':
                diagonals[key][:, -1] = pickDiag(1)
            else:
                diagonals[key][:, -1] = 0.

        # Bottom
        for key in keys:
            if key == 'BE':
                diagonals[key][0, :] = pickDiag(2)
            else:
                diagonals[key][0, :] = 0.

        # Top
        for key in keys:
            if key == 'BE':
                diagonals[key][-1, :] = pickDiag(0)
            else:
                diagonals[key][-1, :] = 0.

    # Quasi-functional attempt -----------------------------------------------
    #
    def _srcVec(self, sLocs, terms):

        q = numpy.zeros((self.mesh.nNy, self.mesh.nNx), dtype=numpy.complex128)
        srcScale = -self.dx * self.dz

        if self.ireg == 0:
            # Closest source point
            q = q.ravel()

            for i in range(len(sLocs)):
                qI = SimPEG.Utils.closestPoints(self.mesh,
                                                sLocs[i],
                                                gridLoc='N')
                q[qI] += terms[i] / srcScale

        else:
            # Kaiser windowed sinc function

            freg = 2 * self.ireg + 1
            q = numpy.pad(q, self.ireg, mode='constant')

            for i in range(len(sLocs)):
                qI = SimPEG.Utils.closestPoints(self.mesh,
                                                sLocs[i],
                                                gridLoc='N')
                Zi, Xi = (qI // self.mesh.nNx, numpy.mod(qI, self.mesh.nNx))
                offset = (sLocs[i][0] - Xi * self.dx,
                          sLocs[i][1] - Zi * self.dz)
                sourceRegion = KaiserWindowedSinc(self.ireg, offset)
                q[Zi:Zi + freg,
                  Xi:Xi + freg] += terms[i] * sourceRegion / srcScale

            # Mirror and flip sign on terms that cross the free-surface boundary
            if self.freeSurf[0]:
                q[self.ireg:2 * self.ireg, :] -= numpy.flipud(
                    q[:self.ireg, :])  # Top
            if self.freeSurf[1]:
                q[:, -2 * self.ireg:-self.ireg] -= numpy.fliplr(
                    q[:, -self.ireg:])  # Right
            if self.freeSurf[2]:
                q[-2 * self.ireg:-self.ireg, :] -= numpy.flipud(
                    q[-self.ireg:, :])  # Bottom
            if self.freeSurf[3]:
                q[:, self.ireg:2 * self.ireg] -= numpy.fliplr(
                    q[:, :self.ireg])  # Left

            # Cut off edges
            q = q[self.ireg:-self.ireg, self.ireg:-self.ireg].ravel()

        return q

    def _srcTerm(self, sLocs, individual=True, terms=1):

        if individual and len(sLocs) > 1:
            result = []
            for i in range(len(sLocs)):
                result.append(
                    self._srcVec([
                        sLocs[i] if hasattr(sLocs, '__contains__') else sLocs
                    ], [terms[i]] if hasattr(terms, '__contains__') else
                                 [terms]))
        else:
            result = self._srcVec(
                sLocs if hasattr(sLocs, '__contains__') else [sLocs],
                terms if hasattr(terms, '__contains__') else [terms])

        return result

    #
    # Quasi-functional attempt -----------------------------------------------

    # ------------------------------------------------------------------------
    # Externally-callable functions

    def clear(self):
        self._invalidateMatrix()

    # What about @caching decorators?
    def forward(self, isrc, dOnly=True, sterm=1.):

        sloc, rlocs, coeffs = self._locator(isrc, self.ky)

        q = self._srcTerm(sloc, individual=True, terms=sterm)
        u = self.Ainv * q

        d = numpy.array([
            numpy.dot(u, qr)
            for qr in self._srcTerm(rlocs, individual=True, terms=coeffs)
        ])

        if dOnly:
            return d
        else:
            return u, d

    def backprop(self, isrc, dresid=1.):

        sloc, rlocs, coeffs = self._locator(isrc, self.ky)

        qr = self._srcTerm(rlocs, individual=False, terms=dresid * coeffs)

        u = self.Ainv * qr

        return u
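
SeisFDFDKernel above memoizes its forward and backprop methods through a per-instance joblib Memory, clears the cache whenever the system matrix is invalidated, and removes the cache directory in __del__. A stripped-down sketch of that pattern follows; the class and method names are illustrative, not from zephyr.

import shutil
from tempfile import mkdtemp
from joblib import Memory

class CachedSolver(object):
    def __init__(self):
        self._cache_dir = mkdtemp()
        self._mem = Memory(location=self._cache_dir, verbose=0)
        # Replace the expensive bound method with its memoized version
        self.solve = self._mem.cache(self.solve)

    def solve(self, rhs):
        # Stand-in for an expensive factorization / solve
        return [2.0 * r for r in rhs]

    def invalidate(self):
        # Drop all cached results when the underlying system changes
        self._mem.clear(warn=False)

    def __del__(self):
        self._mem.clear(warn=False)
        shutil.rmtree(self._cache_dir, ignore_errors=True)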
Example #31
0
File: interfaces.py Project: dPys/PyNets
    def _run_interface(self, runtime):
        import os
        import gc
        import time
        import nibabel as nib
        from pynets.core.utils import load_runconfig
        from nipype.utils.filemanip import fname_presuffix, copyfile
        from pynets.fmri import clustering
        from pynets.registration.utils import orient_reslice
        from joblib import Parallel, delayed
        from joblib.externals.loky.backend import resource_tracker
        from pynets.registration import utils as regutils
        from pynets.core.utils import decompress_nifti
        import pkg_resources
        import shutil
        import tempfile
        resource_tracker.warnings = None

        template = pkg_resources.resource_filename(
            "pynets", f"templates/standard/{self.inputs.template_name}_brain_"
            f"{self.inputs.vox_size}.nii.gz")

        template_tmp_path = fname_presuffix(template,
                                            suffix="_tmp",
                                            newpath=runtime.cwd)
        copyfile(template, template_tmp_path, copy=True, use_hardlink=False)

        hardcoded_params = load_runconfig()

        c_boot = hardcoded_params["c_boot"][0]
        nthreads = hardcoded_params["omp_threads"][0]

        clust_list = ["kmeans", "ward", "complete", "average", "ncut", "rena"]

        clust_mask_temp_path = orient_reslice(self.inputs.clust_mask,
                                              runtime.cwd,
                                              self.inputs.vox_size)
        cm_suf = os.path.basename(self.inputs.clust_mask).split('.nii')[0]
        clust_mask_in_t1w_path = f"{runtime.cwd}/clust_mask-" \
                                 f"{cm_suf}_in_t1w.nii.gz"

        t1w_brain_tmp_path = fname_presuffix(self.inputs.t1w_brain,
                                             suffix="_tmp",
                                             newpath=runtime.cwd)
        copyfile(self.inputs.t1w_brain,
                 t1w_brain_tmp_path,
                 copy=True,
                 use_hardlink=False)

        mni2t1w_warp_tmp_path = fname_presuffix(self.inputs.mni2t1w_warp,
                                                suffix="_tmp",
                                                newpath=runtime.cwd)
        copyfile(
            self.inputs.mni2t1w_warp,
            mni2t1w_warp_tmp_path,
            copy=True,
            use_hardlink=False,
        )

        mni2t1_xfm_tmp_path = fname_presuffix(self.inputs.mni2t1_xfm,
                                              suffix="_tmp",
                                              newpath=runtime.cwd)
        copyfile(self.inputs.mni2t1_xfm,
                 mni2t1_xfm_tmp_path,
                 copy=True,
                 use_hardlink=False)

        clust_mask_in_t1w = regutils.roi2t1w_align(
            clust_mask_temp_path,
            t1w_brain_tmp_path,
            mni2t1_xfm_tmp_path,
            mni2t1w_warp_tmp_path,
            clust_mask_in_t1w_path,
            template_tmp_path,
            self.inputs.simple,
        )
        time.sleep(0.5)

        if self.inputs.mask:
            out_name_mask = fname_presuffix(self.inputs.mask,
                                            suffix="_tmp",
                                            newpath=runtime.cwd)
            copyfile(self.inputs.mask,
                     out_name_mask,
                     copy=True,
                     use_hardlink=False)
        else:
            out_name_mask = None

        out_name_func_file = fname_presuffix(self.inputs.func_file,
                                             suffix="_tmp",
                                             newpath=runtime.cwd)
        copyfile(self.inputs.func_file,
                 out_name_func_file,
                 copy=True,
                 use_hardlink=False)
        out_name_func_file = decompress_nifti(out_name_func_file)

        if self.inputs.conf:
            out_name_conf = fname_presuffix(self.inputs.conf,
                                            suffix="_tmp",
                                            newpath=runtime.cwd)
            copyfile(self.inputs.conf,
                     out_name_conf,
                     copy=True,
                     use_hardlink=False)
        else:
            out_name_conf = None

        nip = clustering.NiParcellate(
            func_file=out_name_func_file,
            clust_mask=clust_mask_in_t1w,
            k=int(self.inputs.k),
            clust_type=self.inputs.clust_type,
            local_corr=self.inputs.local_corr,
            outdir=self.inputs.outdir,
            conf=out_name_conf,
            mask=out_name_mask,
        )

        atlas = nip.create_clean_mask()
        nip.create_local_clustering(overwrite=True, r_thresh=0.4)

        if self.inputs.clust_type in clust_list:
            if float(c_boot) > 1:
                import random
                from joblib import Memory
                from joblib.externals.loky import get_reusable_executor
                print(f"Performing circular block bootstrapping with {c_boot}"
                      f" iterations...")
                ts_data, block_size = nip.prep_boot()

                cache_dir = tempfile.mkdtemp()
                memory = Memory(cache_dir, verbose=0)
                ts_data = memory.cache(ts_data)

                def create_bs_imgs(ts_data, block_size, clust_mask_corr_img):
                    import nibabel as nib
                    from nilearn.masking import unmask
                    from pynets.fmri.estimation import timeseries_bootstrap
                    boot_series = timeseries_bootstrap(
                        ts_data.func, block_size)[0].astype('float32')
                    return unmask(boot_series, clust_mask_corr_img)

                def run_bs_iteration(i, ts_data, work_dir, local_corr,
                                     clust_type, _local_conn_mat_path,
                                     num_conn_comps, _clust_mask_corr_img,
                                     _standardize, _detrending, k, _local_conn,
                                     conf, _dir_path, _conn_comps):
                    import os
                    import time
                    import gc
                    from pynets.fmri.clustering import parcellate
                    print(f"\nBootstrapped iteration: {i}")
                    out_path = f"{work_dir}/boot_parc_tmp_{str(i)}.nii.gz"

                    boot_img = create_bs_imgs(ts_data, block_size,
                                              _clust_mask_corr_img)
                    try:
                        parcellation = parcellate(
                            boot_img, local_corr, clust_type,
                            _local_conn_mat_path, num_conn_comps,
                            _clust_mask_corr_img, _standardize, _detrending, k,
                            _local_conn, conf, _dir_path, _conn_comps)
                        parcellation.to_filename(out_path)
                        parcellation.uncache()
                        boot_img.uncache()
                        gc.collect()
                    except BaseException:
                        boot_img.uncache()
                        gc.collect()
                        return None
                    _clust_mask_corr_img.uncache()
                    return out_path

                time.sleep(random.randint(1, 5))
                counter = 0
                boot_parcellations = []
                while float(counter) < float(c_boot):
                    with Parallel(n_jobs=nthreads,
                                  max_nbytes='8000M',
                                  backend='loky',
                                  mmap_mode='r+',
                                  temp_folder=cache_dir,
                                  verbose=10) as parallel:
                        iter_bootedparcels = parallel(
                            delayed(run_bs_iteration)
                            (i, ts_data, runtime.cwd, nip.local_corr,
                             nip.clust_type, nip._local_conn_mat_path,
                             nip.num_conn_comps, nip._clust_mask_corr_img,
                             nip._standardize, nip._detrending, nip.k,
                             nip._local_conn, nip.conf, nip._dir_path,
                             nip._conn_comps) for i in range(c_boot))

                        boot_parcellations.extend(
                            [i for i in iter_bootedparcels if i is not None])
                        counter = len(boot_parcellations)
                        del iter_bootedparcels
                        gc.collect()

                print('Bootstrapped samples complete:')
                print(boot_parcellations)
                print("Creating spatially-constrained consensus "
                      "parcellation...")
                consensus_parcellation = clustering.ensemble_parcellate(
                    boot_parcellations, int(self.inputs.k))
                nib.save(consensus_parcellation, nip.parcellation)
                memory.clear(warn=False)
                shutil.rmtree(cache_dir, ignore_errors=True)
                del parallel, memory, cache_dir
                get_reusable_executor().shutdown(wait=True)
                gc.collect()

                for i in boot_parcellations:
                    if i is not None:
                        if os.path.isfile(i):
                            os.system(f"rm -f {i} &")
            else:
                print("Creating spatially-constrained parcellation...")
                out_path = f"{runtime.cwd}/{atlas}_{str(self.inputs.k)}.nii.gz"
                func_img = nib.load(out_name_func_file)
                parcellation = clustering.parcellate(
                    func_img, self.inputs.local_corr, self.inputs.clust_type,
                    nip._local_conn_mat_path, nip.num_conn_comps,
                    nip._clust_mask_corr_img, nip._standardize,
                    nip._detrending, nip.k, nip._local_conn, nip.conf,
                    nip._dir_path, nip._conn_comps)
                parcellation.to_filename(out_path)

        else:
            raise ValueError("Clustering method not recognized. See: "
                             "https://nilearn.github.io/modules/generated/"
                             "nilearn.regions.Parcellations."
                             "html#nilearn.regions.Parcellations")

        # Give it a minute
        ix = 0
        while not os.path.isfile(nip.parcellation) and ix < 60:
            print('Waiting for clustered parcellation...')
            time.sleep(1)
            ix += 1

        if not os.path.isfile(nip.parcellation):
            raise FileNotFoundError(f"Parcellation clustering failed for"
                                    f" {nip.parcellation}")

        self._results["atlas"] = atlas
        self._results["parcellation"] = nip.parcellation
        self._results["clust_mask"] = clust_mask_in_t1w_path
        self._results["k"] = self.inputs.k
        self._results["clust_type"] = self.inputs.clust_type
        self._results["clustering"] = True
        self._results["func_file"] = self.inputs.func_file

        reg_tmp = [
            t1w_brain_tmp_path, mni2t1w_warp_tmp_path, mni2t1_xfm_tmp_path,
            template_tmp_path, out_name_func_file
        ]
        for j in reg_tmp:
            if j is not None:
                if os.path.isfile(j):
                    os.system(f"rm -f {j} &")

        gc.collect()

        return runtime
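
The interface above pairs a temporary joblib Memory cache with loky-backed Parallel workers for the bootstrap loop, then clears the cache and deletes the temp folder once the consensus parcellation is written. A toy, self-contained sketch of that cache / parallelise / clean-up pattern follows; the data and bootstrap functions below are placeholders, not PyNets code.

import shutil
import tempfile
import numpy
from joblib import Memory, Parallel, delayed

cache_dir = tempfile.mkdtemp()
memory = Memory(cache_dir, verbose=0)

@memory.cache
def load_series(seed, n=1000):
    # Placeholder for an expensive data-preparation step
    return numpy.random.RandomState(seed).rand(n)

def one_bootstrap(i, data):
    # Placeholder bootstrap iteration: resample with replacement and summarise
    rs = numpy.random.RandomState(i)
    return data[rs.randint(0, data.size, data.size)].mean()

data = load_series(0)
estimates = Parallel(n_jobs=2, temp_folder=cache_dir)(
    delayed(one_bootstrap)(i, data) for i in range(8))

memory.clear(warn=False)
shutil.rmtree(cache_dir, ignore_errors=True)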
Example #32
0
class FingerprintMatcher:
    def __init__(self, cache_dir='joblib_cache', verbose: int = 10):
        cache_dir = os.path.abspath(cache_dir)
        if not os.path.exists(cache_dir):
            os.makedirs(cache_dir)
        self.cache_dir = cache_dir
        self.memory = Memory(cache_dir, verbose=0)
        # self.compute_lro = self.memory.cache(self.compute_lro) # useless by now
        self.verbose = verbose
        self.minutiaeLUT = {}

    def __del__(self):
        self.memory.clear(warn=False)
        if hasattr(self, 'minutiaeLUT'):
            for path in self.minutiaeLUT.values():
                os.remove(path)

    def compute_lro(self, image, bd_specs, num_dir):
        raise NotImplementedError("Derived class must reimplement this method")

    def fit(self, X, y):
        return self

    def precompute(self, X):
        """ Precomputes the minutiae for all the files involved in matching.

        Args:
            X (iterable): iterable where each element is an absolute file path.

        Note:
            The iterable X must contain each and every file that needs to be involved in the matching phase.
        """
        # Ensure that X is a list and not a generator
        X = list(X)

        # Create a function with fixed parameters
        def field_compute(padded_img, blkoffs, num_dir):
            # Get border and step information
            i, j = np.unravel_index(blkoffs, shape=padded_img.shape)
            bd_specs = {
                'border_x': j[0, 0],
                'border_y': i[0, 0],
                'step_x': j[0, 1] - j[0, 0],
                'step_y': i[1, 0] - i[0, 0],
            }
            field, mask = self.compute_lro(padded_img, bd_specs, num_dir)
            if field is None:  # allows to not change the direction map
                return None
            # Average pooling on field
            field = subsample(field,
                              is_field=True,
                              **bd_specs,
                              smooth=True,
                              policy='nist')
            # Convert field to index
            lro = angle(field, keepDims=False)
            idx = nbis_angle2idx(lro, N=num_dir)
            # Eventually apply a mask
            mask = subsample(mask.astype(int),
                             is_field=False,
                             **bd_specs,
                             smooth=False,
                             policy='nist')
            mask = np.round(mask).astype(bool)
            idx[np.logical_not(mask)] = -1
            return idx.astype('int32')

        def compute_minutiae(path):
            try:
                image = np.array(PIL.Image.open(path).convert('L'))
                M = mindtct(image, field_compute, contrast_boost=True)[-1]
                M = minutiae_selection(M)
            except Exception as err:
                print('Warning: skipping image due to', err)
                return None
            return M

        minutiae = Parallel(verbose=self.verbose)(delayed(compute_minutiae)(x)
                                                  for x in X)

        for x, M in zip(X, minutiae):
            if M is None:
                continue
            # Create a filename that hopefully is not taken by other objects
            filename = '{}{}{}.xyt'.format(id(self), id(M), time.time())
            # Save minutiae to file
            filepath = os.path.join(self.cache_dir, filename)
            to_csv_options = {'sep': ' ', 'header': False, 'index': False}
            pd.DataFrame(M).to_csv(filepath, **to_csv_options)
            # Record the filepath in a dictionary
            self.minutiaeLUT[x] = filepath

    def match_scores(self, X):
        """ Perform matching exploiting the previously computed minutiae.

        Args:
            X (iterable): each element is a file absolute path, and must correspond to one of the file paths passed to the pre-computation function.
        """
        def _scores_from_batch(batch):
            """ Computes the scores for a batch with couples of file paths. """
            # Filter out null elements, coming from the last batch
            # batch = filter(None, batch)
            # Create the mates file
            # mates_file = os.path.join(self.cache_dir, '{}{}{}.lis'.format(id(self), id(batch), time.time()))
            # excluded = []
            # with open(mates_file, 'w') as f:
            #     for n, pair in enumerate(batch):
            #         if pair[0] in self.minutiaeLUT and pair[1] in self.minutiaeLUT:
            #             f.write(self.minutiaeLUT[pair[0]]+'\n')
            #             f.write(self.minutiaeLUT[pair[1]]+'\n')
            #         else:
            #             excluded.append(n)
            if batch[0] not in self.minutiaeLUT or batch[
                    1] not in self.minutiaeLUT:
                return None
            # Run matcher
            exe_path = os.path.join(__NBIS_LIB__, 'bin', 'bozorth3')
            # command = "{} -M \"{}\"".format(exe_path, mates_file)
            command = "{} \"{}\" \"{}\"".format(exe_path,
                                                self.minutiaeLUT[batch[0]],
                                                self.minutiaeLUT[batch[1]])
            with Popen(command,
                       cwd=self.cache_dir,
                       shell=True,
                       universal_newlines=True,
                       stdout=PIPE,
                       stderr=PIPE) as proc:
                err = proc.stderr.read()
                if err != "":
                    raise RuntimeError(err)
                # Read the list of scores
                # Splits on newlines and remove empty strings
                # scores = [int(k) for k in filter(None, proc.stdout.read().split('\n'))]
                scores = int(proc.stdout.read().rstrip())
            # Put Nones where a matching couldn't be executed
            # for n in excluded:
            #     scores.insert(n, None)
            # os.remove(mates_file)
            return scores

        # X = grouper(X, 256)
        scores = Parallel(verbose=self.verbose,
                          batch_size=512)(delayed(_scores_from_batch)(x)
                                          for x in X)
        # scores = list(chain(*scores))

        return scores

    def predict(self, X):
        # match_scores returns a plain list, so cast to an array before thresholding
        scores = np.asarray(self.match_scores(X), dtype=float)
        return (scores > self.threshold).astype(int)
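
match_scores above dispatches one short bozorth3 subprocess per pair through joblib.Parallel, using a large batch_size so dispatch overhead stays small relative to the many quick calls. A self-contained sketch of that batched pairwise-scoring pattern follows; the scoring function below is a stand-in, not the NBIS matcher.

from joblib import Parallel, delayed

def score_pair(pair):
    # Stand-in similarity score; the real matcher shells out to bozorth3
    a, b = pair
    return abs(len(a) - len(b))

pairs = [('probe_01', 'gallery_07'), ('probe_02', 'gallery_11')]
scores = Parallel(n_jobs=2, batch_size=128)(
    delayed(score_pair)(p) for p in pairs)
print(scores)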