def test_get_cache_items(tmpdir):
    """Test cache items listing."""
    def func(arg):
        """Dummy function."""
        return arg

    register_hdfs_store_backend()

    mem = Memory(location=tmpdir.strpath, host=__namenode__, backend='hdfs',
                 user='******', verbose=100, compress=False)
    assert not mem.store.get_cache_items()

    cached_func = mem.cache(func)
    for arg in ["test1", "test2", "test3"]:
        cached_func(arg)

    # Three arguments were cached, so three items should be listed.
    assert len(mem.store.get_cache_items()) == 3

    mem.clear()
    assert not mem.store.get_cache_items()
def test_clear_cache(capsys, tmpdir):
    """Check clearing the cache."""
    def func(arg):
        """Dummy function."""
        print("executing function")
        return arg

    register_s3fs_store_backend()

    mem = Memory(location=tmpdir.strpath, backend='s3', verbose=0,
                 backend_options=dict(bucket="test"))
    cached_func = mem.cache(func)
    cached_func("test")
    out, _ = capsys.readouterr()
    assert out == "executing function\n"

    mem.clear()
    cached_func("test")
    out, _ = capsys.readouterr()
    assert out == "executing function\n"

    mem.clear()
    print(mem.store_backend.location)
    assert not os.listdir(mem.store_backend.location)
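# A minimal sketch of the same clear-then-recompute check against joblib's
# default local filesystem backend (no S3 or HDFS required). All names here
# are illustrative, not taken from the tests above.
import tempfile
from joblib import Memory

def expensive(arg):
    print("executing function")
    return arg

mem = Memory(location=tempfile.mkdtemp(), verbose=0)
cached = mem.cache(expensive)
cached("test")          # computed: prints "executing function"
cached("test")          # cache hit: prints nothing
mem.clear(warn=False)   # wipe the cache directory
cached("test")          # recomputed: prints again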
def run_transform(X, y, transform, cv_outer=LeaveOneOut(), n_alphas=1000):
    import shutil
    from tempfile import mkdtemp
    import numpy as np
    from tqdm import tqdm
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import Ridge
    from sklearn.model_selection import GridSearchCV
    from joblib import Memory
    from sklearn.compose import TransformedTargetRegressor
    from sklearn.preprocessing import PowerTransformer

    # Find alpha range (find_alpha_range and StratifiedKFoldReg are
    # project-local helpers assumed to be importable here)
    alphas = find_alpha_range(X, y, n_alphas=n_alphas)

    list_y_pred = []
    list_y_true = []
    list_models = []
    for train_index, test_index in tqdm(cv_outer.split(X)):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        list_y_true.append(y_test)

        cv_inner = StratifiedKFoldReg(n_splits=5, shuffle=True, random_state=0)
        tmpfolder = mkdtemp()
        memory = Memory(location=tmpfolder)
        pip = make_pipeline(VarianceThreshold(), PCA(), Ridge(max_iter=1e6),
                            memory=memory)
        grid = GridSearchCV(pip, param_grid={'ridge__alpha': alphas},
                            cv=cv_inner, n_jobs=-1,
                            scoring="neg_mean_squared_error")
        regr_trans = TransformedTargetRegressor(
            regressor=grid, transformer=PowerTransformer(method=transform))
        regr_trans.fit(X_train, y_train)
        list_models.append(regr_trans)
        y_pred = regr_trans.predict(X_test)
        list_y_pred.append(y_pred)
        memory.clear(warn=False)
        shutil.rmtree(tmpfolder)

    y_pred = np.concatenate(list_y_pred)
    y_true = np.concatenate(list_y_true)
    return y_pred, y_true, list_models
def run(X, y, cv_outer=StratifiedKFoldReg(n_splits=10, shuffle=True,
                                          random_state=10), n_alphas=1000):
    import shutil
    from tempfile import mkdtemp
    import numpy as np
    from tqdm import tqdm
    from sklearn.feature_selection import VarianceThreshold
    from sklearn.decomposition import PCA
    from sklearn.pipeline import make_pipeline
    from sklearn.linear_model import Lasso
    from sklearn.model_selection import GridSearchCV
    from joblib import Memory

    # Find alpha range
    alphas = find_alpha_range(X, y, n_alphas=n_alphas)

    list_y_pred = []
    list_y_true = []
    list_models = []
    for train_index, test_index in tqdm(cv_outer.split(X, y)):
        X_train, X_test = X[train_index], X[test_index]
        y_train, y_test = y[train_index], y[test_index]
        list_y_true.append(y_test)

        cv_inner = StratifiedKFoldReg(n_splits=5, shuffle=True, random_state=0)
        tmpfolder = mkdtemp()
        memory = Memory(location=tmpfolder, verbose=0)
        pip = make_pipeline(VarianceThreshold(), PCA(), Lasso(max_iter=1e6),
                            memory=memory)
        grid = GridSearchCV(pip, param_grid={'lasso__alpha': alphas},
                            cv=cv_inner, n_jobs=-1,
                            scoring="neg_mean_squared_error")
        grid.fit(X_train, y_train)
        list_models.append(grid)
        y_pred = grid.predict(X_test)
        list_y_pred.append(y_pred)
        memory.clear(warn=False)
        shutil.rmtree(tmpfolder)

    y_pred = np.concatenate(list_y_pred)
    y_true = np.concatenate(list_y_true)
    return y_pred, y_true, list_models
class NeoCache:

    def __init__(self, context, cache_dir=None):
        self.registry = {}
        self.context = context
        if cache_dir:
            self.cache_dir = cache_dir
        else:
            self.cache_dir = os.path.join(HERE, context)
        self.memory = Memory(self.cache_dir, verbose=0)

    def register(self):
        """
        Returns a decorator. The decorator ensures that the function name
        is registered and that the function is handled as a MemorizedFunc.
        """
        def decorator(func):
            if len(inspect.signature(func).parameters) > 0:
                raise TypeError(
                    'The signature of \'{}\' contains input arguments. '
                    'NeoCache only supports registering functions without '
                    'any input arguments.'.format(func.__name__))
            func = self.memory.cache(func)
            self.registry[func.__name__] = func
            return func
        return decorator

    def clear_cache(self):
        """
        Clears the cache by deleting all the files in the cache directory.
        """
        self.memory.clear()

    def update(self):
        """
        Clears the cache and executes all of the registered functions, so
        that the cache contains the latest return values.
        """
        self.clear_cache()
        for f in self.registry.values():
            f()
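# Hypothetical usage of the NeoCache class above (names invented for
# illustration); register() only accepts zero-argument functions.
cache = NeoCache(context="reports", cache_dir="/tmp/neo-cache")

@cache.register()
def build_report():
    return {"rows": 42}

build_report()   # first call computes and caches the result
cache.update()   # clears the cache and re-runs every registered function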
def test_clear_cache(tmpdir):
    """Test clearing cache."""
    def func(arg):
        """Dummy function."""
        print("executing function")
        return arg

    register_hdfs_store_backend()

    mem = Memory(location=tmpdir.strpath, host=__namenode__, backend='hdfs',
                 user='******', verbose=100, compress=False)
    cached_func = mem.cache(func)
    cached_func("test")

    mem.clear()
    assert not mem.store.object_exists(mem.store.cachedir)
class CacheManager(object):
    """The librosa cache manager class wraps joblib.Memory with a __call__
    attribute, so that it may act as a function.

    Additionally, it provides a caching level filter, so that different
    functions can be cached or not depending on the user's preference for
    speed vs. storage usage.
    """

    def __init__(self, *args, **kwargs):
        level = kwargs.pop("level", 10)

        # Initialize the memory object
        self.memory = Memory(*args, **kwargs)
        # The level parameter controls which data we cache
        # smaller numbers mean less caching
        self.level = level

    def __call__(self, level):
        """Example usage:

        @cache(level=2)
        def semi_important_function(some_arguments):
            ...
        """
        def wrapper(function):
            """Decorator function.  Adds an input/output cache to
            the specified function."""
            if self.memory.location is not None and self.level >= level:
                return _decorator_apply(self.memory.cache, function)
            else:
                return function

        return wrapper

    def clear(self, *args, **kwargs):
        return self.memory.clear(*args, **kwargs)

    def eval(self, *args, **kwargs):
        return self.memory.eval(*args, **kwargs)

    def format(self, *args, **kwargs):
        return self.memory.format(*args, **kwargs)

    def reduce_size(self, *args, **kwargs):
        return self.memory.reduce_size(*args, **kwargs)

    def warn(self, *args, **kwargs):
        return self.memory.warn(*args, **kwargs)
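# A minimal sketch of the level filter above: with a manager built at
# level=10, decorators requesting level <= 10 wrap the function in a joblib
# cache, while higher levels return the function untouched. The cache path
# is illustrative.
cache = CacheManager("/tmp/librosa-cache", verbose=0, level=10)

@cache(level=2)
def cheap_to_store(x):
    return x * 2        # cached: 2 <= 10

@cache(level=50)
def too_big_to_cache(x):
    return x * 2        # passed through unwrapped: 50 > 10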
def test_get_items(tmpdir):
    """Test cache items listing."""
    def func(arg):
        """Dummy function."""
        return arg

    register_s3fs_store_backend()

    mem = Memory(location=tmpdir.strpath, backend='s3', verbose=0,
                 backend_options=dict(bucket="test"))
    assert not mem.store_backend.get_items()

    cached_func = mem.cache(func)
    for arg in ["test1", "test2", "test3"]:
        cached_func(arg)

    # get_items always returns an empty list for the moment
    assert not mem.store_backend.get_items()

    mem.clear()
    assert not mem.store_backend.get_items()
# (Fragment: `features`, `directory`, `train_label_list` and the i/j
# grid-search indices come from the enclosing script.)
labels = []
for value in train_label_list:
    images = os.listdir(directory + "/" + value)
    for image in images:
        ig = cv2.imread(directory + "/" + value + "/" + image, 0)
        features.append(ig)
        labels.append(value)

# Extract the hog features
list_hog_fd = []
for feature in features:
    fd = hog(feature, orientations=9, pixels_per_cell=(i, i),
             cells_per_block=(j, j), visualize=False)
    list_hog_fd.append(fd)
hog_features = np.array(list_hog_fd, 'float64')

print("Count of digits in dataset", Counter(labels))

# Create a linear SVM object
clf = LinearSVC()

# Perform the training
clf.fit(hog_features, labels)

# Save the classifier
joblib.dump(clf, "test_ppc" + str(i) + "_cpb" + str(j) + ".pkl", compress=3)

mem = Memory("./cachedir", verbose=0)
mem.clear(warn=False)
os.system('hogRunner.py')
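# A hedged sketch of round-tripping the classifier dumped above with
# joblib.dump/joblib.load; the filename assumes a hypothetical i=8, j=2 run.
import joblib
import numpy as np

clf = joblib.load("test_ppc8_cpb2.pkl")
sample = np.zeros((1, clf.coef_.shape[1]))   # one feature vector of the right width
print(clf.predict(sample))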
def pipeline_grid(self, x_train, y_train):
    # Make pipeline
    location = 'cachedir'
    memory = Memory(location=location, verbose=10)
    pipe = Pipeline(
        [('reduce_dim', PCA()),
         ('feature_selection', SelectKBest(f_classif)),
         # penalty='elasticnet' so that the l1_ratio grid below takes effect
         # (l1_ratio=1 is equivalent to a pure l1 penalty)
         ('classify', LogisticRegression(solver='saga', penalty='elasticnet'))],
        memory=memory)

    # Set parameters according to the user's inputs
    # PCA parameters
    max_components = 0.99
    min_components = 0.3
    number_pc = 10
    range_dimreduction = np.linspace(min_components, max_components,
                                     number_pc).reshape(number_pc, )

    # ANOVA parameters
    pca = PCA(n_components=min_components)
    pca.fit(X=x_train)
    min_number_anova = pca.n_components_
    pca = PCA(n_components=max_components)
    pca.fit(X=x_train)
    max_number_anova = pca.n_components_
    number_anova = 3
    range_feature_selection = np.arange(min_number_anova, max_number_anova, 10)

    # Classifier parameters
    max_l1_ratio = 1
    min_l1_ratio = 0.5
    number_l1_ratio = 2
    range_l1_ratio = np.linspace(min_l1_ratio, max_l1_ratio,
                                 number_l1_ratio).reshape(number_l1_ratio, )

    # Overall grid-search settings
    param_grid = [
        {
            'reduce_dim__n_components': range_dimreduction,
            'feature_selection__k': range_feature_selection,
            'classify__l1_ratio': [max_l1_ratio],
        },
    ]

    # Train
    grid = GridSearchCV(pipe, n_jobs=-1, param_grid=param_grid)
    grid.fit(x_train, y_train)

    # Delete the temporary cache before exiting
    memory.clear(warn=False)
    rmtree(location)

    return grid
def track_ensemble(target_samples, atlas_data_wm_gm_int, labels_im_file,
                   recon_path, sphere, traversal, curv_thr_list, step_list,
                   track_type, maxcrossing, roi_neighborhood_tol, min_length,
                   waymask, B0_mask, t1w2dwi, gm_in_dwi, vent_csf_in_dwi,
                   wm_in_dwi, tiss_class, BACKEND='threading'):
    """
    Perform native-space ensemble tractography, restricted to a vector of
    ROI masks.

    Parameters
    ----------
    target_samples : int
        Total number of streamline samples specified to generate streams.
    atlas_data_wm_gm_int : str
        File path to Nifti1Image in T1w-warped native diffusion space,
        restricted to wm-gm interface.
    labels_im_file : str
        File path to atlas parcellation Nifti1Image in T1w-warped native
        diffusion space.
    recon_path : str
        File path to diffusion reconstruction model.
    sphere : obj
        DiPy object for modeling diffusion directions on a sphere.
    traversal : str
        The statistical approach to tracking. Options are: det
        (deterministic), closest (clos), and prob (probabilistic).
    curv_thr_list : list
        List of integer curvature thresholds used to perform ensemble
        tracking.
    step_list : list
        List of float step-sizes used to perform ensemble tracking.
    track_type : str
        Tracking algorithm used (e.g. 'local' or 'particle').
    maxcrossing : int
        Maximum number of diffusion directions that can be assumed per
        voxel while tracking.
    roi_neighborhood_tol : float
        Distance (in the units of the streamlines, usually mm). If any
        coordinate in the streamline is within this distance from the
        center of any voxel in the ROI, the filtering criterion is set to
        True for this streamline, otherwise False. Defaults to the distance
        between the center of each voxel and the corner of the voxel.
    min_length : int
        Minimum fiber length threshold in mm.
    waymask : str
        File path to a tractography constraint mask in native diffusion
        space.
    B0_mask : str
        File path to the B0 brain mask.
    tiss_class : str
        Tissue classification method.

    Notes
    -----
    The remaining tracking settings are read from the hardcoded run
    configuration rather than passed as arguments: n_seeds_per_iter
    (number of seeds from which to initiate tracking for each unique
    ensemble combination, 250 by default), max_length (maximum number of
    steps to restrict tracking), pft_back_tracking_dist (distance in mm to
    back-track before starting particle filtering tractography, 2 mm by
    default), pft_front_tracking_dist (distance in mm to run particle
    filtering tractography after the back-track distance, 1 mm by default;
    the total particle filtering tractography distance equals
    back_tracking_dist + front_tracking_dist), particle_count (number of
    particles to use in the particle filter), and min_separation_angle
    (the minimum angle between directions [0, 90]).

    Returns
    -------
    streamlines : ArraySequence
        DiPy list/array-like object of streamline points from tractography.

    References
    ----------
    .. [1] Takemura, H., Caiafa, C. F., Wandell, B. A., & Pestilli, F.
      (2016). Ensemble Tractography. PLoS Computational Biology.
      https://doi.org/10.1371/journal.pcbi.1004692
    """
    import os
    import gc
    import time
    import warnings
    import tempfile
    from joblib import Parallel, delayed, Memory
    import itertools
    import pickle5 as pickle
    from pynets.dmri.track import run_tracking
    from colorama import Fore, Style
    from pynets.dmri.utils import generate_sl
    from nibabel.streamlines.array_sequence import concatenate, ArraySequence
    from pynets.core.utils import save_3d_to_4d
    from nilearn.masking import intersect_masks
    from nilearn.image import math_img
    from pynets.core.utils import load_runconfig
    from dipy.tracking import utils

    warnings.filterwarnings("ignore")
    pickle.HIGHEST_PROTOCOL = 5
    joblib_dir = tempfile.mkdtemp()
    os.makedirs(joblib_dir, exist_ok=True)

    hardcoded_params = load_runconfig()
    nthreads = hardcoded_params["omp_threads"][0]
    os.environ['MKL_NUM_THREADS'] = str(nthreads)
    os.environ['OPENBLAS_NUM_THREADS'] = str(nthreads)
    n_seeds_per_iter = hardcoded_params['tracking']["n_seeds_per_iter"][0]
    max_length = hardcoded_params['tracking']["max_length"][0]
    pft_back_tracking_dist = \
        hardcoded_params['tracking']["pft_back_tracking_dist"][0]
    pft_front_tracking_dist = \
        hardcoded_params['tracking']["pft_front_tracking_dist"][0]
    particle_count = hardcoded_params['tracking']["particle_count"][0]
    min_separation_angle = \
        hardcoded_params['tracking']["min_separation_angle"][0]
    min_streams = hardcoded_params['tracking']["min_streams"][0]
    seeding_mask_thr = hardcoded_params['tracking']["seeding_mask_thr"][0]
    timeout = hardcoded_params['tracking']["track_timeout"][0]

    all_combs = list(itertools.product(step_list, curv_thr_list))

    # Construct seeding mask
    seeding_mask = f"{os.path.dirname(labels_im_file)}/seeding_mask.nii.gz"
    if waymask is not None and os.path.isfile(waymask):
        waymask_img = math_img(f"img > {seeding_mask_thr}",
                               img=nib.load(waymask))
        waymask_img.to_filename(waymask)
        atlas_data_wm_gm_int_img = intersect_masks(
            [
                waymask_img,
                math_img("img > 0.001", img=nib.load(atlas_data_wm_gm_int)),
                math_img("img > 0.001", img=nib.load(labels_im_file))
            ],
            threshold=1,
            connected=False,
        )
        nib.save(atlas_data_wm_gm_int_img, seeding_mask)
    else:
        atlas_data_wm_gm_int_img = intersect_masks(
            [
                math_img("img > 0.001", img=nib.load(atlas_data_wm_gm_int)),
                math_img("img > 0.001", img=nib.load(labels_im_file))
            ],
            threshold=1,
            connected=False,
        )
        nib.save(atlas_data_wm_gm_int_img, seeding_mask)

    tissues4d = save_3d_to_4d([
        B0_mask, labels_im_file, seeding_mask, t1w2dwi, gm_in_dwi,
        vent_csf_in_dwi, wm_in_dwi
    ])

    # Commence Ensemble Tractography
    start = time.time()
    stream_counter = 0
    all_streams = []
    ix = 0

    memory = Memory(location=joblib_dir, mmap_mode='r+', verbose=0)
    os.chdir(f"{memory.location}/joblib")

    @memory.cache
    def load_recon_data(recon_path):
        import h5py
        with h5py.File(recon_path, 'r') as hf:
            recon_data = hf['reconstruction'][:].astype('float32')
        return recon_data

    recon_shelved = load_recon_data.call_and_shelve(recon_path)

    @memory.cache
    def load_tissue_data(tissues4d):
        return nib.load(tissues4d)

    tissue_shelved = load_tissue_data.call_and_shelve(tissues4d)

    try:
        while float(stream_counter) < float(target_samples) and \
                float(ix) < 0.50 * float(len(all_combs)):
            with Parallel(n_jobs=nthreads, backend=BACKEND, mmap_mode='r+',
                          verbose=0) as parallel:
                out_streams = parallel(
                    delayed(run_tracking)
                    (i, recon_shelved, n_seeds_per_iter, traversal,
                     maxcrossing, max_length, pft_back_tracking_dist,
                     pft_front_tracking_dist, particle_count,
                     roi_neighborhood_tol, min_length, track_type,
                     min_separation_angle, sphere, tiss_class,
                     tissue_shelved) for i in all_combs)

                out_streams = list(filter(None, out_streams))
                if len(out_streams) > 1:
                    out_streams = concatenate(out_streams, axis=0)
                else:
                    continue

                if waymask is not None and os.path.isfile(waymask):
                    try:
                        out_streams = out_streams[utils.near_roi(
                            out_streams, np.eye(4),
                            np.asarray(
                                nib.load(waymask).dataobj).astype("bool"),
                            tol=int(round(roi_neighborhood_tol * 0.50, 1)),
                            mode="all")]
                    except BaseException:
                        print(f"\n{Fore.RED}No streamlines generated in "
                              f"waymask vicinity\n")
                        print(Style.RESET_ALL)
                        return None

                if len(out_streams) < min_streams:
                    ix += 1
                    print(f"\n{Fore.YELLOW}Fewer than {min_streams} "
                          f"streamlines tracked on last iteration...\n")
                    print(Style.RESET_ALL)
                    if ix > 5:
                        print(f"\n{Fore.RED}No streamlines generated\n")
                        print(Style.RESET_ALL)
                        return None
                    continue
                else:
                    ix -= 1

                stream_counter += len(out_streams)
                all_streams.extend([generate_sl(i) for i in out_streams])
                del out_streams

                print("%s%s%s%s" % (
                    "\nCumulative Streamline Count: ", Fore.CYAN,
                    stream_counter, "\n",
                ))
                gc.collect()
                print(Style.RESET_ALL)

                if time.time() - start > timeout:
                    print(f"\n{Fore.RED}Warning: Tractography timed "
                          f"out: {time.time() - start}")
                    print(Style.RESET_ALL)
                    memory.clear(warn=False)
                    return None

    except RuntimeError as e:
        print(f"\n{Fore.RED}Error: Tracking failed due to:\n{e}\n")
        print(Style.RESET_ALL)
        memory.clear(warn=False)
        return None

    print("Tracking Complete: ", str(time.time() - start))
    memory.clear(warn=False)
    del parallel, all_combs
    gc.collect()

    if stream_counter != 0:
        print('Generating final ...')
        return ArraySequence([ArraySequence(i) for i in all_streams])
    else:
        print(f"\n{Fore.RED}No streamlines generated!")
        print(Style.RESET_ALL)
        return None
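# A minimal sketch of the call_and_shelve pattern used above: the cached
# function hands back a lightweight MemorizedResult instead of the data,
# and .get() loads the value (memory-mapped here via mmap_mode) only when
# it is actually needed. Paths and names are illustrative.
import numpy as np
from joblib import Memory

memory = Memory(location="/tmp/shelve-cache", mmap_mode="r", verbose=0)

@memory.cache
def load_big_array(n):
    return np.arange(n, dtype=np.float64)

shelved = load_big_array.call_and_shelve(1_000_000)  # cheap handle
arr = shelved.get()                                  # materialized on demand
shelved.clear()                                      # drop just this result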
def pipeline_grid(self, x_train, y_train):
    # Make pipeline
    location = 'cachedir'
    memory = Memory(location=location, verbose=10)
    pipe = Pipeline(steps=[
        ('reduce_dim', 'passthrough'),
        ('feature_selection', 'passthrough'),
        ('estimator', 'passthrough'),
    ], memory=memory)

    # Feature reduction parameters
    range_dimreduction = np.linspace(self.min_components, self.max_components,
                                     self.number_pc).reshape(self.number_pc,)

    # Feature selection parameters
    print("Identifying components after PCA...\n")
    pca = PCA(n_components=self.min_components)
    pca.fit(X=x_train)
    min_number_anova = pca.n_components_
    pca = PCA(n_components=self.max_components)
    pca.fit(X=x_train)
    max_number_anova = pca.n_components_
    range_feature_selection = np.arange(min_number_anova, max_number_anova,
                                        self.feature_selection_step)

    # Set parameters of gridCV
    print("Setting parameters of gridCV...\n")
    param_grid = [
        {
            'reduce_dim': [PCA(iterated_power=7)],
            'reduce_dim__n_components': range_dimreduction,
            'feature_selection': [RFE(estimator=SVC())],
            'feature_selection__n_features_to_select': range_feature_selection,
            'estimator': [SVC()],
            'estimator__kernel': ['rbf', 'linear'],
            'estimator__C': self.range_C,
            'estimator__gamma': self.range_gamma,
        },
        {
            'reduce_dim': [PCA(iterated_power=7)],
            'reduce_dim__n_components': range_dimreduction,
            'feature_selection': [SelectKBest(f_classif)],
            'feature_selection__k': range_feature_selection,
            'estimator': [LogisticRegression()],
            'estimator__penalty': ['l1', 'l2'],
        },
    ]

    iteration_num = (
        len(range_dimreduction) * len(range_feature_selection) * 2 *
        len(self.range_C) * len(self.range_gamma) +
        len(range_dimreduction) * len(range_feature_selection) * 2
    )

    # Train
    cv = StratifiedKFold(n_splits=self.k)
    if self.search_strategy == 'grid':
        model = GridSearchCV(
            pipe, n_jobs=self.n_jobs, param_grid=param_grid, cv=cv,
            scoring=make_scorer(accuracy_score), refit=True)
        print(f"GridSearchCV fitting (about {iteration_num} times iteration)...\n")
    elif self.search_strategy == 'random':
        model = RandomizedSearchCV(
            pipe, n_jobs=self.n_jobs, param_distributions=param_grid, cv=cv,
            scoring=make_scorer(accuracy_score), refit=True,
            n_iter=self.n_iter_of_randomedsearch)
        print(f"RandomizedSearchCV fitting (about {iteration_num} times iteration)...\n")
    else:
        print("Please specify which search strategy!\n")
        return None

    model.fit(x_train, y_train)

    # Delete the temporary cache before exiting
    memory.clear(warn=False)
    rmtree(location)

    return model
"""Test sent_tokenizer.""" from pathlib import Path from joblib import Memory from hlm_texts import hlm_en from hlm_texts import sent_tokenizer memory = Memory(location=Path("~/joblib_cache").expanduser()) # cache location for sent_tokenizer: Path("~/joblib_cache").expanduser() memory.clear(0) # clear cache, no warning def test_sent_tokenizer(): """test_sent_tokenizer.""" para_list = hlm_en.splitlines()[:12] assert len(sent_tokenizer(para_list, 'en')) == 19 assert len(sent_tokenizer("\n".join(para_list), 'en')) == 19
class CacheManager(object):
    '''The librosa cache manager class wraps joblib.Memory with a __call__
    attribute, so that it may act as a function.

    Additionally, it provides a caching level filter, so that different
    functions can be cached or not depending on the user's preference for
    speed vs. storage usage.
    '''

    def __init__(self, *args, **kwargs):
        level = kwargs.pop('level', 10)

        # Initialize the memory object
        self.memory = Memory(*args, **kwargs)
        # The level parameter controls which data we cache
        # smaller numbers mean less caching
        self.level = level

    def __call__(self, level):
        '''Example usage:

        @cache(level=2)
        def semi_important_function(some_arguments):
            ...
        '''
        def wrapper(function):
            '''Decorator function.  Adds an input/output cache to
            the specified function.'''
            from decorator import FunctionMaker

            def decorator_apply(dec, func):
                """Decorate a function by preserving the signature even if
                dec is not a signature-preserving decorator.

                This recipe is derived from
                http://micheles.googlecode.com/hg/decorator/documentation.html#id14
                """
                return FunctionMaker.create(
                    func, 'return decorated(%(signature)s)',
                    dict(decorated=dec(func)), __wrapped__=func)

            if self.memory.location is not None and self.level >= level:
                return decorator_apply(self.memory.cache, function)
            else:
                return function

        return wrapper

    def clear(self, *args, **kwargs):
        return self.memory.clear(*args, **kwargs)

    def eval(self, *args, **kwargs):
        return self.memory.eval(*args, **kwargs)

    def format(self, *args, **kwargs):
        return self.memory.format(*args, **kwargs)

    def reduce_size(self, *args, **kwargs):
        return self.memory.reduce_size(*args, **kwargs)

    def warn(self, *args, **kwargs):
        return self.memory.warn(*args, **kwargs)
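# A standalone sketch of the FunctionMaker recipe above: wrap a function
# with memory.cache while keeping its real signature visible to
# introspection. Assumes the third-party `decorator` package is installed;
# the function names are illustrative.
import inspect
from decorator import FunctionMaker
from joblib import Memory

memory = Memory("/tmp/sig-cache", verbose=0)

def decorator_apply(dec, func):
    return FunctionMaker.create(
        func, 'return decorated(%(signature)s)',
        dict(decorated=dec(func)), __wrapped__=func)

def scale(y, factor=2.0):
    return [v * factor for v in y]

cached_scale = decorator_apply(memory.cache, scale)
print(inspect.signature(cached_scale))   # (y, factor=2.0) -- preserved
print(cached_scale([1, 2, 3]))           # results cached through joblib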
import argparse
import datetime
import subprocess

from joblib import Memory

# (day_names and month_name_to_num are module-level lookup tables assumed
# to be defined elsewhere in this script.)

parser = argparse.ArgumentParser(description='')
parser.add_argument('-c', '--clearCache', action='store_true', default=False)
parser.add_argument('-d', '--date', type=str, nargs='+', default=[])
parser.add_argument('-s', '--startHour', type=int, default=0)
parser.add_argument('-e', '--endHour', type=int, default=24)
parser.add_argument('-H', '--histogram', action='store_true', default=False)
parser.add_argument('-t', '--timeseries', action='store_true', default=False)
parser.add_argument('-S', '--summary', action='store_true', default=False)
parser.add_argument('-m', '--map', action='store_true', default=False)
parser.add_argument('--day', type=str, nargs='+', default=day_names)
args = parser.parse_args()

memory = Memory(location='./cron-parse-cachedir', verbose=0)
if args.clearCache:
    memory.clear()

EPOCH, MAX_CHARGING_TIME = 5, 95  # Minutes


def parse_timestamp(line):
    elems = line.split(' ')
    if len(elems) > 1:
        # Old version: Linux date
        year, month, date, time_ = elems[-1], elems[1], elems[2], elems[3]
        # list.index() raises ValueError for an unknown month name
        month = month_name_to_num.index(month)
        hour, minute = time_.split(':')[:2]
    else:
        # New version: Python date
        year, month, date, hour, minute = line.split('-')
    return (int(year), int(month), int(date), int(hour), int(minute))
def fit(self, X, y=None, groups=None, **fit_params):
    """
    Fit Nested CV with all sets of parameters.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.

    y : array-like of shape (n_samples, n_output) or (n_samples,), optional
        Target relative to X for classification or regression; None for
        unsupervised learning.

    groups : array-like, with shape (n_samples,), optional
        Group labels for the samples used while splitting the dataset into
        train/test set. Only used in conjunction with a "Group" :term:`cv`
        instance (e.g., :class:`~sklearn.model_selection.GroupKFold`).

    **fit_params : dict of string -> object
        Parameters passed to the ``fit`` method of the estimator.

    Returns
    -------
    It will not return the values directly, but they are accessible from
    the class object itself. You should be able to access:

    outer_pred
        A dictionary to access the train indexes, the test indexes and the
        model of each outer loop for further post-processing. Keys are
        respectively train, test and model, with values being lists of
        length outer_cv.get_n_splits().

    outer_results
        A dictionary to access the outer test scores, the best inner
        scores, the best inner parameters (and outer_train_scores if
        return_train_score == True). Keys are respectively
        outer_test_score, best_inner_score, best_inner_params (and
        outer_train_score), with values being lists of length
        outer_cv.get_n_splits().

    inner_results
        A list of dictionaries of length outer_cv.get_n_splits(). Each
        dictionary has params, mean_test_score, std_test_score (and
        mean_train_score, std_train_score if return_train_score == True)
        as keys, with values being the list of params or associated
        results over the inner loops.

    best_estimator_
        Model when refit on the whole dataset with hyperparameters
        optimized by GridSearchCV. Available only if refit == True.
""" X, y = self._check_X_Y(X, y) self._check_pipeline_dic(self.pipeline_dic) self.model = self._get_pipeline(self.pipeline_dic) self.params_grid = self._get_parameters_grid(self.params_dic) outer_cv = check_cv(self.outer_cv, y, is_classifier( self.model[-1])) # Last element of pipeline = estimator inner_cv = check_cv(self.inner_cv, y, is_classifier( self.model[-1])) # Last element of pipeline = estimator if not isinstance(self.randomized_search, bool): raise TypeError('randomized_search argument must be a boolean') self.outer_pred = { 'train': [], 'test': [], 'model': [], 'predict_train': [], 'predict_test': [] } if hasattr(self.model[-1], 'predict_proba'): self.outer_pred.update({ 'predict_proba_train': [], 'predict_proba_test': [] }) if hasattr(self.model[-1], 'decision_function'): self.outer_pred.update({ 'decision_function_train': [], 'decision_function_test': [] }) self.outer_results = { 'outer_test_score': [], 'best_inner_score': [], 'best_inner_params': [] } self.inner_results = [] if self.return_train_score: self.outer_results.update({'outer_train_score': []}) # From sklearn.model_selection._search.BasesearchCV self.scorers, self.multimetric_ = _check_multimetric_scoring( self.model, scoring=self.metric) if self.multimetric_: if callable(self.refit_inner): raise ValueError( 'If inner loops use multimetric scoring and the user want to refit according to a ' 'callable, the latter must be passed in a dictionnary {score: callable} with score ' 'being the score name with which the score on different sets wiil be calculated' ) if self.refit_inner is not False and ( not isinstance(self.refit_inner, str) or # This will work for both dict / list (tuple) self.refit_inner not in self.scorers): if isinstance(self.refit_inner, Mapping): if len(self.refit_inner.keys()) > 1: raise ValueError( 'refit_inner dict must have only one key, got %d' % len(self.refit_inner.keys())) self.refit_metric = list(self.refit_inner.keys())[0] self.refit_inner = self.refit_inner[self.refit_metric] else: raise ValueError( "For multi-metric scoring, the parameter " "refit must be set to a scorer key or a " "dict with scorer key and callable value to refit an estimator with the " "best parameter setting on the whole " "data and make the best_* attributes " "available for that metric. If this is " "not needed, refit should be set to " "False explicitly. %r was passed." % self.refit_inner) else: self.refit_metric = self.refit_inner else: self.refit_metric = 'score' if self.refit_inner is True: self.refit_inner = 'score' for k_outer, (train_outer_index, test_outer_index) in enumerate( outer_cv.split(X, y, groups)): if self.verbose > 1: print('\n-----------------\n{0}/{1} <-- Current outer fold'. 
format(k_outer + 1, outer_cv.get_n_splits())) X_train_outer, X_test_outer = X[train_outer_index], X[ test_outer_index] y_train_outer, y_test_outer = y[train_outer_index], y[ test_outer_index] with tempfile.TemporaryDirectory() as location: memory = Memory(location=location, verbose=0) inner_model = clone(self.model) inner_model.set_params(memory=memory) if self.randomized_search: pipeline_inner = RandomizedSearchCV( inner_model, self.params_grid, scoring=self.scorers, n_jobs=self.n_jobs, cv=inner_cv, n_iter=self.randomized_search_iter, return_train_score=self.return_train_score, verbose=self.verbose - 1, pre_dispatch=self.pre_dispatch, refit=self.refit_inner, random_state=self.random_state) else: pipeline_inner = GridSearchCV( inner_model, self.params_grid, scoring=self.scorers, n_jobs=self.n_jobs, cv=inner_cv, return_train_score=self.return_train_score, verbose=self.verbose - 1, pre_dispatch=self.pre_dispatch, refit=self.refit_inner) pipeline_inner.fit(X_train_outer, y_train_outer, groups=groups, **fit_params) self.inner_results.append({ 'params': pipeline_inner.cv_results_['params'], 'mean_test_score': pipeline_inner.cv_results_['mean_test_%s' % self.refit_metric], 'std_test_score': pipeline_inner.cv_results_['std_test_%s' % self.refit_metric] }) if self.return_train_score: self.inner_results[-1].update({ 'mean_train_score': pipeline_inner.cv_results_['mean_train_%s' % self.refit_metric], 'std_train_score': pipeline_inner.cv_results_['std_train_%s' % self.refit_metric] }) if self.verbose > 2: for params_dict in pipeline_inner.cv_results_['params']: mean_test_score = pipeline_inner.cv_results_[ 'mean_test_%s' % self.refit_metric] index_params_dic = pipeline_inner.cv_results_[ 'params'].index(params_dict) print('\t\t Params: {0}, Mean inner score: {1}'.format( params_dict, mean_test_score[index_params_dic])) self.outer_results['best_inner_score'].append( pipeline_inner.cv_results_['mean_test_%s' % self.refit_metric][ pipeline_inner.best_index_] ) # Because best_score doesn't exist if refit_inner is a callable self.outer_results['best_inner_params'].append( pipeline_inner.best_params_) if self.return_train_score: self.outer_results['outer_train_score'].append( self.scorers[self.refit_metric]( pipeline_inner.best_estimator_, X_train_outer, y_train_outer)) self.outer_results['outer_test_score'].append( self.scorers[self.refit_metric]( pipeline_inner.best_estimator_, X_test_outer, y_test_outer)) if self.verbose > 1: print( '\nResults for outer fold:\nBest inner parameters was: {0}' .format(self.outer_results['best_inner_params'][-1])) print('Outer score: {0}'.format( self.outer_results['outer_test_score'][-1])) print('Inner score: {0}'.format( self.outer_results['best_inner_score'][-1])) self.outer_pred['train'].append(train_outer_index) self.outer_pred['test'].append(test_outer_index) self.outer_pred['model'].append(pipeline_inner.best_estimator_) self.outer_pred['predict_train'].append( pipeline_inner.best_estimator_.predict(X_train_outer)) self.outer_pred['predict_test'].append( pipeline_inner.best_estimator_.predict(X_test_outer)) if hasattr(pipeline_inner.best_estimator_[-1], 'predict_proba'): self.outer_pred['predict_proba_train'].append( pipeline_inner.best_estimator_.predict_proba( X_train_outer)) self.outer_pred['predict_proba_test'].append( pipeline_inner.best_estimator_.predict_proba( X_test_outer)) if hasattr(pipeline_inner.best_estimator_[-1], 'decision_function'): self.outer_pred['decision_function_train'].append( pipeline_inner.best_estimator_.decision_function( X_train_outer)) 
                self.outer_pred['decision_function_test'].append(
                    pipeline_inner.best_estimator_.decision_function(
                        X_test_outer))
            memory.clear(warn=False)

    if self.verbose > 0:
        print('\nOverall outer score (mean +/- std): {0} +/- {1}'.format(
            np.mean(self.outer_results['outer_test_score']),
            np.std(self.outer_results['outer_test_score'])))
        print('Best params by outer fold:')
        for i, params_dict in enumerate(
                self.outer_results['best_inner_params']):
            print('\t Outer fold {0}: {1}'.format(i + 1, params_dict))
        print('\n')

    # Store the only scorer not as a dict for single metric evaluation
    self.scorer_ = self.scorers if self.multimetric_ else self.scorers[
        'score']

    # If refit_outer is True, optimize hyperparameters on the whole dataset
    # and fit with the best parameters
    if self.refit_outer:
        print('=== Refit ===')
        location = 'cachedir'
        memory = Memory(location=location, verbose=0)
        final_model = clone(self.model)
        final_model.set_params(memory=memory)
        pipeline_refit = GridSearchCV(
            final_model, self.params_grid,
            scoring=self.scorers[self.refit_metric],
            n_jobs=self.n_jobs, cv=outer_cv, verbose=self.verbose - 1)
        pipeline_refit.fit(X, y, groups=groups, **fit_params)
        self.best_estimator_ = pipeline_refit.best_estimator_
        memory.clear(warn=False)
        rmtree(location)
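# A minimal sketch of the per-fold cache lifecycle used above: each outer
# fold gets its own temporary joblib cache for the pipeline's transformers,
# and the directory disappears when the context manager exits. Dataset and
# pipeline are illustrative.
import tempfile
from joblib import Memory
from sklearn.datasets import make_classification
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.linear_model import LogisticRegression

X, y = make_classification(n_samples=60, n_features=20, random_state=0)

with tempfile.TemporaryDirectory() as location:
    memory = Memory(location=location, verbose=0)
    pipe = make_pipeline(PCA(n_components=5), LogisticRegression(),
                         memory=memory)
    pipe.fit(X, y)            # PCA output is cached under `location`
    memory.clear(warn=False)  # optional: the directory is removed anyway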
class MSCOAL(SCOAL):

    def __init__(self, estimator=LinearRegression(), max_split=10,
                 validation_size=0.2, random_state=42, n_jobs=1, cache=False,
                 matrix='sparse', verbose=False):
        self.estimator = estimator
        self.max_split = max_split
        self.validation_size = validation_size
        self.random_state = random_state
        self.n_jobs = n_jobs
        self.cache = cache
        self.matrix = matrix
        self.verbose = verbose
        self.is_regressor = is_regressor(estimator)

    def _split_row_clusters(self, data, coclusters, models, n_jobs):
        row_clusters, col_clusters = coclusters
        n_row_clusters, n_col_clusters = (np.unique(row_clusters).size,
                                          np.unique(col_clusters).size)
        results = self._compute_clusterwise(data, coclusters, models,
                                            self._score_rows, n_jobs)
        scores = np.zeros((row_clusters.size, n_row_clusters))
        for i in range(n_row_clusters):
            for j in range(n_col_clusters):
                scores[:, i] += results[i][j]
        cluster_to_split = scores.mean(axis=0).argmax()
        rows = np.where(row_clusters == cluster_to_split)[0]
        rows_scores = scores[row_clusters == cluster_to_split,
                             cluster_to_split]
        # Sort rows by their scores so rows and rows_scores stay paired
        # (mirrors _split_col_clusters below).
        rows = rows[np.argsort(rows_scores)]
        rows_scores = np.sort(rows_scores)
        rows1 = np.array_split(rows[rows_scores == 0], 2)[1]
        rows2 = np.array_split(rows[rows_scores > 0], 2)[1]
        rows = np.concatenate((rows1, rows2))
        new_row_clusters = row_clusters
        new_row_clusters[rows] = n_row_clusters
        return new_row_clusters, col_clusters

    def _split_col_clusters(self, data, coclusters, models, n_jobs):
        row_clusters, col_clusters = coclusters
        n_row_clusters, n_col_clusters = (np.unique(row_clusters).size,
                                          np.unique(col_clusters).size)
        results = self._compute_clusterwise(data, coclusters, models,
                                            self._score_cols, n_jobs)
        scores = np.zeros((col_clusters.size, n_col_clusters))
        for i in range(n_row_clusters):
            for j in range(n_col_clusters):
                scores[:, j] += results[i][j]
        cluster_to_split = scores.mean(axis=0).argmax()
        cols = np.where(col_clusters == cluster_to_split)[0]
        cols_scores = scores[col_clusters == cluster_to_split,
                             cluster_to_split]
        cols = cols[np.argsort(cols_scores)]
        cols_scores = np.sort(cols_scores)
        cols1 = np.array_split(cols[cols_scores == 0], 2)[1]
        cols2 = np.array_split(cols[cols_scores > 0], 2)[1]
        cols = np.concatenate((cols1, cols2))
        new_col_clusters = col_clusters
        new_col_clusters[cols] = n_col_clusters
        return row_clusters, new_col_clusters

    def _print_status(self, iter_count, score, delta_score, n_row_clusters,
                      n_col_clusters, elapsed_time):
        if iter_count == 0:
            print('|'.join(x.ljust(15) for x in [
                'split', ' score', 'delta score', 'n row clusters',
                'n col clusters', 'elapsed time (s)']))
        print('|'.join(x.ljust(15) for x in [
            '%i' % iter_count, '%.4f' % score, '%.4f' % delta_score,
            '%i' % n_row_clusters, '%i' % n_col_clusters,
            '%i' % elapsed_time]))

    def _converge_mscoal(self, train_data, valid_data, coclusters, models,
                         max_split=100, n_jobs=1, verbose=False):
        split_count = 0
        elapsed_time = 0
        score = np.nan
        delta_score = np.nan
        converged = False
        start = time.time()

        if coclusters is None:
            n_row_clusters, n_col_clusters = 1, 1
            coclusters = self._initialize_coclusters(n_row_clusters,
                                                     n_col_clusters)
        else:
            row_clusters, col_clusters = coclusters
            n_row_clusters, n_col_clusters = (np.unique(row_clusters).size,
                                              np.unique(col_clusters).size)
        if models is None:
            models = self._initialize_models(coclusters)

        coclusters, models = self._converge_scoal(
            train_data, coclusters, models, n_jobs=n_jobs, verbose=False)
        scores = self._score_coclusters(valid_data, coclusters, models,
                                        n_jobs)
        score = np.sum(scores) / int(self.n_values * self.validation_size)
        if verbose:
            self._print_status(split_count, score, delta_score,
                               n_row_clusters, n_col_clusters, elapsed_time)
        converged = split_count >= max_split
        while not converged:
            row_clusters_changed = False
            col_clusters_changed = False
            delta_score = 0

            new_coclusters = deepcopy(coclusters)
            new_coclusters = self._split_row_clusters(
                valid_data, new_coclusters, models, n_jobs)
            new_models = self._initialize_models(new_coclusters)
            checked = np.all(self._check_coclusters(
                train_data, new_coclusters, models, n_jobs=1))
            if checked:
                new_coclusters, new_models = self._converge_scoal(
                    train_data, new_coclusters, new_models, n_jobs=n_jobs,
                    verbose=False)
                scores = self._score_coclusters(valid_data, new_coclusters,
                                                new_models, n_jobs)
                new_score = np.sum(scores) / int(self.n_values *
                                                 self.validation_size)
                new_delta_score = score - new_score
                if new_delta_score > 0:
                    n_row_clusters += 1
                    coclusters = new_coclusters
                    models = new_models
                    row_clusters_changed = True
                    delta_score += new_delta_score
                    score = new_score

            new_coclusters = deepcopy(coclusters)
            new_coclusters = self._split_col_clusters(
                valid_data, new_coclusters, models, n_jobs)
            new_models = self._initialize_models(new_coclusters)
            checked = np.all(self._check_coclusters(
                train_data, new_coclusters, models, n_jobs=1))
            if checked:
                new_coclusters, new_models = self._converge_scoal(
                    train_data, new_coclusters, new_models, n_jobs=n_jobs,
                    verbose=False)
                scores = self._score_coclusters(valid_data, new_coclusters,
                                                new_models, n_jobs)
                new_score = np.sum(scores) / int(self.n_values *
                                                 self.validation_size)
                new_delta_score = score - new_score
                if new_delta_score > 0:
                    n_col_clusters += 1
                    coclusters = new_coclusters
                    models = new_models
                    col_clusters_changed = True
                    delta_score += new_delta_score
                    score = new_score

            converged = ((not row_clusters_changed and
                          not col_clusters_changed) or
                         split_count >= max_split)
            split_count += 1
            elapsed_time = time.time() - start
            if verbose:
                self._print_status(split_count, score, delta_score,
                                   n_row_clusters, n_col_clusters,
                                   elapsed_time)

        train_matrix, row_features, col_features = train_data
        valid_matrix, _, _ = valid_data
        if self.matrix == 'dense':
            mask = np.where(np.invert(np.isnan(valid_matrix)))
            train_matrix[mask] = valid_matrix[mask]
        else:
            train_matrix = np.vstack((train_matrix, valid_matrix))
        train_data = (train_matrix, row_features, col_features)
        coclusters, models = self._converge_scoal(
            train_data, coclusters, models, n_jobs=n_jobs, verbose=False)

        return coclusters, models

    def fit(self, target, row_features, col_features, coclusters=None):
        np.random.seed(self.random_state)
        self.n_rows, self.n_cols, self.n_values = (row_features.shape[0],
                                                   col_features.shape[0],
                                                   target.shape[0])
        self.n_row_features, self.n_col_features = (row_features.shape[1],
                                                    col_features.shape[1])

        valid = np.full(self.n_values, False)
        valid[:int(self.n_values * self.validation_size)] = True
        np.random.shuffle(valid)
        valid_target = target[valid]
        train_target = target[~valid]
        del target

        if self.matrix == 'dense':
            valid_matrix = np.zeros((self.n_rows, self.n_cols)) * np.nan
            valid_matrix[valid_target[:, 0].astype(int),
                         valid_target[:, 1].astype(int)] = valid_target[:, 2]
            train_matrix = np.zeros((self.n_rows, self.n_cols)) * np.nan
            train_matrix[train_target[:, 0].astype(int),
                         train_target[:, 1].astype(int)] = train_target[:, 2]
        else:
            valid_matrix = valid_target
            train_matrix = train_target
        del train_target
        del valid_target

        valid_data = (valid_matrix, row_features, col_features)
        train_data = (train_matrix, row_features, col_features)

        if self.cache:
            self.memory = Memory('./pyscoal-cache')
            self._cached_fit = self.memory.cache(
                self._cached_fit, ignore=['self', 'model', 'X', 'y'])
        self.coclusters, self.models = self._converge_mscoal(
            train_data, valid_data, coclusters, None, self.max_split,
            self.n_jobs, self.verbose)
        row_clusters, col_clusters = self.coclusters
        self.n_row_clusters, self.n_col_clusters = (
            np.unique(row_clusters).size, np.unique(col_clusters).size)

        if self.cache:
            self.memory.clear(warn=False)
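# A minimal sketch of the `ignore=` pattern used in fit() above: argument
# names listed in `ignore` are left out of the cache key, so calls that
# differ only in those arguments still hit the cache. Names are
# illustrative.
from joblib import Memory

memory = Memory('./pyscoal-cache', verbose=0)

def fit_model(X, y, verbose=False):
    if verbose:
        print("fitting...")
    return sum(X) + sum(y)

fit_model = memory.cache(fit_model, ignore=['verbose'])
fit_model([1, 2], [3, 4], verbose=True)   # computed, prints "fitting..."
fit_model([1, 2], [3, 4], verbose=False)  # cache hit despite different verbose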
def _inner_sklearn(self, X, y):
    """
    Perform hyperparameter tuning with cross-validation for each outer fold
    (using the sklearn library). Each time, save the best parameters and
    the associated best estimator (not refitted!).

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Training data set.
    y : array-like, shape (n_samples, n_output)
        Target relative to X for classification or regression.
    """
    if self.caching:
        location = 'cachedir'
        memory = Memory(location=location, verbose=0, mmap_mode='r+')
        try:
            cv_estimator = clone(self.estimator).set_params(memory=memory)
        except ValueError as e:
            print(e)
            warnings.warn(
                "Caching is only available with an estimator built with "
                "sklearn.pipeline.Pipeline (see "
                "https://scikit-learn.org/stable/modules/generated/sklearn.pipeline.Pipeline.html "
                "for more details). This argument was ignored by default.")
            cv_estimator = clone(self.estimator)
        else:
            if self.n_jobs_inner != 1:
                warnings.warn(
                    "A combined use of joblib.Memory (function caching) and "
                    "joblib.Parallel should be considered with care since "
                    "it may lead to some failures. In particular, we "
                    "noticed some errors when dealing with custom "
                    "transformers in the pipeline.")
    else:
        cv_estimator = clone(self.estimator)

    if self.randomized:
        inner = RandomizedSearchCV(estimator=cv_estimator,
                                   param_distributions=self.params,
                                   scoring=self.scoring_inner,
                                   cv=self.cv_inner,
                                   n_jobs=self.n_jobs_inner,
                                   refit=False,
                                   verbose=self.verbose)
    else:
        inner = GridSearchCV(estimator=cv_estimator,
                             param_grid=self.params,
                             scoring=self.scoring_inner,
                             cv=self.cv_inner,
                             n_jobs=self.n_jobs_inner,
                             refit=False,
                             verbose=self.verbose)

    count = 0
    for train, _ in self.cv_outer_.split(X, y):
        X_train, y_train = X[train, :], y[train]
        inner.fit(X_train, y_train)
        self.inner_results_['out fold ' + str(count)] = inner.cv_results_
        self.best_params_.append(inner.best_params_)
        self.best_estimators_.append(
            clone(clone(self.estimator).set_params(**inner.best_params_)))
        if self.caching:
            memory.clear(warn=False)
            rmtree(location)
        count += 1

    return self
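# Sketch of the caching hook used above: sklearn's Pipeline exposes a
# `memory` parameter, so a cloned estimator can be handed a fresh joblib
# cache without touching the original; set_params raises ValueError for
# estimators that are not Pipelines, which is what the try/except guards.
from joblib import Memory
from sklearn.base import clone
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

estimator = Pipeline([("scale", StandardScaler()),
                      ("clf", LogisticRegression())])
memory = Memory(location="cachedir", verbose=0)
cv_estimator = clone(estimator).set_params(memory=memory)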
def run(cfg, solver: ISolver, dataprovider: IDataProvider, aug: IAug,
        review_augmented_sample=False, review_train=True):
    # Check folder path
    if not os.path.exists(cfg.data_dir):
        logging.error('There is no such data folder {}'.format(cfg.data_dir))
        exit(-1)

    # Prepare data and split into train/test subsets
    data_dir, ids_train, ids_test = get_data(cfg, test_size=cfg.test_aspect)

    # Manage cached data access
    cache_folder = './cache'
    memory = Memory(cache_folder, verbose=0)
    memory.clear(warn=False)
    data_reader = memory.cache(read_sample) if memory is not None \
        else read_sample

    if review_augmented_sample:
        matplotlib.use('TkAgg')  # Enable interactive mode

        # Specify params according to which subset to review
        ids, augm = (ids_train, aug.get_training_augmentation(cfg)) \
            if review_train else \
            (ids_test,
             aug.get_validation_augmentation(cfg, cfg.minimize_train_aug))

        # Let's look at the augmented data we have
        dataset = dataprovider(
            data_reader, data_dir, ids, ((0, 0), (None, None)), cfg,
            min_mask_ratio=cfg.min_mask_ratio,
            augmentation=augm,
            # don't use a prep getter, so we see the actual augmented data
            prep_getter=None
        )
        logging.info('Dataset length: {}'.format(len(dataset)))

        for i in range(150):
            dataset.show(i)
        return

    # ****************************************************************
    # Create model
    # ****************************************************************

    # Dataset for train images
    train_dataset = dataprovider(data_reader, data_dir, ids_train,
                                 ((0, 0), (None, None)), cfg,
                                 min_mask_ratio=cfg.min_mask_ratio,
                                 augmentation=aug.get_training_augmentation(
                                     cfg, cfg.minimize_train_aug),
                                 prep_getter=solver.get_prep_getter())

    # Dataset for validation images
    valid_dataset = dataprovider(data_reader, data_dir, ids_test,
                                 ((0, 0), (None, None)), cfg,
                                 min_mask_ratio=cfg.min_mask_ratio,
                                 augmentation=aug.get_validation_augmentation(
                                     cfg, cfg.minimize_train_aug),
                                 prep_getter=solver.get_prep_getter())

    train_dataloader = Dataloder(train_dataset, batch_size=cfg.batch_size,
                                 shuffle=True)
    valid_dataloader = Dataloder(valid_dataset, batch_size=1, shuffle=False)

    # Report general sample info
    train_batch = train_dataloader[0]
    logging.info('Train X: {},{},{},{}'.format(
        train_batch[0].shape, train_batch[0].dtype,
        np.min(train_batch[0]), np.max(train_batch[0])))
    logging.info('Train Y: {},{},{},{}'.format(
        train_batch[1].shape, train_batch[1].dtype,
        np.min(train_batch[1]), np.max(train_batch[1])))
    logging.info('Train Batch size multiplier: {}'.format(
        cfg.batch_size_multiplier))
    logging.info('Train Samples Nb: {}'.format(len(train_dataset)))

    class_weights = None
    if hasattr(train_dataset, 'mask_uniq_values_nb'):
        if train_dataset.mask_uniq_values_nb is not None \
                and cfg.apply_class_weights:
            mask_min_nb = np.min(train_dataset.mask_uniq_values_nb)
            if mask_min_nb > 0:
                class_weights = (train_dataset.mask_uniq_values_nb /
                                 mask_min_nb) ** -1

    val_batch = valid_dataloader[0]
    logging.info('Validate Samples Nb: {}'.format(len(valid_dataset)))
    logging.info('Val X: {},{},{},{}'.format(
        val_batch[0].shape, val_batch[0].dtype,
        np.min(val_batch[0]), np.max(val_batch[0])))
    logging.info('Val Y: {},{},{},{}'.format(
        val_batch[1].shape, val_batch[1].dtype,
        np.min(val_batch[1]), np.max(val_batch[1])))

    if train_batch[0].shape[1] != val_batch[0].shape[1] or \
            train_batch[0].shape[2] != val_batch[0].shape[2]:
        logging.info('Note that the sample H/W in the train subset differs '
                     'from the validation subset. '
                     'It may affect cross-comparison of metrics')
    model, weights_path, metrics = solver.build(compile_model=True,
                                                class_weights=class_weights)

    logging.info('Storing configuration...')
    with open(os.path.join(cfg.solution_dir, 'configuration.json'), 'w',
              newline=os.linesep) as f:
        json.dump(dict({'cfg': dict(cfg)}), f, default=json_def_encoder)

    # Get monitoring metric
    monitoring_metric_name, monitoring_metric_mode = solver.monitoring_metric()

    # Define callbacks for learning-rate scheduling and best checkpoint saving
    callbacks = [
        # Save best result
        keras.callbacks.ModelCheckpoint(weights_path,
                                        monitor=monitoring_metric_name,
                                        save_weights_only=True,
                                        save_best_only=True,
                                        mode=monitoring_metric_mode,
                                        verbose=1),
        # Save the latest result
        keras.callbacks.ModelCheckpoint(
            '{}_last.h5'.format(os.path.join(
                os.path.dirname(weights_path),
                os.path.splitext(os.path.basename(weights_path))[0])),
            monitor=monitoring_metric_name,
            save_weights_only=True,
            save_best_only=False,
            mode='auto',
            verbose=0),
        # The Adam optimizer SHOULD not control the LR:
        # keras.callbacks.ReduceLROnPlateau(verbose=1, patience=10,
        #                                   factor=0.2)
        #
        # keras.callbacks.EarlyStopping(monitor='val_mean_iou',
        #                               min_delta=0.01,
        #                               patience=40,
        #                               verbose=0, mode='max')
        PlotLosses(imgfile='{}.png'.format(os.path.join(
            os.path.dirname(weights_path),
            os.path.splitext(os.path.basename(weights_path))[0])),
            figsize=(12, 4 * (1 + len(metrics))))
        # PNG files are processed in both Windows & Ubuntu
    ]
    if hasattr(cfg, 'callbacks'):
        callbacks = callbacks + cfg.callbacks

    matplotlib.use('Agg')  # Disable Tcl/Tk: it sometimes crashes training!

    # Train the model
    model.fit_generator(
        train_dataloader,
        steps_per_epoch=len(train_dataloader),
        epochs=cfg.epochs,
        callbacks=callbacks,
        validation_data=valid_dataloader,
        validation_steps=len(valid_dataloader),
    )
class Genome(object):
    """
    Object for a genome file, with some util functions for genome analysis.

    Params:
    --------
    filename: `str`

    Returns:
    --------

    Examples:
    --------
    """
    def __init__(self, filename, exclude=None,
                 exclude_contig=['tig', 'Un', 'Sy', 'scaffold', 'ctg',
                                 'Pt', 'Mt'],
                 mem_cache='.'):
        check_file_exists(filename)
        self.filename = filename
        self.exclude = listify(exclude)
        self.exclude_contig = listify(exclude_contig)
        self.getChrSizes()
        self.idx2label = dict(
            (i, chrom) for i, chrom in enumerate(self.chromLabels))
        self.label2idx = dict(
            (chrom, i) for i, chrom in enumerate(self.chromLabels))

        self.mem_cache = mem_cache
        self.memory = Memory(mem_cache, verbose=0)
        self.getGCBin = self.memory.cache(self._getGCBin)

    @property
    def handle(self):
        if self.filename[-3:] == ".gz":
            self._handle = gzip.open(self.filename, 'rt')
        else:
            self._handle = open(self.filename, 'r')
        return self._handle

    @property
    def seqs(self):
        """
        An OrderedDict of sequences.
        """
        if not hasattr(self, '_seqs'):
            self._seqs = []
            fa = SeqIO.parse(self.handle, 'fasta')
            for record in fa:
                if self.exclude:
                    if record.id in self.exclude:
                        continue
                if self.exclude_contig:
                    for contig in self.exclude_contig:
                        if contig in record.id:
                            break
                    else:
                        self._seqs.append(record.seq)
                else:
                    self._seqs.append(record.seq)
        return self._seqs

    @property
    def chromLabels(self):
        if not hasattr(self, '_chromLabels'):
            self._chromLabels = []
            import pyfaidx
            fa = pyfaidx.Fasta(self.filename)
            for record in fa:
                if self.exclude:
                    if record.name in self.exclude:
                        continue
                if self.exclude_contig:
                    for contig in self.exclude_contig:
                        if contig in record.name:
                            break
                    else:
                        self._chromLabels.append(record.name)
                else:
                    self._chromLabels.append(record.name)
        return self._chromLabels

    @property
    def chroms(self):
        return list(range(len(self.chromLabels)))

    @property
    def chromCount(self):
        return len(self.chroms)

    def getChrSizes(self):
        """
        Calculate the length of each chromosome.
        """
        self.chromSizes = np.array(
            [len(self.seqs[i]) for i in range(self.chromCount)])
        return self.chromSizes

    def makeWindows(self, window):
        """
        Make chromosome windows.

        Params:
        --------
        window: `int`
            window of chromosome

        Returns:
        --------
        out: `list`
            a list of windows

        Examples:
        ---------
        >>> makeWindows(10000)
        [('Chr1', 0, 100000) ...]
        """
        self.window = window
        if not hasattr(self, 'windows'):
            self.windows = OrderedDict()
            for idx, size in enumerate(self.chromSizes):
                temp = []
                chrom = self.idx2label[idx]
                for i in range(0, size + 1, window):
                    temp.append((i, i + window))
                else:
                    # clamp the last window to the chromosome end
                    if temp[-1][1] > size:
                        temp[-1] = (temp[-1][0], size)
                self.windows[chrom] = temp

        self.chromBins = list(map(len, self.windows.values()))
        self.chromStartBins = np.r_[0, np.cumsum(self.chromBins[:-1])]
        self.chromEndBins = np.cumsum(self.chromBins)
        self.numBins = self.chromEndBins[-1]
        self.chromBinsDict = OrderedDict(
            zip(self.windows.keys(),
                tuple(zip(self.chromStartBins, self.chromEndBins))))
        logging.debug('Successful makewindow')
        return self.windows

    def getGapBase(self, chrom, start, end):
        """
        Calculate the percentage of gap bases in a region.
        """
        seq = self.seqs[chrom][start:end]
        if len(seq) == 0:
            return 0.0
        else:
            gap = seq.count('N') + seq.count('n')
            percent = 100.0 * gap / float(len(seq))
            return percent

    def getGC(self, chrom, start, end, correct=True):
        """
        Calculate the GC content of a sequence.
""" seq = self.seqs[chrom][start:end] gc = SeqUtils.GC(seq) gap = self.getGapBase(chrom, start, end) if correct \ else 0.0 if gap == 100.0: return -1.0 else: corrected_gc = gc * 100.0 / (100.0 - gap) #logging.debug('Calculated GC content in {}:{}-{}'.format( # chrom, start, end)) return corrected_gc def _getGCBin(self, window, chr=[], correct=True, thread=24): """ Calculate GC content of a series of windows, and return a OrderedDict Params: -------- window: `int` window of bin chr: `list` default: `[]` thread: `int` thread of parallel running default: `24` Returns: -------- out: `list` and gc store in array-like Examples: -------- >>> getGCbin(1000000) [[0.5, 0.2, 0.5 ...], ...] """ self.gcBin = [] chroms = listify(chr) if chr else self.chromLabels _chromsidx = [self.label2idx[i] for i in chroms] """ def subgc(chrom): chromWindow = int(self.chromSizes[chrom] // self.window) + 1 _gc = np.ones(chromWindow, dtype=np.float) for i in range(chromWindow): _gc[i] = self.getGC(chrom, i*self.window, (i+1)*self.window, correct=correct) return _gc res = Parallel(thread)(delayed(subgc)(args) for args in _chromsidx) """ for chrom in _chromsidx: chromWindow = int(self.chromSizes[chrom] // self.window) + 1 self.gcBin.append(np.ones(chromWindow, dtype=np.float)) for i in range(chromWindow - 1): self.gcBin[chrom][i] = self.getGC(chrom, i * self.window, (i + 1) * self.window, correct=correct) else: self.gcBin[chrom][chromWindow - 1] = self.getGC( chrom, (chromWindow - 1) * self.window, chromWindow * self.window, correct=correct) logging.debug('Successful getGCBin') return self.gcBin def clearCache(self): """ clear Memory cache data in the `{}`. """.format(self.mem_cache) if hasattr(self, 'memory'): self.memory.clear()
        costly_compute = memory.cache(_costly_compute_cached)
        return costly_compute(data, self.column)


transformer = Algorithm()

start = time.time()
data_trans = transformer.transform(data)
end = time.time()

print('\nThe function took {:.2f} s to compute.'.format(end - start))
print('\nThe transformed data are:\n {}'.format(data_trans))

###############################################################################
start = time.time()
data_trans = transformer.transform(data)
end = time.time()

print('\nThe function took {:.2f} s to compute.'.format(end - start))
print('\nThe transformed data are:\n {}'.format(data_trans))

###############################################################################
# As expected, the second call to the ``transform`` method loads the results
# that have been cached.

###############################################################################
# Clean up cache directory
###############################################################################

memory.clear(warn=False)
from scipy.stats.distributions import uniform

import mne
from mne.utils import logger, ProgressBar

from sklearn.base import BaseEstimator
# RandomizedSearchCV and KFold now live in sklearn.model_selection
# (sklearn.grid_search and sklearn.cross_validation were removed)
from sklearn.model_selection import RandomizedSearchCV, KFold
from joblib import Memory

from pandas import DataFrame

from .utils import clean_by_interp, interpolate_bads

mem = Memory(location='cachedir')  # joblib's `cachedir` argument is now `location`
mem.clear()


def grid_search(epochs, n_interpolates, consensus_percs, prefix, n_folds=3):
    """Grid search to find optimal values of n_interpolate and consensus_perc.

    Parameters
    ----------
    epochs : instance of mne.Epochs
        The epochs object for which bad epochs must be found.
    n_interpolates : array
        The number of sensors to interpolate.
    consensus_percs : array
        The percentage of channels to be interpolated.
    n_folds : int
        Number of folds for cross-validation.
def create_api_blueprint(config):
    blueprint = Blueprint('api', __name__)

    data_dir = os.path.abspath(
        config.get('data', 'data_root', fallback='.data'))
    cache_dir = os.path.join(data_dir, 'server-cache')
    search_config = parse_search_config(config)
    client_config = dict(config['client']) if 'client' in config else {}

    memory = Memory(location=cache_dir, verbose=0)
    LOGGER.debug("cache directory: %s", cache_dir)
    memory.clear(warn=False)

    db: Database = connect_configured_database(autocommit=True)

    load_recommender = get_recommend_reviewer_factory(db, config)
    recommend_reviewers: _ReloadableRecommendReviewers = (
        ReloadableRecommendReviewers(load_recommender))

    def get_search_type():
        return request.args.get('search_type', DEFAULT_SEARCH_TYPE)

    def user_has_role_by_email(email, role) -> bool:
        with db.begin():
            return recommend_reviewers.user_has_role_by_email(email=email,
                                                              role=role)

    api_auth = ApiAuth(config, client_config, search_config=search_config,
                       user_has_role_by_email=user_has_role_by_email,
                       get_search_type=get_search_type)

    @blueprint.route("/")
    def _api_root() -> Response:
        return jsonify({
            'links': {
                'recommend-reviewers': url_for('api._recommend_reviewers_api'),
                'subject-areas': url_for('api._subject_areas_api'),
                'keywords': url_for('api._keywords_api'),
                'config': url_for('api._config_api')
            }
        })

    @memory.cache
    def recommend_reviewers_as_json(**kwargs) -> Response:
        with db.begin():
            return jsonify(recommend_reviewers.recommend(**kwargs))

    @blueprint.route("/recommend-reviewers")
    @api_auth.wrap_search
    def _recommend_reviewers_api(**_) -> Response:
        manuscript_no = request.args.get('manuscript_no')
        subject_area = request.args.get('subject_area')
        keywords = request.args.get('keywords')
        abstract = request.args.get('abstract')
        limit = request.args.get('limit')
        search_type = get_search_type()
        search_params = search_config.get(search_type)
        if search_params is None:
            raise BadRequest('unknown search type - %s' % search_type)
        role = search_params.get('filter_by_role')
        recommend_relationship_types = search_params.get(
            'recommend_relationship_types')
        recommend_stage_names = search_params.get('recommend_stage_names')
        if limit is None:
            limit = search_params.get('default_limit', DEFAULT_LIMIT)
        else:
            limit = int(limit)
        if not manuscript_no and keywords is None:
            raise BadRequest('keywords parameter required')
        return recommend_reviewers_as_json(
            manuscript_no=manuscript_no,
            subject_area=subject_area,
            keywords=keywords,
            abstract=abstract,
            role=role,
            recommend_relationship_types=recommend_relationship_types,
            recommend_stage_names=recommend_stage_names,
            limit=limit)

    @blueprint.route("/manuscript/version/<path:version_id>")
    @api_auth
    def _get_manuscript_details(version_id, **_) -> Response:
        manuscript_details = recommend_reviewers.get_manuscript_details(
            version_id)
        if not manuscript_details:
            raise NotFound()
        return jsonify(manuscript_details)

    @blueprint.route("/subject-areas")
    def _subject_areas_api() -> Response:
        with db.begin():
            return jsonify(list(recommend_reviewers.get_all_subject_areas()))

    @blueprint.route("/keywords")
    def _keywords_api() -> Response:
        with db.begin():
            return jsonify(list(recommend_reviewers.get_all_keywords()))

    @blueprint.route("/config")
    def _config_api() -> Response:
        return jsonify(client_config)

    @blueprint.route("/search-types")
    @api_auth
    def _search_types_api(email=None) -> Response:
        with db.begin():
            if email is None or api_auth.is_staff_email(email):
                LOGGER.debug(
                    'email is None or staff email, not filtering search types')
                allowed_search_config = search_config
            else:
recommend_reviewers.get_user_roles_by_email(email)) | {''} allowed_search_config = { search_type: search_params for search_type, search_params in search_config.items() if search_params.get('required_role', '') in roles } LOGGER.debug( 'roles, email=%s, roles=%s, filtered_search_types=%s', email, roles, allowed_search_config.keys()) search_types_response = [{ 'search_type': search_type, 'title': search_config[search_type].get('title', search_type) } for search_type in sorted(allowed_search_config.keys())] return jsonify(search_types_response) @blueprint.teardown_request def _remove_session(exc=None): try: LOGGER.debug('teardown, exc=%s', exc) db.remove_local() except Exception as e: # pylint: disable=W0703 LOGGER.warning('failed to remove session due to %s', e, exc_info=e) def reload_api(): recommend_reviewers.reload() recommend_reviewers_as_json.clear() api_auth.reload() return blueprint, reload_api
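Note how reload_api() above clears only the memoized recommend function rather than the whole store. A small sketch of that per-function invalidation pattern (the function and values are illustrative, not from the source):

import tempfile
from joblib import Memory

memory = Memory(location=tempfile.mkdtemp(), verbose=0)

@memory.cache
def recommend(query):
    return {'query': query, 'result': query.upper()}

recommend('abc')             # computed and cached
recommend.clear(warn=False)  # drops only this function's cache entries
recommend('abc')             # recomputed after the targeted clear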
    {
        'reduce_dim__n_components': N_FEATUREDIM_OPTIONS,
        'feature_selection__k': N_FEATURES_OPTIONS,
        'classify__C': C_OPTIONS,
        'classify__l1_ratio': l1_ratio,
    },
]
reducer_labels = ['PCA', 'KBest(chi2)']

grid = GridSearchCV(pipe, n_jobs=1, param_grid=param_grid)
X, y = load_digits(return_X_y=True)
grid.fit(X, y)

# Delete the temporary cache before exiting
memory.clear(warn=False)
rmtree(location)

mean_scores = np.array(grid.cv_results_['mean_test_score'])
# scores are in the order of param_grid iteration, which is alphabetical
mean_scores = mean_scores.reshape(len(C_OPTIONS), -1, len(N_FEATURES_OPTIONS))
# select score for best C
mean_scores = mean_scores.max(axis=0)
bar_offsets = (np.arange(len(N_FEATURES_OPTIONS)) *
               (len(reducer_labels) + 1) + .5)

plt.figure()
COLORS = 'bgrcmyk'
for i, (label, reducer_scores) in enumerate(zip(reducer_labels, mean_scores)):
    plt.bar(bar_offsets + i, reducer_scores, label=label, color=COLORS[i])
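The snippet above is the tail of a cached-pipeline example; `memory`, `location`, `pipe`, and `param_grid` come from its omitted head. A reconstructed sketch of that setup (the exact steps and options are an assumption, shown for illustration):

from shutil import rmtree
from tempfile import mkdtemp

from joblib import Memory
from sklearn.decomposition import PCA
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline

location = mkdtemp()
memory = Memory(location=location, verbose=0)

# fitted transformers are cached on disk, so the grid search does not
# refit identical 'reduce_dim' configurations for every parameter combination
pipe = Pipeline([('reduce_dim', PCA()),
                 ('classify', LogisticRegression())],
                memory=memory)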
class SeisFDFDKernel(object): # source array ref # receiver array ref mesh = None freq = None Solver = lambda: None def __init__(self, systemConfig, **kwargs): if systemConfig.get('cache', False): try: from tempfile import mkdtemp from joblib import Memory except ImportError: pass else: if 'cacheDir' in systemConfig: cacheDir = systemConfig['cacheDir'] try: os.makedirs(cacheDir) except OSError as e: if e.errno == errno.EEXIST and os.path.isdir(cacheDir): pass else: raise else: cacheDir = mkdtemp() self._mem = Memory(cachedir=cacheDir, verbose=0) # Cache outputs of these methods self.forward = self._mem.cache(self.forward) self.backprop = self._mem.cache(self.backprop) hx = [(systemConfig['dx'], systemConfig['nx'] - 1)] hz = [(systemConfig['dz'], systemConfig['nz'] - 1)] self.mesh = SimPEG.Mesh.TensorMesh([hx, hz], '00') self.mesh.ireg = systemConfig.get('ireg', DEFAULT_IREG) self.mesh.freeSurf = systemConfig.get('freeSurf', DEFAULT_FREESURF_BOUNDS) initMap = { # Argument Rename to Property 'c': 'cR', 'Q': None, 'rho': None, 'nPML': None, 'freeSurf': None, 'freq': None, 'ky': None, 'kyweight': None, 'Solver': None, 'dx': None, 'dz': None, 'dtype': None, } for key in initMap.keys(): if key in systemConfig: if initMap[key] is None: setattr(self, key, systemConfig[key]) else: setattr(self, initMap[key], systemConfig[key]) def __del__(self): if hasattr(self, '_mem'): self._mem.clear() cacheDir = self._mem.cachedir del self._mem shutil.rmtree(cacheDir) # Model properties @property def c(self): return self.cR + self.cI @c.setter def c(self, value): self._cR = value.real self._cI = value.imag self._invalidateMatrix() @property def rho(self): if getattr(self, '_rho', None) is None: self._rho = 310 * self.c**0.25 return self._rho @rho.setter def rho(self, value): self._rho = value self._invalidateMatrix() @property def Q(self): if getattr(self, '_Q', None) is None: self._Q = numpy.inf return self._Q @Q.setter def Q(self, value): self._Q = value self._invalidateMatrix() @property def cR(self): return self._cR @cR.setter def cR(self, value): self._cR = value self._invalidateMatrix() @property def cI(self): if self.Q is numpy.inf: return 0 else: return 1j * self.cR / (2 * self.Q) @cI.setter def cI(self, value): if (value == 0).all(): self._Q = numpy.inf else: self._Q = 1j * self.cR / (2 * value) self._invalidateMatrix() # Modelling properties @property def nPML(self): if getattr(self, '_nPML', None) is None: self._nPML = DEFAULT_PML_SIZE return self._nPML @nPML.setter def nPML(self, value): self._nPML = value self._invalidateMatrix() @property def ky(self): if getattr(self, '_ky', None) is None: self._ky = 0. return self._ky @ky.setter def ky(self, value): self._ky = value self._invalidateMatrix() @property def kyweight(self): if getattr(self, '_kyweight', None) is None: self._kyweight = 1. 
return self._kyweight @kyweight.setter def kyweight(self, value): self._kyweight = value self._invalidateMatrix() # Clever matrix setup properties @property def Solver(self): if getattr(self, '_Solver', None) is None: self._Solver = DEFAULT_SOLVER return self._Solver @Solver.setter def Solver(self, value): self._Solver = value @property def A(self): if getattr(self, '_A', None) is None: self._A = self._initHelmholtzNinePoint() return self._A @property def Ainv(self): if getattr(self, '_Ainv', None) is None: self._mfact() return self._Ainv def _invalidateMatrix(self): if getattr(self, '_A', None) is not None: del (self._A) if getattr(self, '_Ainv', None) is not None: del (self._Ainv) if getattr(self, '_mem', None) is not None: self._mem.clear() @property def dtypeReal(self): if self.dtype == 'float': return numpy.float32 elif self.dtype == 'double': return numpy.float64 else: raise NotImplementedError('Unknown dtype: %s' % self.dtype) @property def dtypeComplex(self): if self.dtype == 'float': return numpy.complex64 elif self.dtype == 'double': return numpy.complex128 else: raise NotImplementedError('Unknown dtype: %s' % self.dtype) @property def dtype(self): return getattr(self, '_dtype', DEFAULT_DTYPE) @dtype.setter def dtype(self, value): # Currently this doesn't work because all the solvers assume doubles # if value in ['float', 'double']: if value in ['double']: self._dtype = value else: raise NotImplementedError('Unknown dtype: %s' % value) # ------------------------------------------------------------------------ # Matrix setup def _mfact(self): self._Ainv = self.Solver(self.A) def _initHelmholtzNinePoint(self): """ An attempt to reproduce the finite-difference stencil and the general behaviour of OMEGA by Pratt et al. The stencil is a 9-point second-order version based on work by a number of people in the mid-90s including Ivan Stekl. The boundary conditions are based on the PML implementation by Steve Roecker in fdfdpml.f. 
""" # Set up SimPEG mesh dims = (self.mesh.nNy, self.mesh.nNx) # mAve = self.mesh.aveN2CC # c = (mAve.T * self.c.ravel()).reshape(dims) # rho = (mAve.T * self.rho.ravel()).reshape(dims) c = self.c rho = self.rho # fast --> slow is x --> y --> z as Fortran # Set up physical properties in matrices with padding omega = 2 * numpy.pi * self.freq cPad = numpy.pad(c, pad_width=1, mode='edge') rhoPad = numpy.pad(rho, pad_width=1, mode='edge') aky = 2 * numpy.pi * self.ky # Model parameter M K = ((omega**2 / cPad**2) - aky**2) / rhoPad # Horizontal, vertical and diagonal geometry terms dx = self.mesh.hx[0] dz = self.mesh.hy[0] dxx = dx**2 dzz = dz**2 dxz = dx * dz dd = numpy.sqrt(dxz) # PML decay terms # NB: Arrays are padded later, but 'c' in these lines # comes from the original (un-padded) version nPML = self.nPML pmldx = dx * (nPML - 1) pmldz = dz * (nPML - 1) pmlr = 1e-3 pmlfx = 3.0 * numpy.log(1 / pmlr) / (2 * pmldx**3) pmlfz = 3.0 * numpy.log(1 / pmlr) / (2 * pmldz**3) dpmlx = numpy.zeros(dims, dtype=self.dtypeComplex) dpmlz = numpy.zeros(dims, dtype=self.dtypeComplex) isnx = numpy.zeros(dims, dtype=self.dtypeReal) isnz = numpy.zeros(dims, dtype=self.dtypeReal) # Only enable PML if the free surface isn't set freeSurf = self.mesh.freeSurf if freeSurf[0]: isnz[-nPML:, :] = -1 # Top if freeSurf[1]: isnx[:, -nPML:] = -1 # Right Side if freeSurf[2]: isnz[:nPML, :] = 1 # Bottom if freeSurf[3]: isnx[:, :nPML] = 1 # Left side dpmlx[:, :nPML] = (numpy.arange(nPML, 0, -1) * dx).reshape((1, nPML)) dpmlx[:, -nPML:] = (numpy.arange(1, nPML + 1, 1) * dx).reshape( (1, nPML)) dnx = pmlfx * c * dpmlx**2 ddnx = 2 * pmlfx * c * dpmlx denx = dnx + 1j * omega r1x = 1j * omega / denx r1xsq = r1x**2 r2x = isnx * r1xsq * ddnx / denx dpmlz[:nPML, :] = (numpy.arange(nPML, 0, -1) * dz).reshape((nPML, 1)) dpmlz[-nPML:, :] = (numpy.arange(1, nPML + 1, 1) * dz).reshape( (nPML, 1)) dnz = pmlfz * c * dpmlz**2 ddnz = 2 * pmlfz * c * dpmlz denz = dnz + 1j * omega r1z = 1j * omega / denz r1zsq = r1z**2 r2z = isnz * r1zsq * ddnz / denz # Visual key for finite-difference terms # (per Pratt and Worthington, 1990) # # This Original # AF FF CF vs. AD DD CD # AA BE CC vs. AA BE CC # AD DD CD vs. AF FF CF # Set of keys to index the dictionaries keys = ['AD', 'DD', 'CD', 'AA', 'BE', 'CC', 'AF', 'FF', 'CF'] # Diagonal offsets for the sparse matrix formation offsets = { 'AD': (-1) * dims[1] + (-1), 'DD': (-1) * dims[1] + (0), 'CD': (-1) * dims[1] + (+1), 'AA': (0) * dims[1] + (-1), 'BE': (0) * dims[1] + (0), 'CC': (0) * dims[1] + (+1), 'AF': (+1) * dims[1] + (-1), 'FF': (+1) * dims[1] + (0), 'CF': (+1) * dims[1] + (+1), } # Buoyancies bMM = 1. / rhoPad[0:-2, 0:-2] # bottom left bME = 1. / rhoPad[0:-2, 1:-1] # bottom centre bMP = 1. / rhoPad[0:-2, 2:] # bottom right bEM = 1. / rhoPad[1:-1, 0:-2] # middle left bEE = 1. / rhoPad[1:-1, 1:-1] # middle centre bEP = 1. / rhoPad[1:-1, 2:] # middle right bPM = 1. / rhoPad[2:, 0:-2] # top left bPE = 1. / rhoPad[2:, 1:-1] # top centre bPP = 1. / rhoPad[2:, 2:] # top right # Initialize averaged buoyancies on most of the grid bMM = (bEE + bMM) / 2 # a2 bME = (bEE + bME) / 2 # d1 bMP = (bEE + bMP) / 2 # d2 bEM = (bEE + bEM) / 2 # a1 # ... 
middle bEP = (bEE + bEP) / 2 # c1 bPM = (bEE + bPM) / 2 # f2 bPE = (bEE + bPE) / 2 # f1 bPP = (bEE + bPP) / 2 # c2 # Reset the buoyancies on the outside edges # bMM[ 0, :] = bEE[ 0, :] # bMM[ :, 0] = bEE[ :, 0] # bME[ 0, :] = bEE[ 0, :] # bMP[ 0, :] = bEE[ 0, :] # bMP[ :,-1] = bEE[ :,-1] # bEM[ :, 0] = bEE[ :, 0] # bEP[ :,-1] = bEE[ :,-1] # bPM[-1, :] = bEE[-1, :] # bPM[ :, 0] = bEE[ :, 0] # bPE[-1, :] = bEE[-1, :] # bPP[-1, :] = bEE[-1, :] # bPP[ :,-1] = bEE[ :,-1] # K = omega^2/(c^2 . rho) kMM = K[0:-2, 0:-2] # bottom left kME = K[0:-2, 1:-1] # bottom centre kMP = K[0:-2, 2:] # bottom centre kEM = K[1:-1, 0:-2] # middle left kEE = K[1:-1, 1:-1] # middle centre kEP = K[1:-1, 2:] # middle right kPM = K[2:, 0:-2] # top left kPE = K[2:, 1:-1] # top centre kPP = K[2:, 2:] # top right # 9-point fd star acoef = 0.5461 bcoef = 0.4539 ccoef = 0.6248 dcoef = 0.09381 ecoef = 0.000001297 # 5-point fd star # acoef = 1.0 # bcoef = 0.0 # ecoef = 0.0 # NB: bPM and bMP here are switched relative to S. Roecker's version # in OMEGA. This is because the labelling herein is always ?ZX. diagonals = { 'AD': ecoef * kMM + bcoef * bMM * ((r1zsq + r1xsq) / (4 * dxz) - (r2z + r2x) / (4 * dd)), 'DD': dcoef * kME + acoef * bME * (r1zsq / dz - r2z / 2) / dz + bcoef * (r1zsq - r1xsq) * (bMP + bMM) / (4 * dxz), 'CD': ecoef * kMP + bcoef * bMP * ((r1zsq + r1xsq) / (4 * dxz) - (r2z + r2x) / (4 * dd)), 'AA': dcoef * kEM + acoef * bEM * (r1xsq / dx - r2x / 2) / dx + bcoef * (r1xsq - r1zsq) * (bPM + bMM) / (4 * dxz), 'BE': ccoef * kEE + acoef * (r2x * (bEM - bEP) / (2 * dx) + r2z * (bME - bPE) / (2 * dz) - r1xsq * (bEM + bEP) / dxx - r1zsq * (bME + bPE) / dzz) + bcoef * (((r2x + r2z) * (bMM - bPP) + (r2z - r2x) * (bMP - bPM)) / (4 * dd) - (r1xsq + r1zsq) * (bMM + bPP + bPM + bMP) / (4 * dxz)), 'CC': dcoef * kEP + acoef * bEP * (r1xsq / dx + r2x / 2) / dx + bcoef * (r1xsq - r1zsq) * (bMP + bPP) / (4 * dxz), 'AF': ecoef * kPM + bcoef * bPM * ((r1zsq + r1xsq) / (4 * dxz) - (r2z + r2x) / (4 * dd)), 'FF': dcoef * kPE + acoef * bPE * (r1zsq / dz - r2z / 2) / dz + bcoef * (r1zsq - r1xsq) * (bPM + bPP) / (4 * dxz), 'CF': ecoef * kPP + bcoef * bPP * ((r1zsq + r1xsq) / (4 * dxz) - (r2z + r2x) / (4 * dd)), } diagonals['AD'] = diagonals['AD'].ravel()[dims[1] + 1:] diagonals['DD'] = diagonals['DD'].ravel()[dims[1]:] diagonals['CD'] = diagonals['CD'].ravel()[dims[1] - 1:] diagonals['AA'] = diagonals['AA'].ravel()[1:] diagonals['BE'] = diagonals['BE'].ravel()[:] diagonals['CC'] = diagonals['CC'].ravel()[:-1] diagonals['AF'] = diagonals['AF'].ravel()[:-dims[1] + 1] diagonals['FF'] = diagonals['FF'].ravel()[:-dims[1]] diagonals['CF'] = diagonals['CF'].ravel()[:-dims[1] - 1] # self._setupBoundary(diagonals, freeSurf) if any(freeSurf): raise NotImplementedError('Free surface not implemented!') # for key in diagonals.keys(): # print('%s:\t%d\t%d'%(key, diagonals[key].size, offsets[key])) diagonals = [diagonals[key] for key in keys] offsets = [offsets[key] for key in keys] A = scipy.sparse.diags( diagonals, offsets, shape=(self.mesh.nN, self.mesh.nN), format='csr', dtype=self.dtypeComplex ) #, shape=(self.mesh.nN, self.mesh.nN))#, self.mesh.nN, self.mesh.nN, format='csr') return A # def _setupBoundary(self, diagonals, freeSurf): # """ # Function to set up boundary regions for the Seismic FDFD problem # using the 9-point finite-difference stencil from OMEGA/FULLWV. # """ # keys = diagonals.keys() # pickDiag = lambda x: -1. if freeSurf[x] else 1. 
# # Left # for key in keys: # if key is 'BE': # diagonals[key][:,0] = pickDiag(3) # else: # diagonals[key][:,0] = 0. # # Right # for key in keys: # if key is 'BE': # diagonals[key][:,-1] = pickDiag(1) # else: # diagonals[key][:,-1] = 0. # # Bottom # for key in keys: # if key is 'BE': # diagonals[key][0,:] = pickDiag(2) # else: # diagonals[key][0,:] = 0. # # Top # for key in keys: # if key is 'BE': # diagonals[key][-1,:] = pickDiag(0) # else: # diagonals[key][-1,:] = 0. # ------------------------------------------------------------------------ # Externally-callable functions def clear(self): self._invalidateMatrix() # What about @caching decorators? def forward(self, src, dOnly=True): q = self.kyweight * src.getq(self.mesh) u = self.Ainv * q d = numpy.array( [numpy.dot(P, u) for P in src.getP(self.mesh, self.ky)]).ravel() if dOnly: return d else: return u, d def backprop(self, src, dresid=1.): qr = self.kyweight * src.getqback(self.mesh, dresid, self.ky) u = self.Ainv * qr return u
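The kernel above rebinds forward/backprop to cached wrappers in __init__. A standalone sketch of that pattern (illustrative class, not from the source). Because the wrapper is built from a bound method, `self` is not part of the cache key, which is why the original class calls `self._mem.clear()` whenever the model changes:

from tempfile import mkdtemp
from joblib import Memory

class CachedSolver(object):
    def __init__(self):
        self._mem = Memory(location=mkdtemp(), verbose=0)
        # rebind the bound method to its memoized wrapper; subsequent
        # calls with the same arguments are served from the on-disk cache
        self.solve = self._mem.cache(self.solve)

    def solve(self, x):
        return x ** 2

s = CachedSolver()
s.solve(3)  # computed and stored
s.solve(3)  # replayed from the cache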
@memory.cache
def spell(word, count=10, dict_words=None):
    dict_words = load_words() if dict_words is None else dict_words
    return sorted(dict_words, key=lambda dw: levenshtein(word, dw))[:count]


if __name__ == '__main__':
    from argparse import ArgumentParser

    parser = ArgumentParser(description='spell checker')
    parser.add_argument('word', help='word to check', nargs='?')
    parser.add_argument('--count', type=int, default=10,
                        help='number of words to return')
    parser.add_argument('--clear-cache', help='clear cache',
                        action='store_true', default=False)
    args = parser.parse_args()

    if args.clear_cache:
        memory.clear()
        raise SystemExit

    if not args.word:
        raise SystemExit('no word given')

    for word in spell(args.word, args.count):
        print(word)
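Every call above hashes dict_words into the cache key, which is wasteful when the word list is large and effectively constant. A possible variant (an assumption, not the original code; only safe if dict_words never varies between calls) uses joblib's ignore option to exclude it from the key:

@memory.cache(ignore=['dict_words'])
def spell(word, count=10, dict_words=None):
    # dict_words is excluded from the cache key, so the large default
    # dictionary is neither hashed nor allowed to fragment the cache
    dict_words = load_words() if dict_words is None else dict_words
    return sorted(dict_words, key=lambda dw: levenshtein(word, dw))[:count]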
class CacheManager(object):
    '''The librosa cache manager class wraps joblib.Memory
    with a __call__ attribute, so that it may act as a function.

    Additionally, it provides a caching level filter, so that
    different functions can be cached or not depending on the user's
    preference for speed vs. storage usage.
    '''

    def __init__(self, *args, **kwargs):
        level = kwargs.pop('level', 10)

        # Initialize the memory object
        self.memory = Memory(*args, **kwargs)
        # The level parameter controls which data we cache
        # smaller numbers mean less caching
        self.level = level

    def __call__(self, level):
        '''Example usage:

        @cache(level=2)
        def semi_important_function(some_arguments):
            ...
        '''
        def wrapper(function):
            '''Decorator function.  Adds an input/output cache to
            the specified function.'''
            from decorator import FunctionMaker

            def decorator_apply(dec, func):
                """Decorate a function by preserving the signature
                even if dec is not a signature-preserving decorator.

                This recipe is derived from
                http://micheles.googlecode.com/hg/decorator/documentation.html#id14
                """
                return FunctionMaker.create(
                    func, 'return decorated(%(signature)s)',
                    dict(decorated=dec(func)), __wrapped__=func)

            if self.memory.location is not None and self.level >= level:
                return decorator_apply(self.memory.cache, function)
            else:
                return function
        return wrapper

    def clear(self, *args, **kwargs):
        return self.memory.clear(*args, **kwargs)

    def eval(self, *args, **kwargs):
        return self.memory.eval(*args, **kwargs)

    def format(self, *args, **kwargs):
        return self.memory.format(*args, **kwargs)

    def reduce_size(self, *args, **kwargs):
        return self.memory.reduce_size(*args, **kwargs)

    def warn(self, *args, **kwargs):
        return self.memory.warn(*args, **kwargs)
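A usage sketch of the level filter (constructor arguments pass straight through to joblib.Memory; the numbers are illustrative). A function is wrapped only when the manager's level is at least the function's level:

cache = CacheManager('/tmp/librosa-cache', verbose=0, level=10)

@cache(level=10)
def cached_transform(n):
    return n * 2   # wrapped: manager level 10 >= function level 10

@cache(level=20)
def uncached_helper(n):
    return n + 1   # returned unwrapped: manager level 10 < function level 20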
parser.add_option("-c", "--clear", dest="clear", action='store_true',
                  help="if True, clear the cache.", default=False)
parser.add_option("-b", "--backend", dest="backend",
                  help="backend for parsing (selenium | requests)",
                  default='requests')
options, args = parser.parse_args()
backend, clear = options.backend, options.clear
if clear:
    mem.clear()

random.seed()
gen_date = time.strftime("%B %d, %Y")
url_tails = ['1521584321377182930', '12188330066413208874']
papers = [
    'MEG and EEG data analysis with MNE-Python',
    'MNE software for processing MEG and EEG data'
]

publications = list()
for url_tail, paper in zip(url_tails, papers):
    titles, authors, links = get_citing_articles(
        'https://scholar.google.co.in/scholar?cites=%s' % url_tail,
        backend=backend)
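The excerpt above calls mem.clear() on a module-level `mem` defined earlier in the script but not shown here. A plausible definition (an assumption, not part of the excerpt):

from joblib import Memory

# module-level cache shared by the scraping helpers in this script
mem = Memory(location='cachedir', verbose=0)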
class SeisFDFDKernel(object): # source array ref # receiver array ref mesh = None freq = None Solver = lambda: None def __init__(self, systemConfig, locator=None, **kwargs): if locator is not None: self._locator = locator else: self._locator = SeisLocator25D(systemConfig['geom']) if systemConfig.get('cache', False): try: from tempfile import mkdtemp from joblib import Memory except ImportError: pass else: if 'cacheDir' in systemConfig: cacheDir = systemConfig['cacheDir'] try: os.makedirs(cacheDir) except OSError as e: if e.errno == errno.EEXIST and os.path.isdir(cacheDir): pass else: raise else: cacheDir = mkdtemp() self._mem = Memory(cachedir=cacheDir, verbose=0) # Cache outputs of these methods self.forward = self._mem.cache(self.forward) self.backprop = self._mem.cache(self.backprop) hx = [(systemConfig['dx'], systemConfig['nx'])] hz = [(systemConfig['dz'], systemConfig['nz'])] self.mesh = SimPEG.Mesh.TensorMesh([hx, hz], '00') initMap = { # Argument Rename to Property 'c': 'cR', 'Q': None, 'rho': None, 'nPML': None, 'freeSurf': None, 'freq': None, 'ky': None, 'kyweight': None, 'Solver': None, 'ireg': None, 'dx': None, 'dz': None, } for key in initMap.keys(): if key in systemConfig: if initMap[key] is None: setattr(self, key, systemConfig[key]) else: setattr(self, initMap[key], systemConfig[key]) def __del__(self): if hasattr(self, '_mem'): self._mem.clear() cacheDir = self._mem.cachedir del self._mem shutil.rmtree(cacheDir) # Model properties @property def c(self): return self.cR + self.cI @c.setter def c(self, value): self._cR = value.real self._cI = value.imag self._invalidateMatrix() @property def rho(self): if getattr(self, '_rho', None) is None: self._rho = 310 * self.c**0.25 return self._rho @rho.setter def rho(self, value): self._rho = value self._invalidateMatrix() @property def Q(self): if getattr(self, '_Q', None) is None: self._Q = numpy.inf return self._Q @Q.setter def Q(self, value): self._Q = value self._invalidateMatrix() @property def cR(self): return self._cR @cR.setter def cR(self, value): self._cR = value self._invalidateMatrix() @property def cI(self): if self.Q is numpy.inf: return 0 else: return 1j * self.cR / (2 * self.Q) @cI.setter def cI(self, value): if (value == 0).all(): self._Q = numpy.inf else: self._Q = 1j * self.cR / (2 * value) self._invalidateMatrix() # Modelling properties @property def nPML(self): if getattr(self, '_nPML', None) is None: self._nPML = DEFAULT_PML_SIZE return self._nPML @nPML.setter def nPML(self, value): self._nPML = value self._invalidateMatrix() @property def freeSurf(self): if getattr(self, '_freeSurf', None) is None: self._freeSurf = DEFAULT_FREESURF_BOUNDS return self._freeSurf @freeSurf.setter def freeSurf(self, value): self._freeSurf = value self._invalidateMatrix() @property def ky(self): if getattr(self, '_ky', None) is None: self._ky = 0. 
return self._ky @ky.setter def ky(self, value): self._ky = value self._invalidateMatrix() @property def ireg(self): if getattr(self, '_ireg', None) is None: self._ireg = DEFAULT_IREG return self._ireg @ireg.setter def ireg(self, value): self._ireg = value # Clever matrix setup properties @property def Solver(self): if getattr(self, '_Solver', None) is None: self._Solver = SimPEG.SolverWrapD(DEFAULT_SOLVER) return self._Solver @Solver.setter def Solver(self, value): self._Solver = value @property def A(self): if getattr(self, '_A', None) is None: self._A = self._initHelmholtzNinePoint() return self._A @property def Ainv(self): if getattr(self, '_Ainv', None) is None: self._mfact() return self._Ainv def _invalidateMatrix(self): if getattr(self, '_A', None) is not None: del (self._A) if getattr(self, '_Ainv', None) is not None: del (self._Ainv) if getattr(self, '_mem', None) is not None: self._mem.clear() # ------------------------------------------------------------------------ # Matrix setup def _mfact(self): self._Ainv = self.Solver(self.A) def _initHelmholtzNinePoint(self): """ An attempt to reproduce the finite-difference stencil and the general behaviour of OMEGA by Pratt et al. The stencil is a 9-point second-order version based on work by a number of people in the mid-90s including Ivan Stekl. The boundary conditions are based on the PML implementation by Steve Roecker in fdfdpml.f. """ # Set up SimPEG mesh dims = (self.mesh.nNy, self.mesh.nNx) mAve = self.mesh.aveN2CC c = (mAve.T * self.c.ravel()).reshape(dims) rho = (mAve.T * self.rho.ravel()).reshape(dims) # fast --> slow is x --> y --> z as Fortran # Set up physical properties in matrices with padding omega = 2 * numpy.pi * self.freq cPad = numpy.pad(c, pad_width=1, mode='edge') rhoPad = numpy.pad(rho, pad_width=1, mode='edge') aky = 2 * numpy.pi * self.ky # Model parameter M K = ((omega**2 / cPad**2) - aky**2) / rhoPad # Horizontal, vertical and diagonal geometry terms dx = self.mesh.hx[0] dz = self.mesh.hy[0] dxx = dx**2 dzz = dz**2 dxz = dx * dz dd = numpy.sqrt(dxz) # PML decay terms # NB: Arrays are padded later, but 'c' in these lines # comes from the original (un-padded) version nPML = self.nPML pmldx = dx * (nPML - 1) pmldz = dz * (nPML - 1) pmlr = 1e-3 pmlfx = 3.0 * numpy.log(1 / pmlr) / (2 * pmldx**3) pmlfz = 3.0 * numpy.log(1 / pmlr) / (2 * pmldz**3) dpmlx = numpy.zeros(dims, dtype=numpy.complex128) dpmlz = numpy.zeros(dims, dtype=numpy.complex128) isnx = numpy.zeros(dims, dtype=numpy.float64) isnz = numpy.zeros(dims, dtype=numpy.float64) # Only enable PML if the free surface isn't set freeSurf = self.freeSurf if freeSurf[0]: isnz[-nPML:, :] = -1 # Top if freeSurf[1]: isnx[:, -nPML:] = -1 # Right Side if freeSurf[2]: isnz[:nPML, :] = 1 # Bottom if freeSurf[3]: isnx[:, :nPML] = 1 # Left side dpmlx[:, :nPML] = (numpy.arange(nPML, 0, -1) * dx).reshape((1, nPML)) dpmlx[:, -nPML:] = (numpy.arange(1, nPML + 1, 1) * dx).reshape( (1, nPML)) dnx = pmlfx * c * dpmlx**2 ddnx = 2 * pmlfx * c * dpmlx denx = dnx + 1j * omega r1x = 1j * omega / denx r1xsq = r1x**2 r2x = isnx * r1xsq * ddnx / denx dpmlz[:nPML, :] = (numpy.arange(nPML, 0, -1) * dz).reshape((nPML, 1)) dpmlz[-nPML:, :] = (numpy.arange(1, nPML + 1, 1) * dz).reshape( (nPML, 1)) dnz = pmlfz * c * dpmlz**2 ddnz = 2 * pmlfz * c * dpmlz denz = dnz + 1j * omega r1z = 1j * omega / denz r1zsq = r1z**2 r2z = isnz * r1zsq * ddnz / denz # Visual key for finite-difference terms # (per Pratt and Worthington, 1990) # # This Original # AF FF CF vs. AD DD CD # AA BE CC vs. 
AA BE CC # AD DD CD vs. AF FF CF # Set of keys to index the dictionaries keys = ['AD', 'DD', 'CD', 'AA', 'BE', 'CC', 'AF', 'FF', 'CF'] # Diagonal offsets for the sparse matrix formation offsets = { 'AD': (-1) * dims[1] + (-1), 'DD': (-1) * dims[1] + (0), 'CD': (-1) * dims[1] + (+1), 'AA': (0) * dims[1] + (-1), 'BE': (0) * dims[1] + (0), 'CC': (0) * dims[1] + (+1), 'AF': (+1) * dims[1] + (-1), 'FF': (+1) * dims[1] + (0), 'CF': (+1) * dims[1] + (+1), } # Buoyancies bMM = 1. / rhoPad[0:-2, 0:-2] # bottom left bME = 1. / rhoPad[0:-2, 1:-1] # bottom centre bMP = 1. / rhoPad[0:-2, 2:] # bottom centre bEM = 1. / rhoPad[1:-1, 0:-2] # middle left bEE = 1. / rhoPad[1:-1, 1:-1] # middle centre bEP = 1. / rhoPad[1:-1, 2:] # middle right bPM = 1. / rhoPad[2:, 0:-2] # top left bPE = 1. / rhoPad[2:, 1:-1] # top centre bPP = 1. / rhoPad[2:, 2:] # top right # Initialize averaged buoyancies on most of the grid bMM = (bEE + bMM) / 2 # a2 bME = (bEE + bME) / 2 # d1 bMP = (bEE + bMP) / 2 # d2 bEM = (bEE + bEM) / 2 # a1 # ... middle bEP = (bEE + bEP) / 2 # c1 bPM = (bEE + bPM) / 2 # f2 bPE = (bEE + bPE) / 2 # f1 bPP = (bEE + bPP) / 2 # c2 # Reset the buoyancies on the outside edges bMM[0, :] = bEE[0, :] bMM[:, 0] = bEE[:, 0] bME[0, :] = bEE[0, :] bMP[0, :] = bEE[0, :] bMP[:, -1] = bEE[:, -1] bEM[:, 0] = bEE[:, 0] bEP[:, -1] = bEE[:, -1] bPM[-1, :] = bEE[-1, :] bPM[:, 0] = bEE[:, 0] bPE[-1, :] = bEE[-1, :] bPP[-1, :] = bEE[-1, :] bPP[:, -1] = bEE[:, -1] # K = omega^2/(c^2 . rho) kMM = K[0:-2, 0:-2] # bottom left kME = K[0:-2, 1:-1] # bottom centre kMP = K[0:-2, 2:] # bottom centre kEM = K[1:-1, 0:-2] # middle left kEE = K[1:-1, 1:-1] # middle centre kEP = K[1:-1, 2:] # middle right kPM = K[2:, 0:-2] # top left kPE = K[2:, 1:-1] # top centre kPP = K[2:, 2:] # top right # 9-point fd star acoef = 0.5461 bcoef = 0.4539 ccoef = 0.6248 dcoef = 0.09381 ecoef = 0.000001297 # 5-point fd star # acoef = 1.0 # bcoef = 0.0 # ecoef = 0.0 # NB: bPM and bMP here are switched relative to S. Roecker's version # in OMEGA. This is because the labelling herein is always ?ZX. 
        diagonals = {
            'AD': ecoef * kMM + bcoef * bMM * ((r1zsq + r1xsq) / (4 * dxz) - (r2z + r2x) / (4 * dd)),
            'DD': dcoef * kME + acoef * bME * (r1zsq / dz - r2z / 2) / dz + bcoef * (r1zsq - r1xsq) * (bMP + bMM) / (4 * dxz),
            'CD': ecoef * kMP + bcoef * bMP * ((r1zsq + r1xsq) / (4 * dxz) - (r2z + r2x) / (4 * dd)),
            'AA': dcoef * kEM + acoef * bEM * (r1xsq / dx - r2x / 2) / dx + bcoef * (r1xsq - r1zsq) * (bPM + bMM) / (4 * dxz),
            'BE': ccoef * kEE + acoef * (r2x * (bEM - bEP) / (2 * dx) + r2z * (bME - bPE) / (2 * dz) - r1xsq * (bEM + bEP) / dxx - r1zsq * (bME + bPE) / dzz) + bcoef * (((r2x + r2z) * (bMM - bPP) + (r2z - r2x) * (bMP - bPM)) / (4 * dd) - (r1xsq + r1zsq) * (bMM + bPP + bPM + bMP) / (4 * dxz)),
            'CC': dcoef * kEP + acoef * bEP * (r1xsq / dx + r2x / 2) / dx + bcoef * (r1xsq - r1zsq) * (bMP + bPP) / (4 * dxz),
            'AF': ecoef * kPM + bcoef * bPM * ((r1zsq + r1xsq) / (4 * dxz) - (r2z + r2x) / (4 * dd)),
            'FF': dcoef * kPE + acoef * bPE * (r1zsq / dz - r2z / 2) / dz + bcoef * (r1zsq - r1xsq) * (bPM + bPP) / (4 * dxz),
            'CF': ecoef * kPP + bcoef * bPP * ((r1zsq + r1xsq) / (4 * dxz) - (r2z + r2x) / (4 * dd)),
        }

        self._setupBoundary(diagonals, freeSurf)

        diagonals = numpy.array([diagonals[key].ravel() for key in keys])
        offsets = [offsets[key] for key in keys]

        A = scipy.sparse.spdiags(diagonals, offsets,
                                 self.mesh.nN, self.mesh.nN, format='csr')

        return A

    def _setupBoundary(self, diagonals, freeSurf):
        """
        Function to set up boundary regions for the Seismic FDFD problem
        using the 9-point finite-difference stencil from OMEGA/FULLWV.
        """

        keys = diagonals.keys()
        # identity comparison (`is 'BE'`) was a bug; string equality is meant
        pickDiag = lambda x: -1. if freeSurf[x] else 1.

        # Left
        for key in keys:
            if key == 'BE':
                diagonals[key][:, 0] = pickDiag(3)
            else:
                diagonals[key][:, 0] = 0.

        # Right
        for key in keys:
            if key == 'BE':
                diagonals[key][:, -1] = pickDiag(1)
            else:
                diagonals[key][:, -1] = 0.

        # Bottom
        for key in keys:
            if key == 'BE':
                diagonals[key][0, :] = pickDiag(2)
            else:
                diagonals[key][0, :] = 0.

        # Top
        for key in keys:
            if key == 'BE':
                diagonals[key][-1, :] = pickDiag(0)
            else:
                diagonals[key][-1, :] = 0.
    # Quasi-functional attempt -----------------------------------------------
    #
    def _srcVec(self, sLocs, terms):

        q = numpy.zeros((self.mesh.nNy, self.mesh.nNx), dtype=numpy.complex128)
        srcScale = -self.dx * self.dz

        if self.ireg == 0:
            # Closest source point
            q = q.ravel()
            for i in range(len(sLocs)):
                qI = SimPEG.Utils.closestPoints(self.mesh, sLocs[i], gridLoc='N')
                q[qI] += terms[i] / srcScale
        else:
            # Kaiser windowed sinc function
            freg = 2 * self.ireg + 1
            q = numpy.pad(q, self.ireg, mode='constant')
            for i in range(len(sLocs)):
                qI = SimPEG.Utils.closestPoints(self.mesh, sLocs[i], gridLoc='N')
                # integer division is required here; `/` would yield floats
                # under Python 3 and break the slice indexing below
                Zi, Xi = (qI // self.mesh.nNx, numpy.mod(qI, self.mesh.nNx))
                offset = (sLocs[i][0] - Xi * self.dx, sLocs[i][1] - Zi * self.dz)
                sourceRegion = KaiserWindowedSinc(self.ireg, offset)
                q[Zi:Zi + freg, Xi:Xi + freg] += terms[i] * sourceRegion / srcScale

            # Mirror and flip sign on terms that cross the free-surface boundary
            if self.freeSurf[0]:
                q[self.ireg:2 * self.ireg, :] -= numpy.flipud(q[:self.ireg, :])    # Top
            if self.freeSurf[1]:
                q[:, -2 * self.ireg:-self.ireg] -= numpy.fliplr(q[:, -self.ireg:])  # Right
            if self.freeSurf[2]:
                q[-2 * self.ireg:-self.ireg, :] -= numpy.flipud(q[-self.ireg:, :])  # Bottom
            if self.freeSurf[3]:
                q[:, self.ireg:2 * self.ireg] -= numpy.fliplr(q[:, :self.ireg])   # Left

            # Cut off edges
            q = q[self.ireg:-self.ireg, self.ireg:-self.ireg].ravel()

        return q

    def _srcTerm(self, sLocs, individual=True, terms=1):

        if individual and len(sLocs) > 1:
            result = []
            for i in range(len(sLocs)):
                result.append(self._srcVec(
                    [sLocs[i] if hasattr(sLocs, '__contains__') else sLocs],
                    [terms[i]] if hasattr(terms, '__contains__') else [terms]))
        else:
            result = self._srcVec(
                sLocs if hasattr(sLocs, '__contains__') else [sLocs],
                terms if hasattr(terms, '__contains__') else [terms])

        return result

    # Quasi-functional attempt -----------------------------------------------

    # ------------------------------------------------------------------------
    # Externally-callable functions

    def clear(self):
        self._invalidateMatrix()

    # What about @caching decorators?
    def forward(self, isrc, dOnly=True, sterm=1.):

        sloc, rlocs, coeffs = self._locator(isrc, self.ky)
        q = self._srcTerm(sloc, individual=True, terms=sterm)
        u = self.Ainv * q
        d = numpy.array([numpy.dot(u, qr)
                         for qr in self._srcTerm(rlocs, individual=True,
                                                 terms=coeffs)])

        if dOnly:
            return d
        else:
            return u, d

    def backprop(self, isrc, dresid=1.):

        sloc, rlocs, coeffs = self._locator(isrc, self.ky)
        qr = self._srcTerm(rlocs, individual=False, terms=dresid * coeffs)
        u = self.Ainv * qr

        return u
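The kernel classes above rely on __del__ to clear and remove their cache directories, but __del__ is not guaranteed to run at interpreter shutdown. A deterministic teardown sketch of the same idea (standalone and illustrative; note that recent joblib exposes the directory as `location`, while older releases used `cachedir`):

import shutil
from tempfile import mkdtemp
from joblib import Memory

mem = Memory(location=mkdtemp(), verbose=0)

def square(x):
    return x * x

square = mem.cache(square)

try:
    square(4)
finally:
    # explicit teardown: empty the store, then remove the directory itself
    cache_dir = mem.location
    mem.clear(warn=False)
    shutil.rmtree(cache_dir, ignore_errors=True)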
def _run_interface(self, runtime): import os import gc import time import nibabel as nib from pynets.core.utils import load_runconfig from nipype.utils.filemanip import fname_presuffix, copyfile from pynets.fmri import clustering from pynets.registration.utils import orient_reslice from joblib import Parallel, delayed from joblib.externals.loky.backend import resource_tracker from pynets.registration import utils as regutils from pynets.core.utils import decompress_nifti import pkg_resources import shutil import tempfile resource_tracker.warnings = None template = pkg_resources.resource_filename( "pynets", f"templates/standard/{self.inputs.template_name}_brain_" f"{self.inputs.vox_size}.nii.gz") template_tmp_path = fname_presuffix(template, suffix="_tmp", newpath=runtime.cwd) copyfile(template, template_tmp_path, copy=True, use_hardlink=False) hardcoded_params = load_runconfig() c_boot = hardcoded_params["c_boot"][0] nthreads = hardcoded_params["omp_threads"][0] clust_list = ["kmeans", "ward", "complete", "average", "ncut", "rena"] clust_mask_temp_path = orient_reslice(self.inputs.clust_mask, runtime.cwd, self.inputs.vox_size) cm_suf = os.path.basename(self.inputs.clust_mask).split('.nii')[0] clust_mask_in_t1w_path = f"{runtime.cwd}/clust_mask-" \ f"{cm_suf}_in_t1w.nii.gz" t1w_brain_tmp_path = fname_presuffix(self.inputs.t1w_brain, suffix="_tmp", newpath=runtime.cwd) copyfile(self.inputs.t1w_brain, t1w_brain_tmp_path, copy=True, use_hardlink=False) mni2t1w_warp_tmp_path = fname_presuffix(self.inputs.mni2t1w_warp, suffix="_tmp", newpath=runtime.cwd) copyfile( self.inputs.mni2t1w_warp, mni2t1w_warp_tmp_path, copy=True, use_hardlink=False, ) mni2t1_xfm_tmp_path = fname_presuffix(self.inputs.mni2t1_xfm, suffix="_tmp", newpath=runtime.cwd) copyfile(self.inputs.mni2t1_xfm, mni2t1_xfm_tmp_path, copy=True, use_hardlink=False) clust_mask_in_t1w = regutils.roi2t1w_align( clust_mask_temp_path, t1w_brain_tmp_path, mni2t1_xfm_tmp_path, mni2t1w_warp_tmp_path, clust_mask_in_t1w_path, template_tmp_path, self.inputs.simple, ) time.sleep(0.5) if self.inputs.mask: out_name_mask = fname_presuffix(self.inputs.mask, suffix="_tmp", newpath=runtime.cwd) copyfile(self.inputs.mask, out_name_mask, copy=True, use_hardlink=False) else: out_name_mask = None out_name_func_file = fname_presuffix(self.inputs.func_file, suffix="_tmp", newpath=runtime.cwd) copyfile(self.inputs.func_file, out_name_func_file, copy=True, use_hardlink=False) out_name_func_file = decompress_nifti(out_name_func_file) if self.inputs.conf: out_name_conf = fname_presuffix(self.inputs.conf, suffix="_tmp", newpath=runtime.cwd) copyfile(self.inputs.conf, out_name_conf, copy=True, use_hardlink=False) else: out_name_conf = None nip = clustering.NiParcellate( func_file=out_name_func_file, clust_mask=clust_mask_in_t1w, k=int(self.inputs.k), clust_type=self.inputs.clust_type, local_corr=self.inputs.local_corr, outdir=self.inputs.outdir, conf=out_name_conf, mask=out_name_mask, ) atlas = nip.create_clean_mask() nip.create_local_clustering(overwrite=True, r_thresh=0.4) if self.inputs.clust_type in clust_list: if float(c_boot) > 1: import random from joblib import Memory from joblib.externals.loky import get_reusable_executor print(f"Performing circular block bootstrapping with {c_boot}" f" iterations...") ts_data, block_size = nip.prep_boot() cache_dir = tempfile.mkdtemp() memory = Memory(cache_dir, verbose=0) ts_data = memory.cache(ts_data) def create_bs_imgs(ts_data, block_size, clust_mask_corr_img): import nibabel as nib from nilearn.masking import unmask 
from pynets.fmri.estimation import timeseries_bootstrap boot_series = timeseries_bootstrap( ts_data.func, block_size)[0].astype('float32') return unmask(boot_series, clust_mask_corr_img) def run_bs_iteration(i, ts_data, work_dir, local_corr, clust_type, _local_conn_mat_path, num_conn_comps, _clust_mask_corr_img, _standardize, _detrending, k, _local_conn, conf, _dir_path, _conn_comps): import os import time import gc from pynets.fmri.clustering import parcellate print(f"\nBootstrapped iteration: {i}") out_path = f"{work_dir}/boot_parc_tmp_{str(i)}.nii.gz" boot_img = create_bs_imgs(ts_data, block_size, _clust_mask_corr_img) try: parcellation = parcellate( boot_img, local_corr, clust_type, _local_conn_mat_path, num_conn_comps, _clust_mask_corr_img, _standardize, _detrending, k, _local_conn, conf, _dir_path, _conn_comps) parcellation.to_filename(out_path) parcellation.uncache() boot_img.uncache() gc.collect() except BaseException: boot_img.uncache() gc.collect() return None _clust_mask_corr_img.uncache() return out_path time.sleep(random.randint(1, 5)) counter = 0 boot_parcellations = [] while float(counter) < float(c_boot): with Parallel(n_jobs=nthreads, max_nbytes='8000M', backend='loky', mmap_mode='r+', temp_folder=cache_dir, verbose=10) as parallel: iter_bootedparcels = parallel( delayed(run_bs_iteration) (i, ts_data, runtime.cwd, nip.local_corr, nip.clust_type, nip._local_conn_mat_path, nip.num_conn_comps, nip._clust_mask_corr_img, nip._standardize, nip._detrending, nip.k, nip._local_conn, nip.conf, nip._dir_path, nip._conn_comps) for i in range(c_boot)) boot_parcellations.extend( [i for i in iter_bootedparcels if i is not None]) counter = len(boot_parcellations) del iter_bootedparcels gc.collect() print('Bootstrapped samples complete:') print(boot_parcellations) print("Creating spatially-constrained consensus " "parcellation...") consensus_parcellation = clustering.ensemble_parcellate( boot_parcellations, int(self.inputs.k)) nib.save(consensus_parcellation, nip.parcellation) memory.clear(warn=False) shutil.rmtree(cache_dir, ignore_errors=True) del parallel, memory, cache_dir get_reusable_executor().shutdown(wait=True) gc.collect() for i in boot_parcellations: if i is not None: if os.path.isfile(i): os.system(f"rm -f {i} &") else: print("Creating spatially-constrained parcellation...") out_path = f"{runtime.cwd}/{atlas}_{str(self.inputs.k)}.nii.gz" func_img = nib.load(out_name_func_file) parcellation = clustering.parcellate( func_img, self.inputs.local_corr, self.inputs.clust_type, nip._local_conn_mat_path, nip.num_conn_comps, nip._clust_mask_corr_img, nip._standardize, nip._detrending, nip.k, nip._local_conn, nip.conf, nip._dir_path, nip._conn_comps) parcellation.to_filename(out_path) else: raise ValueError("Clustering method not recognized. See: " "https://nilearn.github.io/modules/generated/" "nilearn.regions.Parcellations." 
"html#nilearn.regions.Parcellations") # Give it a minute ix = 0 while not os.path.isfile(nip.parcellation) and ix < 60: print('Waiting for clustered parcellation...') time.sleep(1) ix += 1 if not os.path.isfile(nip.parcellation): raise FileNotFoundError(f"Parcellation clustering failed for" f" {nip.parcellation}") self._results["atlas"] = atlas self._results["parcellation"] = nip.parcellation self._results["clust_mask"] = clust_mask_in_t1w_path self._results["k"] = self.inputs.k self._results["clust_type"] = self.inputs.clust_type self._results["clustering"] = True self._results["func_file"] = self.inputs.func_file reg_tmp = [ t1w_brain_tmp_path, mni2t1w_warp_tmp_path, mni2t1_xfm_tmp_path, template_tmp_path, out_name_func_file ] for j in reg_tmp: if j is not None: if os.path.isfile(j): os.system(f"rm -f {j} &") gc.collect() return runtime
class FingerprintMatcher: def __init__(self, cache_dir='joblib_cache', verbose: int = 10): cache_dir = os.path.abspath(cache_dir) if not os.path.exists(cache_dir): os.makedirs(cache_dir) self.cache_dir = cache_dir self.memory = Memory(cache_dir, verbose=0) # self.compute_lro = self.memory.cache(self.compute_lro) # useless by now self.verbose = verbose self.minutiaeLUT = {} def __del__(self): self.memory.clear(warn=False) if hasattr(self, 'minutiaeLUT'): for path in self.minutiaeLUT.values(): os.remove(path) def compute_lro(self, image, bd_specs, num_dir): raise NotImplementedError("Derived class must reimplement this method") def fit(self, X, y): return self def precompute(self, X): """ Precomputes the minutiae for all the files involved in matching. Args: X (iterable): iterable where each element is an absolute file path. Note: The iterable X must contain each and every file that needs to be involved in the matching phase. """ # Ensure that X is a list and not a generator X = list(X) # Create a function with fixed paramters def field_compute(padded_img, blkoffs, num_dir): # Get border and step information i, j = np.unravel_index(blkoffs, shape=padded_img.shape) bd_specs = { 'border_x': j[0, 0], 'border_y': i[0, 0], 'step_x': j[0, 1] - j[0, 0], 'step_y': i[1, 0] - i[0, 0], } field, mask = self.compute_lro(padded_img, bd_specs, num_dir) if field is None: # allows to not change the direction map return None # Average pooling on field field = subsample(field, is_field=True, **bd_specs, smooth=True, policy='nist') # Convert field to index lro = angle(field, keepDims=False) idx = nbis_angle2idx(lro, N=num_dir) # Eventually apply a mask mask = subsample(mask.astype(int), is_field=False, **bd_specs, smooth=False, policy='nist') mask = np.round(mask).astype(bool) idx[np.logical_not(mask)] = -1 return idx.astype('int32') def compute_minutiae(path): try: image = np.array(PIL.Image.open(path).convert('L')) M = mindtct(image, field_compute, contrast_boost=True)[-1] M = minutiae_selection(M) except Exception as err: print('Warning: skipping image due to', err) return None return M minutiae = Parallel(verbose=self.verbose)(delayed(compute_minutiae)(x) for x in X) for x, M in zip(X, minutiae): if M is None: continue # Create a filename that hopefully is not taken by other objects filename = '{}{}{}.xyt'.format(id(self), id(M), time.time()) # Save minutiae to file filepath = os.path.join(self.cache_dir, filename) to_csv_options = {'sep': ' ', 'header': False, 'index': False} pd.DataFrame(M).to_csv(filepath, **to_csv_options) # Record the filepath in a dictionary self.minutiaeLUT[x] = filepath def match_scores(self, X): """ Perform matching exploiting the previously computed minutiae. Args: X (iterable): each element is a file absolute path, and must correspond to one of the file paths passed to the pre-computation function. """ def _scores_from_batch(batch): """ Computes the scores for a batch with couples of file paths. 
""" # Filter out null elements, coming from the last batch # batch = filter(None, batch) # Create the mates file # mates_file = os.path.join(self.cache_dir, '{}{}{}.lis'.format(id(self), id(batch), time.time())) # excluded = [] # with open(mates_file, 'w') as f: # for n, pair in enumerate(batch): # if pair[0] in self.minutiaeLUT and pair[1] in self.minutiaeLUT: # f.write(self.minutiaeLUT[pair[0]]+'\n') # f.write(self.minutiaeLUT[pair[1]]+'\n') # else: # excluded.append(n) if batch[0] not in self.minutiaeLUT or batch[ 1] not in self.minutiaeLUT: return None # Run matcher exe_path = os.path.join(__NBIS_LIB__, 'bin', 'bozorth3') # command = "{} -M \"{}\"".format(exe_path, mates_file) command = "{} \"{}\" \"{}\"".format(exe_path, self.minutiaeLUT[batch[0]], self.minutiaeLUT[batch[1]]) with Popen(command, cwd=self.cache_dir, shell=True, universal_newlines=True, stdout=PIPE, stderr=PIPE) as proc: err = proc.stderr.read() if err != "": raise RuntimeError(err) # Read the list of scores # Splits on newlines and remove empty strings # scores = [int(k) for k in filter(None, proc.stdout.read().split('\n'))] scores = int(proc.stdout.read().rstrip()) # Put Nones where a matching couldn't be executed # for n in excluded: # scores.insert(n, None) # os.remove(mates_file) return scores # X = grouper(X, 256) scores = Parallel(verbose=self.verbose, batch_size=512)(delayed(_scores_from_batch)(x) for x in X) # scores = list(chain(*scores)) return scores def predict(self, X): return (self.match_scores(X) > self.threshold).astype(int)