def warmstart_all_parallel(x, y, x_test, y_test,
                           fname_in='results_softmax_regression_mnist',
                           fname_out='results_softmax_regression_warmstart_mnist',
                           model_type='softmax_regression',
                           w_diff_term_crit=0.0001, learning_rate=0.0001,
                           regularizations=[100., 10., 1., 0.1, 0.01, 0.001, 0.]):
    pretrained_models = pickle.load(open(fname_in, 'rb'))
    if model_type == 'softmax_regression':
        # previous_loss_train=None, previous_regularization_penalty_train=None
        results = joblib.Parallel(n_jobs=47)(
            delayed(tf_softmax_regression.train_softmax)(
                x, y, x_test, y_test,
                learning_rate=learning_rate,
                max_iterations=1000000,
                w_diff_term_crit=w_diff_term_crit,
                verbose=True,
                regularization=regularizations[target_i],
                model=pretrained_models[init_i]['model'],
                regularization_initialization=pretrained_models[init_i]['regularization'],
                previous_loss_train=pretrained_models[init_i]['loss_train'],
                previous_regularization_penalty_train=pretrained_models[init_i]['regularization_penalty_train'])
            for target_i in xrange(0, len(regularizations))
            for init_i in xrange(0, len(pretrained_models)))
    elif model_type == 'linear_regression':
        results = joblib.Parallel(n_jobs=47)(
            delayed(tf_linear_regression.train)(
                x, y, x_test, y_test,
                learning_rate=learning_rate,
                max_iterations=1000000,
                w_diff_term_crit=w_diff_term_crit,
                verbose=True,
                regularization=regularizations[target_i],
                model=pretrained_models[init_i]['model'],
                regularization_initialization=pretrained_models[init_i]['regularization'])
            for target_i in xrange(0, len(regularizations))
            for init_i in xrange(0, len(pretrained_models)))
    pickle.dump(results, open(fname_out, 'wb'))

def parallel_sift(image_paths, poolNum=20, single=False):
    if single:
        with joblib.Parallel(n_jobs=poolNum, verbose=1) as parallel:
            features = parallel(joblib.delayed(do_sift_single)(s)
                                for s in image_paths)
    else:
        split_paths = np.array_split(image_paths, poolNum)
        with joblib.Parallel(n_jobs=poolNum, verbose=1) as parallel:
            features = parallel(joblib.delayed(do_sift_batch)(s)
                                for s in split_paths)
        features = np.vstack(features)
    return features

def train_all_parallel(x, y, x_test, y_test,
                       fname='results_softmax_regression_mnist',
                       model_type='softmax_regression',
                       w_diff_term_crit=0.0001, learning_rate=0.0001,
                       regularizations=[100., 10., 1., 0.1, 0.01, 0.001, 0.]):
    if model_type == 'softmax_regression':
        results = joblib.Parallel(n_jobs=47)(
            delayed(tf_softmax_regression.train_softmax)(
                x, y, x_test, y_test,
                learning_rate=learning_rate,
                max_iterations=1000000,
                regularization=regularizations[reg_i],
                w_diff_term_crit=w_diff_term_crit,
                verbose=True)
            for i_par in range(48)
            for reg_i in xrange(0, len(regularizations)))
    elif model_type == 'linear_regression':
        results = joblib.Parallel(n_jobs=47)(
            delayed(tf_linear_regression.train)(
                x, y, x_test, y_test,
                learning_rate=learning_rate,
                max_iterations=1000000,
                regularization=regularizations[reg_i],
                w_diff_term_crit=w_diff_term_crit,
                verbose=True)
            for i_par in range(48)
            for reg_i in xrange(0, len(regularizations)))
    pickle.dump(results, open(fname, 'wb'))

def parallel_helper(n_jobs, df_var, tile_str, tile_ul, mosaic_tx,
                    overlapping_sets, agg_stats, inds):
    return joblib.Parallel(n_jobs=n_jobs, verbose=5, backend="threading")(
        joblib.delayed(stem.predict_pixel)(df_var, tile_str, tile_ul,
                                           mosaic_tx, pixel_inds,
                                           overlapping_sets, agg_stats)
        for pixel_inds in inds)

def get_split_scores(factory, thresholds, formula,
                     metric=None,  # e.g. usability entropy
                     use_joblib=False, joblib_backend='threading',
                     n_jobs=-1, min_events_fraction_leaf=0., verbose=False):
    if metric is None:
        metric = penalized_usability_entropy
    if min_events_fraction_leaf <= 1:
        min_events_fraction_leaf = int(min_events_fraction_leaf * sum(factory.weights))
    if verbose:
        print min_events_fraction_leaf, sum(factory.weights)

    if not use_joblib:
        scores = np.repeat(float("inf"), len(thresholds))
        for i, (feature, cut, _) in enumerate(thresholds):
            predicate = (factory.events[:, feature] > cut)
            # skip the edge cases... (inf penalty)
            if np.all(predicate) or (not np.any(predicate)):
                # if this split does not split, fuggedaboutit
                continue
            if min_events_fraction_leaf > 0:
                # get rid of too uneven a cuts
                sum_weight = np.sum(factory.weights)
                true_weight = np.sum(factory.weights[predicate])
                false_weight = sum_weight - true_weight
                if true_weight < min_events_fraction_leaf or false_weight < min_events_fraction_leaf:
                    if verbose:
                        print "t:", true_weight, "f:", false_weight, "discarded"
                    continue
                if verbose:
                    print "t:", true_weight, "f:", false_weight, "passed"
            # compute score
            subFactories = factory.split_by(predicate)
            scores[i] = metric(formula, *subFactories)
    else:
        if n_jobs < 0:
            n_jobs = joblib.cpu_count() + 1 - n_jobs
        indices = [0] + [len(thresholds) * (i + 1) / n_jobs for i in range(n_jobs)]
        thresholdSections = [thresholds[indices[i]:indices[i + 1]]
                             for i in range(n_jobs)]
        if joblib_backend == 'threading':
            factory = [deepcopy(factory) for i in range(n_jobs)]
            formula = [deepcopy(formula) for i in range(n_jobs)]
            metric = [deepcopy(metric) for i in range(n_jobs)]  # in case it has some internal data
            jobs = (joblib.delayed(get_split_scores)(
                        factory[i], thresholdSection, formula[i],
                        metric=metric[i], use_joblib=False,
                        min_events_fraction_leaf=min_events_fraction_leaf,
                        verbose=verbose)
                    for i, thresholdSection in enumerate(thresholdSections))
        else:
            jobs = (joblib.delayed(get_split_scores)(
                        factory, thresholdSection, formula,
                        metric=metric, use_joblib=False,
                        min_events_fraction_leaf=min_events_fraction_leaf,
                        verbose=verbose)
                    for thresholdSection in thresholdSections)
        scores = np.hstack(joblib.Parallel(n_jobs=n_jobs,
                                           backend=joblib_backend)(jobs))
    return scores

def watershed_sequence(a, seeds=None, mask=None, axis=0, n_jobs=1, **kwargs):
    """Perform a watershed on a plane-by-plane basis.

    See documentation for `watershed` for available kwargs.

    The watershed algorithm views image intensity as "height" and finds flood
    basins within it. These basins are then viewed as the different labeled
    regions of an image.

    This function performs watershed on an ndarray on each plane separately,
    then concatenates the results.

    Parameters
    ----------
    a : numpy ndarray, arbitrary type or shape.
        The input image on which to perform the watershed transform.
    seeds : bool/int numpy.ndarray, same shape as a (optional, default None)
        The seeds for the watershed.
    mask : bool numpy.ndarray, same shape as a (optional, default None)
        If provided, perform watershed only over voxels that are True in
        the mask.
    axis : int, {1, ..., a.ndim} (optional, default: 0)
        Which axis defines the plane sequence. For example, if the input
        image is 3D and axis=1, then the output will be the watershed on
        a[:, 0, :], a[:, 1, :], a[:, 2, :], ... and so on.
    n_jobs : int, optional
        Use joblib to distribute each plane over given number of processing
        cores. If -1, `multiprocessing.cpu_count` is used.

    Returns
    -------
    ws : numpy ndarray, int type
        The labeled watershed basins.

    Other parameters
    ----------------
    **kwargs : keyword arguments passed through to the `watershed` function.
    """
    if axis != 0:
        a = a.swapaxes(0, axis).copy()
        if seeds is not None:
            seeds = seeds.swapaxes(0, axis)
        if mask is not None:
            mask = mask.swapaxes(0, axis)
    if seeds is None:
        seeds = it.repeat(None)
    if mask is None:
        mask = it.repeat(None)
    ws = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(watershed)(i, seeds=s, mask=m, **kwargs)
        for i, s, m in zip(a, seeds, mask))
    counts = list(map(np.max, ws[:-1]))
    counts = np.concatenate((np.array([0]), counts))
    counts = np.cumsum(counts)
    for c, w in zip(counts, ws):
        w += c
    ws = np.concatenate([w[np.newaxis, ...] for w in ws], axis=0)
    if axis != 0:
        ws = ws.swapaxes(0, axis).copy()
    return ws

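# A minimal usage sketch for watershed_sequence (not part of the original
# snippet), assuming the module-level `watershed` accepts the `seeds`/`mask`
# keywords used above; the volume, mask, and n_jobs value are illustrative.
import numpy as np

vol = np.random.rand(8, 64, 64)        # hypothetical stack of 8 planes
roi = vol < 0.9                         # restrict the watershed to these voxels
labels = watershed_sequence(vol, mask=roi, axis=0, n_jobs=2)
print(labels.shape, labels.max())       # same shape as vol; labels unique across planes
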
def response(self, hits, block_size=None, n_jobs=1, inner_block_size=None):
    """compute a retina response matrix [alpha, beta] -> response at (alpha, beta)"""
    if block_size is not None and len(hits) > block_size:
        n_blocks = (len(hits) - 1) / block_size + 1
        block_responses = []
        for i in range(n_blocks):
            hit_block = hits[block_size * i:block_size * (i + 1)]
            if n_jobs == 1:
                block_response = self.response(hit_block, block_size=None,
                                               n_jobs=1)
            else:
                block_response = joblib.delayed(_response_job)(
                    self, hit_block, block_size=inner_block_size, n_jobs=1)
            block_responses.append(block_response)
        if n_jobs != 1:
            block_responses = joblib.Parallel(n_jobs=n_jobs)(block_responses)
        response = np.sum(block_responses, axis=0)
        return response
    else:
        # single block
        dists = self.linegrid.distance_from(*hits.T)
        if self.power % 2 != 0:
            dists = np.abs(dists)
        response = np.sum(np.exp(-dists ** self.power / self.variance), axis=0)
        return response

def retinize(hits_several, retina, n_closest=3000, n_shards=32):
    """apply retina to a list of hit matrices"""
    nearest_hits_several = []
    for hits in hits_several:
        hits_dist = np.linalg.norm(hits - retina.ks, axis=-1)
        hits = hits[np.argsort(hits_dist)[:n_closest], :]
        nearest_hits_several.append(hits)

    responses = []
    shard_size = (len(hits_several) - 1) / n_shards + 1
    for shard_i in range(n_shards):
        responses.append(
            joblib.delayed(get_retina_response)(
                nearest_hits_several[shard_i * shard_size:(shard_i + 1) * shard_size],
                retina))
    responses = [
        resp for batch in joblib.Parallel(n_jobs=-1)(responses)
        for resp in batch
    ]
    retina_pts = np.vstack(responses)
    return retina_pts

def get_labels(crack_probability, n_init_layers):
    sys.setrecursionlimit(1000000)
    np.random.seed(10)
    extracted_layers = joblib.Parallel(n_jobs=-1)(
        joblib.delayed(extract_segments)(crack_probability[i])
        for i in range(crack_probability.shape[0]))
    helper_mask = extract_segments(
        skimage.morphology.opening(
            np.mean(crack_probability[:n_init_layers], axis=0) > 0),
        0,
        joint_std=2,
        min_angle=np.pi / 2,
        joint_thereshold=0.002,
    )
    max_label = np.max(helper_mask)
    fixed_layers = [helper_mask]
    for i in range(0, len(extracted_layers)):
        layer_fixed, max_label = soft_propogate_labels(
            fixed_layers[i],
            extracted_layers[i],
            max_label,
            fill_ratio=0.75,
            interpolate_ratio=0.4,
            n_interpolation_options=3,
        )
        fixed_layers.append(layer_fixed)
    return np.array(fixed_layers[1:])

def _set_oob_score(self, X, y):
    n_samples = y.shape[0]

    # predictions = np.zeros((n_samples, self.n_estimators)) - 1
    # oob_score = np.zeros(self.n_estimators)
    # for i, (estimator, samples, split, features) in enumerate(zip(
    #         self.estimators_, self.estimators_samples_,
    #         self.estimators_splits_, self.estimators_features_)):
    #     # Create mask for OOB samples
    #     samples = indices_to_mask(samples, n_samples)
    #     mask = ~samples
    #
    #     predictions[mask, i] = estimator.predict(X[mask][:, features])
    #
    #     oob_score[i] = accuracy_score(split[mask], predictions[mask, i])
    #
    #     if self.verbose > 1 and i % 20 == 0:
    #         print("Encoding. Done %d/%d" % (i + 1, self.n_estimators),
    #               end="\r", file=sys.stderr)

    predictions, oob_score = zip(
        *jl.Parallel(n_jobs=self.n_jobs_predict, verbose=self.verbose)(
            jl.delayed(_predict_score_single_estimator)(
                estimator, X, features, samples, split, n_samples)
            for estimator, samples, split, features in zip(
                self.estimators_, self.estimators_samples_,
                self.estimators_splits_, self.estimators_features_)))
    predictions = np.array(predictions, dtype=np.int8).T

    # self.oob_decision_function_ = oob_decision_function
    self.prediction_ = predictions
    self.oob_score_ = oob_score

def generate_training(pairs_df, regions, generators, chunk_size=2**16,
                      n_jobs=-1):
    for region in regions:
        region_bed_columns = {
            '{}_{}'.format(region, _) for _ in chromatics.generic_bed_columns
        }
        assert region_bed_columns.issubset(pairs_df.columns)

    max_chunks = int(np.ceil(len(pairs_df) / chunk_size))
    results = joblib.Parallel(n_jobs)(
        joblib.delayed(generate_chunk_features)(
            pairs_df, regions, generators, chunk_size, chunk_number,
            max_chunks)
        for chunk_number in range(max_chunks))

    features_df = pd.concat(results).fillna(0)
    training_df = pd.merge(
        pairs_df,
        features_df,
        left_on=['{}_name'.format(region) for region in regions],
        right_index=True)
    assert training_df.index.is_unique
    assert training_df.columns.is_unique
    return training_df

def scale_data_parallel_element(X):
    X_scaled = []
    for i in range(0, len(X)):
        print "scaling ", i, " of ", len(X)
        X_scaled.append(
            joblib.Parallel(n_jobs=1000)(joblib.delayed(scale_element)(x)
                                         for x in X[i]))
    return X_scaled

def _transform(self, data):
    print('splitting data into groups')
    transform_jobs_result = self.split_and_prepare_transform_jobs(data)
    print('transforming data in', len(transform_jobs_result.jobs), 'groups')
    results = joblib.Parallel(n_jobs=self.n_jobs)(transform_jobs_result.jobs)
    transformed = self.merge_results(results)
    return transformed

def parallel_feature_hog(df, poolNum=20):
    split_df = np.array_split(df, poolNum)
    with joblib.Parallel(n_jobs=poolNum, verbose=0) as parallel:
        # result = parallel(joblib.delayed(parallel_features)(d) for d in split_df)
        result = parallel(joblib.delayed(get_feature_hog)(d) for d in split_df)
    features = np.vstack([r[0] for r in result])
    labels = np.vstack([r[1] for r in result])
    feat_path = "data/train_feat_hog.npy"
    label_path = "data/train_label_hog.npy"
    np.save(feat_path, features)
    np.save(label_path, labels)

    fake_paths = glob.glob("valid/valid_0/*.jpg")
    true_paths = glob.glob("valid/valid_1/*.jpg")
    df_fake = pd.DataFrame({"path": fake_paths})
    df_fake["label"] = 0
    df_true = pd.DataFrame({"path": true_paths})
    df_true["label"] = 1
    df = pd.concat([df_true, df_fake], axis=0).sample(frac=1)

    feat_path = "data/valid_feat_hog.npy"
    label_path = "data/valid_label_hog.npy"
    split_df = np.array_split(df, poolNum)
    with joblib.Parallel(n_jobs=poolNum, verbose=0) as parallel:
        # result = parallel(joblib.delayed(parallel_features)(d) for d in split_df)
        result = parallel(joblib.delayed(get_feature_hog)(d) for d in split_df)
    features = np.vstack([r[0] for r in result])
    labels = np.vstack([r[1] for r in result])
    np.save(feat_path, features)
    np.save(label_path, labels)
    return features, labels

def find_next_best_(self, selected, clf):
    selected_features = map(lambda s: s['feature'], selected)
    to_test = filter(lambda f: f not in selected_features,
                     range(self.X.shape[1]))
    if self.n_jobs > 1:
        return joblib.Parallel(n_jobs=self.n_jobs, max_nbytes=1e6,
                               mmap_mode='r')(
            joblib.delayed(self.get_feat_score_)(
                selected_features, f, clf, self.X, self.y)
            for f in to_test)
    else:
        return map(lambda f: self.get_feat_score_(selected_features, f, clf,
                                                  self.X, self.y),
                   to_test)

def make_dataset(cls, frame, src_dir='.', dst_dir='.', size=(128, 128)):
    from sklearn.externals import joblib
    success_flag = joblib.Parallel(n_jobs=10)(
        joblib.delayed(make_frame)(
            src_dir, x['filepath_mov'], dst_dir, x['filepath'], size)
        for (ind, x) in frame.iterrows())
    dst_frame = frame[success_flag]
    return dst_frame

def __init__(self, f, n_jobs=None, verbosity=None):
    if n_jobs is None:
        n_jobs = cfg.parallel_n_jobs
    if verbosity is None:
        verbosity = cfg.verbosity
    job_verbosity = 70 if verbosity >= 1 else 0
    self.pool = joblib.Parallel(n_jobs=n_jobs, verbose=job_verbosity)
    self.f = f
    self.cfg_vars = _get_vars_in_module(cfg)

def read_files(directory, seed=None, n_jobs=1):
    seed_pattern = '*' if seed is None else str(seed)
    glob_pattern = os.path.join(directory,
                                "predictions_*_%s_*.npy" % seed_pattern)
    files = sorted(glob.glob(glob_pattern))
    files = joblib.Parallel(n_jobs=n_jobs, verbose=10)(
        joblib.delayed(_load_file)(f=f) for f in files)
    return files

def predict(self, X, batch_size=None, n_jobs=1):
    """predict.

    Parameters
    ----------
    X: array-like, shape = (n_samples, n_features)

    batch_size: int or None, defaults to None
        batch_size controls whether the pipelines will be called on small
        chunks of the data. Useful when calling the predict method on the
        whole array X results in a MemoryError.

    n_jobs: int, defaults to 1
        Parallelize the predictions across the models with n_jobs processes.
    """
    if self._keep_models is not True:
        raise ValueError(
            "Predict can only be called if 'keep_models==True'")
    if not self._can_predict and \
            self._resampling_strategy not in \
            ['holdout', 'holdout-iterative-fit']:
        raise NotImplementedError(
            'Predict is currently not implemented for resampling '
            'strategy %s, please call refit().' % self._resampling_strategy)

    if self.models_ is None or len(self.models_) == 0 or \
            self.ensemble_ is None:
        self._load_models()

    # If self.ensemble_ is None, it means that ensemble_size is set to zero.
    # In such cases, raise error because predict and predict_proba cannot
    # be called.
    if self.ensemble_ is None:
        raise ValueError("Predict and predict_proba can only be called "
                         "if 'ensemble_size != 0'")

    # Parallelize predictions across models with n_jobs processes.
    # Each process computes predictions in chunks of batch_size rows.
    all_predictions = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(_model_predict)(self, X, batch_size, identifier)
        for identifier in self.ensemble_.get_selected_model_identifiers())

    if len(all_predictions) == 0:
        raise ValueError('Something went wrong generating the predictions. '
                         'The ensemble should consist of the following '
                         'models: %s, the following models were loaded: '
                         '%s' % (str(list(self.ensemble_indices_.keys())),
                                 str(list(self.models_.keys()))))

    predictions = self.ensemble_.predict(all_predictions)
    return predictions

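# A minimal call sketch for the predict() method above (not from the original
# snippet). It assumes `automl` is a hypothetical, already-fitted estimator
# exposing this method (ensemble_size > 0 and keep_models=True); data shape,
# batch_size, and n_jobs are illustrative. batch_size bounds the memory used
# per model, while n_jobs spreads the per-model predictions over processes.
import numpy as np

X_test = np.random.rand(10000, 20)
y_hat = automl.predict(X_test, batch_size=2048, n_jobs=4)
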
def transform(self, X, *args, **kwargs):
    """
    Transforms ``X`` using the transformers, uses :func:`pandas.concat`
    to horizontally concatenate the results.
    """
    verify_x_type(X)

    Xts = joblib.Parallel(n_jobs=self.n_jobs)(
        joblib.delayed(_transform)(trans, weight, X, *args, **kwargs)
        for _, trans, weight in self._iter())
    return self.__concat(Xts)

def visualize_images(self, img_names, importances, image_titles):
    """Create visualizations for all images in the list img_names."""
    max_importance = self.get_max_importance(importances)
    min_importance = self.get_min_importance(importances)
    # Remove old results before visualizing all images,
    # to prevent mixing old and new visualizations.
    if os.path.isdir(self.datamanager.PATHS["RESULTS"]):
        rmtree(self.datamanager.PATHS["RESULTS"])
    joblib.Parallel(n_jobs=-1, pre_dispatch='2*n_jobs')(
        joblib.delayed(visualize_image)(self, importances, img_names[i],
                                        image_titles[i], min_importance,
                                        max_importance)
        for i in range(len(img_names)))

def main_data(df, poolNum=20, replace=False):
    split_df = np.array_split(df, poolNum)
    with joblib.Parallel(n_jobs=poolNum, verbose=0) as parallel:
        result = parallel(joblib.delayed(parallel_features)(d) for d in split_df)
    features = np.vstack([r[0] for r in result])
    labels = np.vstack([r[1] for r in result])
    return features, labels

def parse_text_files_to_conll_format():
    """Parses all the input text files into conll format and writes them
    to ../conll
    """
    # parse each text file into a conll file
    with open(PUB_PATH) as json_publications_file:
        publications = json.load(json_publications_file)
    with joblib.Parallel(n_jobs=os.cpu_count() - 1) as pool:
        pool(
            joblib.delayed(parse_publication)(publications[i])
            for i in tqdm(range(len(publications)),
                          desc='convert text files to conll format '
                               'in to_conll_test.py'))

def split_and_prepare_transform_jobs(self, data):
    pjr = self.prepare_jobs(data)
    split_jobs = []
    transform_jobs = []
    self._update_split_transform_lists(pjr, split_jobs, transform_jobs)
    while split_jobs:
        pjrs = joblib.Parallel(n_jobs=self.n_jobs)(split_jobs)
        split_jobs = []
        for pjr in pjrs:
            self._update_split_transform_lists(pjr, split_jobs, transform_jobs)
    return PrepareJobResult('transform', transform_jobs)

def try_add1_bfs(allTrees, factory, learning_rate, loss, breadth, y_pred,
                 regularizer=0., use_joblib=False, n_jobs=-1):
    '''
    select best tree to add (1 step)
    '''
    if factory.__class__ is BinaryClassificationFactory:
        y_sign = factory.labels_sign
        margin = y_sign * y_pred
    elif factory.__class__ is RegressionFactory:
        margin = factory.labels - y_pred
    else:
        raise Exception("Factory type not supported")

    if use_joblib:
        if n_jobs < 0:
            n_jobs = joblib.cpu_count() + 1 - n_jobs
        indices = [0] + [
            len(allTrees) * (i + 1) / n_jobs for i in range(n_jobs)
        ]
        treeSections = [
            allTrees[indices[i]:indices[i + 1]] for i in range(n_jobs)
        ]
        tasks = [
            joblib.delayed(_inthread_try_add)(treeSection, factory, loss,
                                              margin, y_pred, learning_rate,
                                              regularizer)
            for treeSection in treeSections
        ]
        _res = joblib.Parallel(n_jobs=n_jobs, backend="multiprocessing")(tasks)
        triples = reduce(lambda a, b: a + b, _res)
    else:
        triples = [
            _try_add(tree, factory, loss, margin, y_pred, learning_rate,
                     regularizer) for tree in allTrees
        ]

    triples.sort(key=lambda el: el[0])

    return ([triple[1] for triple in triples[:breadth]],
            [triple[0] for triple in triples[:breadth]],
            [triple[2] for triple in triples[:breadth]])

def model_comparison(*args, verbose=1, score_func=None, n_jobs=None, **kwargs):
    """Collecting repeated average performance measures of selected models."""

    (comparison_scheme, X, y, estimators, estimator_params, selectors,
     fs_params, random_states, n_splits, path_to_results) = args

    global TMP_RESULTS_DIR

    # Setup temporary directory.
    path_tempdir = ioutil.setup_tempdir(TMP_RESULTS_DIR, root='.')

    # Set number of CPUs.
    if n_jobs is None:
        n_jobs = cpu_count() - 1 if cpu_count() > 1 else cpu_count()

    results = []
    for estimator_name, estimator in estimators.items():
        print('Running estimator: {}\n{}'.format(estimator.__name__, '-' * 30))

        # Setup hyperparameter grid.
        hparam_grid = ParameterGrid(estimator_params[estimator_name])

        for fs_name, fs_func in selectors.items():
            print('Running selector: {}\n{}'.format(fs_name, '-' * 30))

            selector = {
                'name': fs_name,
                'func': fs_func,
                'params': fs_params[fs_name]
            }
            # Repeating experiments.
            results.extend(
                joblib.Parallel(n_jobs=n_jobs, verbose=verbose)(
                    joblib.delayed(comparison_scheme)(
                        X, y, estimator, hparam_grid, selector, n_splits,
                        random_state, path_tempdir, verbose=verbose,
                        score_func=score_func, n_jobs=n_jobs)
                    for random_state in random_states))

    results = _cleanup(results, path_to_results)

    return results

def pipeline_compact_watershed(prob, *, invert_prob=True, l1_threshold=0,
                               grid_density=10, compactness=0.01, n_jobs=1):
    if invert_prob:
        prob = np.max(prob) - prob
    seeds = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(multiscale_seed_sequence)(p[np.newaxis, :],
                                                 l1_threshold=l1_threshold,
                                                 grid_density=grid_density)
        for p in prob)
    seeds = np.reshape(seeds, prob.shape)
    fragments = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(compact_watershed)(p, s, compactness=compactness)
        for p, s in zip(prob, seeds))
    fragments = np.array(fragments)
    max_ids = fragments.max(axis=-1).max(axis=-1)
    to_add = np.concatenate(([0], np.cumsum(max_ids)[:-1]))
    fragments += to_add[:, np.newaxis, np.newaxis]
    return fragments

def initialize(self):
    """Initialize all transformer arguments, needing initialisation."""
    if not self._initialized["n_jobs"]:
        if type(self.n_jobs) is not int and self.n_jobs is not None:
            raise ValueError('n_jobs parameter must be an int '
                             'indicating the number of jobs as in joblib '
                             'or None')
        elif self.n_jobs is None:
            self._parallel = None
        else:
            self._parallel = joblib.Parallel(n_jobs=self.n_jobs,
                                             backend="threading",
                                             pre_dispatch='all')
            self._n_jobs = self._parallel._effective_n_jobs()
        self._initialized["n_jobs"] = True

def grow_ensemble(base_model, X, y, sample_weight=None, n_estimators=1,
                  n_jobs=1, random_state=None):
    random_state = check_random_state(random_state)
    max_seed = np.iinfo('uint32').max
    random_states = random_state.randint(max_seed + 1, size=n_estimators)
    results = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(train_model)(clone(base_model), X, y,
                                    sample_weight=sample_weight,
                                    random_state=rs)
        for rs in random_states)
    return combine(results)

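# A minimal usage sketch for grow_ensemble (not from the original snippet),
# assuming the module's `train_model` fits a clone of the base model on
# (X, y) with the given seed and `combine` merges the fitted models; the
# data and the scikit-learn base estimator below are purely illustrative.
import numpy as np
from sklearn.tree import DecisionTreeClassifier

X = np.random.rand(200, 5)
y = (X[:, 0] > 0.5).astype(int)
# Fit 8 independently seeded copies of the base model across 4 processes.
ensemble = grow_ensemble(DecisionTreeClassifier(max_depth=3), X, y,
                         n_estimators=8, n_jobs=4, random_state=0)
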
def test():
    # load testing features
    datatype = 'margin'
    files = glob.glob(os.path.join('./svm_features', datatype, '*.npz'))
    results_dir = os.path.join('./results/svm_results')
    if not os.path.exists(results_dir):
        os.makedirs(results_dir)
    # testing: load model
    print('Loading model from pickle!!!')
    svm = joblib.load(os.path.join(svm_dir, 'svm_model.pkl'))
    print('Loading model finished!!!')
    # run prediction across 32 worker processes
    joblib.Parallel(n_jobs=32)(
        joblib.delayed(multi_process_predict)(svm, file, idx, results_dir,
                                              datatype)
        for idx, file in enumerate(tqdm(files)))