Example #1
def warmstart_all_parallel(x, y, x_test, y_test, fname_in='results_softmax_regression_mnist', fname_out='results_softmax_regression_warmstart_mnist', model_type='softmax_regression', w_diff_term_crit=0.0001, learning_rate=0.0001, regularizations = [100., 10., 1., 0.1, 0.01, 0.001, 0.]):
    pretrained_models = pickle.load(open(fname_in, 'rb'))
    if model_type == 'softmax_regression':
        #previous_loss_train=None, previous_regularization_penalty_train=None
        results = joblib.Parallel(n_jobs=47)(delayed(tf_softmax_regression.train_softmax)
                                             (
                                             x, y, x_test, y_test, learning_rate=learning_rate, max_iterations=1000000,
                                             w_diff_term_crit=w_diff_term_crit, verbose=True,
                                             regularization=regularizations[target_i],
                                             model=pretrained_models[init_i]['model'],
                                             regularization_initialization=pretrained_models[init_i]['regularization'],
                                             previous_loss_train=pretrained_models[init_i]['loss_train'],
                                             previous_regularization_penalty_train=pretrained_models[init_i]['regularization_penalty_train']
                                         ) for target_i in range(len(regularizations))
                                           for init_i in range(len(pretrained_models))
                                         )
    elif model_type == 'linear_regression':
        results = joblib.Parallel(n_jobs=47)(delayed(tf_linear_regression.train)
                                                 (
                                                 x, y, x_test, y_test, learning_rate=learning_rate, max_iterations=1000000,
                                                 w_diff_term_crit=w_diff_term_crit, verbose=True,
                                                 regularization=regularizations[target_i],
                                                 model=pretrained_models[init_i]['model'],
                                                 regularization_initialization=pretrained_models[init_i][
                                                     'regularization']
                                             ) for target_i in range(len(regularizations))
                                             for init_i in range(len(pretrained_models))
                                             )
    pickle.dump(results, open(fname_out, 'wb'))
Example #2
def parallel_sift(image_paths, poolNum=20, single=False):

    if single:
        with joblib.Parallel(n_jobs=poolNum, verbose=1) as parallel:
            features = parallel(joblib.delayed(do_sift_single)(s) for s in image_paths)
    else:
        split_paths = np.array_split(image_paths, poolNum)
        with joblib.Parallel(n_jobs=poolNum, verbose=1) as parallel:
            features = parallel(joblib.delayed(do_sift_batch)(s) for s in split_paths)

        features = np.vstack(features)

    return features
Example #3
def train_all_parallel(x, y, x_test, y_test, fname='results_softmax_regression_mnist', model_type='softmax_regression', w_diff_term_crit=0.0001, learning_rate=0.0001, regularizations = [100., 10., 1., 0.1, 0.01, 0.001, 0.]):
    if model_type == 'softmax_regression':
        results = joblib.Parallel(n_jobs=47)(delayed(tf_softmax_regression.train_softmax)(
            x, y, x_test, y_test, learning_rate=learning_rate, max_iterations=1000000,
            regularization=regularizations[reg_i], w_diff_term_crit=w_diff_term_crit, verbose=True)
            for i_par in range(48) for reg_i in range(len(regularizations)))

    elif model_type == 'linear_regression':
        results = joblib.Parallel(n_jobs=47)(delayed(tf_linear_regression.train)(
            x, y, x_test, y_test, learning_rate=learning_rate, max_iterations=1000000,
            regularization=regularizations[reg_i], w_diff_term_crit=w_diff_term_crit, verbose=True)
            for i_par in range(48) for reg_i in range(len(regularizations)))

    pickle.dump(results, open(fname, 'wb'))
Example #4
def parallel_helper(n_jobs, df_var, tile_str, tile_ul, mosaic_tx,
                    overlapping_sets, agg_stats, inds):
    return joblib.Parallel(n_jobs=n_jobs, verbose=5, backend="threading")(
        joblib.delayed(
            stem.predict_pixel)(df_var, tile_str, tile_ul, mosaic_tx,
                                pixel_inds, overlapping_sets, agg_stats)
        for pixel_inds in inds)
Example #5
def get_split_scores(factory, thresholds, formula,
                     metric=None,  # e.g. usability entropy
                     use_joblib=False,
                     joblib_backend='threading',
                     n_jobs=-1,
                     min_events_fraction_leaf=0., verbose=False):

    if metric is None:
        metric = penalized_usability_entropy
    if min_events_fraction_leaf <= 1:
        min_events_fraction_leaf = int(min_events_fraction_leaf * sum(factory.weights))
    if verbose:
        print(min_events_fraction_leaf, sum(factory.weights))

    if not use_joblib:
        scores = np.repeat(float("inf"), len(thresholds))
        for i, (feature, cut, _) in enumerate(thresholds):
            predicate = (factory.events[:, feature] > cut)

            #skip the edge cases... (inf penalty)
            if np.all(predicate) or (not np.any(predicate)):
                #if this split does not split, fuggedaboutit
                continue 
            if min_events_fraction_leaf > 0:
                # get rid of cuts that split the data too unevenly
                sum_weight = np.sum(factory.weights)
                true_weight = np.sum(factory.weights[predicate])
                false_weight = sum_weight - true_weight
                if true_weight < min_events_fraction_leaf or false_weight < min_events_fraction_leaf:
                    if verbose: print("t:", true_weight, "f:", false_weight, "discarded")
                    continue
                if verbose: print("t:", true_weight, "f:", false_weight, "passed")
            #compute score
            subFactories = factory.split_by(predicate)
            scores[i] = metric(formula,*subFactories)
    else:
        if n_jobs < 0:
            # joblib/sklearn convention: n_jobs=-1 means "use all cores"
            n_jobs = joblib.cpu_count() + 1 + n_jobs
       
        # split the thresholds into one contiguous section per job
        indices = [0] + [len(thresholds) * (i + 1) // n_jobs for i in range(n_jobs)]
        thresholdSections = [thresholds[indices[i]:indices[i + 1]] for i in range(n_jobs)]
        
        if joblib_backend == 'threading':
            factory = [deepcopy(factory) for i in range(n_jobs)]
            formula = [deepcopy(formula) for i in range(n_jobs)]
            metric = [deepcopy(metric) for i in range(n_jobs)] #in case it has some internal data
            
            jobs = (joblib.delayed(get_split_scores)(factory[i], thresholdSection, formula[i],
                                                     metric=metric[i], use_joblib=False,
                                                     min_events_fraction_leaf=min_events_fraction_leaf,
                                                     verbose=verbose)
                    for i, thresholdSection in enumerate(thresholdSections))
        else:
            jobs = (joblib.delayed(get_split_scores)(factory, thresholdSection, formula,
                                                     metric=metric, use_joblib=False,
                                                     min_events_fraction_leaf=min_events_fraction_leaf,
                                                     verbose=verbose)
                    for thresholdSection in thresholdSections)
        scores = np.hstack(joblib.Parallel(n_jobs=n_jobs, backend=joblib_backend)(jobs))
    return scores
Example #6
def watershed_sequence(a, seeds=None, mask=None, axis=0, n_jobs=1, **kwargs):
    """Perform a watershed on a plane-by-plane basis.

    See documentation for `watershed` for available kwargs.

    The watershed algorithm views image intensity as "height" and finds flood
    basins within it. These basins are then viewed as the different labeled
    regions of an image.

    This function performs the watershed on each plane of the ndarray
    separately, then concatenates the results.

    Parameters
    ----------
    a : numpy ndarray, arbitrary type or shape.
        The input image on which to perform the watershed transform.
    seeds : bool/int numpy.ndarray, same shape as a (optional, default None)
        The seeds for the watershed.
    mask : bool numpy.ndarray, same shape as a (optional, default None)
        If provided, perform watershed only over voxels that are True in the
        mask.
    axis : int, {0, ..., a.ndim-1} (optional, default: 0)
        Which axis defines the plane sequence. For example, if the input image
        is 3D and axis=1, then the output will be the watershed on a[:, 0, :],
        a[:, 1, :], a[:, 2, :], ... and so on.
    n_jobs : int, optional
        Use joblib to distribute each plane over given number of processing
        cores. If -1, `multiprocessing.cpu_count` is used.

    Returns
    -------
    ws : numpy ndarray, int type
        The labeled watershed basins.

    Other parameters
    ----------------
    **kwargs : keyword arguments passed through to the `watershed` function.
    """
    if axis != 0:
        a = a.swapaxes(0, axis).copy()
        if seeds is not None:
            seeds = seeds.swapaxes(0, axis)
        if mask is not None:
            mask = mask.swapaxes(0, axis)
    if seeds is None:
        seeds = it.repeat(None)
    if mask is None:
        mask = it.repeat(None)
    ws = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(watershed)(i, seeds=s, mask=m, **kwargs)
        for i, s, m in zip(a, seeds, mask))
    counts = list(map(np.max, ws[:-1]))
    counts = np.concatenate((np.array([0]), counts))
    counts = np.cumsum(counts)
    for c, w in zip(counts, ws):
        w += c
    ws = np.concatenate([w[np.newaxis, ...] for w in ws], axis=0)
    if axis != 0:
        ws = ws.swapaxes(0, axis).copy()
    return ws
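
The per-plane offsetting above (the cumulative `np.max` of earlier planes added to each later plane) is what keeps labels unique across the volume. Below is a minimal, self-contained sketch of that same pattern; `label_plane` is a hypothetical stand-in for the real `watershed` call.

import numpy as np
import joblib

def label_plane(plane):
    # hypothetical stand-in for `watershed`: any per-plane labeller returning ints >= 1
    return (plane > plane.mean()).astype(int) + 1

def label_volume_by_plane(a, n_jobs=1):
    ws = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(label_plane)(plane) for plane in a)
    # shift each plane by the cumulative label count of the planes before it
    offsets = np.cumsum([0] + [int(w.max()) for w in ws[:-1]])
    return np.stack([w + off for w, off in zip(ws, offsets)], axis=0)

volume = label_volume_by_plane(np.random.rand(4, 16, 16), n_jobs=2)
print(volume.shape, volume.max())  # (4, 16, 16), labels unique across the 4 planes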
Example #7
    def response(self, hits, block_size=None, n_jobs=1, inner_block_size=None):
        """compute a retina response matrix [alpha,beta] -> response at (alpha,beta)"""

        if block_size is not None and len(hits) > block_size:
            n_blocks = (len(hits) - 1) // block_size + 1

            block_responses = []
            for i in range(n_blocks):
                hit_block = hits[block_size * i:block_size * (i + 1)]
                if n_jobs == 1:
                    block_response = self.response(hit_block,
                                                   block_size=None,
                                                   n_jobs=1)
                else:
                    block_response = joblib.delayed(_response_job)(
                        self, hit_block, block_size=inner_block_size, n_jobs=1)
                block_responses.append(block_response)

            if n_jobs != 1:
                block_responses = joblib.Parallel(
                    n_jobs=n_jobs)(block_responses)

            response = np.sum(block_responses, axis=0)
            return response

        else:  #single block

            dists = self.linegrid.distance_from(*hits.T)
            if self.power % 2 != 0:
                dists = np.abs(dists)
            response = np.sum(np.exp(-dists**self.power / self.variance),
                              axis=0)
            return response
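
The method above queues one `joblib.delayed` call per block and only hands the whole list to `joblib.Parallel` at the end, then sums the partial responses. A small stand-in version of that accumulate-then-sum pattern (`block_response` is hypothetical, not the `_response_job` used above):

import numpy as np
import joblib

def block_response(hit_block):
    # hypothetical per-block partial result; anything summable across blocks works
    return np.histogram(hit_block, bins=10, range=(0.0, 1.0))[0]

hits = np.random.rand(10000)
block_size = 2500
tasks = [joblib.delayed(block_response)(hits[i:i + block_size])
         for i in range(0, len(hits), block_size)]
block_responses = joblib.Parallel(n_jobs=4)(tasks)
response = np.sum(block_responses, axis=0)  # same totals as a single-block computation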
Example #8
def retinize(hits_several, retina, n_closest=3000, n_shards=32):
    """apply retina to a list of hit matrices"""
    nearest_hits_several = []
    for hits in hits_several:

        hits_dist = np.linalg.norm(hits - retina.ks, axis=-1)

        hits = hits[np.argsort(hits_dist)[:n_closest], :]
        nearest_hits_several.append(hits)

    responses = []
    shard_size = (len(hits_several) - 1) // n_shards + 1

    for shard_i in range(n_shards):
        responses.append(
            joblib.delayed(get_retina_response)(
                nearest_hits_several[shard_i * shard_size:(shard_i + 1) *
                                     shard_size], retina))

    responses = [
        resp for batch in joblib.Parallel(n_jobs=-1)(responses)
        for resp in batch
    ]
    retina_pts = np.vstack(responses)
    return retina_pts
Example #9
def get_labels(crack_probability, n_init_layers):
    sys.setrecursionlimit(1000000)
    np.random.seed(10)

    extracted_layers = joblib.Parallel(n_jobs=-1)(
        joblib.delayed(extract_segments)(crack_probability[i])
        for i in range(crack_probability.shape[0]))

    helper_mask = extract_segments(
        skimage.morphology.opening(
            np.mean(crack_probability[:n_init_layers], axis=0) > 0),
        0,
        joint_std=2,
        min_angle=np.pi / 2,
        joint_thereshold=0.002,
    )

    max_label = np.max(helper_mask)
    fixed_layers = [helper_mask]
    for i in range(0, len(extracted_layers)):
        layer_fixed, max_label = soft_propogate_labels(
            fixed_layers[i],
            extracted_layers[i],
            max_label,
            fill_ratio=0.75,
            interpolate_ratio=0.4,
            n_interpolation_options=3,
        )
        fixed_layers.append(layer_fixed)
    return np.array(fixed_layers[1:])
Example #10
    def _set_oob_score(self, X, y):
        n_samples = y.shape[0]

        # predictions = np.zeros((n_samples, self.n_estimators)) - 1
        # oob_score = np.zeros(self.n_estimators)

        # for i, (estimator, samples, split, features) in enumerate(zip(
        #         self.estimators_, self.estimators_samples_,
        #         self.estimators_splits_, self.estimators_features_)):
        #     # Create mask for OOB samples
        #     samples = indices_to_mask(samples, n_samples)
        #     mask = ~samples
        #
        #     predictions[mask, i] = estimator.predict(X[mask][:, features])
        #
        #     oob_score[i] = accuracy_score(split[mask], predictions[mask, i])
        #
        #     if self.verbose > 1 and i % 20 == 0:
        #         print("Encoding. Done %d/%d" % (i + 1, self.n_estimators),
        #               end="\r", file=sys.stderr)
        predictions, oob_score = zip(
            *jl.Parallel(n_jobs=self.n_jobs_predict, verbose=self.verbose)(
                jl.delayed(_predict_score_single_estimator)(
                    estimator, X, features, samples, split, n_samples)
                for estimator, samples, split, features in zip(
                    self.estimators_, self.estimators_samples_,
                    self.estimators_splits_, self.estimators_features_)))

        predictions = np.array(predictions, dtype=np.int8).T
        # self.oob_decision_function_ = oob_decision_function
        self.prediction_ = predictions
        self.oob_score_ = oob_score
Example #11
def generate_training(pairs_df,
                      regions,
                      generators,
                      chunk_size=2**16,
                      n_jobs=-1):
    for region in regions:
        region_bed_columns = {
            '{}_{}'.format(region, _)
            for _ in chromatics.generic_bed_columns
        }
        assert region_bed_columns.issubset(pairs_df.columns)

    max_chunks = int(np.ceil(len(pairs_df) / chunk_size))
    results = joblib.Parallel(n_jobs)(joblib.delayed(generate_chunk_features)(
        pairs_df, regions, generators, chunk_size, chunk_number, max_chunks)
                                      for chunk_number in range(max_chunks))

    features_df = pd.concat(results).fillna(0)
    training_df = pd.merge(
        pairs_df,
        features_df,
        left_on=['{}_name'.format(region) for region in regions],
        right_index=True)
    assert training_df.index.is_unique
    assert training_df.columns.is_unique
    return training_df
Example #12
def scale_data_parallel_element(X):
    X_scaled = []
    for i in range(0, len(X)):
        print "scaling ", i, " of ", len(X)
        X_scaled.append(
            joblib.Parallel(n_jobs=1000)(joblib.delayed(scale_element)(x)
                                         for x in X[i]))
    return X_scaled
Example #13
    def _transform(self, data):
        print('splitting data into groups')
        transform_jobs_result = self.split_and_prepare_transform_jobs(data)
        print('transforming data in', len(transform_jobs_result.jobs), 'groups')
        results = joblib.Parallel(n_jobs=self.n_jobs)(transform_jobs_result.jobs)

        transformed = self.merge_results(results)
        return transformed
Example #14
def parallel_feature_hog(df, poolNum=20):    
    
    
    split_df = np.array_split(df, poolNum)

    with joblib.Parallel(n_jobs=poolNum, verbose=0) as parallel:
        # result = parallel(joblib.delayed(parallel_features)(d) for d in split_df)
        result = parallel(joblib.delayed(get_feature_hog)(d) for d in split_df)

    features = np.vstack([r[0] for r in result])
    labels = np.vstack([r[1] for r in result])
    
    feat_path = "data/train_feat_hog.npy"
    label_path = "data/train_label_hog.npy"
    
    np.save(feat_path, features)
    np.save(label_path, labels)
    
    fake_paths = glob.glob("valid/valid_0/*.jpg")
    true_paths = glob.glob("valid/valid_1/*.jpg")

    df_fake = pd.DataFrame({"path": fake_paths})
    df_fake["label"] = 0
    df_true = pd.DataFrame({"path": true_paths})
    df_true["label"] = 1

    df = pd.concat([df_true, df_fake], axis=0).sample(frac=1)

    feat_path = "data/valid_feat_hog.npy"
    label_path = "data/valid_label_hog.npy"
    
    
    split_df = np.array_split(df, poolNum)

    with joblib.Parallel(n_jobs=poolNum, verbose=0) as parallel:
        # result = parallel(joblib.delayed(parallel_features)(d) for d in split_df)
        result = parallel(joblib.delayed(get_feature_hog)(d) for d in split_df)

    features = np.vstack([r[0] for r in result])
    labels = np.vstack([r[1] for r in result])
    
    np.save(feat_path, features)
    np.save(label_path, labels)
    
    return features, labels
Example #15
 def find_next_best_(self, selected, clf):
   selected_features = [s['feature'] for s in selected]
   to_test = [f for f in range(self.X.shape[1]) if f not in selected_features]
   if self.n_jobs > 1:
     return joblib.Parallel(n_jobs=self.n_jobs, max_nbytes=1e6, mmap_mode='r')(
       joblib.delayed(self.get_feat_score_)(selected_features, f, clf, self.X, self.y)
       for f in to_test)
   else:
     return [self.get_feat_score_(selected_features, f, clf, self.X, self.y) for f in to_test]
Example #16
    def make_dataset(cls, frame, src_dir='.', dst_dir='.', size=(128, 128)):
        import joblib  # sklearn.externals.joblib was removed in recent scikit-learn

        success_flag = joblib.Parallel(n_jobs=10)(
            joblib.delayed(make_frame)(src_dir, x['filepath_mov'], dst_dir, x['filepath'], size)
            for (ind, x) in frame.iterrows())
        dst_frame = frame[success_flag]
        return dst_frame
Example #17
 def __init__(self, f, n_jobs=None, verbosity=None):
     if n_jobs is None:
         n_jobs = cfg.parallel_n_jobs
     if verbosity is None:
         verbosity = cfg.verbosity
     job_verbosity = 70 if verbosity >= 1 else 0
     self.pool = joblib.Parallel(n_jobs=n_jobs, verbose=job_verbosity)
     self.f = f
     self.cfg_vars = _get_vars_in_module(cfg)
Example #18
def read_files(directory, seed=None, n_jobs=1):

    seed_pattern = '*' if seed is None else str(seed)
    glob_pattern = os.path.join(directory, "predictions_*_%s_*.npy" %
                                seed_pattern)
    files = sorted(glob.glob(glob_pattern))
    files = joblib.Parallel(n_jobs=n_jobs, verbose=10)(
        joblib.delayed(_load_file)(f=f) for f in files)

    return files
Example #19
    def predict(self, X, batch_size=None, n_jobs=1):
        """predict.

        Parameters
        ----------
        X: array-like, shape = (n_samples, n_features)

        batch_size: int or None, defaults to None
            batch_size controls whether the pipelines will be
            called on small chunks of the data. Useful when calling the
            predict method on the whole array X results in a MemoryError.

        n_jobs: int, defaults to 1
            Parallelize the predictions across the models with n_jobs
            processes.
        """
        if self._keep_models is not True:
            raise ValueError(
                "Predict can only be called if 'keep_models==True'")
        if not self._can_predict and \
                self._resampling_strategy not in  \
                        ['holdout', 'holdout-iterative-fit']:
            raise NotImplementedError(
                'Predict is currently not implemented for resampling '
                'strategy %s, please call refit().' %
                self._resampling_strategy)

        if self.models_ is None or len(self.models_) == 0 or \
                self.ensemble_ is None:
            self._load_models()

        # If self.ensemble_ is None, it means that ensemble_size is set to zero.
        # In such cases, raise error because predict and predict_proba cannot
        # be called.
        if self.ensemble_ is None:
            raise ValueError("Predict and predict_proba can only be called "
                             "if 'ensemble_size != 0'")

        # Parallelize predictions across models with n_jobs processes.
        # Each process computes predictions in chunks of batch_size rows.
        all_predictions = joblib.Parallel(n_jobs=n_jobs)(
            joblib.delayed(_model_predict)(self, X, batch_size, identifier)
            for identifier in self.ensemble_.get_selected_model_identifiers())

        if len(all_predictions) == 0:
            raise ValueError(
                'Something went wrong generating the predictions. '
                'The ensemble should consist of the following '
                'models: %s, the following models were loaded: '
                '%s' % (str(list(self.ensemble_indices_.keys())),
                        str(list(self.models_.keys()))))

        predictions = self.ensemble_.predict(all_predictions)
        return predictions
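
A minimal sketch of the fan-out used in `predict` above, with two plain scikit-learn estimators standing in for the loaded ensemble members (the helper `predict_in_batches` is illustrative, not the `_model_predict` helper used above):

import numpy as np
import joblib
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier

X = np.random.rand(200, 5)
y = (X[:, 0] > 0.5).astype(int)
models = [LogisticRegression().fit(X, y), DecisionTreeClassifier(max_depth=3).fit(X, y)]

def predict_in_batches(model, X, batch_size=None):
    # chunk the rows so no single call has to hold a huge prediction array
    if batch_size is None:
        return model.predict_proba(X)
    return np.vstack([model.predict_proba(X[i:i + batch_size])
                      for i in range(0, len(X), batch_size)])

# one joblib job per model; each job predicts in chunks of batch_size rows
all_predictions = joblib.Parallel(n_jobs=2)(
    joblib.delayed(predict_in_batches)(m, X, batch_size=64) for m in models)
ensemble_probabilities = np.mean(all_predictions, axis=0)  # trivial ensemble: average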
Example #20
    def transform(self, X, *args, **kwargs):
        """
        Transforms ``X`` using the transformers, uses :func:`pandas.concat`
        to horizontally concatenate the results.
        """
        verify_x_type(X)

        Xts = joblib.Parallel(n_jobs=self.n_jobs)(
            joblib.delayed(_transform)(trans, weight, X, *args, **kwargs)
            for _, trans, weight in self._iter())
        return self.__concat(Xts)
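
A minimal sketch of the same pattern with stand-in transformers: each one runs in its own joblib job and the resulting frames are concatenated column-wise with `pandas.concat` (the helpers here are hypothetical, not the library's `_transform`):

import joblib
import pandas as pd

def square(df):
    return df ** 2

def negate(df):
    return -df

def apply_transformer(name, func, df):
    # prefix the output columns so the horizontal concat stays unambiguous
    return func(df).add_prefix(name + '_')

X = pd.DataFrame({'a': [1.0, 2.0, 3.0], 'b': [4.0, 5.0, 6.0]})
transformers = [('sq', square), ('neg', negate)]

Xts = joblib.Parallel(n_jobs=2)(
    joblib.delayed(apply_transformer)(name, func, X) for name, func in transformers)
result = pd.concat(Xts, axis=1)  # columns: sq_a, sq_b, neg_a, neg_b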
Example #21
 def visualize_images(self, img_names, importances, image_titles):
     """Create visualizations for all images in the list img_names."""
     max_importance = self.get_max_importance(importances)
     min_importance = self.get_min_importance(importances)
     # Remove old results before visualizing all images,
     # to prevent mixing old and new visualizations.
     if os.path.isdir(self.datamanager.PATHS["RESULTS"]):
         rmtree(self.datamanager.PATHS["RESULTS"])
     joblib.Parallel(n_jobs=-1, pre_dispatch='2*n_jobs')(
         joblib.delayed(visualize_image)(self, importances, img_names[i], image_titles[i], min_importance, max_importance)
         for i in range(len(img_names)))
Example #22
def main_data(df, poolNum=20, replace=False):

    split_df = np.array_split(df, poolNum)

    with joblib.Parallel(n_jobs=poolNum, verbose=0) as parallel:
        # result = parallel(joblib.delayed(parallel_features)(d) for d in split_df)
        result = parallel(joblib.delayed(parallel_features)(d) for d in split_df)

    features = np.vstack([r[0] for r in result])
    labels = np.vstack([r[1] for r in result])

    return features, labels
Example #23
def parse_text_files_to_conll_format():
    """Parses all the input text files into conll format and writes them to ../conll
    """
    # parse each text file into a conll file
    with open(PUB_PATH) as json_publications_file:
        publications = json.load(json_publications_file)

        with joblib.Parallel(n_jobs=os.cpu_count() - 1) as pool:
            pool(
                joblib.delayed(parse_publication)(publications[i])
                for i in tqdm(range(len(publications)),
                              desc='convert text files to conll format in to_conll_test.py'))
Example #24
    def split_and_prepare_transform_jobs(self, data):
        pjr = self.prepare_jobs(data)

        split_jobs = []
        transform_jobs = []
        self._update_split_transform_lists(pjr, split_jobs, transform_jobs)

        while split_jobs:
            pjrs = joblib.Parallel(n_jobs=self.n_jobs)(split_jobs)
            split_jobs = []
            for pjr in pjrs:
                self._update_split_transform_lists(pjr, split_jobs, transform_jobs)
        return PrepareJobResult('transform', transform_jobs)
Example #25
def try_add1_bfs(allTrees,
                 factory,
                 learning_rate,
                 loss,
                 breadth,
                 y_pred,
                 regularizer=0.,
                 use_joblib=False,
                 n_jobs=-1):
    '''
    select best tree to add (1 step)
    '''
    if factory.__class__ is BinaryClassificationFactory:
        y_sign = factory.labels_sign
        margin = y_sign * y_pred
    elif factory.__class__ is RegressionFactory:
        margin = factory.labels - y_pred
    else:
        raise Exception("Factory type not supported")

    if use_joblib:
        if n_jobs < 0:
            # joblib/sklearn convention: n_jobs=-1 means "use all cores"
            n_jobs = joblib.cpu_count() + 1 + n_jobs

        indices = [0] + [
            len(allTrees) * (i + 1) // n_jobs for i in range(n_jobs)
        ]
        treeSections = [
            allTrees[indices[i]:indices[i + 1]] for i in range(n_jobs)
        ]

        tasks = [
            joblib.delayed(_inthread_try_add)(treeSection, factory, loss,
                                              margin, y_pred, learning_rate,
                                              regularizer)
            for treeSection in treeSections
        ]
        _res = joblib.Parallel(n_jobs=n_jobs, backend="multiprocessing")(tasks)
        triples = [triple for section in _res for triple in section]  # flatten the per-section lists

    else:
        triples = [
            _try_add(tree, factory, loss, margin, y_pred, learning_rate,
                     regularizer) for tree in allTrees
        ]

    triples.sort(key=lambda el: el[0])

    return ([triple[1] for triple in triples[:breadth]
             ], [triple[0] for triple in triples[:breadth]],
            [triple[2] for triple in triples[:breadth]])
Example #26
def model_comparison(*args, verbose=1, score_func=None, n_jobs=None, **kwargs):
    """Collecting repeated average performance measures of selected models.

    """
    (comparison_scheme, X, y, estimators, estimator_params, selectors,
     fs_params, random_states, n_splits, path_to_results) = args

    global TMP_RESULTS_DIR

    # Setup temporary directory.
    path_tempdir = ioutil.setup_tempdir(TMP_RESULTS_DIR, root='.')

    # Set number of CPUs.
    if n_jobs is None:
        n_jobs = cpu_count() - 1 if cpu_count() > 1 else cpu_count()

    results = []
    for estimator_name, estimator in estimators.items():

        print('Running estimator: {}\n{}'.format(estimator.__name__, '-' * 30))

        # Setup hyperparameter grid.
        hparam_grid = ParameterGrid(estimator_params[estimator_name])

        for fs_name, fs_func in selectors.items():

            print('Running selector: {}\n{}'.format(fs_name, '-' * 30))

            selector = {
                'name': fs_name,
                'func': fs_func,
                'params': fs_params[fs_name]
            }
            # Repeating experiments.
            results.extend(
                joblib.Parallel(n_jobs=n_jobs, verbose=verbose)(
                    joblib.delayed(comparison_scheme)(X,
                                                      y,
                                                      estimator,
                                                      hparam_grid,
                                                      selector,
                                                      n_splits,
                                                      random_state,
                                                      path_tempdir,
                                                      verbose=verbose,
                                                      score_func=score_func,
                                                      n_jobs=n_jobs)
                    for random_state in random_states))
    results = _cleanup(results, path_to_results)

    return results
Example #27
def pipeline_compact_watershed(prob,
                               *,
                               invert_prob=True,
                               l1_threshold=0,
                               grid_density=10,
                               compactness=0.01,
                               n_jobs=1):
    if invert_prob:
        prob = np.max(prob) - prob
    seeds = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(multiscale_seed_sequence)(p[np.newaxis, :],
                                                 l1_threshold=l1_threshold,
                                                 grid_density=grid_density)
        for p in prob)
    seeds = np.reshape(seeds, prob.shape)
    fragments = joblib.Parallel(n_jobs=n_jobs)(
        joblib.delayed(compact_watershed)(p, s, compactness=compactness)
        for p, s in zip(prob, seeds))
    fragments = np.array(fragments)
    max_ids = fragments.max(axis=-1).max(axis=-1)
    to_add = np.concatenate(([0], np.cumsum(max_ids)[:-1]))
    fragments += to_add[:, np.newaxis, np.newaxis]
    return fragments
Example #28
 def initialize(self):
     """Initialize all transformer arguments, needing initialisation."""
     if not self._initialized["n_jobs"]:
         if type(self.n_jobs) is not int and self.n_jobs is not None:
             raise ValueError(
                 'n_jobs parameter must be an int '
                 'indicating the number of jobs as in joblib or None')
         elif self.n_jobs is None:
             self._parallel = None
         else:
             self._parallel = joblib.Parallel(n_jobs=self.n_jobs,
                                              backend="threading",
                                              pre_dispatch='all')
             self._n_jobs = self._parallel._effective_n_jobs()
         self._initialized["n_jobs"] = True
Example #29
def grow_ensemble(base_model,
                  X,
                  y,
                  sample_weight=None,
                  n_estimators=1,
                  n_jobs=1,
                  random_state=None):
    random_state = check_random_state(random_state)
    max_seed = np.iinfo('uint32').max
    random_states = random_state.randint(max_seed + 1, size=n_estimators)
    results = joblib.Parallel(n_jobs=n_jobs)(joblib.delayed(train_model)(
        clone(base_model), X, y, sample_weight=sample_weight, random_state=rs)
                                             for rs in random_states)

    return combine(results)
Example #30
def test():
  # load testing features
  datatype = 'margin'
  files = glob.glob(os.path.join('./svm_features',datatype,'*.npz'))
  results_dir = os.path.join('./results/svm_results')
  if not os.path.exists(results_dir):
    os.makedirs(results_dir)
  # testing
  # load model
  print('Loading model from pickle!!!')
  svm = joblib.load(os.path.join(svm_dir,'svm_model.pkl'))
  print('Loading model finished!!!')
  # multi-process to predicting
  joblib.Parallel(n_jobs=32)(
    joblib.delayed(multi_process_predict)(svm, file, idx, results_dir, datatype)
    for idx, file in enumerate(tqdm(files)))