Python Client.rebalance Examples

Programming Language: Python

Namespace/Package Name: distributed

Class/Type: Client

Method/Function: rebalance

Examples at hotexamples.com: 2

Python Client.rebalance - 2 examples found. These are the top-rated real-world Python examples of distributed.Client.rebalance, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

Client(30)

gather(30)

map(30)

close(30)

persist(13)

compute(12)

ncores(10)

_start(10)

_shutdown(9)

get_dataset(6)

publish_dataset(5)

get(4)

nthreads(3)

current(3)

_gather(3)

cancel(3)

_publish_dataset(3)

channel(2)

list_datasets(2)

_scatter(2)

_repr_html_(2)

_get_dataset(2)

rebalance(2)

get_worker_logs(1)

has_what(1)

_restart(1)

Example #1

Show file

    def labelInstances(self, select_ind, client: Client = None, verbose=False):
        """Query the oracle for the selected instances and store their labels.

        The labels returned by ``self._oracle.query`` replace the entries of
        ``self._Y`` at the positions listed in ``select_ind``. Every selected
        position is updated (the replacements are applied cumulatively), the
        resulting array is persisted and, when a client is given, rebalanced.
        An empty selection leaves the stored labels unchanged.

        :param select_ind: positions of the instances to label; used both to
            index ``self._X`` and as positions into ``self._Y``
        :param client: distributed.Client, optional
            When not None, ``client.rebalance`` is called on the updated labels
        :param verbose: bool, optional (default=False)
            When True, print the labels and the cost returned by the oracle
        """
        # For each selected instance retrieve the labels from the oracle
        labels, cost = self._oracle.query(instances=self._X[select_ind],
                                          indexes=select_ind)

        # Loop-invariant values, computed once.
        labels_ndim = len(da.shape(np.asarray(labels)))
        last_position = len(self._Y) - 1

        # Start from the current labels and apply each replacement on top of
        # the previous one, so no selected index is lost when several
        # instances are labelled in one call.
        updated = self._Y
        for position, label in zip(select_ind, labels):
            if isinstance(label, (list, np.ndarray)):
                label_shape = np.shape(label)
            else:
                label_shape = da.shape(label)

            # Wrap the label so it has the same number of dimensions as the
            # label collection before concatenating along axis 0.
            if len(label_shape) == labels_ndim:
                new_item = label
            else:
                new_item = [label]

            if position == 0:  # replace the first item
                pieces = [new_item, updated[position + 1:]]
            elif position == last_position:  # replace the last item
                pieces = [updated[:position], new_item]
            else:  # replace any other item
                pieces = [updated[:position], new_item,
                          updated[position + 1:]]
            updated = da.concatenate(pieces, axis=0)

        self._Y = updated.persist()

        if client is not None:
            client.rebalance(self._Y)

        if verbose:
            print("Label: %s, Cost: %s" % (labels, cost))

Example #2

Show file

File: context.py Project: a24lorie/DPyACL

    def __init__(self,
                 client: Client,
                 X,
                 Y,
                 ml_technique,
                 scenario_type: AbstractScenario,
                 performance_metrics: [],
                 query_strategy: SingleLabelIndexQuery,
                 oracle: Oracle,
                 stopping_criteria: AbstractStopCriterion,
                 self_partition: bool,
                 kfolds: int = 1,
                 batch_size=1,
                 **kwargs):
        """
        Store the data as persisted dask arrays, build or validate the
        train/test/label/unlabel index folds, validate the remaining
        arguments and instantiate the scenario for the first fold.

        Parameters
        ----------
        :param client: distributed.Client
        :param X: array-like
            Data matrix with [n_samples, n_features]
        :param Y: array-like
            labels of given data [n_samples, n_labels] or [n_samples]
        :param ml_technique
        :param scenario_type: Sub-Type of AbstractScenario
            Type of Active Learning scenario to use
        :param performance_metrics: array-like of BaseMetrics elements
        :param query_strategy: SingleLabelIndexQuery
        :param oracle: Oracle
        :param stopping_criteria: AbstractStopCriterion
        :param self_partition: bool
        :param kfolds: int, optional (default=1)
             If self_partition is True, randomly split the data into k sets
             according to the extra parameters
                -> test_ratio: float, optional (default=0.3)
                    Ratio of test set
                -> initial_label_rate: float, optional (default=0.05)
                    Ratio of initial label set
                    e.g. Initial_labelset*(1-test_ratio)*n_samples
                -> all_class: bool, optional (default=True)
                    Whether each split will contain at least one instance for each class.
                    If False, a totally random split will be performed.

            If self_partition is False the following parameters must be
            specified, each with shape [kfolds, n_indexes]
                -> train_idx:
                -> test_idx:
                -> label_idx:
                -> unlabel_idx:
        :param batch_size: optional (default=1)
            Forwarded unchanged to the scenario constructor
        :param kwargs: optional
            Extra parameters
                -> rebalance: bool, optional (default=False)
                    When True and client is not None, call client.rebalance
                    on the persisted X and Y

        :raises ValueError: if a required argument is None or empty, if
            scenario_type is not a subclass of AbstractScenario, if a metric
            is not a BaseMetrics instance, or if the user-supplied index
            folds are inconsistent with each other, with the data or with
            kfolds
        """
        self._client = client

        def _persisted(data):
            # Wrap non-dask input in a dask array split into roughly 50
            # chunks. The chunk size is clamped to at least 1 because inputs
            # with fewer than 50 rows would otherwise ask for a chunk size
            # of 0.
            if isinstance(data, da.core.Array):
                return data.persist()
            chunk_size = max(1, len(data) // 50)
            return da.from_array(data, chunks=chunk_size).persist()

        # Persist the dask storage structures
        self._X = _persisted(X)
        self._Y = _persisted(Y)

        # Optionally rebalance the persisted data
        if client is not None and kwargs.pop("rebalance", False):
            client.rebalance(self._X)
            client.rebalance(self._Y)

        check_X_y(self._X,
                  self._Y,
                  accept_sparse='csc',
                  multi_output=True,
                  distributed=False)

        self._scenario_type = scenario_type
        if self._scenario_type is None:
            raise ValueError("required param 'scenario_type' can not be empty")
        if not issubclass(self._scenario_type, AbstractScenario):
            raise ValueError(
                "the 'scenario_type' must be a subclass of 'AbstractScenario'")

        if self_partition:
            self._kfolds = kfolds
            self._train_idx, self._test_idx, self._label_idx, self._unlabel_idx = split(
                X=self._X,
                y=self._Y,
                test_ratio=kwargs.pop("test_ratio", 0.3),
                initial_label_rate=kwargs.pop("initial_label_rate", 0.05),
                split_count=self._kfolds,
                all_class=kwargs.pop("all_class", True))
        else:
            train_idx = kwargs.pop("train_idx", None)
            test_idx = kwargs.pop("test_idx", None)
            label_idx = kwargs.pop("label_idx", None)
            unlabel_idx = kwargs.pop("unlabel_idx", None)

            if train_idx is None:
                raise ValueError(
                    "required param 'train_idx' can not be empty ")
            if test_idx is None:
                raise ValueError("required param 'test_idx' can not be empty ")
            if label_idx is None:
                raise ValueError(
                    "required param 'label_idx' can not be empty ")
            if unlabel_idx is None:
                raise ValueError(
                    "required param 'unlabel_idx' can not be empty ")

            # X is unpacked as two-dimensional; Y may be one-dimensional, in
            # which case it counts as a single label column.
            num_inst_x, num_feat = da.shape(self._X)
            y_shape = da.shape(self._Y)
            if len(y_shape) > 1:
                num_inst_y, num_labels = y_shape
            else:
                num_inst_y, num_labels = y_shape[0], 1

            # Each index collection is unpacked as (folds, instances).
            folds_train, num_inst_train = np.shape(train_idx)
            folds_test, num_inst_test = np.shape(test_idx)
            folds_labeled, num_inst_labeled = np.shape(label_idx)
            folds_unlabeled, num_inst_unlabeled = np.shape(unlabel_idx)

            if num_inst_x != num_inst_y:
                raise ValueError(
                    "Different numbers of instances for inputs (x:%s, y:%s)" %
                    (num_inst_x, num_inst_y))

            if folds_train != folds_test or folds_test != folds_labeled or folds_labeled != folds_unlabeled:
                raise ValueError(
                    "Different numbers of folds for inputs (train_idx:%s, test_idx:%s "
                    "label_idx:%s, unlabel_idx:%s)" %
                    (folds_train, folds_test, folds_labeled, folds_unlabeled))
            if kfolds != folds_test:
                raise ValueError(
                    "Number of folds for inputs (train_idx:%s, test_idx:%s "
                    "label_idx:%s, unlabel_idx:%s) must be equals to kfolds:%s param"
                    % (folds_train, folds_test, folds_labeled, folds_unlabeled,
                       kfolds))

            if num_inst_train + num_inst_test != num_inst_x:
                raise ValueError(
                    "The sum of the number of instances for train_idx and test_idx must be equal to the "
                    "number of instances for x"
                    "(num_inst_x:%s, num_inst_train:%s num_inst_test:%s)" %
                    (num_inst_x, num_inst_train, num_inst_test))

            if num_inst_labeled + num_inst_unlabeled != num_inst_train:
                # Report all three quantities involved in the check.
                raise ValueError(
                    "The sum of the number of instances for label_idx and unlabel_idx must be equal to the "
                    "number of instances for train_idx"
                    "(num_inst_train:%s, num_inst_labeled:%s num_inst_unlabeled:%s)"
                    %
                    (num_inst_train, num_inst_labeled, num_inst_unlabeled))

            self._kfolds = folds_train
            self._train_idx = train_idx
            self._test_idx = test_idx
            self._label_idx = label_idx
            self._unlabel_idx = unlabel_idx

        self._ml_technique = ml_technique
        if self._ml_technique is None:
            raise ValueError("required param 'ml_technique' can not be empty")

        self._performance_metrics = performance_metrics
        if self._performance_metrics is None or len(
                self._performance_metrics) == 0:
            raise ValueError(
                "required param 'performance_metrics' can not be empty")
        for metric in self._performance_metrics:
            if not isinstance(metric, BaseMetrics):
                raise ValueError(
                    "the elements in 'performance_metrics' must be of type BaseMetrics"
                )

        self._query_strategy = query_strategy
        if self._query_strategy is None:
            raise ValueError(
                "required param 'query_strategy' can not be empty")

        self._oracle = oracle
        if self._oracle is None:
            raise ValueError("required param 'oracle' can not be empty")

        self._stopping_criteria = stopping_criteria
        if self._stopping_criteria is None:
            raise ValueError(
                "required param 'stopping_criteria' can not be empty")

        # Instantiate the scenario for the first fold. The class object was
        # validated above, so it is called directly rather than being looked
        # up again by name through eval().
        self._scenario = self._scenario_type(
            X=self._X,
            y=self._Y,
            train_idx=self._train_idx[0],
            test_idx=self._test_idx[0],
            label_idx=copy.deepcopy(IndexCollection(self._label_idx[0])),
            unlabel_idx=copy.deepcopy(IndexCollection(self._unlabel_idx[0])),
            ml_technique=self._ml_technique,
            performance_metrics=self._performance_metrics,
            query_strategy=self._query_strategy,
            oracle=self._oracle,
            batch_size=batch_size)