예제 #1
0
class MaxPRegionsHeu:
    def __init__(self, local_search=None, random_state=None):
        """
        Heuristic solver for the max-p-regions problem as described in
        [DAR2012]_.

        Parameters
        ----------
        local_search : None or :class:`AZP` or :class:`AZPSimulatedAnnealing`
            Local search algorithm applied to the feasible partitions.
            If None, an :class:`AZP` instance is created when fitting.
            Pass an instance of :class:`AZP` (or one of its subclasses) or
            of :class:`AZPSimulatedAnnealing` to customize the local search.
        random_state : None, int, str, bytes, or bytearray
            Random seed. It is handed to :func:`random.seed`, i.e. it seeds
            the module-level generator of Python's :mod:`random` module.
        """
        # Seeding affects the global `random` state, not only this instance.
        random.seed(random_state)
        self.random_state = random_state
        self.local_search = local_search
        # Placeholder until fit_from_scipy_sparse_matrix installs the metric
        # of the objective function.
        self.metric = raise_distance_metric_not_set
        self.labels_ = None
        self.n_regions = None

    def fit_from_scipy_sparse_matrix(
        self,
        adj,
        attr,
        spatially_extensive_attr,
        threshold,
        max_it=10,
        objective_func=ObjectiveFunctionPairwise()):
        """
        Solve the max-p-regions problem in a heuristic way (see [DAR2012]_).

        The algorithm runs in three phases: a construction phase growing
        regions `max_it` times and keeping the partitions with the largest
        number of regions, an enclave assignment phase, and a local search
        phase. The labels of the partition with the smallest objective value
        are assigned to the instance's :attr:`labels_` attribute
        (:attr:`labels_` stays None if no partition was produced, e.g. for
        ``max_it=0``).

        Parameters
        ----------
        adj : :class:`scipy.sparse.csr_matrix`
            Adjacency matrix representing the areas' contiguity relation.
        attr : :class:`numpy.ndarray`
            Array (number of areas x number of attributes) of areas' attributes
            relevant to clustering.
        spatially_extensive_attr : :class:`numpy.ndarray`
            Array (number of areas x number of attributes) of areas' attributes
            relevant to ensuring the threshold condition.
        threshold : numbers.Real or :class:`numpy.ndarray`
            The lower bound for a region's sum of spatially extensive
            attributes. The argument's type is numbers.Real if there is only
            one spatially extensive attribute per area, otherwise it is a
            one-dimensional array with as many entries as there are spatially
            extensive attributes per area.
        max_it : int, default: 10
            The maximum number of partitions produced in the algorithm's
            construction phase.
        objective_func : :class:`region.objective_function.ObjectiveFunction`, default: ObjectiveFunctionPairwise()
            The objective function to use. Its `metric` attribute is also
            used for measuring dissimilarities while growing regions.
        """
        weights = ps_api.WSP(adj).to_W()
        areas_dict = weights.neighbors
        self.metric = objective_func.metric

        best_partition = None
        best_obj_value = float("inf")
        # (partition, enclaves) pairs that attain the current maximum number
        # of regions
        partitions_before_enclaves_assignment = []
        max_p = 0  # maximum number of regions

        # construction phase
        for _ in range(max_it):
            partition, enclaves = self.grow_regions(adj, attr,
                                                    spatially_extensive_attr,
                                                    threshold)
            n_regions = len(partition)
            if n_regions > max_p:
                # strictly better: forget all partitions found so far
                partitions_before_enclaves_assignment = [(partition, enclaves)]
                max_p = n_regions
            elif n_regions == max_p:
                partitions_before_enclaves_assignment.append(
                    (partition, enclaves))

        # enclave assignment phase
        feasible_partitions = [
            self.assign_enclaves(partition, enclaves, areas_dict, attr)
            for partition, enclaves in partitions_before_enclaves_assignment
        ]

        # local search phase
        if self.local_search is None:
            self.local_search = AZP()
        # NOTE(review): the current strategy is wrapped on every call, so
        # fitting repeatedly with the same instance nests the wrappers --
        # confirm that this is intended.
        self.local_search.allow_move_strategy = AllowMoveAZPMaxPRegions(
            spatially_extensive_attr, threshold,
            self.local_search.allow_move_strategy)
        for partition in feasible_partitions:
            self.local_search.fit_from_scipy_sparse_matrix(
                adj,
                attr,
                max_p,
                initial_labels=array_from_region_list(partition),
                objective_func=objective_func)
            partition = self.local_search.labels_
            obj_value = objective_func(partition, attr)
            if obj_value < best_obj_value:
                best_obj_value = obj_value
                best_partition = partition
        self.labels_ = best_partition

    # Expose the sparse-matrix based solver under the name ``fit``. The
    # function is passed through copy_func (presumably returning a copy) so
    # that the alias can carry its own docstring.
    fit = copy_func(fit_from_scipy_sparse_matrix)
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # an empty string so that defining the class does not raise a TypeError.
    fit.__doc__ = "Alias for :meth:`fit_from_scipy_sparse_matrix`.\n\n" \
                  + (fit_from_scipy_sparse_matrix.__doc__ or "")

    def fit_from_dict(self,
                      neighbors_dict,
                      attr,
                      spatially_extensive_attr,
                      threshold,
                      max_it=10,
                      objective_func=ObjectiveFunctionPairwise()):
        """
        Solve the max-p-regions problem in a heuristic way (see [DAR2012]_).

        The dict arguments are validated, converted to an adjacency matrix
        and arrays, and passed on to :meth:`fit_from_scipy_sparse_matrix`.
        The resulting region labels are assigned to the instance's
        :attr:`labels_` attribute.

        Parameters
        ----------
        neighbors_dict : dict
            Each key represents an area and each value is an iterable of
            neighbors of this area.
        attr : dict
            A dict with the same keys as `neighbors_dict` and values
            representing the attributes for calculating
            homogeneity/heterogeneity. A value can be scalar (e.g. `float` or
            `int`) or a :class:`numpy.ndarray`.
        spatially_extensive_attr : dict
            A dict with the same keys as `neighbors_dict` and values
            representing the spatially extensive attribute (scalar or iterable
            of scalars). In the max-p-regions problem each region's sum of
            spatially extensive attributes must be greater than a specified
            threshold. In case of iterables of scalars as dict-values all
            elements of the iterable have to fulfill the condition.
        threshold : numbers.Real or :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        max_it : int, default: 10
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        objective_func : :class:`region.ObjectiveFunction`, default: ObjectiveFunctionPairwise()
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.

        Raises
        ------
        ValueError
            If `neighbors_dict` is not a dict, or if `attr` or
            `spatially_extensive_attr` is not a dict with the same keys as
            `neighbors_dict`.
        """
        if not isinstance(neighbors_dict, dict):
            raise ValueError("The neighbors_dict argument must be dict.")

        not_same_dict_keys_msg = "The {} argument has to be of type dict " \
                                 "with the same keys as neighbors_dict."

        if not isinstance(attr, dict) or attr.keys() != neighbors_dict.keys():
            raise ValueError(not_same_dict_keys_msg.format("attr"))

        if not isinstance(spatially_extensive_attr, dict) or \
                spatially_extensive_attr.keys() != neighbors_dict.keys():
            # Put the argument's name (not its value) into the message.
            raise ValueError(
                not_same_dict_keys_msg.format("spatially_extensive_attr"))
        adj = scipy_sparse_matrix_from_dict(neighbors_dict)
        attr_arr = array_from_dict_values(attr)
        spat_ext_attr_arr = array_from_dict_values(spatially_extensive_attr)
        self.fit_from_scipy_sparse_matrix(adj,
                                          attr_arr,
                                          spat_ext_attr_arr,
                                          threshold=threshold,
                                          max_it=max_it,
                                          objective_func=objective_func)

    def fit_from_geodataframe(self,
                              gdf,
                              attr,
                              spatially_extensive_attr,
                              threshold,
                              max_it=10,
                              objective_func=ObjectiveFunctionPairwise(),
                              contiguity="rook"):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        The contiguity weights and the attribute arrays are derived from the
        GeoDataFrame and handed to :meth:`fit_from_w`.

        Parameters
        ----------
        gdf : :class:`geopandas.GeoDataFrame`
            The GeoDataFrame holding the areas and their attribute columns.
        attr : str or list
            The clustering criteria (columns of the GeoDataFrame `gdf`) are
            specified as string (for one column) or list of strings (for
            multiple columns).
        spatially_extensive_attr : str or list
            The name (`str`) or names (`list` of strings) of column(s) in `gdf`
            containing the spatially extensive attributes.
        threshold : numbers.Real or :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        max_it : int, default: 10
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        objective_func : :class:`region.ObjectiveFunction`, default: ObjectiveFunctionPairwise()
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        contiguity : {"rook", "queen"}, default: "rook"
            Defines the contiguity relationship between areas. Possible
            contiguity definitions are:

            * "rook" - Rook contiguity.
            * "queen" - Queen contiguity.
        """
        weights = w_from_gdf(gdf, contiguity)
        attr_arr = array_from_df_col(gdf, attr)
        spat_ext_attr_arr = array_from_df_col(gdf, spatially_extensive_attr)

        self.fit_from_w(
            weights,
            attr_arr,
            spat_ext_attr_arr,
            threshold=threshold,
            max_it=max_it,
            objective_func=objective_func,
        )

    def fit_from_networkx(self,
                          graph,
                          attr,
                          spatially_extensive_attr,
                          threshold,
                          max_it=10,
                          objective_func=ObjectiveFunctionPairwise()):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        The graph is converted to a sparse adjacency matrix and the
        attributes to arrays before delegating to
        :meth:`fit_from_scipy_sparse_matrix`.

        Parameters
        ----------
        graph : `networkx.Graph`
            Graph whose nodes are the areas and whose edges represent the
            contiguity relation.
        attr : str, list or dict
            If the clustering criteria are present in the networkx.Graph
            `graph` as node attributes, then they can be specified as a string
            (for one criterion) or as a list of strings (for multiple
            criteria).
            Alternatively, a dict can be used with each key being a node of the
            networkx.Graph `graph` and each value being the corresponding
            clustering criterion (a scalar (e.g. `float` or `int`) or a
            :class:`numpy.ndarray`).
            If there are no clustering criteria present in the networkx.Graph
            `graph` as node attributes, then a dictionary must be used for this
            argument. Refer to the corresponding argument in
            :meth:`fit_from_dict` for more details about the expected dict.
        spatially_extensive_attr : str, list or dict
            If the spatially extensive attribute is present in the
            networkx.Graph `graph` as node attributes, then they can be
            specified as a string (for one attribute) or as a list of
            strings (for multiple attributes).
            Alternatively, a dict can be used with each key being a node of the
            networkx.Graph `graph` and each value being the corresponding
            spatially extensive attribute (a scalar (e.g. `float` or `int`) or
            a :class:`numpy.ndarray`).
            If there are no spatially extensive attributes present in the
            networkx.Graph `graph` as node attributes, then a dictionary must
            be used for this argument. Refer to the corresponding argument in
            :meth:`fit_from_dict` for more details about the expected dict.
        threshold : numbers.Real or :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        max_it : int, default: 10
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        objective_func : :class:`region.ObjectiveFunction`, default: ObjectiveFunctionPairwise()
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        """
        # NOTE(review): nx.to_scipy_sparse_matrix is no longer available in
        # networkx >= 3.0 -- confirm the networkx version this file targets.
        adjacency = nx.to_scipy_sparse_matrix(graph)
        attr_arr = array_from_graph_or_dict(graph, attr)
        spat_ext_attr_arr = array_from_graph_or_dict(graph,
                                                     spatially_extensive_attr)
        self.fit_from_scipy_sparse_matrix(
            adjacency,
            attr_arr,
            spat_ext_attr_arr,
            threshold=threshold,
            max_it=max_it,
            objective_func=objective_func,
        )

    def fit_from_w(self,
                   w,
                   attr,
                   spatially_extensive_attr,
                   threshold,
                   max_it=10,
                   objective_func=ObjectiveFunctionPairwise()):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        The W object is converted to a sparse adjacency matrix; all other
        arguments are passed through unchanged.

        Parameters
        ----------
        w : :class:`libpysal.weights.weights.W`
            W object representing the contiguity relation.
        attr : :class:`numpy.ndarray`
            Each element specifies an area's attribute which is used for
            calculating the objective function.
        spatially_extensive_attr : :class:`numpy.ndarray`
            Each element specifies an area's spatially extensive attribute
            which is used to ensure that the sum of spatially extensive
            attributes in each region adds up to a threshold defined by the
            `threshold` argument.
        threshold : numbers.Real or :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        max_it : int, default: 10
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        objective_func : :class:`region.ObjectiveFunction`, default: ObjectiveFunctionPairwise()
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        """
        adjacency = scipy_sparse_matrix_from_w(w)
        self.fit_from_scipy_sparse_matrix(
            adjacency,
            attr,
            spatially_extensive_attr,
            threshold,
            max_it=max_it,
            objective_func=objective_func,
        )

    def grow_regions(self, adj, attr, spatially_extensive_attr, threshold):
        """
        Grow regions from randomly chosen seed areas until every area has
        been visited.

        A seed that satisfies the threshold on its own becomes a region.
        Otherwise the best neighboring area (see :meth:`find_best_area`) is
        added repeatedly until the threshold is reached. If no unassigned
        neighbor is left before that, the region's areas become enclaves.

        Parameters
        ----------
        adj : :class:`scipy.sparse.csr_matrix`
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        attr : :class:`numpy.ndarray`
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        spatially_extensive_attr : :class:`numpy.ndarray`
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        threshold : numbers.Real or :class:`numpy.ndarray`
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.

        Returns
        -------
        result : `tuple`
            `result[0]` is a `list`. Each list element is a `set` of a region's
            areas. Note that not every area is assigned to a region after this
            function has terminated, so they won't be in any of the `set`s in
            `result[0]`.
            `result[1]` is a `list` of areas not assigned to any region.
        """
        partition = []
        enclave_areas = []
        unassigned_areas = list(range(adj.shape[0]))
        assigned_areas = []

        while unassigned_areas:
            seed = pop_randomly_from(unassigned_areas)
            assigned_areas.append(seed)

            if (spatially_extensive_attr[seed] >= threshold).all():
                # the seed alone already satisfies the threshold
                partition.append({seed})
                continue

            region = {seed}
            # neighbors of the growing region which are still available
            frontier = set(adj[seed].nonzero()[1]).difference(assigned_areas)
            # running sum of the region's spatially extensive attributes
            region_total = spatially_extensive_attr[seed].copy()
            while (region_total < threshold).any():
                if not frontier:
                    # The region cannot reach the threshold: its areas become
                    # enclaves. Unlike in the algorithm in [DAR2012], they are
                    # not put back into unassigned_areas because that leads
                    # to an infinite loop.
                    enclave_areas.extend(region)
                    for member in region:
                        assigned_areas.remove(member)
                    break
                neigh = self.find_best_area(region, frontier, attr)
                region.add(neigh)
                frontier.remove(neigh)
                frontier.update(set(adj[neigh].nonzero()[1]))
                frontier.difference_update(assigned_areas)
                region_total += spatially_extensive_attr[neigh]
                unassigned_areas.remove(neigh)
                assigned_areas.append(neigh)
            else:
                # the loop ended without break: the threshold is satisfied
                partition.append(region)
        return partition, enclave_areas

    def find_best_area(self, region, candidates, attr):
        """
        Choose the candidate area which is least dissimilar to a region.

        Parameters
        ----------
        region : iterable
            Each element represents an area. The iterable is traversed once
            per candidate, so it must be re-iterable (e.g. a `set`).
        candidates : iterable
            Each element represents an area bordering on region. Must not be
            empty.
        attr : :class:`numpy.ndarray`
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.

        Returns
        -------
        best_area :
            An element of `candidates` with minimal dissimilarity when being
            moved to the region `region`. Ties are broken randomly.

        Raises
        ------
        ValueError
            If `candidates` is empty.
        """
        # sum of dissimilarities between each candidate and the region's areas
        dissim_per_area = {
            area: sum(
                self.metric(attr[area].reshape(1, -1), attr[area2].reshape(
                    1, -1)) for area2 in region)
            for area in candidates
        }
        # Determine the minimum once instead of once per candidate.
        minimum_dissim = min(dissim_per_area.values())
        best_candidates = [
            area for area in dissim_per_area
            if dissim_per_area[area] == minimum_dissim
        ]
        return random_element_from(best_candidates)

    def assign_enclaves(self, partition, enclave_areas, neigh_dict, attr):
        """
        Complete a partial partition by assigning all enclave areas.

        Repeatedly, an enclave with at least one neighbor that is no enclave
        is drawn randomly and added to the best of its neighboring regions
        (see :meth:`find_best_region_idx`). Both `partition` and
        `enclave_areas` are modified in place; `enclave_areas` is empty
        afterwards.

        Parameters
        ----------
        partition : `list`
            Each element (of type `set`) represents a region.
        enclave_areas : `list`
            Each element represents an area.
        neigh_dict : `dict`
            Each key represents an area. Each value is an iterable of the
            corresponding neighbors.
        attr : :class:`numpy.ndarray`
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.

        Returns
        -------
        partition : `list`
            Each element (of type `set`) represents a region.
        """
        while enclave_areas:
            # enclaves with at least one neighbor that is not an enclave
            border_enclaves = [
                enclave for enclave in enclave_areas
                if not all(neigh in enclave_areas
                           for neigh in neigh_dict[enclave])
            ]
            area = pop_randomly_from(border_enclaves)

            # indices of the regions the area's neighbors belong to
            neigh_regions_idx = []
            for neigh in neigh_dict[area]:
                try:
                    idx = find_sublist_containing(neigh, partition,
                                                  index=True)
                except LookupError:
                    # the neighbor is not part of any region
                    continue
                neigh_regions_idx.append(idx)

            region_idx = self.find_best_region_idx(area, partition,
                                                   neigh_regions_idx, attr)
            partition[region_idx].add(area)
            enclave_areas.remove(area)
        return partition

    def find_best_region_idx(self, area, partition, candidate_regions_idx,
                             attr):
        """
        Choose the candidate region which is least dissimilar to an area.

        Parameters
        ----------
        area :
            The area to be moved to one of the regions specified by
            `candidate_regions_idx`.
        partition : `list`
            Each element (of type `set`) represents a region.
        candidate_regions_idx : iterable
            Each element is the index of a region in the `partition` list.
        attr : :class:`numpy.ndarray`
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.

        Returns
        -------
        best_idx : int
            The index of a region (w.r.t. `partition`) which has the smallest
            sum of dissimilarities after area `area` is moved to the region.
            Ties are broken randomly.
        """
        # sum of dissimilarities between the area and each candidate region
        dissim_per_idx = {}
        for region_idx in candidate_regions_idx:
            dissim_per_idx[region_idx] = sum(
                self.metric(attr[area].reshape(1, -1),
                            attr[member].reshape(1, -1))
                for member in partition[region_idx])

        lowest_dissim = min(dissim_per_idx.values())
        best_idxs = [
            idx for idx, dissim in dissim_per_idx.items()
            if dissim == lowest_dissim
        ]
        return random_element_from(best_idxs)
예제 #2
0
class MaxPRegionsExact:
    """
    A class for solving the max-p-regions problem by transforming it into a
    mixed-integer-programming problem (MIP) as described in [DAR2012]_.

    Attributes
    ----------
    labels_ : :class:`numpy.ndarray`
        The i-th entry is the label of the region area i has been assigned to.
    """
    def __init__(self):
        """
        Create an unfitted solver.

        :attr:`labels_` and :attr:`solver` start as None and are set by
        :meth:`fit_from_scipy_sparse_matrix`, which also installs the metric.
        """
        self.metric = raise_distance_metric_not_set
        self.solver = None
        self.labels_ = None

    def fit_from_scipy_sparse_matrix(self,
                                     adj,
                                     attr,
                                     spatially_extensive_attr,
                                     threshold,
                                     solver="cbc",
                                     metric="euclidean"):
        """
        Solve the max-p-regions problem as MIP as described in [DAR2012]_.

        The resulting region labels are assigned to the instance's
        :attr:`labels_` attribute and the solver instance that was used is
        stored in the :attr:`solver` attribute.

        Parameters
        ----------
        adj : class:`scipy.sparse.csr_matrix`
            Adjacency matrix representing the areas' contiguity relation.
        attr : :class:`numpy.ndarray`
            Array (number of areas x number of attributes) of areas' attributes
            relevant to clustering.
        spatially_extensive_attr : :class:`numpy.ndarray`
            Array (number of areas x number of attributes) of areas' attributes
            relevant to ensuring the threshold condition.
        threshold : numbers.Real or :class:`numpy.ndarray`
            The lower bound for a region's sum of spatially extensive
            attributes. The argument's type is numbers.Real if there is only
            one spatially extensive attribute per area, otherwise it is a
            one-dimensional array (any indexable sequence works) with as many
            entries as there are spatially extensive attributes per area.
        solver : {"cbc", "cplex", "glpk", "gurobi"}, default: "cbc"
            The solver to use. Unless the default solver is used, the user has
            to make sure that the specified solver is installed.

            * "cbc" - the Cbc (Coin-or branch and cut) solver
            * "cplex" - the CPLEX solver
            * "glpk" - the GLPK (GNU Linear Programming Kit) solver
            * "gurobi" - the Gurobi Optimizer

        metric : str or function, default: "euclidean"
            See the `metric` argument in
            :func:`region.util.get_metric_function`.

        Raises
        ------
        TypeError
            If the entries of `spatially_extensive_attr` are neither real
            numbers nor iterables.
        """
        # Imported locally because the ABC is not reachable as
        # collections.Iterable on Python >= 3.10.
        from collections.abc import Iterable

        self.metric = get_metric_function(metric)
        check_solver(solver)

        prob = LpProblem("Max-p-Regions", LpMinimize)

        # Parameters of the optimization problem
        n_areas = adj.shape[0]
        I = list(range(n_areas))  # index for areas
        II = [(i, j) for i in I for j in I]
        II_upper_triangle = [(i, j) for i, j in II if i < j]
        # index of potential regions, called k in [DAR2012]_:
        K = range(n_areas)
        # index of contiguity order, called c in [DAR2012]_:
        O = range(n_areas)
        # pairwise dissimilarities between areas
        d = {(i, j): self.metric(attr[i].reshape(1, -1),
                                 attr[j].reshape(1, -1))
             for i, j in II_upper_triangle}
        # NOTE(review): the sum below is 0 for a single area or identical
        # attributes, for which log10 is undefined -- confirm how this case
        # should be handled.
        h = 1 + floor(log10(sum(d[(i, j)] for i, j in II_upper_triangle)))

        # Decision variables
        # t[i, j] is forced to 1 by (6) if areas i and j share a region
        t = LpVariable.dicts("t", ((i, j) for i, j in II_upper_triangle),
                             lowBound=0,
                             upBound=1,
                             cat=LpInteger)
        # x[i, k, o]: area i belongs to region k with contiguity order o
        x = LpVariable.dicts("x", ((i, k, o) for i in I for k in K for o in O),
                             lowBound=0,
                             upBound=1,
                             cat=LpInteger)

        # Objective function
        # (1) in Duque et al. (2012): "The Max-p-Regions Problem"
        prob += -10**h * lpSum(x[i, k, 0] for k in K for i in I) \
            + lpSum(d[i, j] * t[i, j] for i, j in II_upper_triangle)

        # Constraints
        # (2) in Duque et al. (2012): "The Max-p-Regions Problem"
        for k in K:
            prob += lpSum(x[i, k, 0] for i in I) <= 1
        # (3) in Duque et al. (2012): "The Max-p-Regions Problem"
        for i in I:
            prob += lpSum(x[i, k, o] for k in K for o in O) == 1
        # (4) in Duque et al. (2012): "The Max-p-Regions Problem"
        for i in I:
            for k in K:
                for o in range(1, len(O)):
                    prob += x[i, k, o] <= lpSum(x[j, k, o - 1]
                                                for j in neighbors(adj, i))
        # (5) in Duque et al. (2012): "The Max-p-Regions Problem"
        first_attr = spatially_extensive_attr[I[0]]
        if isinstance(first_attr, numbers.Real):
            for k in K:
                lhs = lpSum(x[i, k, o] * spatially_extensive_attr[i] for i in I
                            for o in O)
                prob += lhs >= threshold * lpSum(x[i, k, 0] for i in I)
        elif isinstance(first_attr, Iterable):
            threshold_is_scalar = isinstance(threshold, numbers.Real)
            for el in range(len(first_attr)):
                # A scalar threshold applies to every attribute; otherwise
                # each attribute has its own entry. Indexing (instead of
                # requiring an ndarray) keeps the constraint from being
                # silently dropped for lists or tuples.
                el_threshold = threshold if threshold_is_scalar \
                    else threshold[el]
                for k in K:
                    lhs = lpSum(x[i, k, o] * spatially_extensive_attr[i][el]
                                for i in I for o in O)
                    rhs = el_threshold * lpSum(x[i, k, 0] for i in I)
                    prob += lhs >= rhs
        else:
            # Without (5) the solution would ignore the threshold entirely.
            raise TypeError(
                "The entries of spatially_extensive_attr must be real "
                "numbers or iterables of real numbers.")
        # (6) in Duque et al. (2012): "The Max-p-Regions Problem"
        for i, j in II_upper_triangle:
            for k in K:
                prob += t[i, j] >= \
                        lpSum(x[i, k, o] + x[j, k, o] for o in O) - 1
        # (7) in Duque et al. (2012): "The Max-p-Regions Problem"
        # already in LpVariable-definition
        # (8) in Duque et al. (2012): "The Max-p-Regions Problem"
        # already in LpVariable-definition

        # additional constraint for speedup (p. 405 in [DAR2012]_)
        for o in O:
            prob += x[I[0], K[0], o] == (1 if o == 0 else 0)

        # Solve the optimization problem
        solver = get_solver_instance(solver)
        print("start solving with", solver)
        prob.solve(solver)
        print("solved")
        result = np.zeros(n_areas)
        for i in I:
            for k in K:
                for o in O:
                    # Solvers report floats, so a binary variable may come
                    # back as e.g. 0.9999999 (or None if it has no value);
                    # round instead of comparing with 1 exactly.
                    value = x[i, k, o].varValue
                    if value is not None and round(value) == 1:
                        result[i] = k
        self.labels_ = result
        self.solver = solver

    # Expose the sparse-matrix based solver under the name ``fit``. The
    # function is passed through copy_func (presumably returning a copy) so
    # that the alias can carry its own docstring.
    fit = copy_func(fit_from_scipy_sparse_matrix)
    # __doc__ is None when docstrings are stripped (python -OO); fall back to
    # an empty string so that defining the class does not raise a TypeError.
    fit.__doc__ = "Alias for :meth:`fit_from_scipy_sparse_matrix`.\n\n" \
                  + (fit_from_scipy_sparse_matrix.__doc__ or "")

    def fit_from_dict(self,
                      neighbors_dict,
                      attr,
                      spatially_extensive_attr,
                      threshold,
                      solver="cbc",
                      metric="euclidean"):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        The dict arguments are validated, converted to an adjacency matrix
        and arrays, and passed on to :meth:`fit_from_scipy_sparse_matrix`.

        Parameters
        ----------
        neighbors_dict : dict
            Each key represents an area and each value is an iterable of
            neighbors of this area.
        attr : dict
            A dict with the same keys as `neighbors_dict` and values
            representing the attributes for calculating
            homogeneity/heterogeneity. A value can be scalar (e.g. `float` or
            `int`) or a :class:`numpy.ndarray`.
        spatially_extensive_attr : dict
            A dict with the same keys as `neighbors_dict` and values
            representing the spatially extensive attribute (scalar or iterable
            of scalars). In the max-p-regions problem each region's sum of
            spatially extensive attributes must be greater than a specified
            threshold. In case of iterables of scalars as dict-values all
            elements of the iterable have to fulfill the condition.
        threshold : numbers.Real or :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        solver : {"cbc", "cplex", "glpk", "gurobi"}, default: "cbc"
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        metric : str or function, default: "euclidean"
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.

        Raises
        ------
        ValueError
            If `neighbors_dict` is not a dict, or if `attr` or
            `spatially_extensive_attr` is not a dict with the same keys as
            `neighbors_dict`.
        """
        if not isinstance(neighbors_dict, dict):
            raise ValueError("The neighbors_dict argument must be dict.")

        not_same_dict_keys_msg = "The {} argument has to be of type dict " \
                                 "with the same keys as neighbors_dict."

        if not isinstance(attr, dict) or attr.keys() != neighbors_dict.keys():
            raise ValueError(not_same_dict_keys_msg.format("attr"))

        if not isinstance(spatially_extensive_attr, dict) or \
                spatially_extensive_attr.keys() != neighbors_dict.keys():
            # Put the argument's name (not its value) into the message.
            raise ValueError(
                not_same_dict_keys_msg.format("spatially_extensive_attr"))

        adj = scipy_sparse_matrix_from_dict(neighbors_dict)
        attr_arr = array_from_dict_values(attr)
        spat_ext_attr_arr = array_from_dict_values(spatially_extensive_attr)
        self.fit_from_scipy_sparse_matrix(adj,
                                          attr_arr,
                                          spat_ext_attr_arr,
                                          threshold=threshold,
                                          solver=solver,
                                          metric=metric)

    def fit_from_geodataframe(self,
                              gdf,
                              attr,
                              spatially_extensive_attr,
                              threshold,
                              solver="cbc",
                              metric="euclidean",
                              contiguity="rook"):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        The contiguity weights are built from `gdf` with ``w_from_gdf``, the
        named columns are extracted with ``array_from_df_col`` and the actual
        work is delegated to :meth:`fit_from_w`.

        Parameters
        ----------
        gdf : GeoDataFrame
            The GeoDataFrame providing the areas and the attribute columns.
        attr : str or list
            Column name (`str`) or column names (`list` of strings) of `gdf`
            holding the clustering criteria.
        spatially_extensive_attr : str or list
            Column name (`str`) or column names (`list` of strings) of `gdf`
            holding the spatially extensive attributes.
        threshold : numbers.Real or :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        solver : {"cbc", "cplex", "glpk", "gurobi"}, default: "cbc"
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        metric : str or function, default: "euclidean"
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        contiguity : {"rook", "queen"}, default: "rook"
            Defines the contiguity relationship between areas. Possible
            contiguity definitions are:

            * "rook" - Rook contiguity.
            * "queen" - Queen contiguity.
        """
        weights = w_from_gdf(gdf, contiguity)
        attr_arr = array_from_df_col(gdf, attr)
        spat_ext_attr_arr = array_from_df_col(gdf, spatially_extensive_attr)

        fit_options = dict(threshold=threshold, solver=solver, metric=metric)
        self.fit_from_w(weights, attr_arr, spat_ext_attr_arr, **fit_options)

    def fit_from_networkx(self,
                          graph,
                          attr,
                          spatially_extensive_attr,
                          threshold,
                          solver="cbc",
                          metric="euclidean"):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        The graph is converted to a sparse adjacency matrix, both attribute
        arguments are turned into arrays by ``array_from_graph_or_dict`` and
        the result is passed on to :meth:`fit_from_scipy_sparse_matrix`.

        Parameters
        ----------
        graph : `networkx.Graph`
            Graph whose nodes are the areas.
        attr : str, list or dict
            Clustering criteria. If they are stored in `graph` as node
            attributes, give the attribute name as a string (one criterion)
            or a list of strings (several criteria).
            Otherwise pass a dict mapping each node of `graph` to its
            clustering criterion (a scalar (e.g. `float` or `int`) or a
            :class:`numpy.ndarray`); a dict is mandatory when the criteria
            are not present as node attributes. Refer to the corresponding
            argument in :meth:`fit_from_dict` for more details about the
            expected dict.
        spatially_extensive_attr : str, list or dict
            Spatially extensive attribute(s). If they are stored in `graph`
            as node attributes, give the attribute name as a string (one
            attribute) or a list of strings (several attributes).
            Otherwise pass a dict mapping each node of `graph` to its
            spatially extensive attribute (a scalar (e.g. `float` or `int`)
            or a :class:`numpy.ndarray`); a dict is mandatory when the
            attributes are not present as node attributes. Refer to the
            corresponding argument in :meth:`fit_from_dict` for more details
            about the expected dict.
        threshold : numbers.Real or :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        solver : {"cbc", "cplex", "glpk", "gurobi"}, default: "cbc"
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        metric : str or function, default: "euclidean"
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        """
        # NOTE(review): `nx.to_scipy_sparse_matrix` no longer exists in
        # NetworkX >= 3.0 -- confirm which networkx version is pinned.
        adjacency = nx.to_scipy_sparse_matrix(graph)
        attr_arr = array_from_graph_or_dict(graph, attr)
        spat_ext_attr_arr = array_from_graph_or_dict(
            graph, spatially_extensive_attr)

        fit_options = dict(threshold=threshold, solver=solver, metric=metric)
        self.fit_from_scipy_sparse_matrix(
            adjacency, attr_arr, spat_ext_attr_arr, **fit_options)

    def fit_from_w(self,
                   w,
                   attr,
                   spatially_extensive_attr,
                   threshold,
                   solver="cbc",
                   metric="euclidean"):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        Converts `w` with ``scipy_sparse_matrix_from_w`` and forwards all
        remaining arguments unchanged.

        Parameters
        ----------
        w : libpysal.weights.W
            W object representing the areas' contiguity relation.
        attr : :class:`numpy.ndarray`
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        spatially_extensive_attr : :class:`numpy.ndarray`
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        threshold : numbers.Real or :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        solver : {"cbc", "cplex", "glpk", "gurobi"}, default: "cbc"
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        metric : str or function, default: "euclidean"
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        """
        adjacency = scipy_sparse_matrix_from_w(w)
        fit_options = dict(threshold=threshold, solver=solver, metric=metric)
        self.fit_from_scipy_sparse_matrix(
            adjacency, attr, spatially_extensive_attr, **fit_options)
예제 #3
0
파일: exact.py 프로젝트: GRSEB9S/region
class PRegionsExact:
    """
    A class for solving the p-regions problem by transforming it into a
    mixed-integer-programming problem (MIP) as described in [DCM2011]_.

    Attributes
    ----------
    labels_ : :class:`numpy.ndarray`
        Each element is a region label specifying to which region the
        corresponding area was assigned to by the last run of a fit-method.
        `None` until a fit-method has been run.
    method : str
        The method used in the last call of a fit-method for translating the
        p-regions problem into an MIP. `None` until a fit-method has been run.
    metric : function
        The distance metric specified in the last call of a fit-method.
    n_regions : int
        The number of regions the areas were clustered into by the last run of
        a fit-method. `None` until a fit-method has been run.
    solver : str
        The solver used in the last call of a fit-method. `None` until a
        fit-method has been run.
    """
    def __init__(self):
        self.n_regions = None
        self.labels_ = None
        self.method = None
        self.solver = None
        # Placeholder that is replaced by the real metric in a fit-method.
        self.metric = raise_distance_metric_not_set

    def fit_from_scipy_sparse_matrix(self,
                                     adj,
                                     attr,
                                     n_regions,
                                     method="flow",
                                     solver="cbc",
                                     metric="euclidean"):
        """
        Solve the p-regions problem as MIP as described in [DCM2011]_.

        The resulting region labels are assigned to the instance's
        :attr:`labels_` attribute.

        Parameters
        ----------
        adj : :class:`scipy.sparse.csr_matrix`
            Adjacency matrix representing the areas' contiguity relation.
        attr : :class:`numpy.ndarray`
            Array (number of areas x number of attributes) of areas' attributes
            relevant to clustering. A one-dimensional array is reshaped to
            one row per area.
        n_regions : `int`
            Number of desired regions.
        method : {"flow", "order", "tree"}, default: "flow"
            The method to translate the clustering problem into an exact
            optimization model (matched case-insensitively).

            * "flow" - Flow model on p. 112-113 in [DCM2011]_
            * "order" - Order model on p. 110-112 in [DCM2011]_
            * "tree" - Tree model on p. 108-110 in [DCM2011]_

        solver : {"cbc", "cplex", "glpk", "gurobi"}, default: "cbc"
            The solver to use. Unless the default solver is used, the user has
            to make sure that the specified solver is installed.

            * "cbc" - the Cbc (Coin-or branch and cut) solver
            * "cplex" - the CPLEX solver
            * "glpk" - the GLPK (GNU Linear Programming Kit) solver
            * "gurobi" - the Gurobi Optimizer

        metric : str or function, default: "euclidean"
            See the `metric` argument in
            :func:`region.util.get_metric_function`.

        Raises
        ------
        ValueError
            If `n_regions` is not a positive integer, if `n_regions` exceeds
            the number of areas, or if `method` is not one of the supported
            strings. The `solver` argument is validated by ``check_solver``.
        """
        if not isinstance(n_regions, numbers.Integral) or n_regions <= 0:
            raise ValueError("The n_regions argument must be a positive "
                             "integer.")
        # As many regions as areas is allowed; only more regions than areas
        # is rejected, and the message states exactly that.
        if adj.shape[0] < n_regions:
            raise ValueError("The number of regions must be less than or "
                             "equal to the number of areas.")
        if attr.ndim == 1:
            attr = attr.reshape(adj.shape[0], -1)
        self._check_method(method)
        check_solver(solver)
        metric = get_metric_function(metric)

        # `method` has been validated above, so the lookup cannot fail.
        opt_func = {
            "flow": _flow,
            "order": _order,
            "tree": _tree
        }[method.lower()]

        result_dict = opt_func(adj, attr, n_regions, solver, metric)
        self.labels_ = result_dict
        self.n_regions = n_regions
        self.method = method
        self.metric = metric
        self.solver = solver

    fit = copy_func(fit_from_scipy_sparse_matrix)
    # `__doc__` is None when docstrings are stripped (``python -OO``); fall
    # back to "" so building the alias docstring cannot raise a TypeError
    # while the class body is executed.
    fit.__doc__ = "Alias for :meth:`fit_from_scipy_sparse_matrix`.\n\n" \
                  + (fit_from_scipy_sparse_matrix.__doc__ or "")

    def fit_from_dict(self,
                      neighbors_dict,
                      attr,
                      n_regions,
                      method="flow",
                      solver="cbc",
                      metric="euclidean"):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        Parameters
        ----------
        neighbors_dict : dict
            Each key represents an area and each value is an iterable of
            neighbors of this area.
        attr : dict
            A dict with the same keys as `neighbors_dict` and values
            representing the clustering criteria. A value can be scalar (e.g.
            float or int) or a :class:`numpy.ndarray`.
        n_regions : int
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        method : str
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        solver : str
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        metric : str or function, default: "euclidean"
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.

        Raises
        ------
        ValueError
            If `neighbors_dict` is not a dict or if `attr` is not a dict with
            the same keys as `neighbors_dict`.
        """
        if not isinstance(neighbors_dict, dict):
            raise ValueError("The neighbors_dict argument must be dict.")

        if not isinstance(attr, dict) or attr.keys() != neighbors_dict.keys():
            raise ValueError("The attr argument has to be of type dict with "
                             "the same keys as neighbors_dict.")

        adj = scipy_sparse_matrix_from_dict(neighbors_dict)
        attr_arr = array_from_dict_values(attr)

        self.fit_from_scipy_sparse_matrix(adj,
                                          attr_arr,
                                          n_regions,
                                          method=method,
                                          solver=solver,
                                          metric=metric)

    def fit_from_geodataframe(self,
                              gdf,
                              attr,
                              n_regions,
                              method="flow",
                              solver="cbc",
                              metric="euclidean",
                              contiguity="rook"):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        Parameters
        ----------
        gdf : GeoDataFrame
            The GeoDataFrame providing the areas and the attribute columns.
        attr : str or list
            The clustering criteria (columns of the GeoDataFrame `gdf`) are
            specified as string (for one column) or list of strings (for
            multiple columns).
        n_regions : int
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        method : str
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        solver : str
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        contiguity : {"rook", "queen"}, default: "rook"
            Defines the contiguity relationship between areas. Possible
            contiguity definitions are:

            * "rook" - Rook contiguity.
            * "queen" - Queen contiguity.

        metric : str or function, default: "euclidean"
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        """
        w = w_from_gdf(gdf, contiguity)
        attr = array_from_df_col(gdf, attr)
        self.fit_from_w(w,
                        attr,
                        n_regions,
                        method=method,
                        solver=solver,
                        metric=metric)

    def fit_from_networkx(self,
                          graph,
                          attr,
                          n_regions,
                          method="flow",
                          solver="cbc",
                          metric="euclidean"):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        Parameters
        ----------
        graph : `networkx.Graph`
            Graph representing the areas' contiguity relation.
        attr : str, list or dict
            If the clustering criteria are present in the networkx.Graph
            `graph` as node attributes, then they can be specified as a string
            (for one criterion) or as a list of strings (for multiple
            criteria).
            Alternatively, a dict can be used with each key being a node of the
            networkx.Graph `graph` and each value being the corresponding
            clustering criterion (a scalar (e.g. `float` or `int`) or a
            :class:`numpy.ndarray`).
            If there are no clustering criteria present in the networkx.Graph
            `graph` as node attributes, then a dictionary must be used for this
            argument. Refer to the corresponding argument in
            :meth:`fit_from_dict` for more details about the expected dict.
        n_regions : int
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        method : str
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        solver : str
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        metric : str or function, default: "euclidean"
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        """
        # NOTE(review): `nx.to_scipy_sparse_matrix` no longer exists in
        # NetworkX >= 3.0 -- confirm which networkx version is pinned.
        adj = nx.to_scipy_sparse_matrix(graph)
        attr = array_from_graph_or_dict(graph, attr)
        self.fit_from_scipy_sparse_matrix(adj,
                                          attr,
                                          n_regions,
                                          method=method,
                                          solver=solver,
                                          metric=metric)

    def fit_from_w(self,
                   w,
                   attr,
                   n_regions,
                   method="flow",
                   solver="cbc",
                   metric="euclidean"):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        Parameters
        ----------
        w : libpysal.weights.W
            W object representing the areas' contiguity relation.
        attr : :class:`numpy.ndarray`
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        n_regions : int
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        method : str
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        solver : str
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        metric : str or function, default: "euclidean"
            See the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        """
        adj = scipy_sparse_matrix_from_w(w)
        self.fit_from_scipy_sparse_matrix(adj,
                                          attr,
                                          n_regions,
                                          method=method,
                                          solver=solver,
                                          metric=metric)

    @staticmethod
    def _check_method(method):
        """
        Raise a `ValueError` unless `method` is a string that equals "flow",
        "order" or "tree" (compared case-insensitively).
        """
        if not isinstance(method, str) \
                or method.lower() not in ["flow", "order", "tree"]:
            raise ValueError("The method argument must be one of the following"
                             " strings: 'flow', 'order', or 'tree'.")
예제 #4
0
class AZP:
    """
    Class offering the implementation of the AZP algorithm (see [OR1995]_).

    Attributes
    ----------
    labels_ : :class:`numpy.ndarray`
        Each element is a region label specifying to which region the
        corresponding area was assigned to by the last run of a fit-method.
    """

    def __init__(self, allow_move_strategy=None, random_state=None):
        """
        Set up an unfitted instance and seed the :mod:`random` module.

        Parameters
        ----------
        allow_move_strategy : None or :class:`AllowMoveStrategy`, default: None
            Strategy deciding whether a move is allowed. With None a fresh
            :class:`AllowMoveAZP` instance is used, i.e. the AZP algorithm in
            [OR1995]_ is chosen. Pass an AllowMoveStrategy instance for a
            different behavior.
        random_state : None, int, str, bytes, or bytearray, default: None
            Random seed.

        Raises
        ------
        ValueError
            If `allow_move_strategy` is neither None nor an instance of
            AllowMoveStrategy.
        """
        self.n_regions = None
        self.labels_ = None
        self.random_state = random_state
        # Seeding happens before the strategy is validated/created.
        random.seed(self.random_state)

        if allow_move_strategy is None:
            self.allow_move_strategy = AllowMoveAZP()
        elif isinstance(allow_move_strategy, AllowMoveStrategy):
            self.allow_move_strategy = allow_move_strategy
        else:
            raise ValueError("The allow_move_strategy argument must be either "
                             "None, or an instance of AllowMoveStrategy.")

        self.objective_func = None

    def fit_from_scipy_sparse_matrix(
            self,
            adj,
            attr,
            n_regions,
            initial_labels=None,
            objective_func=ObjectiveFunctionPairwise()):
        """
        Perform the AZP algorithm as described in [OR1995]_.

        The resulting region labels are assigned to the instance's
        :attr:`labels_` attribute; :attr:`n_regions` is set as well.

        Parameters
        ----------
        adj : :class:`scipy.sparse.csr_matrix`
            Adjacency matrix representing the contiguity relation.
        attr : :class:`numpy.ndarray`
            Array (number of areas x number of attributes) of areas' attributes
            relevant to clustering. A one-dimensional array is reshaped to
            one row per area.
        n_regions : `int`
            Number of desired regions.
        initial_labels : :class:`numpy.ndarray` or None, default: None
            One-dimensional array of labels at the beginning of the algorithm.
            If None, then a random initial clustering will be generated.
        objective_func : :class:`region.objective_function.ObjectiveFunction`, default: ObjectiveFunctionPairwise()
            The objective function to use.
        """
        if attr.ndim == 1:
            attr = attr.reshape(adj.shape[0], -1)
        self.allow_move_strategy.attr_all = attr
        self.objective_func = objective_func

        # step 1: obtain one initial labeling per component, either generated
        # or derived from the labels supplied by the caller.
        if initial_labels is None:
            component_solutions = generate_initial_sol(adj, n_regions)
        else:
            assert_feasible(initial_labels, adj, n_regions)
            component_solutions = separate_components(adj, initial_labels)

        result = -np.ones(adj.shape[0])
        for solution in component_solutions:
            # Only entries different from -1 take part in this pass.
            member_idx = np.where(solution != -1)[0]
            component_adj = sub_adj_matrix(adj, member_idx)
            component_labels = solution[member_idx]
            component_attr = attr[member_idx]
            self.allow_move_strategy.start_new_component(
                component_labels,
                component_attr,
                self.objective_func,
                member_idx)

            # Write the optimized labels back to the positions they came from.
            result[member_idx] = self._azp_connected_component(
                component_adj, component_labels, component_attr)

        self.n_regions = n_regions
        self.labels_ = result

    # `fit` is a copy of `fit_from_scipy_sparse_matrix` with its own docstring.
    fit = copy_func(fit_from_scipy_sparse_matrix)
    # `__doc__` is None when docstrings are stripped (``python -OO``); fall
    # back to "" so building the alias docstring cannot raise a TypeError
    # while the class body is executed.
    fit.__doc__ = "Alias for :meth:`fit_from_scipy_sparse_matrix`.\n\n" \
                  + (fit_from_scipy_sparse_matrix.__doc__ or "")

    def fit_from_w(self,
                   w,
                   attr,
                   n_regions,
                   initial_labels=None,
                   objective_func=ObjectiveFunctionPairwise()):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        Converts `w` with ``scipy_sparse_matrix_from_w`` and forwards the
        remaining arguments unchanged.

        Parameters
        ----------
        w : :class:`libpysal.weights.weights.W`
            W object representing the contiguity relation.
        attr : :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        n_regions : `int`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        initial_labels : :class:`numpy.ndarray` or None, default: None
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        objective_func : :class:`region.ObjectiveFunction`, default: ObjectiveFunctionPairwise()
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        """
        adjacency = scipy_sparse_matrix_from_w(w)
        self.fit_from_scipy_sparse_matrix(
            adjacency, attr, n_regions, initial_labels,
            objective_func=objective_func)

    def fit_from_networkx(self,
                          graph,
                          attr,
                          n_regions,
                          initial_labels=None,
                          objective_func=ObjectiveFunctionPairwise()):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        The graph is converted to a sparse adjacency matrix; `attr` and (if
        given) `initial_labels` are turned into arrays by
        ``array_from_graph_or_dict``.

        Parameters
        ----------
        graph : `networkx.Graph`
            Graph representing the contiguity relation.
        attr : str, list or dict
            Clustering criteria. If they are stored in `graph` as node
            attributes, give the attribute name as a string (one criterion)
            or a list of strings (several criteria).
            Otherwise pass a dict mapping each node of `graph` to its
            clustering criterion (a scalar (e.g. `float` or `int`) or a
            :class:`numpy.ndarray`); a dict is mandatory when the criteria
            are not present as node attributes. Refer to the corresponding
            argument in :meth:`fit_from_dict` for more details about the
            expected dict.
        n_regions : `int`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        initial_labels : str or dict or None, default: None
            If str, then the string names the graph's attribute holding the
            information about the initial clustering.
            If dict, then each key is a node and each value is the region the
            key area is assigned to at the beginning of the algorithm.
            If None, then a random initial clustering will be generated.
        objective_func : :class:`region.ObjectiveFunction`, default: ObjectiveFunctionPairwise()
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        """
        # NOTE(review): `nx.to_scipy_sparse_matrix` no longer exists in
        # NetworkX >= 3.0 -- confirm which networkx version is pinned.
        adjacency = nx.to_scipy_sparse_matrix(graph)
        attr_arr = array_from_graph_or_dict(graph, attr)
        # None is passed through untouched so that the random start is used.
        start_labels = (
            None if initial_labels is None
            else array_from_graph_or_dict(graph, initial_labels))
        self.fit_from_scipy_sparse_matrix(
            adjacency, attr_arr, n_regions, start_labels,
            objective_func=objective_func)

    def fit_from_geodataframe(self,
                              gdf,
                              attr,
                              n_regions,
                              contiguity="rook",
                              initial_labels=None,
                              objective_func=ObjectiveFunctionPairwise()):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        The contiguity weights are built from `gdf` with ``w_from_gdf``, the
        named columns are extracted with ``array_from_df_col`` and the actual
        work is delegated to :meth:`fit_from_w`.

        Parameters
        ----------
        gdf : :class:`geopandas.GeoDataFrame`
            The GeoDataFrame providing the areas and the attribute columns.
        attr : `str` or `list`
            Column name (string) or column names (list of strings) of `gdf`
            holding the clustering-relevant attributes.
        n_regions : `int`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        contiguity : {"rook", "queen"}, default: "rook"
            Defines the contiguity relationship between areas. Possible
            contiguity definitions are:

            * "rook" - Rook contiguity.
            * "queen" - Queen contiguity.

        initial_labels : :class:`numpy.ndarray` or None, default: None
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        objective_func : :class:`region.ObjectiveFunction`, default: ObjectiveFunctionPairwise()
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        """
        weights = w_from_gdf(gdf, contiguity)
        attr_arr = array_from_df_col(gdf, attr)
        self.fit_from_w(
            weights,
            attr_arr,
            n_regions,
            initial_labels,
            objective_func=objective_func)

    def fit_from_dict(self,
                      neighbor_dict,
                      attr,
                      n_regions,
                      initial_labels=None,
                      objective_func=ObjectiveFunctionPairwise()):
        """
        Alternative API for :meth:`fit_from_scipy_sparse_matrix`.

        The sorted keys of `neighbor_dict` are handed to
        ``array_from_dict_values`` when `attr` and (if given)
        `initial_labels` are converted to arrays.

        Parameters
        ----------
        neighbor_dict : `dict`
            Each key is an area and each value is an iterable of the key area's
            neighbors.
        attr : `dict`
            Each key is an area and each value is the corresponding
            clustering-relevant attribute.
        n_regions : `int`
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        initial_labels : `dict` or None, default: None
            Each key represents an area. Each value represents the region, the
            corresponding area is assigned to at the beginning of the
            algorithm.
            If None, then a random initial clustering will be generated.
        objective_func : :class:`region.ObjectiveFunction`, default: ObjectiveFunctionPairwise()
            Refer to the corresponding argument in
            :meth:`fit_from_scipy_sparse_matrix`.
        """
        area_order = sorted(neighbor_dict)

        adjacency = scipy_sparse_matrix_from_dict(neighbor_dict)
        attr_arr = array_from_dict_values(attr, area_order)

        # None is passed through untouched so that the random start is used.
        start_labels = (
            None if initial_labels is None
            else array_from_dict_values(
                initial_labels, area_order, flat_output=True, dtype=np.int32))
        self.fit_from_scipy_sparse_matrix(
            adjacency, attr_arr, n_regions, start_labels,
            objective_func=objective_func)

    def _azp_connected_component(self, adj, initial_clustering, attr):
        """
        Implementation of the AZP algorithm for a spatially connected set of
        areas (i.e. for every area there is a path to every other area).

        Starting from `initial_clustering`, areas bordering a region are
        moved into that region whenever :attr:`allow_move_strategy` accepts
        the move and the donor region stays contiguous and non-empty. Sweeps
        over all regions are repeated as long as the objective value reported
        by :attr:`allow_move_strategy` is strictly smaller at the end of a
        sweep than it was at its start.

        Parameters
        ----------
        adj : :class:`scipy.sparse.csr_matrix`
            Adjacency matrix representing the contiguity relation. The matrix'
            shape is (N, N) where N denotes the number of areas in the
            currently considered connected component.
        initial_clustering : :class:`numpy.ndarray`
            Array of labels. Shape: (N,) where N denotes the number of areas in
            the currently considered connected component. No copy is made:
            the very same array is handed to `make_move` and returned, so
            (assuming `make_move` relabels in place) the caller's array is
            modified.
        attr : :class:`numpy.ndarray`
            Array of attributes. Shape: (N, M) where N denotes the number of
            areas in the currently considered connected component and M
            denotes the number of attributes per area. Not read by this
            method's body; it is part of the signature only.

        Returns
        -------
        labels : :class:`numpy.ndarray`
            One-dimensional array of region labels after the AZP algorithm has
            been performed. Only region labels of the currently considered
            connected component are returned. This is the same array object
            as `initial_clustering`.
        """
        # With fewer than two regions there is no pair of regions between
        # which an area could be moved, so the initial solution is returned
        # as is. This also covers an empty input, which previously crashed
        # further down.
        distinct_regions = list(np.unique(initial_clustering))
        if len(distinct_regions) <= 1:
            return initial_clustering
        distinct_regions_copy = distinct_regions.copy()

        #  step 2: make a list of the M regions
        labels = initial_clustering  # alias on purpose, not a copy

        obj_val_start = float("inf")
        obj_val_end = self.allow_move_strategy.objective_val

        # For every region: the areas outside of the region which are
        # adjacent to at least one area inside of it.
        region_neighbors = {}
        for region in distinct_regions:
            region_areas = set(np.where(labels == region)[0])
            neighs = set()
            for area in region_areas:
                neighs.update(neighbors(adj, area))
            region_neighbors[region] = neighs.difference(region_areas)

        # step 7: Repeat until no further improving moves are made
        while obj_val_end < obj_val_start:  # improvement
            obj_val_start = float(obj_val_end)
            distinct_regions = distinct_regions_copy.copy()
            # step 6: when the list for region K is exhausted return to step 3
            # and select another region and repeat steps 4-6

            while distinct_regions:
                # step 3: select & remove any region K at random from this list
                recipient = pop_randomly_from(distinct_regions)
                while True:
                    # step 4: identify a set of zones bordering on members of
                    # region K that could be moved into region K without
                    # destroying the internal contiguity of the donor region(s)

                    candidates = []
                    for neigh in region_neighbors[recipient]:
                        neigh_region = labels[neigh]
                        # if area is alone in its region, it must stay. This
                        # is checked first so that no sub-adjacency matrix is
                        # built and tested for an area that cannot move anyway.
                        if count(labels, neigh_region) <= 1:
                            continue
                        sub_adj = sub_adj_matrix(
                            adj,
                            np.where(labels == neigh_region)[0],
                            wo_nodes=neigh)
                        if is_connected(sub_adj):
                            candidates.append(neigh)
                    # step 5: randomly select zones from this list until either
                    # there is a local improvement in the current value of the
                    # objective function or a move that is equivalently as good
                    # as the current best. Then make the move, update the list
                    # of candidate zones, and return to step 4 or else repeat
                    # step 5 until the list is exhausted.
                    while candidates:
                        cand = pop_randomly_from(candidates)
                        if self.allow_move_strategy(cand, recipient, labels):
                            # remember the donor before the label changes
                            donor = labels[cand]

                            make_move(cand, recipient, labels)

                            # cand now lies outside of the donor region and
                            # inside of the recipient region
                            region_neighbors[donor].add(cand)
                            region_neighbors[recipient].discard(cand)

                            neighs_of_cand = neighbors(adj, cand)

                            # everything adjacent to cand now borders the
                            # recipient region, except for the recipient's
                            # own areas
                            recipient_region_areas = set(
                                np.where(labels == recipient)[0])
                            region_neighbors[recipient].update(neighs_of_cand)
                            region_neighbors[recipient].difference_update(
                                recipient_region_areas)

                            # neighbors of cand which are not adjacent to any
                            # remaining area of the donor region no longer
                            # border the donor region
                            donor_region_areas = set(
                                np.where(labels == donor)[0])
                            not_donor_neighs_anymore = set(
                                area for area in neighs_of_cand if not any(
                                    a in donor_region_areas
                                    for a in neighbors(adj, area)))
                            region_neighbors[donor].difference_update(
                                not_donor_neighs_anymore)
                            # a move was made: back to step 4 in order to
                            # recompute the candidates for this recipient
                            break
                    else:
                        # candidates exhausted without any accepted move:
                        # done with this recipient region, back to step 3
                        break

            obj_val_end = float(self.allow_move_strategy.objective_val)
        return labels