Esempio n. 1
0
 def __init__(self, metric=None):
     """
     Parameters
     ----------
     metric : function or str or None, default: None
         Refer to the `metric` argument
         in :func:`region.util.get_metric_function`.
     """
     self.metric = get_metric_function(metric)
Esempio n. 2
0
 def __init__(self, metric=None):
     """
     Parameters
     ----------
     metric : function or str or None, default: None
         Refer to the `metric` argument
         in :func:`region.util.get_metric_function`.
     """
     self.metric = get_metric_function(metric)
Esempio n. 3
0
    def fit_from_scipy_sparse_matrix(self,
                                     adj,
                                     attr,
                                     n_regions,
                                     method="flow",
                                     solver="cbc",
                                     metric="euclidean"):
        """
        Solve the p-regions problem as MIP as described in [DCM2011]_.

        The resulting region labels are assigned to the instance's
        :attr:`labels_` attribute.

        Parameters
        ----------
        adj : class:`scipy.sparse.csr_matrix`
            Adjacency matrix representing the areas' contiguity relation.
        attr : :class:`numpy.ndarray`
            Array (number of areas x number of attributes) of areas' attributes
            relevant to clustering.
        n_regions : `int`
            Number of desired regions.
        method : {"flow", "order", "tree"}, default: "flow"
            The method to translate the clustering problem into an exact
            optimization model.

            * "flow" - Flow model on p. 112-113 in [DCM2011]_
            * "order" - Order model on p. 110-112 in [DCM2011]_
            * "tree" - Tree model on p. 108-110 in [DCM2011]_

        solver : {"cbc", "cplex", "glpk", "gurobi"}, default: "cbc"
            The solver to use. Unless the default solver is used, the user has
            to make sure that the specified solver is installed.

            * "cbc" - the Cbc (Coin-or branch and cut) solver
            * "cplex" - the CPLEX solver
            * "glpk" - the GLPK (GNU Linear Programming Kit) solver
            * "gurobi" - the Gurobi Optimizer

        metric : str or function, default: "euclidean"
            See the `metric` argument in
            :func:`region.util.get_metric_function`.
        """
        if not isinstance(n_regions, numbers.Integral) or n_regions <= 0:
            raise ValueError("The n_regions argument must be a positive "
                             "integer.")
        if adj.shape[0] < n_regions:
            raise ValueError("The number of regions must be less than the "
                             "number of areas.")
        if attr.ndim == 1:
            attr = attr.reshape(adj.shape[0], -1)
        self._check_method(method)
        check_solver(solver)
        metric = get_metric_function(metric)

        opt_func = {
            "flow": _flow,
            "order": _order,
            "tree": _tree
        }[method.lower()]

        result_dict = opt_func(adj, attr, n_regions, solver, metric)
        self.labels_ = result_dict
        self.n_regions = n_regions
        self.method = method
        self.metric = metric
        self.solver = solver
Esempio n. 4
0
    def fit_from_scipy_sparse_matrix(self,
                                     adj,
                                     attr,
                                     spatially_extensive_attr,
                                     threshold,
                                     solver="cbc",
                                     metric="euclidean"):
        """
        Solve the max-p-regions problem as MIP as described in [DAR2012]_.

        The resulting region labels are assigned to the instance's
        :attr:`labels_` attribute.

        Parameters
        ----------
        adj : class:`scipy.sparse.csr_matrix`
            Adjacency matrix representing the areas' contiguity relation.
        attr : :class:`numpy.ndarray`
            Array (number of areas x number of attributes) of areas' attributes
            relevant to clustering.
        spatially_extensive_attr : :class:`numpy.ndarray`
            Array (number of areas x number of attributes) of areas' attributes
            relevant to ensuring the threshold condition.
        threshold : numbers.Real or :class:`numpy.ndarray`
            The lower bound for a region's sum of spatially extensive
            attributes. The argument's type is numbers.Real if there is only
            one spatially extensive attribute per area, otherwise it is a
            one-dimensional array with as many entries as there are spatially
            extensive attributes per area.
        solver : {"cbc", "cplex", "glpk", "gurobi"}, default: "cbc"
            The solver to use. Unless the default solver is used, the user has
            to make sure that the specified solver is installed.

            * "cbc" - the Cbc (Coin-or branch and cut) solver
            * "cplex" - the CPLEX solver
            * "glpk" - the GLPK (GNU Linear Programming Kit) solver
            * "gurobi" - the Gurobi Optimizer

        metric : str or function, default: "euclidean"
            See the `metric` argument in
            :func:`region.util.get_metric_function`.
        """
        self.metric = get_metric_function(metric)
        check_solver(solver)

        prob = LpProblem("Max-p-Regions", LpMinimize)

        # Parameters of the optimization problem
        n_areas = adj.shape[0]
        I = list(range(n_areas))  # index for areas
        II = [(i, j) for i in I for j in I]
        II_upper_triangle = [(i, j) for i, j in II if i < j]
        # index of potential regions, called k in [DAR2012]_:
        K = range(n_areas)
        # index of contiguity order, called c in [DAR2012]_:
        O = range(n_areas)
        d = {(i, j): self.metric(attr[i].reshape(1, -1),
                                 attr[j].reshape(1, -1))
             for i, j in II_upper_triangle}
        h = 1 + floor(log10(sum(d[(i, j)] for i, j in II_upper_triangle)))

        # Decision variables
        t = LpVariable.dicts("t", ((i, j) for i, j in II_upper_triangle),
                             lowBound=0,
                             upBound=1,
                             cat=LpInteger)
        x = LpVariable.dicts("x", ((i, k, o) for i in I for k in K for o in O),
                             lowBound=0,
                             upBound=1,
                             cat=LpInteger)

        # Objective function
        # (1) in Duque et al. (2012): "The Max-p-Regions Problem"
        prob += -10**h * lpSum(x[i, k, 0] for k in K for i in I) \
            + lpSum(d[i, j] * t[i, j] for i, j in II_upper_triangle)

        # Constraints
        # (2) in Duque et al. (2012): "The Max-p-Regions Problem"
        for k in K:
            prob += lpSum(x[i, k, 0] for i in I) <= 1
        # (3) in Duque et al. (2012): "The Max-p-Regions Problem"
        for i in I:
            prob += lpSum(x[i, k, o] for k in K for o in O) == 1
        # (4) in Duque et al. (2012): "The Max-p-Regions Problem"
        for i in I:
            for k in K:
                for o in range(1, len(O)):
                    prob += x[i, k, o] <= lpSum(x[j, k, o - 1]
                                                for j in neighbors(adj, i))
        # (5) in Duque et al. (2012): "The Max-p-Regions Problem"
        if isinstance(spatially_extensive_attr[I[0]], numbers.Real):
            for k in K:
                lhs = lpSum(x[i, k, o] * spatially_extensive_attr[i] for i in I
                            for o in O)
                prob += lhs >= threshold * lpSum(x[i, k, 0] for i in I)
        elif isinstance(spatially_extensive_attr[I[0]], collections.Iterable):
            for el in range(len(spatially_extensive_attr[I[0]])):
                for k in K:
                    lhs = lpSum(x[i, k, o] * spatially_extensive_attr[i][el]
                                for i in I for o in O)
                    if isinstance(threshold, numbers.Real):
                        rhs = threshold * lpSum(x[i, k, 0] for i in I)
                        prob += lhs >= rhs
                    elif isinstance(threshold, np.ndarray):
                        rhs = threshold[el] * lpSum(x[i, k, 0] for i in I)
                        prob += lhs >= rhs
        # (6) in Duque et al. (2012): "The Max-p-Regions Problem"
        for i, j in II_upper_triangle:
            for k in K:
                prob += t[i, j] >= \
                        lpSum(x[i, k, o] + x[j, k, o] for o in O) - 1
        # (7) in Duque et al. (2012): "The Max-p-Regions Problem"
        # already in LpVariable-definition
        # (8) in Duque et al. (2012): "The Max-p-Regions Problem"
        # already in LpVariable-definition

        # additional constraint for speedup (p. 405 in [DAR2012]_)
        for o in O:
            prob += x[I[0], K[0], o] == (1 if o == 0 else 0)

        # Solve the optimization problem
        solver = get_solver_instance(solver)
        print("start solving with", solver)
        prob.solve(solver)
        print("solved")
        result = np.zeros(n_areas)
        for i in I:
            for k in K:
                for o in O:
                    if x[i, k, o].varValue == 1:
                        result[i] = k
        self.labels_ = result
        self.solver = solver