def __init__(self, metric=None):
    """
    Store the metric function used by this instance.

    Parameters
    ----------
    metric : function or str or None, default: None
        Passed unchanged to :func:`region.util.get_metric_function`; see
        the `metric` argument there for the accepted values.
    """
    # Resolve the argument once and keep the resulting callable.
    resolved_metric = get_metric_function(metric)
    self.metric = resolved_metric
def fit_from_scipy_sparse_matrix(self, adj, attr, n_regions, method="flow",
                                 solver="cbc", metric="euclidean"):
    """
    Solve the p-regions problem as MIP as described in [DCM2011]_.

    The resulting region labels are assigned to the instance's
    :attr:`labels_` attribute.

    Parameters
    ----------
    adj : :class:`scipy.sparse.csr_matrix`
        Adjacency matrix representing the areas' contiguity relation.
    attr : :class:`numpy.ndarray`
        Array (number of areas x number of attributes) of areas'
        attributes relevant to clustering. A one-dimensional array is
        reshaped to have one row per area.
    n_regions : `int`
        Number of desired regions.
    method : {"flow", "order", "tree"}, default: "flow"
        The method to translate the clustering problem into an exact
        optimization model.

        * "flow" - Flow model on p. 112-113 in [DCM2011]_
        * "order" - Order model on p. 110-112 in [DCM2011]_
        * "tree" - Tree model on p. 108-110 in [DCM2011]_
    solver : {"cbc", "cplex", "glpk", "gurobi"}, default: "cbc"
        The solver to use. Unless the default solver is used, the user has
        to make sure that the specified solver is installed.

        * "cbc" - the Cbc (Coin-or branch and cut) solver
        * "cplex" - the CPLEX solver
        * "glpk" - the GLPK (GNU Linear Programming Kit) solver
        * "gurobi" - the Gurobi Optimizer
    metric : str or function, default: "euclidean"
        See the `metric` argument in
        :func:`region.util.get_metric_function`.

    Raises
    ------
    ValueError
        If `n_regions` is not a positive integer, or if it exceeds the
        number of areas (the number of rows of `adj`).
    """
    if not isinstance(n_regions, numbers.Integral) or n_regions <= 0:
        raise ValueError("The n_regions argument must be a positive "
                         "integer.")
    n_areas = adj.shape[0]
    if n_areas < n_regions:
        # n_regions == n_areas passes this check, hence "or equal to".
        raise ValueError("The number of regions must be less than or "
                         "equal to the number of areas.")
    if attr.ndim == 1:
        attr = attr.reshape(n_areas, -1)
    self._check_method(method)
    check_solver(solver)

    metric = get_metric_function(metric)

    # Dispatch to the model builder matching the requested formulation.
    opt_func = {
        "flow": _flow,
        "order": _order,
        "tree": _tree
    }[method.lower()]

    result_dict = opt_func(adj, attr, n_regions, solver, metric)
    self.labels_ = result_dict
    # Remember the configuration that produced ``labels_``.
    self.n_regions = n_regions
    self.method = method
    self.metric = metric
    self.solver = solver
def fit_from_scipy_sparse_matrix(self, adj, attr, spatially_extensive_attr,
                                 threshold, solver="cbc",
                                 metric="euclidean"):
    """
    Solve the max-p-regions problem as MIP as described in [DAR2012]_.

    The resulting region labels are assigned to the instance's
    :attr:`labels_` attribute.

    Parameters
    ----------
    adj : :class:`scipy.sparse.csr_matrix`
        Adjacency matrix representing the areas' contiguity relation.
    attr : :class:`numpy.ndarray`
        Array (number of areas x number of attributes) of areas'
        attributes relevant to clustering.
    spatially_extensive_attr : :class:`numpy.ndarray`
        Array (number of areas x number of attributes) of areas'
        attributes relevant to ensuring the threshold condition.
    threshold : numbers.Real or :class:`numpy.ndarray`
        The lower bound for a region's sum of spatially extensive
        attributes. The argument's type is numbers.Real if there is only
        one spatially extensive attribute per area, otherwise it is a
        one-dimensional array (or another indexable sequence) with as
        many entries as there are spatially extensive attributes per
        area. A single number is also accepted in the multi-attribute
        case and is then applied to every attribute.
    solver : {"cbc", "cplex", "glpk", "gurobi"}, default: "cbc"
        The solver to use. Unless the default solver is used, the user has
        to make sure that the specified solver is installed.

        * "cbc" - the Cbc (Coin-or branch and cut) solver
        * "cplex" - the CPLEX solver
        * "glpk" - the GLPK (GNU Linear Programming Kit) solver
        * "gurobi" - the Gurobi Optimizer
    metric : str or function, default: "euclidean"
        See the `metric` argument in
        :func:`region.util.get_metric_function`.
    """
    # Imported locally because the ``collections.Iterable`` alias was
    # removed in Python 3.10; the ABC lives in ``collections.abc``.
    from collections.abc import Iterable

    self.metric = get_metric_function(metric)
    check_solver(solver)

    prob = LpProblem("Max-p-Regions", LpMinimize)

    # Parameters of the optimization problem
    n_areas = adj.shape[0]
    I = list(range(n_areas))  # index for areas
    II_upper_triangle = [(i, j) for i in I for j in I if i < j]
    # index of potential regions, called k in [DAR2012]_:
    K = range(n_areas)
    # index of contiguity order, called c in [DAR2012]_:
    O = range(n_areas)
    # pairwise dissimilarities between the areas' attribute vectors
    d = {(i, j): self.metric(attr[i].reshape(1, -1),
                             attr[j].reshape(1, -1))
         for i, j in II_upper_triangle}
    # scaling exponent for the number-of-regions term of the objective
    h = 1 + floor(log10(sum(d[(i, j)] for i, j in II_upper_triangle)))

    # Decision variables
    t = LpVariable.dicts(
        "t",
        ((i, j) for i, j in II_upper_triangle),
        lowBound=0, upBound=1, cat=LpInteger)
    x = LpVariable.dicts(
        "x",
        ((i, k, o) for i in I for k in K for o in O),
        lowBound=0, upBound=1, cat=LpInteger)

    # Objective function
    # (1) in Duque et al. (2012): "The Max-p-Regions Problem"
    prob += -10**h * lpSum(x[i, k, 0] for k in K for i in I) \
        + lpSum(d[i, j] * t[i, j] for i, j in II_upper_triangle)

    # Constraints
    # (2) in Duque et al. (2012): "The Max-p-Regions Problem"
    for k in K:
        prob += lpSum(x[i, k, 0] for i in I) <= 1
    # (3) in Duque et al. (2012): "The Max-p-Regions Problem"
    for i in I:
        prob += lpSum(x[i, k, o] for k in K for o in O) == 1
    # (4) in Duque et al. (2012): "The Max-p-Regions Problem"
    for i in I:
        for k in K:
            for o in range(1, len(O)):
                prob += x[i, k, o] <= lpSum(x[j, k, o - 1]
                                            for j in neighbors(adj, i))
    # (5) in Duque et al. (2012): "The Max-p-Regions Problem"
    first_attr = spatially_extensive_attr[I[0]]
    if isinstance(first_attr, numbers.Real):
        # one spatially extensive attribute per area
        for k in K:
            lhs = lpSum(x[i, k, o] * spatially_extensive_attr[i]
                        for i in I for o in O)
            prob += lhs >= threshold * lpSum(x[i, k, 0] for i in I)
    elif isinstance(first_attr, Iterable):
        # several spatially extensive attributes per area: one family of
        # threshold constraints per attribute
        for el in range(len(first_attr)):
            # A scalar threshold applies to every attribute; any other
            # threshold (ndarray, list, tuple, ...) is indexed per
            # attribute so that no constraint is silently dropped.
            if isinstance(threshold, numbers.Real):
                el_threshold = threshold
            else:
                el_threshold = threshold[el]
            for k in K:
                lhs = lpSum(x[i, k, o] * spatially_extensive_attr[i][el]
                            for i in I for o in O)
                rhs = el_threshold * lpSum(x[i, k, 0] for i in I)
                prob += lhs >= rhs
    # (6) in Duque et al. (2012): "The Max-p-Regions Problem"
    for i, j in II_upper_triangle:
        for k in K:
            prob += t[i, j] >= \
                lpSum(x[i, k, o] + x[j, k, o] for o in O) - 1
    # (7) in Duque et al. (2012): "The Max-p-Regions Problem"
    # already in LpVariable-definition
    # (8) in Duque et al. (2012): "The Max-p-Regions Problem"
    # already in LpVariable-definition

    # additional constraint for speedup (p. 405 in [DAR2012]_)
    for o in O:
        prob += x[I[0], K[0], o] == (1 if o == 0 else 0)

    # Solve the optimization problem
    solver = get_solver_instance(solver)
    print("start solving with", solver)
    prob.solve(solver)
    print("solved")

    result = np.zeros(n_areas)
    for i in I:
        for k in K:
            for o in O:
                value = x[i, k, o].varValue
                # Solvers may report a binary variable as e.g. 0.9999999,
                # so test against 0.5 instead of exact equality with 1.
                if value is not None and value > 0.5:
                    result[i] = k
    self.labels_ = result
    self.solver = solver