Beispiel #1
0
def assert_feasible(solution, adj, n_regions=None):
    """
    Raise an error if a clustering is infeasible.

    A clustering is considered feasible if every region is spatially
    contiguous and, when `n_regions` is given, the number of distinct region
    labels equals `n_regions`.

    Parameters
    ----------
    solution : :class:`numpy.ndarray`
        Array of region labels.
    adj : :class:`scipy.sparse.csr_matrix`
        Adjacency matrix representing the contiguity relation.
    n_regions : `int` or `None`
        An `int` represents the desired number of regions.
        If `None`, then the number of regions is not checked.

    Raises
    ------
    exc : `ValueError`
        A `ValueError` is raised if clustering is not spatially contiguous.
        Given the `n_regions` argument is not `None`, a `ValueError` is raised
        also if the number of regions is not equal to the `n_regions` argument.
    """
    region_labels = set(solution)

    if n_regions is not None and len(region_labels) != n_regions:
        # Report the number of distinct regions, not the number of areas.
        raise ValueError(
            "The number of regions is {} but "
            "should be {}".format(len(region_labels), n_regions)
        )

    for region_label in region_labels:
        # Restrict the adjacency matrix to the areas of this region only.
        aux = sub_adj_matrix(adj, np.where(solution == region_label)[0])

        # check right contiguity
        if not is_connected(aux):
            raise ValueError(
                "Region {} is not spatially " "contiguous.".format(region_label)
            )
Beispiel #2
0
    def fit_from_scipy_sparse_matrix(
            self,
            adj,
            attr,
            n_regions,
            initial_labels=None,
            objective_func=ObjectiveFunctionPairwise()):
        """
        Run the AZP algorithm (see [OR1995]_) on every connected component.

        The final region labels are stored in the instance's :attr:`labels_`
        attribute; `n_regions` is stored in :attr:`n_regions`.

        Parameters
        ----------
        adj : :class:`scipy.sparse.csr_matrix`
            Adjacency matrix representing the contiguity relation.
        attr : :class:`numpy.ndarray`
            Array (number of areas x number of attributes) of areas' attributes
            relevant to clustering. A one-dimensional array is reshaped to
            have one row per area.
        n_regions : `int`
            Number of desired regions.
        initial_labels : :class:`numpy.ndarray` or None, default: None
            One-dimensional array of labels at the beginning of the algorithm.
            If None, then an initial clustering is generated by
            `generate_initial_sol`. If given, it is validated with
            `assert_feasible` first.
        objective_func : :class:`region.objective_function.ObjectiveFunction`, default: ObjectiveFunctionPairwise()
            The objective function to use. Note that the default instance is
            created once, when the method is defined, and is therefore shared
            between calls that do not pass this argument.
        """
        n_areas = adj.shape[0]
        if attr.ndim == 1:
            attr = attr.reshape(n_areas, -1)
        self.allow_move_strategy.attr_all = attr
        self.objective_func = objective_func

        # step 1: obtain one labeling per connected component
        if initial_labels is None:
            per_component_labels = generate_initial_sol(adj, n_regions)
        else:
            assert_feasible(initial_labels, adj, n_regions)
            per_component_labels = separate_components(adj, initial_labels)

        # -1 marks areas that have not been assigned a region yet.
        result = -np.ones(n_areas)
        for component_labels in per_component_labels:
            # Areas labelled -1 are not part of the current component.
            members = np.where(component_labels != -1)[0]
            component_adj = sub_adj_matrix(adj, members)
            component_labels = component_labels[members]
            component_attr = attr[members]

            self.allow_move_strategy.start_new_component(
                component_labels, component_attr, self.objective_func,
                members)

            result[members] = self._azp_connected_component(
                component_adj, component_labels, component_attr)

        self.n_regions = n_regions
        self.labels_ = result
Beispiel #3
0
def boolean_assert_feasible(solution, adj, n_regions=None):
    """
    Return whether every region of a clustering is spatially contiguous.

    This is the boolean counterpart of `assert_feasible` with respect to the
    contiguity check. The check on the number of regions still raises.

    Parameters
    ----------
    solution : :class:`numpy.ndarray`
        Array of region labels.
    adj : :class:`scipy.sparse.csr_matrix`
        Adjacency matrix representing the contiguity relation.
    n_regions : `int` or `None`
        An `int` represents the desired number of regions.
        If `None`, then the number of regions is not checked.

    Returns
    -------
    feasible : `bool`
        `True` if every region is connected, `False` otherwise.

    Raises
    ------
    exc : `ValueError`
        Given the `n_regions` argument is not `None`, a `ValueError` is raised
        if the number of regions is not equal to the `n_regions` argument.
    """
    region_labels = set(solution)

    if n_regions is not None and len(region_labels) != n_regions:
        # Report the number of distinct regions, not the number of areas.
        raise ValueError(
            "The number of regions is {} but "
            "should be {}".format(len(region_labels), n_regions)
        )

    # all() stops at the first disconnected region.
    return all(
        is_connected(
            sub_adj_matrix(adj, np.where(solution == region_label)[0]))
        for region_label in region_labels
    )
Beispiel #4
0
    def _azp_connected_component(self, adj, initial_labels, attr):
        """
        Implementation of the reactive tabu version of the AZP algorithm (refer
        to [OR1995]_) for a spatially connected set of areas (i.e. for every
        area there is a path to every other area).

        Parameters
        ----------
        adj : :class:`scipy.sparse.csr_matrix`
            Refer to the corresponding argument in
            :meth:`AZP._azp_connected_component`.
        initial_labels : :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`AZP._azp_connected_component`.
        attr : :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`AZP._azp_connected_component`.

        Returns
        -------
        labels : :class:`numpy.ndarray`
            Refer to the return value in :meth:`AZP._azp_connected_component`.
            If the initial solution has a single region, `initial_labels`
            itself is returned; otherwise a new array built from a stored
            snapshot of the labels is returned.
        """
        self.reset_tabu(1)
        # if there is only one region in the initial solution, just return it.
        distinct_regions = list(np.unique(initial_labels))
        if len(distinct_regions) == 1:
            return initial_labels

        #  step 2: make a list of the M regions
        # NOTE(review): `labels` aliases `initial_labels`; presumably
        # `self._make_move` modifies it in place - confirm.
        labels = initial_labels

        it_since_tabu_len_changed = 0
        obj_val_start = float("inf")
        # step 12: Repeat steps 3-11 until either no further improvements are
        # made or a maximum number of iterations are exceeded.
        # NOTE(review): if the loop body never reaches step 10 or step 11
        # (e.g. self.maxit == 0), `last_step` is unbound at the end.
        for it in range(self.maxit):
            obj_val_end = self.objective_func(labels, attr)
            if not obj_val_end < obj_val_start:
                break  # step 12
            obj_val_start = obj_val_end

            it_since_tabu_len_changed += 1
            # step 3: Define the list of all possible moves that are not tabu
            # and retain regional connectivity.
            possible_moves = []
            for area in range(labels.shape[0]):
                old_region = labels[area]
                sub_adj = sub_adj_matrix(
                    adj, np.where(labels == old_region)[0], wo_nodes=area)
                # moving the area must not destroy spatial contiguity in donor
                # region and if area is alone in its region, it must stay:
                if is_connected(sub_adj) and count(labels, old_region) > 1:
                    for neigh in neighbors(adj, area):
                        new_region = labels[neigh]
                        if new_region != old_region:
                            possible_move = Move(area, old_region, new_region)
                            if possible_move not in self.tabu:
                                possible_moves.append(possible_move)
            # step 4: Find the best nontabu move.
            best_move = None
            best_move_index = None
            best_objval_diff = float("inf")
            for i, move in enumerate(possible_moves):
                obj_val_diff = self.objective_func.update(
                    move.area, move.new_region, labels, attr)
                if obj_val_diff < best_objval_diff:
                    best_move_index, best_move = i, move
                    best_objval_diff = obj_val_diff
            # step 5: Make the move. Update the tabu status.
            # NOTE(review): `best_move` is still None here when
            # `possible_moves` is empty - confirm that cannot happen.
            self._make_move(best_move.area, best_move.new_region, labels)
            # step 6: Look up the current zoning system in a list of all zoning
            # systems visited so far during the search. If not found then go
            # to step 10.
            # Snapshot the labels as a tuple so that the current state can be
            # compared with the entries stored in self.visited:
            label_tup = tuple(labels)
            if label_tup in self.visited:
                # step 7: If it is found and it has been visited more than K1
                # times already and this cyclical behavior has been found on
                # at least K2 other occasions (involving other zones) then go
                # to step 11.
                times_visited = self.visited.count(label_tup)
                # `cycle` is the tail of self.visited starting at the most
                # recent occurrence of the current state.
                cycle = list(reversed(self.visited))
                cycle = cycle[:cycle.index(label_tup) + 1]
                cycle = list(reversed(cycle))
                it_until_repetition = len(cycle)
                if times_visited > self.k1:
                    times_cycle_found = 0
                    if self.k2 > 0:
                        for i in range(len(self.visited) - len(cycle)):
                            if self.visited[i:i + len(cycle)] == cycle:
                                times_cycle_found += 1
                                if times_cycle_found >= self.k2:
                                    break
                    if times_cycle_found >= self.k2:
                        # step 11: Delete all stored zoning systems and make P
                        # random moves, P = 1 + self.avg_it_until_rep/2, and
                        # update tabu to preclude a return to the previous
                        # state.
                        # we save the labels such that we can access it if
                        # this step yields a poor solution.
                        last_step = (11, tuple(labels))
                        self.visited = []
                        p = math.floor(1 + self.avg_it_until_rep / 2)
                        possible_moves.pop(best_move_index)
                        # NOTE(review): this raises if fewer than `p` moves
                        # remain in `possible_moves` - confirm.
                        for _ in range(p):
                            move = possible_moves.pop(
                                random.randrange(len(possible_moves)))
                            self._make_move(move.area, move.new_region, labels)
                        continue
                    # step 8: Update a moving average of the repetition
                    # interval self.avg_it_until_rep, and increase the
                    # prohibition period R to 1.1*R.
                    self.rep_counter += 1
                    avg_it = self.avg_it_until_rep
                    self.avg_it_until_rep = 1 / self.rep_counter * \
                        ((self.rep_counter-1)*avg_it + it_until_repetition)

                    # deque requires an integer maxlen; 1.1*R is a float.
                    # Round down, but grow by at least one so that small R
                    # (e.g. R = 1) actually increases.
                    old_tabu_len = self.tabu.maxlen
                    grown_tabu_len = max(old_tabu_len + 1,
                                         math.floor(1.1 * old_tabu_len))
                    self.tabu = deque(self.tabu, grown_tabu_len)
                    # step 9: If the number of iterations since R was last
                    # changed exceeds self.avg_it_until_rep, then decrease R to
                    # max(0.9*R, 1).
                    if it_since_tabu_len_changed > self.avg_it_until_rep:
                        new_tabu_len = max([0.9 * self.tabu.maxlen, 1])
                        new_tabu_len = math.floor(new_tabu_len)
                        self.tabu = deque(self.tabu, new_tabu_len)
                    it_since_tabu_len_changed = 0  # step 8

            # step 10: Save the zoning system and go to step 12.
            self.visited.append(tuple(labels))
            last_step = 10

        if last_step == 10:
            # Prefer the second-to-last stored state; fall back to the last
            # one when only a single state has been stored.
            try:
                return np.array(self.visited[-2])
            except IndexError:
                return np.array(self.visited[-1])
        # if step 11 was the last one, the result is in last_step[1]
        return np.array(last_step[1])
Beispiel #5
0
    def _azp_connected_component(self, adj, initial_clustering, attr):
        """
        Basic tabu variant of the AZP algorithm (see [OR1995]_), applied to a
        spatially connected set of areas (i.e. every area can be reached from
        every other area).

        Parameters
        ----------
        adj : :class:`scipy.sparse.csr_matrix`
            Refer to the corresponding argument in
            :meth:`AZP._azp_connected_component`.
        initial_clustering : :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`AZP._azp_connected_component`.
        attr : :class:`numpy.ndarray`
            Refer to the corresponding argument in
            :meth:`AZP._azp_connected_component`.

        Returns
        -------
        labels : :class:`numpy.ndarray`
            Refer to the return value in :meth:`AZP._azp_connected_component`.
        """
        self.reset_tabu()
        # A clustering with a single region is returned unchanged.
        if len(np.unique(initial_clustering)) == 1:
            return initial_clustering

        labels = initial_clustering
        n_areas = labels.shape[0]

        seen_states = []
        terminate = False
        while True:
            # Extra termination criterion (not part of Openshaw & Rao
            # (1995)): remember when the current state has already been seen
            # often enough.
            state = tuple(labels)
            if seen_states.count(state) >= self.reps_before_termination:
                terminate = True
            seen_states.append(state)

            # step 1: search all areas for the best move that is not tabu.
            best_move = None
            best_delta = float("inf")
            for area in range(n_areas):
                donor = labels[area]
                donor_rest = sub_adj_matrix(
                    adj, np.where(labels == donor)[0], wo_nodes=area)
                # The donor region has to stay contiguous without the area ...
                if not is_connected(donor_rest):
                    continue
                # ... and an area that is alone in its region must stay.
                if not count(labels, donor) > 1:
                    continue
                for neigh in neighbors(adj, area):
                    target = labels[neigh]
                    if target == donor:
                        continue
                    candidate = Move(area, donor, target)
                    if candidate in self.tabu:
                        continue
                    delta = self.objective_func.update(
                        candidate.area, candidate.new_region, labels, attr)
                    if delta < best_delta:
                        best_move, best_delta = candidate, delta

            # step 2: carry out the move if it improves the objective value
            # or leaves it unchanged.
            if best_move is not None and best_delta <= 0:
                self._make_move(best_move.area, best_move.new_region, labels)
                continue

            # step 3: otherwise look for a tabu move that still applies to
            # the current labels and improves the objective (aspiration move).
            aspirations = []
            for tabu_move in self.tabu:
                if (labels[tabu_move.area] == tabu_move.old_region
                        and self.objective_func.update(
                            tabu_move.area, tabu_move.new_region,
                            labels, attr) < 0):
                    aspirations.append(tabu_move)
            if aspirations:
                chosen = random_element_from(aspirations)
                self._make_move(chosen.area, chosen.new_region, labels)
                continue

            # step 4: neither an improving nor an aspiration move exists.
            # Stop if the termination flag is set; otherwise make the best
            # move even though it worsens the objective value.
            if terminate:
                break
            if best_move is not None:
                self._make_move(best_move.area, best_move.new_region, labels)
        return labels
Beispiel #6
0
    def _azp_connected_component(self, adj, initial_clustering, attr):
        """
        Run the AZP algorithm on a spatially connected set of areas (i.e.
        every area can be reached from every other area).

        Parameters
        ----------
        adj : :class:`scipy.sparse.csr_matrix`
            Adjacency matrix representing the contiguity relation. The matrix'
            shape is (N, N) where N denotes the number of areas in the
            currently considered connected component.
        initial_clustering : :class:`numpy.ndarray`
            Array of labels. Shape: (N,) where N denotes the number of areas in
            the currently considered connected component.
        attr : :class:`numpy.ndarray`
            Array of attributes. Shape: (N, M) where N denotes the number of
            areas in the currently considered connected component and M
            denotes the number of attributes per area. This argument is not
            read inside this method.

        Returns
        -------
        labels : :class:`numpy.ndarray`
            One-dimensional array of region labels after the AZP algorithm has
            been performed. Only region labels of the currently considered
            connected component are returned.
        """
        # Nothing to do when the initial solution has a single region.
        all_regions = list(np.unique(initial_clustering))
        if len(all_regions) == 1:
            return initial_clustering

        #  step 2: make a list of the M regions
        labels = initial_clustering

        # For each region: the areas outside the region that are adjacent to
        # at least one of its areas.
        region_neighbors = {}
        for region in all_regions:
            members = set(np.where(labels == region)[0])
            bordering = set()
            for member in members:
                bordering.update(neighbors(adj, member))
            region_neighbors[region] = bordering.difference(members)

        previous_objective = float("inf")
        current_objective = self.allow_move_strategy.objective_val

        # step 7: Repeat until no further improving moves are made
        while current_objective < previous_objective:
            previous_objective = float(current_objective)
            pending_regions = all_regions.copy()

            # step 6: once the candidates of a region are exhausted, pick
            # another region and repeat steps 4-6
            while pending_regions:
                # step 3: select & remove any region K at random from the list
                recipient = pop_randomly_from(pending_regions)

                moved = True
                while moved:
                    moved = False
                    # step 4: collect the areas bordering region K that could
                    # join it without breaking the contiguity of their
                    # current (donor) region
                    candidates = []
                    for border_area in region_neighbors[recipient]:
                        border_region = labels[border_area]
                        remainder = sub_adj_matrix(
                            adj,
                            np.where(labels == border_region)[0],
                            wo_nodes=border_area)
                        if not is_connected(remainder):
                            continue
                        # an area that is alone in its region must stay
                        if count(labels, border_region) > 1:
                            candidates.append(border_area)

                    # step 5: draw candidates at random until the move
                    # strategy accepts one. Make that move, update the
                    # neighbor bookkeeping and go back to step 4. If no
                    # candidate is accepted, region K is finished.
                    while candidates and not moved:
                        cand = pop_randomly_from(candidates)
                        if not self.allow_move_strategy(
                                cand, recipient, labels):
                            continue
                        donor = labels[cand]

                        make_move(cand, recipient, labels)
                        moved = True

                        # cand now borders the donor, not the recipient
                        region_neighbors[donor].add(cand)
                        region_neighbors[recipient].discard(cand)

                        cand_neighbors = neighbors(adj, cand)

                        # cand's neighbors now border the recipient, except
                        # those that are members of the recipient
                        recipient_members = set(
                            np.where(labels == recipient)[0])
                        region_neighbors[recipient].update(cand_neighbors)
                        region_neighbors[recipient].difference_update(
                            recipient_members)

                        # cand's neighbors without any adjacent donor area
                        # no longer border the donor
                        donor_members = set(np.where(labels == donor)[0])
                        lost_by_donor = {
                            area for area in cand_neighbors
                            if donor_members.isdisjoint(neighbors(adj, area))
                        }
                        region_neighbors[donor].difference_update(
                            lost_by_donor)

            current_objective = float(self.allow_move_strategy.objective_val)
        return labels