Example #1
0
def test_make_iteration():
    """Check that one `_make_iteration` pass leaves the cluster names disjoint.

    Builds a `Map` from the rows of the module-level `data` table whose
    "Ожидаемая урожайность / приплод" column equals "КРДР", creates clusters
    with ``num_batch=(7, 15, 21, 42)`` and runs a single `_make_iteration`.

    Every cluster name is split on whitespace into node names.  The test
    asserts that no node name is repeated inside one cluster name and that no
    node name is shared between two cluster names.  (The previous version only
    printed these conditions and then failed unconditionally.)
    """
    # Function-scope import: only this test needs it.
    from itertools import combinations

    our_map = Map(
        number_of_clusters=4,
        data=data.loc[data["Ожидаемая урожайность / приплод"] == "КРДР"],
    )
    clusters = our_map._create_clusters(
        num_batch=(7, 15, 21, 42),
        clusternames=our_map.clusternames,
        number_of_clusters=4,
        graph=our_map.graph,
    )
    clusters = our_map._make_iteration(clusters=clusters, graph=our_map.graph)

    # Diagnostics; pytest shows captured output when an assertion below fails.
    print([x.square for x in clusters])
    print([x.name for x in clusters])
    print([x.adjacent_list for x in clusters])

    names = [x.name.split() for x in clusters]
    print(names)

    for parts in names:
        assert len(parts) == len(set(parts)), (
            f"node listed twice inside one cluster name: {parts}")

    for (i, left), (j, right) in combinations(enumerate(names), 2):
        shared = set(left).intersection(right)
        assert not shared, f"clusters {i} and {j} share nodes: {shared}"
Example #2
0
def test_launch_algo():
    """Run `Map.launch_algo` on the "КРДР" rows of the module-level `data`.

    NOTE(review): the closing assertion is always false, so this test fails
    unconditionally -- presumably so that pytest displays captured output;
    confirm whether that is still wanted.
    """
    krdr_rows = data.loc[data["Ожидаемая урожайность / приплод"] == "КРДР"]
    crop_map = Map(number_of_clusters=4, data=krdr_rows)
    crop_map.launch_algo()
    assert 0 == 1
Example #3
0
def test_clusters_chose_cluster_with_maxsquare_neighbor():
    """The cluster picked by max-square neighbour must consist of node "3".

    Uses a four-row table in which node "5" has square 10 and every other
    node has square 1; node "5" is listed as a neighbour of node "3" only.
    """
    rows = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5", 1],
        ["5", "3", 10],
    ]
    dataframe = pd.DataFrame(data=rows, columns=["№", "neighbors", "S"])
    crop_map = Map(number_of_clusters=4, data=dataframe)
    clusters = crop_map._create_clusters(
        num_batch=(0, 1, 2),
        clusternames=crop_map.clusternames,
        number_of_clusters=4,
        graph=crop_map.graph,
    )

    # Diagnostics: (node name, node adjacency, owning cluster number).
    membership = [
        (y.name, y.adjacent_list, x.serial_num)
        for x in clusters
        for y in x.nodes_belong
    ]
    print("111", membership)
    print([x.adjacent_list for x in clusters])

    cluster_with_biggest_neithbor = (
        crop_map._clusters_chose_cluster_with_maxsquare_neighbor(
            clusters, crop_map.graph))
    print(cluster_with_biggest_neithbor)
    print(list(cluster_with_biggest_neithbor.adjacent_list))

    chosen_names = {
        x.name for x in cluster_with_biggest_neithbor.nodes_belong
    }
    assert chosen_names == {
        "3"
    }, f"{[x.name for x in cluster_with_biggest_neithbor.nodes_belong]=}"
Example #4
0
def test_try_chose_smallest_cluster():
    """`_try_chose_smallest_cluster` must return the cluster named "3".

    Every node that already belongs to a cluster is first added to
    `Map.restricted_nodes`, then the selection is made with that set.

    NOTE(review): the table contains two rows keyed "3" and a neighbour "4"
    that has no row of its own -- confirm this is intentional.
    """
    rows = [
        ["1", "", 1],
        ["2", "", 7],
        ["3", "1", 10],
        ["3", "4", 20],
    ]
    dataframe = pd.DataFrame(data=rows, columns=["№", "neighbors", "S"])

    crop_map = Map(number_of_clusters=3, data=dataframe)
    clusters = crop_map._create_clusters(
        num_batch=(0, 1, 2),
        clusternames=crop_map.clusternames,
        number_of_clusters=4,
        graph=crop_map.graph,
    )

    # Mark every clustered node as restricted.
    for member_cluster in clusters:
        for member in member_cluster.nodes_belong:
            crop_map.restricted_nodes.add(member.name)
    print(crop_map.restricted_nodes)

    # TODO: make this a real check
    cluster_with_smallest_square = crop_map._try_chose_smallest_cluster(
        clusters, crop_map.restricted_nodes)
    print(crop_map.restricted_nodes, [x.name for x in clusters])

    assert (cluster_with_smallest_square.name == "3"
            ), f"{cluster_with_smallest_square.name=}"
Example #5
0
def test_cut_graph_for_cluster():
    """Nodes in the cut graph belong to the target cluster or to none.

    Cuts the graph for cluster number 0 and asserts that every node in the
    result has `belongs_to_cluster` equal to 0 or falsy.
    """
    rows = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5, 6", 1],
        ["5", "3", 1],
        ["6", "3, 4", 1],
        ["4", "6, 2", 1],
    ]
    dataframe = pd.DataFrame(data=rows, columns=["№", "neighbors", "S"])
    crop_map = Map(number_of_clusters=4, data=dataframe)
    clusters = crop_map._create_clusters(
        num_batch=(0, 1, 2, 3),
        clusternames=crop_map.clusternames,
        number_of_clusters=4,
        graph=crop_map.graph,
    )
    # Diagnostics: (node name, node adjacency, owning cluster number).
    print([
        (member.name, member.adjacent_list, owner.serial_num)
        for owner in clusters
        for member in owner.nodes_belong
    ])

    target_serial_num = 0
    cut_graph = crop_map._cut_graph_for_cluster(graph=crop_map.graph,
                                                clusternum=target_serial_num)
    print([(name, node.belongs_to_cluster)
           for name, node in crop_map.graph.items()])
    print([(name, node.belongs_to_cluster, node.adjacent_list)
           for name, node in cut_graph.items()])
    print(cut_graph)

    for y in cut_graph.values():
        assert (y.belongs_to_cluster == target_serial_num
                or not y.belongs_to_cluster), f"{y.belongs_to_cluster=}"
Example #6
0
 def __init__(self, data: pd.DataFrame, given_sum: int,
              number_of_clusters: int):
     """Build the crop map and remember the search parameters.

     Stores *given_sum* and *number_of_clusters*, creates a `Map` from *data*
     with that cluster count, records the number of entries in the map's
     graph and starts with an empty `leaders` list.
     """
     self.number_of_clusters = number_of_clusters
     self.given_sum = given_sum
     self.leaders = []
     self.crop_map = Map(data=data, number_of_clusters=number_of_clusters)
     # Size of the graph as built by Map, taken right after construction.
     self.graph_len = len(self.crop_map.graph)
Example #7
0
def test_create_batches():
    """`_create_batches(graph_len=5, number_of_clusters=3)` yields 10 items."""
    crop_map = Map(number_of_clusters=4, data=data)

    # Materialise the iterable so its length can be checked.
    result = list(
        crop_map._create_batches(graph_len=5, number_of_clusters=3))

    assert len(result) == 10, f"{len(result)=}"
Example #8
0
def test_cluster_join_field_default():
    """Joining by max square picks node "3" and updates the adjacency.

    A single-node cluster around node "1" (neighbours "2" and "3") joins one
    of its neighbours chosen by `_chose_max_square_node`; node "3" has
    square 3, node "2" has square 1.
    """
    test = Cluster(
        serial_num=1,
        nodes_belong=[Node("1", ["2", "3"], 1)],
    )
    rows = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5, 6", 3],
        ["5", "3", 1],
        ["6", "3, 4", 1],
        ["4", "6", 2],
    ]
    dataframe = pd.DataFrame(data=rows, columns=["№", "neighbors", "S"])
    graph = Map._build_graph(data=dataframe)

    # Candidate nodes are the graph entries adjacent to the cluster.
    candidates = [graph[name] for name in test.adjacent_list]
    node_chosen = test.cluster_join_node(
        func_field_chose=test._chose_max_square_node,
        nodes=candidates,
    )

    assert node_chosen == "3", f"{node_chosen=}"
    assert test.adjacent_list == {"2", "5", "6"}, f"{test.adjacent_list=}"
Example #9
0
def test_cluster_join_specific_node():
    """Joining node "3" explicitly returns "3" and updates the adjacency.

    A single-node cluster around node "1" (neighbours "2" and "3") is asked
    to join the graph's node "3" via `cluster_join_specific_node`.  After
    the join the cluster's adjacency must be {"2", "5", "6"}.
    """
    serial_num = 1
    nodes = [Node("1", ["2", "3"], 1)]
    test = Cluster(
        serial_num=serial_num,
        nodes_belong=nodes,
    )
    column_names = ["№", "neighbors", "S"]
    data = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5, 6", 3],
        ["5", "3", 1],
        ["6", "3, 4", 1],
        ["4", "6", 2],
    ]
    dataframe = pd.DataFrame(data=data, columns=column_names)

    graph = Map._build_graph(data=dataframe)

    # Adjacency is printed before and after the join for diagnostics.
    print(test.adjacent_list)
    node_chosen = test.cluster_join_specific_node(graph["3"])
    print(test.adjacent_list)

    assert node_chosen == "3", f"{node_chosen=}"
    assert test.adjacent_list == {"2", "5", "6"}, f"{test.adjacent_list=}"
Example #10
0
def test_create_clusters():
    """Three fully connected nodes give three clusters with no free neighbours.

    Each of the three nodes lists the other two as neighbours; after
    `_create_clusters` with ``num_batch=(0, 1, 2)`` the test expects three
    clusters, each with an empty `adjacent_list`.
    """
    rows = [
        ["1", "2, 3", 1],
        ["2", "1, 3", 1],
        ["3", "1, 2", 1],
    ]
    dataframe = pd.DataFrame(data=rows, columns=["№", "neighbors", "S"])
    crop_map = Map(number_of_clusters=4, data=dataframe)

    result = crop_map._create_clusters(
        num_batch=(0, 1, 2),
        clusternames=crop_map.clusternames,
        number_of_clusters=4,
        graph=crop_map.graph,
    )

    assert len(result) == 3, f"{len(result)=}"
    for x in result:
        assert not x.adjacent_list, f"{x.adjacent_list=}"
Example #11
0
def test_clusters_chose_cluster_with_smallest_square():
    """The cluster with the smallest square must have square 3.0.

    Four clusters are created from a table whose squares are 10, 5, 4 and 3;
    `_clusters_chose_cluster_with_smallest_square` is expected to return the
    one whose `square` equals 3.0.
    """
    column_names = ["№", "neighbors", "S"]
    data = [
        ["1", "2, 3", 10],
        ["2", "1", 5],
        ["3", "1, 5", 4],
        ["5", "3", 3],
    ]
    dataframe = pd.DataFrame(data=data, columns=column_names)
    our_map = Map(number_of_clusters=4, data=dataframe)
    clusters = our_map._create_clusters(
        num_batch=(0, 1, 2, 3),
        clusternames=our_map.clusternames,
        number_of_clusters=4,
        graph=our_map.graph,
    )
    # Diagnostics: (node name, adjacency, cluster number, cluster square).
    print([(y.name, y.adjacent_list, x.serial_num, x.square) for x in clusters
           for y in x.nodes_belong])
    cluster_with_smallest_square = our_map._clusters_chose_cluster_with_smallest_square(
        clusters)
    print(cluster_with_smallest_square.serial_num)
    # The message used to be a stray expression statement on the next line,
    # so it never appeared on failure; it is now part of the assert.
    assert cluster_with_smallest_square.square == 3.0, (
        f"{cluster_with_smallest_square.serial_num=}, "
        f"{cluster_with_smallest_square.square=}")
Example #12
0
def validate_double_edges(data: pd.DataFrame) -> List:
    """
    Find edges of the input table that the neighbouring node does not mirror.

    Builds the graph with `Map._build_graph` and, for every node, checks that
    each name in its adjacency list lists the node back.  Every offending
    pair is printed with a "PANIC" prefix and collected.  A neighbour that
    has no entry of its own in the graph is reported the same way instead of
    raising `KeyError`.

    :param data: input table handed to `Map._build_graph`
    :return: list of `(node_name, neighbour_name)` tuples for edges that
        exist in one direction only
    """
    graph = Map._build_graph(data)
    res = []
    for key, node in graph.items():
        for node_name in node.adjacent_list:
            neighbour = graph.get(node_name)
            # A missing neighbour cannot list `key` back, so it is a broken
            # edge as well.
            if neighbour is None or key not in neighbour.adjacent_list:
                print("PANIC", key, node_name)
                res.append((key, node_name))
    return res
Example #13
0
def validate_node_name_not_in_adj_list(data: pd.DataFrame) -> List:
    """
    Report nodes of the input table that list themselves as a neighbour.

    Args:
        data: input table handed to `Map._build_graph`.

    Returns:
        A list of `(node_name, adjacent_list)` tuples, one for every node
        whose own name appears in its adjacency list.
    """
    built_graph = Map._build_graph(data)
    return [
        (candidate.name, candidate.adjacent_list)
        for candidate in built_graph.values()
        if candidate.name in candidate.adjacent_list
    ]
Example #14
0
def test_build_graph_default():
    """`Map._build_graph` turns each table row into the expected `Node`.

    Every graph entry is compared with a `Node` built from the row's name,
    the set of neighbour names and square 1.  Nodes are checked one by one
    so that a failure names the node that actually differs (the previous
    single chained assert always reported node "1").
    """
    column_names = ["№", "neighbors", "S"]
    data = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5, 6", 1],
        ["5", "3", 1],
        ["6", "3, 4", 1],
        ["4", "6", 1],
    ]
    dataframe = pd.DataFrame(data=data, columns=column_names)

    graph = Map._build_graph(data=dataframe)

    # Expected adjacency per node name, in the original comparison order.
    expected_adjacency = {
        "1": {"2", "3"},
        "2": {"1"},
        "3": {"1", "5", "6"},
        "5": {"3"},
        "6": {"3", "4"},
        "4": {"6"},
    }
    for name, neighbours in expected_adjacency.items():
        expected_node = Node(name=name, adjacent_list=neighbours, square=1)
        assert graph[name] == expected_node, (
            f"{name=}, {graph[name].adjacent_list=}")
Example #15
0
def test_clusters_bring_closer():
    """After `_clusters_bring_closer` every cluster must have square 11.0.

    The smallest cluster first joins its max-square neighbour; the remaining
    clusters are then brought closer to that cluster's square.
    """
    rows = [
        ["1", "2, 3", 1],
        ["2", "1, 3, 4", 7],
        ["3", "1, 2, 5, 6", 10],
        ["4", "2, 5, 8", 6],
        ["5", "3, 4, 6, 7, 8", 4],
        ["6", "3, 5, 7", 6],
        ["7", "5, 6, 8", 1],
        ["8", "4, 5, 7", 5],
    ]
    dataframe = pd.DataFrame(data=rows, columns=["№", "neighbors", "S"])

    crop_map = Map(number_of_clusters=3, data=dataframe)
    clusters = crop_map._create_clusters(
        num_batch=(0, 5, 7),
        clusternames=crop_map.clusternames,
        number_of_clusters=4,
        graph=crop_map.graph,
    )

    smallest = crop_map._clusters_chose_cluster_with_smallest_square(clusters)
    # _chose_max_square_node takes a list of nodes and returns one of them.
    neighbours = [crop_map.graph[name] for name in smallest.adjacent_list]
    node = smallest._chose_max_square_node(neighbours)

    # Take the smallest cluster out while the others are adjusted.
    clusters.remove(smallest)
    smallest.cluster_join_specific_node(node)
    # TODO: pass a real set
    crop_map._clusters_bring_closer(clusters, smallest.square, {"a", "b"})
    clusters.append(smallest)

    for cluster in clusters:
        assert cluster.square == 11.0, f"{cluster.name=}, {cluster.square=}"
Example #16
0
class Solver:
    """
    решатель на основе перебора, с предварительным склеиванием вершин до значения
    given_sum-количество вершин, до которого нужно уменьшить количество вершин в графе
    """
    def __init__(self, data: pd.DataFrame, given_sum: int,
                 number_of_clusters: int):
        """Build the crop map and remember the search parameters.

        Stores *given_sum* and *number_of_clusters*, creates a `Map` from
        *data* with that cluster count, records the number of entries in the
        map's graph and starts with an empty `leaders` list.
        """
        self.number_of_clusters = number_of_clusters
        self.given_sum = given_sum
        self.leaders = []
        self.crop_map = Map(data=data, number_of_clusters=number_of_clusters)
        # Size of the graph as built by Map, taken right after construction.
        self.graph_len = len(self.crop_map.graph)

    @staticmethod
    def _make_combinations_with_given_sum(given_sum: int,
                                          number_of_clusters: int,
                                          graph_len: int) -> Iterator[Tuple]:
        """
        выдает всевозможные комбинации заданной длины с заданной суммой
        :param given_sum: заданная сумма
        :param number_of_clusters: количество кластеров(размерность тупла)
        :param graph_len: размер графа
        :return: комбинация
        """
        for x in itertools.product(range(1, given_sum),
                                   repeat=number_of_clusters):
            if not sum(x) == given_sum:
                continue
            yield x

    @staticmethod
    def _make_combinations_with_given_sum_new(
            given_sum: int, number_of_clusters: int,
            graph_len: int) -> Iterator[Tuple]:
        """
        выдает всевозможные комбинации заданной длины с заданной суммой
        :param given_sum: заданная сумма
        :param number_of_clusters: количество кластеров(размерность тупла)
        :param graph_len: размер графа
        :return: комбинация
        """
        iterset = set()
        for x in itertools.product(range(1, given_sum),
                                   repeat=number_of_clusters):
            if not sum(x) == given_sum:
                continue
            xx = tuple(sorted([i for i in x]))
            if xx in iterset:
                continue
            iterset.add(xx)
            yield xx

    @staticmethod
    def _make_combinations_with_given_sum_extra(
            given_sum: int, number_of_clusters: int, graph_len: int,
            cut_bound_max: int) -> Iterator[Tuple]:
        """
        выдает всевозможные комбинации заданной длины с заданной суммой
        :param given_sum: заданная сумма
        :param number_of_clusters: количество кластеров(размерность тупла)
        :param graph_len: размер графа
        :return: комбинация
        """
        iterset = set()
        for x in itertools.product(range(1, given_sum),
                                   repeat=number_of_clusters):
            if not sum(x) == given_sum:
                continue
            xx = tuple(sorted([i for i in x]))
            if xx in iterset:
                continue
            min_xx = min(xx)
            max_xx = max(xx)
            if abs(min_xx - max_xx) > cut_bound_max:
                continue
            iterset.add(xx)
            yield xx

    @staticmethod
    def _find_pair_with_min_square(graph: Dict):
        """
        находит пару нод, которые являются соседними
        и сумма площадей которых является наименьшей
        в рамках актуального графа
        :param graph: граф с нодами
        :return:
        """
        nodes = [x for x in graph.values()]
        ans_max = []
        ans_min = []
        for node in nodes:
            adj_list = [(graph[x].name, graph[x].square)
                        for x in node.adjacent_list]
            if not adj_list:
                continue
            min_elem = adj_list[adj_list.index(
                min(adj_list, key=lambda x: x[1]))]
            ans_min.append((node.name, node.square, min_elem[0], min_elem[1]))

        res = ans_min[ans_min.index(min(ans_min, key=lambda x: x[1] + x[3]))]
        return (res[0], res[2])

    @staticmethod
    def _first_node_absorb_second(first: str, second: str, graph: Dict):
        """
        поглощение первой нодой второй ноды.
        проход по графу с внесением соответствующих изменений в списки смежности нод
        :param first: имя поглощающей ноды
        :param second: имя поглощаемой ноды
        :param graph: граф с нодами
        :return:
        """
        if first == second:
            return
        graph[first].absorbed.append(graph[second])
        graph[first].absorbed_names.add(second)
        graph[first].absorbed_names = graph[first].absorbed_names.union(
            graph[second].absorbed_names)
        graph[first].square += graph[second].square
        graph[first].adjacent_list.remove(second)
        graph[first].adjacent_list = graph[first].adjacent_list.union(
            graph[second].adjacent_list)
        graph[first].adjacent_list.discard(second)
        graph[first].adjacent_list.discard(first)
        graph.pop(second)
        for node in graph.values():
            if node == graph[first]:
                continue
            if second in node.adjacent_list:
                node.adjacent_list.remove(second)
                if first in node.adjacent_list:
                    continue
                node.adjacent_list.add(first)
        # input()

    @staticmethod
    def _reduce_nodes_quantity(graph: Dict, nodes_quantity: int):
        """
        уменьшает количество нод в графе до уровня nodes_quantity
        :param graph: граф с нодами-полями
        :param nodes_quantity:количество точек до которого уменьшится граф
        :return:
        """
        while len(graph) > nodes_quantity:
            first, second = Solver._find_pair_with_min_square(graph=graph)

            Solver._first_node_absorb_second(first=first,
                                             second=second,
                                             graph=graph)

    @staticmethod
    def _check_cluster_for_connectivity(graph: Dict, cluster: Cluster) -> bool:
        """
        метод осуществляет проверку кластера на связность, проверяя,
        находится ли в всписках смежности каждой из вершины, принадлежащей кластеру,
        какая нибудь вершина из этого же кластера
        :param graph: граф
        :param cluster: кластер
        :return: True-кластер 'связный'
        """

        # def BFS(self, s):

        # Mark all the vertices as not visited
        if not cluster.nodes_belong or len(cluster.nodes_belong) == 1:
            return True
        node_name = cluster.nodes_belong[0].name
        gr = {
            x: y
            for (x, y) in graph.items()
            if x in set([n.name for n in cluster.nodes_belong])
        }
        for x in gr.values():
            tmp = x.adjacent_list.copy()
            x.adjacent_list = set([
                n for n in x.adjacent_list
                if n in set([nn.name for nn in cluster.nodes_belong])
            ])
            x.cut_for_check = tmp.difference(x.adjacent_list)
        visited = {x: False for x in gr.keys()}

        # Create a queue for BFS
        queue = []

        # Mark the source node as
        # visited and enqueue it
        queue.append(node_name)
        visited[node_name] = True

        while queue:

            # Dequeue a vertex from
            # queue and print it
            s = queue.pop(0)

            # Get all adjacent vertices of the
            # dequeued vertex s. If a adjacent
            # has not been visited, then mark it
            # visited and enqueue it
            for name in gr[s].adjacent_list:
                if not visited[name]:
                    queue.append(name)
                    visited[name] = True
        for x in gr.values():
            x.adjacent_list = x.adjacent_list.union(x.cut_for_check)
            x.cut_for_check = set()

        return not (False in visited.values())

    def solve_new(self, cut_bound_max: int) -> List:
        """
        Brute-force clustering over the reduced graph.

        Steps, in order:

        1. Reduce `self.crop_map.graph` to `self.given_sum` nodes and rebuild
           `self.crop_map.clusternames` as index -> node name.
        2. Write "new_dots.xlsx": one row per original node name (a node's own
           name plus its absorbed names) with the index of the merged node.
        3. For every size combination from
           `_make_combinations_with_given_sum_extra`, enumerate the node sets
           of the first three clusters; the fourth cluster takes the
           remaining nodes.  Disconnected clusters and partitions already
           seen for the current size combination are skipped.
        4. Score each partition with
           `crop_map.count_cluster_metrics_mean_deviation` and keep the one
           with the smallest score (also stored in `self.leaders`).

        NOTE(review): only indices 0..2 of each size combination are used and
        exactly four clusters are built, so this appears to assume
        `number_of_clusters == 4` -- confirm.
        NOTE(review): `start` is reset at the top of every size combination,
        so the returned time is measured from the last reset, not from the
        beginning of the call -- confirm that this is intended.
        NOTE(review): the annotation says `List` but a tuple is returned.

        :param cut_bound_max: forwarded to
            `_make_combinations_with_given_sum_extra`
        :return: tuple `(time.time() - start, best_clusters, leader)`
        """
        start = time.time()
        self._reduce_nodes_quantity(self.crop_map.graph, self.given_sum)
        # Index -> node name for the nodes that survived the reduction.
        self.crop_map.clusternames = {
            x: y
            for (x, y) in zip(range(len(self.crop_map.graph.keys())),
                              self.crop_map.graph.keys())
        }

        # Built over all remaining nodes; the variable is not read again in
        # this method.
        cluster_for_checking_map_full_connecticity = Cluster(
            serial_num=-1, nodes_belong=list(self.crop_map.graph.values()))

        # Debug output of (name, adjacency) for every remaining node.
        ic([
            (
                self.crop_map.graph[x].name,
                # self.crop_map.graph[x].absorbed_names,
                self.crop_map.graph[x].adjacent_list,
            ) for x in self.crop_map.graph.keys()
        ])
        # Rows of [original node name, index of the merged node].
        new_dots = []
        for index, x in enumerate(self.crop_map.graph.values()):
            names = [x.name]
            names.extend(list(name for name in x.absorbed_names))
            for name in names:
                new_dots.append([name, index])

        new_dots_data = pd.DataFrame(data=new_dots)
        new_dots_data.to_excel(excel_writer="new_dots.xlsx", index=False)
        # Set of node indices still available for assignment.
        iterable = set([x for x in self.crop_map.clusternames.keys()])
        # Best (smallest) score found so far and the clusters that gave it.
        leader = math.inf
        best_clusters = []
        # Counts scored partitions; not read again in this method.
        counter = 0
        # Partitions already seen for the current size combination.
        oh_shit = set()
        start = time.time()
        for node_quantity_combination in self._make_combinations_with_given_sum_extra(
                given_sum=self.given_sum,
                number_of_clusters=self.number_of_clusters,
                graph_len=self.graph_len,
                cut_bound_max=cut_bound_max,
        ):
            ic(node_quantity_combination)
            start = time.time()
            for first_nums in itertools.combinations(
                    iterable=iterable, r=node_quantity_combination[0]):

                first_cluster = Cluster(
                    serial_num=0,
                    nodes_belong=[
                        self.crop_map.graph[self.crop_map.clusternames[x]]
                        for x in first_nums
                    ],
                )
                if not self._check_cluster_for_connectivity(
                        self.crop_map.graph, first_cluster):
                    # return
                    continue
                # Indices left after the first cluster took its nodes.
                new_it = iterable.difference(set(first_nums))
                for second_nums in itertools.combinations(
                        iterable=new_it, r=node_quantity_combination[1]):
                    second_cluster = Cluster(
                        serial_num=1,
                        nodes_belong=[
                            self.crop_map.graph[self.crop_map.clusternames[x]]
                            for x in second_nums
                        ],
                    )
                    if not self._check_cluster_for_connectivity(
                            self.crop_map.graph, second_cluster):
                        continue
                    newest_it = new_it.difference(set(second_nums))
                    for third_nums in itertools.combinations(
                            iterable=newest_it,
                            r=node_quantity_combination[2]):
                        # Whatever is left forms the fourth cluster.
                        new_newest_it = tuple(
                            newest_it.difference(set(third_nums)))
                        # Order-independent key of the partition, used to
                        # skip partitions that were already examined.
                        first = str(sorted([str(x) for x in first_nums]))
                        second = str(sorted([str(x) for x in second_nums]))
                        third = str(sorted([str(x) for x in third_nums]))
                        fourth = str(sorted([str(x) for x in new_newest_it]))
                        guess = tuple(sorted([first, second, third, fourth]))
                        if guess in oh_shit:
                            continue
                        oh_shit.add(guess)
                        third_cluster = Cluster(
                            serial_num=2,
                            nodes_belong=[
                                self.crop_map.graph[
                                    self.crop_map.clusternames[x]]
                                for x in third_nums
                            ],
                        )
                        if not self._check_cluster_for_connectivity(
                                self.crop_map.graph, third_cluster):
                            continue
                        fourth_cluster = Cluster(
                            serial_num=3,
                            nodes_belong=[
                                self.crop_map.graph[
                                    self.crop_map.clusternames[x]]
                                for x in new_newest_it
                            ],
                        )
                        if not self._check_cluster_for_connectivity(
                                self.crop_map.graph, fourth_cluster):
                            continue
                        clusters = [
                            first_cluster,
                            second_cluster,
                            third_cluster,
                            fourth_cluster,
                        ]
                        counter += 1
                        res = self.crop_map.count_cluster_metrics_mean_deviation(
                            clusters=clusters)
                        # Smaller score wins.
                        if res < leader:
                            leader = res
                            best_clusters = clusters
                            self.leaders = best_clusters
            iteration_time = time.time() - start
            ic(iteration_time, leader, [x.square for x in best_clusters])
            ic(iteration_time)
            # The seen-set is cleared for the next size combination.
            oh_shit = set()

        return (time.time() - start, best_clusters, leader)