def test_make_iteration():
    """One _make_iteration pass must keep clusters disjoint.

    Fix: the original ended with ``assert 1 == 0`` (a debug aid to force
    pytest output) and only *printed* the duplicate/overlap diagnostics
    ("PANIc", "shit", ...); those checks are now real assertions.
    """
    our_map = Map(
        number_of_clusters=4,
        data=data.loc[data["Ожидаемая урожайность / приплод"] == "КРДР"],
    )
    clusters = our_map._create_clusters(
        num_batch=(7, 15, 21, 42),
        clusternames=our_map.clusternames,
        number_of_clusters=4,
        graph=our_map.graph,
    )
    clusters = our_map._make_iteration(clusters=clusters, graph=our_map.graph)
    # assumes cluster.name is a space-separated list of node names — TODO confirm
    names = [x.name.split() for x in clusters]
    # no node name may be repeated inside a single cluster
    for parts in names:
        assert len(parts) == len(set(parts)), f"duplicate node in cluster: {parts}"
    # no node may be shared between two different clusters
    for i in range(len(names)):
        for j in range(i + 1, len(names)):
            common = set(names[i]).intersection(set(names[j]))
            assert not common, f"clusters {i} and {j} overlap: {common}"
def test_launch_algo():
    """Smoke test: launch_algo must run to completion on the filtered data.

    Fix: the original ended with ``assert 0 == 1``, which made the test
    fail unconditionally (debug scaffolding); removed so the test passes
    when launch_algo completes without raising.
    """
    our_map = Map(
        number_of_clusters=4,
        data=data.loc[data["Ожидаемая урожайность / приплод"] == "КРДР"],
    )
    our_map.launch_algo()
def test_clusters_chose_cluster_with_maxsquare_neighbor():
    """The cluster bordering the largest-square node must be chosen.

    Node "5" (square 10) is adjacent only to node "3", so the cluster
    seeded on "3" is the expected pick.
    """
    column_names = ["№", "neighbors", "S"]
    rows = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5", 1],
        ["5", "3", 10],
    ]
    our_map = Map(number_of_clusters=4,
                  data=pd.DataFrame(data=rows, columns=column_names))
    clusters = our_map._create_clusters(
        num_batch=(0, 1, 2),
        clusternames=our_map.clusternames,
        number_of_clusters=4,
        graph=our_map.graph,
    )
    print(
        "111",
        [(y.name, y.adjacent_list, x.serial_num)
         for x in clusters
         for y in x.nodes_belong],
    )
    print([x.adjacent_list for x in clusters])
    cluster_with_biggest_neithbor = (
        our_map._clusters_chose_cluster_with_maxsquare_neighbor(
            clusters, our_map.graph))
    print(cluster_with_biggest_neithbor)
    print([x for x in cluster_with_biggest_neithbor.adjacent_list])
    chosen_names = {x.name for x in cluster_with_biggest_neithbor.nodes_belong}
    assert chosen_names == {
        "3"
    }, f"{[x.name for x in cluster_with_biggest_neithbor.nodes_belong]=}"
def test_try_chose_smallest_cluster():
    """_try_chose_smallest_cluster must pick the cluster named "3" even
    when every node is already in the restricted set."""
    column_names = ["№", "neighbors", "S"]
    # NOTE(review): node name "3" appears twice in this fixture — the
    # second row was presumably meant to be "4"; confirm intent.
    rows = [
        ["1", "", 1],
        ["2", "", 7],
        ["3", "1", 10],
        ["3", "4", 20],
    ]
    our_map = Map(number_of_clusters=3,
                  data=pd.DataFrame(data=rows, columns=column_names))
    clusters = our_map._create_clusters(
        num_batch=(0, 1, 2),
        clusternames=our_map.clusternames,
        number_of_clusters=4,
        graph=our_map.graph,
    )
    # mark every node of every cluster as restricted before choosing
    for cluster in clusters:
        for node in cluster.nodes_belong:
            our_map.restricted_nodes.add(node.name)
    print(our_map.restricted_nodes)
    # TODO: turn this into a real check
    cluster_with_smallest_square = our_map._try_chose_smallest_cluster(
        clusters, our_map.restricted_nodes)
    print(our_map.restricted_nodes, [x.name for x in clusters])
    assert (cluster_with_smallest_square.name == "3"
            ), f"{cluster_with_smallest_square.name=}"
def test_cut_graph_for_cluster():
    """Every node surviving _cut_graph_for_cluster must either belong to
    the target cluster or belong to no cluster at all."""
    column_names = ["№", "neighbors", "S"]
    rows = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5, 6", 1],
        ["5", "3", 1],
        ["6", "3, 4", 1],
        ["4", "6, 2", 1],
    ]
    our_map = Map(number_of_clusters=4,
                  data=pd.DataFrame(data=rows, columns=column_names))
    clusters = our_map._create_clusters(
        num_batch=(0, 1, 2, 3),
        clusternames=our_map.clusternames,
        number_of_clusters=4,
        graph=our_map.graph,
    )
    print([(y.name, y.adjacent_list, x.serial_num)
           for x in clusters
           for y in x.nodes_belong])
    target_serial_num = 0
    cut_graph = our_map._cut_graph_for_cluster(graph=our_map.graph,
                                               clusternum=target_serial_num)
    print([(x, y.belongs_to_cluster) for x, y in our_map.graph.items()])
    print([(x, y.belongs_to_cluster, y.adjacent_list)
           for x, y in cut_graph.items()])
    print(cut_graph)
    for y in cut_graph.values():
        assert (y.belongs_to_cluster == target_serial_num
                or not y.belongs_to_cluster), f"{y.belongs_to_cluster=}"
def __init__(self, data: pd.DataFrame, given_sum: int, number_of_clusters: int):
    """Initialize the solver state.

    :param data: input table of fields — presumably columns
        "№" / "neighbors" / "S"; confirm against Map._build_graph
    :param given_sum: number of vertices the graph is reduced to before
        the brute-force enumeration
    :param number_of_clusters: number of clusters to partition the map into
    """
    # the Map owns the field graph built from the input table
    self.crop_map = Map(data=data, number_of_clusters=number_of_clusters)
    self.given_sum = given_sum
    self.number_of_clusters = number_of_clusters
    # graph size before any reduction
    self.graph_len = len(self.crop_map.graph)
    # best clusterings found so far
    self.leaders = []
def test_create_batches():
    """_create_batches over 5 nodes taken 3 at a time must yield C(5,3) = 10."""
    our_map = Map(number_of_clusters=4, data=data)
    result = list(our_map._create_batches(graph_len=5, number_of_clusters=3))
    assert len(result) == 10, f"{len(result)=}"
def test_cluster_join_field_default():
    """cluster_join_node with the max-square chooser must absorb node "3"
    (square 3, the largest neighbor) and merge its neighbors into the
    cluster's adjacency list."""
    test = Cluster(
        serial_num=1,
        nodes_belong=[Node("1", ["2", "3"], 1)],
    )
    column_names = ["№", "neighbors", "S"]
    rows = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5, 6", 3],
        ["5", "3", 1],
        ["6", "3, 4", 1],
        ["4", "6", 2],
    ]
    graph = Map._build_graph(
        data=pd.DataFrame(data=rows, columns=column_names))
    candidates = [graph[x] for x in test.adjacent_list]
    node_chosen = test.cluster_join_node(
        func_field_chose=test._chose_max_square_node, nodes=candidates)
    assert node_chosen == "3", f"{node_chosen=}"
    assert test.adjacent_list == {"2", "5", "6"}, f"{test.adjacent_list=}"
def test_cluster_join_specific_node():
    """cluster_join_specific_node must absorb the requested node "3" and
    update the cluster's adjacency list accordingly."""
    test = Cluster(
        serial_num=1,
        nodes_belong=[Node("1", ["2", "3"], 1)],
    )
    column_names = ["№", "neighbors", "S"]
    rows = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5, 6", 3],
        ["5", "3", 1],
        ["6", "3, 4", 1],
        ["4", "6", 2],
    ]
    graph = Map._build_graph(
        data=pd.DataFrame(data=rows, columns=column_names))
    # NOTE(review): built but unused in the original as well; kept for parity
    nodes = [graph[x] for x in test.adjacent_list]
    print(test.adjacent_list)
    node_chosen = test.cluster_join_specific_node(graph["3"])
    print(test.adjacent_list)
    assert node_chosen == "3", f"{node_chosen=}"
    assert test.adjacent_list == {"2", "5", "6"}, f"{test.adjacent_list=}"
def test_create_clusters():
    """Seeding three single-node clusters over a triangle graph must leave
    every cluster with an empty adjacency list (all nodes are taken)."""
    column_names = ["№", "neighbors", "S"]
    rows = [
        ["1", "2, 3", 1],
        ["2", "1, 3", 1],
        ["3", "1, 2", 1],
    ]
    our_map = Map(number_of_clusters=4,
                  data=pd.DataFrame(data=rows, columns=column_names))
    result = our_map._create_clusters(
        num_batch=(0, 1, 2),
        clusternames=our_map.clusternames,
        number_of_clusters=4,
        graph=our_map.graph,
    )
    assert len(result) == 3, f"{len(result)=}"
    for x in result:
        assert not x.adjacent_list, f"{x.adjacent_list=}"
def test_clusters_chose_cluster_with_smallest_square():
    """The cluster seeded on node "5" (square 3, the smallest) must be
    returned by _clusters_chose_cluster_with_smallest_square.

    Fix: the original assert was missing the comma before its f-string
    message (``assert x == 3.0 f"..."``), which is a SyntaxError.
    """
    column_names = ["№", "neighbors", "S"]
    data = [
        ["1", "2, 3", 10],
        ["2", "1", 5],
        ["3", "1, 5", 4],
        ["5", "3", 3],
    ]
    dataframe = pd.DataFrame(data=data, columns=column_names)
    our_map = Map(number_of_clusters=4, data=dataframe)
    clusters = our_map._create_clusters(
        num_batch=(0, 1, 2, 3),
        clusternames=our_map.clusternames,
        number_of_clusters=4,
        graph=our_map.graph,
    )
    print([(y.name, y.adjacent_list, x.serial_num, x.square)
           for x in clusters
           for y in x.nodes_belong])
    cluster_with_smallest_square = (
        our_map._clusters_chose_cluster_with_smallest_square(clusters))
    print(cluster_with_smallest_square.serial_num)
    assert cluster_with_smallest_square.square == 3.0, (
        f"{cluster_with_smallest_square.serial_num=}, "
        f"{cluster_with_smallest_square.square=}")
def validate_double_edges(data: pd.DataFrame) -> List:
    """Validate that the input table describes an undirected graph.

    For every node, each neighbor in its adjacency list must list the node
    back; directed (one-way) edges are printed and collected.

    Fix: replaced the non-idiomatic ``not key in ...`` with ``key not in ...``.

    :param data: input table of fields — presumably columns
        "№" / "neighbors" / "S"; confirm against Map._build_graph
    :return: list of (node, neighbor) pairs missing the reverse edge
    """
    graph = Map._build_graph(data)
    res = []
    for key in graph.keys():
        for node_name in graph[key].adjacent_list:
            # NOTE(review): raises KeyError if a listed neighbor is absent
            # from the graph — confirm _build_graph guarantees closure
            if key not in graph[node_name].adjacent_list:
                print("PANIC", key, node_name)
                res.append((key, node_name))
    return res
def validate_node_name_not_in_adj_list(data: pd.DataFrame) -> List:
    """Validate the input table: no node may list itself as its own neighbor.

    Args:
        data: input table of fields

    Returns:
        list of (name, adjacent_list) pairs for self-referencing nodes
    """
    graph = Map._build_graph(data)
    return [(node.name, node.adjacent_list)
            for node in graph.values()
            if node.name in node.adjacent_list]
def test_build_graph_default():
    """_build_graph must parse names, comma-separated neighbor lists and
    squares into Node objects keyed by node name."""
    column_names = ["№", "neighbors", "S"]
    rows = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5, 6", 1],
        ["5", "3", 1],
        ["6", "3, 4", 1],
        ["4", "6", 1],
    ]
    graph = Map._build_graph(
        data=pd.DataFrame(data=rows, columns=column_names))
    expected = {
        "1": Node(name="1", adjacent_list={"2", "3"}, square=1),
        "2": Node(name="2", adjacent_list={"1"}, square=1),
        "3": Node(name="3", adjacent_list={"1", "5", "6"}, square=1),
        "5": Node(name="5", adjacent_list={"3"}, square=1),
        "6": Node(name="6", adjacent_list={"3", "4"}, square=1),
        "4": Node(name="4", adjacent_list={"6"}, square=1),
    }
    assert all(graph[name] == node for name, node in expected.items()
               ), f"{graph['1'].adjacent_list=}"
def test_clusters_bring_closer():
    """After the smallest cluster absorbs its largest neighbor and the
    remaining clusters are brought closer, every cluster's square must
    equal 11.0 (total area 40 split over the clusters)."""
    column_names = ["№", "neighbors", "S"]
    rows = [
        ["1", "2, 3", 1],
        ["2", "1, 3, 4", 7],
        ["3", "1, 2, 5, 6", 10],
        ["4", "2, 5, 8", 6],
        ["5", "3, 4, 6, 7, 8", 4],
        ["6", "3, 5, 7", 6],
        ["7", "5, 6, 8", 1],
        ["8", "4, 5, 7", 5],
    ]
    our_map = Map(number_of_clusters=3,
                  data=pd.DataFrame(data=rows, columns=column_names))
    clusters = our_map._create_clusters(
        num_batch=(0, 5, 7),
        clusternames=our_map.clusternames,
        number_of_clusters=4,
        graph=our_map.graph,
    )
    smallest = our_map._clusters_chose_cluster_with_smallest_square(clusters)
    # let the smallest cluster grab the biggest-square node on its border
    node = smallest._chose_max_square_node(
        [our_map.graph[x] for x in smallest.adjacent_list])
    clusters.pop(clusters.index(smallest))
    smallest.cluster_join_specific_node(node)
    # TODO: pass a real restricted-node set instead of the placeholder
    our_map._clusters_bring_closer(clusters, smallest.square, set(["a", "b"]))
    clusters.append(smallest)
    for cluster in clusters:
        assert cluster.square == 11.0, f"{cluster.name=}, {cluster.square=}"
class Solver:
    """
    Brute-force solver: vertices are first merged ("absorbed") pairwise
    until only ``given_sum`` of them remain, then every partition of the
    reduced graph into connected clusters is enumerated and scored.
    """

    def __init__(self, data: pd.DataFrame, given_sum: int, number_of_clusters: int):
        """Initialize the solver.

        :param data: input table of fields
        :param given_sum: number of vertices the graph is reduced to
        :param number_of_clusters: number of clusters to partition into
        """
        self.crop_map = Map(data=data, number_of_clusters=number_of_clusters)
        self.given_sum = given_sum
        self.number_of_clusters = number_of_clusters
        # graph size before reduction
        self.graph_len = len(self.crop_map.graph)
        # best clusterings found so far
        self.leaders = []

    @staticmethod
    def _make_combinations_with_given_sum(given_sum: int,
                                          number_of_clusters: int,
                                          graph_len: int) -> Iterator[Tuple]:
        """
        Yield every tuple of the given length whose elements sum to the target.

        :param given_sum: the target sum
        :param number_of_clusters: number of clusters (tuple dimension)
        :param graph_len: graph size (unused here)
        :return: combination tuples
        """
        for x in itertools.product(range(1, given_sum),
                                   repeat=number_of_clusters):
            if not sum(x) == given_sum:
                continue
            yield x

    @staticmethod
    def _make_combinations_with_given_sum_new(
            given_sum: int, number_of_clusters: int,
            graph_len: int) -> Iterator[Tuple]:
        """
        Yield combinations with the given sum, deduplicated up to ordering:
        each sorted tuple is produced only once.

        :param given_sum: the target sum
        :param number_of_clusters: number of clusters (tuple dimension)
        :param graph_len: graph size (unused here)
        :return: sorted combination tuples
        """
        iterset = set()
        for x in itertools.product(range(1, given_sum),
                                   repeat=number_of_clusters):
            if not sum(x) == given_sum:
                continue
            xx = tuple(sorted([i for i in x]))
            if xx in iterset:
                continue
            iterset.add(xx)
            yield xx

    @staticmethod
    def _make_combinations_with_given_sum_extra(
            given_sum: int, number_of_clusters: int, graph_len: int,
            cut_bound_max: int) -> Iterator[Tuple]:
        """
        Deduplicated combinations with the given sum, additionally skipping
        combinations whose min/max spread exceeds ``cut_bound_max``.

        :param given_sum: the target sum
        :param number_of_clusters: number of clusters (tuple dimension)
        :param graph_len: graph size (unused here)
        :param cut_bound_max: maximum allowed |min - max| within a tuple
        :return: sorted combination tuples
        """
        iterset = set()
        for x in itertools.product(range(1, given_sum),
                                   repeat=number_of_clusters):
            if not sum(x) == given_sum:
                continue
            xx = tuple(sorted([i for i in x]))
            if xx in iterset:
                continue
            min_xx = min(xx)
            max_xx = max(xx)
            # prune overly unbalanced cluster-size combinations
            if abs(min_xx - max_xx) > cut_bound_max:
                continue
            iterset.add(xx)
            yield xx

    @staticmethod
    def _find_pair_with_min_square(graph: Dict):
        """
        Find a pair of adjacent nodes whose combined square is the smallest
        in the current graph.

        :param graph: graph of nodes
        :return: (node_name, neighbor_name) of the cheapest adjacent pair
        """
        nodes = [x for x in graph.values()]
        ans_max = []  # NOTE(review): never used — leftover from an earlier version?
        ans_min = []
        for node in nodes:
            adj_list = [(graph[x].name, graph[x].square)
                        for x in node.adjacent_list]
            if not adj_list:
                continue
            # the neighbor with the smallest square
            min_elem = adj_list[adj_list.index(
                min(adj_list, key=lambda x: x[1]))]
            ans_min.append((node.name, node.square, min_elem[0], min_elem[1]))
        # pick the entry minimizing own square + neighbor square
        res = ans_min[ans_min.index(min(ans_min, key=lambda x: x[1] + x[3]))]
        return (res[0], res[2])

    @staticmethod
    def _first_node_absorb_second(first: str, second: str, graph: Dict):
        """
        Have the first node absorb the second: merge absorbed-name sets,
        squares and adjacency lists, drop the second node from the graph,
        and rewire every other node's adjacency from ``second`` to ``first``.

        :param first: name of the absorbing node
        :param second: name of the absorbed node
        :param graph: graph of nodes (mutated in place)
        :return: None
        """
        if first == second:
            return
        graph[first].absorbed.append(graph[second])
        graph[first].absorbed_names.add(second)
        graph[first].absorbed_names = graph[first].absorbed_names.union(
            graph[second].absorbed_names)
        graph[first].square += graph[second].square
        graph[first].adjacent_list.remove(second)
        graph[first].adjacent_list = graph[first].adjacent_list.union(
            graph[second].adjacent_list)
        # never keep a self-edge or a dangling edge to the removed node
        graph[first].adjacent_list.discard(second)
        graph[first].adjacent_list.discard(first)
        graph.pop(second)
        for node in graph.values():
            if node == graph[first]:
                continue
            if second in node.adjacent_list:
                node.adjacent_list.remove(second)
                if first in node.adjacent_list:
                    continue
                node.adjacent_list.add(first)
        # input()

    @staticmethod
    def _reduce_nodes_quantity(graph: Dict, nodes_quantity: int):
        """
        Shrink the graph down to ``nodes_quantity`` nodes by repeatedly
        absorbing the cheapest adjacent pair.

        :param graph: graph of field nodes (mutated in place)
        :param nodes_quantity: target number of nodes
        :return: None
        """
        while len(graph) > nodes_quantity:
            first, second = Solver._find_pair_with_min_square(graph=graph)
            Solver._first_node_absorb_second(first=first,
                                             second=second,
                                             graph=graph)

    @staticmethod
    def _check_cluster_for_connectivity(graph: Dict, cluster: Cluster) -> bool:
        """
        Check the cluster for connectivity via BFS over the subgraph induced
        by the cluster's nodes; adjacency lists are temporarily trimmed to
        in-cluster neighbors and restored afterwards.

        :param graph: the full graph
        :param cluster: the cluster to check
        :return: True if the cluster is connected
        """
        # trivial clusters are connected by definition
        if not cluster.nodes_belong or len(cluster.nodes_belong) == 1:
            return True
        node_name = cluster.nodes_belong[0].name
        # induced subgraph: only nodes belonging to the cluster
        gr = {
            x: y
            for (x, y) in graph.items()
            if x in set([n.name for n in cluster.nodes_belong])
        }
        # trim adjacency to in-cluster neighbors, stashing the cut edges
        for x in gr.values():
            tmp = x.adjacent_list.copy()
            x.adjacent_list = set([
                n for n in x.adjacent_list
                if n in set([nn.name for nn in cluster.nodes_belong])
            ])
            x.cut_for_check = tmp.difference(x.adjacent_list)
        visited = {x: False for x in gr.keys()}
        # plain BFS from the first node of the cluster
        # NOTE(review): list.pop(0) is O(n); collections.deque would be O(1)
        queue = []
        queue.append(node_name)
        visited[node_name] = True
        while queue:
            s = queue.pop(0)
            for name in gr[s].adjacent_list:
                if not visited[name]:
                    queue.append(name)
                    visited[name] = True
        # restore the adjacency lists that were trimmed above
        for x in gr.values():
            x.adjacent_list = x.adjacent_list.union(x.cut_for_check)
            x.cut_for_check = set()
        # connected iff BFS reached every node of the cluster
        return not (False in visited.values())

    def solve_new(self, cut_bound_max: int) -> List:
        """
        Brute-force clustering: reduce the graph to ``given_sum`` vertices,
        then enumerate all 4-way partitions (sizes from
        _make_combinations_with_given_sum_extra), keep only partitions
        whose clusters are all connected, and track the partition with the
        lowest mean-deviation metric.

        Side effects: mutates self.crop_map, writes "new_dots.xlsx",
        stores the best clustering in self.leaders.

        :param cut_bound_max: max allowed size spread within a combination
        :return: (elapsed seconds, best clusters, best metric value)
        """
        start = time.time()  # NOTE(review): reassigned below — total-time reading is lost
        self._reduce_nodes_quantity(self.crop_map.graph, self.given_sum)
        # re-index cluster names over the reduced graph
        self.crop_map.clusternames = {
            x: y
            for (x, y) in zip(range(len(self.crop_map.graph.keys())),
                              self.crop_map.graph.keys())
        }
        # NOTE(review): unused — presumably meant for a whole-map connectivity check
        cluster_for_checking_map_full_connecticity = Cluster(
            serial_num=-1, nodes_belong=list(self.crop_map.graph.values()))
        ic([
            (
                self.crop_map.graph[x].name,
                # self.crop_map.graph[x].absorbed_names,
                self.crop_map.graph[x].adjacent_list,
            ) for x in self.crop_map.graph.keys()
        ])
        # dump the mapping original-node -> reduced-node index for inspection
        new_dots = []
        for index, x in enumerate(self.crop_map.graph.values()):
            names = [x.name]
            names.extend(list(name for name in x.absorbed_names))
            for name in names:
                new_dots.append([name, index])
        new_dots_data = pd.DataFrame(data=new_dots)
        new_dots_data.to_excel(excel_writer="new_dots.xlsx", index=False)
        iterable = set([x for x in self.crop_map.clusternames.keys()])
        leader = math.inf
        best_clusters = []
        counter = 0  # NOTE(review): incremented but never read
        # memo of partitions already evaluated (as sorted name strings)
        oh_shit = set()
        start = time.time()
        for node_quantity_combination in self._make_combinations_with_given_sum_extra(
                given_sum=self.given_sum,
                number_of_clusters=self.number_of_clusters,
                graph_len=self.graph_len,
                cut_bound_max=cut_bound_max,
        ):
            ic(node_quantity_combination)
            start = time.time()  # per-combination timer; shadows the totals above
            for first_nums in itertools.combinations(
                    iterable=iterable, r=node_quantity_combination[0]):
                first_cluster = Cluster(
                    serial_num=0,
                    nodes_belong=[
                        self.crop_map.graph[self.crop_map.clusternames[x]]
                        for x in first_nums
                    ],
                )
                # disconnected first cluster: skip this choice entirely
                if not self._check_cluster_for_connectivity(
                        self.crop_map.graph, first_cluster):
                    # return
                    continue
                new_it = iterable.difference(set(first_nums))
                for second_nums in itertools.combinations(
                        iterable=new_it, r=node_quantity_combination[1]):
                    second_cluster = Cluster(
                        serial_num=1,
                        nodes_belong=[
                            self.crop_map.graph[self.crop_map.clusternames[x]]
                            for x in second_nums
                        ],
                    )
                    if not self._check_cluster_for_connectivity(
                            self.crop_map.graph, second_cluster):
                        continue
                    newest_it = new_it.difference(set(second_nums))
                    for third_nums in itertools.combinations(
                            iterable=newest_it, r=node_quantity_combination[2]):
                        # the fourth cluster takes whatever is left over
                        new_newest_it = tuple(
                            newest_it.difference(set(third_nums)))
                        first = str(sorted([str(x) for x in first_nums]))
                        second = str(sorted([str(x) for x in second_nums]))
                        third = str(sorted([str(x) for x in third_nums]))
                        fourth = str(sorted([str(x) for x in new_newest_it]))
                        guess = tuple(sorted([first, second, third, fourth]))
                        # skip partitions already seen in a different order
                        if guess in oh_shit:
                            continue
                        oh_shit.add(guess)
                        third_cluster = Cluster(
                            serial_num=2,
                            nodes_belong=[
                                self.crop_map.graph[
                                    self.crop_map.clusternames[x]]
                                for x in third_nums
                            ],
                        )
                        if not self._check_cluster_for_connectivity(
                                self.crop_map.graph, third_cluster):
                            continue
                        fourth_cluster = Cluster(
                            serial_num=3,
                            nodes_belong=[
                                self.crop_map.graph[
                                    self.crop_map.clusternames[x]]
                                for x in new_newest_it
                            ],
                        )
                        if not self._check_cluster_for_connectivity(
                                self.crop_map.graph, fourth_cluster):
                            continue
                        clusters = [
                            first_cluster,
                            second_cluster,
                            third_cluster,
                            fourth_cluster,
                        ]
                        counter += 1
                        res = self.crop_map.count_cluster_metrics_mean_deviation(
                            clusters=clusters)
                        # keep the best (lowest-deviation) partition so far
                        if res < leader:
                            leader = res
                            best_clusters = clusters
                            self.leaders = best_clusters
            iteration_time = time.time() - start
            ic(iteration_time, leader, [x.square for x in best_clusters])
            ic(iteration_time)
            oh_shit = set()
        # NOTE(review): 'start' was reset per combination above, so this
        # elapsed value covers only the last combination, not the whole
        # solve — likely a bug; confirm intent before relying on it
        return (time.time() - start, best_clusters, leader)