def test_cluster_join_field_default():
    """Joining via the max-square chooser picks node "3" and updates adjacency."""
    test = Cluster(
        serial_num=1,
        nodes_belong=[Node("1", ["2", "3"], 1)],
    )
    rows = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5, 6", 3],
        ["5", "3", 1],
        ["6", "3, 4", 1],
        ["4", "6", 2],
    ]
    dataframe = pd.DataFrame(data=rows, columns=["№", "neighbors", "S"])
    graph = Map._build_graph(data=dataframe)
    candidates = [graph[name] for name in test.adjacent_list]
    node_chosen = test.cluster_join_node(
        func_field_chose=test._chose_max_square_node, nodes=candidates)
    # "3" has the largest square (3) among the cluster's neighbors.
    assert node_chosen == "3", f"{node_chosen=}"
    # After joining "3", its neighbors (minus members) become adjacent.
    assert test.adjacent_list == {"2", "5", "6"}, f"{test.adjacent_list=}"
def test_cluster_join_specific_node():
    """Joining an explicitly chosen node ("3") updates the adjacency set."""
    test = Cluster(
        serial_num=1,
        nodes_belong=[Node("1", ["2", "3"], 1)],
    )
    rows = [
        ["1", "2, 3", 1],
        ["2", "1", 1],
        ["3", "1, 5, 6", 3],
        ["5", "3", 1],
        ["6", "3, 4", 1],
        ["4", "6", 2],
    ]
    dataframe = pd.DataFrame(data=rows, columns=["№", "neighbors", "S"])
    graph = Map._build_graph(data=dataframe)
    # Fixed: removed the unused ``nodes`` list comprehension and the two
    # leftover debug print() calls of ``test.adjacent_list``.
    node_chosen = test.cluster_join_specific_node(graph["3"])
    assert node_chosen == "3", f"{node_chosen=}"
    assert test.adjacent_list == {"2", "5", "6"}, f"{test.adjacent_list=}"
def test_check_cluster_for_connectivity_basic_1():
    """Connectivity check: {1..5} is split ({4,5} is disjoint); {1,2,3} is connected."""
    dataframe = pd.DataFrame(
        data=[
            ["1", "2,3", 1],
            ["2", "1,3", 2],
            ["3", "1,2, 10", 5],
            ["4", "5", 3],
            ["5", "4", 3],
        ],
        columns=["№", "neighbors", "S"],
    )
    solver = Solver(data=dataframe, given_sum=20, number_of_clusters=4)
    graph = solver.crop_map.graph
    disconnected = Cluster(
        serial_num=0,
        nodes_belong=[graph[name] for name in ("1", "2", "3", "4", "5")],
    )
    assert not solver._check_cluster_for_connectivity(
        graph=graph, cluster=disconnected)
    connected = Cluster(
        serial_num=0,
        nodes_belong=[graph[name] for name in ("1", "2", "3")],
    )
    assert solver._check_cluster_for_connectivity(
        graph=graph, cluster=connected)
def test_reduce_node_quantity_custom_ex():
    """After shrinking a 17-node graph to 2 nodes the remainder stays connected."""
    target_len = 2
    rows = [
        ["1", "2,3,4", 2],
        ["2", "1,3,5", 4],
        ["3", "1,2,4,5", 6],
        ["4", "1,3,5", 8],
        ["5", "2,3,4,13,16", 10],
        ["6", "7,8,9", 1.01],
        ["7", "6,8,10", 1.02],
        ["8", "6,7,9,11", 1.03],
        ["9", "6,8,12", 1.04],
        ["10", "7,11,13", 1.05],
        ["11", "8,10,12,14", 1.06],
        ["12", "9,11,15", 1.07],
        ["13", "5,10,14,17", 1.08],
        ["14", "11,13,15", 1.09],
        ["15", "12,14", 1.10],
        ["16", "5,17", 50],
        ["17", "13,16", 50],
    ]
    dataframe = pd.DataFrame(data=rows, columns=["№", "neighbors", "S"])
    solver = Solver(data=dataframe, given_sum=20, number_of_clusters=4)
    solver._reduce_nodes_quantity(graph=solver.crop_map.graph,
                                  nodes_quantity=target_len)
    assert len(
        solver.crop_map.graph) == target_len, f"{len(solver.crop_map.graph)=}"
    # A cluster over every surviving node must still be connected.
    remaining = Cluster(serial_num=0,
                        nodes_belong=list(solver.crop_map.graph.values()))
    assert solver._check_cluster_for_connectivity(
        graph=solver.crop_map.graph, cluster=remaining)
def test_chose_max_square_node():
    """_chose_max_square_node returns the node with the largest square."""
    nodes = [
        Node(name="1", adjacent_list={"1", "2", "3"}, square=1),
        Node(name="2", adjacent_list={"1", "2", "3"}, square=2),
        Node(name="3", adjacent_list={"1", "2", "3"}, square=3),
    ]
    max_square_node = Cluster._chose_max_square_node(nodes=nodes)
    # Fixed assert message: the original read f"..., f{max_square_node.square=}"
    # with a stray literal "f" before the second interpolation.
    assert (max_square_node.name == "3"
            ), f"{max_square_node.name=}, {max_square_node.square=}"
def test_cluster_default():
    """A new Cluster merges member adjacency (minus members) and sums squares."""
    members = [Node("1", ["3", "4", "5", "2"], 1), Node("2", ["6"], 1)]
    test = Cluster(
        serial_num=1,
        nodes_belong=members,
    )
    # Member names "1" and "2" are excluded from the merged adjacency set.
    assert test.adjacent_list == {"3", "4", "5", "6"}, f"{test.adjacent_list=}"
    # Every member node gets tagged with the cluster's serial number.
    for x in test.nodes_belong:
        assert x.belongs_to_cluster == 1, f"{x.name=}, {x.belongs_to_cluster=}"
    # Total square is the sum of the member squares (1 + 1).
    assert test.square == 2, f"{test.square=}"
def test_reduce_node_quantity_basic():
    """Shrink the spreadsheet-backed graph to 20 nodes; result must stay connected.

    TODO: come up with a realistic case for the connectivity check.
    """
    # NOTE(review): writes the module-level ``data`` — looks unnecessary for a
    # test, but kept to preserve behavior; confirm nothing else reads it.
    global data
    nodes_len = 20
    data = pd.read_excel(datapath)
    solver = Solver(data=data, given_sum=20, number_of_clusters=4)
    solver._reduce_nodes_quantity(graph=solver.crop_map.graph,
                                  nodes_quantity=nodes_len)
    assert len(
        solver.crop_map.graph) == nodes_len, f"{len(solver.crop_map.graph)=}"
    # A cluster spanning every remaining node must still be connected.
    whole_graph_cluster = Cluster(
        serial_num=0,
        nodes_belong=list(solver.crop_map.graph.values()))
    assert solver._check_cluster_for_connectivity(
        graph=solver.crop_map.graph, cluster=whole_graph_cluster)
def test_cluster_chose_closest_to_square_field():
    """chose_closest_to_square_node picks the candidate nearest the target square."""
    nodes = [
        Node(name, None, square)
        for name, square in [("1", 1), ("2", 2), ("3", 3), ("6", 6), ("7", 7)]
    ]
    current_square = 10
    target_square = 15
    optimal_node = Cluster.chose_closest_to_square_node(
        nodes=nodes, current_square=current_square, target_square=target_square)
    # Presumably: 10 + 6 = 16 is the candidate sum closest to the target 15.
    assert (optimal_node.name == "6"
            ), f"{optimal_node.name=}, {current_square=}, {target_square=}"
def _create_clusters(num_batch: Tuple, clusternames: Dict,
                     number_of_clusters: int, graph: Dict) -> List[Cluster]:
    """Initialise ``number_of_clusters`` clusters, one seed node each.

    Args:
        num_batch: keys into ``clusternames`` selecting each cluster's seed node.
        clusternames: mapping from batch key to node name.
        number_of_clusters: how many clusters to create.
        graph: node-name -> Node mapping.

    Returns:
        List of single-node clusters; every seed node's name is removed from
        every cluster's adjacency set so no cluster lists another's seed.
    """
    # Fixed: removed the dead local ``a = [graph.keys()]`` the original built
    # and never used.
    cluster_list = [
        Cluster(
            serial_num=x,
            nodes_belong=[graph[clusternames[y]]],
        ) for x, y in zip(range(number_of_clusters), num_batch)
    ]
    # Names of all seed nodes across the freshly created clusters.
    nodes_belongs_to_cluster = {
        y.name for x in cluster_list for y in x.nodes_belong
    }
    for x in cluster_list:
        x.adjacent_list = x.adjacent_list.difference(nodes_belongs_to_cluster)
    return cluster_list
def launch_algo(self) -> bool:
    """Brute-force search over seed combinations for the best clustering.

    For every seed combination from ``_create_batches`` this builds one
    single-node cluster per seed, temporarily hides the seed nodes from every
    node's adjacency set, runs ``_make_iteration``, then restores the graph.
    The combination with the smallest mean deviation (per
    ``count_cluster_metrics_mean_deviation``) is remembered and printed.

    NOTE(review): annotated ``-> bool`` but nothing is ever returned
    (implicitly returns None) — confirm the intended return contract.
    """
    combinations = self._create_batches(
        graph_len=len(self.graph),
        number_of_clusters=self.number_of_clusters)
    i = 0
    current_leader = None
    # Sentinel "infinity" so the first combination always becomes the leader.
    current_min_deviation = 999999999999
    global_start = time.time()
    etalon_combination = None
    for combination in combinations:
        start = time.time()
        # Debug: total adjacency size before masking the seed nodes.
        print(
            sum([
                len(value.adjacent_list) for value in self.graph.values()
            ]))
        # One single-node cluster per seed of this combination.
        cluster_list = [
            Cluster(
                serial_num=x,
                nodes_belong=[self.graph[self.clusternames[y]]],
            ) for x, y in zip(range(self.number_of_clusters), combination)
        ]
        # TODO: move populating restricted_nodes into a separate function
        for cluster in cluster_list:
            for node in cluster.nodes_belong:
                if node:
                    self.restricted_nodes.add(node.name)
        # Seed nodes may not appear in any cluster's adjacency set.
        for cluster in cluster_list:
            cluster.adjacent_list = cluster.adjacent_list.difference(
                self.restricted_nodes)
        # for key in self.graph.keys():
        #     self.graph[key].adjacent_list = self.graph[key].adjacent_list.difference(self.restricted_nodes)
        # Hide restricted nodes from the whole graph, remembering what was
        # removed so it can be restored after the iteration.
        for key in self.graph.keys():
            for r_node in self.restricted_nodes:
                if r_node in self.graph[key].adjacent_list:
                    self.graph[key].adjacent_list_deleted.add(r_node)
                    self.graph[key].adjacent_list.discard(r_node)
        res = self._make_iteration(clusters=cluster_list, graph=self.graph)
        # Restore the adjacency entries hidden above.
        for key in self.graph.keys():
            self.graph[key].adjacent_list = self.graph[
                key].adjacent_list_deleted.union(
                    self.graph[key].adjacent_list)
            self.graph[key].adjacent_list_deleted = set()
        # Debug: total adjacency size after restoration (should match the
        # value printed before masking).
        print(
            sum([
                len(value.adjacent_list) for value in self.graph.values()
            ]))
        self.restricted_nodes = set()
        i += 1
        # print(self.etalon_square)
        # print(self.count_cluster_metrics_max(clusters))
        # print([x.square for x in clusters], [x.adjacent_list for x in clusters])
        end = time.time()
        # print(end - start)
        # Keep the combination with the smallest mean deviation seen so far.
        if current_min_deviation >= self.count_cluster_metrics_mean_deviation(
                res):
            current_min_deviation = self.count_cluster_metrics_mean_deviation(
                res)
            current_leader = (
                [x.square for x in res],
                [x.name for x in res],
                [x.adjacent_list for x in res],
            )
            print("!",
                  combination)
            print(current_leader)
            etalon_combination = combination
    global_end = time.time()
    print("100000", global_end - global_start)
    print(current_leader, self.etalon_square)
    # Sanity check: within each leader cluster's name string, the
    # whitespace-split tokens must be unique (list vs. set length agree).
    names = [x.split() for x in current_leader[1]]
    names_2 = [set(x.split()) for x in current_leader[1]]
    for x, y in zip(names, names_2):
        if len(x) != len(y):
            print("PANIc")
            print(x)
            print(y)
    print(names)
    # Sanity check: no node name may appear in two different leader clusters.
    for i in range(len(names)):
        for j in range(i, len(names)):
            if i != j:
                print("shit", i, j, set(names[i]).intersection(set(names[j])))
    names = [x.split() for x in current_leader[1]]
    print(names)
    print(etalon_combination)
def solve():
    """Cluster the field map from a spreadsheet and write the result to Excel.

    Command-line options:
        -b / --border   cut bound passed to ``solve_new`` (integer)
        -d / --dots     target node quantity (integer)
        -f / --file     input spreadsheet name under the package ``data`` dir

    Raises ValueError on missing/non-numeric ``-b``/``-d`` values and exits
    with -1 on unrecognized options.
    """
    datapath = Path(__file__).resolve().parents[1] / "data"
    try:
        opts, args = getopt.getopt(sys.argv[1:], "b:d:f:",
                                   ["border=", "dots=", "file="])
        border = None
        dots = None
        for opt, arg in opts:
            if opt in ("-b", "--border"):
                border = arg
            elif opt in ("-d", "--dots"):
                dots = arg
            elif opt in ("-f", "--file"):
                datapath = datapath / arg
        # Fixed: the original used a bare ``raise`` outside an except block
        # (RuntimeError "No active exception to re-raise") and crashed with
        # AttributeError when a flag was omitted (None.isdigit()).
        if border is None or not border.isdigit():
            raise ValueError("border (-b) must be a non-negative integer")
        if dots is None or not dots.isdigit():
            raise ValueError("dots (-d) must be a non-negative integer")
        border = int(border)
        dots = int(dots)
        data = pd.read_excel(datapath)
        graph_constructor.construct_graph(data)
        print(data)
        # TODO: the old adjacency-list validators (validate,
        # validate_double_edges, validate_node_name_not_in_adj_list) were
        # dropped: field centers and the Gabriel graph are to be derived
        # from coordinates instead.
        solver = Solver(data=data, given_sum=dots, number_of_clusters=4)
        # The whole map must be connected before clustering is attempted.
        whole_map_cluster = Cluster(
            serial_num=-1, nodes_belong=list(solver.crop_map.graph.values()))
        if not Solver._check_cluster_for_connectivity(
                solver.crop_map.graph, whole_map_cluster):
            ic("not_connected_initial_state")
            return
        res_new = solver.solve_new(cut_bound_max=border)
        ic(res_new)
        ic([x.square for x in res_new[1]])
        ic(solver.crop_map.etalon_square)
        # Flatten clusters to [node_name, cluster_index] rows.
        # Fixed: the original duplicated the extend/append block, emitting
        # every name twice (and the last cluster's names up to four times).
        new_clusters = []
        for index, cluster in enumerate(res_new[1]):
            names = []
            for node in cluster.nodes_belong:
                names.append(node.name)
                names.extend(node.absorbed_names)
            for name in names:
                new_clusters.append([name, index])
        # Fixed: variable name previously contained a Cyrillic "с".
        new_clusters_data = pd.DataFrame(data=new_clusters)
        new_clusters_data.to_excel(excel_writer="kmn_21_1_new_clusters.xlsx",
                                   index=False)
    except getopt.GetoptError:
        # Fixed: the original called the ``logging`` module itself
        # (``logging(...)`` — TypeError) and described the wrong options.
        logging.error(
            "Usage: script_refbook.py -b <border> -d <dots> -f <file>")
        sys.exit(-1)