def print_stats(self):
    """Log the time spent in the various cluster-improvement operations.

    Purely informational: reads the timing counters accumulated on
    ``self`` and writes two summary lines to the clustering logger.
    Returns None.
    """
    # Lazy %-style arguments: the message is only built if INFO is enabled.
    clustering_logger.info(
        'Time spent in remove add %s and decrease %s and complete %s'
        ' and add then complete %s',
        self.time_remove_add, self.time_decrease,
        self.time_complete, self.time_add)
    clustering_logger.info(
        'Time spent in knapsack mip %s and in mip least removal %s',
        self.time_mip_knapsack, self.time_mip_least_stops)
def _initialization(self, read_cluster):
    """Set up the cluster manager, either from saved files or from scratch.

    :param read_cluster: if True, reload the clusters and the tracked
        statistics from the csv save files; otherwise build fresh
        clusters and delete any stale save files.
    :return: None; updates the manager cluster and the iteration counter.
    """
    base_dir = local_path.PATH_TO_PROJECT + 'database/cvrptw/'
    file_cluster = base_dir + 'clusters_save.csv'
    stats_file = base_dir + 'stats_algo.csv'

    if read_cluster:
        # Rebuild the cluster manager from the saved dataframe.
        pd_cluster = pd.read_csv(file_cluster)
        self.manager_cluster.build_from_df(
            df=pd_cluster, manager_ref=self.manager_stops)
        clustering_logger.info('manager built')

        # Restore every tracked statistics series from the stats file.
        pd_stats = pd.read_csv(stats_file)
        self.iteration = pd_stats['iteration'].max()
        stat_columns = [
            ('lp_value', 'lp_value'),
            ('nb_total_clusters', 'nb_clusters'),
            ('per_negative_rc', 'per_neg_rc'),
            ('avg_negative_rc', 'avg_neg_rc'),
            ('avg_robustness_created', 'avg_robustness_created'),
            ('predicted_nb_vehi', 'predicted_nb_vehi'),
            ('accuracy', 'sca_accuracy'),
            ('real_nb_vehi', 'real_nb_vehi'),
        ]
        for attr, column in stat_columns:
            setattr(self, attr, list(pd_stats[column]))
    else:
        # Fresh start: build the clusters, then remove stale save files
        # so later saves do not mix old and new runs.
        self._initialize_clusters()
        for stale_file in (file_cluster, stats_file):
            if os.path.isfile(stale_file):
                os.remove(stale_file)

    clustering_logger.info('Init done ' + str(len(self.manager_cluster)))
    assert self.manager_cluster.check_cluster_initialized()
def check_accuracy(self, list_clu):
    """Solve the routing problem on each given cluster and measure how
    often the cluster's vehicle prediction matches the routed reality.

    :param list_clu: list of cluster ids to check
    :return: tuple (accuracy in [0, 1], total number of vehicles used)
    """
    # Guard: avoid ZeroDivisionError on an empty selection.
    if not list_clu:
        clustering_logger.info('check_accuracy called with no clusters')
        return 0, 0

    accuracy = 0
    num_total_vehicle = 0
    total_dist = 0
    prediction = 0
    for clus_id in tqdm(list_clu,
                        desc='checking accuracy in main clustering algo'):
        clus = self.manager_cluster[clus_id]
        routing_solver = cvrptw_routing_solver.RoutingSolverCVRPTW(
            clus, self.config)
        num_vehicle, distance, list_routes = \
            routing_solver.solve_parse_routing()
        total_dist += distance
        num_total_vehicle += num_vehicle
        prediction += clus.prediction

        # Single logging path for both outcomes; only the label differs
        # (the two original branches were identical except for the word).
        is_accurate = num_vehicle == clus.prediction
        label = 'accurate' if is_accurate else 'error'
        clustering_logger.info(
            '%s %s %s vs %s for %s%s', label, clus.prediction,
            clus.expected_prediction, num_vehicle, clus.guid,
            self.manager_cluster[clus_id].is_robust(self.threshold))
        if is_accurate:
            accuracy += 1

    accuracy = accuracy / len(list_clu)
    clustering_logger.info('Number of clusters selected %s', len(list_clu))
    # Fixed message: the original concatenation was missing spaces
    # around "for prediction" and misspelled "vehi" as "vehu".
    clustering_logger.info(
        ' We have an accuracy of %s for prediction %s'
        ' for total number vehi %s and distance %s',
        accuracy, prediction, num_total_vehicle, total_dist)
    return accuracy, num_total_vehicle
def create_relevant_clusters(self, read_cluster=False):
    """
    Main function, create the clusters and solve the scp
    :param read_cluster: True then read cluster previously solved
    :return: the final set of clusters, on which to perform the routing
    """
    self._initialization(read_cluster)
    # Initial relaxed SCP solve: gives the reduced costs / duals that
    # drive the first round of cluster creation and improvement.
    list_selected_clusters, dict_reduced_cost, dict_dual_val, obj_val, dict_x = self.solve_scp(
        relax=True)
    clustering_logger.info(
        'SCP solve iteration: 0, number of clusters selected ' +
        str(len(list_selected_clusters)))
    # for clu_id in list_selected_clusters:
    #     print(clu_id,self.manager_cluster[clu_id].expected_prediction, self.manager_cluster[clu_id].tracking_evolution)
    # When resuming from file the stats were already tracked/saved, so
    # only track the iteration-0 stats on a fresh run.
    if not read_cluster:
        self._track_stats(obj_val, check_acc=False)
        self.save_results()
    # Resume from the iteration after the last completed one (0 on a
    # fresh run, the saved value when read_cluster is True).
    initial_iter = self.iteration + 1
    for it in tqdm(range(initial_iter, self.nb_iter),
                   desc='Iteration of the main loop in clustering algo'):
        self.iteration = it
        self._reset_stats()
        time_begin = time.time()
        # Phase 1: generate new candidate clusters guided by the
        # reduced costs and dual values of the previous SCP solve.
        self._create_new_clusters(dict_reduced_cost, dict_dual_val)
        time_create = time.time()
        clustering_logger.info('Clusters created in ' +
                               str(time_create - time_begin))
        self.total_time_creation += time_create - time_begin
        # Phase 2: improve existing clusters (uses the fractional x
        # values from the relaxation as well).
        self._improve_clusters(dict_x, dict_reduced_cost, dict_dual_val)
        time_improve = time.time()
        clustering_logger.info('Improvement of clusters done in ' +
                               str(time_improve - time_create))
        self.total_time_improv += time_improve - time_create
        # Phase 3: merge clusters.
        self._merge_clusters(dict_x, dict_reduced_cost, dict_dual_val)
        clustering_logger.info('Clusters have been merged in ' +
                               str(time.time() - time_improve))
        self.total_time_merge += time.time() - time_improve
        # rc_negative = self.cluster_operation.reduced_cost_negative
        # print("Number of clusters created with negative reduced cost ", rc_negative, ' on a total of modified ',
        #       self.cluster_operation.total_modify, ' i.e. ', rc_negative/self.cluster_operation.total_modify)
        # Re-solve the relaxed SCP on the updated cluster pool; its
        # outputs feed the next iteration's create/improve/merge phases.
        list_selected_clusters, dict_reduced_cost, dict_dual_val, obj_val, dict_x = self.solve_scp(
            relax=True)
        clustering_logger.info('SCP solve iteration: ' + str(it) +
                               ' number of clusters selected ' +
                               str(len(list_selected_clusters)))
        # for clu_id in list_selected_clusters:
        #     print(clu_id, self.manager_cluster[clu_id].expected_prediction, self.manager_cluster[clu_id].tracking_evolution)
        self._track_stats(obj_val, check_acc=False)
        self.save_results()
        clustering_logger.info('finished iteration in ' +
                               str(time.time() - time_begin))
def _upper_bound_by_branching(self, dict_x):
    """
    Find an upper bound by the branching rule
    :param dict_x: a dict[clu_id] = x value
    :return: the upper bound found, the list of clusters selected
    """
    time_start = time.time()
    # Number of clusters fixed per branching round, keyed by instance size.
    nb_wanted = {
        600: 1, 1000: 1, 1500: 2, 2000: 2,
        3000: 3, 5000: 5, 7500: 5, 10000: 5
    }[size_data.NUMBER_CUSTOMERS]

    unserved = list(self.manager_stops.keys())
    available_clusters = list(self.manager_cluster.keys())
    final_cluster = []
    nb_rounds = 0
    while unserved:
        # Greedily fix the clusters with the largest fractional x values
        # (ties broken on cluster id, descending).
        best = sorted(((x, clu_id) for clu_id, x in dict_x.items()),
                      reverse=True)[:nb_wanted]
        newly_served = []
        for _, clu_id in best:
            newly_served.extend(self.manager_cluster[clu_id].keys())
            available_clusters.remove(clu_id)
            final_cluster.append(clu_id)
        served_set = set(newly_served)
        unserved = [stop_id for stop_id in unserved
                    if stop_id not in served_set]

        # Re-solve the relaxed SCP restricted to the remaining stops and
        # the clusters that have not been fixed yet.
        dict_stop_cluster = {
            stop_id: [
                clu_id
                for clu_id in self.manager_cluster.dict_stop_clusters[stop_id]
                if clu_id not in final_cluster
            ]
            for stop_id in unserved
        }
        scp_mip = scp_solver.MIP_set_covering(
            list_stop=unserved,
            dict_stop_clus=dict_stop_cluster,
            dict_clus_predict={
                clu_id: self.manager_cluster[clu_id].expected_prediction
                for clu_id in available_clusters
            })
        # note the updated prediction with robustness should
        # already have been considered
        _, _, _, _, dict_x = scp_mip.solve(relax=True, warm_start=None)
        nb_rounds += 1

    ub = sum(self.manager_cluster[clu_id].expected_prediction
             for clu_id in final_cluster)
    clustering_logger.info('We have done the branching in ' +
                           str(nb_rounds) + ' iterations and ' +
                           str(time.time() - time_start) +
                           's for an upper bound of ' + str(ub))
    return ub, final_cluster
def _solve_integer_scp(self):
    """
    Solve the integer version of the scp.

    Strategy: solve the LP relaxation for a lower bound, derive an upper
    bound by branching, then solve restricted integer problems over the
    clusters whose reduced cost is low enough to matter.

    :return: list of selected cluster, dict of reduced cost and dual
        values, objective value, dict of x value
    """
    time_begin = time.time()
    _, dict_reduced_cost, _, lb, dict_x = self._solve_relax_scp()
    time_relax = time.time()
    clustering_logger.info('Lower bound on the relaxation ' + str(lb) +
                           ' in ' + str(time_relax - time_begin) +
                           ' total nb clusters ' +
                           str(len(self.manager_cluster)))

    ub_branching, final_clusters = self._upper_bound_by_branching(dict_x)
    # 0.01 tolerance absorbs LP-solver numerical noise. (The original
    # used `assert cond, print(...)`, whose message is always None.)
    assert ub_branching >= lb - 0.01, \
        f'upper bound {ub_branching} below lower bound {lb}'

    # Keep the 5% of clusters with the lowest (non-negative) reduced cost...
    rc_threshold = np.percentile(list(dict_reduced_cost.values()), 5)
    rc_threshold = max(0, rc_threshold)
    list_considered_clusters = [
        clu_id for clu_id, val in dict_reduced_cost.items()
        if val <= rc_threshold
    ]
    # ...plus the branching solution, which guarantees feasibility.
    list_considered_clusters.extend(final_clusters)
    list_considered_clusters = list(set(list_considered_clusters))

    # Sanity check: the restricted cluster pool must still cover all stops.
    is_covered = {
        stop_id
        for clu_id in list_considered_clusters
        for stop_id in self.manager_cluster[clu_id]
    }
    uncovered = set(self.manager_stops.keys()).difference(is_covered)
    assert len(uncovered) == 0, f'stops not covered: {uncovered}'

    # Short integer solve on the restricted pool, warm-started with the
    # branching solution, to obtain a tighter upper bound.
    list_selected_clusters, _, _, ub_restricted, _ = \
        self._solve_scp_resctricted_cluster(
            list_considered_clusters,
            relax=False,
            time_run=180,
            warm_start=final_clusters)
    time_ub = time.time()
    clustering_logger.info('Upper bound on the relaxation ' +
                           str(ub_restricted) + ' threshold used ' +
                           str(rc_threshold) + ' leading to ' +
                           str(len(list_considered_clusters)) +
                           ' solved in ' + str(time_ub - time_relax))
    ub = min(ub_restricted, ub_branching)

    # Final solve: only clusters whose reduced cost could close the
    # optimality gap (val <= ub - lb) can appear in an optimal solution.
    list_considered_clusters = [
        clu_id for clu_id, val in dict_reduced_cost.items()
        if val <= ub - lb
    ]
    list_selected_clusters, dict_reduced_cost, dict_dual_val, obj_val, dict_x = \
        self._solve_scp_resctricted_cluster(
            list_considered_clusters,
            relax=False,
            time_run=1200,
            warm_start=list_selected_clusters)
    # Message typo fixed: "interger" -> "integer".
    clustering_logger.info('Final integer problem solved, the optimality '
                           'gap is of ' + str(ub - lb) + ' nb clusters ' +
                           str(len(list_considered_clusters)) +
                           ' final solu ' + str(obj_val) + ' in ' +
                           str(time.time() - time_ub))
    return (list_selected_clusters, dict_reduced_cost, dict_dual_val,
            obj_val, dict_x)