def improve_initial_intchg(pareto_clu, pareto_wss, chashset2chashtpl):
    """Run the move/interchange workhorse from every clustering on the initial
    Pareto front and merge any improved clusterings back into the front."""
    pareto_clu_queue = []
    pareto_wss_queue = []
    for e, i in enumerate(pareto_clu):
        pareto_clu_queue.append(np.copy(i))
        pareto_wss_queue.append(deepcopy(pareto_wss[e]))
    do_interchange = True
    do_trace = False
    lcounter = 0
    for e, initial in enumerate(pareto_clu_queue):
        lcounter += 1
        print("Investigating neighborhood of clustering no. %s" % e)
        # print("Starting from a new clustering.")
        new_clu, new_wss, flag_improved_intchg = improve_initial_workhorse(
            initial, do_interchange, do_trace)
        chash, chashtpl = make_clu_hash(new_clu)
        if flag_improved_intchg and chash not in chashset2chashtpl:
            chashset2chashtpl[chash] = chashtpl
            dominated, flag_pareto = pf.eval_paretof(pareto_wss, new_wss)
            # Delete dominated entries in reverse index order so earlier
            # deletions do not shift the indices still to be removed.
            for dom_idx in sorted(dominated, reverse=True):
                dhash = make_clu_hash_set(pareto_clu[dom_idx])
                del chashset2chashtpl[dhash]
                del pareto_clu[dom_idx]
                del pareto_wss[dom_idx]
            pareto_clu.append(np.copy(new_clu))
            pareto_wss.append(deepcopy(tuple(new_wss)))
            if flag_pareto:
                print("IMPROVEMENT made in MOVE/INTERCHANGE.")
            else:
                print("Improvement made in MOVE/INTERCHANGE but no impact on the Pareto front.")
        else:
            print("No improvement in MOVE/INTERCHANGE transformation.")
    print("Number of all Pareto clusterings (candidates): %s" % len(pareto_clu))
    return pareto_clu, pareto_wss
def improve_pareto_intchg(clu, wss, metapack):
    """Iteratively improve the Pareto front with move-interchange transformations,
    re-queuing the updated front until no new clusterings are found."""
    global dominated_enc
    global paretofront_clu, paretofront_wss
    global chash, chashtpl, chashset2chashtpl
    global rmtah, ritah, ritahset
    rmtah, ritah, ritahset, chashset2chashtpl, dominated_enc = metapack
    do_interchange = True
    do_trace = False
    clu_intchg_queue = []
    wss_intchg_queue = []
    for e, i in enumerate(clu):
        clu_intchg_queue.append(np.copy(i))
        wss_intchg_queue.append(deepcopy(wss[e]))
    paretofront_clu = []
    paretofront_wss = []
    for e, i in enumerate(clu):
        paretofront_clu.append(np.copy(i))
        paretofront_wss.append(deepcopy(wss[e]))
    flag_newclusterings = True
    while flag_newclusterings:
        lcounter = 0
        num_clu_new_all = 0
        num_clu_dominated_all = 0
        for e, initial in enumerate(clu_intchg_queue):
            initial_enc = cdat.encode_bin_to_base64_clu(initial)
            if initial_enc in dominated_enc:
                print("Queued clustering no. %s has been dominated (local Pareto only). Skipping." % e)
                continue
            chash, chashtpl = mapping.make_clu_hash(initial)
            rmtah_chash = rmtah[chash]
            try:
                ritah_chash = ritah[chash]
                if pftbox.check_clu_mvi_transformations(ritah_chash, initial.size):
                    print("All move-interchange transformations already checked.")
                    continue
            except KeyError:
                ritah[chash] = np.copy(initial)
                ritahset[chash] = np.full(initial.shape, set(), dtype=object)
            try:
                chashtpl_known = chashset2chashtpl[chash]
                if chashtpl == chashtpl_known:
                    pass
                else:
                    # Re-align the bookkeeping arrays to the current cluster labelling.
                    talign = translate_alignment(chashtpl, chashtpl_known)
                    rmtah[chash] = np.copy(rmtah[chash][talign])
                    chashset2chashtpl[chash] = chashtpl
                    ritah[chash] = np.copy(ritah[chash][talign])
                    ritahset[chash] = np.copy(ritahset[chash][talign])
            except KeyError:
                print("KeyError while processing initial @ fast move-interchange - this should never happen!")
                rmtah[chash] = np.copy(rmtah_chash)
                chashset2chashtpl[chash] = chashtpl
                ritah[chash] = np.copy(initial)
                ritahset[chash] = np.full(initial.shape, set(), dtype=object)
            lcounter += 1
            print("FAST MOVE-INTERCHANGE iteration: %s" % lcounter)
            if pf.eval_clust_nondominance(paretofront_wss, wss_intchg_queue[e]):
                print("Starting from a new clustering.")
                new_clu_mv_tip, new_wss_k_mv_tip, moved, num_clu_new, num_clu_dominated = improve_pareto(
                    initial, do_interchange, do_trace)
                if moved:
                    new_pareto_encoded = cdat.encode_bin_to_base64_clu(new_clu_mv_tip)
                    num_clu_new_all += num_clu_new
                    num_clu_dominated_all += num_clu_dominated
            else:
                print("Initial clustering is DOMINATED")
        print("Number of new clusterings: %s" % num_clu_new_all)
        print("Number of dominated clusterings: %s" % num_clu_dominated_all)
        clu_intchg_queue = []
        wss_intchg_queue = []
        for e, i in enumerate(paretofront_clu):
            clu_intchg_queue.append(np.copy(i))
            wss_intchg_queue.append(np.copy(paretofront_wss[e]))
        # Loop continuation logic: stop once a full pass produced no new clusterings.
        if num_clu_new_all == 0:
            flag_newclusterings = False
    metapack = [rmtah, ritah, ritahset, chashset2chashtpl, dominated_enc]
    return paretofront_clu, paretofront_wss, metapack
def improve_pareto_mv(clu, wss, metapack):
    """Iteratively improve the Pareto front with move-only transformations,
    re-queuing the updated front until no new clusterings are found."""
    global dominated_enc
    global paretofront_clu, paretofront_wss
    global chash, chashtpl, chashset2chashtpl
    global rmtah, ritah, ritahset
    rmtah, ritah, ritahset, chashset2chashtpl, dominated_enc = metapack
    do_interchange = False
    do_trace = False
    clu_intchg_queue = []
    wss_intchg_queue = []
    for e, i in enumerate(clu):
        clu_intchg_queue.append(np.copy(i))
        wss_intchg_queue.append(deepcopy(wss[e]))
    paretofront_clu = []
    paretofront_wss = []
    for e, i in enumerate(clu):
        paretofront_clu.append(np.copy(i))
        paretofront_wss.append(deepcopy(wss[e]))
    lcounter = 0
    flag_newclusterings = True
    while flag_newclusterings:
        num_clu_new_all = 0
        num_clu_dominated_all = 0
        for e, initial in enumerate(clu_intchg_queue):
            lcounter += 1
            if pf.eval_clust_nondominance(paretofront_wss, wss_intchg_queue[e]):
                print("Starting from a new clustering: %s." % lcounter)
                chash, chashtpl = mapping.make_clu_hash(initial)
                rmtah_chash = rmtah[chash]
                if np.sum(rmtah_chash.astype(int)) == initial.size:
                    print("Prevented from entering the queue. All move transitions already tested.")
                    continue
                try:
                    chashtpl_known = chashset2chashtpl[chash]
                    if chashtpl == chashtpl_known:
                        pass
                    else:
                        # Re-align the bookkeeping array to the current cluster labelling.
                        talign = translate_alignment(chashtpl, chashtpl_known)
                        rmtah[chash] = np.copy(rmtah[chash][talign])
                        chashset2chashtpl[chash] = chashtpl
                except KeyError:
                    print("KeyError while processing initial @ fast move - this should never happen!")
                    rmtah[chash] = np.copy(initial)
                    chashset2chashtpl[chash] = chashtpl
                new_clu_mv_tip, new_wss_k_mv_tip, moved, num_clu_new, num_clu_dominated = improve_pareto(
                    initial, do_interchange, do_trace)
                if moved:
                    num_clu_new_all += num_clu_new
                    num_clu_dominated_all += num_clu_dominated
            else:
                print("Initial clustering is DOMINATED")
        print("Number of new clusterings: %s" % num_clu_new_all)
        print("Number of dominated clusterings: %s" % num_clu_dominated_all)
        clu_intchg_queue = []
        wss_intchg_queue = []
        for enum, ithc in enumerate(paretofront_clu):
            clu_intchg_queue.append(np.copy(ithc))
            wss_intchg_queue.append(np.copy(paretofront_wss[enum]))
        # Loop continuation logic: stop once a full pass produced no new clusterings.
        if num_clu_new_all == 0:
            flag_newclusterings = False
    metapack = [rmtah, ritah, ritahset, chashset2chashtpl, dominated_enc]
    return paretofront_clu, paretofront_wss, metapack
def improve_pareto(aclui, do_interchange, do_trace):
    """Local search from a single clustering: test move (and, optionally,
    move-interchange) transformations, updating the global Pareto front and
    the transformation bookkeeping hashes as improvements are found."""
    global dominated_enc
    global paretofront_clu, paretofront_wss
    global chash, chashtpl, chashset2chashtpl
    global rmtah, ritah, ritahset
    aclu = np.copy(aclui)
    units = list(range(gd.nunits))
    clulst = list(range(pset.k))
    flag_continue = True
    flag_improvement = False
    num_clu_new = 0
    num_clu_dominated = 0
    trace_clu = []
    trace_wss = []
    cur_wss = ecs.allecswss_bvect(aclu)
    rmtah_chash = rmtah[chash]
    try:
        ritah_chash = ritah[chash]
        ritahset_chash = ritahset[chash]
    except KeyError:
        ritah[chash] = np.copy(aclu)
        ritahset[chash] = np.full(aclu.shape, set(), dtype=object)
        ritah_chash = ritah[chash]
        ritahset_chash = ritahset[chash]
    while_loopcounter = 0
    while flag_continue:
        csd_val = cdat.cluster_size_all(aclu)
        csd_tf = csd_val <= pset.mspcs
        flag_clustersize = any(csd_tf)
        flag_continue = False
        flag_break = False
        if pftbox.check_clu_mv_transformations(rmtah_chash, aclu.size):
            if do_interchange:
                if pftbox.check_clu_mvi_transformations(ritah_chash, aclu.size):
                    print("All move-interchange transformations for clustering already tested.")
                    flag_continue = False
                    continue
            else:
                print("All move transformations for clustering already tested.")
                flag_continue = False
                continue
        if pset.doshuffle:
            random.shuffle(units)
        while_loopcounter += 1
        print("Iteration: %s" % while_loopcounter)
        for mvun in units:
            if flag_break:
                break
            if not do_interchange:
                if pftbox.check_unit_mv_transformations(rmtah_chash, mvun):
                    print("All move transformations tested for unit: %s." % mvun)
                    continue
            else:
                if pftbox.check_unit_mvi_transformations(ritah_chash, mvun):
                    print("All move-interchange transformations tested for unit: %s." % mvun)
                    continue
            csd_val = cdat.cluster_size_all(aclu)
            csd_tf = csd_val <= pset.mspcs
            flag_clustersize = any(csd_tf)
            #
            # MOVE
            #
            clu_from = np.copy(aclu[:, mvun])
            clu_from_idx = vect2idx[tuple(clu_from)]
            if pset.doshuffle:
                random.shuffle(clulst)
            for clx in clulst:
                if flag_break:
                    break
                clu_to = np.copy(idx2vect[clx])
                cltidx = clx
                if all(clu_from == clu_to):
                    continue
                flag_move_allowed = True
                if flag_clustersize:
                    if any(clu_from & csd_tf):
                        if do_interchange:
                            flag_move_allowed = False
                        else:
                            print("Cluster too small. Skipping.")
                            continue
                # Move scenario
                if not do_interchange:
                    if rmtah_chash[cltidx, mvun]:
                        print("Move transformation already tested - unit: %s, cluster: %s" % (mvun, cltidx))
                        continue
                    else:
                        # print("Testing transformation - unit: %s to cluster: %s" % (mvun, cltidx))
                        dominated, move_wss, flag_improvement_mv = ecs.allecswss_bpareto_mv(
                            aclu, cur_wss, mvun, clu_to, paretofront_wss)
                        rmtah_chash[cltidx, mvun] = True
                    if not flag_move_allowed:
                        print("Cluster too small, transformation is not allowed.")
                        continue
                # Move-interchange scenario
                else:
                    if rmtah_chash[cltidx, mvun] and ritah_chash[cltidx, mvun]:
                        print("All interchange transformations already tested - unit: %s, cluster: %s" % (mvun, cltidx))
                        continue
                    else:
                        # print("Testing transformation - unit: %s to cluster: %s" % (mvun, cltidx))
                        dominated, move_wss, flag_improvement_mv = ecs.allecswss_bpareto_mv(
                            aclu, cur_wss, mvun, clu_to, paretofront_wss)
                        rmtah_chash[cltidx, mvun] = True
                        rmtah[chash][cltidx, mvun] = True
                    if flag_improvement_mv and not flag_move_allowed:
                        print("MOVE IMPROVEMENT but cluster too small for transformation.")
                        flag_improvement_mv = False
                        dominated = []
                # (MOVE) IMPROVEMENT
                if flag_improvement_mv:
                    flag_continue = True
                    flag_improvement = True
                    num_clu_new += 1
                    num_clu_dominated += len(dominated)
                    rmtah[chash] = np.copy(rmtah_chash)
                    ritah[chash] = np.copy(ritah_chash)
                    ritahset[chash] = np.copy(ritahset_chash)
                    aclu[:, mvun] = np.copy(clu_to)
                    new_hash, new_hash_tpl = mapping.make_clu_hash(aclu)
                    if new_hash in rmtah.keys():
                        # Clustering already visited: undo the move and stop here.
                        aclu[:, mvun] = np.copy(clu_from)
                        flag_continue = False
                        flag_improvement = False
                        continue
                    else:
                        chash, chashtpl = new_hash, new_hash_tpl
                    csd_val = cdat.cluster_size_all(aclu)
                    csd_tf = csd_val <= pset.mspcs
                    flag_clustersize = any(csd_tf)
                    cur_wss = deepcopy(move_wss)
                    chashset2chashtpl[chash] = chashtpl
                    inactive_mask_tran, inactive_mask_val = pftbox.mask_inactive(aclu, clu_from, clu_to)
                    rmtah_chash = np.where(inactive_mask_tran, rmtah_chash, False)
                    ritah_chash = np.where(inactive_mask_tran, ritah_chash, False)
                    ritahset_chash = np.where(inactive_mask_val, ritahset_chash, set())
                    # Delete dominated entries in reverse index order so earlier
                    # deletions do not shift the indices still to be removed.
                    for dom_idx in sorted(dominated, reverse=True):
                        denc = cdat.encode_bin_to_base64_clu(paretofront_clu[dom_idx])
                        dominated_enc.add(denc)
                        dhash, dhash_list = mapping.make_clu_hash(paretofront_clu[dom_idx])
                        if chash == dhash:
                            continue
                        # Delete from Pareto front
                        del paretofront_clu[dom_idx]
                        del paretofront_wss[dom_idx]
                    # Append aclu, cur_wss to Pareto front
                    paretofront_clu.append(np.copy(aclu))
                    paretofront_wss.append(copy(tuple(cur_wss)))
                    print("Improvement in MOVE. WSS: %s" % str(cur_wss))
                    # TRACING
                    if do_trace:
                        trace_clu.append(np.copy(aclu))
                        trace_wss.append(ecs.allecswss_bvect(aclu))
                    # print("Move improvement. unit: %s, cluster: %s" % (mvun, cltidx + 1))
                    flag_break = True
                    break
                elif do_interchange:
                    mvback_candidates_idx, movedbackset = pftbox.get_mvbackunits(
                        ritahset_chash, cltidx, mvun, aclu[clu_to][0])
                    if pset.doshuffle:
                        random.shuffle(mvback_candidates_idx)
                    for intchgun in mvback_candidates_idx:
                        if flag_break:
                            break
                        dominated, interchange_wss, interchange_improvement = ecs.allecswss_bpareto_itchg(
                            aclu, cur_wss, mvun, intchgun, clu_from, clu_to, paretofront_wss)
                        movedbackset.add(intchgun)
                        # Mark this (cluster, unit) pair as exhausted once every
                        # move-back candidate has been tried.
                        if all(mci in movedbackset for mci in mvback_candidates_idx):
                            ritah_chash[cltidx, mvun] = True
                            ritahset_chash[cltidx, mvun] = set()
                            ritah[chash] = np.copy(ritah_chash)
                            ritahset[chash] = np.copy(ritahset_chash)
                        if interchange_improvement:
                            flag_continue = True
                            flag_improvement = True
                            num_clu_new += 1
                            num_clu_dominated += len(dominated)
                            rmtah[chash] = np.copy(rmtah_chash)
                            ritah[chash] = np.copy(ritah_chash)
                            ritahset[chash] = np.copy(ritahset_chash)
                            aclu[:, mvun] = np.copy(clu_to)
                            aclu[:, intchgun] = np.copy(clu_from)
                            check_hash = mapping.make_clu_hash_set(aclu)
                            try:
                                # Clustering already visited: undo the interchange and stop here.
                                blind_check = rmtah[check_hash]
                                aclu[:, mvun] = np.copy(clu_from)
                                aclu[:, intchgun] = np.copy(clu_to)
                                flag_continue = False
                                flag_improvement = False
                                continue
                            except KeyError:
                                pass
                            cur_wss = interchange_wss
                            print("Improvement in INTERCHANGE. WSS: %s" % str(cur_wss))
                            # Remove dominated entries in reverse index order so earlier
                            # deletions do not shift the indices still to be removed.
                            for dom_idx in sorted(dominated, reverse=True):
                                denc = cdat.encode_bin_to_base64_clu(paretofront_clu[dom_idx])
                                dominated_enc.add(denc)
                                dhash, dhash_list = mapping.make_clu_hash(paretofront_clu[dom_idx])
                                if chash == dhash:
                                    continue
                                # Delete from Pareto front
                                del paretofront_clu[dom_idx]
                                del paretofront_wss[dom_idx]
                            # Append aclu, cur_wss to Pareto front
                            paretofront_clu.append(np.copy(aclu))
                            paretofront_wss.append(copy(tuple(cur_wss)))
                            chash, chashtpl = mapping.make_clu_hash(aclu)
                            chashset2chashtpl[chash] = chashtpl
                            inactive_mask_tran, inactive_mask_val = pftbox.mask_inactive(aclu, clu_from, clu_to)
                            rmtah_chash = np.where(inactive_mask_tran, rmtah_chash, False)
                            ritah_chash = np.where(inactive_mask_tran, ritah_chash, False)
                            ritahset_chash = np.where(inactive_mask_val, ritahset_chash, set())
                            rmtah[chash] = np.copy(rmtah_chash)
                            ritah[chash] = np.copy(ritah_chash)
                            ritahset[chash] = np.copy(ritahset_chash)
                            # TRACING
                            if do_trace:
                                trace_clu.append(np.copy(aclu))
                                trace_wss.append(deepcopy(cur_wss))
                            flag_break = True
                            break
    rmtah[chash] = np.copy(rmtah_chash)
    ritah[chash] = np.copy(ritah_chash)
    ritahset[chash] = np.copy(ritahset_chash)
    return aclu, cur_wss, flag_improvement, num_clu_new, num_clu_dominated