def main(data_name, r=1):
    """Print a Weibull-shape-derived score for stored and random instances.

    Loads ``<data_name>.pkl``, and for every key without a ``'#'`` fits an
    exponentiated Weibull (first shape fixed to 1) to the rescaled pairwise
    distances, printing ``|-0.509 * shape + 0.707|``.  The same score is then
    printed for five fresh ``TSP(size)`` instances, where ``size`` is the
    integer after the last ``'_'`` in ``data_name``.

    Args:
        data_name: Pickle path without the ``.pkl`` suffix.
        r: Scale factor; distances are rescaled so the largest equals ``2 * r``.
    """

    def report(label, inst):
        flat = np.array(inst.dist_mat).flatten()
        # rescale so that the largest distance becomes 2 * r
        flat = flat / flat.max() * 2.0 * r
        _, shape, _, _scale = exponweib.fit(flat, f0=1)
        print(label, np.abs(-0.509 * shape + 0.707))

    with open(data_name + '.pkl', 'rb') as fh:
        data = pickle.load(fh)

    seed_names = [name for name in data.keys() if '#' not in name]
    for name in seed_names:
        report(name, TSP(data[name]))

    for trial in range(5):
        report('rand' + str(trial), TSP(int(data_name.split('_')[-1])))
def init_exp_main(data_dir):
    """Compare GA results when seeded by each metric / heuristic.

    For every ``.pkl`` file in ``data_dir`` the entry with the shortest key is
    used as the reference instance.  Each entry of the file is then solved
    with ``GA_Solver`` once per metric: heuristics supply their own tour as
    the seed, the other metrics supply ``pi`` applied to the reference's
    optimal tour.  One space-separated row per instance is appended to
    ``init_0.txt``.

    Args:
        data_dir: Directory containing the pickled instance collections.
    """
    metrics = [
        Map_Dist(),
        Geo_Dist(),
        Geo_Dist(embedding='deepwalk'),
        Geo_Dist(embedding='node2vec'),
        # 'line' and 'sdne' embeddings are disabled
        Geo_Dist(embedding='struc2vec'),
        Abstract_Dist(),
        DMST_Heuristic(),
        FI_Heuristic(),
        # Gr_Heuristic() is disabled
        NI_Heuristic(),
        NN_Heuristic(),
    ]
    solver = GA_Solver()
    ofn = 'init_0.txt'

    # header: fixed columns followed by one column per metric label
    with open(ofn, 'w') as out:
        print('inst_a inst_b best', *[m.lbl for m in metrics], file=out, end=' ')
        print(file=out)

    pickles = [name for name in os.listdir(data_dir) if name.endswith('.pkl')]
    for name in pickles:
        with open(os.path.join(data_dir, name), 'rb') as fh:
            data = pickle.load(fh)
        keys = list(data.keys())
        ref_key = min(keys, key=len)
        inst_a = TSP(data[ref_key])
        print(ref_key)

        for key in keys:
            inst_b = TSP(data[key])
            fits = []
            for metric in metrics:
                if metric.lbl.startswith('heuristic'):
                    seed = metric.solve(inst_b)['tour']
                else:
                    mapping = metric.dist(inst_a, inst_b)
                    seed = mapping['pi'][inst_a.optimal_tour]
                fits.append(solver.solve(inst_b, seed=seed)['fitness'])

            best = inst_b.eval(inst_b.optimal_tour)[0]
            with open(ofn, 'a') as out:
                print(ref_key, key, best, *fits, file=out, end=' ')
                print(file=out)
def main0():
    """Print the Map_Dist result for an instance against itself and against
    an independently created instance of the same size (module-level ``n``)."""
    first, second = TSP(n), TSP(n)
    metric = Map_Dist()
    outcomes = (
        ('ident:', metric.dist(first, first)),
        ('indep:', metric.dist(first, second)),
    )
    for tag, outcome in outcomes:
        print(tag, outcome['dist'], outcome['pi'])
def dist(self, inst_a: TSP, inst_b: TSP, tour_a=None, tour_b=None, **kwargs):
    """Distance between two instances via an order-preserving tour alignment.

    Both tours default to the instances' ``optimal_tour``.  Coordinates are
    rescaled by ``1 / sqrt(fitness / n)``, passed through ``self._translate``
    and aligned with a dynamic programme that is repeated for every cyclic
    starting offset of the shorter tour; the smallest final DP value wins.

    Args:
        inst_a: First instance.
        inst_b: Second instance.
        tour_a: Tour for ``inst_a``; defaults to ``inst_a.optimal_tour``.
        tour_b: Tour for ``inst_b``; defaults to ``inst_b.optimal_tour``.
        **kwargs: Accepted but not used in this method.

    Returns:
        ``{'dist': best_dist}`` -- note that no ``'pi'`` entry is produced.
    """
    if tour_a is None:
        tour_a = inst_a.optimal_tour
    if tour_b is None:
        tour_b = inst_b.optimal_tour
    # make sure n_b is larger
    if inst_b.n < inst_a.n:
        inst_a, inst_b = inst_b, inst_a
        tour_a, tour_b = tour_b, tour_a
    coords_a = np.array(inst_a)
    coords_b = np.array(inst_b)
    # NOTE(review): other callers index ``inst.eval(...)[0]``; here the whole
    # return value is divided by n -- confirm eval's return type supports that.
    fit_a = inst_a.eval(tour_a)
    fit_b = inst_b.eval(tour_b)
    coords_a = coords_a / np.sqrt(fit_a / inst_a.n)
    coords_b = coords_b / np.sqrt(fit_b / inst_b.n)
    # _translate / _cost are defined elsewhere; presumably _translate yields
    # one per-node element in tour order -- TODO confirm.
    sol_a = self._translate(coords_a, tour_a)
    sol_b = self._translate(coords_b, tour_b)
    best_dist = np.inf
    # cost_ab[i][j]: cost of pairing element i of a with element j of b
    cost_ab = np.array(
        [[self._cost(sol_a[i], sol_b[j]) for j in range(inst_b.n)]
         for i in range(inst_a.n)])
    # cost_b[j]: cost of element j of b paired with nothing (None)
    cost_b = np.array(
        [self._cost(None, sol_b[j]) for j in range(inst_b.n)])
    for offset in range(inst_a.n):
        # diff[i][j]: DP value after placing i + 1 elements of a (starting at
        # ``offset``) within the first j + 1 elements of b
        diff = np.zeros((inst_a.n, inst_b.n))
        diff[0][0] = cost_ab[offset][0]
        for j in range(1, inst_b.n - inst_a.n + 1):
            # NOTE(review): pairing a's first element with b[j] leaves
            # b[0..j-1] unpaired, yet only cost_b[:j - 1] is summed -- possible
            # off-by-one, confirm intent.
            diff[0][j] = min((diff[0][j - 1] + cost_b[j],
                              np.sum(cost_b[:j - 1]) + cost_ab[offset][j]))
        for i in range(1, inst_a.n):
            # negative values wrap around, i.e. row (offset + i) mod n_a
            ii = offset + i - inst_a.n
            # NOTE(review): the column index here is ``ii`` while the target
            # cell is column ``i`` -- looks like it should be cost_ab[ii][i];
            # confirm before changing.
            diff[i][i] = diff[i - 1][i - 1] + cost_ab[ii][ii]
            for j in range(i + 1, inst_b.n - inst_a.n + i + 1):
                # either leave b[j] unpaired or pair it with a's i-th element
                diff[i][j] = min((diff[i][j - 1] + cost_b[j],
                                  diff[i - 1][j - 1] + cost_ab[ii][j]))
        if best_dist > diff[-1][-1]:
            best_dist = diff[-1][-1]
    return {'dist': best_dist}
def solve(self, inst: TSP, eval=None):
    """Build a tour with the nearest-neighbour rule from a random start.

    Starting at a uniformly chosen node, the closest not-yet-visited node is
    appended repeatedly until every node is in the tour.

    Args:
        inst: Instance exposing ``n``, ``dist_mat`` and ``eval(tour)``.
        eval: Accepted for interface compatibility; not used here.

    Returns:
        ``{'tour': ndarray of node ids, 'fitness': inst.eval(tour)}``.
    """
    # Force a float copy: visited columns are masked with inf, which cannot be
    # stored in an integer matrix (NumPy raises OverflowError).
    remaining = np.array(inst.dist_mat, dtype=float)
    tour = [np.random.choice(inst.n)]
    for _ in range(inst.n - 1):
        current = tour[-1]
        # never travel to the current node again
        remaining[:, current] = np.inf
        tour.append(np.argmin(remaining[current]))
    tour = np.array(tour)
    return {'tour': tour, 'fitness': inst.eval(tour)}
def main0():
    """Print Kernel_Dist distances for five instance pairs per kernel.

    Ten ``TSP(n)`` instances are created once (``n`` and ``kernels`` are
    module-level names) and paired as (0, 1), (2, 3), ... for every kernel.
    """
    insts = [TSP(n) for _ in range(10)]
    pairs = list(zip(insts[0::2], insts[1::2]))
    for kernel in kernels:
        metric = Kernel_Dist(kernel)
        print(kernel)
        for left, right in pairs:
            outcome = metric.dist(left, right)
            print('\t', outcome['dist'])
        print()
def tune_init_exp_main(data_dir):
    """Compare GA fitness for several ``Map_Dist`` sample sizes.

    For each ``.pkl`` file in ``data_dir`` the shortest key is the source
    instance and ``<key>_r9_#0`` is the target.  The target is solved once per
    sample size, seeded with ``pi`` applied to the source's optimal tour, and
    one row per file is appended to ``tune_X.txt``.

    Args:
        data_dir: Directory containing the pickled instance collections.
    """
    lbls = [100, 150, 200, 250, 300]
    metrics = [Map_Dist(sample_size=size) for size in lbls]
    solver = GA_Solver()
    ofn = 'tune_X.txt'

    with open(ofn, 'w') as out:
        print('name best', *lbls, file=out, end=' ')
        print(file=out)

    pickles = [name for name in os.listdir(data_dir) if name.endswith('.pkl')]
    for name in pickles:
        with open(os.path.join(data_dir, name), 'rb') as fh:
            data = pickle.load(fh)
        base = min(list(data.keys()), key=len)
        inst_a = TSP(data[base])
        inst_b = TSP(data[base + '_r9_#0'])
        print(base)

        fits = []
        for metric in metrics:
            mapping = metric.dist(inst_a, inst_b)
            seed = mapping['pi'][inst_a.optimal_tour]
            fits.append(solver.solve(inst_b, seed=seed)['fitness'])

        with open(ofn, 'a') as out:
            print(base, inst_b.eval(inst_b.optimal_tour)[0], *fits,
                  file=out, end=' ')
            print(file=out)
def solve(self, inst: TSP, eval=None):
    """Build a tour by walking the minimum spanning tree from a random node.

    The MST of ``inst.dist_mat`` is symmetrised and handed, together with a
    one-element tour holding a random start node, to ``self._dfs``, which
    extends the tour in place.

    Args:
        inst: Instance exposing ``n``, ``dist_mat`` and ``eval(tour)``.
        eval: Accepted for interface compatibility; not used here.

    Returns:
        ``{'tour': ndarray of node ids, 'fitness': inst.eval(tour)}``.
    """
    tree = minimum_spanning_tree(inst.dist_mat).toarray()
    # the sparse result stores each edge once; mirror it so both ends see it
    tree = tree + tree.T
    order = [np.random.choice(inst.n)]
    self._dfs(tree, order)
    order = np.array(order)
    return {'tour': order, 'fitness': inst.eval(order)}
def travel_from_seed(inst_0, c):
    """Return a copy of ``inst_0`` with every point moved by a fixed step.

    Each point is shifted by the same distance in an independent, uniformly
    random direction.  The step length is the mean distance of the points to
    the origin, divided by ``sqrt(n)`` and multiplied by ``c``.

    Args:
        inst_0: Source instance (anything ``TSP(...)`` accepts).
        c: Multiplier for the step length.

    Returns:
        The perturbed ``TSP`` instance.
    """
    inst = TSP(inst_0)
    count = len(inst)
    origin = np.zeros((1, 2))
    step = np.mean(cdist(origin, inst)) / np.sqrt(count) * c
    angles = np.random.random(count) * np.pi * 2
    shift = np.column_stack((np.sin(angles) * step, np.cos(angles) * step))
    inst += shift
    return inst
def read_inst(fn, dir=None):
    """Read whitespace-separated point coordinates from a text file.

    Every non-blank line becomes one row of the coordinate array.  Blank
    lines (for example a trailing newline at the end of the file) are skipped;
    previously they produced an empty row and the float conversion failed.

    Args:
        fn: File name, or full path when ``dir`` is None.
        dir: Optional directory that ``fn`` is joined onto.

    Returns:
        ``TSP`` built from the float array of parsed rows.

    Raises:
        ValueError: If a field is not a number or the rows differ in length.
    """
    if dir is not None:
        fn = os.path.join(dir, fn)
    with open(fn) as f:
        # str.split() with no separator already strips surrounding whitespace
        rows = (line.split() for line in f)
        points = [fields for fields in rows if fields]
    return TSP(np.array(points, dtype=float))
def solve(self, inst: TSP, eval=None):
    """Build a tour by greedily linking the shortest admissible edges.

    Edges are visited in increasing length (the first ``n`` sorted entries
    are skipped).  An edge is taken when it joins two unused nodes, attaches
    an unused node to a node with a free second slot, or joins two different
    fragments at nodes that both have a free second slot.  The two nodes left
    with a free second slot are linked, and the tour is read off from node 0.

    Args:
        inst: Instance exposing ``n``, ``dist_mat`` and ``eval(tour)``.
        eval: Accepted for interface compatibility; not used here.

    Returns:
        ``{'tour': ndarray of node ids, 'fitness': inst.eval(tour)}``.
    """
    n = inst.n
    edge_order = np.argsort(inst.dist_mat.flatten())[n:]
    next_colour = 0
    # colour: fragment id per node (-1 = unused); neigh: up to two neighbours
    colour = np.full(n, -1, dtype=int)
    neigh = np.full((n, 2), -1, dtype=int)

    for flat in edge_order:
        u, v = divmod(flat, n)
        if colour[u] < 0 and colour[v] < 0:
            # start a new fragment
            neigh[u][0] = v
            neigh[v][0] = u
            colour[u] = colour[v] = next_colour
            next_colour += 1
        elif colour[u] < 0 and neigh[v][1] < 0:
            # hang unused u onto v
            neigh[u][0] = v
            neigh[v][1] = u
            colour[u] = colour[v]
        elif colour[v] < 0 and neigh[u][1] < 0:
            # hang unused v onto u
            neigh[v][0] = u
            neigh[u][1] = v
            colour[v] = colour[u]
        elif colour[u] != colour[v] and neigh[u][1] < 0 and neigh[v][1] < 0:
            # merge two fragments and recolour u's fragment
            neigh[u][1] = v
            neigh[v][1] = u
            colour[colour == colour[u]] = colour[v]

    # close the cycle between the two nodes that still have a free slot
    u, v = [node for node in range(n) if neigh[node][1] == -1]
    neigh[u][1] = v
    neigh[v][1] = u

    tour = [0]
    visited = [False] * n
    visited[0] = True
    while True:
        here = tour[-1]
        step = next((w for w in neigh[here] if not visited[w]), None)
        if step is None:
            break
        tour.append(step)
        visited[step] = True

    tour = np.array(tour)
    return {'tour': tour, 'fitness': inst.eval(tour)}
def cat_exp_main(data_dir):
    """Write per-collection CSVs of Map_Dist distance vs. hardness gap.

    For every ``.pkl`` file in ``data_dir`` that has no matching ``.csv`` yet,
    the entry with the shortest key is the seed.  Each entry is compared to
    the seed three times and each comparison is appended as one CSV row.
    The seed itself is labelled ``<seed>_r0_#0`` in the second column.

    Args:
        data_dir: Directory holding the pickles; CSVs are written there too.
    """
    repeats = 3
    metric = Map_Dist()
    listing = os.listdir(data_dir)
    for name in listing:
        if not name.endswith('.pkl'):
            continue
        ofn = name[:-3] + 'csv'
        if ofn in listing:
            # already processed on an earlier run
            continue
        out_path = os.path.join(data_dir, ofn)

        with open(os.path.join(data_dir, name), 'rb') as fh:
            data = pickle.load(fh)
        inst_names = data.keys()
        seed_name = min(inst_names, key=len)
        print(seed_name, end='')
        seed = TSP(data[seed_name])

        with open(out_path, 'w') as out:
            print('Inst_a,Inst_b,Distance,Baseline', file=out)

        for inst_name in inst_names:
            print('*', end='')
            inst = TSP(data[inst_name])
            target = seed_name + '_r0_#0' if seed_name == inst_name else inst_name
            for _ in range(repeats):
                distance = str(metric.dist(seed, inst)['dist'])
                baseline = str(abs(seed.hardness_est() - inst.hardness_est()))
                with open(out_path, 'a') as out:
                    print(','.join((seed_name, target, distance, baseline)),
                          file=out)
        print()
def main1():
    """Compare self-distances with distances between independent instances.

    Runs 30 trials with fresh ``TSP(n)`` pairs (``n`` is module-level),
    collecting ``Map_Dist`` distances for (a, a) and (a, b), then prints mean
    and standard deviation of both samples and the ``ttest_ind`` p-value.
    """
    print('prob size', n)
    metric = Map_Dist()
    ident, indep = [], []
    for trial in range(30):
        print('\r', trial, end='')
        first = TSP(n)
        second = TSP(n)
        same = metric.dist(first, first)
        other = metric.dist(first, second)
        ident.append(same['dist'])
        indep.append(other['dist'])
    print('\r', end='')
    for title, sample in (('identical set', ident), ('independent set', indep)):
        print(title + ' mean', np.mean(sample))
        print(title + ' std', np.std(sample))
    print('p =', ttest_ind(ident, indep)[1])
def tsp_instance(n, phi=None):
    """Sample ``n`` points whose distance from the origin is at most 1.

    Coordinates come from the two samplers returned by ``random_func(phi)``;
    points farther than 1 from the origin are redrawn until none remain.

    Args:
        n: Number of points.
        phi: Passed through to ``random_func``.

    Returns:
        ``TSP`` built from the ``(n, 2)`` coordinate array.
    """
    draw_x, draw_y = random_func(phi)
    pts = np.zeros((n, 2))
    pts[:, 0] = draw_x(size=n)
    pts[:, 1] = draw_y(size=n)

    origin = np.zeros((1, 2))
    outside = (cdist(origin, pts) > 1)[0]
    n_outside = np.sum(outside)
    while n_outside != 0:
        # redraw only the points that fell outside the unit disc
        pts[outside, 0] = draw_x(size=n_outside)
        pts[outside, 1] = draw_y(size=n_outside)
        outside = (cdist(origin, pts) > 1)[0]
        n_outside = np.sum(outside)
    return TSP(pts)
def harden_seed(inst_0, max_iter=100, num_candi=10):
    """Iteratively replace an instance by its best resampled candidate.

    In every round ``num_candi`` candidates are produced with
    ``resample_from_seed(inst, 0.1, 0.5)``.  The candidate with the smallest
    ``hardness_est()`` replaces the current instance if it is strictly below
    the current value.  The retained value is printed once per round.

    Args:
        inst_0: Starting instance (anything ``TSP(...)`` accepts).
        max_iter: Number of rounds.
        num_candi: Candidates generated per round.

    Returns:
        The instance retained after the last round.
    """
    current = TSP(inst_0)
    for _round in range(max_iter):
        best, best_fit = current, current.hardness_est()
        for _cand in range(num_candi):
            trial = resample_from_seed(current, 0.1, 0.5)
            trial_fit = trial.hardness_est()
            if trial_fit < best_fit:
                best, best_fit = trial, trial_fit
        print(best_fit)
        current = best
    return current
def resample_from_seed(inst_0, c, std=0.0):
    """Return a copy of ``inst_0`` with a fraction ``c`` of points resampled.

    ``int(c * n)`` distinct points are chosen at random.  With a truthy
    ``std`` they are replaced by normal draws centred on their old position,
    and afterwards every point farther than 1 from the origin is mapped to
    ``p / |p|**2``.  Otherwise they are replaced by ``tsp_instance(m)``.

    Args:
        inst_0: Source instance (anything ``TSP(...)`` accepts).
        c: Fraction of points to resample.
        std: Standard deviation of the normal perturbation; 0 selects the
            full-redraw branch.

    Returns:
        The modified ``TSP`` copy.
    """
    inst = TSP(inst_0)
    total = len(inst)
    m = int(c * total)
    chosen = np.random.choice(total, m, replace=False)

    if not std:
        inst[chosen] = tsp_instance(m)
        return inst

    inst[chosen] = np.random.normal(inst[chosen], scale=std)
    # bounce out-of-scope points back inside
    outside = (cdist(np.zeros((1, 2)), inst) > 1)[0]
    strays = np.array(inst)[outside]
    sq_norms = np.linalg.norm(strays, axis=1) ** 2
    inst[outside, 0] = strays[:, 0] / sq_norms
    inst[outside, 1] = strays[:, 1] / sq_norms
    return inst
def solve(self, inst: TSP, eval=None):
    """Build a tour with the nearest-insertion heuristic.

    The tour starts with the closest pair of nodes.  In every step the node
    outside the tour that is nearest to any tour node is selected and placed
    at the position that increases the tour length the least.

    The selection step previously took the *position* of the closest tour
    node within ``tour`` and used it as a *node id*, so the selected node was
    the one nearest to an unrelated node.  It now takes the column-wise
    minimum over the tour rows.

    Args:
        inst: Instance exposing ``n``, ``dist_mat`` (2-D array) and
            ``eval(tour)``.
        eval: Accepted for interface compatibility; not used here.

    Returns:
        ``{'tour': ndarray of node ids, 'fitness': inst.eval(tour)}``.
    """
    n = inst.n
    if n < 2:
        # no pair to start from; the trivial tour is the only option
        tour = np.arange(n)
        return {'tour': tour, 'fitness': inst.eval(tour)}

    # Float copy so that inf can be written even for integer matrices.
    # Columns of nodes already in the tour are masked with inf.
    working_dm = np.array(inst.dist_mat, dtype=float)
    np.fill_diagonal(working_dm, np.inf)

    u = np.argmin(np.min(working_dm, axis=1))
    v = np.argmin(working_dm[u])
    tour = [u, v]
    working_dm[:, u] = np.inf
    working_dm[:, v] = np.inf

    for _ in range(n - 2):
        # nearest outside node to *any* tour node
        v = np.argmin(np.min(working_dm[tour], axis=0))

        # cheapest insertion between tour[i - 1] and tour[i]; first minimum wins
        ind = -1
        min_inc = np.inf
        for i in range(len(tour)):
            inc = (inst.dist_mat[v, tour[i]]
                   + inst.dist_mat[tour[i - 1], v]
                   - inst.dist_mat[tour[i - 1], tour[i]])
            if min_inc > inc:
                min_inc = inc
                ind = i
        tour.insert(ind, v)
        working_dm[:, v] = np.inf

    tour = np.array(tour)
    return {'tour': tour, 'fitness': inst.eval(tour)}
def acc_exp_main(data_dir):
    """Write all-pairs Map_Dist distances between the seed instances.

    The shortest-keyed entry of every ``.pkl`` file in ``data_dir`` is taken
    as that file's seed.  For every ordered pair of seeds the metric distance
    and the absolute ``hardness_est`` difference are averaged over three
    runs, printed, and appended to ``<data_dir>/dists.csv``.

    Args:
        data_dir: Directory holding the pickles; the CSV is written there too.
    """
    repeats = 3
    metric = Map_Dist()
    out_path = os.path.join(data_dir, 'dists.csv')
    with open(out_path, 'w') as out:
        print('Inst_a,Inst_b,Distance,Baseline', file=out)

    seeds = {}
    for name in os.listdir(data_dir):
        if not name.endswith('.pkl'):
            continue
        with open(os.path.join(data_dir, name), 'rb') as fh:
            content = pickle.load(fh)
        shortest = min(list(content.keys()), key=len)
        seeds[shortest] = content[shortest]

    names = list(seeds.keys())
    count = len(names)
    met_dist_mat = np.zeros((count, count))
    har_dist_mat = np.zeros((count, count))
    print('#instances:', count)

    for row, name_i in enumerate(names):
        for col, name_j in enumerate(names):
            inst_i = TSP(seeds[name_i])
            inst_j = TSP(seeds[name_j])
            met_runs, har_runs = [], []
            for _ in range(repeats):
                met_runs.append(metric.dist(inst_i, inst_j)['dist'])
                har_runs.append(
                    abs(inst_i.hardness_est() - inst_j.hardness_est()))
            met_dist_mat[row][col] = np.mean(met_runs)
            har_dist_mat[row][col] = np.mean(har_runs)

            record = (name_i, name_j,
                      str(met_dist_mat[row][col]), str(har_dist_mat[row][col]))
            print(record)
            with open(out_path, 'a') as out:
                print(','.join(record), file=out)
0.2, self._max_iter, verbose=False, stats=stats, halloffame=hof) # population, logbook = algorithms.eaSimple(population, toolbox, 0.7, 0.2, self._max_iter, verbose=False) tour = np.array(hof[0]) if eval is None: return { 'tour': tour, 'fitness': inst.eval(tour)[0], 'pop': population, 'iter_avg': [iter['avg'] for iter in logbook] } else: return { 'tour': tour, 'fitness': eval(tour)[0], 'pop': population, 'iter_avg': [iter['avg'] for iter in logbook] } if __name__ == '__main__': tsp = TSP(50) slr = GA_Solver() res = slr.solve(tsp) print(list(res['iter_avg']))
def contr_exp_main(data_dir):
    """Record metric values on instance triples drawn from the categories.

    All ``.pkl`` files in ``data_dir`` are merged into one dictionary.  For 30
    rounds and each category prefix, four keys are drawn: an anchor, a key of
    the same category with the same second ``'_'`` field, a key of the same
    category with a different second field, and a key of another category.
    Two triples (anchor, same-field, other-category) and (anchor,
    different-field, other-category) are evaluated with every metric and one
    row per triple is appended to ``contr_0.txt``.

    Kernel metrics are inverted (``1 / value``).  A zero value now maps to
    ``inf`` instead of dividing by zero, matching ``retr_exp_main``.

    Args:
        data_dir: Directory containing the pickled instance collections.
    """
    metrics = [
        Map_Dist(),
        Geo_Dist(),
        Geo_Dist(embedding='deepwalk'),
        Geo_Dist(embedding='node2vec'),
        # 'line' and 'sdne' embeddings are disabled
        Geo_Dist(embedding='struc2vec'),
        Abstract_Dist(),
        Kernel_Dist('edge_hist_gauss'),
        Kernel_Dist('edge_hist'),
        Kernel_Dist('random_walk_exp'),
        Kernel_Dist('random_walk_geo'),
        Kernel_Dist('vertex_edge_hist_gauss'),
        Kernel_Dist('vertex_edge_hist'),
        Kernel_Dist('vertex_vertex_edge_hist'),
    ]
    ofn = 'contr_0.txt'
    # NOTE(review): the header has one column per metric, but each row holds
    # three values per metric (one per pair in the triple).
    with open(ofn, 'w') as f:
        print('inst_a inst_b inst_c', *[m.lbl for m in metrics],
              file=f, end=' ')
        print(file=f)

    data = dict()
    for fn in os.listdir(data_dir):
        if not fn.endswith('.pkl'):
            continue
        with open(os.path.join(data_dir, fn), 'rb') as f:
            data.update(pickle.load(f))
    keys = list(data.keys())

    triple_sets = [[(0, 1), (0, 3), (1, 3)], [(0, 2), (0, 3), (2, 3)]]
    for _ in range(30):
        for cat in ['circuit', 'city', 'gaussian', 'uniform']:
            in_cat = [k for k in keys if k.startswith(cat)]
            anchor = random.choice(in_cat)
            field = anchor.split('_')[1]
            # the draw order is kept so seeded runs reproduce earlier output
            ks = [
                anchor,
                random.choice([k for k in in_cat if k.split('_')[1] == field]),
                random.choice([k for k in in_cat if k.split('_')[1] != field]),
                random.choice([k for k in keys if not k.startswith(cat)]),
            ]
            print(ks)
            insts = [TSP(data[k]) for k in ks]

            for triples in triple_sets:
                res = []
                for m in metrics:
                    is_kernel = m.lbl.startswith('kernel')
                    for i, j in triples:
                        val = m.dist(insts[i], insts[j])['dist']
                        if is_kernel:
                            # kernels are similarities; invert, guarding zero
                            val = 1.0 / val if val != 0.0 else float('inf')
                        res.append(val)

                # leave out the key that does not take part in this triple
                unused = 2 if (0, 1) in triples else 1
                used = [k for i, k in enumerate(ks) if i != unused]
                with open(ofn, 'a') as f:
                    print(*used, *res, file=f, end=' ')
                    print(file=f)
def retr_exp_main(data_dir):
    """Retrieval experiment: seed the GA for a query from other instances.

    All ``.pkl`` files in ``data_dir`` are merged.  Keys whose second-to-last
    character is not ``'#'`` are the queries, visited in shuffled order, and
    every other query is a candidate.  Heuristics seed the GA with their own
    tour.  Every other metric seeds it once per candidate with ``pi`` applied
    to the candidate's optimal tour.  For the metric labelled ``'mapping'``
    the GA is additionally run with the 3, 5 and 10 closest seeds.  All rows
    are appended to ``retr_0.txt``.

    The ``top_k`` runs previously received the full seed list for every
    ``k``; they now receive only the ``k`` best-ranked seeds.

    Args:
        data_dir: Directory containing the pickled instance collections.
    """
    metrics = [
        Map_Dist(),
        Geo_Dist(),
        Geo_Dist(embedding='deepwalk'),
        Geo_Dist(embedding='node2vec'),
        # 'line' and 'sdne' embeddings are disabled
        Geo_Dist(embedding='struc2vec'),
        Abstract_Dist(),
        Kernel_Dist('edge_hist_gauss'),
        Kernel_Dist('edge_hist'),
        Kernel_Dist('random_walk_exp'),
        Kernel_Dist('random_walk_geo'),
        Kernel_Dist('vertex_edge_hist_gauss'),
        Kernel_Dist('vertex_edge_hist'),
        Kernel_Dist('vertex_vertex_edge_hist'),
        DMST_Heuristic(),
        FI_Heuristic(),
        # Gr_Heuristic() is disabled
        NI_Heuristic(),
        NN_Heuristic(),
    ]
    solver = GA_Solver()
    ofn = 'retr_0.txt'
    with open(ofn, 'w') as f:
        print('query metric candidate dist fitness', file=f)

    data = dict()
    for fn in os.listdir(data_dir):
        if not fn.endswith('.pkl'):
            continue
        with open(os.path.join(data_dir, fn), 'rb') as f:
            data.update(pickle.load(f))
    keys = list(data.keys())

    queries = [k for k in keys if k[-2] != '#']
    random.shuffle(queries)
    for q in queries:
        candidates = list(queries)
        candidates.remove(q)
        inst_b = TSP(data[q])
        for m in metrics:
            print(q, m.lbl)
            if m.lbl.startswith('heuristic'):
                fit = solver.solve(inst_b,
                                   seed=m.solve(inst_b)['tour'])['fitness']
                with open(ofn, 'a') as f:
                    print(q, m.lbl, None, None, fit, file=f)
                continue

            seeds = []
            for c in candidates:
                inst_a = TSP(data[c])
                d = m.dist(inst_a, inst_b)
                val = d['dist']
                if m.lbl.startswith('kernel'):
                    # kernels are similarities; invert, guarding zero
                    val = 1.0 / val if val != 0.0 else np.inf
                seed = d['pi'][inst_a.optimal_tour]
                seeds.append((val, seed))
                fit = solver.solve(inst_b, seed=seed)['fitness']
                with open(ofn, 'a') as f:
                    print(q, m.lbl, c, val, fit, file=f)

            if m.lbl == 'mapping':
                # closest candidates first
                ranked = [s[1] for s in sorted(seeds, key=lambda x: x[0])]
                for k in [3, 5, 10]:
                    fit = solver.solve(inst_b, seed=ranked[:k])['fitness']
                    with open(ofn, 'a') as f:
                        print(q, m.lbl, 'top_k', k, fit, file=f)
def sub_sample(inst_0, n):
    """Return a ``TSP`` made of ``n`` distinct random rows of ``inst_0``.

    The selected row indices are sorted, so the rows keep their original
    relative order.
    """
    picked = np.random.choice(len(inst_0), n, replace=False)
    picked = np.sort(picked)
    subset = inst_0[picked]
    return TSP(subset)
model = DeepWalk(G, walk_length=10, num_walks=80, workers=1) model.train(window_size=5, iter=3) elif self._embedding == 'node2vec': model = Node2Vec(G, walk_length=10, num_walks=80, p=0.25, q=4, workers=1) # init model model.train(window_size=5, iter=3) # train model elif self._embedding == 'line': model = LINE(G, embedding_size=128, order='second') # init model,order can be ['first','second','all'] model.train(batch_size=1024, epochs=50, verbose=2) # train model elif self._embedding == 'sdne': model = SDNE(G, hidden_size=[256, 128]) # init model model.train(batch_size=3000, epochs=40, verbose=2) # train model elif self._embedding == 'struc2vec': model = Struc2Vec(G, 10, 80, workers=4, verbose=40, ) # init model model.train(window_size=5, iter=3) # train model else: return self._normalise(inst) ebds = model.get_embeddings() coords = [] for i in range(inst.n): coords.append(ebds[str(i)]) return np.array(coords) if __name__ == '__main__': # metric = Geo_Dist(embedding='deepwalk') metric = Geo_Dist() inst_a = TSP(10) inst_b = TSP(10) print(metric.dist(inst_a, inst_b))
def _summed_iter_avg(solver, inst, repeats, **solve_kwargs):
    """Run ``solver.solve`` ``repeats`` times and sum the ``iter_avg`` curves."""
    curves = [solver.solve(inst, **solve_kwargs)['iter_avg']
              for _ in range(repeats)]
    return np.array(curves).sum(axis=0)


def exp_main(data_name, out_name):
    """Collect GA convergence curves for different initialisations.

    For every seed instance (key without ``'#'``) in ``<data_name>.pkl`` the
    GA is run five times per initialisation and the per-iteration averages
    are summed.  Initialisations are tours mapped from every other seed, tours
    mapped from the seed's own ``r1`` / ``t1`` variants, tours from each
    heuristic, and no seed at all.  The nested result dictionary
    ``{seed: {label: (init_tour, summed_curve)}}`` is pickled to ``out_name``.

    The ``'random'`` entry previously stored the tour of the last heuristic
    as its ``init``; it now stores ``None`` because no seed tour is used.

    Args:
        data_name: Pickle path without the ``.pkl`` suffix.
        out_name: Path of the pickle file to write.
    """
    with open(data_name + '.pkl', 'rb') as f:
        data = pickle.load(f)
    insts = data.keys()
    seeds = [name for name in insts if '#' not in name]

    metric = Map_Dist()
    solver = GA_Solver()
    heuristics = [NN_Heuristic(), Gr_Heuristic(), NI_Heuristic(),
                  FI_Heuristic(), DMST_Heuristic()]
    res = dict()
    m = 5

    def mapped_init(source_name, target):
        # carry the source's optimal tour over to the target via the metric's pi
        source = TSP(data[source_name])
        pi = metric.dist(source, target)['pi']
        return pi[source.optimal_tour]

    for ia in seeds:
        res_ia = dict()
        print(ia)
        inst_a = TSP(data[ia])

        print('other inst init')
        for ib in seeds:
            if ia == ib:
                continue
            init = mapped_init(ib, inst_a)
            res_ia[ib] = (init, _summed_iter_avg(solver, inst_a, m, seed=init))

        print('closest inst init')
        for ib in insts:
            # keep only this seed's own 'r1' / 't1' variants
            if ia not in ib or ('r1' not in ib and 't1' not in ib):
                continue
            init = mapped_init(ib, inst_a)
            res_ia[ib] = (init, _summed_iter_avg(solver, inst_a, m, seed=init))

        print('heuristic init')
        for heuristic in heuristics:
            init = heuristic.solve(inst_a)['tour']
            res_ia[heuristic.__class__.__name__] = (
                init, _summed_iter_avg(solver, inst_a, m, seed=init))

        print('random init')
        # no seed tour is passed, so none is recorded
        res_ia['random'] = (None, _summed_iter_avg(solver, inst_a, m))

        res[ia] = res_ia

    with open(out_name, 'wb') as f:
        pickle.dump(res, f, protocol=pickle.HIGHEST_PROTOCOL)
u, v = filter(lambda x: neigh[x][1] == -1, range(inst.n)) neigh[u][1] = v neigh[v][1] = u tour = [] in_tour = [False] * inst.n tour.append(0) in_tour[0] = True while True: u = tour[-1] v = neigh[u][0] if not in_tour[v]: tour.append(v) in_tour[v] = True continue v = neigh[u][1] if not in_tour[v]: tour.append(v) in_tour[v] = True continue break tour = np.array(tour) return {'tour': tour, 'fitness': inst.eval(tour)} if __name__ == '__main__': tsp = TSP(10) slr = Gr_Heuristic() sol = slr.solve(tsp)