def main():
    """Parse the command line and run the requested algorithm.

    The help text is printed when argument parsing fails, when ``-h`` is
    present, or when the value of ``-a`` is not one of the known algorithm
    names.  Without ``-a`` the greedy algorithm is run.
    """
    try:
        args = parseArguments()
    except:  # noqa: E722 - any failure while parsing falls back to the help text
        args = {"-h": True}

    # Normalise "-l" to a boolean recording whether the flag was supplied.
    args["-l"] = "-l" in args

    # handle argument
    if "-h" in args:
        printHelp()
        return

    if "-a" not in args:
        greedy(args)
        return

    algorithm = args["-a"]
    if algorithm == "aco":
        aco(args)
    elif algorithm == "ga":
        ga(args)
    elif algorithm == "greedy":
        greedy(args)
    elif algorithm == "mst":
        mst(args)
    else:
        printHelp()
def christo(nodes, arcs, cost):
    """Build a tour with the Christofides approximation, print it, return it.

    Args:
        nodes: collection of node identifiers.
        arcs: arcs of the graph, passed through to ``mst.mst``.
        cost: mapping indexed by ``(node, node)`` tuples.

    Returns:
        The tour produced by ``euler`` on the spanning tree extended with the
        matching of its odd-degree nodes.
    """
    n = len(nodes)
    print('\nChristofides approximation algorithm')
    tree = mst.mst(nodes, arcs, cost)

    # Adjacency list of the spanning tree; every arc is stored both ways.
    neighbours = {}
    for arc in tree:
        head, tail = arc[0], arc[1]
        for src, dst in ((head, tail), (tail, head)):
            neighbours.setdefault(src, []).append(dst)

    # Nodes whose degree in the tree is odd.
    odd_nodes = [node for node in nodes if len(neighbours[node]) % 2 != 0]

    # Add a minimum-cost perfect matching of the odd-degree nodes to the tree.
    tree.extend(matching(odd_nodes, cost))

    # Eulerian tour; only the second returned value is used.
    _, tour = euler(nodes, tree)

    print('Optimal tour:')
    print(tour)
    # The tour is treated as cyclic: the last node connects back to the first.
    total = sum(cost[(tour[i], tour[(i + 1) % n])] for i in range(n))
    print('Cost: ', total)
    return tour
def perceptron(learning_rate=1, itertations=2, dist_feature=False):
    """Run perceptron updates over the training data; return averaged weights.

    For each training sentence the current weights are used to build a full
    graph, ``mst.mst`` is run on it, and the weights are moved by
    ``learning_rate`` times the difference between the feature sum of the
    gold tree and the feature sum of the predicted tree.

    Args:
        learning_rate: scalar applied to every update.
        itertations: number of passes over the training sentences.
        dist_feature: when true, ``feature_function_w_dist`` is used instead
            of ``feature_function``.

    Returns:
        The sum of the weight vector taken after every update, scaled by
        ``1 / (len(train_tagged) * itertations)``.
    """
    # Sparse vectors are used for convenience and for running time.
    weight = sparse_vector([], N**2 + T**2)

    # The visiting order is shuffled once and reused for every pass.
    order = list(range(len(train_parsed)))
    random.shuffle(order)

    features = feature_function_w_dist if dist_feature else feature_function

    w_sum = sparse_vector([], N**2 + T**2)
    for _ in range(itertations):
        for idx in order:
            sentence = train_tagged[idx]
            graph = sentence_to_full_graph(features, weight, sentence)
            predicted_tree = mst.mst(0, graph)
            gold_tree = to_tree(train_parsed[idx])

            # update = learning_rate * (gold features - predicted features)
            update = sum_tree(gold_tree, sentence, features)
            update.sub(sum_tree(predicted_tree, sentence, features))
            update.mult_by_scalar(learning_rate)

            weight.add(update)
            w_sum.add(weight)

    w_sum.mult_by_scalar(1.0 / (len(train_tagged) * itertations))
    return w_sum
def christo(nodes, arcs, cost):
    """Build a tour with the Christofides approximation, print it, return it.

    Args:
        nodes: collection of node identifiers.
        arcs: arcs of the graph, passed through to ``mst.mst``.
        cost: mapping indexed by ``(node, node)`` tuples.

    Returns:
        The tour produced by ``euler`` on the spanning tree extended with the
        matching of its odd-degree nodes.
    """
    n = len(nodes)
    # print() calls (rather than print statements) keep this valid on Python 3.
    print('\nChristofides approximation algorithm')
    tree = mst.mst(nodes, arcs, cost)

    # Create adjacency list; every tree arc is recorded in both directions.
    adj = {}
    for arc in tree:
        adj.setdefault(arc[0], []).append(arc[1])
        adj.setdefault(arc[1], []).append(arc[0])

    # Nodes with odd degree
    odds = [node for node in nodes if len(adj[node]) % 2 != 0]

    # Find minimum-cost perfect matching of nodes with odd degree
    match = matching(odds, cost)
    tree.extend(match)

    # Eulerian tour; the first returned value is not needed here.
    _, tour = euler(nodes, tree)

    print('Optimal tour:')
    print(tour)
    # The tour is treated as cyclic: the last node connects back to the first.
    print('Cost: ', sum(cost[(tour[i], tour[(i + 1) % n])] for i in range(n)))
    return tour
def get_mst():
    """Return the JSON-encoded MST of the points posted in the 'data' form field.

    The field is decoded with ``json_loads``, every entry is converted to a
    tuple, and the result of ``mst.fullmap_of_pointslist`` is passed to
    ``mst.mst``.
    """
    raw_points = json_loads(request.form.get('data'))
    # Build a real list: on Python 3 `map` yields a one-shot iterator, which
    # breaks any consumer that takes len() of it or iterates it twice.
    points = [tuple(point) for point in raw_points]
    return json_dumps(mst.mst(mst.fullmap_of_pointslist(points)))
def expected_edges_counter(nr_of_points):
    """Average the second value returned by ``mst`` over random point sets.

    ``int(sqrt(nr_of_points))`` independent point sets of size
    ``nr_of_points`` are generated with ``random_points``; the mean of the
    middle element of each ``mst`` result is returned as a float.
    """
    trials = int(math.sqrt(nr_of_points))
    total = 0
    for _ in range(trials):
        # mst returns a 3-tuple; only its middle element is accumulated.
        _, counter, _ = mst(random_points(nr_of_points))
        total += counter
    return total * 1.0 / trials
def predict_batch(S_arc, S_lab, tags):
    """Predict heads and labels from arc and label score tensors.

    Args:
        S_arc: arc score tensor; converted to a NumPy array for ``mst``.
        S_lab: label score tensor; gathered along dimension 1 with the
            predicted heads.
        tags: accepted but not used.

    Returns:
        ``(heads, labels)`` where ``heads`` is the output of ``mst`` and
        ``labels`` is a NumPy array of arg-max label indices.
    """
    # Predict heads
    arc_scores = S_arc.data.numpy()
    heads = mst(arc_scores)

    # Predict labels: keep, per position, the scores of its predicted head.
    head_index = torch.LongTensor(heads).unsqueeze(0)
    head_index = Variable(head_index.expand(S_lab.size(0), -1))
    head_scores = torch.gather(S_lab, 1, head_index.unsqueeze(1)).squeeze(1)

    best_labels = head_scores.max(dim=0)[1]
    return heads, best_labels.data.numpy()
def score_sent(w_train, tagged_sent, feature_function, gold_tree):
    """Score the predicted tree of one sentence against its gold tree.

    The sentence is turned into a full graph with ``w_train`` and
    ``feature_function``, ``mst.mst`` is run from node 0, and every gold edge
    ``node -> node2`` that also appears in the predicted tree is counted.

    Returns:
        The number of shared edges divided by ``len(tagged_sent)``.
    """
    graph = sentence_to_full_graph(feature_function, w_train, tagged_sent)
    predicted = mst.mst(0, graph)
    gold = to_tree(gold_tree)

    shared_edges = sum(
        1
        for node in gold
        if node in predicted.keys()
        for child in gold[node].keys()
        if child in predicted[node].keys()
    )
    return shared_edges / len(tagged_sent)
def decoding(self, src_encodings):
    """Predict a head and a label for every position of the input.

    Args:
        src_encodings: encodings passed unchanged to ``self.cal_scores``.

    Returns:
        ``(pred_heads, pred_labels)``: the heads returned by ``mst`` and, for
        each position, the arg-max label index at its predicted head.
    """
    s_arc, s_label = self.cal_scores(src_encodings)

    # NOTE: should transpose before calling `mst` method!
    weights = s_arc.npvalue().transpose()  # src_len, src_len
    # src_len, src_len, n_labels
    s_label_values = np.asarray(
        [x.npvalue() for x in s_label]).transpose((2, 1, 0))

    pred_heads = mst(weights)
    pred_labels = [
        np.argmax(labels[head])
        for head, labels in zip(pred_heads, s_label_values)
    ]
    return pred_heads, pred_labels
def inverse_transform(self, pred, trees):
    """Convert predicted scores into head indices rendered as strings.

    Args:
        pred: array indexed as ``pred[i, :, :]`` per tree.
        trees: objects exposing ``tokens``; only read when
            ``self.params.force_trees`` is set.

    Returns:
        With ``force_trees`` set, a list with one string array per tree
        (the ``mst`` result on that tree's scores); otherwise the arg-max
        over axis 2 of ``pred`` as a string array.
    """
    if not self.params.force_trees:
        return np.argmax(pred, axis=2).astype(str)

    decoded = []
    for idx, _ in enumerate(trees):
        size = len(trees[idx].tokens)
        # Work on a copy, cropped so padding can never be chosen as a head.
        scores = pred[idx, :, :].copy()[:size, :size]
        # choose the best tree
        decoded.append(mst(scores).astype(str))
    return decoded
def decoding(self, src_encodings):
    """Predict a head and a label for every position of the input.

    Arc scores are passed through ``dy.softmax`` before decoding.

    Args:
        src_encodings: encodings passed unchanged to ``self.cal_scores``.

    Returns:
        ``(pred_heads, pred_labels)``: the heads returned by ``mst`` and, for
        each position, the arg-max label index at its predicted head.
    """
    s_arc, s_label = self.cal_scores(src_encodings)

    # NOTE: should transpose before calling `mst` method!
    weights = dy.softmax(s_arc).npvalue().transpose()  # src_len, src_len
    # src_len, src_len, n_labels
    s_label_values = np.asarray(
        [x.npvalue() for x in s_label]).transpose((2, 1, 0))

    pred_heads = mst(weights)
    pred_labels = [
        np.argmax(labels[head])
        for head, labels in zip(pred_heads, s_label_values)
    ]
    return pred_heads, pred_labels
def test(no_tests, size_graph):
    '''Compare mst.mst with mst_test.chu_liu_edmonds on random graphs.

    Runs no_tests trials on graphs from random_graph_gen(size_graph) and
    compares the total weight of the two resulting trees.

    Returns a tuple starting with 'problem found:' that describes the first
    mismatch, or ('All fine!', 0) when every trial agrees.
    '''
    count = 0  # never incremented; reported unchanged in the success tuple
    for _ in range(no_tests):
        graph = random_graph_gen(size_graph)
        score = graph2scores(graph)
        test_tree = mst.mst(graph)
        comp_tree = mst_test.chu_liu_edmonds(score)

        # The trees are compared through their total weight in `graph`.
        test_sum = sum(
            graph[vtx][nbh]
            for vtx, nbhs in test_tree.items()
            for nbh, _ in nbhs.items()
        )
        # comp_tree is read by position: the entry at index `nbh` is used as
        # the source vertex; index 0 and self-references are skipped.
        comp_sum = sum(
            graph[vtx][nbh]
            for nbh, vtx in enumerate(comp_tree)
            if nbh != 0 and nbh != vtx
        )

        if test_sum != comp_sum:
            return ('problem found:', graph, test_sum, comp_sum,
                    test_tree, comp_tree, score)
    return ('All fine!', count)
def testMstHillClimbing(self):
    """Run MST-seeded hill climbing 200 times and report the tour costs.

    Prints the mean, maximum and minimum cost of the runs and passes the
    list of costs to ``self.drawHist``.
    """
    runTimes = 200
    ans = []
    self.fillgraph(graph1)
    for _ in range(runTimes):
        tree = mst(graph1)
        seq = msthillclimbing.hillClimbing(graph1, tree)
        seq_cost = cost(seq, graph1)
        ans.append((seq, seq_cost))
    cost_list = [float(seq_cost) for _, seq_cost in ans]
    # print() call instead of a print statement so this parses on Python 3.
    print(self.mean(cost_list), max(cost_list), min(cost_list))
    self.drawHist(cost_list)
def save(args, model, tokenizer, device):
    """Print the heads that ``mst`` derives from one attention head.

    ``args.sentence_text`` is converted to features and run through
    ``model``; the attention matrix selected by ``args.layer_id`` and
    ``args.head_id`` (both 1-based) is cropped to the sentence plus one
    leading position, adjusted so that only ``args.sentence_root`` points at
    column 0, and decoded with ``mst``.  Only the first batch is processed.
    """
    # convert data to ids
    examples = [args.sentence_text]
    max_len = 2 + get_max_seq_length(examples, tokenizer)
    features = convert_examples_to_features(
        examples=examples, seq_length=max_len, tokenizer=tokenizer)

    # extract and write dependency parses
    ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    mask = torch.tensor([f.input_mask for f in features], dtype=torch.long)
    index = torch.arange(ids.size(0), dtype=torch.long)
    dataset = TensorDataset(ids, mask, index)
    loader = DataLoader(dataset,
                        sampler=SequentialSampler(dataset),
                        batch_size=args.batch_size)

    for input_ids, input_mask, example_indices in loader:
        input_ids = input_ids.to(device)
        input_mask = input_mask.to(device)
        _, _, raw_attn_layers = model(
            input_ids, token_type_ids=None, attention_mask=input_mask)

        # Drop the first and last token of the example.
        cur_tokens = features[example_indices[0]].tokens[1:-1]
        size = len(cur_tokens) + 1

        layer = raw_attn_layers[args.layer_id - 1].squeeze()
        head = layer[args.head_id - 1]
        attn = head[0:size, 0:size].detach().cpu().numpy()

        # Column 0 is disabled for every row except the designated root row,
        # and the diagonal is disabled last.
        attn[:, 0] = -1.
        attn[args.sentence_root, 0] = 1.0
        np.fill_diagonal(attn, -1.)

        mst_out = mst(attn)
        tokens = ['<root>'] + cur_tokens
        print('tokens ==>')
        print(tokens)
        print('heads ==>')
        print([tokens[head_id] for head_id in mst_out])
        break
def predict(model, words, tags):
    """Predict heads and labels for a single sentence.

    Args:
        model: callable returning ``(S_arc, S_lab)`` for ``(words, tags)``;
            it is switched to evaluation mode first.
        words, tags: either both plain lists of indices (wrapped into a
            batch of one) or inputs already accepted by ``model``.

    Returns:
        ``(heads, labels)`` where ``heads`` is the output of ``mst`` on the
        first arc score matrix and ``labels`` is a NumPy array of arg-max
        label indices.
    """
    assert type(words) == type(tags)
    if type(words) == type(tags) == list:
        # Convert the lists into input for the PyTorch model.
        words = Variable(torch.LongTensor([words]))
        tags = Variable(torch.LongTensor([tags]))

    # Disable dropout.
    model.eval()

    # Predict arc and label score matrices.
    S_arc, S_lab = model(words, tags)

    # Predict heads from the first element of the batch.
    arc_scores = S_arc[0].data.numpy()
    heads = mst(arc_scores)

    # Predict labels: keep, per position, the scores of its predicted head.
    lab_scores = S_lab[0]
    head_index = torch.LongTensor(heads).unsqueeze(0)
    head_index = Variable(head_index.expand(lab_scores.size(0), -1))
    head_scores = torch.gather(
        lab_scores, 1, head_index.unsqueeze(1)).squeeze(1)

    best_labels = head_scores.max(dim=0)[1]
    return heads, best_labels.data.numpy()
def test_runtime(self):
    """Generate the "mst_graph" undirected graph and run ``mst`` on it."""
    # NOTE(review): this cursor is never used below - confirm it can go.
    cur = self.conn.cursor()
    gname = "mst_graph"
    generate_undirected_graph(gname, 100, self.conn)
    # print() call instead of a print statement so this parses on Python 3.
    print("graph builded")
    # The graph name is passed twice, as the second and third argument.
    mst(self.conn, gname, gname)
"""
Code taken from Hyperbolic Hierarchical Clustering (HypHC) by Chami et al.
for more details visit https://github.com/HazyResearch/HypHC
"""

import numpy as np
import mst

if __name__ == '__main__':
    # `np.float` was only an alias of the builtin `float` and no longer
    # exists in current NumPy releases, so the builtin is used directly.
    x = np.array([0, 1, 3, 7, 15], dtype=float)
    # Pairwise absolute differences between the entries of x.
    dists = np.abs(x[np.newaxis, :] - x[:, np.newaxis])
    print(dists)
    print(mst.mst(dists, 5))
    # Same call on the negated matrix.
    print(-dists)
    print(mst.mst(-dists, 5))

    A = np.arange(16, dtype=float).reshape((4, 4))
    print(A)
    B = mst.reorder(A, np.array([3, 2, 1, 0]), 4)
    print(B)
def get_mst():
    """Return the JSON-encoded MST of the points posted in the 'data' form field.

    The field is decoded with ``json_loads``, every entry is converted to a
    tuple, and the result of ``mst.fullmap_of_pointslist`` is passed to
    ``mst.mst``.
    """
    raw_points = json_loads(request.form.get('data'))
    # Build a real list: on Python 3 `map` yields a one-shot iterator, which
    # breaks any consumer that takes len() of it or iterates it twice.
    points = [tuple(point) for point in raw_points]
    return json_dumps(mst.mst(mst.fullmap_of_pointslist(points)))
while len(remaining) > 0: current = path[-1] distances = sorted([(r, edge_distance(current, r)) for r in remaining], key=lambda x: x[1]) city_indeces = [d[0] for d in distances] weights = 1 / np.array([d[1] for d in distances]) next_city = np.random.choice(city_indeces, p=weights/sum(weights)) path.append(next_city) remaining.remove(next_city) return path import mst # print("Generating MST") mst_graph = mst.mst(len(cities), edge_distance) # print("MST generated") def mst_heuristic(ind_len): # generate individual by random walk over minimum spanning tree of the cities start = random.randrange(0, ind_len) path = [start] def dfs(current): neighbours = mst_graph[current][:] random.shuffle(neighbours) for n in neighbours: if n not in path: path.append(n) dfs(n)
def cell_state(input_file, normalization, cutoff, distance_metric, dims=None):
    """Read points from a file, preprocess them and return their MST.

    Args:
        input_file: passed to ``read_points``.
        normalization: callable invoked as ``normalization(points, cutoff)``;
            its return value is ignored.
        cutoff: second argument of ``normalization``.
        distance_metric: passed to ``mst.mst`` together with the points.
        dims: when not ``None``, ``reduce_dimensions(points, dims)`` is
            called before the MST is built.

    Returns:
        The result of ``mst.mst(points, distance_metric)``.
    """
    points = read_points(input_file)
    normalization(points, cutoff)
    # Identity test: `!= None` would invoke a custom or element-wise __ne__.
    if dims is not None:
        reduce_dimensions(points, dims)
    return mst.mst(points, distance_metric)
previous_city_no = cur_city_no distance += city_distance_data[previous_city_no][0] return distance if __name__ == "__main__": runTimes = 200 ans = [] fillgraph(graph1) print("begin running mst heuristic hill climbing for {0} times".format( runTimes)) for i in range(runTimes): tree = mst(graph1) seq = hillClimbing(graph1, tree) seq_cost = cost(seq, graph1) print(seq_cost) ans.append((seq, seq_cost)) cost_list = [float(ans[i][1]) for i in range(len(ans))] print( "final result, tsp min cost = {0}; tsp max cost = {1};tsp average cost = {2}" .format(min(cost_list), max(cost_list), sum(cost_list) / (len(cost_list) + 0.0)))
from distance import distance, mirror
from mst import mst
from time import time

# Benchmark: time `distance` and `mst` on the points of an n x n integer grid.
n = 125
m = []
for i in range(n):
    for j in range(n):
        m.append((i, j))

t = time()
m = distance(m)  # presumably a pairwise distance table - it is indexed m[a][b] below
print(time() - t)

t = time()
g = mst(m)
# Total length of the edges returned by mst.
l = 0
for e in g:
    l += m[e[0]][e[1]]
# print() calls instead of print statements so the script parses on Python 3.
print(g)
print(l)
print(time() - t)
def tspmst(Graph):
    """Build a TSP tour from a doubled spanning tree, then print and draw it.

    Steps, as implemented below:
      1. compute the spanning-tree edges with ``mst(Graph)``;
      2. duplicate every edge (the copies get '.5' appended to their 'id')
         and load both sets into a multigraph;
      3. obtain an Euler cycle of that multigraph with ``euler``;
      4. walk the cycle, keeping edges that touch an unvisited node and
         otherwise jumping straight to the first unvisited node;
      5. add up the 'weight' of the tour edges, set their 'color' to 'red'
         in ``Graph``, print the tour and its cost, and draw the graph.

    Args:
        Graph: graph indexed as ``Graph[u][v][attr]`` that provides
            ``get_edge_data`` and ``edges()``; edges carry 'weight' and
            nodes carry 'x' and 'y' attributes.  It is modified in place.
    """
    start_time = time.time()
    ET = mst(Graph)
    print('Finished MST calculation in ', time.time() - start_time)

    # Double the tree: the copies get a distinct id so both can coexist.
    ETd = copy.deepcopy(ET)
    for _, _, e in ETd:
        e['id'] = e['id'] + '.5'
    T = nx.MultiGraph()
    T.add_edges_from(ET + ETd)

    start_time = time.time()
    C = euler(T)
    print('Finished eulercycle calculation in ', time.time() - start_time)

    # Every start node of a cycle edge begins as unvisited.
    visited = collections.OrderedDict()
    for v1, _, _ in C:
        visited[v1] = False

    H = []
    skip = False
    skipUntil = 'N'  # placeholder value while no shortcut is pending
    start_time = time.time()
    for edge in C:
        if skip:
            # After a shortcut, ignore cycle edges until one starts at its target.
            if edge[0] != skipUntil:
                continue
            skip = False
            skipUntil = 'N'
        if all(visited.values()):
            break
        if not visited[edge[0]] or not visited[edge[1]]:
            H.append(edge)
            visited[edge[0]] = True
            visited[edge[1]] = True
        else:
            # Both ends already seen: jump to the first unvisited node.
            shortcut = [k for k, v in visited.items() if not v][0]
            H.append((edge[0], shortcut, 'id 0'))
            visited[shortcut] = True
            skip = True
            skipUntil = shortcut

    Tour = []
    cost = 0
    for v1, v2, _ in H:
        cost = cost + Graph.get_edge_data(v1, v2)['weight']
        Graph[v1][v2]['color'] = 'red'
        Tour.append(v1)
    Tour.append(H[-1][1])

    # Close the tour by returning to the first node.
    last = Tour[-1]
    first = Tour[0]
    cost = cost + Graph.get_edge_data(last, first)['weight']
    Graph[first][last]['color'] = 'red'
    Tour.append(first)

    print('Finished TSP calculation in ', time.time() - start_time)
    print('Tour: ', Tour)
    print('Cost: ', cost)

    # Node positions come from the 'x' and 'y' node attributes.
    x = nx.get_node_attributes(Graph, 'x')
    y = nx.get_node_attributes(Graph, 'y')
    pos = {k: (x[k], y[k]) for k in x}

    edges = Graph.edges()
    # Every edge is expected to have a 'color'; only tour edges are set above.
    colors = [Graph[u][v]['color'] for u, v in edges]
    # NOTE(review): networkx documents `edgelist`, not `edges` - confirm that
    # the installed version accepts this keyword.
    nx.draw(Graph, pos, edges=edges, edge_color=colors)
    plt.show()
print(s) # Input prep sentenceInWords, sentenceInTags = s.getSentenceInWordsAndInTags( ) # Getting tokens and tags wordsToIndices = [w2i[w] for w in sentenceInWords] words_tensor = torch.LongTensor(wordsToIndices) tagsToIndices = [t2i[t] for t in sentenceInTags] tags_tensor = torch.LongTensor(tagsToIndices) scoreMatrix = model.predictArcs(Variable(words_tensor), Variable(tags_tensor)) if scoreMatrix.size() == (1, 1) and scoreMatrix.data[0, 0] == 0: headsForWords = 0 print('huh') else: headsForWords = mst(scoreMatrix.data.numpy().T) labelsMatrix = model.predictLabels(torch.LongTensor(headsForWords)) labelsForWords = np.argmax(labelsMatrix.data.numpy(), axis=1) sentencesDepsPredictions.append( createSentenceDependencies(sentenceInWords, sentenceInTags, headsForWords, [i2l[l] for l in labelsForWords])) break writer = ConlluFileWriter('output/predictions.conllu') writer.write(sentencesDepsPredictions)
from distance import distance, mirror
from mst import mst
from time import time

# Benchmark: time `distance` and `mst` on the points of an n x n integer grid.
n = 125
m = []
for i in range(n):
    for j in range(n):
        m.append((i, j))

t = time()
m = distance(m)  # presumably a pairwise distance table - it is indexed m[a][b] below
print(time() - t)

t = time()
g = mst(m)
# Total length of the edges returned by mst.
l = 0
for e in g:
    l += m[e[0]][e[1]]
# print() calls instead of print statements so the script parses on Python 3.
print(g)
print(l)
print(time() - t)