def testClusteringDistances(self):
    """Check the neighbor distances built from five short symbol lists.

    Feeds five four-symbol lists to a Clustering with NEIGHBOR_DISTANCE = 3,
    collects ``_neighbors`` into a ``(src, dst) -> dist`` map, and verifies
    that each pair is reported once, that 13 pairs exist, and that selected
    pairs carry the expected averaged distance.
    """
    c = cluster.Clustering()
    c.NEIGHBOR_DISTANCE = 3
    c.AddSymbolLists([
        list('abcd'),
        list('acbe'),
        list('bacf'),
        list('badf'),
        list('baef'),
    ])

    distances = {}
    for n in c._neighbors:
        # Every (src, dst) pair must be reported exactly once.
        self.assertNotIn((n.src, n.dst), distances)
        distances[(n.src, n.dst)] = n.dist
    self.assertEqual(13, len(distances))

    # Expected values are sums over the five lists divided by 5.
    # NOTE(review): the 2000/4000 terms presumably are per-list penalties for
    # pairs outside the neighbor window -- confirm against cluster.Clustering.
    self.assertEqual((2 + 1 + 1 + 2000) / 5., distances[('a', 'c')])
    self.assertEqual((1 + 4000) / 5., distances[('a', 'd')])
    self.assertEqual((1 + 4000) / 5., distances[('a', 'e')])
    self.assertEqual((2 + 2 + 2 + 2000) / 5., distances[('a', 'f')])
    self.assertEqual(0, distances[('b', 'a')])
    self.assertEqual((1 + -1 + 2 + 2000) / 5., distances[('b', 'c')])

    # For the remaining pairs only their presence is checked.
    for pair in [('b', 'd'), ('b', 'e'), ('c', 'd'), ('c', 'e'),
                 ('c', 'f'), ('d', 'f'), ('e', 'f')]:
        self.assertIn(pair, distances)
def testClusterOf(self):
    """ClusterOf returns a singleton for a new symbol and the shared cluster after _MakeCluster."""
    clustering = cluster.Clustering()

    # A symbol that was never clustered comes back as a one-element cluster.
    singleton = clustering.ClusterOf('a')
    self.assertEqual(['a'], singleton.syms)

    # After grouping, every member resolves to the same cluster object.
    grouped = clustering._MakeCluster(['a', 'b', 'c'])
    for symbol in ('a', 'b', 'c'):
        self.assertEqual(grouped, clustering.ClusterOf(symbol))
def testClusterToList(self):
    """ClusterToList yields 'bacfed' for the five reference symbol lists."""
    symbol_lists = [
        list(word) for word in ('abcd', 'acbe', 'bacf', 'badf', 'baef')
    ]
    clustering = cluster.Clustering()
    clustering.NEIGHBOR_DISTANCE = 3
    clustering.AddSymbolLists(symbol_lists)

    ordered = clustering.ClusterToList()
    self.assertEqual(list('bacfed'), ordered)
def testSizedClusterToList(self):
    """ClusterToList with a size map and MAX_CLUSTER_SIZE = 1.

    The expected result, 'fedcba', lists the symbols from the largest mapped
    size (f: 8) down to the smallest (a: 3).
    """
    symbol_lists = [
        list(word) for word in ('abcd', 'acbe', 'bacf', 'badf', 'baef')
    ]
    sizes = {
        'a': 3,
        'b': 4,
        'c': 5,
        'd': 6,
        'e': 7,
        'f': 8,
    }

    clustering = cluster.Clustering()
    clustering.NEIGHBOR_DISTANCE = 3
    clustering.MAX_CLUSTER_SIZE = 1  # Will suppress all clusters
    clustering.AddSymbolLists(symbol_lists)

    ordered = clustering.ClusterToList(sizes)
    self.assertEqual(list('fedcba'), ordered)
def testClusterCombine(self):
    """Combine(y, x) concatenates y's symbols before x's and re-homes every symbol."""
    clustering = cluster.Clustering()

    pair = clustering._MakeCluster(['a', 'b'])
    for symbol in ('a', 'b'):
        self.assertEqual(pair, clustering.ClusterOf(symbol))

    single = clustering._MakeCluster(['c'])
    self.assertEqual(single, clustering.ClusterOf('c'))

    # The first argument's symbols lead in the merged cluster.
    merged = clustering.Combine(single, pair)
    self.assertEqual(['c', 'a', 'b'], merged.syms)
    for symbol in ('a', 'b', 'c'):
        self.assertEqual(merged, clustering.ClusterOf(symbol))
def testClusteringDistancesForCallGraph(self):
    """Check neighbor distances and final ordering built from a call graph.

    Builds two processes' worth of CalleeInfo records, feeds them to
    AddSymbolCallGraph together with a whitelist, then verifies the
    ``(src, dst) -> dist`` map collected from ``_neighbors`` and the
    result of ClusterToList.
    """
    c = cluster.Clustering()
    callerA = cluster.CallerInfo(caller_symbol='a', count=1)
    callerB = cluster.CallerInfo(caller_symbol='b', count=2)
    callerC = cluster.CallerInfo(caller_symbol='c', count=3)
    callerD = cluster.CallerInfo(caller_symbol='d', count=100)
    callerE = cluster.CallerInfo(caller_symbol='e', count=200)

    calleeA = cluster.CalleeInfo(index=4, callee_symbol='a', misses=0,
                                 caller_and_count=[])
    calleeB = cluster.CalleeInfo(index=8, callee_symbol='b', misses=1,
                                 caller_and_count=[callerA])
    calleeC = cluster.CalleeInfo(index=12, callee_symbol='c', misses=1,
                                 caller_and_count=[callerA, callerE])
    calleeD = cluster.CalleeInfo(
        index=20, callee_symbol='d', misses=1,
        caller_and_count=[callerB, callerC, callerE])
    calleeF = cluster.CalleeInfo(index=28, callee_symbol='f', misses=10,
                                 caller_and_count=[callerD])

    process1 = [calleeA, calleeB, calleeC, calleeD]
    process2 = [calleeA, calleeB, calleeC, calleeD, calleeF]
    call_graph = [process1, process2]
    # 'e' appears as a caller above and is also listed here; no pair involving
    # 'e' is expected in the distances checked below.
    whitelist = ['e', 'g', 'h', 'k', 'l']
    c.AddSymbolCallGraph(call_graph, whitelist)

    distances = {}
    for n in c._neighbors:
        # Every (src, dst) pair must be reported exactly once.
        self.assertNotIn((n.src, n.dst), distances)
        distances[(n.src, n.dst)] = n.dist
    self.assertEqual(5, len(distances))

    # assertEqual replaces the deprecated assertEquals alias, which no longer
    # exists in Python 3.12+.
    self.assertEqual(-2, distances[('a', 'b')])
    self.assertEqual(-2, distances[('a', 'c')])
    self.assertEqual(-4, distances[('b', 'd')])
    self.assertEqual(-6, distances[('c', 'd')])
    self.assertEqual(-100, distances[('d', 'f')])
    self.assertEqual(list('abcdf'), c.ClusterToList())
def clust_translocations(fragments, chrm1, chrm2, M, S, config, direction):
    """Cluster translocation fragments for a chromosome pair and append them to a file.

    Args:
        fragments: Fragments to cluster; when falsy no clustering is attempted.
        chrm1: Name of the first chromosome (str; used in log text and file name).
        chrm2: Name of the second chromosome (str; used in log text and file name).
        M: Forwarded unchanged to ``cluster.Clustering``.
        S: Forwarded unchanged to ``cluster.Clustering``.
        config: Mapping providing the ``'working_dir'`` and
            ``'clusters_files_dir'`` path prefixes; also forwarded to
            ``cluster.Clustering``.
        direction: Forwarded unchanged to ``cluster.Clustering``.

    Side effects:
        Appends one line per cluster with more than one element to
        ``<working_dir><clusters_files_dir>clusters_translocations_<chrm1>_<chrm2>.txt``.
        The file is only opened when at least one cluster was produced.
    """
    logger.info('Clustering translocations for chromosomes ' + chrm1 + ' ' + chrm2)
    if fragments:
        clusters = cluster.Clustering(fragments, M, S, chrm1, chrm2, 1, config,
                                      direction)
    else:
        clusters = []
    logger.info('Done, clusters: ' + str(len(clusters)))

    if len(clusters) > 0:
        out_path = (config['working_dir'] + config['clusters_files_dir'] +
                    'clusters_translocations_' + chrm1 + '_' + chrm2 + '.txt')
        # The context manager closes the file even if a cluster fails to
        # serialise part-way through the loop.
        with open(out_path, 'a') as f:
            for cl in clusters:
                if cl.num_elements > 1:
                    f.write(cl.to_string())
                    f.write('\n')
def clust(fragments, chrm, type, M, S, config, direction):
    """Cluster fragments of a single chromosome and write them to a file.

    Args:
        fragments: Fragments to cluster; must support ``len()``. When falsy no
            clustering is attempted.
        chrm: Chromosome name (str; used in log text and file name).
        type: Direction-type label (str; used in log text and file name).
            The name shadows the builtin but is kept for caller compatibility.
        M: Forwarded unchanged to ``cluster.Clustering``.
        S: Forwarded unchanged to ``cluster.Clustering``.
        config: Mapping providing the ``'working_dir'`` and
            ``'clusters_files_dir'`` path prefixes; also forwarded to
            ``cluster.Clustering``.
        direction: Forwarded unchanged to ``cluster.Clustering``.

    Side effects:
        Overwrites ``<working_dir><clusters_files_dir>clusters_<chrm>_<type>.txt``
        with one line per cluster that has more than one element. The file is
        only opened when at least one cluster was produced.
    """
    logger.info('Clustering fragments for chromosome ' + chrm +
                ', direction type ' + type + ', input fragments: ' +
                str(len(fragments)) + '...')
    if fragments:
        clusters = cluster.Clustering(fragments, M, S, chrm, chrm, 0, config,
                                      direction)
    else:
        clusters = []
    logger.info('Done, clusters: ' + str(len(clusters)))

    if len(clusters) > 0:
        out_path = (config['working_dir'] + config['clusters_files_dir'] +
                    'clusters_' + chrm + '_' + type + '.txt')
        # The context manager closes the file even if a cluster fails to
        # serialise part-way through the loop.
        with open(out_path, 'w') as f:
            logger.debug('Here would be results!!')
            logger.debug(out_path)
            for cl in clusters:
                if cl.num_elements > 1:
                    f.write(cl.to_string())
                    f.write('\n')
def testClusterReallyShortList(self):
    """A single one-symbol list produces an empty ClusterToList result."""
    clustering = cluster.Clustering()
    clustering.NEIGHBOR_DISTANCE = 3
    clustering.AddSymbolLists([['a']])

    ordered = clustering.ClusterToList()
    self.assertEqual([], ordered)
def testClusterOneList(self):
    """A single symbol list is returned by ClusterToList in its original order."""
    only_list = list('fedcba')
    clustering = cluster.Clustering()
    clustering.NEIGHBOR_DISTANCE = 3
    clustering.AddSymbolLists([only_list])

    ordered = clustering.ClusterToList()
    self.assertEqual(list('fedcba'), ordered)
def _read_graph(file_name):
    """Load a weighted adjacency map from a comma-separated text file.

    Each line is read as ``node,neighbour_1,weight_1,neighbour_2,weight_2,...``.

    Args:
        file_name: Path of the file to read.

    Returns:
        dict mapping a node id (str) to a dict of neighbour id (str) ->
        edge weight (float).

    Raises:
        ValueError: If a weight field cannot be parsed as a float.
    """
    g = {}
    with open(file_name, 'r') as f:
        for line in f:
            # Strip only the line terminator; blindly cutting the last
            # character would damage the final weight of a file that does not
            # end with a newline.
            fields = line.rstrip('\n').split(',')
            node = fields[0]
            g[node] = {}
            for i in range((len(fields) - 1) // 2):
                g[node][fields[2 * i + 1]] = float(fields[2 * i + 2])
    # Blank lines produce an empty-string node; drop it if it is there.
    g.pop('', None)
    return g


def _select(candidates, cost, is_better, start):
    """Scan candidates and keep the one whose cost beats the running best.

    Args:
        candidates: Iterable of items to examine, in order.
        cost: Callable returning the value to compare for one candidate.
        is_better: Two-argument predicate ``is_better(value, best_so_far)``.
            A strict predicate keeps the first of equal candidates, a
            non-strict one keeps the last.
        start: Initial "best so far" value.

    Returns:
        ``(best_candidate, best_cost)``; ``('', start)`` when no candidate
        satisfied the predicate.
    """
    best, best_cost = '', start
    for candidate in candidates:
        value = cost(candidate)
        if is_better(value, best_cost):
            best, best_cost = candidate, value
    return best, best_cost


def _total(values):
    """Add up values strictly left to right, starting from integer 0.

    Used instead of the builtin ``sum`` so that floating-point totals are
    accumulated in exactly the order of a plain ``total = total + value`` loop.
    """
    total = 0
    for value in values:
        total = total + value
    return total


def main():
    """Run the shortest-path and clustering tasks on the road graph and print results.

    Reads the graph from ``graph.csv``, picks N hospital nodes and M random
    building nodes, computes shortest-path trees between them, prints the
    answers to tasks 1.1-1.4 and 2, and draws the clusters for 2, 3 and 5
    clusters. Graph generation/export, tree export to CSV and the interactive
    console lookup that used to be commented out here are not part of this
    function.
    """
    import operator

    # ------------------------- Read the graph from file ----------------------
    g = _read_graph('graph.csv')

    # ------------------------------ Shortest paths ---------------------------
    buildings_list = xmlparser.getBuildingsNodes()
    hospitals_list = xmlparser.getHospitalsNodes()
    N = 10
    M = 100
    coords = xmlparser.getNodesCoords()

    hospitals = [
        graph.NearestNode(g, coords, hospitals_list[i]) for i in range(N)
    ]
    buildings = [
        graph.NearestNode(g, coords, random.choice(buildings_list))
        for _ in range(M)
    ]

    # Hospital trees do not depend on which buildings are selected, so they
    # are built once; building trees are cached across retries.
    # NOTE(review): assumes graph.Dijkstra is a pure function of (g, node).
    hospital_trees = {node: graph.Dijkstra(g, node) for node in hospitals}
    building_trees = {}

    while True:
        for node in buildings:
            if node not in building_trees:
                building_trees[node] = graph.Dijkstra(g, node)

        buildings_to_hospitals = {}
        for node_b in buildings:
            (D, Parent) = building_trees[node_b]
            buildings_to_hospitals[node_b] = {
                node_h: D[node_h] for node_h in hospitals
            }
        hospitals_to_buildings = {}
        for node_h in hospitals:
            (D, Parent) = hospital_trees[node_h]
            hospitals_to_buildings[node_h] = {
                node_b: D[node_b] for node_b in buildings
            }

        # Find a building that cannot reach some hospital (the last such
        # building wins) and replace it with a new random one.
        # NOTE(review): only the building -> hospital direction is checked;
        # hospital -> building distances may still be infinite.
        isolated_building = ''
        for node_b in buildings:
            for node_h in buildings_to_hospitals[node_b]:
                if buildings_to_hospitals[node_b][node_h] == math.inf:
                    isolated_building = node_b
        if isolated_building == '':
            break
        buildings.remove(isolated_building)
        buildings.append(
            graph.NearestNode(g, coords, random.choice(buildings_list)))

    # --------------------------------- 1.1 -----------------------------------
    print('Задание 1.1')
    building_nearest_objects = {}
    for node_b in buildings:
        nearest_from, _ = _select(
            hospitals,
            lambda h, b=node_b: buildings_to_hospitals[b][h],
            operator.lt, math.inf)
        nearest_to, _ = _select(
            hospitals,
            lambda h, b=node_b: hospitals_to_buildings[h][b],
            operator.lt, math.inf)
        nearest_fromto, _ = _select(
            hospitals,
            lambda h, b=node_b: (buildings_to_hospitals[b][h] +
                                 hospitals_to_buildings[h][b]),
            operator.lt, math.inf)
        building_nearest_objects[node_b] = {
            'from': nearest_from,
            'to': nearest_to,
            'fromto': nearest_fromto,
        }
    print('Ближайшие больницы для каждого дома: ')
    print(building_nearest_objects)

    # --------------------------------- 1.2 -----------------------------------
    print(
        'Задание 1.2. Определить, какой из объектов расположен так, что время/расстояние между ним и самым дальним домом минимально'
    )
    object_furthest_buildings = {}
    for node_h in hospitals:
        furthest_from, _ = _select(
            buildings,
            lambda b, h=node_h: hospitals_to_buildings[h][b],
            operator.gt, 0)
        furthest_to, _ = _select(
            buildings,
            lambda b, h=node_h: buildings_to_hospitals[b][h],
            operator.gt, 0)
        furthest_fromto, _ = _select(
            buildings,
            lambda b, h=node_h: (hospitals_to_buildings[h][b] +
                                 buildings_to_hospitals[b][h]),
            operator.gt, 0)
        object_furthest_buildings[node_h] = {
            'from': furthest_from,
            'to': furthest_to,
            'fromto': furthest_fromto,
        }

    # The three min-max scans use "<=" so the last of equal hospitals wins.
    print('Туда: ')
    ans, min_max = _select(
        hospitals,
        lambda h: hospitals_to_buildings[h][
            object_furthest_buildings[h]['from']],
        operator.le, math.inf)
    print('Ответ: ', ans)
    print('Расстояние до дома с номером ',
          object_furthest_buildings[ans]['from'], ' равно: ', min_max)

    print('Обратно: ')
    # The 'to' building was chosen by building -> hospital distance, so the
    # same direction must be compared here (the hospital -> building table
    # was used before, which mixes directions on a directed graph).
    ans, min_max = _select(
        hospitals,
        lambda h: buildings_to_hospitals[
            object_furthest_buildings[h]['to']][h],
        operator.le, math.inf)
    print('Ответ: ', ans)
    print('Расстояние от дома с номером ',
          object_furthest_buildings[ans]['to'], ' равно: ', min_max)

    print('Туда и обратно: ')
    ans, min_max = _select(
        hospitals,
        lambda h: (hospitals_to_buildings[h][
            object_furthest_buildings[h]['fromto']] +
                   buildings_to_hospitals[
                       object_furthest_buildings[h]['fromto']][h]),
        operator.le, math.inf)
    print('Ответ: ', ans)
    print('Расстояние до+от дома с номером ',
          object_furthest_buildings[ans]['fromto'], ' равно: ', min_max)

    # --------------------------------- 1.3 -----------------------------------
    print(
        'Задание 1.3. Для какого объекта инфраструктуры сумма кратчайших расстояний от него до всех домов минимальна.'
    )
    ans, min_sum = _select(
        hospitals,
        lambda h: _total(hospitals_to_buildings[h][b] for b in buildings),
        operator.lt, math.inf)
    print('Ответ: ', ans)
    print('Сумма: ', min_sum)

    # --------------------------------- 1.4 -----------------------------------
    print(
        'Задание 1.4. Для какого объекта инфраструктуры построенное дерево кратчайших путей имеет минимальный вес.'
    )

    def tree_weight(node_h):
        # Weight of the part of node_h's shortest-path tree that reaches
        # the selected buildings.
        (_, parent) = hospital_trees[node_h]
        edges = graph.getSubtreeEdges(parent, node_h, buildings)
        return graph.getSubtreeWeight(edges, g)

    ans, min_weight = _select(hospitals, tree_weight, operator.lt, math.inf)
    print('Ответ: ', ans)
    print('Вес дерева: ', min_weight)

    # -------------------------------- Task 2 ---------------------------------
    print('Задание 2')
    hospital = hospitals[0]
    (D, Parent) = hospital_trees[hospital]
    subtree_edges = graph.getSubtreeEdges(Parent, hospital, buildings)
    weight = graph.getSubtreeWeight(subtree_edges, g)
    sum_w = _total(D[node_b] for node_b in buildings)
    print('Длина дерева:', weight)
    print('Сумма расстояний:', sum_w)

    for n in [2, 3, 5]:
        print(n, 'кластеров: ')
        clusters = cluster.Clustering(buildings, g, n)
        centers = cluster.FindCenters(clusters, g, coords)
        # Start from the hospital -> centers subtree and merge in each
        # center -> cluster-members subtree.
        subtree_edges_obj = graph.getSubtreeEdges(Parent, hospital, centers)
        subtree_edges = subtree_edges_obj.copy()
        sum_w = 0
        for i in range(len(clusters)):
            sum_w = sum_w + D[centers[i]]
            (D_cluster, Parent_cluster) = graph.Dijkstra(g, centers[i])
            subtree_edges_cluster = graph.getSubtreeEdges(
                Parent_cluster, centers[i], clusters[i])
            subtree_edges.update(subtree_edges_cluster)
            for node in clusters[i]:
                sum_w = sum_w + D_cluster[node]
        weight = graph.getSubtreeWeight(subtree_edges, g)
        print('Длина дерева:', weight)
        print('Сумма расстояний:', sum_w)
        visualisation.drawClusters(buildings, clusters, n, g, coords)