Esempio n. 1
0
 def testClusteringDistances(self):
     """Verify the neighbor distances derived from five symbol orderings."""
     clustering = cluster.Clustering()
     clustering.NEIGHBOR_DISTANCE = 3
     orderings = ('abcd', 'acbe', 'bacf', 'badf', 'baef')
     clustering.AddSymbolLists([list(word) for word in orderings])

     # Index each neighbor by its (src, dst) pair; no pair may repeat.
     distances = {}
     for neighbor in clustering._neighbors:
         pair = (neighbor.src, neighbor.dst)
         self.assertFalse(pair in distances)
         distances[pair] = neighbor.dist
     self.assertEqual(13, len(distances))

     # Pairs whose exact distance is pinned by the test.
     exact = [
         (('a', 'c'), (2 + 1 + 1 + 2000) / 5.),
         (('a', 'd'), (1 + 4000) / 5.),
         (('a', 'e'), (1 + 4000) / 5.),
         (('a', 'f'), (2 + 2 + 2 + 2000) / 5.),
         (('b', 'a'), 0),
         (('b', 'c'), (1 + -1 + 2 + 2000) / 5.),
     ]
     for pair, wanted in exact:
         self.assertEqual(wanted, distances[pair])

     # Pairs that only need to be present.
     present = [('b', 'd'), ('b', 'e'), ('c', 'd'), ('c', 'e'), ('c', 'f'),
                ('d', 'f'), ('e', 'f')]
     for pair in present:
         self.assertTrue(pair in distances)
Esempio n. 2
0
 def testClusterOf(self):
     """ClusterOf gives a one-symbol cluster first, then the made cluster."""
     clustering = cluster.Clustering()

     # Before any cluster is made, 'a' is reported alone.
     lone = clustering.ClusterOf('a')
     self.assertEqual(['a'], lone.syms)

     # After grouping, every member resolves to the same cluster.
     grouped = clustering._MakeCluster(['a', 'b', 'c'])
     for sym in ('a', 'b', 'c'):
         self.assertEqual(grouped, clustering.ClusterOf(sym))
Esempio n. 3
0
 def testClusterToList(self):
     """ClusterToList returns the expected order for five symbol orderings."""
     clustering = cluster.Clustering()
     clustering.NEIGHBOR_DISTANCE = 3
     orderings = ('abcd', 'acbe', 'bacf', 'badf', 'baef')
     clustering.AddSymbolLists([list(word) for word in orderings])

     expected = list('bacfed')
     self.assertEqual(expected, clustering.ClusterToList())
Esempio n. 4
0
 def testSizedClusterToList(self):
     """ClusterToList with a size map and a cluster size limit of one."""
     clustering = cluster.Clustering()
     clustering.NEIGHBOR_DISTANCE = 3
     clustering.MAX_CLUSTER_SIZE = 1  # Will suppress all clusters
     orderings = ('abcd', 'acbe', 'bacf', 'badf', 'baef')
     clustering.AddSymbolLists([list(word) for word in orderings])

     # Sizes 3..8 assigned to 'a'..'f' in alphabetical order.
     size_map = dict(zip('abcdef', range(3, 9)))
     expected = list('fedcba')
     self.assertEqual(expected, clustering.ClusterToList(size_map))
Esempio n. 5
0
    def testClusterCombine(self):
        """Combine merges two clusters and re-homes all of their symbols."""
        clustering = cluster.Clustering()

        pair = clustering._MakeCluster(['a', 'b'])
        for sym in ('a', 'b'):
            self.assertEqual(pair, clustering.ClusterOf(sym))

        single = clustering._MakeCluster(['c'])
        self.assertEqual(single, clustering.ClusterOf('c'))

        # The first argument's symbols come first in the merged cluster.
        merged = clustering.Combine(single, pair)
        self.assertEqual(['c', 'a', 'b'], merged.syms)
        for sym in ('a', 'b', 'c'):
            self.assertEqual(merged, clustering.ClusterOf(sym))
Esempio n. 6
0
    def testClusteringDistancesForCallGraph(self):
        """Check neighbor distances and final order built from a call graph.

        Two processes share the callees a-d; the second additionally
        contains callee f. The expected distances are pinned per
        (src, dst) pair, followed by the resulting ClusterToList order.
        """
        c = cluster.Clustering()
        callerA = cluster.CallerInfo(caller_symbol='a', count=1)
        callerB = cluster.CallerInfo(caller_symbol='b', count=2)
        callerC = cluster.CallerInfo(caller_symbol='c', count=3)
        callerD = cluster.CallerInfo(caller_symbol='d', count=100)
        callerE = cluster.CallerInfo(caller_symbol='e', count=200)

        calleeA = cluster.CalleeInfo(index=4,
                                     callee_symbol='a',
                                     misses=0,
                                     caller_and_count=[])
        calleeB = cluster.CalleeInfo(index=8,
                                     callee_symbol='b',
                                     misses=1,
                                     caller_and_count=[callerA])
        calleeC = cluster.CalleeInfo(index=12,
                                     callee_symbol='c',
                                     misses=1,
                                     caller_and_count=[callerA, callerE])
        calleeD = cluster.CalleeInfo(
            index=20,
            callee_symbol='d',
            misses=1,
            caller_and_count=[callerB, callerC, callerE])
        calleeF = cluster.CalleeInfo(index=28,
                                     callee_symbol='f',
                                     misses=10,
                                     caller_and_count=[callerD])
        process1 = [calleeA, calleeB, calleeC, calleeD]
        process2 = [calleeA, calleeB, calleeC, calleeD, calleeF]
        call_graph = [process1, process2]
        whitelist = ['e', 'g', 'h', 'k', 'l']
        c.AddSymbolCallGraph(call_graph, whitelist)

        # Index each neighbor by its (src, dst) pair; no pair may repeat.
        distances = {}
        for n in c._neighbors:
            self.assertFalse((n.src, n.dst) in distances)
            distances[(n.src, n.dst)] = n.dist
        self.assertEqual(5, len(distances))

        # assertEqual rather than the deprecated assertEquals alias, which
        # no longer exists in Python 3.12+.
        self.assertEqual(-2, distances[('a', 'b')])
        self.assertEqual(-2, distances[('a', 'c')])
        self.assertEqual(-4, distances[('b', 'd')])
        self.assertEqual(-6, distances[('c', 'd')])
        self.assertEqual(-100, distances[('d', 'f')])
        self.assertEqual(list('abcdf'), c.ClusterToList())
Esempio n. 7
0
def clust_translocations(fragments, chrm1, chrm2, M, S, config, direction):
    """Cluster translocation fragments for a chromosome pair and save them.

    Args:
        fragments: Fragments to cluster; when falsy, no clustering is run.
        chrm1: Name of the first chromosome (used in the log and file name).
        chrm2: Name of the second chromosome (used in the log and file name).
        M: Forwarded unchanged to ``cluster.Clustering``.
        S: Forwarded unchanged to ``cluster.Clustering``.
        config: Mapping providing ``'working_dir'`` and
            ``'clusters_files_dir'``; also forwarded to the clustering call.
        direction: Forwarded unchanged to ``cluster.Clustering``.

    Clusters with more than one element are appended, one per line, to
    ``clusters_translocations_<chrm1>_<chrm2>.txt`` in the configured
    directory. Nothing is written when there are no clusters.
    """
    logger.info('Clustering translocations for chromosomes ' + chrm1 + ' ' +
                chrm2)
    if fragments:
        clusters = cluster.Clustering(fragments, M, S, chrm1, chrm2, 1, config,
                                      direction)
    else:
        clusters = []
    logger.info('Done, clusters: ' + str(len(clusters)))
    if len(clusters) > 0:
        out_path = (config['working_dir'] + config['clusters_files_dir'] +
                    'clusters_translocations_' + chrm1 + '_' + chrm2 + '.txt')
        # Append mode: earlier results in the same file are kept. The
        # context manager closes the file even if a cluster fails to
        # serialize or a write raises.
        with open(out_path, 'a') as f:
            for cl in clusters:
                if cl.num_elements > 1:
                    f.write(cl.to_string())
                    f.write('\n')
Esempio n. 8
0
def clust(fragments, chrm, type, M, S, config, direction):
    """Cluster fragments of one chromosome and direction type and save them.

    Args:
        fragments: Fragments to cluster; must support ``len()``. When
            empty, no clustering is run.
        chrm: Chromosome name (used in the log and file name).
        type: Direction-type label (used in the log and file name). The
            name shadows the builtin but is kept for caller compatibility.
        M: Forwarded unchanged to ``cluster.Clustering``.
        S: Forwarded unchanged to ``cluster.Clustering``.
        config: Mapping providing ``'working_dir'`` and
            ``'clusters_files_dir'``; also forwarded to the clustering call.
        direction: Forwarded unchanged to ``cluster.Clustering``.

    Clusters with more than one element are written, one per line, to
    ``clusters_<chrm>_<type>.txt`` in the configured directory, replacing
    any previous content. Nothing is written when there are no clusters.
    """
    logger.info('Clustering fragments for chromosome ' + chrm +
                ', direction type ' + type + ', input fragments: ' +
                str(len(fragments)) + '...')
    if fragments:
        clusters = cluster.Clustering(fragments, M, S, chrm, chrm, 0, config,
                                      direction)
    else:
        clusters = []
    logger.info('Done, clusters: ' + str(len(clusters)))
    if len(clusters) > 0:
        # Build the output path once; it is both opened and logged.
        out_path = (config['working_dir'] + config['clusters_files_dir'] +
                    'clusters_' + chrm + '_' + type + '.txt')
        # Write mode truncates the previous file. The context manager closes
        # the file even if a cluster fails to serialize or a write raises.
        with open(out_path, 'w') as f:
            logger.debug('Here would be results!!')
            logger.debug(out_path)
            for cl in clusters:
                if cl.num_elements > 1:
                    f.write(cl.to_string())
                    f.write('\n')
Esempio n. 9
0
 def testClusterReallyShortList(self):
     """A single one-symbol list produces an empty ClusterToList result."""
     clustering = cluster.Clustering()
     clustering.NEIGHBOR_DISTANCE = 3
     only_list = list('a')
     clustering.AddSymbolLists([only_list])
     self.assertEqual([], clustering.ClusterToList())
Esempio n. 10
0
 def testClusterOneList(self):
     """A single symbol list comes back from ClusterToList in its own order."""
     clustering = cluster.Clustering()
     clustering.NEIGHBOR_DISTANCE = 3
     symbols = list('fedcba')
     clustering.AddSymbolLists([symbols])
     self.assertEqual(list('fedcba'), clustering.ClusterToList())
Esempio n. 11
0
def _closest(candidates, distance_of):
    """Return the candidate with the smallest distance below infinity.

    Ties keep the earliest candidate. Returns '' when ``candidates`` is
    empty or every distance is infinite.
    """
    best, best_dist = '', math.inf
    for candidate in candidates:
        dist = distance_of(candidate)
        if dist < best_dist:
            best, best_dist = candidate, dist
    return best


def _minimax(hospitals, buildings, distance):
    """Pick the hospital whose furthest building is nearest.

    ``distance(hospital, building)`` supplies the metric. Returns a tuple
    ``(hospital, furthest_building, distance)``. Ties between hospitals keep
    the last one; a hospital whose distances are all zero reports '' as its
    furthest building.
    """
    best_hospital, best_building, best_dist = '', '', math.inf
    for node_h in hospitals:
        far_building, far_dist = '', 0
        for node_b in buildings:
            dist = distance(node_h, node_b)
            if dist > far_dist:
                far_building, far_dist = node_b, dist
        if far_dist <= best_dist:
            best_hospital, best_building = node_h, far_building
            best_dist = far_dist
    return best_hospital, best_building, best_dist


def main():
    """Run the hospital/building shortest-path analysis on ``graph.csv``.

    Steps:
      1. Load the weighted graph from ``graph.csv``.
      2. Pick N hospital nodes and M random building nodes and compute
         shortest-path trees for all of them, replacing buildings that
         cannot reach some hospital.
      3. Print the answers to tasks 1.1-1.4 (nearest hospitals, minimax
         hospital, minimal distance sum, lightest shortest-path tree).
      4. Task 2: compare the tree of the first hospital against trees
         routed through cluster centers for 2, 3 and 5 clusters, and draw
         the clusters.
    """
    # ============================= Load the graph ============================
    # Each row of graph.csv is: node id, then (neighbour id, weight) pairs.
    # NOTE: the file was originally produced from graph.getGraphList() with
    # csv.writer; that generation code is no longer part of this function.
    g = {}
    with open('graph.csv', 'r') as csv_file:
        for line in csv_file:
            # Strip only the line terminator, so a final line without a
            # trailing newline does not lose its last character.
            fields = line.rstrip('\n').split(',')
            node = fields[0]
            g[node] = {}
            for i in range((len(fields) - 1) // 2):
                g[node][fields[2 * i + 1]] = float(fields[2 * i + 2])
    # Blank rows produce an empty-string node; drop it if present.
    g.pop('', None)

    # ============================= Shortest paths ============================
    buildings_list = xmlparser.getBuildingsNodes()
    hospitals_list = xmlparser.getHospitalsNodes()
    N = 10   # number of hospitals taken from the head of hospitals_list
    M = 100  # number of randomly sampled buildings
    coords = xmlparser.getNodesCoords()

    hospitals = [
        graph.NearestNode(g, coords, hospitals_list[i]) for i in range(N)
    ]
    buildings = []
    while len(buildings) < M:
        buildings.append(
            graph.NearestNode(g, coords, random.choice(buildings_list)))

    # Hospitals never change, so their trees are computed once. Building
    # trees are cached per node, so a retry only runs Dijkstra for the newly
    # drawn building.
    # NOTE(review): assumes graph.Dijkstra is deterministic and does not
    # mutate g - confirm.
    hospital_trees = {node: graph.Dijkstra(g, node) for node in hospitals}
    building_trees = {}
    while True:
        for node in buildings:
            if node not in building_trees:
                building_trees[node] = graph.Dijkstra(g, node)

        # Each tree is a (distances, parents) pair; index 0 is the distances.
        buildings_to_hospitals = {
            node_b: {
                node_h: building_trees[node_b][0][node_h]
                for node_h in hospitals
            }
            for node_b in buildings
        }

        # Keep the last building that cannot reach some hospital.
        # NOTE(review): only the building -> hospital direction is checked;
        # unreachable hospital -> building routes are not filtered out.
        isolated_building = None
        for node_b in buildings:
            for dist in buildings_to_hospitals[node_b].values():
                if dist == math.inf:
                    isolated_building = node_b

        if isolated_building is None:
            break

        # Swap the isolated building for a fresh random one and retry.
        buildings.remove(isolated_building)
        buildings.append(
            graph.NearestNode(g, coords, random.choice(buildings_list)))

    hospitals_to_buildings = {
        node_h: {
            node_b: hospital_trees[node_h][0][node_b] for node_b in buildings
        }
        for node_h in hospitals
    }

    # ================================= 1.1 ===================================

    print('Задание 1.1')

    # For every building: nearest hospital going there ('from'), coming
    # back ('to'), and for the round trip ('fromto').
    building_nearest_objects = {}
    for node_b in buildings:
        to_hospital = buildings_to_hospitals[node_b]
        building_nearest_objects[node_b] = {
            'from':
            _closest(hospitals, lambda h: to_hospital[h]),
            'to':
            _closest(hospitals, lambda h: hospitals_to_buildings[h][node_b]),
            'fromto':
            _closest(
                hospitals, lambda h: to_hospital[h] + hospitals_to_buildings[
                    h][node_b]),
        }

    print('Ближайшие больницы для каждого дома: ')
    print(building_nearest_objects)

    # ================================= 1.2 ===================================

    print(
        'Задание 1.2. Определить, какой из объектов расположен так, что время/расстояние между ним и самым дальним домом минимально'
    )

    print('Туда: ')
    ans, furthest, min_max = _minimax(
        hospitals, buildings, lambda h, b: hospitals_to_buildings[h][b])
    print('Ответ: ', ans)
    print('Расстояние до дома с номером ', furthest, ' равно: ', min_max)

    print('Обратно: ')
    # The way back is measured building -> hospital, so both the choice of
    # the furthest building and the reported distance use that direction.
    ans, furthest, min_max = _minimax(
        hospitals, buildings, lambda h, b: buildings_to_hospitals[b][h])
    print('Ответ: ', ans)
    print('Расстояние от дома с номером ', furthest, ' равно: ', min_max)

    print('Туда и обратно: ')
    ans, furthest, min_max = _minimax(
        hospitals, buildings, lambda h, b: hospitals_to_buildings[h][b] +
        buildings_to_hospitals[b][h])
    print('Ответ: ', ans)
    print('Расстояние до+от дома с номером ', furthest, ' равно: ', min_max)

    # ================================= 1.3 ===================================

    print(
        'Задание 1.3. Для какого объекта инфраструктуры сумма кратчайших расстояний от него до всех домов минимальна.'
    )

    ans = ''
    min_sum = math.inf
    for node_h in hospitals:
        # Plain left-to-right accumulation, without shadowing builtin sum().
        total = 0
        for node_b in buildings:
            total = total + hospitals_to_buildings[node_h][node_b]
        if total < min_sum:
            ans = node_h
            min_sum = total

    print('Ответ: ', ans)
    print('Сумма: ', min_sum)

    # ================================= 1.4 ===================================

    print(
        'Задание 1.4. Для какого объекта инфраструктуры построенное дерево кратчайших путей имеет минимальный вес.'
    )

    min_weight = math.inf
    ans = ''

    for node_h in hospitals:
        (D, Parent) = hospital_trees[node_h]
        subtree_edges = graph.getSubtreeEdges(Parent, node_h, buildings)
        subtree_weight = graph.getSubtreeWeight(subtree_edges, g)
        if subtree_weight < min_weight:
            min_weight = subtree_weight
            ans = node_h

    print('Ответ: ', ans)
    print('Вес дерева: ', min_weight)

    # NOTE: an interactive console for browsing per-hospital and
    # per-building results used to live here as disabled code; it was not
    # executed and has been dropped.

    # ================================ Task 2 =================================

    print('Задание 2')

    # Baseline: shortest-path tree from the first hospital to all buildings.
    hospital = hospitals[0]
    (D, Parent) = hospital_trees[hospital]
    subtree_edges = graph.getSubtreeEdges(Parent, hospital, buildings)
    weight = graph.getSubtreeWeight(subtree_edges, g)
    sum_w = 0
    for node_b in buildings:
        sum_w = sum_w + D[node_b]
    print('Длина дерева:', weight)
    print('Сумма расстояний:', sum_w)

    for n in [2, 3, 5]:
        print(n, 'кластеров: ')
        clusters = cluster.Clustering(buildings, g, n)
        centers = cluster.FindCenters(clusters, g, coords)
        # Start from the hospital -> centers edges, then add each center's
        # own tree towards the members of its cluster.
        subtree_edges_obj = graph.getSubtreeEdges(Parent, hospital, centers)
        sum_w = 0
        subtree_edges = subtree_edges_obj.copy()
        for i in range(len(clusters)):
            center = centers[i]
            members = clusters[i]
            sum_w = sum_w + D[center]
            (D_cluster, Parent_cluster) = graph.Dijkstra(g, center)
            subtree_edges_cluster = graph.getSubtreeEdges(
                Parent_cluster, center, members)
            subtree_edges.update(subtree_edges_cluster)
            for node in members:
                sum_w = sum_w + D_cluster[node]
        weight = graph.getSubtreeWeight(subtree_edges, g)
        print('Длина дерева:', weight)
        print('Сумма расстояний:', sum_w)
        visualisation.drawClusters(buildings, clusters, n, g, coords)