コード例 #1
0
    def test_equalsTest_sameInstance_2D(self):
        """Compare ClusterContainers that are built from shared Cluster objects.

        container1 and container2 wrap the very same cluster instances and
        must be equal; every other candidate differs in content, in type or
        in dimension and must compare unequal.
        """
        d2 = 2
        d3 = 3
        s1 = Sample((0.0, 0.0))
        s2 = Sample((1.0, 0.0))
        s3 = Sample((2.0, 0.0))
        s4 = Sample((3.0, 0.0))
        s5 = Sample((3.0, 1.0))
        s6 = Sample((0.0, 0.0, 0.0))

        c1 = Cluster([s1, s2, s3], d2)
        c2 = Cluster([s4], d2)
        c3 = Cluster([s5], d2)
        c4 = Cluster([s6], d3)

        container1 = ClusterContainer([c1, c2], d2)
        container2 = ClusterContainer([c1, c2], d2)
        container3 = ClusterContainer([c1, c3], d2)
        container4 = ClusterContainer([c3], d2)
        # Deliberately a bare Cluster rather than a ClusterContainer.
        container5 = c3
        container6 = ClusterContainer([c4], d3)

        # assertEqual/assertNotEqual replace the deprecated assertEquals /
        # assertNotEquals aliases, which no longer exist in Python 3.12.
        self.assertEqual(container1, container2, "los container son iguales")
        self.assertNotEqual(container1, container3,
                            "los container no son iguales")
        self.assertNotEqual(container1, container4,
                            "los container no son iguales")
        self.assertNotEqual(container1, container5,
                            "container 5 no es un container")
        self.assertNotEqual(container1, container6,
                            "container 6 es una dimension diferente")
コード例 #2
0
    def test_createDistanceGraph_severalSamplesInClusters_4D(self):
        """Check every edge weight of the distance graph for four 4D clusters.

        Each cluster holds the four corners of a 2x2 square in the first two
        coordinates (the last two are zero). The expected weights match the
        Euclidean distances between the squares' centres:
        (2, 6), (2, 2), (5, 6) and (5, 2).
        """
        d = 4
        c1 = Cluster([(1.0, 7.0, 0.0, 0.0), (3.0, 7.0, 0.0, 0.0),
                      (1.0, 5.0, 0.0, 0.0), (3.0, 5.0, 0.0, 0.0)], d)
        c3 = Cluster([(4.0, 7.0, 0.0, 0.0), (6.0, 5.0, 0.0, 0.0),
                      (6.0, 7.0, 0.0, 0.0), (4.0, 5.0, 0.0, 0.0)], d)
        c2 = Cluster([(1.0, 3.0, 0.0, 0.0), (3.0, 3.0, 0.0, 0.0),
                      (1.0, 1.0, 0.0, 0.0), (3.0, 1.0, 0.0, 0.0)], d)
        c4 = Cluster([(4.0, 3.0, 0.0, 0.0), (4.0, 1.0, 0.0, 0.0),
                      (6.0, 1.0, 0.0, 0.0), (6.0, 3.0, 0.0, 0.0)], d)
        clusters = ClusterContainer([c1, c2, c3, c4], d)
        g = createDistanceGraph(clusters.getClusters())

        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12.
        self.assertEqual(
            g.get_edge_data(c1, c2)['weight'], 4.0, "la distancia debe ser 4")
        self.assertEqual(
            g.get_edge_data(c1, c3)['weight'], 3.0, "la distancia debe ser 3")
        self.assertEqual(
            g.get_edge_data(c1, c4)['weight'], 5.0, "la distancia debe ser 5")
        self.assertEqual(
            g.get_edge_data(c2, c3)['weight'], 5.0, "la distancia debe ser 5")
        self.assertEqual(
            g.get_edge_data(c2, c4)['weight'], 3.0, "la distancia debe ser 3")
        self.assertEqual(
            g.get_edge_data(c3, c4)['weight'], 4.0, "la distancia debe ser 4")

    # Section marker (previously a no-op string statement inside the test):
    # minimum distance -- finds the lowest-weight edge in the distance graph.
コード例 #3
0
    def test_createDistanceGraph_severalSamplesInClusters_3D(self):
        """Check every edge weight of the distance graph for four 3D clusters.

        Each cluster holds the four corners of a 2x2 square in the first two
        coordinates (the third is zero). The expected weights match the
        Euclidean distances between the squares' centres:
        (2, 6), (2, 2), (5, 6) and (5, 2).
        """
        d = 3
        c1 = Cluster([(1.0, 7.0, 0.0), (3.0, 7.0, 0.0),
                      (1.0, 5.0, 0.0), (3.0, 5.0, 0.0)], d)
        c3 = Cluster([(4.0, 7.0, 0.0), (6.0, 5.0, 0.0),
                      (6.0, 7.0, 0.0), (4.0, 5.0, 0.0)], d)
        c2 = Cluster([(1.0, 3.0, 0.0), (3.0, 3.0, 0.0),
                      (1.0, 1.0, 0.0), (3.0, 1.0, 0.0)], d)
        c4 = Cluster([(4.0, 3.0, 0.0), (4.0, 1.0, 0.0),
                      (6.0, 1.0, 0.0), (6.0, 3.0, 0.0)], d)
        clusters = ClusterContainer([c1, c2, c3, c4], d)
        g = createDistanceGraph(clusters.getClusters())

        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12.
        self.assertEqual(
            g.get_edge_data(c1, c2)['weight'], 4.0, "la distancia debe ser 4")
        self.assertEqual(
            g.get_edge_data(c1, c3)['weight'], 3.0, "la distancia debe ser 3")
        self.assertEqual(
            g.get_edge_data(c1, c4)['weight'], 5.0, "la distancia debe ser 5")
        self.assertEqual(
            g.get_edge_data(c2, c3)['weight'], 5.0, "la distancia debe ser 5")
        self.assertEqual(
            g.get_edge_data(c2, c4)['weight'], 3.0, "la distancia debe ser 3")
        self.assertEqual(
            g.get_edge_data(c3, c4)['weight'], 4.0, "la distancia debe ser 4")
コード例 #4
0
    def test_minimumDistance_trivial_4d(self):
        """With two single-sample 4D clusters one unit apart, the edge
        returned by minimumEdge must have weight 1."""
        d = 4
        c1 = Cluster([(0.0, 0.0, 0.0, 0.0)], d)
        c2 = Cluster([(1.0, 0.0, 0.0, 0.0)], d)
        clusters = ClusterContainer([c1, c2], d)
        g = createDistanceGraph(clusters.getClusters())

        (u, v) = minimumEdge(g)
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12.
        self.assertEqual(g[u][v]['weight'], 1.0,
                         "la minima arista tiene peso 1")
コード例 #5
0
 def test_minimunDistanceOnlyOneSamplesForCluster_4D(self):
     """With four single-sample 4D clusters on a 3x4 rectangle, the edge
     returned by minimumEdge must have weight 3 (the short side)."""
     d = 4
     c1 = Cluster([(2.0, 6.0, 0.0, 0.0)], d)
     c2 = Cluster([(2.0, 2.0, 0.0, 0.0)], d)
     c3 = Cluster([(5.0, 6.0, 0.0, 0.0)], d)
     c4 = Cluster([(5.0, 2.0, 0.0, 0.0)], d)
     clusters = ClusterContainer([c1, c2, c3, c4], d)
     g = createDistanceGraph(clusters.getClusters())

     (u, v) = minimumEdge(g)
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12.
     self.assertEqual(g[u][v]['weight'], 3.0,
                      "la minima arista tiene peso 3")
コード例 #6
0
    def test_minimumDistance_severalSamplesInClusters_2D(self):
        """With four 2D clusters of four samples each, the edge returned by
        minimumEdge must have weight 3."""
        d = 2
        c1 = Cluster([(1.0, 7.0), (3.0, 7.0), (1.0, 5.0), (3.0, 5.0)], d)
        c2 = Cluster([(1.0, 3.0), (3.0, 3.0), (1.0, 1.0), (3.0, 1.0)], d)
        c3 = Cluster([(4.0, 7.0), (6.0, 5.0), (6.0, 7.0), (4.0, 5.0)], d)
        c4 = Cluster([(4.0, 3.0), (4.0, 1.0), (6.0, 1.0), (6.0, 3.0)], d)
        clusters = ClusterContainer([c1, c2, c3, c4], d)
        g = createDistanceGraph(clusters.getClusters())

        (u, v) = minimumEdge(g)
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12.
        self.assertEqual(g[u][v]['weight'], 3.0,
                         "la minima arista tiene peso 3")
コード例 #7
0
    def test_equalsTest_differentInstancesOfTheSameData3D(self):
        """Compare ClusterContainers built from separate but identical data.

        c1 and c2 are distinct objects holding the same 3D samples and must be
        equal; the remaining candidates differ in one coordinate, in the
        number of clusters, in type or in dimension and must be unequal.
        """
        d = 3
        d4 = 4
        c1 = ClusterContainer([
            Cluster([(0.0, 1.0, 0.0), (1.0, 1.0, 0.0), (2.0, 1.0, 0.0)], d),
            Cluster([(2.6, 3.4, 0.0)], d),
        ], d)
        c2 = ClusterContainer([
            Cluster([(0.0, 1.0, 0.0), (1.0, 1.0, 0.0), (2.0, 1.0, 0.0)], d),
            Cluster([(2.6, 3.4, 0.0)], d),
        ], d)
        # Same as c1 except for one coordinate (2.7 instead of 2.6).
        c3 = ClusterContainer([
            Cluster([(0.0, 1.0, 0.0), (1.0, 1.0, 0.0), (2.0, 1.0, 0.0)], d),
            Cluster([(2.7, 3.4, 0.0)], d),
        ], d)
        c4 = ClusterContainer([Cluster([(2.6, 3.4, 0.0)], d)], d)
        # Deliberately a bare Cluster rather than a ClusterContainer.
        c5 = Cluster([(2.6, 3.4, 0.0)], d)
        # Both clusters are declared 4-dimensional, matching their 4-tuples.
        # The first one used to be declared with d (3) despite holding
        # 4-tuples.
        c6 = ClusterContainer([
            Cluster([(0.0, 1.0, 0.0, 0.0), (1.0, 1.0, 0.0, 0.0),
                     (2.0, 1.0, 0.0, 0.0)], d4),
            Cluster([(2.6, 3.4, 0.0, 0.0)], d4),
        ], d4)

        # assertEqual/assertNotEqual replace the deprecated assertEquals /
        # assertNotEquals aliases, which no longer exist in Python 3.12.
        self.assertEqual(c1, c2, "Los dos clusters container son iguales")
        self.assertNotEqual(c1, c3, "los clusters container no son iguales")
        self.assertNotEqual(c1, c4, "Los clusters container no son iguales")
        self.assertNotEqual(c1, c5, "c5 no es un cluster container")
        self.assertNotEqual(
            c1, c6, "los cluster container son de dimension diferente")
コード例 #8
0
    def test_createDistanceGraph_4D(self):
        """Check every edge weight of the distance graph for four
        single-sample 4D clusters placed on the corners of a 3x4 rectangle."""
        d = 4
        c1 = Cluster([(2.0, 6.0, 0.0, 0.0)], d)
        c2 = Cluster([(2.0, 2.0, 0.0, 0.0)], d)
        c3 = Cluster([(5.0, 6.0, 0.0, 0.0)], d)
        c4 = Cluster([(5.0, 2.0, 0.0, 0.0)], d)
        clusters = ClusterContainer([c1, c2, c3, c4], d)
        g = createDistanceGraph(clusters.getClusters())

        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12.
        self.assertEqual(
            g.get_edge_data(c1, c2)['weight'], 4.0, "la distancia debe ser 4")
        self.assertEqual(
            g.get_edge_data(c1, c3)['weight'], 3.0, "la distancia debe ser 3")
        self.assertEqual(
            g.get_edge_data(c1, c4)['weight'], 5.0, "la distancia debe ser 5")
        self.assertEqual(
            g.get_edge_data(c2, c3)['weight'], 5.0, "la distancia debe ser 5")
        self.assertEqual(
            g.get_edge_data(c2, c4)['weight'], 3.0, "la distancia debe ser 3")
        self.assertEqual(
            g.get_edge_data(c3, c4)['weight'], 4.0, "la distancia debe ser 4")
コード例 #9
0
 def test_onlyOneOutlier_3D(self):
     """A single class-B sample in the middle of four collinear class-A
     samples must split them into the two pairs on either side of it."""
     d = 3
     classA = SampleContainer([(0.0, 0.0, 0.0), (0.0, 1.0, 0.0),
                               (0.0, 2.0, 0.0), (0.0, 3.0, 0.0)], d)
     classB = SampleContainer([(0.0, 1.5, 0.0)], d)

     clusters = createClusters(classA, classB)
     clusters_test = ClusterContainer([
         Cluster([(0.0, 2.0, 0.0), (0.0, 3.0, 0.0)], d),
         Cluster([(0.0, 0.0, 0.0), (0.0, 1.0, 0.0)], d),
     ], d)
     # assertEqual replaces the deprecated assertEquals alias, which no
     # longer exists in Python 3.12.
     self.assertEqual(
         clusters, clusters_test,
         "las muestras mergeables deben estar en el mismo cluster")
コード例 #10
0
    def test_onlyOneSampleForCluster_2D(self):
        """When class-A and class-B samples alternate along a line, every
        class-A sample must end up in a cluster of its own."""
        d = 2
        # s0_* are the class-B samples, s1_* the class-A samples; they
        # interleave along the second coordinate (1, 2, 3, ..., 8).
        s0_1, s0_2, s0_3, s0_4 = (0.0, 2.0), (0.0, 4.0), (0.0, 6.0), (0.0, 8.0)
        s1_1, s1_2, s1_3, s1_4 = (0.0, 1.0), (0.0, 3.0), (0.0, 5.0), (0.0, 7.0)
        classA = SampleContainer([s1_1, s1_2, s1_3, s1_4], d)
        classB = SampleContainer([s0_1, s0_2, s0_3, s0_4], d)

        clusters = createClusters(classA, classB)
        clusters_test = ClusterContainer(
            [Cluster([sample], d) for sample in (s1_1, s1_2, s1_3, s1_4)], d)
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12.
        self.assertEqual(clusters, clusters_test,
                         "debe generarce un cluster para cada muestra")
コード例 #11
0
def createClusters(samplesA, samplesB):
    """Greedily merge the clusters of ``samplesA`` without absorbing outliers.

    The starting point is one cluster per sample (``createDefaultClusters``),
    or ``samplesA`` itself when it already is a ``ClusterContainer``. The two
    clusters joined by the edge returned by ``minimumEdge`` are merged
    repeatedly; a merge is rejected when ``containsOutlier`` reports that the
    merged cluster contains an outlier from ``samplesB``.

    Args:
        samplesA: container of the samples to cluster, or an existing
            ``ClusterContainer`` to refine. Must provide ``getSize()``.
        samplesB: the samples handed to ``containsOutlier`` for every
            candidate merge.

    Returns:
        The initial clusters unchanged when ``samplesA.getSize() == 1``;
        otherwise a new ``ClusterContainer`` holding only the clusters whose
        size is at least 1% of ``samplesA.getSize()``.
    """
    # NOTE(review): the two debug messages look swapped with respect to the
    # branch that prints them -- confirm before relying on this output.
    if isinstance(samplesA, ClusterContainer):
        print("si es sample")
        clusters = samplesA
    else:
        print("no es sample")
        clusters = createDefaultClusters(samplesA)

    if samplesA.getSize() == 1:
        return clusters

    remaining = clusters.getSize()  # current number of clusters
    attempts = 0  # merge attempts since the last successful merge
    distances_graph = createDistanceGraph(clusters.getClusters())

    while attempts < remaining:
        (u, v) = minimumEdge(distances_graph)
        merged = mergeClusters(u, v)

        if containsOutlier(merged, samplesB):
            # Rejected: give the edge an infinite weight so that it stops
            # being the minimum-weight candidate.
            distances_graph[u][v]['weight'] = float('inf')
        else:
            clusters = updateClusterContainer(clusters, u, v, merged)
            distances_graph = updateDistanceGraph(distances_graph, u, v,
                                                  merged)
            remaining -= 1
            attempts = 0
        # Incremented on both paths, so the counter is 1 (not 0) right after
        # a successful merge; with a single cluster left the loop then stops
        # before minimumEdge is asked for an edge again.
        attempts += 1

    # Build a real list: on Python 3 ``filter`` would hand ClusterContainer a
    # one-shot lazy iterator instead of the list it receives on Python 2.
    min_size = samplesA.getSize() * 0.01
    kept = [cls for cls in clusters.getClusters() if cls.getSize() >= min_size]
    return ClusterContainer(kept, clusters.getDimension())
コード例 #12
0
    def test_createClusters_allSamplesInTheSameCluster_2D(self):
        """With the other class located away from them, all six class-1
        samples must be merged into one single cluster."""
        d = 2
        s0_1 = Sample((3.0, 3.0))
        s0_2 = Sample((4.0, 4.0))
        s0_3 = Sample((3.0, 4.0))
        s1_1 = Sample((0.0, 1.0))
        s1_2 = Sample((0.0, 2.0))
        s1_3 = Sample((0.0, 3.0))
        s1_4 = Sample((1.0, 0.0))
        s1_5 = Sample((1.0, 1.0))
        s1_6 = Sample((1.0, 2.0))

        class0 = SampleContainer([s0_1, s0_2, s0_3], d)
        class1 = SampleContainer([s1_1, s1_2, s1_3, s1_4, s1_5, s1_6], d)

        clusters_test = ClusterContainer(
            [Cluster([s1_1, s1_2, s1_3, s1_4, s1_5, s1_6], d)], d)
        clusters = createClusters(class1, class0)
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12.
        self.assertEqual(
            clusters_test, clusters,
            "todas las muestras deben estar en un unico cluster")
コード例 #13
0
    def test_createRegions_trivial4D(self):
        """Check the hyperplane that createRegions produces for one sample
        group and one single-sample cluster in 4D.

        The first coefficient and the intercept are compared against narrow
        open intervals instead of exact floating-point values.
        """
        d = 4
        groups = GroupContainer(d)
        groups.addSamples(1, [Sample((5.0, 4.0, 0.0, 0.0))])
        clusters = ClusterContainer([Cluster([(7.0, 4.0, 0.0, 0.0)], d)], d)
        regions = createRegions(groups, clusters)
        hyperplane = regions[0].getHyperplanes().pop()

        first_coefficient = hyperplane.getCoefficient(0)
        self.assertTrue(
            0.999999950215 < first_coefficient < 0.999999950216,
            "0.99999995001 debe multiplicar la primer incognita")
        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12.
        self.assertEqual(0.0, hyperplane.getCoefficient(1),
                         "0.0 debe multiplicar la segunda incognita")
        intercept = hyperplane.getIntercept()
        self.assertTrue(
            5.9999997012 < intercept < 5.9999997013,
            "alfa debe ser 5.9999997013")
コード例 #14
0
    def test_createClusters_severalOutliers2D(self):
        """Class-B samples surrounding s5 and lying between the two outer
        pairs must yield three clusters: [s1, s2], [s3, s4] and [s5]."""
        d = 2
        s1, s2 = (3.0, 7.0), (3.0, 6.0)
        s3, s4 = (10.0, 7.0), (10.0, 6.0)
        s5 = (6.5, 6.5)
        samplesA = SampleContainer([s1, s2, s3, s4, s5], d)
        samplesB = SampleContainer([(6.0, 7.0), (6.0, 6.0), (7.0, 6.0),
                                    (7.0, 7.0), (6.0, 6.5), (7.0, 6.5)], d)
        c1 = Cluster([s1, s2], d)
        c2 = Cluster([s4, s3], d)
        c3 = Cluster([s5], d)

        container_test = ClusterContainer([c1, c2, c3], d)
        container = createClusters(samplesA, samplesB)

        # assertEqual replaces the deprecated assertEquals alias, which no
        # longer exists in Python 3.12.
        self.assertEqual(
            container, container_test,
            "Deben definirse los clusters: [s1,s2],[s3,s4],[s5]")
コード例 #15
0
def _edgesSortedByWeight(graph):
    """Return the edges of ``graph`` as (u, v, data) tuples, lightest first."""
    return sorted(graph.edges(data=True), key=lambda edge: edge[2]['weight'])


def createClusters2(samplesA, samplesB):
    """Cluster ``samplesA`` in passes over a pre-sorted edge list.

    Every sample starts in its own cluster. Edges of the distance graph are
    visited from lightest to heaviest; an edge is only considered when neither
    of its clusters has been merged during the current pass, and the merge is
    kept when ``containsOutlier`` does not find an outlier from ``samplesB``
    in the merged cluster. When the edge list runs out it is rebuilt from the
    updated graph and a new pass begins. The result of these passes is then
    handed to ``createClusters`` for a final refinement.

    Args:
        samplesA: container of the samples to cluster.
        samplesB: the samples handed to ``containsOutlier`` for every
            candidate merge.

    Returns:
        The initial clusters unchanged when ``samplesA.getSize() == 1``;
        otherwise a new ``ClusterContainer`` holding only the clusters whose
        size is at least 1% of ``samplesA.getSize()``.
    """
    clusters = createDefaultClusters(samplesA)

    if samplesA.getSize() == 1:
        return clusters

    remaining = clusters.getSize()  # current number of clusters
    attempts = 0  # merge attempts since the last successful merge
    print("creando grafo de distancias...")
    distances_graph = createDistanceGraph(clusters.getClusters())
    sorted_edges = _edgesSortedByWeight(distances_graph)
    # presumably maps every node to False -- TODO confirm against createMap
    has_already_been_merged = createMap(distances_graph.nodes)

    print("reduciendo clusters...")
    while attempts < remaining:
        if not sorted_edges:
            # Pass finished: rebuild the candidates from the updated graph.
            print("re-ordenando...")
            sorted_edges = _edgesSortedByWeight(distances_graph)
            has_already_been_merged = createMap(distances_graph.nodes)
            continue

        # Take the lightest pending edge; pop(0) drops exactly the element
        # that remove(sorted_edges[0]) used to drop, without comparing items.
        (u, v, _data) = sorted_edges.pop(0)
        if has_already_been_merged[v] or has_already_been_merged[u]:
            # Stale edge: one endpoint was already merged in this pass.
            continue

        merged = mergeClusters(u, v)
        if not containsOutlier(merged, samplesB):
            clusters = updateClusterContainer(clusters, u, v, merged)
            distances_graph = updateDistanceGraph(distances_graph, u, v,
                                                  merged)
            has_already_been_merged[v] = True
            has_already_been_merged[u] = True
            remaining -= 1
            attempts = 0
        # Counted for every attempted merge, so the counter is 1 (not 0)
        # right after a successful one.
        attempts += 1

    # A list comprehension keeps the printed output a list of sizes on
    # Python 3, where print(map(...)) would only show a map object.
    print([c.getSize() for c in clusters.getClusters()])

    clusters = createClusters(clusters, samplesB)
    # Build a real list: on Python 3 ``filter`` would hand ClusterContainer a
    # one-shot lazy iterator instead of the list it receives on Python 2.
    min_size = samplesA.getSize() * 0.01
    kept = [cls for cls in clusters.getClusters() if cls.getSize() >= min_size]
    return ClusterContainer(kept, clusters.getDimension())
コード例 #16
0
def createDefaultClusters(samples):
    """Wrap every sample of ``samples`` in a cluster of its own.

    Args:
        samples: container providing ``getDimension()`` and ``getSamples()``.

    Returns:
        A ``ClusterContainer`` of the same dimension with one single-sample
        ``Cluster`` per sample.
    """
    d = samples.getDimension()
    # A list (not a lazy ``map`` object, as on Python 3) so the container
    # receives a sequence it can size and iterate repeatedly.
    singletons = [Cluster([spl], d) for spl in samples.getSamples()]
    return ClusterContainer(singletons, d)
コード例 #17
0
def removeOutliers(clusters, outliers):
    """Return a copy of ``clusters`` with the outlier samples taken out.

    Args:
        clusters: ``ClusterContainer`` whose clusters are filtered.
        outliers: container whose ``getSamples()`` are subtracted from every
            cluster.

    Returns:
        A new ``ClusterContainer`` of the same dimension; each cluster is
        rebuilt from ``cluster.getSamples() - outliers.getSamples()``.
    """
    # The ``-`` below assumes getSamples() returns a set-like collection
    # -- TODO confirm. It is read once because it is the same for every
    # cluster.
    outlier_samples = outliers.getSamples()
    # A list (not a lazy ``map`` object, as on Python 3) so the container
    # receives a sequence it can size and iterate repeatedly.
    cleaned = [
        Cluster(clstr.getSamples() - outlier_samples, clstr.getDimension())
        for clstr in clusters.getClusters()
    ]
    return ClusterContainer(cleaned, clusters.getDimension())