def test_containsOutliers5D(self):
    """Check containsOutlier on the merge of two 5-D clusters.

    In every fixture point only the second and fourth coordinates vary;
    the remaining three are 0.0. The first sample set must not be reported
    as outliers of the merged cluster, the second one must.
    """
    d = 5
    points_a = [
        (0.0, 3.0, 0.0, 2.0, 0.0),
        (0.0, 4.0, 0.0, 2.0, 0.0),
        (0.0, 4.0, 0.0, 1.0, 0.0),
        (0.0, 3.0, 0.0, 1.0, 0.0),
    ]
    points_b = [
        (0.0, 3.0, 0.0, 5.0, 0.0),
        (0.0, 5.0, 0.0, 5.0, 0.0),
        (0.0, 4.0, 0.0, 6.0, 0.0),
    ]
    cluster_A = Cluster(points_a, d)
    cluster_B = Cluster(points_b, d)

    # First scenario: the test expects containsOutlier to return False.
    foreign_samples = SampleContainer(
        [(0.0, 7.0, 0.0, 5.0, 0.0), (0.0, 7.0, 0.0, 4.0, 0.0)], d)
    found_outlier = containsOutlier(
        mergeClusters(cluster_A, cluster_B), foreign_samples)
    self.assertEqual(
        False, found_outlier,
        "Dos clusters deberian ser mergeables si forman una componente convexa sin outliers en dimension %s" % (d))

    # Second scenario: the test expects containsOutlier to return True.
    foreign_samples = SampleContainer(
        [
            (0.0, 4.0, 0.0, 4.0, 0.0),
            (0.0, 3.0, 0.0, 4.0, 0.0),
            (0.0, 5.0, 0.0, 4.0, 0.0),
        ],
        d)
    found_outlier = containsOutlier(
        mergeClusters(cluster_A, cluster_B), foreign_samples)
    self.assertEqual(
        True, found_outlier,
        "Dos clusters no deberian ser mergeables si forman una componente convexa con outliers %s" % (d))
def test_merge_2D(self):
    """Check that merging two 2-D clusters yields the union of their points.

    The merged cluster must compare equal to a cluster built directly from
    all four points, and must differ from each of the two inputs.
    """
    d = 2
    cluster_A = Cluster([(0.0, 0.0), (1.0, 1.0)], d)
    cluster_B = Cluster([(2.0, 2.0), (3.0, 3.0)], d)
    expected = Cluster([(0.0, 0.0), (1.0, 1.0), (2.0, 2.0), (3.0, 3.0)], d)

    merged = mergeClusters(cluster_A, cluster_B)

    # assertEqual / assertNotEqual replace the assertEquals / assertNotEquals
    # aliases, which are deprecated and no longer exist in Python 3.12+.
    self.assertEqual(expected, merged, "los cluster deben ser iguales")
    self.assertNotEqual(cluster_A, merged, "los clusters no son iguales")
    self.assertNotEqual(cluster_B, merged, "los clusters no son iguales")
def test_containsOutliers2D(self):
    """Check containsOutlier on the merge of two 2-D clusters.

    A sample at (6.0, 3.0) must not be reported as an outlier of the merged
    cluster, while a sample at (2.3, 3.0) must be.
    """
    d = 2
    cluster_A = Cluster([(0.0, 2.0), (0.0, 4.0)], d)
    cluster_B = Cluster([(4.0, 2.0), (4.0, 4.0)], d)

    # First scenario: the test expects containsOutlier to return False.
    foreign_samples = SampleContainer([(6.0, 3.0)], d)
    found_outlier = containsOutlier(
        mergeClusters(cluster_A, cluster_B), foreign_samples)
    self.assertEqual(
        False, found_outlier,
        "Dos clusters deberian ser mergeables si forman una componente convexa sin outliers")

    # Second scenario: the test expects containsOutlier to return True.
    foreign_samples = SampleContainer([(2.3, 3.0)], d)
    found_outlier = containsOutlier(
        mergeClusters(cluster_A, cluster_B), foreign_samples)
    self.assertEqual(
        True, found_outlier,
        "Dos clusters no deberian ser mergeables si forman una componente convexa con outliers")
def testisMergeableEmptyOutliers(self):
    """Check that an empty sample container never yields an outlier.

    With no foreign samples at all, containsOutlier on the merged cluster
    is expected to return False.
    """
    d = 2
    cluster_A = Cluster([(0.0, 2.0), (0.0, 4.0)], d)
    cluster_B = Cluster([(4.0, 2.0), (4.0, 4.0)], d)
    no_samples = SampleContainer([], d)

    found_outlier = containsOutlier(
        mergeClusters(cluster_A, cluster_B), no_samples)

    self.assertEqual(
        False, found_outlier,
        "Dos clusters deberian ser mergeables si no hay otras muestras")
def createClusters(samplesA, samplesB):
    """Merge the clusters of ``samplesA`` while the merge holds no ``samplesB`` outlier.

    Starting from ``samplesA`` itself (when it already is a
    ``ClusterContainer``) or from ``createDefaultClusters(samplesA)``, the
    pair returned by ``minimumEdge`` on the distance graph is merged
    repeatedly. A merge is kept only if ``containsOutlier(merged, samplesB)``
    is false; otherwise that edge's weight is set to infinity so it is not
    chosen again before finite-weight edges.

    Args:
        samplesA: A sample container or a ``ClusterContainer``; must provide
            ``getSize()``.
        samplesB: The samples passed to ``containsOutlier`` for every
            candidate merge.

    Returns:
        The unfiltered clusters when ``samplesA.getSize() == 1``; otherwise a
        new ``ClusterContainer`` holding only the clusters whose size is at
        least 1% of ``samplesA.getSize()``.
    """
    if isinstance(samplesA, ClusterContainer):
        print("si es sample")
        clusters = samplesA
    else:
        print("no es sample")
        clusters = createDefaultClusters(samplesA)

    if samplesA.getSize() == 1:
        return clusters

    K = clusters.getSize()
    k = 0
    distances_graph = createDistanceGraph(clusters.getClusters())

    # k counts iterations since the last successful merge; the loop stops
    # once it reaches the current number of clusters K.
    while k < K:
        (u, v) = minimumEdge(distances_graph)
        merged = mergeClusters(u, v)
        if containsOutlier(merged, samplesB):
            # Rejected merge: push this edge to the back of the ordering.
            distances_graph[u][v]['weight'] = float('inf')
        else:
            clusters = updateClusterContainer(clusters, u, v, merged)
            distances_graph = updateDistanceGraph(
                distances_graph, u, v, merged)
            K = K - 1
            k = 0
        # NOTE(review): k is advanced on every iteration, including the one
        # that has just reset it to 0 after a merge -- confirm this is the
        # intended placement.
        k = k + 1

    # NOTE(review): when samplesA is a ClusterContainer, getSize() is whatever
    # that container reports (presumably a cluster count, not a sample
    # count) -- verify the 1% threshold is meaningful in that case.
    min_size = samplesA.getSize() * 0.01

    # Build a real list: on Python 3 ``filter`` returns a lazy, single-pass
    # iterator, which is not a safe stand-in for a list of clusters.
    kept = [cls for cls in clusters.getClusters() if cls.getSize() >= min_size]
    return ClusterContainer(kept, clusters.getDimension())
def createClusters2(samplesA, samplesB):
    """Batch-merge default clusters of ``samplesA``, then refine with ``createClusters``.

    Edges of the distance graph are sorted once by weight and consumed in
    that order. An edge is attempted only if neither endpoint has been merged
    since the last sort, and a merge is kept only if
    ``containsOutlier(merged, samplesB)`` is false. When the sorted list runs
    out, the edges are re-sorted from the updated graph. The resulting
    clusters are then passed through ``createClusters(clusters, samplesB)``.

    Args:
        samplesA: Sample container used to build the default clusters; must
            provide ``getSize()``.
        samplesB: The samples passed to ``containsOutlier`` for every
            candidate merge.

    Returns:
        The unfiltered default clusters when ``samplesA.getSize() == 1``;
        otherwise a new ``ClusterContainer`` holding only the clusters whose
        size is at least 1% of ``samplesA.getSize()``.
    """
    clusters = createDefaultClusters(samplesA)
    if samplesA.getSize() == 1:
        return clusters

    def edges_by_weight(graph):
        # Ascending by the 'weight' attribute of each (u, v, data) edge.
        return sorted(graph.edges(data=True), key=lambda x: x[2]['weight'])

    K = clusters.getSize()
    k = 0
    print("creando grafo de distancias...")
    distances_graph = createDistanceGraph(clusters.getClusters())
    sorted_edges = edges_by_weight(distances_graph)
    has_already_been_merged = createMap(distances_graph.nodes)
    print("reduciendo clusters...")

    while k < K:
        if not sorted_edges:
            print("re-ordenando...")
            sorted_edges = edges_by_weight(distances_graph)
            has_already_been_merged = createMap(distances_graph.nodes)
            if not sorted_edges:
                # k does not advance in this branch, so a graph without
                # edges would otherwise make the loop spin forever.
                break
        else:
            (u, v, w) = sorted_edges[0]
            # Skip stale edges whose endpoints were merged since the last
            # sort.
            if not (has_already_been_merged[v] or has_already_been_merged[u]):
                merged = mergeClusters(u, v)
                if not containsOutlier(merged, samplesB):
                    clusters = updateClusterContainer(clusters, u, v, merged)
                    distances_graph = updateDistanceGraph(
                        distances_graph, u, v, merged)
                    has_already_been_merged[v] = True
                    has_already_been_merged[u] = True
                    K = K - 1
                    k = 0
            # NOTE(review): k is advanced for every edge consumed, including
            # skipped ones and the one that just reset it -- confirm this is
            # the intended placement.
            k = k + 1
            sorted_edges.pop(0)

    # A list is printed rather than ``map(...)``, which on Python 3 would
    # only show "<map object at ...>".
    print([c.getSize() for c in clusters.getClusters()])
    clusters = createClusters(clusters, samplesB)

    min_size = samplesA.getSize() * 0.01
    # Build a real list: on Python 3 ``filter`` returns a lazy, single-pass
    # iterator, which is not a safe stand-in for a list of clusters.
    kept = [cls for cls in clusters.getClusters() if cls.getSize() >= min_size]
    return ClusterContainer(kept, clusters.getDimension())