def test_containsOutliers5D(self):
    """Check containsOutlier on two merged clusters described in 5 dimensions.

    Only features 1 and 3 of the points vary; the remaining coordinates are
    fixed at 0.0. The first sample set is expected to yield no outlier, the
    second one is expected to yield at least one.
    """
    d = 5
    cluster_A = Cluster(
        [
            (0.0, 3.0, 0.0, 2.0, 0.0),
            (0.0, 4.0, 0.0, 2.0, 0.0),
            (0.0, 4.0, 0.0, 1.0, 0.0),
            (0.0, 3.0, 0.0, 1.0, 0.0),
        ],
        d)
    cluster_B = Cluster(
        [
            (0.0, 3.0, 0.0, 5.0, 0.0),
            (0.0, 5.0, 0.0, 5.0, 0.0),
            (0.0, 4.0, 0.0, 6.0, 0.0),
        ],
        d)

    # Case 1: the assertion expects these samples not to count as outliers.
    far_samples = SampleContainer(
        [(0.0, 7.0, 0.0, 5.0, 0.0), (0.0, 7.0, 0.0, 4.0, 0.0)], d)
    merged = mergeClusters(cluster_A, cluster_B)
    resul = containsOutlier(merged, far_samples)
    self.assertEqual(
        False, resul,
        "Dos clusters deberian ser mergeables si forman una componente convexa sin outliers en dimension %s"
        % (d))

    # Case 2: the assertion expects these samples to be reported as outliers.
    between_samples = SampleContainer(
        [
            (0.0, 4.0, 0.0, 4.0, 0.0),
            (0.0, 3.0, 0.0, 4.0, 0.0),
            (0.0, 5.0, 0.0, 4.0, 0.0),
        ],
        d)
    merged = mergeClusters(cluster_A, cluster_B)
    resul = containsOutlier(merged, between_samples)
    self.assertEqual(
        True, resul,
        "Dos clusters no deberian ser mergeables si forman una componente convexa con outliers %s"
        % (d))
def test_createCluster2_onlyOneOutlier_2D(self):
    """Every class-A sample must appear in the clusters built by createClusters.

    Class A holds four points sharing feature 0; class B holds a single point
    at (0.0, 1.5).
    """
    # NOTE(review): the test name mentions "createCluster2" but the body calls
    # createClusters — confirm which implementation is meant to be exercised.
    d = 2
    classA = SampleContainer([(0.0, 0.0), (0.0, 1.0), (0.0, 2.0), (0.0, 3.0)],
                             d)
    classB = SampleContainer([(0.0, 1.5)], d)
    clusters = createClusters(classA, classB)
    # Collect the clustered samples once instead of once per iteration.
    clustered = clusters.getSamples().getSamples()
    for spl in classA.getSamples():
        # assertIn reports the missing sample and the container on failure.
        self.assertIn(spl, clustered)
def test_createClusters2_onlyOneSampleForCluster_2D(self):
    """Every class-A sample must appear in the clusters built by createClusters.

    The two classes alternate along feature 1 (class A at 1, 3, 5, 7 and
    class B at 2, 4, 6, 8), all with feature 0 equal to 0.0.
    """
    # NOTE(review): the test name mentions "createClusters2" but the body
    # calls createClusters — confirm which implementation is meant.
    d = 2
    s0_1, s0_2, s0_3, s0_4 = (0.0, 2.0), (0.0, 4.0), (0.0, 6.0), (0.0, 8.0)
    s1_1, s1_2, s1_3, s1_4 = (0.0, 1.0), (0.0, 3.0), (0.0, 5.0), (0.0, 7.0)
    classA = SampleContainer([s1_1, s1_2, s1_3, s1_4], d)
    classB = SampleContainer([s0_1, s0_2, s0_3, s0_4], d)
    clusters = createClusters(classA, classB)
    # Collect the clustered samples once instead of once per iteration.
    clustered = clusters.getSamples().getSamples()
    for spl in classA.getSamples():
        # assertIn reports the missing sample and the container on failure.
        self.assertIn(spl, clustered)
def test_createClusters2_DefineClusterNoneOutlierOnlyOneSample_2D(self):
    """The single class-A sample must appear in the clusters of createClusters2."""
    d = 2
    sA = Sample((0.0, 0.0))
    classA = SampleContainer([sA], d)
    classB = SampleContainer([(4.0, 0.0)], d)
    clusters = createClusters2(classA, classB)
    # Collect the clustered samples once instead of once per iteration.
    clustered = clusters.getSamples().getSamples()
    for spl in classA.getSamples():
        # assertIn reports the missing sample and the container on failure.
        self.assertIn(spl, clustered)
def test_DefineClusterNoneOutlierOnlyOneSample_4D(self):
    """A single class-A sample in 4D must produce exactly one cluster holding it."""
    d = 4
    sA = Sample((0.0, 0.0, 0.0, 0.0))
    classA = SampleContainer([sA], d)
    classB = SampleContainer([(0.0, 4.0, 0.0, 0.0)], d)
    clusters = createClusters(classA, classB)
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(clusters.getSize(), 1, "solo debe generarce un cluster")
    # The only cluster is taken out with pop() and must contain the sample.
    self.assertIn(sA, clusters.getClusters().pop().getSamples())
def test_onlyOneOutlier_3D(self):
    """A class-B point between class-A points must split class A into two clusters.

    Class A lies at feature 1 = 0, 1, 2, 3 and the class-B point at 1.5; the
    expected result groups {0, 1} and {2, 3} separately.
    """
    d = 3
    classA = SampleContainer([(0.0, 0.0, 0.0), (0.0, 1.0, 0.0),
                              (0.0, 2.0, 0.0), (0.0, 3.0, 0.0)], d)
    classB = SampleContainer([(0.0, 1.5, 0.0)], d)
    clusters = createClusters(classA, classB)
    clusters_test = ClusterContainer([
        Cluster([(0.0, 2.0, 0.0), (0.0, 3.0, 0.0)], d),
        Cluster([(0.0, 0.0, 0.0), (0.0, 1.0, 0.0)], d)
    ], d)
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(
        clusters, clusters_test,
        "las muestras mergeables deben estar en el mismo cluster")
def test_createClusters2_severalOutliers2D(self):
    """Every class-A sample must appear in the clusters built by createClusters.

    Class A has two points on each side of the class-B points plus one point
    at (6.5, 6.5), which lies between the class-B points.
    """
    # NOTE(review): the test name mentions "createClusters2" but the body
    # calls createClusters — confirm which implementation is meant.
    d = 2
    s1, s2, s3, s4, s5 = (3.0, 7.0), (3.0, 6.0), (10.0, 7.0), (10.0,
                                                               6.0), (6.5,
                                                                      6.5)
    samplesA = SampleContainer([s1, s2, s3, s4, s5], d)
    samplesB = SampleContainer([(6.0, 7.0), (6.0, 6.0), (7.0, 6.0),
                                (7.0, 7.0), (6.0, 6.5), (7.0, 6.5)], d)
    clusters = createClusters(samplesA, samplesB)
    # Collect the clustered samples once instead of once per iteration.
    clustered = clusters.getSamples().getSamples()
    for spl in samplesA.getSamples():
        # assertIn reports the missing sample and the container on failure.
        self.assertIn(spl, clustered)
def test_createClusters2_allSamplesInTheSameCluster_2D(self):
    """Every class-1 sample must appear in the clusters built by createClusters2."""
    d = 2
    s0_1, s0_2, s0_3 = Sample((3.0, 3.0)), Sample((4.0, 4.0)), Sample(
        (3.0, 4.0))
    s1_1, s1_2, s1_3, s1_4, s1_5, s1_6 = Sample((0.0, 1.0)), Sample(
        (0.0, 2.0)), Sample((0.0, 3.0)), Sample((1.0, 0.0)), Sample(
            (1.0, 1.0)), Sample((1.0, 2.0))
    class0 = SampleContainer([s0_1, s0_2, s0_3], d)
    class1 = SampleContainer([s1_1, s1_2, s1_3, s1_4, s1_5, s1_6], d)
    # class1 is the class being clustered; class0 is passed second.
    clusters = createClusters2(class1, class0)
    # Collect the clustered samples once instead of once per iteration.
    clustered = clusters.getSamples().getSamples()
    for spl in class1.getSamples():
        # assertIn reports the missing sample and the container on failure.
        self.assertIn(spl, clustered)
def test_onlyOneSampleForCluster_2D(self):
    """Alternating classes along one axis must give one cluster per class-A sample."""
    d = 2
    s0_1, s0_2, s0_3, s0_4 = (0.0, 2.0), (0.0, 4.0), (0.0, 6.0), (0.0, 8.0)
    s1_1, s1_2, s1_3, s1_4 = (0.0, 1.0), (0.0, 3.0), (0.0, 5.0), (0.0, 7.0)
    classA = SampleContainer([s1_1, s1_2, s1_3, s1_4], d)
    classB = SampleContainer([s0_1, s0_2, s0_3, s0_4], d)
    clusters = createClusters(classA, classB)
    clusters_test = ClusterContainer([
        Cluster([s1_1], d),
        Cluster([s1_2], d),
        Cluster([s1_3], d),
        Cluster([s1_4], d)
    ], d)
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(clusters, clusters_test,
                     "debe generarce un cluster para cada muestra")
def test_createClusters_allSamplesInTheSameCluster_2D(self):
    """createClusters must put all six class-1 samples into a single cluster."""
    d = 2
    s0_1, s0_2, s0_3 = Sample((3.0, 3.0)), Sample((4.0, 4.0)), Sample(
        (3.0, 4.0))
    s1_1, s1_2, s1_3, s1_4, s1_5, s1_6 = Sample((0.0, 1.0)), Sample(
        (0.0, 2.0)), Sample((0.0, 3.0)), Sample((1.0, 0.0)), Sample(
            (1.0, 1.0)), Sample((1.0, 2.0))
    class0 = SampleContainer([s0_1, s0_2, s0_3], d)
    class1 = SampleContainer([s1_1, s1_2, s1_3, s1_4, s1_5, s1_6], d)
    clusters_test = ClusterContainer(
        [Cluster([s1_1, s1_2, s1_3, s1_4, s1_5, s1_6], d)], d)
    clusters = createClusters(class1, class0)
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(
        clusters_test, clusters,
        "todas las muestras deben estar en un unico cluster")
def test_createClusters_severalOutliers2D(self):
    """createClusters must produce the clusters [s1, s2], [s3, s4] and [s5]."""
    d = 2
    s1, s2, s3, s4, s5 = (3.0, 7.0), (3.0, 6.0), (10.0, 7.0), (10.0,
                                                               6.0), (6.5,
                                                                      6.5)
    samplesA = SampleContainer([s1, s2, s3, s4, s5], d)
    samplesB = SampleContainer([(6.0, 7.0), (6.0, 6.0), (7.0, 6.0),
                                (7.0, 7.0), (6.0, 6.5), (7.0, 6.5)], d)
    c1 = Cluster([s1, s2], d)
    c2 = Cluster([s4, s3], d)
    c3 = Cluster([s5], d)
    container_test = ClusterContainer([c1, c2, c3], d)
    container = createClusters(samplesA, samplesB)
    # assertEqual replaces the deprecated assertEquals alias (removed in
    # Python 3.12).
    self.assertEqual(
        container, container_test,
        "Deben definirse los clusters: [s1,s2],[s3,s4],[s5]")
def test_containsOutliers2D(self):
    """Check containsOutlier on two merged 2D clusters.

    The sample at (6.0, 3.0) is expected to yield no outlier; the sample at
    (2.3, 3.0) is expected to be reported as one.
    """
    d = 2
    cluster_A = Cluster([(0.0, 2.0), (0.0, 4.0)], d)
    cluster_B = Cluster([(4.0, 2.0), (4.0, 4.0)], d)

    # Case 1: expected to yield no outlier.
    far_sample = SampleContainer([(6.0, 3.0)], d)
    merged = mergeClusters(cluster_A, cluster_B)
    resul = containsOutlier(merged, far_sample)
    self.assertEqual(
        False, resul,
        "Dos clusters deberian ser mergeables si forman una componente convexa sin outliers"
    )

    # Case 2: expected to be reported as an outlier.
    between_sample = SampleContainer([(2.3, 3.0)], d)
    merged = mergeClusters(cluster_A, cluster_B)
    resul = containsOutlier(merged, between_sample)
    self.assertEqual(
        True, resul,
        "Dos clusters no deberian ser mergeables si forman una componente convexa con outliers"
    )
def testisMergeableEmptyOutliers(self):
    """With an empty sample container, containsOutlier must return False."""
    d = 2
    left = Cluster([(0.0, 2.0), (0.0, 4.0)], d)
    right = Cluster([(4.0, 2.0), (4.0, 4.0)], d)
    no_samples = SampleContainer([], d)
    merged = mergeClusters(left, right)
    outcome = containsOutlier(merged, no_samples)
    self.assertEqual(
        False, outcome,
        "Dos clusters deberian ser mergeables si no hay otras muestras")
def main():
    """Train a Classifier on a two-class CSV sample set and print its metrics.

    Each class is split with divideProportionally(..., 0.9); the first part
    is used for training and the second for building the confusion matrix.
    """
    d = 2
    k = 1
    t0, t1 = "rojo", "azul"

    c0, c1 = Importer.readSample(
        "/home/pandari/Escritorio/Tesis-Classification/Resources/R2/t1-ConjuntosDisjuntos.csv"
    )
    # Hard-coded alternative to the split below, kept for reference:
    # train0 = [(-0.1663, -0.208), (-1.4265, 1.2276), (6.8148, -0.6143), (-0.7036, 1.0372), (0.2668, -1.6665), (0.2529, -1.9605)]
    # train1 = [(-0.1663, -0.208), (-1.4265, 1.2276), (-0.7036, 1.0372), (0.2668, -1.6665), (0.2529, -1.9605)]
    # test0 = [(-0.1663, -0.208), (-1.4265, 1.2276), (6.8148, -0.6143), (-0.7036, 1.0372), (0.2668, -1.6665), (0.2529, -1.9605)]
    # test1 = [(-0.1663, -0.208), (-1.4265, 1.2276), (-0.7036, 1.0372), (0.2668, -1.6665), (0.2529, -1.9605)]
    train0, test0 = divideProportionally(c0, 0.9)
    train1, test1 = divideProportionally(c1, 0.9)

    clasifier = Classifier(
        SampleContainer(train0, d), SampleContainer(train1, d), t0, t1, d, k)
    clasifier.train()

    TP, FP, TN, FN = ConfuseMatrix.generateConfuseMatrix(
        clasifier, test0, test1, t0, t1)
    # NOTE(review): the argument order given to MetricsClassifier cannot be
    # checked from here. The arguments for class 1 are not the class-swapped
    # (TP<->TN, FP<->FN) permutation of those for class 0 — confirm against
    # the MetricsClassifier signature.
    metC0 = MetricsClassifier(0, TP, FP, TN, FN)
    metC1 = MetricsClassifier(1, FN, TN, FP, TP)

    TITLE_CASETEST = "Titulo del testsss"
    ACCURACY = "\nAccuracy: {}\n"
    CONFUSE_MATRIX = "Matrix Confuse:\n|{}, {}|\n|{}, {}|\n"
    HEADER_METRIC = "Report:\n\tClass\tPresicion\tRecall\t\tF1-Score\tSupport\n"

    accuracy_text = ACCURACY.format(metC0.getAccuracy())
    matrix_text = CONFUSE_MATRIX.format(int(TP), int(FP), int(TN), int(FN))
    print(TITLE_CASETEST)
    print(accuracy_text)
    print(matrix_text)
    print(HEADER_METRIC)
    print(metC0.showMetrics())
    print(metC1.showMetrics())
def test_createScrollTuple(self):
    """createScrollSample must return Sample((4.0, 3.0)) for the given samples.

    The smallest value in the input is -4.0 on feature 0 and -3.0 on
    feature 1; presumably the expected result is the offset that makes every
    feature non-negative — confirm against createScrollSample.
    """
    d = 2
    samples = SampleContainer([(2.0, 3.0), (-2.0, 3.0), (-2.0, -2.0),
                               (3.0, -2.0), (-4.0, 4.0), (-4.0, -2.0),
                               (1.0, -3.0)], d)
    gb = createScrollSample(samples, d)
    for i in range(d):
        print(gb.getFeature(i))
    # Reuse the value computed above instead of calling createScrollSample a
    # second time; assertEqual replaces the deprecated assertEquals alias
    # (removed in Python 3.12).
    self.assertEqual(Sample((4.0, 3.0)), gb)
def __init__(self, c0, c1, t0, t1, d, k):
    """Store the two sample classes, their tags and the configuration.

    c0, c1 -- sample containers for class 0 and class 1 (must expose
              getSamples() returning an object with a union() method).
    t0, t1 -- tags associated with class 0 and class 1.
    d      -- dimension, stored and passed to createDisplaceSample.
    k      -- stored as the number of groups.
    """
    self.regions = []
    self.__dimension = d
    self.__num_groups = k
    self.__class1 = c1
    self.__class0 = c0
    self.__tag0 = t0
    self.__tag1 = t1
    # The displace sample is computed over the samples of both classes together.
    every_sample = c0.getSamples().union(c1.getSamples())
    combined = SampleContainer(every_sample, d)
    self.__displace_sample = createDisplaceSample(combined, d)
def test_displace(self):
    """After displace() with (4.0, 3.0), no sample may have a negative feature."""

    def isInFirstQuandrant(sample):
        # True when none of the sample's features is below zero.
        return not any(
            sample.getFeature(i) < 0 for i in range(sample.getDimension()))

    d = 2
    points = [(2.0, 3.0), (-2.0, 3.0), (-2.0, -2.0), (3.0, -2.0),
              (-4.0, 4.0), (-4.0, -2.0), (1.0, -3.0)]
    samples = SampleContainer(points, d)
    scroll = Sample((4.0, 3.0))
    displaced = displace(samples, scroll)
    for moved in displaced.getSamples():
        self.assertTrue(isInFirstQuandrant(moved))
def main():
    """Train a Classifier on a two-class CSV sample set, report and export it.

    Every cluster read for each class is split with
    divideProportionally(..., 0.7); the first parts are flattened into the
    training set and the second parts into the test set. After training, the
    confusion matrix and metrics are printed and the solution is exported.
    """
    d = 2
    k = 1
    c0, c1 = Importer.readSample(
        "/home/javier/Documentos/Repositorios Git/Tesis-Classification/Resources/R2/t7-DiagonalIntercalada.csv"
    )
    # Materialise the splits as lists: each one is traversed twice below
    # (train part, then test part). A lazy map() object (Python 3) would be
    # exhausted after the first pass and leave the test sets empty.
    split_samples_0 = [divideProportionally(cluster, 0.7) for cluster in c0]
    split_samples_1 = [divideProportionally(cluster, 0.7) for cluster in c1]
    train0 = [item for split in split_samples_0 for item in split[0]]
    test0 = [item for split in split_samples_0 for item in split[1]]
    train1 = [item for split in split_samples_1 for item in split[0]]
    test1 = [item for split in split_samples_1 for item in split[1]]
    train0 = SampleContainer(train0, d)
    train1 = SampleContainer(train1, d)
    t0 = "rojo"
    t1 = "azul"
    clasifier = Classifier(train0, train1, t0, t1, d, k)
    clasifier.train(createClustersMethod=createClusters)
    TP, FP, TN, FN = ConfuseMatrix.generateConfuseMatrix(
        clasifier, test0, test1, t0, t1)
    # Class 1 receives the class-swapped counts (TP<->TN, FP<->FN).
    metC0 = MetricsClassifier(0, TP, FP, FN, TN)
    metC1 = MetricsClassifier(1, TN, FN, FP, TP)
    TITLE_CASETEST = "Titulo del testsss"
    ACCURACY = "\nAccuracy: {}\n"
    CONFUSE_MATRIX = "Matrix Confuse:\n|{}, {}|\n|{}, {}|\n"
    HEADER_METRIC = "Report:\n\tClass\tPresicion\tRecall\t\tF1-Score\tSupport\n"
    print(TITLE_CASETEST)
    print(ACCURACY.format(metC0.getAccuracy()))
    print(CONFUSE_MATRIX.format(int(TP), int(FP), int(FN), int(TN)))
    print(HEADER_METRIC)
    print(metC0.showMetrics())
    print(metC1.showMetrics())
    clasifier.export(
        "/home/javier/Documents/LiClipse Workspace/Ploteo/TEST/solution", d)
    # regions.pop() removes the exported region from the classifier's list.
    clasifier.exportRegion(
        "/home/javier/Documents/LiClipse Workspace/Ploteo/TEST/solutionPrimeraRegion",
        d, clasifier.regions.pop())
    print("vector de desplazamiento: " +
          str(clasifier.getDisplaceSample().getData()))
    print("DONE")
def displace(samples, scrollSample):
    """Return a SampleContainer with sampleSum applied to every sample.

    Each sample of `samples` is combined with `scrollSample` through
    sampleSum(sample, scrollSample, d), where d is the container's dimension.
    """
    d = samples.getDimension()

    def shifted(spl):
        return sampleSum(spl, scrollSample, d)

    return SampleContainer(map(shifted, samples.getSamples()), d)
def getSamples(self):
    """Return a SampleContainer holding the samples of all clusters combined."""
    gathered = set()
    for cluster in self.getClusters():
        gathered |= cluster.getSamples()
    return SampleContainer(gathered, self.getDimension())
def getOutliers(eVar, clusters):
    """Return the clustered samples whose eVar entry is greater than 1.

    eVar is indexed by sample; the result is a SampleContainer with the same
    dimension as `clusters`.
    """

    def exceeds_one(spl):
        return eVar[spl] > 1

    candidates = clusters.getSamples().getSamples()
    selected = filter(exceeds_one, candidates)
    return SampleContainer(selected, clusters.getDimension())
def displace(samples, scrollSample):
    """Return a SampleContainer with `scrollSample` added to every sample.

    The addition is feature by feature over the first d features, where d is
    the dimension reported by `samples`.
    """
    d = samples.getDimension()

    def sampleSum(s1, s2):
        # Feature-wise sum of two samples, wrapped in a new Sample.
        totals = tuple(s1.getFeature(i) + s2.getFeature(i) for i in range(d))
        return Sample(totals)

    def shifted(spl):
        return sampleSum(spl, scrollSample)

    return SampleContainer(map(shifted, samples.getSamples()), d)