def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
    """!
    @brief Check that the distance between two clustering features is symmetric and matches the
            corresponding utils calculation for the given measurement type.

    @param[in] cluster1 (list): Points of the first cluster.
    @param[in] cluster2 (list): Points of the second cluster.
    @param[in] type_measurement (measurement_type): Measurement type passed to cfentry.get_distance().

    """
    entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
    entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

    # The distance must be the same from 1 to 2 and from 2 to 1.
    distance12 = entry1.get_distance(entry2, type_measurement)
    distance21 = entry2.get_distance(entry1, type_measurement)

    assert distance12 == distance21

    # Check against the utils calculation. A measurement type that is not listed below is only
    # verified for symmetry.
    if type_measurement == measurement_type.CENTROID_EUCLIDEAN_DISTANCE:
        assert distance12 == euclidean_distance_square(entry1.get_centroid(), entry2.get_centroid())

    elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
        assert distance12 == manhattan_distance(entry1.get_centroid(), entry2.get_centroid())

    elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
        assert numpy.isclose(distance12, average_inter_cluster_distance(cluster1, cluster2))

    elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
        assert numpy.isclose(distance12, average_intra_cluster_distance(cluster1, cluster2))

    elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
        assert numpy.isclose(distance12, variance_increase_distance(cluster1, cluster2))
Exemple #2
0
 def templateDistanceCalculation(self, cluster1, cluster2, type_measurement):
     """!
     @brief Check that cfentry.get_distance() gives the same value in both directions and agrees
             with the utils calculation that corresponds to the measurement type.

     @param[in] cluster1 (list): Points of the first cluster.
     @param[in] cluster2 (list): Points of the second cluster.
     @param[in] type_measurement (measurement_type): Measurement type passed to cfentry.get_distance().

     """
     entry1 = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
     entry2 = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

     # Symmetry: distance from 1 to 2 equals distance from 2 to 1.
     distance12 = entry1.get_distance(entry2, type_measurement)
     distance21 = entry2.get_distance(entry1, type_measurement)

     assert distance12 == distance21

     # Comparison with the utils calculation; measurement types without a branch below are
     # checked for symmetry only.
     if type_measurement == measurement_type.CENTROID_EUCLIDIAN_DISTANCE:
         assert distance12 == euclidean_distance_sqrt(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.CENTROID_MANHATTAN_DISTANCE:
         assert distance12 == manhattan_distance(entry1.get_centroid(), entry2.get_centroid())

     elif type_measurement == measurement_type.AVERAGE_INTER_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_inter_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.AVERAGE_INTRA_CLUSTER_DISTANCE:
         assert numpy.isclose(distance12, average_intra_cluster_distance(cluster1, cluster2))

     elif type_measurement == measurement_type.VARIANCE_INCREASE_DISTANCE:
         assert numpy.isclose(distance12, variance_increase_distance(cluster1, cluster2))
    def templateCfEntryValueDistance(self, cluster1, cluster2, value,
                                     tolerance, type_measurment):
        """!
        @brief Check that the distance between the clustering features of two clusters lies strictly
                within the tolerance around the expected value.

        @param[in] cluster1 (list): Points of the first cluster.
        @param[in] cluster2 (list): Points of the second cluster.
        @param[in] value (double): Expected distance.
        @param[in] tolerance (double): Allowed deviation from the expected distance (exclusive bounds).
        @param[in] type_measurment (measurement_type): Measurement type passed to cfentry.get_distance().

        """
        first, second = [
            cfentry(len(points), linear_sum(points), square_sum(points))
            for points in (cluster1, cluster2)
        ]

        measured = first.get_distance(second, type_measurment)

        lower_bound = value - tolerance
        upper_bound = value + tolerance
        assert lower_bound < measured and upper_bound > measured
 def templateCfEntryDistance(self, type_measurement):
     """!
     @brief Check the ordering of pairwise distances between three fixed clustering features for the
             given measurement type.

     @param[in] type_measurement (measurement_type): Measurement type passed to cfentry.get_distance().

     """
     near_points = [[0.1, 0.1], [0.1, 0.2], [0.2, 0.1], [0.2, 0.2]]
     middle_points = [[0.4, 0.4], [0.4, 0.5], [0.5, 0.4], [0.5, 0.5]]
     far_points = [[0.9, 0.9], [0.9, 1.0], [1.0, 0.9], [1.0, 1.0]]

     near, middle, far = [cfentry(len(points), linear_sum(points), square_sum(points))
                          for points in (near_points, middle_points, far_points)]

     near_to_middle = near.get_distance(middle, type_measurement)
     middle_to_far = middle.get_distance(far, type_measurement)
     near_to_far = near.get_distance(far, type_measurement)

     # Expected order: d(1, 2) < d(2, 3) < d(1, 3).
     assert near_to_middle < middle_to_far
     assert middle_to_far < near_to_far
 def templateCfEntryDistance(self, type_measurement):
     """!
     @brief Build clustering features for three fixed clusters and check that their pairwise
             distances are ordered as expected for the given measurement type.

     @param[in] type_measurement (measurement_type): Measurement type passed to cfentry.get_distance().

     """
     clusters = (
         [[0.1, 0.1], [0.1, 0.2], [0.2, 0.1], [0.2, 0.2]],
         [[0.4, 0.4], [0.4, 0.5], [0.5, 0.4], [0.5, 0.5]],
         [[0.9, 0.9], [0.9, 1.0], [1.0, 0.9], [1.0, 1.0]],
     )

     entries = []
     for points in clusters:
         entries.append(cfentry(len(points), linear_sum(points), square_sum(points)))

     distance12 = entries[0].get_distance(entries[1], type_measurement)
     distance23 = entries[1].get_distance(entries[2], type_measurement)
     distance13 = entries[0].get_distance(entries[2], type_measurement)

     # Both inequalities have to hold: 1-2 is the shortest distance and 1-3 is the longest one.
     assert distance12 < distance23 < distance13
Exemple #6
0
    def __get_nearest_feature(self, point, feature_collection):
        """!
        @brief Find nearest entry for specified point.

        @param[in] point (list): Point from input dataset.
        @param[in] feature_collection (list): Feature collection that is used for
        obtaining nearest feature for the specified point.

        @return (double, uint) Tuple of distance to nearest entry to the specified point and index of that entry.
                 If no entry has a distance below infinity (for example, the collection is empty),
                 the tuple is (inf, -1).

        """

        # The clustering feature of the point is the same for every candidate, therefore it is
        # created once instead of on each iteration.
        point_entry = cfentry(1, linear_sum([point]), square_sum([point]))

        minimum_distance = float("Inf")
        index_nearest_feature = -1

        for index_entry, feature in enumerate(feature_collection):
            distance = feature.get_distance(point_entry, self.__measurement_type)

            # Strict comparison keeps the first entry among equally distant ones.
            if distance < minimum_distance:
                minimum_distance = distance
                index_nearest_feature = index_entry

        return minimum_distance, index_nearest_feature
Exemple #7
0
 def testCfTreeInserionOneLeafThreeEntries(self):
     """!
     @brief Insert three clusters into a tree and check that the clustering features of all of them
             are resolved to the same nearest leaf.

     """
     clusters = [
         [[0.1, 0.1], [0.1, 0.2], [0.2, 0.1], [0.2, 0.2]],
         [[0.4, 0.4], [0.4, 0.5], [0.5, 0.4], [0.5, 0.5]],
         [[0.9, 0.9], [0.9, 1.0], [1.0, 0.9], [1.0, 1.0]],
     ]

     tree = cftree(3, 4, 0.0)
     for points in clusters:
         tree.insert_cluster(points)

     first, second, third = [cfentry(len(points), linear_sum(points), square_sum(points))
                             for points in clusters]

     assert tree.find_nearest_leaf(first) == tree.find_nearest_leaf(second)
     assert tree.find_nearest_leaf(second) == tree.find_nearest_leaf(third)
 def testCfTreeInserionOneLeafThreeEntries(self):
     """!
     @brief Check that three clusters inserted into one tree share the nearest leaf.

     """
     def make_entry(points):
         # Clustering feature of a cluster: amount of points, linear sum and square sum.
         return cfentry(len(points), linear_sum(points), square_sum(points))

     cluster1 = [[0.1, 0.1], [0.1, 0.2], [0.2, 0.1], [0.2, 0.2]]
     cluster2 = [[0.4, 0.4], [0.4, 0.5], [0.5, 0.4], [0.5, 0.5]]
     cluster3 = [[0.9, 0.9], [0.9, 1.0], [1.0, 0.9], [1.0, 1.0]]

     tree = cftree(3, 4, 0.0)
     for inserted_cluster in (cluster1, cluster2, cluster3):
         tree.insert_cluster(inserted_cluster)

     entry1 = make_entry(cluster1)
     entry2 = make_entry(cluster2)
     entry3 = make_entry(cluster3)

     # The nearest leaf is looked up separately for each side of each comparison.
     assert tree.find_nearest_leaf(entry1) == tree.find_nearest_leaf(entry2)
     assert tree.find_nearest_leaf(entry2) == tree.find_nearest_leaf(entry3)
Exemple #9
0
    def insert_point(self, point):
        """!
        @brief Insert a single point, given as a list of coordinates, to the CF tree.
        @details The point is wrapped into a one-element cluster, a clustering feature is created for
                  it and the feature is inserted to the tree.

        @param[in] point (list): Point represented by list of coordinates that should be inserted to CF tree.

        """

        single_point_cluster = [point]
        feature = cfentry(len(single_point_cluster),
                          linear_sum(single_point_cluster),
                          square_sum(single_point_cluster))

        self.insert(feature)
 def testCfEntryIncrease(self):
     """!
     @brief Check that adding a clustering feature to itself gives the expected feature, twice in a row.

     """
     points = [[0.1, 0.1], [0.2, 0.2], [0.5, 0.5], [0.4, 0.4], [0.6, 0.6]]
     feature = cfentry(len(points), linear_sum(points), square_sum(points))

     # 5 points added to themselves: 10 points in the expected feature.
     doubled = feature + feature
     assert cfentry(10, [3.6, 3.6], 3.28) == doubled

     # Added to itself once more: 20 points in the expected feature.
     quadrupled = doubled + doubled
     assert cfentry(20, [7.2, 7.2], 6.56) == quadrupled
Exemple #11
0
 def testCfEntryIncrease(self):
     """!
     @brief Check the result of adding a clustering feature to itself.
     @details The feature of a five-point cluster is added to itself and compared with the expected
               feature, then the sum is added to itself and compared again.

     """
     cluster = [[0.1, 0.1], [0.2, 0.2], [0.5, 0.5], [0.4, 0.4], [0.6, 0.6]]

     entry1 = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster))
     entry2 = entry1 + entry1

     assert cfentry(10, [3.6, 3.6], 3.28) == entry2

     entry2 = entry2 + entry2
     assert cfentry(20, [7.2, 7.2], 6.56) == entry2
Exemple #12
0
 def insert_cluster(self, cluster):
     """!
     @brief Insert a cluster given as a list of points, where each point is a list of coordinates.
     @details A clustering feature is built from the amount of points, the linear sum and the square
               sum of the cluster and then inserted to the tree.

     @param[in] cluster (list): Cluster that is represented by list of points that should be inserted to the tree.

     """

     points_amount = len(cluster)
     feature = cfentry(points_amount, linear_sum(cluster), square_sum(cluster))

     self.insert(feature)
 def testCfEntryIncrease(self):
     """!
     @brief Check that summing a clustering feature with itself produces the expected feature.

     """
     cluster = [[0.1, 0.1], [0.2, 0.2], [0.5, 0.5], [0.4, 0.4], [0.6, 0.6]]
     entry1 = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster))

     # First doubling: expected feature has 10 points.
     entry2 = entry1 + entry1
     assert cfentry(10, [3.6, 3.6], 3.28) == entry2

     # Second doubling: expected feature has 20 points.
     entry2 = entry2 + entry2
     assert cfentry(20, [7.2, 7.2], 6.56) == entry2
Exemple #14
0
 def insert_cluster(self, cluster,hadoop_address_2):
     """!
     @brief Insert cluster that is represented as list of points where each point is represented by list of coordinates.
     @details Clustering feature is created for that cluster, together with the given hadoop address,
               and inserted to the tree.

     @param[in] cluster (list): Cluster that is represented by list of points that should be inserted to the tree.
     @param[in] hadoop_address_2: Value forwarded unchanged as the fourth argument of cfentry
                 (presumably the hadoop address of the data - TODO confirm against cfentry).

     """
     feature = cfentry(
         len(cluster),
         linear_sum(cluster),
         square_sum(cluster),
         hadoop_address_2,
     )
     self.insert(feature)
Exemple #15
0
 def insert_cluster(self, cluster):
     """!
     @brief Insert cluster to the tree; the cluster is a list of points and each point is a list of coordinates.
     @details The cluster is converted to a clustering feature that is passed to insert().

     @param[in] cluster (list): Cluster that is represented by list of points that should be inserted to the tree.

     """

     self.insert(cfentry(len(cluster), linear_sum(cluster), square_sum(cluster)))
 def templateCfClusterRepresentation(self, cluster, centroid, radius, diameter, tolerance):
     """!
     @brief Check centroid, radius and diameter of the clustering feature created for the cluster.

     @param[in] cluster (list): Points of the cluster.
     @param[in] centroid (list|double): Expected centroid; it is compared only when it is a list.
     @param[in] radius (double): Expected radius.
     @param[in] diameter (double): Expected diameter.
     @param[in] tolerance (double): Allowed deviation from the expected values (exclusive bounds).

     """
     feature = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster))

     # A centroid that is not a list is not compared at all.
     if type(centroid) == list:
         for dimension, expected in enumerate(centroid):
             actual = feature.get_centroid()[dimension]
             assert expected - tolerance < actual < expected + tolerance

     actual_radius = feature.get_radius()
     assert radius - tolerance < actual_radius < radius + tolerance

     actual_diameter = feature.get_diameter()
     assert diameter - tolerance < actual_diameter < diameter + tolerance
Exemple #17
0
 def templateCfClusterRepresentation(self, cluster, centroid, radius, diameter, tolerance):
     """!
     @brief Build the clustering feature of the cluster and compare its centroid, radius and diameter
             with the expected values.

     @param[in] cluster (list): Points of the cluster.
     @param[in] centroid (list|double): Expected centroid; only a list is compared, coordinate by coordinate.
     @param[in] radius (double): Expected radius.
     @param[in] diameter (double): Expected diameter.
     @param[in] tolerance (double): Allowed deviation from the expected values (exclusive bounds).

     """
     def within_tolerance(expected, actual):
         # Strictly inside the open interval (expected - tolerance, expected + tolerance).
         return (expected - tolerance < actual) and (actual < expected + tolerance)

     entry = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster))

     centroid_is_list = (type(centroid) == list)
     expected_centroid = centroid if centroid_is_list else [centroid]

     if centroid_is_list:
         for dimension in range(len(expected_centroid)):
             assert within_tolerance(expected_centroid[dimension], entry.get_centroid()[dimension])

     assert within_tolerance(radius, entry.get_radius())
     assert within_tolerance(diameter, entry.get_diameter())
    def templateCfClusterRepresentation(self, cluster, centroid, radius, diameter, tolerance):
        """!
        @brief Check centroid, radius and diameter of the clustering feature created for the cluster.

        @param[in] cluster (list): Points of the cluster.
        @param[in] centroid (list|double): Expected centroid; it is compared only when it is a list.
        @param[in] radius (double): Expected radius.
        @param[in] diameter (double): Expected diameter.
        @param[in] tolerance (double): Maximum allowed absolute difference between expected and actual values.

        """
        entry = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster))

        # NOTE(review): a centroid that is not a list is skipped, exactly as before this change -
        # confirm whether scalar centroids are meant to stay unchecked.
        if isinstance(centroid, list):
            for dimension in range(0, len(centroid)):
                # The tolerance has to be passed as 'delta': the third positional parameter of
                # assertAlmostEqual() is 'places' (number of decimal places), not an absolute tolerance.
                self.assertAlmostEqual(centroid[dimension], (entry.get_centroid())[dimension], delta=tolerance)

        self.assertAlmostEqual(radius, entry.get_radius(), delta=tolerance)
        self.assertAlmostEqual(diameter, entry.get_diameter(), delta=tolerance)
Exemple #19
0
    def __get_nearest_feature(self, point, feature_collection):
        """!
        @brief Find the entry of the feature collection that is nearest to the specified point.

        @param[in] point (list): Point for which the nearest entry is searched.
        @param[in] feature_collection (list): Entries among which the nearest one is searched.

        @return (double, int) Tuple of distance to the nearest entry and index of that entry;
                 (inf, -1) if no entry has a distance below infinity (for example, the collection is empty).

        """
        # Built once: the clustering feature of the point does not depend on the compared entry.
        point_entry = cfentry(1, linear_sum([point]), square_sum([point]))

        minimum_distance = float("Inf")
        index_nearest_feature = -1

        for index_entry, feature in enumerate(feature_collection):
            distance = feature.get_distance(point_entry, self.__measurement_type)
            if distance < minimum_distance:
                minimum_distance = distance
                index_nearest_feature = index_entry

        return minimum_distance, index_nearest_feature
 def testCfTreeEntryAbsorbing(self):
     """!
     @brief Check that a tree created with threshold 10000.0 absorbs every inserted entry into a single one.
     @details After each insertion the tree must still consist of one entry, one node and one level, and
               the feature of the root must be equal to the sum of all inserted entries.

     """
     tree = cftree(2, 1, 10000.0)
     absorbing_entry = cfentry(0, [0.0, 0.0], 0.0)

     for offset in range(0, 10):
         cluster = [[random() + offset, random() + offset] for _ in range(10)]
         entry = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster))

         # Reference feature: running sum of everything inserted so far.
         absorbing_entry += entry

         tree.insert(entry)

         assert 1 == tree.amount_entries
         assert 1 == tree.amount_nodes
         assert 1 == tree.height

         assert tree.root.parent is None
         assert absorbing_entry == tree.root.feature
    def testCfTreeEntryAbsorbing(self):
        """!
        @brief Insert ten random clusters into a tree with threshold 10000.0 and check that they are
                all absorbed by a single entry.
        @details After every insertion the tree is expected to have exactly one entry, one node and
                  height one, with a parentless root whose feature equals the accumulated feature.

        """
        tree = cftree(2, 1, 10000.0)
        absorbing_entry = cfentry(0, [0.0, 0.0], 0.0)

        for offset in range(0, 10):
            cluster = [[random() + offset, random() + offset] for _ in range(10)]
            entry = cfentry(len(cluster), linear_sum(cluster), square_sum(cluster))

            # Accumulate the expected feature before the entry is handed to the tree.
            absorbing_entry += entry

            tree.insert(entry)

            assert 1 == tree.amount_entries
            assert 1 == tree.amount_nodes
            assert 1 == tree.height

            assert tree.root.parent is None
            assert absorbing_entry == tree.root.feature
    def testGetNearestEntry(self):
        """!
        @brief Check that get_nearest_entry() and get_nearest_index_entry() of a leaf point to the same entry.
        @details The tree is filled with the points of SAMPLE_SIMPLE1; its construction parameters are
                  verified before the points are inserted.

        """
        distance_type = measurement_type.CENTROID_EUCLIDEAN_DISTANCE

        sample = read_sample(SIMPLE_SAMPLES.SAMPLE_SIMPLE1)
        tree = cftree(10, 100, 0.2, measurement_type.CENTROID_EUCLIDEAN_DISTANCE)

        # The constructor arguments must be exposed through the corresponding properties.
        self.assertEqual(10, tree.branch_factor)
        self.assertEqual(100, tree.max_entries)
        self.assertEqual(0.2, tree.threshold)
        self.assertEqual(distance_type, tree.type_measurement)

        for point in sample:
            tree.insert_point(point)

        query_points = [[0.1, 0.1], [0.2, 0.2]]
        query_entry = cfentry(len(query_points), linear_sum(query_points), square_sum(query_points))

        leaf = tree.find_nearest_leaf(query_entry)
        found_entry = leaf.get_nearest_entry(query_entry, distance_type)
        found_index_entry = leaf.get_nearest_index_entry(query_entry, distance_type)

        self.assertEqual(leaf.entries[found_index_entry], found_entry)
Exemple #23
0
 def __get_nearest_feature(self, point):
     """!
     @brief Find nearest entry for specified point.

     @param[in] point (list): Point from input dataset.

     @return (int) Index of nearest entry to the specified point, or -1 if no entry has a distance
              below infinity (for example, there are no features).

     """

     # The clustering feature of the point (created with the stored hadoop address) is the same for
     # every compared feature, so it is created once before the loop.
     point_entry = cfentry(1, linear_sum([point]), square_sum([point]), self.__hadoop_address)

     minimum_distance = float("Inf")
     index_nearest_feature = -1

     for index_entry, feature in enumerate(self.__features):
         distance = feature.get_distance(point_entry, self.__measurement_type)
         if distance < minimum_distance:
             minimum_distance = distance
             index_nearest_feature = index_entry

     return index_nearest_feature
Exemple #24
0
    def __get_nearest_feature(self, point):
        """!
        @brief Find nearest entry for specified point.

        @param[in] point (list): Point from input dataset.

        @return (int) Index of nearest entry to the specified point, or -1 if no entry has a
                 distance below infinity (for example, there are no features).

        """

        # Loop-invariant: the clustering feature of the point is created only once.
        point_entry = cfentry(1, linear_sum([point]), square_sum([point]))

        minimum_distance = float("Inf")
        index_nearest_feature = -1

        for index_entry, feature in enumerate(self.__features):
            distance = feature.get_distance(point_entry, self.__measurement_type)

            # Strict comparison: the first of several equally distant entries wins.
            if distance < minimum_distance:
                minimum_distance = distance
                index_nearest_feature = index_entry

        return index_nearest_feature
Exemple #25
0
 def __get_nearest_feature(self, point, feature_collection):
     """!
     @brief Find nearest entry for specified point.

     @param[in] point (list): Point from input dataset.
     @param[in] feature_collection (list): Feature collection that is used for obtaining nearest feature for the specified point.

     @return (double, uint) Tuple of distance to nearest entry to the specified point and index of that entry.
              The tuple is (inf, -1) if no entry has a distance below infinity (for example, the
              collection is empty).

     """

     # Created once before the loop, because the clustering feature of the point does not change
     # between iterations.
     point_entry = cfentry(1, linear_sum([point]), square_sum([point]))

     minimum_distance = float("Inf")
     index_nearest_feature = -1

     for index_entry, feature in enumerate(feature_collection):
         distance = feature.get_distance(point_entry, self.__measurement_type)
         if distance < minimum_distance:
             minimum_distance = distance
             index_nearest_feature = index_entry

     return (minimum_distance, index_nearest_feature)
 def templateCfEntryValueDistance(self, cluster1, cluster2, value, tolerance, type_measurment):
     """!
     @brief Check that the distance between the clustering features of two clusters differs from the
             expected value by less than the tolerance.

     @param[in] cluster1 (list): Points of the first cluster.
     @param[in] cluster2 (list): Points of the second cluster.
     @param[in] value (double): Expected distance.
     @param[in] tolerance (double): Allowed deviation from the expected distance (exclusive bounds).
     @param[in] type_measurment (measurement_type): Measurement type passed to cfentry.get_distance().

     """
     source_entry = cfentry(len(cluster1), linear_sum(cluster1), square_sum(cluster1))
     target_entry = cfentry(len(cluster2), linear_sum(cluster2), square_sum(cluster2))

     measured = source_entry.get_distance(target_entry, type_measurment)

     above_lower_bound = value - tolerance < measured
     assert above_lower_bound and (value + tolerance > measured)