def test_relative_size_analyzer_three(self):
    """
    Run the cluster analysis on a fixture with three classes and three cluster labels (0, 1, 2).

    The call's result is discarded and nothing is asserted, so this only checks that the call
    completes without raising.
    """
    # One list of per-sample cluster labels for each class.
    clusters_by_class = [
        [0, 1, 1, 2, 2],  # Class 0
        [1, 0, 0, 2, 2],  # Class 1
        [0, 0, 0, 2, 1, 1],  # Class 2
    ]

    # NOTE(review): the test name suggests a relative-size analysis was intended, and no
    # expectations are checked - confirm that `analyze_clusters` is the right entry point.
    ClusteringAnalyzer().analyze_clusters(clusters_by_class)
def test_relative_size_analyzer(self):
    """
    Check `analyze_by_relative_size` on a fixture with four classes and two clusters per class.

    Verifies the per-cluster flags returned in `poison_clusters` and a selection of the
    per-sample entries returned in `assigned_clean_by_class`.
    """
    # One list of per-sample cluster labels for each class.
    clusters_by_class = [
        [0, 1, 1, 1, 1],  # Class 0
        [1, 0, 0, 0, 0],  # Class 1
        [0, 0, 0, 0, 1],  # Class 2
        [0, 0, 1, 1, 1],  # Class 3
    ]

    assigned_clean_by_class, poison_clusters = ClusteringAnalyzer().analyze_by_relative_size(
        clusters_by_class)

    clean, poison = 0, 1
    # Expected flag for (cluster 0, cluster 1) of each class.
    expected_flags = [
        (poison, clean),  # Class 0: cluster 0 is poison, cluster 1 is clean
        (clean, poison),  # Class 1: inverse relation
        (clean, poison),  # Class 2
        (clean, clean),  # Class 3: neither cluster is flagged
    ]
    for class_idx, flags in enumerate(expected_flags):
        for cluster_idx, flag in enumerate(flags):
            self.assertEqual(poison_clusters[class_idx][cluster_idx], flag)

    # In `assigned_clean_by_class` the test treats 0 as the marker of a poisoned sample.
    poisoned_sample = 0
    for class_idx, sample_idx in ((0, 0), (1, 0), (2, 4)):
        self.assertEqual(assigned_clean_by_class[class_idx][sample_idx], poisoned_sample)

    # Every entry of class 3 must be non-zero: the sum of the entries equals their number.
    self.assertEqual(sum(assigned_clean_by_class[3]), len(assigned_clean_by_class[3]))
def analyze_clusters(self, **kwargs):
    """
    Analyze the clusters with the technique named by `self.cluster_analysis`.

    The keyword arguments are first applied through `set_params`. If `self.clusters_by_class`
    is empty, `cluster_activations` is called before the analysis. The outcome is stored in
    `self.assigned_clean_by_class` and `self.poisonous_clusters`.

    :param kwargs: a dictionary of cluster-analysis-specific parameters
    :type kwargs: `dict`
    :return: assigned_clean_by_class, an array of arrays that contains what data points were classified as clean.
    :rtype: `np.ndarray`
    :raises ValueError: if `self.cluster_analysis` is not one of the supported techniques.
    """
    self.set_params(**kwargs)

    if not self.clusters_by_class:
        self.cluster_activations()

    analyzer = ClusteringAnalyzer()
    technique = self.cluster_analysis

    if technique == 'smaller':
        outcome = analyzer.analyze_by_size(self.clusters_by_class)
    elif technique == 'relative-size':
        outcome = analyzer.analyze_by_relative_size(self.clusters_by_class)
    elif technique == 'distance':
        outcome = analyzer.analyze_by_distance(
            self.clusters_by_class,
            separated_activations=self.red_activations_by_class)
    elif technique == 'silhouette-scores':
        # NOTE(review): the method name is spelled "sihouette" - confirm that this matches the
        # name actually defined on ClusteringAnalyzer.
        outcome = analyzer.analyze_by_sihouette_score(
            self.clusters_by_class,
            reduced_activations_by_class=self.red_activations_by_class)
    else:
        raise ValueError("Unsupported cluster analysis technique " + technique)

    # Each analysis yields the pair (assigned_clean_by_class, poisonous_clusters).
    self.assigned_clean_by_class, self.poisonous_clusters = outcome
    return self.assigned_clean_by_class
def test_size_analyzer_three(self):
    """
    Check `analyze_by_size` on a fixture with three classes and three clusters per class.

    Verifies the per-cluster flags returned in `poison_clusters` and a selection of the
    per-sample entries returned in `assigned_clean_by_class`.
    """
    # One list of per-sample cluster labels for each class.
    clusters_by_class = [
        [0, 1, 1, 2, 2],  # Class 0
        [1, 0, 0, 2, 2],  # Class 1
        [0, 0, 0, 2, 1, 1],  # Class 2
    ]

    assigned_clean_by_class, poison_clusters = ClusteringAnalyzer().analyze_by_size(
        clusters_by_class)

    clean, poison = 0, 1
    # (class index, cluster index, expected flag); in every class the cluster holding a
    # single sample is the one expected to be flagged as poison.
    expected_flags = (
        (0, 0, poison), (0, 1, clean), (0, 2, clean),
        (1, 1, poison), (1, 0, clean), (1, 2, clean),
        (2, 2, poison), (2, 0, clean), (2, 1, clean),
    )
    for class_idx, cluster_idx, flag in expected_flags:
        self.assertEqual(poison_clusters[class_idx][cluster_idx], flag)

    # In `assigned_clean_by_class` the test treats 0 as the marker of a poisoned sample.
    poisoned_sample = 0
    for class_idx, sample_idx in ((0, 0), (1, 0), (2, 3)):
        self.assertEqual(assigned_clean_by_class[class_idx][sample_idx], poisoned_sample)
def analyze_clusters(self, **kwargs):
    """
    Analyze the clusters with the technique named by `self.cluster_analysis` and build a report.

    The keyword arguments are first applied through `set_params`. If `self.clusters_by_class`
    is empty, `cluster_activations` is called before the analysis. The outcome is stored in
    `self.assigned_clean_by_class` and `self.poisonous_clusters`; the analysis report is merged
    with the current parameters and serialized to a JSON string.

    :param kwargs: a dictionary of cluster-analysis-specific parameters
    :type kwargs: `dict`
    :return: (report, assigned_clean_by_class), where the report is a JSON string and assigned_clean_by_class is
             an array of arrays that contains what data points were classified as clean.
    :rtype: `tuple(json, np.ndarray)`
    :raises ValueError: if `self.cluster_analysis` is not one of the supported techniques.
    """
    self.set_params(**kwargs)

    if not self.clusters_by_class:
        self.cluster_activations()

    analyzer = ClusteringAnalyzer()
    technique = self.cluster_analysis

    if technique == 'smaller':
        outcome = analyzer.analyze_by_size(self.clusters_by_class)
    elif technique == 'relative-size':
        outcome = analyzer.analyze_by_relative_size(self.clusters_by_class)
    elif technique == 'distance':
        outcome = analyzer.analyze_by_distance(
            self.clusters_by_class,
            separated_activations=self.red_activations_by_class)
    elif technique == 'silhouette-scores':
        outcome = analyzer.analyze_by_silhouette_score(
            self.clusters_by_class,
            reduced_activations_by_class=self.red_activations_by_class)
    else:
        raise ValueError("Unsupported cluster analysis technique " + technique)

    # Each analysis yields the triple (assigned_clean_by_class, poisonous_clusters, report).
    self.assigned_clean_by_class, self.poisonous_clusters, report = outcome

    # Extend the analysis summary with the parameters used to run the defence; on a key
    # clash the parameter value replaces the report value.
    summary = dict(report)
    summary.update(self.get_params())

    import json
    return json.dumps(summary), self.assigned_clean_by_class
def test_size_analyzer(self):
    """
    Check `analyze_by_size` and its report on a fixture with three classes and two clusters per class.

    For every class the test verifies the per-cluster flags in `poison_clusters`, the matching
    `suspicious_cluster` entries of the report and the `ptc_data_in_cluster` ratios, followed by
    a selection of the per-sample entries of `assigned_clean_by_class`.
    """
    # One list of per-sample cluster labels for each class.
    clusters_by_class = [
        [0, 1, 1, 1, 1],  # Class 0
        [1, 0, 0, 0, 0],  # Class 1
        [0, 0, 0, 0, 1],  # Class 2
    ]

    assigned_clean_by_class, poison_clusters, report = ClusteringAnalyzer().analyze_by_size(
        clusters_by_class)

    clean, poison = 0, 1
    # (class index, report key, expected flag of cluster 0, expected flag of cluster 1)
    expectations = (
        (0, 'Class_0', poison, clean),  # cluster 0 is poison, cluster 1 is clean
        (1, 'Class_1', clean, poison),  # inverse relation
        (2, 'Class_2', clean, poison),
    )
    for class_idx, class_key, flag_0, flag_1 in expectations:
        self.assertEqual(poison_clusters[class_idx][0], flag_0)
        self.assertEqual(poison_clusters[class_idx][1], flag_1)

        # The report must mark as suspicious exactly the cluster flagged as poison.
        self.assertEqual(report[class_key]['cluster_0']['suspicious_cluster'], flag_0 == poison)
        self.assertEqual(report[class_key]['cluster_1']['suspicious_cluster'], flag_1 == poison)

        # Labels are 0 or 1, so their sum is the number of samples placed in cluster 1.
        total = len(clusters_by_class[class_idx])
        in_cluster_1 = sum(clusters_by_class[class_idx])
        self.assertEqual(report[class_key]['cluster_0']['ptc_data_in_cluster'],
                         (total - in_cluster_1) / total)
        self.assertEqual(report[class_key]['cluster_1']['ptc_data_in_cluster'],
                         in_cluster_1 / total)

    # In `assigned_clean_by_class` the test treats 0 as the marker of a poisoned sample.
    poisoned_sample = 0
    for class_idx, sample_idx in ((0, 0), (1, 0), (2, 4)):
        self.assertEqual(assigned_clean_by_class[class_idx][sample_idx], poisoned_sample)
def test_size_analyzer_three(self):
    """
    Check `analyze_by_size` and its report on a fixture with three classes and three clusters per class.

    For every class the test verifies the per-cluster flags in `poison_clusters`, the matching
    `suspicious_cluster` entries of the report and the `ptc_data_in_cluster` ratios, followed by
    a selection of the per-sample entries of `assigned_clean_by_class`.
    """
    # One list of per-sample cluster labels for each class.
    clusters_by_class = [
        [0, 1, 1, 2, 2],  # Class 0
        [1, 0, 0, 2, 2],  # Class 1
        [0, 0, 0, 2, 1, 1],  # Class 2
    ]

    assigned_clean_by_class, poison_clusters, report = ClusteringAnalyzer().analyze_by_size(
        clusters_by_class)

    clean, poison = 0, 1
    cluster_keys = ('cluster_0', 'cluster_1', 'cluster_2')
    # (class index, report key, expected flags of clusters 0..2, rounding digits or None).
    # Only class 2 compares against ratios rounded to two decimals; classes 0 and 1 compare
    # against the unrounded ratios.
    expectations = (
        (0, 'Class_0', (poison, clean, clean), None),
        (1, 'Class_1', (clean, poison, clean), None),
        (2, 'Class_2', (clean, clean, poison), 2),
    )
    for class_idx, class_key, flags, ndigits in expectations:
        for cluster_idx, flag in enumerate(flags):
            self.assertEqual(poison_clusters[class_idx][cluster_idx], flag)

        # The report must mark as suspicious exactly the cluster flagged as poison.
        for cluster_key, flag in zip(cluster_keys, flags):
            self.assertEqual(report[class_key][cluster_key]['suspicious_cluster'], flag == poison)

        # counts[k] is the number of samples of this class labelled with cluster k.
        total = len(clusters_by_class[class_idx])
        counts = np.bincount(clusters_by_class[class_idx])
        for cluster_idx, cluster_key in enumerate(cluster_keys):
            reported = report[class_key][cluster_key]['ptc_data_in_cluster']
            share = counts[cluster_idx] / total
            if ndigits is not None:
                share = round(share, ndigits)
            self.assertEqual(reported, share)

    # In `assigned_clean_by_class` the test treats 0 as the marker of a poisoned sample.
    poisoned_sample = 0
    for class_idx, sample_idx in ((0, 0), (1, 0), (2, 3)):
        self.assertEqual(assigned_clean_by_class[class_idx][sample_idx], poisoned_sample)