def analyze_clusters(self, **kwargs):
        """
        Analyze the clusters with the technique selected by `self.cluster_analysis`.

        The keyword arguments are first forwarded to `set_params`. If no clusters are available yet
        (`self.clusters_by_class` is empty), `cluster_activations` is run to produce them. The result of the
        analysis is stored in `self.assigned_clean_by_class` and `self.poisonous_clusters`.

        :param kwargs: a dictionary of cluster-analysis-specific parameters
        :type kwargs: `dict`
        :return: assigned_clean_by_class, an array of arrays that contains what data points where classified as clean.
        :rtype: `np.ndarray`
        :raises ValueError: if `self.cluster_analysis` is not one of 'smaller', 'relative-size', 'distance' or
                 'silhouette-scores'.
        """
        self.set_params(**kwargs)

        if not self.clusters_by_class:
            self.cluster_activations()

        technique = self.cluster_analysis
        if technique not in ('smaller', 'relative-size', 'distance', 'silhouette-scores'):
            # str() keeps the message identical for string values and guarantees that a non-string value
            # (e.g. None) is reported as the intended ValueError rather than a TypeError from `+`.
            raise ValueError(
                "Unsupported cluster analysis technique " + str(technique))

        analyzer = ClusteringAnalyzer()

        if technique == 'smaller':
            outcome = analyzer.analyze_by_size(self.clusters_by_class)
        elif technique == 'relative-size':
            outcome = analyzer.analyze_by_relative_size(self.clusters_by_class)
        elif technique == 'distance':
            outcome = analyzer.analyze_by_distance(self.clusters_by_class,
                                                   separated_activations=self.red_activations_by_class)
        else:
            # Only 'silhouette-scores' can reach this branch after the validation above.
            # NOTE(review): the analyzer method is spelled `sihouette` here; kept as-is, confirm it matches
            # the name actually exposed by ClusteringAnalyzer.
            outcome = analyzer.analyze_by_sihouette_score(self.clusters_by_class,
                                                          reduced_activations_by_class=self.red_activations_by_class)

        self.assigned_clean_by_class, self.poisonous_clusters = outcome

        return self.assigned_clean_by_class
    def test_relative_size_analyzer(self):
        """
        Check `ClusteringAnalyzer.analyze_by_relative_size` on four hand-written classes.

        Each class is a list of five per-sample cluster labels (0 or 1). The test asserts which cluster of
        each class is flagged in `poison_clusters` and spot-checks the per-sample `assigned_clean_by_class`.
        """
        # One row of per-sample cluster labels per class.
        clusters_by_class = [
            [0, 1, 1, 1, 1],  # Class 0
            [1, 0, 0, 0, 0],  # Class 1
            [0, 0, 0, 0, 1],  # Class 2
            [0, 0, 1, 1, 1],  # Class 3
        ]
        assigned_clean_by_class, poison_clusters = ClusteringAnalyzer().analyze_by_relative_size(
            clusters_by_class)

        clean, poison = 0, 1
        # Expected flag of (cluster 0, cluster 1) for every class, in class order.
        expected_flags = [
            (poison, clean),   # Class 0: cluster 0 should be marked as poison, cluster 1 as clean
            (clean, poison),   # Class 1: inverse relation
            (clean, poison),   # Class 2
            (clean, clean),    # Class 3: neither cluster is flagged
        ]
        for class_id, flags in enumerate(expected_flags):
            for cluster_id, flag in enumerate(flags):
                self.assertEqual(poison_clusters[class_id][cluster_id], flag)

        # In assigned_clean_by_class the encoding is per sample and 0 is the value expected for poison.
        flagged_sample = 0
        for class_id, sample_id in ((0, 0), (1, 0), (2, 4)):
            self.assertEqual(
                assigned_clean_by_class[class_id][sample_id], flagged_sample)

        # For class 3 the entries must sum to the number of samples.
        class_3_assignment = assigned_clean_by_class[3]
        self.assertEqual(sum(class_3_assignment), len(class_3_assignment))
Exemple #3
0
    def analyze_clusters(self, **kwargs):
        """
        Analyze the clusters with the technique selected by `self.cluster_analysis` and build a report.

        The keyword arguments are first forwarded to `set_params`. If no clusters are available yet
        (`self.clusters_by_class` is empty), `cluster_activations` is run to produce them. The result of the
        analysis is stored in `self.assigned_clean_by_class` and `self.poisonous_clusters`. The analyzer's
        report is merged with the current parameters from `get_params` (a parameter overrides a report entry
        with the same key) and serialized with `json.dumps`.

        :param kwargs: a dictionary of cluster-analysis-specific parameters
        :type kwargs: `dict`
        :return: (report, assigned_clean_by_class), where the report is a JSON-formatted string and
                 assigned_clean_by_class is an array of arrays that contains what data points where classified
                 as clean.
        :rtype: `tuple(str, np.ndarray)`
        :raises ValueError: if `self.cluster_analysis` is not one of 'smaller', 'relative-size', 'distance' or
                 'silhouette-scores'.
        """
        import json

        self.set_params(**kwargs)

        if not self.clusters_by_class:
            self.cluster_activations()

        technique = self.cluster_analysis
        if technique not in ('smaller', 'relative-size', 'distance', 'silhouette-scores'):
            # str() keeps the message identical for string values and guarantees that a non-string value
            # (e.g. None) is reported as the intended ValueError rather than a TypeError from `+`.
            raise ValueError(
                "Unsupported cluster analysis technique " + str(technique))

        analyzer = ClusteringAnalyzer()

        if technique == 'smaller':
            outcome = analyzer.analyze_by_size(self.clusters_by_class)
        elif technique == 'relative-size':
            outcome = analyzer.analyze_by_relative_size(self.clusters_by_class)
        elif technique == 'distance':
            outcome = analyzer.analyze_by_distance(self.clusters_by_class,
                                                   separated_activations=self.red_activations_by_class)
        else:
            # Only 'silhouette-scores' can reach this branch after the validation above.
            outcome = analyzer.analyze_by_silhouette_score(self.clusters_by_class,
                                                           reduced_activations_by_class=self.red_activations_by_class)

        self.assigned_clean_by_class, self.poisonous_clusters, report = outcome

        # Add to the report current parameters used to run the defence and the analysis summary.
        # Work on a copy so the analyzer's own report object is not mutated.
        report = dict(report)
        report.update(self.get_params())
        jreport = json.dumps(report)

        return jreport, self.assigned_clean_by_class
Exemple #4
0
    def test_relative_size_analyzer(self):
        """
        Check `ClusteringAnalyzer.analyze_by_relative_size`, including its report, on four hand-written classes.

        Each class is a list of five per-sample cluster labels (0 or 1). For every class the test asserts the
        flag of both clusters in `poison_clusters`, the matching `suspicious_cluster` booleans in the report,
        and the `ptc_data_in_cluster` fractions rounded to two decimals. It finishes by spot-checking the
        per-sample `assigned_clean_by_class`.
        """
        # One row of per-sample cluster labels per class.
        clusters_by_class = [
            [0, 1, 1, 1, 1],  # Class 0
            [1, 0, 0, 0, 0],  # Class 1
            [0, 0, 0, 0, 1],  # Class 2
            [0, 0, 1, 1, 1],  # Class 3
        ]
        analyzer = ClusteringAnalyzer()
        assigned_clean_by_class, poison_clusters, report = analyzer.analyze_by_relative_size(
            clusters_by_class)

        clean, poison = 0, 1
        # Expected flag of (cluster 0, cluster 1) for every class, in class order.
        expected_flags = [
            (poison, clean),   # Class 0: cluster 0 should be marked as poison, cluster 1 as clean
            (clean, poison),   # Class 1: inverse relation
            (clean, poison),   # Class 2
            (clean, clean),    # Class 3: neither cluster is flagged
        ]

        for class_id, (flag_0, flag_1) in enumerate(expected_flags):
            labels = clusters_by_class[class_id]
            class_key = 'Class_{}'.format(class_id)

            self.assertEqual(poison_clusters[class_id][0], flag_0)
            self.assertEqual(poison_clusters[class_id][1], flag_1)

            # The report must mirror the flags as booleans.
            self.assertEqual(report[class_key]['cluster_0']['suspicious_cluster'],
                             flag_0 == poison)
            self.assertEqual(report[class_key]['cluster_1']['suspicious_cluster'],
                             flag_1 == poison)

            # Labels are 0/1, so their sum is the number of samples in cluster 1.
            nb_samples = len(labels)
            nb_in_cluster_1 = sum(labels)
            self.assertEqual(report[class_key]['cluster_0']['ptc_data_in_cluster'],
                             round((nb_samples - nb_in_cluster_1) / nb_samples, 2))
            self.assertEqual(report[class_key]['cluster_1']['ptc_data_in_cluster'],
                             round(nb_in_cluster_1 / nb_samples, 2))

        # In assigned_clean_by_class the encoding is per sample and 0 is the value expected for poison.
        flagged_sample = 0
        for class_id, sample_id in ((0, 0), (1, 0), (2, 4)):
            self.assertEqual(
                assigned_clean_by_class[class_id][sample_id], flagged_sample)

        # For class 3 the entries must sum to the number of samples.
        class_3_assignment = assigned_clean_by_class[3]
        self.assertEqual(sum(class_3_assignment), len(class_3_assignment))