def test_add_outliers(self):
        """Check the ``is_outlier`` flags produced by ``ShowResults._add_outliers``.

        Distances are added to the fixture dataframe first, then the outlier
        flags are computed and compared row by row with pre-calculated values.
        """
        with_distances = ShowResults._add_distances(self.dataframe, point_col='point_col')
        outlier_pdf = ShowResults._add_outliers(with_distances).toPandas()

        # Expected flags were worked out by hand; per the original note the
        # boundary for prediction 0 is mean + 2 * stddev. Only the sixth row
        # is expected to be flagged.
        expected_flags = [False] * 5 + [True] + [False] * 4
        observed_flags = list(outlier_pdf['is_outlier'])
        self.assertListEqual(observed_flags, expected_flags)
        print('add_outliers \n', outlier_pdf)
    def test_add_distances(self):
        """Check the ``distance`` column added by ``ShowResults._add_distances``.

        Verifies that a ``distance`` column of dtype ``double`` is present and
        that each of the first ten rows matches the pre-calculated distance.
        """
        from math import sqrt
        computed_dataframe = ShowResults._add_distances(self.dataframe, point_col='point_col')
        self.assertIn(('distance', 'double'), computed_dataframe.dtypes)

        p_computed_dataframe = computed_dataframe.toPandas()
        expected_distances = [sqrt(1.0), sqrt(1.0), sqrt(1.0), sqrt(1.0), sqrt(4.0),
                              sqrt(9.0+16.0), sqrt(1.0), sqrt(100.0), sqrt(4.0), sqrt(25.0)]
        distance_column = p_computed_dataframe['distance']
        for idx, expected in enumerate(expected_distances):
            # Distances are floating-point results computed by a different
            # engine than math.sqrt, so compare with a tolerance rather than
            # bit-for-bit equality.
            self.assertAlmostEqual(
                expected, distance_column[idx],
                msg='distance mismatch at row %d' % idx)
        print('add_distance \n', p_computed_dataframe)
    def test_compute_summary(self):
        """Check the per-prediction summary built by ``ShowResults.compute_summary``.

        The fixture dataframe is run through ``_add_distances`` and
        ``_add_outliers`` before being summarised; the resulting ``count``,
        ``outlier_count`` and ``outlier percentage`` columns are compared with
        pre-calculated values.
        """
        with_distances = ShowResults._add_distances(self.dataframe, point_col='point_col')
        with_outliers = ShowResults._add_outliers(with_distances)
        summary_table = ShowResults.compute_summary(with_outliers).toPandas()

        # Number of rows per prediction.
        expected_counts = [6, 3, 1]
        # Number of outliers (by distance) per prediction.
        expected_outliers = [1, 0, 0]
        # Outlier share per prediction in percent, rounded to zero decimals
        # through '%.f' formatting and converted back to float.
        expected_percentages = []
        for n_outliers, n_points in zip(expected_outliers, expected_counts):
            share = n_outliers / n_points * 100
            expected_percentages.append(float('%.f' % share))

        checks = (
            ('count', expected_counts),
            ('outlier_count', expected_outliers),
            ('outlier percentage', expected_percentages),
        )
        for column, expected in checks:
            self.assertEqual(list(summary_table[column]), expected)
        print('compute_summary \n', summary_table)