def test_70_fail_impute_too_high(self):
        """(70) Fail: Impute higher than max"""
        msgt(self.test_70_fail_impute_too_high.__doc__)

        plan = self.analysis_plan

        # Bound EyeHeight to [-8, 5]; the fixed value sent below (40)
        # lies above that max.
        eye_height = plan.variable_info['EyeHeight']
        eye_height['min'] = -8
        eye_height['max'] = 5
        plan.save()

        # Statistic spec that imputes the out-of-range value
        spec = self.general_stat_spec
        spec['fixed_value'] = 40

        payload = {
            'analysis_plan_id': plan.object_id,
            'dp_statistics': [spec],
        }

        # Field-level checks on the request
        serializer = ReleaseValidationSerializer(data=payload)
        self.assertTrue(serializer.is_valid())

        # Run the per-statistic validation: the run itself succeeds, but
        # the single statistic is reported as invalid.
        outcome = serializer.save(opendp_user=self.user_obj)
        print('stats_valid.data', outcome.data)
        self.assertTrue(outcome.success)

        first_stat = outcome.data[0]
        self.assertFalse(first_stat['valid'])

        expected_msg = 'The "fixed value" (40.0) cannot be more than the "max" (5.0)'
        self.assertEqual(first_stat['message'], expected_msg)
    def test_50_bad_total_epsilon(self):
        """(50) Fail: Bad total epsilon"""
        msgt(self.test_50_bad_total_epsilon.__doc__)

        plan = self.analysis_plan

        # Store an epsilon of 4 on the dataset's depositor setup info
        depositor_info = plan.dataset.get_depositor_setup_info()
        depositor_info.epsilon = 4
        depositor_info.save()

        # Request validation of the unmodified general statistic spec
        spec = self.general_stat_spec
        payload = {
            'analysis_plan_id': plan.object_id,
            'dp_statistics': [spec],
        }

        # Field-level checks still pass
        serializer = ReleaseValidationSerializer(data=payload)
        self.assertTrue(serializer.is_valid())
        self.assertTrue(serializer.errors == {})

        # The validation run as a whole is expected to fail, with the
        # bad-total-epsilon text somewhere in the message.
        outcome = serializer.save(opendp_user=self.user_obj)
        self.assertFalse(outcome.success)

        position = outcome.message.find(astatic.ERR_MSG_BAD_TOTAL_EPSILON)
        self.assertTrue(position > -1)
    def test_20_fail_unsupported_stat(self):
        """(20) Fail: Test a known but unsupported statistic"""
        msgt(self.test_20_fail_unsupported_stat.__doc__)

        plan = self.analysis_plan

        # Ask for the quantile statistic
        spec = self.general_stat_spec
        spec['statistic'] = astatic.DP_QUANTILE

        payload = {
            'analysis_plan_id': plan.object_id,
            'dp_statistics': [spec],
        }

        # Field-level checks on the request
        serializer = ReleaseValidationSerializer(data=payload)
        self.assertTrue(serializer.is_valid())
        self.assertTrue(serializer.errors == {})

        # Run the per-statistic validation: the run succeeds, the
        # statistic itself is flagged as not valid.
        outcome = serializer.save(opendp_user=self.user_obj)
        self.assertTrue(outcome.success)

        first_stat = outcome.data[0]
        self.assertEqual(first_stat['valid'], False)
        self.assertEqual(first_stat['statistic'], astatic.DP_QUANTILE)
        self.assertEqual(first_stat['variable'], 'EyeHeight')

        expected_msg = f'Statistic "{astatic.DP_QUANTILE}" will be supported soon!'
        self.assertEqual(first_stat['message'], expected_msg)
    def test_60_bad_running_epsilon(self):
        """(60) Fail: Total epsilon from dp_statistics > depositor_setup_info.epsilon"""
        msgt(self.test_60_bad_running_epsilon.__doc__)

        analysis_plan = self.analysis_plan

        # Set the BlinkDuration bounds to 1..400 and persist them on the plan
        variable_info_mod = analysis_plan.variable_info
        variable_info_mod['BlinkDuration']['min'] = 1
        variable_info_mod['BlinkDuration']['max'] = 400
        analysis_plan.variable_info = variable_info_mod
        analysis_plan.save()

        # Send the dp_statistics for validation
        stat_specs = self.get_test_60_65_specs()
        request_plan = dict(analysis_plan_id=analysis_plan.object_id,
                            dp_statistics=stat_specs)

        # Check the basics: field-level validation passes with no errors
        serializer = ReleaseValidationSerializer(data=request_plan)
        self.assertTrue(serializer.is_valid())
        self.assertEqual(serializer.errors, {})

        # Now run the validator; the run as a whole succeeds
        stats_valid = serializer.save(opendp_user=self.user_obj)
        self.assertTrue(stats_valid.success)

        # Expected per-statistic outcome: the first statistic validates,
        # the second is rejected with a message that contains
        # 'exceeds the max epsilon'.
        results = stats_valid.data
        self.assertIs(results[0]['valid'], True)
        self.assertIs(results[1]['valid'], False)
        self.assertIn('exceeds the max epsilon', results[1]['message'])
    def test_80_fail_impute_too_low(self):
        """(80) Fail: Impute lower than min"""
        msgt(self.test_80_fail_impute_too_low.__doc__)

        plan = self.analysis_plan

        # Bound EyeHeight to [-8, 5]; the fixed value sent below ("-10")
        # lies under that min.
        eye_height = plan.variable_info['EyeHeight']
        eye_height['min'] = -8
        eye_height['max'] = 5
        plan.save()

        # Fully spelled-out statistic spec that imputes the low value
        spec = dict(
            statistic=astatic.DP_MEAN,
            variable="EyeHeight",
            epsilon=1,
            delta=0,
            cl=astatic.CL_95,
            error="",
            missing_values_handling=astatic.MISSING_VAL_INSERT_FIXED,
            handle_as_fixed=False,
            fixed_value="-10",
            locked=False,
            label="EyeHeight",
        )

        payload = {
            'analysis_plan_id': plan.object_id,
            'dp_statistics': [spec],
        }

        # Field-level checks on the request
        serializer = ReleaseValidationSerializer(data=payload)
        self.assertTrue(serializer.is_valid())
        self.assertTrue(serializer.errors == {})

        # Run the per-statistic validation: the run succeeds, the
        # statistic itself is reported as invalid.
        outcome = serializer.save(opendp_user=self.user_obj)
        self.assertTrue(outcome.success)
        self.assertFalse(outcome.data[0]['valid'])
        print('stats_valid.data', outcome.data)

        expected_msg = 'The "fixed value" (-10.0) cannot be less than the "min" (-8.0)'
        self.assertEqual(outcome.data[0]['message'], expected_msg)
    def test_10_validate_stats(self):
        """(10) Test a working stat"""
        msgt(self.test_10_validate_stats.__doc__)

        analysis_plan = self.analysis_plan

        # Send the unmodified general statistic spec for validation
        request_plan = dict(analysis_plan_id=analysis_plan.object_id,
                            dp_statistics=[self.general_stat_spec])

        # Check the basics: field-level validation passes with no errors
        serializer = ReleaseValidationSerializer(data=request_plan)
        self.assertTrue(serializer.is_valid())
        self.assertEqual(serializer.errors, {})

        # Now run the validator
        stats_info = serializer.save(opendp_user=self.user_obj)
        self.assertTrue(stats_info.success)

        first_stat = stats_info.data[0]
        self.assertEqual(first_stat['valid'], True)

        # Were accuracy results included?
        accuracy = first_stat['accuracy']
        self.assertIn('value', accuracy)
        self.assertIn('message', accuracy)

        # Only a fragment of the accuracy message is checked, so the test
        # does not depend on the exact numeric accuracy value.
        stat_title = astatic.DP_MEAN.title()
        accuracy_msg = f'DP {stat_title} will differ from the true {stat_title} by at'
        self.assertIn(accuracy_msg, accuracy['message'])
    def test_30_fail_bad_min_max(self):
        """(30) Fail: Add bad min/max values"""
        msgt(self.test_30_fail_bad_min_max.__doc__)

        analysis_plan = self.analysis_plan

        # Invalid bounds: min (120) is greater than max (5)
        variable_info_mod = analysis_plan.variable_info
        variable_info_mod['TypingSpeed']['min'] = 120
        variable_info_mod['TypingSpeed']['max'] = 5
        analysis_plan.variable_info = variable_info_mod
        analysis_plan.save()

        # Send the dp_statistics for validation, targeting TypingSpeed
        stat_spec = self.general_stat_spec
        stat_spec['variable'] = "TypingSpeed"

        request_plan = dict(analysis_plan_id=analysis_plan.object_id,
                            dp_statistics=[stat_spec])

        # Check the basics: field-level validation passes with no errors
        serializer = ReleaseValidationSerializer(data=request_plan)
        self.assertTrue(serializer.is_valid())
        self.assertEqual(serializer.errors, {})

        # Now run the validator; the run as a whole succeeds
        stats_valid = serializer.save(opendp_user=self.user_obj)
        self.assertTrue(stats_valid.success)

        # The statistic is rejected. Only the `valid` flag and a fragment
        # of the message are checked, not the full result dict.
        first_stat = stats_valid.data[0]
        self.assertEqual(first_stat['valid'], False)
        self.assertIn('must be less than the max', first_stat['message'])
    def test_90_ok_impute_equals_min(self):
        """(90) Ok: Impute equals min, then impute equals max"""
        msgt(self.test_90_ok_impute_equals_min.__doc__)

        analysis_plan = self.analysis_plan

        # Bound TypingSpeed to [-8, 5]; both boundary values are tried below
        analysis_plan.variable_info['TypingSpeed']['min'] = -8
        analysis_plan.variable_info['TypingSpeed']['max'] = 5
        analysis_plan.save()

        # ----------------------------------------------
        # have impute == min
        # ----------------------------------------------
        stat_spec = self.general_stat_spec
        stat_spec["variable"] = "TypingSpeed"
        stat_spec["fixed_value"] = -8

        request_plan = dict(analysis_plan_id=analysis_plan.object_id,
                            dp_statistics=[stat_spec])

        # Check the basics
        serializer = ReleaseValidationSerializer(data=request_plan)
        self.assertTrue(serializer.is_valid())
        self.assertEqual(serializer.errors, {})

        # Now run the validator
        stats_valid = serializer.save(opendp_user=self.user_obj)
        self.assertTrue(stats_valid.success)
        self.assertTrue(stats_valid.data[0]['valid'])
        self.assertIn('accuracy', stats_valid.data[0])

        # ----------------------------------------------
        # have impute == max
        # ----------------------------------------------
        # Copy the first spec so the variable stays "TypingSpeed" and only
        # the fixed value changes.
        stat_spec2 = dict(stat_spec, fixed_value=5)

        request_plan2 = dict(analysis_plan_id=analysis_plan.object_id,
                             dp_statistics=[stat_spec2])

        # Check the basics
        serializer2 = ReleaseValidationSerializer(data=request_plan2)
        self.assertTrue(serializer2.is_valid())
        self.assertEqual(serializer2.errors, {})

        # Run the validator on the second serializer; re-saving the first
        # one would only repeat the "impute == min" check.
        stats_valid2 = serializer2.save(opendp_user=self.user_obj)
        self.assertTrue(stats_valid2.success)
        self.assertTrue(stats_valid2.data[0]['valid'])
        self.assertIn('accuracy', stats_valid2.data[0])
    def test_45_api_fail_single_stat_bad_epsilon(self):
        """(45) Fail: API, Single stat exceeds total epsilon"""
        msgt(self.test_45_api_fail_single_stat_bad_epsilon.__doc__)

        analysis_plan = self.analysis_plan

        # Set the BlinkDuration bounds to 1.0..400.0 and persist the plan
        analysis_plan.variable_info['BlinkDuration']['min'] = 1.0
        analysis_plan.variable_info['BlinkDuration']['max'] = 400.0
        analysis_plan.save()

        # Send the dp_statistics for validation with an epsilon of 1.5
        stat_spec = self.general_stat_spec
        stat_spec['epsilon'] = 1.5

        # The id is converted with str() because the payload is passed
        # through json.dumps below.
        request_plan = dict(analysis_plan_id=str(analysis_plan.object_id),
                            dp_statistics=[stat_spec])

        # Check the basics: field-level validation passes with no errors
        serializer = ReleaseValidationSerializer(data=request_plan)
        self.assertTrue(serializer.is_valid())
        self.assertEqual(serializer.errors, {})

        # Same payload through the API endpoint
        response = self.client.post('/api/validation/',
                                    json.dumps(request_plan),
                                    content_type='application/json')

        # Check the status before parsing so a non-JSON error page fails
        # with a readable status-code mismatch.
        self.assertEqual(response.status_code, 200)
        jresp = response.json()
        self.assertTrue(jresp['success'])

        # The request succeeds overall; the single statistic is rejected
        first_stat = jresp['data'][0]
        self.assertEqual(first_stat['valid'], False)
        self.assertIn(VALIDATE_MSG_EPSILON, first_stat['message'])
    def test_40_fail_single_stat_bad_epsilon(self):
        """(40) Fail: Single stat exceeds total epsilon"""
        msgt(self.test_40_fail_single_stat_bad_epsilon.__doc__)

        plan = self.analysis_plan

        # Set the BlinkDuration bounds to 1..400 and write them back
        var_info = plan.variable_info
        blink_info = var_info['BlinkDuration']
        blink_info['min'] = 1
        blink_info['max'] = 400
        plan.variable_info = var_info
        plan.save()

        # Request a single statistic with an epsilon of 1.5
        spec = self.general_stat_spec
        spec['epsilon'] = 1.5

        payload = {
            'analysis_plan_id': plan.object_id,
            'dp_statistics': [spec],
        }

        # Field-level checks on the request
        serializer = ReleaseValidationSerializer(data=payload)
        self.assertTrue(serializer.is_valid())
        self.assertTrue(serializer.errors == {})

        # Run the per-statistic validation: the run succeeds, the
        # statistic itself is rejected with the epsilon message.
        outcome = serializer.save(opendp_user=self.user_obj)
        self.assertTrue(outcome.success)
        print('40!! stats_valid', outcome.data)

        first_stat = outcome.data[0]
        self.assertEqual(first_stat['valid'], False)

        position = first_stat['message'].find(VALIDATE_MSG_EPSILON)
        self.assertTrue(position > -1)
Example #11
0
class ValidationView(viewsets.ViewSet):

    # Serializer instances declared on the view
    statistics = ReleaseValidationSerializer()
    analysis_plan = AnalysisPlanSerializer()

    # Authenticated users only; POST is the only HTTP method listed
    permission_classes = [permissions.IsAuthenticated]
    http_method_names = ['post']  # 'patch']

    def create(self, request, *args, **kwargs):
        """
        Validate a set of DP statistics against an analysis plan.

        Note: No data is saved. This endpoint is used for validation.

        endpoint: /api/validation/

        Example POST input:
            {
                "analysis_plan_id": "616b5167-4ce8-4def-85dc-6f0d8de2316c",
                "dp_statistics": [
                    {
                        "statistic": "mean",
                        "variable": "EyeHeight",
                        "epsilon": 0.6,
                        "delta": 0,
                        "error": "",
                        "missing_values_handling": "insert_fixed",
                        "handle_as_fixed": false,
                        "fixed_value": "5.0",
                        "locked": false,
                        "label": "EyeHeight"
                    }
                ]
            }

        -- Example outputs --

        (1) Overall error
            Status code: 400
                {
                    "success": false,
                    "message": "The depositor setup info has an invalid epsilon value: 4.0"
                }

        (2) Single statistic error -- even if only 1 statistic submitted
            Status code: 200  - NOTE status code is 200!
            {
                "success": true,
                "message": "validation results returned",
                "data": [
                    {
                        "var_name": "BlinkDuration",
                        "statistic": "mean",
                        "valid": false,
                        "message": "As a rule of thumb, epsilon should not be less than 0.001 nor greater than 1."
                    }
                ]
            }

        (3) Single statistic success -- even if only 1 statistic submitted
            Status code: 200  - NOTE status code is 200!

            {
                "success": true,
                "message": "validation results returned",
                "data": [
                    {
                        "var_name": "EyeHeight",
                        "statistic": "mean",
                        "valid": true,
                        "message": null
                    }
                ]
            }

        (4) Mixed success and error
            Status code: 200  - NOTE status code is 200!
            {
                "success": true,
                "message": "validation results returned",
                "data": [
                    {
                        "var_name": "EyeHeight",
                        "statistic": "mean",
                        "valid": true,
                        "message": null
                    },
                    {
                        "var_name": "BlinkDuration",
                        "statistic": "mean",
                        "valid": false,
                        "message": "The running epsilon (1.45) exceeds the max epsilon (1.0)"
                    }
                ]
            }

        (5) Field validation failure (the request body itself is rejected)
            Status code: 200  - NOTE status code is 200!
            An error payload with the message "Field validation failed"
            and the serializer's field errors.

        """
        # NOTE(review): the sample payloads above use the key "var_name";
        # confirm that it matches the keys the serializer actually emits.
        validator = ReleaseValidationSerializer(data=request.data)

        # Malformed request body: report the field errors, still as HTTP 200
        if not validator.is_valid():
            print('release_info_serializer.errors', validator.errors)
            field_error_payload = get_json_error('Field validation failed',
                                                 errors=validator.errors)
            return Response(field_error_payload, status=status.HTTP_200_OK)

        # Run the validation, passing the requesting user to save()
        outcome = validator.save(opendp_user=request.user)

        if outcome.success:
            # Per-statistic results, valid or not, are returned with 200
            success_payload = get_json_success('validation results returned',
                                               data=outcome.data)
            return Response(success_payload, status=status.HTTP_200_OK)

        # Overall failure: nothing could be validated
        return Response(get_json_error(outcome.message),
                        status=status.HTTP_400_BAD_REQUEST)
Example #12
0
 def retrieve(self, request, pk=None):
     """Return the serialized ReleaseInfo whose ``object_id`` equals ``pk``.

     The lookup goes through ``get_object_or_404``, so a missing object
     is reported by that helper rather than handled here.
     """
     release_info = get_object_or_404(ReleaseInfo, object_id=pk)
     serializer = ReleaseValidationSerializer(release_info,
                                              context={'request': request})
     # Response must receive the rendered representation; handing it the
     # serializer object itself is rejected by DRF's Response.
     return Response(data=serializer.data)