def test_create_rule_using_org_id_to_establish_dq_check_relationship(self):
        """POSTing a rule to the org-nested rules list adds it to that org's check."""
        target_field = 'address_line_2'

        def matching_rule_count():
            # Retrieve the check again on every call, as the original test did
            # before and after the POST.
            check = DataQualityCheck.retrieve(self.org.id)
            return check.rules.filter(field=target_field).count()

        # Ensure no address_line_2 rules exist by default beforehand
        self.assertEqual(0, matching_rule_count())

        payload = json.dumps({
            'field': target_field,
            'table_name': 'PropertyState',
            'enabled': True,
            'data_type': Rule.TYPE_STRING,
            'rule_type': Rule.RULE_TYPE_DEFAULT,
            'condition': Rule.RULE_INCLUDE,
            'required': False,
            'not_null': False,
            'min': None,
            'max': None,
            'text_match': 'some random text',
            'severity': Rule.SEVERITY_ERROR,
            'units': "",
            'status_label': None,
        })
        endpoint = reverse(
            'api:v3:data_quality_check-rules-list',
            kwargs={'nested_organization_id': self.org.id},
        )
        self.client.post(endpoint, content_type='application/json', data=payload)

        # Exactly one rule for the field must exist after the request
        self.assertEqual(1, matching_rule_count())
Beispiel #2
0
def create_organization(user=None, org_name='', *args, **kwargs):
    """
    Create an Organization named ``org_name`` together with its default records.

    After the organization row is created, the default status labels are
    fetched-or-created for it, the default columns are created, and the
    organization's DataQualityCheck is retrieved (which, per the original
    comment, creates the default rules).

    :param user: optional user inst.; when truthy, an OrganizationUser linking
        the user to the new organization is fetched or created.
    :param org_name: str, name of Organization we'd like to create.
    :param args: accepted but not read by this function body.
    :param kwargs: accepted but not read by this function body.
    :return: tuple ``(organization, organization_user, user_added)``;
        ``organization_user`` is None and ``user_added`` is False when no
        user was given.
    """
    from seed.models import StatusLabel as Label

    organization = Organization.objects.create(name=org_name)

    if user:
        organization_user, user_added = OrganizationUser.objects.get_or_create(
            user=user,
            organization=organization,
        )
    else:
        organization_user, user_added = None, False

    for label_name in Label.DEFAULT_LABELS:
        Label.objects.get_or_create(
            name=label_name,
            super_organization=organization,
            defaults={'color': 'blue'},
        )

    # A freshly initialized organization (SuperOrganization) gets the default columns
    _create_default_columns(organization.id)

    # create the default rules for this organization
    DataQualityCheck.retrieve(organization.id)

    return organization, organization_user, user_added
Beispiel #3
0
    def test_ensure_default_rules(self):
        """Retrieving the check twice reuses the same object and keeps the default rule count."""
        dq = DataQualityCheck.retrieve(self.org)
        initial_pk = dq.pk

        self.assertEqual(dq.rules.count(), len(DEFAULT_RULES))
        self.assertEqual(dq.results, {})

        # check again to make sure that it does not append more rules to the same org
        dq = DataQualityCheck.retrieve(self.org.pk)
        # The pk comparison belongs after the second retrieve: before it,
        # initial_pk and dq.pk are the same value read twice.
        self.assertEqual(initial_pk, dq.pk)
        self.assertEqual(dq.rules.count(), len(DEFAULT_RULES))
    def get_queryset(self):
        """
        Return the rules of the DataQualityCheck for the organization in the URL kwargs.

        With a ``pk`` kwarg the queryset is narrowed to that rule id; without
        any kwargs at all an empty Rule queryset is returned.
        """
        url_kwargs = self.kwargs

        # Handle the anonymous case (e.g. Swagger page load)
        if not url_kwargs:
            return Rule.objects.none()

        org_rules = DataQualityCheck.retrieve(
            url_kwargs.get('nested_organization_id')).rules
        pk = url_kwargs.get('pk')

        return org_rules.all() if pk is None else org_rules.filter(id=pk)
Beispiel #5
0
    def test_multiple_data_quality_check_objects(self):
        """retrieve() still returns the default check after extra checks are created by hand."""
        default_name = 'Default Data Quality Check'
        self.assertEqual(DataQualityCheck.retrieve(self.org).name, default_name)

        # Manually create three additional checks for the same organization
        extra_names = (
            'test manual creation',
            'test manual creation 2',
            'test manual creation 3',
        )
        for extra_name in extra_names:
            DataQualityCheck.objects.create(organization=self.org, name=extra_name)

        # The method below will delete the multiple objects and return the original
        self.assertEqual(DataQualityCheck.retrieve(self.org).name, default_name)
Beispiel #6
0
    def test_property_state_quality(self):
        """Run the data quality check over the import file's PropertyStates and verify the results."""
        # Stream the PropertyState rows tied to this test's import file
        qs = PropertyState.objects.filter(
            import_file=self.import_file, ).iterator()

        d = DataQualityCheck.retrieve(self.org)
        d.check_data('PropertyState', qs)
        self.assertEqual(len(d.results), 7)

        result = d.retrieve_result_by_address('95373 E Peach Avenue')
        # assertEqual, not assertTrue: with assertTrue the second argument is
        # only the failure message, so the address was never actually compared.
        self.assertEqual(result['address_line_1'], '95373 E Peach Avenue')
        res = [{
            "severity": "error",
            "value": "",
            "field": "pm_property_id",
            "table_name": "PropertyState",
            "message": "PM Property ID is null",
            "detailed_message": "PM Property ID is null",
            "formatted_field": "PM Property ID"
        }]
        self.assertEqual(res, result['data_quality_results'])

        result = d.retrieve_result_by_address('120243 E True Lane')
        res = [{
            "severity": "error",
            "value": "10000000000.0",
            "field": "gross_floor_area",
            "table_name": "PropertyState",
            "message": "Gross Floor Area out of range",
            "detailed_message": "Gross Floor Area [10000000000.0] > 7000000.0",
            "formatted_field": "Gross Floor Area"
        }, {
            "severity": "error",
            "value": "0",
            "field": "year_built",
            "table_name": "PropertyState",
            "message": "Year Built out of range",
            "detailed_message": "Year Built [0] < 1700",
            "formatted_field": "Year Built"
        }, {
            "severity": "error",
            "value": "",
            "field": "custom_id_1",
            "table_name": "PropertyState",
            "message": "Custom ID 1 (Property) is null",
            "detailed_message": "Custom ID 1 (Property) is null",
            "formatted_field": "Custom ID 1 (Property)"
        }, {
            "severity": "error",
            "value": "",
            "field": "pm_property_id",
            "table_name": "PropertyState",
            "message": "PM Property ID is null",
            "detailed_message": "PM Property ID is null",
            "formatted_field": "PM Property ID"
        }]
        # Order-insensitive comparison; assertCountEqual is the Python 3 name
        # of the Python 2 assertItemsEqual.
        self.assertCountEqual(res, result['data_quality_results'])

        # An address with no violations yields no result entry
        result = d.retrieve_result_by_address('1234 Peach Tree Avenue')
        self.assertIsNone(result)
Beispiel #7
0
    def test_reset_default_rules(self):
        """reset_default_rules() restores an edited default rule and keeps the custom rule."""
        dq = DataQualityCheck.retrieve(self.org)

        def gross_floor_area_min():
            # Read the value back through a fresh query each time
            return dq.rules.filter(field='gross_floor_area').first().min

        # Add one non-default rule on top of the defaults
        dq.add_rule({
            'table_name': 'PropertyState',
            'field': 'test_floor_area',
            'data_type': TYPE_NUMBER,
            'rule_type': RULE_TYPE_DEFAULT,
            'min': 0,
            'max': 7000000,
            'severity': SEVERITY_ERROR,
            'units': 'square feet'
        })
        self.assertEqual(dq.rules.count(), len(DEFAULT_RULES) + 1)

        # change one of the default rules
        edited_rule = dq.rules.filter(field='gross_floor_area').first()
        edited_rule.min = -10000
        edited_rule.save()
        self.assertEqual(gross_floor_area_min(), -10000)

        dq.reset_default_rules()
        self.assertEqual(gross_floor_area_min(), 100)

        # ensure non-default rule still exists
        self.assertEqual(dq.rules.filter(field='test_floor_area').count(), 1)
Beispiel #8
0
    def test_check_property_state_example_data_with_labels(self):
        """Violations produced by labelled rules report the label names in the results."""
        dq = DataQualityCheck.retrieve(self.org.id)

        def attach_new_label(label_name, **rule_lookup):
            # Create the label, then hang it on the single matching PropertyState rule
            label = StatusLabel.objects.create(name=label_name, super_organization=self.org)
            labelled_rule = dq.rules.get(table_name='PropertyState', **rule_lookup)
            labelled_rule.status_label = label
            labelled_rule.save()

        # Create labels and apply them to the rules being triggered later
        attach_new_label('Check Site EUI', field='site_eui', max='1000')
        attach_new_label('Check Year Built', field='year_built')

        # Create state and associate it to view
        ps = self.property_state_factory.get_property_state(
            None,
            no_default_data=True,
            custom_id_1='abcd',
            address_line_1='742 Evergreen Terrace',
            pm_property_id='PMID',
            site_eui=525600,
            year_built=1699,
        )
        prop = self.property_factory.get_property()
        PropertyView.objects.create(property=prop, cycle=self.cycle, state=ps)

        dq.check_data(ps.__class__.__name__, [ps])

        found_labels = [
            violation['label']
            for violation in dq.results[ps.id]['data_quality_results']
        ]
        self.assertCountEqual(['Check Site EUI', 'Check Year Built'], found_labels)
Beispiel #9
0
    def reset_default_data_quality_rules(self, request):
        """
        Resets an organization's data_quality rules
        ---
        parameters:
            - name: organization_id
              description: Organization ID
              type: integer
              required: true
              paramType: query
        type:
            status:
                type: string
                description: success or error
                required: true
            in_range_checking:
                type: array[string]
                required: true
                description: An array of in-range error rules
            missing_matching_field:
                type: array[string]
                required: true
                description: An array of fields to verify existence
            missing_values:
                type: array[string]
                required: true
                description: An array of fields to ignore missing values
        """
        org_pk = request.query_params['organization_id']
        # The organization lookup runs before the data quality check is touched
        organization = Organization.objects.get(pk=org_pk)

        DataQualityCheck.retrieve(organization.id).reset_default_rules()

        # Respond with the same payload as the rules listing endpoint
        return self.data_quality_rules(request)
Beispiel #10
0
    def test_rule_with_label_set_to_null(self):
        """
        Add a rule that carries a status label, confirm it is the only labelled
        rule, then delete the label.

        NOTE(review): nothing is asserted after the label is deleted.
        """
        dq = DataQualityCheck.retrieve(self.org)
        status_label, _ = StatusLabel.objects.get_or_create(
            name='test label on rule for null',
            super_organization=self.org,
        )
        dq.add_rule({
            'name': 'Name not to be forgotten',
            'table_name': 'PropertyState',
            'field': 'conditioned_floor_area',
            'data_type': TYPE_NUMBER,
            'rule_type': RULE_TYPE_DEFAULT,
            'min': 0,
            'max': 7000000,
            'severity': SEVERITY_ERROR,
            'units': 'square feet',
            'status_label': status_label
        })

        # Find data rule that has the status rule (from above)
        labelled_rules = dq.rules.filter(status_label__isnull=False)
        self.assertEqual(labelled_rules.count(), 1)
        self.assertEqual(labelled_rules[0].status_label, status_label)

        status_label.delete()
Beispiel #11
0
    def test_rule_with_label(self):
        """A status label attached to a rule survives removal of all rules."""
        dq = DataQualityCheck.retrieve(self.org)

        def labelled_rules():
            return dq.rules.filter(status_label__isnull=False)

        # No rule carries a label to begin with
        self.assertEqual(labelled_rules().count(), 0)

        label_lookup = {
            'name': 'test label on rule',
            'super_organization': self.org
        }
        status_label, _ = StatusLabel.objects.get_or_create(**label_lookup)
        self.assertEqual(StatusLabel.objects.filter(**label_lookup).count(), 1)

        dq.add_rule({
            'table_name': 'PropertyState',
            'field': 'conditioned_floor_area',
            'data_type': TYPE_NUMBER,
            'rule_type': RULE_TYPE_DEFAULT,
            'min': 0,
            'max': 7000000,
            'severity': SEVERITY_ERROR,
            'units': 'square feet',
            'status_label': status_label
        })
        with_label = labelled_rules()
        self.assertEqual(with_label.count(), 1)
        self.assertEqual(with_label[0].status_label, status_label)

        # delete the rule but make sure that the label does not get deleted
        dq.remove_all_rules()
        self.assertEqual(StatusLabel.objects.filter(**label_lookup).count(), 1)
Beispiel #12
0
    def create(self, validated_data):
        """
        Create a Rule for the organization named in the request's URL kwargs.

        ``validated_data`` is updated in place with the ``data_quality_check_id``
        of that organization's DataQualityCheck before the Rule is created.
        """
        # For now, use an Org ID to find the DQ Check ID to apply (later, use the DQ Check ID directly)
        url_kwargs = self.context['request'].parser_context['kwargs']
        dq_check = DataQualityCheck.retrieve(url_kwargs['nested_organization_id'])
        validated_data['data_quality_check_id'] = dq_check.id

        return Rule.objects.create(**validated_data)
Beispiel #13
0
 def test_add_new_rule_exception(self):
     """add_rule() raises TypeError with a descriptive message for unknown rule keys."""
     dq = DataQualityCheck.retrieve(self.org)
     new_rule = {'wrong': 'data'}
     # assertRaisesRegex replaces the deprecated assertRaisesRegexp alias,
     # which was removed from unittest in Python 3.12.
     with self.assertRaisesRegex(
             TypeError,
             "Rule data is not defined correctly: 'wrong' is an invalid keyword argument for this function"
     ):
         dq.add_rule(new_rule)
    def test_update_rule_include_empty_text_match_validation(self):
        """PUT requests that leave an include/exclude rule without text_match get a 400."""

        def put_rule(rule_id, payload):
            detail_url = reverse(
                'api:v3:data_quality_check-rules-detail',
                kwargs={'nested_organization_id': self.org.id, 'pk': rule_id},
            )
            return self.client.put(detail_url,
                                   content_type='application/json',
                                   data=json.dumps(payload))

        def assert_empty_string_rejection(response):
            self.assertEqual(response.status_code, 400)
            self.assertIn('Rule must not include or exclude an empty string. ',
                          json.loads(response.content)['message'])

        # Start with 1 Rule
        dq = DataQualityCheck.retrieve(self.org.id)
        dq.remove_all_rules()
        base_rule_info = {
            'field': 'address_line_1',
            'table_name': 'PropertyState',
            'enabled': True,
            'data_type': Rule.TYPE_STRING,
            'rule_type': Rule.RULE_TYPE_DEFAULT,
            'condition': Rule.RULE_INCLUDE,
            'required': False,
            'not_null': False,
            'min': None,
            'max': None,
            'text_match': 'Test Rule 1',
            'severity': Rule.SEVERITY_ERROR,
            'units': "",
        }
        dq.add_rule(base_rule_info)
        rule = dq.rules.get()

        # Send invalid update request
        null_text_match = deepcopy(base_rule_info)
        null_text_match['text_match'] = None
        assert_empty_string_rejection(put_rule(rule.id, null_text_match))

        # Remove text_match and make condition NOT_NULL, then try making condition EXCLUDE
        rule.text_match = None
        rule.condition = Rule.RULE_NOT_NULL
        rule.save()

        exclude_without_text = deepcopy(base_rule_info)
        del exclude_without_text['text_match']  # don't update text_match
        exclude_without_text['condition'] = Rule.RULE_EXCLUDE
        assert_empty_string_rejection(
            put_rule(dq.rules.get().id, exclude_without_text))
Beispiel #15
0
 def reset(self, request, nested_organization_id=None):
     """
     Remove every rule from the organization's data quality check, then
     respond with this view's list() output for the same organization.
     """
     # TODO: Refactor to get all the rules for a DataQualityCheck object directly.
     # At that point, nested_organization_id should be changed to data_quality_check_id
     DataQualityCheck.retrieve(nested_organization_id).remove_all_rules()

     return self.list(request, nested_organization_id)
Beispiel #16
0
    def test_tax_lot_state_quality(self):
        """Checking the import file's TaxLotStates yields four result entries."""
        # Stream the TaxLotState rows tied to this test's import file
        tax_lot_states = TaxLotState.objects.filter(
            import_file=self.import_file,
        ).iterator()

        checker = DataQualityCheck.retrieve(self.org)
        checker.check_data('TaxLotState', tax_lot_states)

        self.assertEqual(len(checker.results), 4)
Beispiel #17
0
    def test_remove_all_rules(self):
        """remove_all_rules() leaves no Rule rows attached to the check."""
        dq = DataQualityCheck.retrieve(self.org)

        def stored_rule_count():
            # Count straight from the Rule table rather than through dq.rules
            return Rule.objects.filter(data_quality_check_id=dq.pk).count()

        self.assertEqual(stored_rule_count(), len(DEFAULT_RULES))

        dq.remove_all_rules()
        self.assertEqual(dq.rules.count(), 0)

        # ensure that the database has no rules for this dq associated with it
        self.assertEqual(stored_rule_count(), 0)
Beispiel #18
0
    def test_filter_rules(self):
        """Disabling one rule lowers the enabled-rule count by exactly one."""
        dq = DataQualityCheck.retrieve(self.org)
        enabled_before = dq.rules.filter(enabled=True).count()

        # disable one of the rules
        first_rule = dq.rules.first()
        first_rule.enabled = False
        first_rule.save()

        enabled_after = dq.rules.filter(enabled=True).count()
        self.assertEqual(enabled_after, enabled_before - 1)
Beispiel #19
0
    def test_add_custom_rule_exception(self):
        """add_rule() with an unknown key raises, and the message names that key."""
        dq = DataQualityCheck.retrieve(self.org.id)
        dq.remove_all_rules()

        expected_message = (
            "Rule data is not defined correctly: 'table_name_does_not_exist' is an invalid keyword argument for this function"
        )
        invalid_rule = {'table_name_does_not_exist': 'PropertyState'}

        with self.assertRaises(Exception) as raised:
            dq.add_rule(invalid_rule)

        self.assertEqual(str(raised.exception), expected_message)
Beispiel #20
0
    def test_add_new_rule_and_reset(self):
        """reset_all_rules() brings the rule count back to the number of defaults."""
        dq = DataQualityCheck.retrieve(self.org)
        default_count = len(DEFAULT_RULES)

        # One extra rule on top of the defaults
        dq.add_rule({
            'table_name': 'PropertyState',
            'field': 'conditioned_floor_area',
            'data_type': TYPE_NUMBER,
            'rule_type': RULE_TYPE_DEFAULT,
            'min': 0,
            'max': 7000000,
            'severity': SEVERITY_ERROR,
            'units': 'square feet'
        })
        self.assertEqual(dq.rules.count(), default_count + 1)

        dq.reset_all_rules()
        self.assertEqual(dq.rules.count(), default_count)
Beispiel #21
0
    def test_add_custom_rule(self):
        """A rule added to an emptied check is stored with the values it was given."""
        dq = DataQualityCheck.retrieve(self.org.id)
        dq.remove_all_rules()

        custom_rule = {
            'table_name': 'PropertyState',
            'field': 'some_floor_area',
            'data_type': Rule.TYPE_AREA,
            'rule_type': Rule.RULE_TYPE_DEFAULT,
            'min': 8760,
            'max': 525600,
            'severity': Rule.SEVERITY_ERROR,
            'units': 'm**2',
        }
        dq.add_rule(custom_rule)

        self.assertEqual(dq.rules.count(), 1)
        stored = model_to_dict(dq.rules.first())
        self.assertDictContainsSubset(custom_rule, stored)
Beispiel #22
0
    def test_default_create(self):
        """A retrieved check has 22 rules, including the expected conditioned_floor_area rule."""
        dq = DataQualityCheck.retrieve(self.org.id)
        self.assertEqual(dq.rules.count(), 22)

        # Example rule to check
        expected_rule = {
            'table_name': 'PropertyState',
            'field': 'conditioned_floor_area',
            'data_type': Rule.TYPE_AREA,
            'rule_type': Rule.RULE_TYPE_DEFAULT,
            'min': 0,
            'max': 7000000,
            'severity': Rule.SEVERITY_ERROR,
            'units': 'ft**2',
        }

        stored_rule = Rule.objects.filter(
            table_name='PropertyState',
            field='conditioned_floor_area',
            severity=Rule.SEVERITY_ERROR,
        ).first()
        self.assertDictContainsSubset(expected_rule, model_to_dict(stored_rule))
    def test_update_rule_status_label_validation(self):
        """Updating a rule with a status label that belongs to another org returns a 400."""
        # Start with 1 Rule
        dq = DataQualityCheck.retrieve(self.org.id)
        dq.remove_all_rules()
        base_rule_info = {
            'field': 'address_line_1',
            'table_name': 'PropertyState',
            'enabled': True,
            'data_type': Rule.TYPE_STRING,
            'rule_type': Rule.RULE_TYPE_DEFAULT,
            'condition': Rule.RULE_INCLUDE,
            'required': False,
            'not_null': False,
            'min': None,
            'max': None,
            'text_match': 'Test Rule 1',
            'severity': Rule.SEVERITY_ERROR,
            'units': "",
        }
        dq.add_rule(base_rule_info)
        rule = dq.rules.get()

        # Send invalid update request that includes a label id from another org
        other_org = create_organization(self.user, "test-organization-a")[0]
        wrong_org_label_id = other_org.labels.first().id

        payload = deepcopy(base_rule_info)
        payload['status_label'] = wrong_org_label_id
        detail_url = reverse(
            'api:v3:data_quality_check-rules-detail',
            kwargs={'nested_organization_id': self.org.id, 'pk': rule.id},
        )
        res = self.client.put(detail_url,
                              content_type='application/json',
                              data=json.dumps(payload))

        self.assertEqual(res.status_code, 400)
        self.assertIn(
            f'Label with ID {wrong_org_label_id} not found in organization, {self.org.name}.',
            json.loads(res.content)['status_label'])
    def test_reset_rules(self):
        """The reset endpoint responds with 22 rules: 20 PropertyState and 2 TaxLotState."""
        # Start with 1 Rule
        dq = DataQualityCheck.retrieve(self.org.id)
        dq.remove_all_rules()
        dq.add_rule({
            'field': 'address_line_1',
            'table_name': 'PropertyState',
            'enabled': True,
            'data_type': Rule.TYPE_STRING,
            'rule_type': Rule.RULE_TYPE_DEFAULT,
            'condition': Rule.RULE_INCLUDE,
            'required': False,
            'not_null': False,
            'min': None,
            'max': None,
            'text_match': 'Test Rule 1',
            'severity': Rule.SEVERITY_ERROR,
            'units': "",
        })

        reset_url = reverse('api:v3:data_quality_check-rules-reset',
                            kwargs={'nested_organization_id': self.org.id})
        rules = json.loads(self.client.put(reset_url).content)

        self.assertEqual(len(rules), 22)

        # Tally the returned rules per table
        table_names = [returned_rule['table_name'] for returned_rule in rules]
        self.assertEqual(table_names.count('TaxLotState'), 2)
        self.assertEqual(table_names.count('PropertyState'), 20)
Beispiel #25
0
 def test_text_match(self):
     """A lone text_match rule on address_line_1 reports nothing for a matching address."""
     dq = DataQualityCheck.retrieve(self.org.id)
     dq.remove_all_rules()
     dq.add_rule({
         'table_name': 'PropertyState',
         'field': 'address_line_1',
         'data_type': Rule.TYPE_STRING,
         'rule_type': Rule.RULE_TYPE_DEFAULT,
         'severity': Rule.SEVERITY_ERROR,
         'not_null': True,
         'text_match': 742,
     })

     # Build one property state whose address contains the text being matched
     ps = self.property_state_factory.get_property_state(
         None,
         no_default_data=True,
         custom_id_1='abcd',
         address_line_1='742 Evergreen Terrace',
         pm_property_id='PMID',
         site_eui=525600,
     )

     dq.check_data(ps.__class__.__name__, [ps])
     self.assertEqual(dq.results, {})
Beispiel #26
0
    def test_check_property_state_example_data(self):
        """An out-of-range site_eui on a property state is reported by the check."""
        dq = DataQualityCheck.retrieve(self.org.id)
        ps = self.property_state_factory.get_property_state(
            None,
            no_default_data=True,
            custom_id_1='abcd',
            address_line_1='742 Evergreen Terrace',
            pm_property_id='PMID',
            site_eui=525600,
        )

        dq.check_data(ps.__class__.__name__, [ps])

        # dq.results is keyed by state id; each row looks like:
        # {
        #     'id': 11,
        #     'custom_id_1': 'abcd',
        #     'pm_property_id': 'PMID',
        #     'address_line_1': '742 Evergreen Terrace',
        #     'data_quality_results': [
        #         {'severity': 'error', 'value': '525600', 'field': 'site_eui',
        #          'table_name': 'PropertyState', 'message': 'Site EUI out of range',
        #          'detailed_message': 'Site EUI [525600] > 1000',
        #          'formatted_field': 'Site EUI'}
        #     ]
        # }
        error_found = False
        for row in dq.results.values():
            self.assertEqual(row['custom_id_1'], 'abcd')
            self.assertEqual(row['pm_property_id'], 'PMID')
            self.assertEqual(row['address_line_1'], '742 Evergreen Terrace')

            for violation in row['data_quality_results']:
                if violation['message'] != 'Site EUI out of range':
                    continue
                error_found = True
                self.assertEqual(violation['detailed_message'],
                                 'Site EUI [525600] > 1000')

        self.assertTrue(error_found)
Beispiel #27
0
    def save_data_quality_rules(self, request, pk=None):
        """
        Saves an organization's data quality rules.
        The method passes in all the fields again, so it is okay to remove
        all the rules in the db, and just recreate them (albeit inefficient)
        ---
        parameter_strategy: replace
        parameters:
            - name: organization_id
              description: Organization ID
              type: integer
              required: true
              paramType: query
            - name: body
              description: JSON body containing organization rules information
              paramType: body
              pytype: RulesSerializer
              required: true
        type:
            status:
                type: string
                description: success or error
                required: true
            message:
                type: string
                description: error message, if any
                required: true
        """
        organization = Organization.objects.get(
            pk=request.query_params['organization_id'])

        body = request.data
        if body.get('data_quality_rules') is None:
            return JsonResponse(
                {
                    'status': 'error',
                    'message': 'missing the data_quality_rules'
                },
                status=status.HTTP_404_NOT_FOUND)

        posted_rules = body['data_quality_rules']

        # Each posted group maps onto one table name; properties are processed
        # first, then taxlots, matching the order in which rules are re-added.
        rule_groups = (
            ('properties', 'PropertyState'),
            ('taxlots', 'TaxLotState'),
        )
        updated_rules = []
        for group_key, table_name in rule_groups:
            for rule in posted_rules[group_key]:
                updated_rules.append({
                    'field': rule['field'],
                    'table_name': table_name,
                    'enabled': rule['enabled'],
                    'data_type': _get_rule_type_from_js(rule['data_type']),
                    'rule_type': rule['rule_type'],
                    'required': rule['required'],
                    'not_null': rule['not_null'],
                    'min': rule['min'],
                    'max': rule['max'],
                    'text_match': rule['text_match'],
                    'severity': _get_severity_from_js(rule['severity']),
                    'units': rule['units'],
                    'status_label_id': rule['label'],
                })

        # Replace the stored rules wholesale with the posted ones
        dq = DataQualityCheck.retrieve(organization.id)
        dq.remove_all_rules()
        for rule in updated_rules:
            try:
                dq.add_rule(rule)
            except TypeError as e:
                # str(e): an exception instance cannot be JSON-serialized, so
                # passing it through unchanged would make this error response fail.
                return JsonResponse(
                    {
                        'status': 'error',
                        'message': str(e),
                    },
                    status=status.HTTP_400_BAD_REQUEST)

        return self.data_quality_rules(request)
Beispiel #28
0
    def data_quality_rules(self, request):
        """
        Returns the data_quality rules for an org.
        ---
        parameters:
            - name: organization_id
              description: Organization ID
              type: integer
              required: true
              paramType: query
        type:
            status:
                type: string
                required: true
                description: success or error
            rules:
                type: object
                required: true
                description: An object containing 'properties' and 'taxlots' arrays of rules
        """
        org_pk = request.query_params['organization_id']
        organization = Organization.objects.get(pk=org_pk)

        property_rules = []
        taxlot_rules = []

        dq = DataQualityCheck.retrieve(organization.id)
        for rule in dq.rules.order_by('field', 'severity'):
            serialized = {
                'field': rule.field,
                'enabled': rule.enabled,
                'data_type': _get_js_rule_type(rule.data_type),
                'rule_type': rule.rule_type,
                'required': rule.required,
                'not_null': rule.not_null,
                'min': rule.min,
                'max': rule.max,
                'text_match': rule.text_match,
                'severity': _get_js_rule_severity(rule.severity),
                'units': rule.units,
                'label': rule.status_label_id,
            }
            # Anything that is not a PropertyState rule is listed under taxlots
            if rule.table_name == 'PropertyState':
                property_rules.append(serialized)
            else:
                taxlot_rules.append(serialized)

        return JsonResponse({
            'status': 'success',
            'rules': {
                'properties': property_rules,
                'taxlots': taxlot_rules,
            },
        })
Beispiel #29
0
    def test_check_multiple_text_match(self):
        """Check that several custom text-match rules each apply their own label.

        Replaces the organization's rules with three custom string rules
        (every one using ``text_match='OK'`` and bound to its own status
        label), imports and maps the fixture file, runs the data quality
        check over the imported property states, and then verifies the
        sorted list of addresses carrying each label.
        """
        dq = DataQualityCheck.retrieve(self.org)
        dq.remove_all_rules()

        # (status label name, rule field) pairs, registered in this order.
        label_specs = [
            ('No meters present', 'meters_present'),
            ('No 12 Consectutive Months', '12 Consectutive Months'),
            ('No Monthly Data', 'Monthly Data'),
        ]

        labels = []
        for label_name, field_name in label_specs:
            label, _ = StatusLabel.objects.get_or_create(
                name=label_name,
                super_organization=self.org,
            )
            dq.add_rule({
                'table_name': 'PropertyState',
                'field': field_name,
                'data_type': TYPE_STRING,
                'rule_type': RULE_TYPE_CUSTOM,
                'text_match': 'OK',
                'severity': SEVERITY_ERROR,
                'status_label': label,
            })
            labels.append(label)

        # Run the import pipeline: save raw data, create mappings, map, match.
        import_file = self.import_file
        tasks.save_raw_data(import_file.id)
        Column.create_mappings(
            self.fake_mappings, self.org, self.user, import_file.pk)
        tasks.map_data(import_file.id)
        tasks.match_buildings(import_file.id)

        # Evaluate the rules against the assessed property states of this file.
        states = PropertyState.objects.filter(
            import_file=import_file,
            source_type=ASSESSED_BS,
        ).iterator()
        dq.reset_results()
        dq.check_data('PropertyState', states)

        # Expected sorted addresses per label, in the same order as label_specs.
        expected_addresses = [
            [
                u'1 International Road', u'17246 Esch Drive',
                u'2581 Schiller Parkway', u'3 Northport Place',
                u'84807 Buell Trail',
            ],
            [
                u'1 International Road', u'2581 Schiller Parkway',
                u'49705 Harper Crossing',
            ],
            [
                u'1 International Road', u'17246 Esch Drive',
                u'84807 Buell Trail', u'88263 Scoville Park',
            ],
        ]
        for label, expected in zip(labels, expected_addresses):
            views = PropertyView.objects.filter(
                property__labels=label).select_related('state')
            addresses = sorted(view.state.address_line_1 for view in views)
            self.assertListEqual(expected, addresses)
Beispiel #30
0
    def test_check(self):
        # data quality check
        d = DataQualityCheck.retrieve(self.org)
        d.remove_all_rules()
        d.add_rule({
            'table_name': 'PropertyState',
            'field': 'gross_floor_area',
            'data_type': TYPE_NUMBER,
            'rule_type': RULE_TYPE_DEFAULT,
            'min': 100,
            'max': 7000000,
            'severity': SEVERITY_ERROR,
            'units': 'square feet',
        })

        d.add_rule({
            'table_name': 'PropertyState',
            'field': 'recent_sale_date',
            'data_type': TYPE_DATE,
            'rule_type': RULE_TYPE_DEFAULT,
            'min': 18890101,
            'max': 20201231,
            'severity': SEVERITY_ERROR,
        })
        # create some status labels for testing
        sl_data = {
            'name': 'year - old or future',
            'super_organization': self.org
        }
        sl_year, _ = StatusLabel.objects.get_or_create(**sl_data)
        new_rule = {
            'table_name': 'PropertyState',
            'field': 'year_built',
            'data_type': TYPE_YEAR,
            'rule_type': RULE_TYPE_DEFAULT,
            'min': 1700,
            'max': 2019,
            'severity': SEVERITY_ERROR,
            'status_label': sl_year,
        }
        d.add_rule(new_rule)

        sl_data = {
            'name': 'extra data ps float error',
            'super_organization': self.org
        }
        sl_string, _ = StatusLabel.objects.get_or_create(**sl_data)
        new_rule = {
            'table_name': 'PropertyState',
            'field': 'extra_data_ps_alpha',
            'data_type': TYPE_STRING,
            'rule_type': RULE_TYPE_CUSTOM,
            'text_match': 'alpha',
            'severity': SEVERITY_ERROR,
            'units': 'square feet',
            'status_label': sl_string,
        }
        d.add_rule(new_rule)

        sl_data = {
            'name': 'extra data ps string error',
            'super_organization': self.org
        }
        sl_float, _ = StatusLabel.objects.get_or_create(**sl_data)
        new_rule = {
            'table_name': 'PropertyState',
            'field': 'extra_data_ps_float',
            'data_type': TYPE_NUMBER,
            'rule_type': RULE_TYPE_CUSTOM,
            'min': 9999,
            'max': 10001,
            'severity': SEVERITY_ERROR,
            'status_label': sl_float,
        }
        d.add_rule(new_rule)

        sl_data = {
            'name': 'jurisdiction id does not match',
            'super_organization': self.org
        }
        sl_jurid, _ = StatusLabel.objects.get_or_create(**sl_data)
        new_rule = {
            'table_name': 'TaxLotState',
            'field': 'jurisdiction_tax_lot_id',
            'data_type': TYPE_STRING,
            'rule_type': RULE_TYPE_CUSTOM,
            'text_match': '1235',
            'severity': SEVERITY_ERROR,
            'status_label': sl_jurid,
        }
        d.add_rule(new_rule)

        sl_data = {'name': 'No meters present', 'super_organization': self.org}
        sl_ok_1, _ = StatusLabel.objects.get_or_create(**sl_data)
        new_rule = {
            'table_name': 'PropertyState',
            'field': 'Meters Present',
            'data_type': TYPE_STRING,
            'rule_type': RULE_TYPE_CUSTOM,
            'text_match': 'OK',
            'severity': SEVERITY_ERROR,
            'status_label': sl_ok_1,
        }
        d.add_rule(new_rule)

        sl_data = {
            'name': 'No 12 Consectutive Months',
            'super_organization': self.org
        }
        sl_ok_2, _ = StatusLabel.objects.get_or_create(**sl_data)
        new_rule = {
            'table_name': 'PropertyState',
            'field': '12 Consectutive Months',
            'data_type': TYPE_STRING,
            'rule_type': RULE_TYPE_CUSTOM,
            'text_match': 'OK',
            'severity': SEVERITY_ERROR,
            'status_label': sl_ok_2,
        }
        d.add_rule(new_rule)

        sl_data = {'name': 'No Monthly Data', 'super_organization': self.org}
        sl_ok_3, _ = StatusLabel.objects.get_or_create(**sl_data)
        new_rule = {
            'table_name': 'PropertyState',
            'field': 'Monthly Data',
            'data_type': TYPE_STRING,
            'rule_type': RULE_TYPE_CUSTOM,
            'text_match': 'OK',
            'severity': SEVERITY_ERROR,
            'status_label': sl_ok_3,
        }
        d.add_rule(new_rule)

        # import data
        tasks.save_raw_data(self.import_file.id)
        Column.create_mappings(self.fake_mappings, self.org, self.user,
                               self.import_file.pk)
        tasks.map_data(self.import_file.id)
        tasks.match_buildings(self.import_file.id)

        qs = PropertyState.objects.filter(
            import_file=self.import_file,
            source_type=ASSESSED_BS,
        ).iterator()
        d.reset_results()
        d.check_data('PropertyState', qs)

        result = d.retrieve_result_by_address('4 Myrtle Parkway')
        res = [{
            "severity": "error",
            "value": "27.0",
            "field": "extra_data_ps_float",
            "table_name": "PropertyState",
            "message": "Extra Data Ps Float out of range",
            "detailed_message": "Extra Data Ps Float [27.0] < 9999.0",
            "formatted_field": "Extra Data Ps Float"
        }, {
            "severity": "error",
            "value": "5.0",
            "field": "gross_floor_area",
            "table_name": "PropertyState",
            "message": "Gross Floor Area out of range",
            "detailed_message": "Gross Floor Area [5.0] < 100.0",
            "formatted_field": "Gross Floor Area"
        }]
        self.assertListEqual(result['data_quality_results'], res)

        result = d.retrieve_result_by_address('94 Oxford Hill')
        res = [{
            "severity": "error",
            "value": "20000.0",
            "field": "extra_data_ps_float",
            "table_name": "PropertyState",
            "message": "Extra Data Ps Float out of range",
            "detailed_message": "Extra Data Ps Float [20000.0] > 10001.0",
            "formatted_field": "Extra Data Ps Float"
        }, {
            "severity": "error",
            "value": "1888-01-01 08:00:00",
            "field": "recent_sale_date",
            "table_name": "PropertyState",
            "message": "Recent Sale Date out of range",
            "detailed_message":
            "Recent Sale Date [1888-01-01 08:00:00] < 1889-01-01 00:00:00",
            "formatted_field": "Recent Sale Date"
        }]
        self.assertListEqual(result['data_quality_results'], res)

        result = d.retrieve_result_by_address("3 Portage Alley")
        res = [{
            'severity': u'error',
            'value': 'beta',
            'field': u'extra_data_ps_alpha',
            'table_name': u'PropertyState',
            'message': u'Extra Data Ps Alpha does not match expected value',
            'detailed_message':
            u'Extra Data Ps Alpha [beta] does not contain "alpha"',
            'formatted_field': u'Extra Data Ps Alpha'
        }]
        self.assertListEqual(result['data_quality_results'], res)

        # make sure that the label has been applied
        props = PropertyView.objects.filter(
            property__labels=sl_year).select_related('state')
        addresses = sorted([p.state.address_line_1 for p in props])
        expected = sorted([u'84807 Buell Trail', u'1 International Road'])
        self.assertListEqual(expected, addresses)

        props = PropertyView.objects.filter(
            property__labels=sl_float).select_related('state')
        addresses = sorted([p.state.address_line_1 for p in props])
        expected = sorted([u'4 Myrtle Parkway', u'94 Oxford Hill'])
        self.assertListEqual(expected, addresses)

        props = PropertyView.objects.filter(
            property__labels=sl_string).select_related('state')
        addresses = [p.state.address_line_1 for p in props]
        expected = [u'3 Portage Alley']
        self.assertListEqual(expected, addresses)

        # Check tax lots
        qs = TaxLotState.objects.filter(
            import_file=self.import_file, ).iterator()
        d.reset_results()
        d.check_data('TaxLotState', qs)

        result = d.retrieve_result_by_tax_lot_id("1234")
        res = [{
            "severity": "error",
            "value": "1234",
            "field": "jurisdiction_tax_lot_id",
            "table_name": "TaxLotState",
            "message": "Jurisdiction Tax Lot ID does not match expected value",
            "detailed_message":
            "Jurisdiction Tax Lot ID [1234] does not contain \"1235\"",
            "formatted_field": "Jurisdiction Tax Lot ID"
        }]
        self.assertListEqual(result['data_quality_results'], res)

        # verify labels
        taxlots = TaxLotView.objects.filter(
            taxlot__labels=sl_jurid).select_related('state')
        ids = [t.state.jurisdiction_tax_lot_id for t in taxlots]
        expected = '1234'
        self.assertEqual(expected, ids[0])

        # Check multiple strings
        props = PropertyView.objects.filter(
            property__labels=sl_ok_1).select_related('state')
        addresses = [p.state.address_line_1 for p in props]