def test_get_column_mappings(self):
    """We produce appropriate data structure for mapping"""
    # three raw columns: two mapped into PropertyState, one into TaxLotState
    raw_data = [
        {
            "from_field": "raw_data_0",
            "to_field": "destination_0",
            "to_table_name": "PropertyState"
        }, {
            "from_field": "raw_data_1",
            "to_field": "destination_1",
            "to_table_name": "PropertyState"
        }, {
            "from_field": "raw_data_2",
            "to_field": "destination_0",
            "to_table_name": "TaxLotState"
        },
    ]

    Column.create_mappings(raw_data, self.fake_org, self.fake_user)

    # get_column_mappings keys on the raw column name and maps to a
    # (table name, destination field) tuple
    expected = {
        u'raw_data_0': (u'PropertyState', u'destination_0'),
        u'raw_data_1': (u'PropertyState', u'destination_1'),
        u'raw_data_2': (u'TaxLotState', u'destination_0'),
    }

    test_mapping, no_concat = ColumnMapping.get_column_mappings(self.fake_org)
    self.assertDictEqual(test_mapping, expected)
    # no concatenated mappings were defined above, so the second value is empty
    self.assertEqual(no_concat, [])
def test_promote_properties(self):
    """Good case for testing our matching system."""
    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # a second cycle to promote the same state into
    cycle2, _ = Cycle.objects.get_or_create(
        name=u'Hack Cycle 2016',
        organization=self.org,
        start=datetime.datetime(2016, 1, 1),
        end=datetime.datetime(2016, 12, 31),
    )

    # make sure that the new data was loaded correctly
    ps = PropertyState.objects.filter(address_line_1='1181 Douglas Street')[0]
    self.assertEqual(ps.site_eui, 439.9)
    self.assertEqual(ps.extra_data['CoStar Property ID'], '1575599')

    # Promote the PropertyState to a PropertyView
    pv1 = ps.promote(self.cycle)
    pv2 = ps.promote(self.cycle)  # should just return the same object
    self.assertEqual(pv1, pv2)

    # promote the same state for a new cycle, same data
    pv3 = ps.promote(cycle2)
    self.assertNotEqual(pv3, pv1)

    # one view per cycle for the promoted state
    props = PropertyView.objects.all()
    self.assertEqual(len(props), 2)
def test_map_data(self): """Save mappings based on user specifications.""" # Create new import file to test import_record = ImportRecord.objects.create( owner=self.user, last_modified_by=self.user, super_organization=self.org ) import_file = ImportFile.objects.create( import_record=import_record, source_type=ASSESSED_RAW, ) import_file.raw_save_done = True import_file.save() fake_raw_bs = PropertyState.objects.create( organization=self.org, import_file=import_file, extra_data=self.fake_row, source_type=ASSESSED_RAW, data_state=DATA_STATE_IMPORT, ) # tasks._save_raw_data(import_file.pk, 'fake_cache_key', 1) self.fake_mappings = FAKE_MAPPINGS['fake_row'] Column.create_mappings(self.fake_mappings, self.org, self.user) tasks.map_data(import_file.pk) mapped_bs = list(PropertyState.objects.filter( import_file=import_file, source_type=ASSESSED_BS, )) self.assertEqual(len(mapped_bs), 1) test_bs = mapped_bs[0] self.assertNotEqual(test_bs.pk, fake_raw_bs.pk) self.assertEqual(test_bs.property_name, self.fake_row['Name']) self.assertEqual(test_bs.address_line_1, self.fake_row['Address Line 1']) self.assertEqual( test_bs.year_built, parser.parse(self.fake_row['Year Built']).year ) # Make sure that we saved the extra_data column mappings data_columns = Column.objects.filter( organization=self.org, is_extra_data=True ) # There's only one piece of data that didn't cleanly map. # Note that as of 09/15/2016 - extra data still needs to be defined in the mappings, it # will no longer magically appear in the extra_data field if the user did not specify to # map it! self.assertListEqual( sorted([d.column_name for d in data_columns]), ['Double Tester'] )
def test_import_file(self):
    """Promoting one state removes it from the unmatched set."""
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    ps = PropertyState.objects.filter(pm_property_id='2264').first()
    ps.promote(self.cycle)

    # one property was promoted, so the unmatched count drops by one.
    # NOTE(review): an earlier comment here said "should only be 11
    # unmatched_properties" but the assertion expects 13 — the comment
    # looked stale; confirm the intended count against the import fixture.
    ps = self.import_file.find_unmatched_property_states()
    self.assertEqual(len(ps), 13)
def test_match_buildings(self):
    """ case A (one property <-> one tax lot) """
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # Check to make sure all the properties imported
    ps = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file,
    )
    self.assertEqual(len(ps), 14)

    # Check to make sure the taxlots were imported
    ts = TaxLotState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file,
    )
    # self.assertEqual(len(ts), 10)  # 10 unique taxlots after duplicates and delimeters

    # Check a single case of the taxlotstate
    ts = TaxLotState.objects.filter(jurisdiction_tax_lot_id='1552813').first()
    self.assertEqual(ts.jurisdiction_tax_lot_id, '1552813')
    self.assertEqual(ts.address_line_1, None)
    self.assertEqual(ts.extra_data["data_008"], 1)

    # Check a single case of the propertystate
    ps = PropertyState.objects.filter(pm_property_id='2264')
    self.assertEqual(len(ps), 1)
    ps = ps.first()
    self.assertEqual(ps.pm_property_id, '2264')
    self.assertEqual(ps.address_line_1, '50 Willow Ave SE')
    # data_007 belongs to the property's extra data, data_008 to the taxlot's
    self.assertEqual('data_007' in ps.extra_data.keys(), True)
    self.assertEqual('data_008' in ps.extra_data.keys(), False)
    self.assertEqual(ps.extra_data["data_007"], 'a')

    # verify that the lot_number has the tax_lot information. For this case it is one-to-one
    self.assertEqual(ps.lot_number, ts.jurisdiction_tax_lot_id)

    tasks.match_buildings(self.import_file.id, self.user.id)

    # matching collapses duplicate taxlots down to 10 canonical TaxLots
    self.assertEqual(TaxLot.objects.count(), 10)

    # spot-check one matched PropertyView by its custom id
    qry = PropertyView.objects.filter(state__custom_id_1='7')
    self.assertEqual(qry.count(), 1)
    state = qry.first().state

    self.assertEqual(state.address_line_1, "12 Ninth Street")
    self.assertEqual(state.property_name, "Grange Hall")
def test_multiple_id_matches(self):
    """Exercise query_property_matches across pm-id / custom-id combinations."""
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # verify that there are no properties listed as canonical
    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 0)

    # promote two properties that share custom_id_1 == '13'
    ps = PropertyState.objects.filter(custom_id_1='13')
    ps_test = ps.first()
    ps_test_2 = ps.last()
    for p in ps:
        p.promote(self.cycle)
        # from seed.utils.generic import pp
        # pp(p)

    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 2)

    # no arguments passed should return no results
    matches = tasks.query_property_matches(property_states, None, None)
    self.assertEqual(len(matches), 0)
    # should return 2 properties
    matches = tasks.query_property_matches(property_states, None, '13')
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[0], ps_test)
    self.assertEqual(matches[1], ps_test_2)
    # should return only the second property
    matches = tasks.query_property_matches(property_states, '2342', None)
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0], ps_test_2)
    # should return both properties, the first one should be the pm match, i.e. the first prop
    matches = tasks.query_property_matches(property_states, '481516', '13')
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[0], ps_test)
    self.assertEqual(matches[1], ps_test_2)
    # if passing in the second pm then it will not be the first
    matches = tasks.query_property_matches(property_states, '2342', '13')
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[1], ps_test_2)
    # pass the pm id into the custom id. it should still return the correct buildings.
    # not sure that this is the right behavior, but this is what it does, so just testing.
    matches = tasks.query_property_matches(property_states, None, '2342')
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0], ps_test_2)
    matches = tasks.query_property_matches(property_states, '13', None)
    self.assertEqual(len(matches), 2)
    self.assertEqual(matches[0], ps_test)
    self.assertEqual(matches[1], ps_test_2)
def test_save_columns(self):
    """save_column_names registers extra_data keys as extra-data Columns."""
    # one plain value and one string value in extra_data
    extra = {'a': 123, 'lab': 'hawkins national laboratory'}
    ps = PropertyState.objects.create(
        organization=self.fake_org,
        extra_data=extra
    )
    Column.save_column_names(ps)

    lab_column = Column.objects.filter(column_name='lab')[0]
    self.assertEqual(lab_column.is_extra_data, True)
    self.assertEqual(lab_column.table_name, 'PropertyState')
    # the state's extra_data itself is untouched by the save
    self.assertEqual(ps.extra_data['lab'], 'hawkins national laboratory')
def test_mapping_no_properties(self):
    """All mappings routed into TaxLotState should still import cleanly."""
    # reroute every PropertyState mapping to TaxLotState instead
    for m in self.fake_mappings:
        if m["to_table_name"] == 'PropertyState':
            m["to_table_name"] = 'TaxLotState'

    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # make sure that no taxlot objects were created. the 12 here are the import extra_data.
    ps = PropertyState.objects.all()
    self.assertEqual(len(ps), 14)

    # make sure that the new data was loaded correctly
    ts = TaxLotState.objects.filter(address_line_1='50 Willow Ave SE').first()
    self.assertEqual(ts.extra_data['site_eui'], 125)
def update_columns_based_on_mappings_file(self, org):
    """Go through each of the organization columns as reported by the
    'extradata.csv' mappings file and make sure it points to the table
    specified in that file.

    :param org: organization whose columns are updated
    """
    logging_info("Updating columns for org {} to match that in migration mapping file.".format(org))

    # property and taxlot columns get identical treatment apart from the
    # source constant and target table, so share one helper
    self._point_columns_at_source(
        org, get_property_columns(org), Column.SOURCE_PROPERTY, "SOURCE_PROPERTY", "PropertyState")
    self._point_columns_at_source(
        org, get_taxlot_columns(org), Column.SOURCE_TAXLOT, "SOURCE_TAXLOT", "TaxLotState")

def _point_columns_at_source(self, org, column_names, source, source_label, table_name):
    """Set extra_data_source on each named Column, creating missing columns
    as extra-data columns on *table_name*.

    :param org: organization scoping the Column lookups
    :param column_names: iterable of column names from the mappings file
    :param source: Column.SOURCE_* constant to assign to existing columns
    :param source_label: constant's name, used only for logging
    :param table_name: table for newly created columns
    """
    for name in column_names:
        # first() avoids the original count()-then-first() double query
        col = Column.objects.filter(organization=org, column_name=name).first()
        if col is not None:
            # Update the column
            logging_info("Setting Column '{}' to {}".format(col, source_label))
            col.extra_data_source = source
            col.save()
        else:
            col = Column(organization=org, column_name=name, is_extra_data=True,
                         table_name=table_name)
            logging_info("Creating Column '{}' based on missing from mappings file".format(name))
            col.save()
def test_mapping(self):
    """Mapping imports all taxlots and wires lot_number on the properties."""
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # There are a total of 18 tax lot ids in the import file
    ts = TaxLotState.objects.all()
    self.assertEqual(len(ts), 18)

    # make sure that the new data was loaded correctly and that the lot_number was set
    # appropriately
    ps = PropertyState.objects.filter(address_line_1='2700 Welstone Ave NE')[0]
    self.assertEqual(ps.site_eui, 1202)
    self.assertEqual(ps.lot_number, '11160509')

    ps = PropertyState.objects.filter(address_line_1='521 Elm Street')[0]
    self.assertEqual(ps.site_eui, 1358)
    # The lot_number should also have the normalized code run, then re-delimited
    self.assertEqual(ps.lot_number, '33366555;33366125;33366148')
def test_mapping_no_taxlot(self):
    """With every TaxLotState target rerouted, no taxlot states are created."""
    # update the mappings to not include any taxlot tables in the data
    # note that save_data reads in from the propertystate table, so that will always
    # have entries in the db (for now).
    for m in self.fake_mappings:
        if m["to_table_name"] == 'TaxLotState':
            m["to_table_name"] = 'PropertyState'

    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # make sure that no taxlot objects were created
    ts = TaxLotState.objects.all()
    self.assertEqual(len(ts), 0)

    # make sure that the new data was loaded correctly; the taxlot id lands
    # in the property's extra_data since its mapping was rerouted
    ps = PropertyState.objects.filter(address_line_1='2700 Welstone Ave NE')[0]
    self.assertEqual(ps.site_eui, 1202)
    self.assertEqual(ps.extra_data['jurisdiction_tax_lot_id'], '11160509')
def test_single_id_matches(self):
    """A promoted state is matched only by its pm_property_id."""
    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.pk)

    # verify that there are no properties listed as canonical
    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 0)

    # promote a properties
    ps = PropertyState.objects.filter(pm_property_id='2264').first()
    ps.promote(self.cycle)

    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 1)

    # no identifiers -> no matches; the right pm id -> exactly one match
    matches = tasks.query_property_matches(property_states, None, None)
    self.assertEqual(len(matches), 0)
    matches = tasks.query_property_matches(property_states, '2264', None)
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0], ps)
def test_map_data_zip(self):
    """Mapping a zipped BuildingSync import yields two mapped PropertyStates."""
    # -- Setup
    # cache_first_rows is stubbed out; it is irrelevant to this test
    with patch.object(ImportFile, 'cache_first_rows', return_value=None):
        progress_info = tasks.save_raw_data(self.import_file.pk)
    self.assertEqual('success', progress_info['status'], json.dumps(progress_info))
    self.assertEqual(
        PropertyState.objects.filter(import_file=self.import_file).count(),
        2)

    # make the column mappings
    self.fake_mappings = default_buildingsync_profile_mappings()
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)

    # -- Act
    progress_info = tasks.map_data(self.import_file.pk)

    # -- Assert
    self.assertEqual('success', progress_info['status'])

    ps = PropertyState.objects.filter(address_line_1='123 Main St',
                                      import_file=self.import_file)
    self.assertEqual(len(ps), 2)
def test_mapping_properties_only(self):
    """Rerouting all targets to PropertyState creates no taxlot states."""
    # update the mappings to not include any taxlot tables in the data
    # note that save_data reads in from the propertystate table, so that will always
    # have entries in the db (for now).
    # deepcopy keeps self.fake_mappings intact for other tests
    new_mappings = copy.deepcopy(self.fake_mappings)
    for m in new_mappings:
        if m["to_table_name"] == 'TaxLotState':
            m["to_table_name"] = 'PropertyState'

    tasks._save_raw_data(self.import_file.pk, 'fake_cache_key', 1)
    Column.create_mappings(new_mappings, self.org, self.user, self.import_file.pk)
    tasks.map_data(self.import_file.pk)

    # make sure that no taxlot objects were created
    ts = TaxLotState.objects.all()
    self.assertEqual(len(ts), 0)

    # make sure that the new data was loaded correctly
    ps = PropertyState.objects.filter(
        address_line_1='2700 Welstone Ave NE')[0]
    self.assertEqual(ps.site_eui, 1202)
    self.assertEqual(ps.extra_data['jurisdiction_tax_lot_id'], '11160509')
def columns(self, request):
    """
    List all tax lot columns
    ---
    parameters:
        - name: organization_id
          description: The organization_id for this user's organization
          required: true
          paramType: query
    """
    org_id = int(request.query_params.get('organization_id'))
    # retrieve both native and extra-data columns for the taxlot inventory
    taxlot_columns = Column.retrieve_all(org_id, 'taxlot')
    return JsonResponse({'columns': taxlot_columns})
def test_map_all_models_zip(self):
    """map_additional_models on a zipped import creates building files and scenarios."""
    # -- Setup
    with patch.object(ImportFile, 'cache_first_rows', return_value=None):
        progress_info = tasks.save_raw_data(self.import_file.pk)
    self.assertEqual('success', progress_info['status'], json.dumps(progress_info))
    self.assertEqual(
        PropertyState.objects.filter(import_file=self.import_file).count(),
        2)

    # make the column mappings
    self.fake_mappings = default_buildingsync_profile_mappings()
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)

    # map the data
    progress_info = tasks.map_data(self.import_file.pk)
    self.assertEqual('success', progress_info['status'])
    ps = PropertyState.objects.filter(address_line_1='123 Main St',
                                      import_file=self.import_file)
    self.assertEqual(ps.count(), 2)

    # -- Act
    tasks.map_additional_models(self.import_file.pk)

    # -- Assert
    # mapping additional models must not create or drop property states
    ps = PropertyState.objects.filter(address_line_1='123 Main St',
                                      import_file=self.import_file)
    self.assertEqual(ps.count(), 2)

    # verify there are 2 building files
    bfs = BuildingFile.objects.all()
    self.assertEqual(bfs.count(), 2)

    # check that scenarios were created
    scenarios = Scenario.objects.all()
    self.assertEqual(scenarios.count(), 31)
def test_save_column_mapping_by_file(self):
    """Mappings loaded from a CSV file are retrievable for the organization."""
    self.mapping_import_file = os.path.abspath(
        "./seed/tests/data/test_mapping.csv")
    Column.create_mappings_from_file(self.mapping_import_file, self.fake_org, self.fake_user)

    # NOTE(review): 'year_build' below looks like a typo for 'year_built' and
    # 'Raw Column' maps to header-placeholder values; however assertCountEqual
    # on two dicts compares only their keys, so none of these tuple values are
    # actually checked here — confirm against seed/tests/data/test_mapping.csv.
    expected = {
        'City': ('PropertyState', 'city'),
        'Custom ID': ('PropertyState', 'custom_id_1'),
        'Zip': ('PropertyState', 'postal_code'),
        'GBA': ('PropertyState', 'gross_floor_area'),
        'PM Property ID': ('PropertyState', 'pm_property_id'),
        'BLDGS': ('PropertyState', 'building_count'),
        'AYB_YearBuilt': ('PropertyState', 'year_build'),
        'State': ('PropertyState', 'state'),
        'Address': ('PropertyState', 'address_line_1'),
        'Owner': ('PropertyState', 'owner'),
        'Raw Column': ('Table Name', 'Field Name'),
        'Property Type': ('PropertyState', 'property_type'),
        'UBI': ('TaxLotState', 'jurisdiction_tax_lot_id')
    }

    test_mapping, _ = ColumnMapping.get_column_mappings(self.fake_org)
    self.assertCountEqual(expected, test_mapping)
def test_single_id_matches(self):
    """A promoted state is matched only by its pm_property_id."""
    tasks.save_raw_data(self.import_file.pk)
    Column.create_mappings(self.fake_mappings, self.org, self.user, self.import_file.pk)
    tasks.map_data(self.import_file.pk)

    # verify that there are no properties listed as canonical
    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 0)

    # promote a properties
    ps = PropertyState.objects.filter(pm_property_id='2264').first()
    ps.promote(self.cycle)

    property_states = tasks.list_canonical_property_states(self.org)
    self.assertEqual(len(property_states), 1)

    # NOTE(review): this variant calls self.query_property_matches (a helper
    # taking two extra arguments) where the sibling test uses
    # tasks.query_property_matches — confirm the helper exists on this class.
    matches = self.query_property_matches(property_states, None, None, None, None)
    self.assertEqual(len(matches), 0)
    matches = self.query_property_matches(property_states, '2264', None, None, None)
    self.assertEqual(len(matches), 1)
    self.assertEqual(matches[0], ps)
def test_list_properties_with_profile_id(self):
    """The properties list endpoint scopes returned fields to the given profile."""
    state = self.property_state_factory.get_property_state(extra_data={"field_1": "value_1"})
    prprty = self.property_factory.get_property()
    PropertyView.objects.create(
        property=prprty, cycle=self.cycle, state=state
    )

    # save all the columns in the state to the database so we can setup column list settings
    Column.save_column_names(state)
    # get the columnlistsetting (default) for all columns
    columnlistsetting = self.column_list_factory.get_columnlistsettings(columns=['address_line_1', 'field_1'])

    params = {
        'organization_id': self.org.pk,
        'profile_id': columnlistsetting.id,
    }
    url = reverse('api:v2.1:properties-list') + '?cycle_id={}'.format(self.cycle.pk)
    response = self.client.get(url, params)
    data = response.json()
    self.assertEqual(len(data['properties']), 1)
    result = data['properties'][0]
    # the two profile columns come back...
    self.assertEqual(result['state']['address_line_1'], state.address_line_1)
    self.assertEqual(result['state']['extra_data']['field_1'], 'value_1')
    # ...and a column outside the profile (city) does not
    self.assertFalse(result['state'].get('city', None))
def suggestions(self, request):
    """
    Retrieves suggestions given raw column headers.

    parameters:
        - name: headers
          description: Raw column headers to build mapping suggestions for
          paramType: body
        - name: organization_id
          description: The organization_id for this user's organization
          required: true (at least, nothing will be returned if not provided)
          paramType: query
    """
    try:
        org_id = request.query_params.get('organization_id', None)
        raw_headers = request.data.get('headers', [])

        suggested_mappings = mapper.build_column_mapping(
            raw_headers,
            Column.retrieve_all_by_tuple(org_id),
            previous_mapping=None,
            map_args=None,
            thresh=80  # percentage match that we require. 80% is random value for now.
        )
        # Each suggestion is a [table, destination_field, confidence] list.
        # replace None with empty string for column names and PropertyState for tables
        # TODO #239: Move this fix to build_column_mapping
        for m in suggested_mappings:
            table, destination_field, _confidence = suggested_mappings[m]
            if destination_field is None:
                suggested_mappings[m][1] = ''
            # Fix the table name, eventually move this to the build_column_mapping.
            # Do not return the campus, created, updated fields... that is force
            # them to be in the property state
            if not table or table == 'Property':
                suggested_mappings[m][0] = 'PropertyState'
            elif table == 'TaxLot':
                suggested_mappings[m][0] = 'TaxLotState'

        return JsonResponse({
            'status': 'success',
            'data': suggested_mappings,
        })
    except Exception as e:
        # blanket catch keeps the endpoint from 500ing on bad input; the
        # message is surfaced to the client as a 400
        return JsonResponse({
            'status': 'error',
            'data': str(e),
        }, status=HTTP_400_BAD_REQUEST)
def save_column_mappings(self, file_id, mappings):
    """Persist *mappings* for the import file, caching them on success.

    :param file_id: pk of the ImportFile the mappings belong to
    :param mappings: list of dicts with from_field/to_field/to_table_name
    :return: dict with a 'status' of 'success' or 'error'
    """
    import_file = ImportFile.objects.get(pk=file_id)
    if not Column.create_mappings(mappings, self.org, self.user):
        return {'status': 'error'}

    # cache only the fields the import file needs later
    cached = [
        {
            'from_field': mapping['from_field'],
            'to_field': mapping['to_field'],
            'to_table_name': mapping['to_table_name'],
        }
        for mapping in mappings
    ]
    import_file.save_cached_mapped_columns(cached)
    return {'status': 'success'}
def test_column_retrieve_db_fields(self):
    """retrieve_db_fields returns the canonical list of database field names."""
    c = Column.retrieve_db_fields()

    data = ['address_line_1', 'address_line_2', 'block_number', 'building_certification',
            'building_count', 'campus', 'city', 'conditioned_floor_area', 'custom_id_1',
            'district', 'energy_alerts', 'energy_score', 'generation_date',
            'gross_floor_area', 'home_energy_score_id', 'jurisdiction_property_id',
            'jurisdiction_tax_lot_id', 'lot_number', 'number_properties',
            'occupied_floor_area', 'owner', 'owner_address', 'owner_city_state',
            'owner_email', 'owner_postal_code', 'owner_telephone', 'pm_parent_property_id',
            'pm_property_id', 'postal_code', 'property_name', 'property_notes',
            'property_type', 'recent_sale_date', 'release_date', 'site_eui',
            'site_eui_weather_normalized', 'source_eui', 'source_eui_weather_normalized',
            'space_alerts', 'state', 'use_description', 'year_built', 'year_ending']

    # NOTE(review): assertItemsEqual is Python 2-only; on Python 3 the method
    # is assertCountEqual — confirm which interpreter this suite targets.
    self.assertItemsEqual(data, c)
def save_column_mappings(self, request, pk=None):
    """
    Saves the mappings between the raw headers of an ImportFile and the
    destination fields in the `to_table_name` model which should be either
    PropertyState or TaxLotState

    Valid source_type values are found in ``seed.models.SEED_DATA_SOURCES``

    Payload::

        {
            "import_file_id": ID of the ImportFile record,
            "mappings": [
                {
                    'from_field': 'eui',  # raw field in import file
                    'to_field': 'energy_use_intensity',
                    'to_table_name': 'PropertyState',
                },
                {
                    'from_field': 'gfa',
                    'to_field': 'gross_floor_area',
                    'to_table_name': 'PropertyState',
                }
            ]
        }

    Returns::

        {'status': 'success'}
    """
    import_file = ImportFile.objects.get(pk=pk)
    organization = import_file.import_record.super_organization
    mappings = request.data.get('mappings', [])

    if not Column.create_mappings(mappings, organization, request.user):
        return JsonResponse({'status': 'error'})

    # cache the subset of each mapping the import file needs later
    import_file.save_cached_mapped_columns([
        {
            'from_field': mapping['from_field'],
            'to_field': mapping['to_field'],
            'to_table_name': mapping['to_table_name'],
        }
        for mapping in mappings
    ])
    return JsonResponse({'status': 'success'})
def test_tax_lot_property_get_related(self):
    """Test to make sure get_related returns the fields"""
    # create 50 property views to serialize
    for i in range(50):
        p = self.property_view_factory.get_property_view()
        self.properties.append(p.id)

    qs_filter = {"pk__in": self.properties}
    qs = PropertyView.objects.filter(**qs_filter)

    columns = [
        'address_line_1', 'generation_date', 'energy_alerts', 'space_alerts',
        'building_count', 'owner', 'source_eui', 'jurisdiction_tax_lot_id',
        'city', 'confidence', 'district', 'best_guess_confidence', 'site_eui',
        'building_certification', 'modified', 'match_type',
        'source_eui_weather_normalized', u'id', 'property_name',
        'conditioned_floor_area', 'pm_property_id', 'use_description',
        'source_type', 'year_built', 'release_date', 'gross_floor_area',
        'owner_city_state', 'owner_telephone', 'recent_sale_date',
    ]
    columns_from_database = Column.retrieve_all(self.org.id, 'property', False)
    data = TaxLotProperty.get_related(qs, columns, columns_from_database)

    self.assertEqual(len(data), 50)
    # no tax lots were linked to these property views
    self.assertEqual(len(data[0]['related']), 0)
def mappable(self, request):
    """
    List only inventory columns that are mappable
    """
    organization_id = int(request.query_params.get('organization_id'))
    inventory_type = request.query_params.get('inventory_type')

    # only the two inventory types are supported; anything else is an error
    if inventory_type in ('property', 'taxlot'):
        columns = Column.retrieve_mapping_columns(organization_id, inventory_type)
        return JsonResponse({'status': 'success', 'columns': columns})

    return JsonResponse({
        'status': 'error',
        'message': 'Query param `inventory_type` must be "property" or "taxlot"'
    })
def test_columns_extra_tag(self): columns = Column.retrieve_all(self.fake_org.pk, 'taxlot', False) # go through and delete all the results.ids so that it is easy to do a compare for result in columns: del result['id'] del result['name'] c = { "table_name": "TaxLotState", "column_name": "gross_floor_area", "display_name": "Gross Floor Area", "data_type": "None", "is_extra_data": True, "sharedFieldType": "None", "related": False, } self.assertIn(c, columns)
def list(self, request):
    """
    Retrieves all columns for the user's organization including the raw
    database columns. Will return all the columns across both the Property
    and Tax Lot tables. The related field will be true if the column came
    from the other table that is not the 'inventory_type' (which defaults
    to Property)
    """
    params = request.query_params
    organization_id = self.get_organization(self.request)
    inventory_type = params.get('inventory_type', 'property')
    # query params arrive as strings; parse the JSON booleans
    only_used = json.loads(params.get('only_used', 'false'))

    columns = Column.retrieve_all(organization_id, inventory_type, only_used)

    organization = Organization.objects.get(pk=organization_id)
    if json.loads(params.get('display_units', 'true')):
        # append the pint unit suffix to each column's display name
        columns = [add_pint_unit_suffix(organization, col) for col in columns]

    return JsonResponse({
        'status': 'success',
        'columns': columns,
    })
def test_retrieve_db_field_name_from_db_tables(self):
    """These values are the fields that can be used for hashing a property to check if it is the
    same record."""
    # expected list is sorted; the method is asserted to return the same order
    expected = ['address_line_1', 'address_line_2', 'analysis_end_time', 'analysis_start_time',
                'analysis_state_message', 'block_number', 'building_certification',
                'building_count', 'campus', 'city', 'conditioned_floor_area', 'created',
                'custom_id_1', 'district', 'energy_alerts', 'energy_score', 'generation_date',
                'gross_floor_area', 'home_energy_score_id', 'jurisdiction_property_id',
                'jurisdiction_tax_lot_id', 'latitude', 'longitude', 'lot_number',
                'number_properties', 'occupied_floor_area', 'owner', 'owner_address',
                'owner_city_state', 'owner_email', 'owner_postal_code', 'owner_telephone',
                'pm_parent_property_id', 'pm_property_id', 'postal_code', 'property_name',
                'property_notes', 'property_type', 'recent_sale_date', 'release_date',
                'site_eui', 'site_eui_modeled', 'site_eui_weather_normalized', 'source_eui',
                'source_eui_modeled', 'source_eui_weather_normalized', 'space_alerts',
                'state', 'ubid', 'updated', 'use_description', 'year_built', 'year_ending']

    method_columns = Column.retrieve_db_field_name_for_hash_comparison()
    self.assertListEqual(method_columns, expected)
def columns(self, request, pk=None):
    """
    Delete all columns for an organization. This method is typically not recommended if there
    are data in the inventory as it will invalidate all extra_data fields. This also removes
    all the column mappings that existed.

    ---
    parameters:
        - name: pk
          description: The organization_id
          required: true
          paramType: path
    type:
        status:
            description: success or error
            type: string
            required: true
        column_mappings_deleted_count:
            description: Number of column_mappings that were deleted
            type: integer
            required: true
        columns_deleted_count:
            description: Number of columns that were deleted
            type: integer
            required: true
    """
    # keep the try body to the single statement that can raise
    try:
        org = Organization.objects.get(pk=pk)
    except Organization.DoesNotExist:
        return JsonResponse(
            {
                'status': 'error',
                # fixed duplicated word in the original message ("with with")
                'message': 'organization with id {} does not exist'.format(pk)
            }, status=status.HTTP_404_NOT_FOUND)

    c_count, cm_count = Column.delete_all(org)
    return JsonResponse({
        'status': 'success',
        'column_mappings_deleted_count': cm_count,
        'columns_deleted_count': c_count,
    })
def test_save_state_match(self):
    """Merging two states keeps both sides' data and writes an audit entry."""
    # create a couple states to merge together
    ps_1 = self.property_state_factory.get_property_state(property_name="this should persist")
    ps_2 = self.property_state_factory.get_property_state(
        extra_data={"extra_1": "this should exist too"})

    priorities = Column.retrieve_priorities(self.org.pk)
    merged_state = save_state_match(ps_1, ps_2, priorities)

    self.assertEqual(merged_state.merge_state, MERGE_STATE_MERGED)
    # ps_1's name persists; ps_2's extra_data is carried across
    self.assertEqual(merged_state.property_name, ps_1.property_name)
    self.assertEqual(merged_state.extra_data['extra_1'], "this should exist too")

    # verify that the audit log is correct.
    pal = PropertyAuditLog.objects.get(organization=self.org, state=merged_state)
    self.assertEqual(pal.name, 'System Match')
    self.assertEqual(pal.parent_state1, ps_1)
    self.assertEqual(pal.parent_state2, ps_2)
    self.assertEqual(pal.description, 'Automatic Merge')
def shared_fields(self, request, pk=None):
    """
    Retrieves all fields marked as shared for the organization. Will only return used fields.
    """
    public_fields = []
    for column in Column.retrieve_all(pk, 'property', True):
        # only columns flagged as publicly shared are exposed
        if column['sharedFieldType'] != 'Public':
            continue
        public_fields.append({
            'table_name': column['table_name'],
            'name': column['name'],
            # this is the field name in the db. The other name can have tax_
            'column_name': column['column_name'],
            'display_name': column['display_name'],
        })

    return JsonResponse({'status': 'success', 'public_fields': public_fields})
def columns(self, request):
    """
    List all property columns

    parameters:
        - name: organization_id
          description: The organization_id for this user's organization
          required: true
          paramType: query
        - name: only_used
          description: Determine whether or not to show only the used fields.
                       Ones that have been mapped
          type: boolean
          required: false
          paramType: query
    """
    organization_id = int(request.query_params.get('organization_id'))
    # BUG FIX: query params arrive as strings, so the previous
    # `get('only_used', False)` produced the truthy string 'false' when a
    # client passed only_used=false. Parse the boolean explicitly, matching
    # the behavior of the org-wide `list` endpoint.
    only_used = request.query_params.get('only_used', 'false').lower() == 'true'
    columns = Column.retrieve_all(organization_id, 'property', only_used)
    return JsonResponse({'status': 'success', 'columns': columns})
def column_mappings(self, request, pk=None):
    """
    Saves the mappings between the raw headers of an ImportFile and the
    destination fields in the `to_table_name` model which should be either
    PropertyState or TaxLotState

    Valid source_type values are found in ``seed.models.SEED_DATA_SOURCES``
    """
    import_file_id = request.query_params.get('import_file_id')
    if import_file_id is None:
        return JsonResponse(
            {
                'status': 'error',
                'message': 'Query param `import_file_id` is required'
            }, status=status.HTTP_400_BAD_REQUEST)

    # both records must exist before any mappings are created
    try:
        _ = ImportFile.objects.get(pk=import_file_id)
        organization = Organization.objects.get(pk=pk)
    except ImportFile.DoesNotExist:
        return JsonResponse(
            {
                'status': 'error',
                'message': 'No import file found'
            }, status=status.HTTP_404_NOT_FOUND)
    except Organization.DoesNotExist:
        return JsonResponse(
            {
                'status': 'error',
                'message': 'No organization found'
            }, status=status.HTTP_404_NOT_FOUND)

    created = Column.create_mappings(
        request.data.get('mappings', []), organization, request.user, import_file_id)
    return JsonResponse({'status': 'success' if created else 'error'})
def taxlots_across_cycles(org_id, profile_id, cycle_ids=None):
    """Return serialized taxlot records for each requested cycle, scoped to
    the columns of the given profile.

    :param org_id: organization whose taxlots are listed
    :param profile_id: ColumnListSetting id, or -1 to show all org columns
    :param cycle_ids: iterable of Cycle ids to report on (defaults to none)
    :return: dict mapping cycle_id -> list of serialized taxlot records
    """
    # default to None rather than a shared mutable [] default argument
    if cycle_ids is None:
        cycle_ids = []

    # Identify column preferences to be used to scope fields/values
    columns_from_database = Column.retrieve_all(org_id, 'taxlot', False)

    if profile_id == -1:
        show_columns = list(
            Column.objects.filter(organization_id=org_id).values_list('id', flat=True))
    else:
        try:
            profile = ColumnListSetting.objects.get(
                organization_id=org_id,
                id=profile_id,
                settings_location=VIEW_LIST,
                inventory_type=VIEW_LIST_TAXLOT)
            show_columns = list(
                ColumnListSettingColumn.objects.filter(
                    column_list_setting_id=profile.id).values_list('column_id', flat=True))
        except ColumnListSetting.DoesNotExist:
            show_columns = None

    # loop-invariant: fetch the organization once instead of per cycle
    org = Organization.objects.get(pk=org_id)

    results = {}
    for cycle_id in cycle_ids:
        # get -Views for this Cycle
        taxlot_views = TaxLotView.objects.select_related('taxlot', 'state', 'cycle') \
            .filter(taxlot__organization_id=org_id, cycle_id=cycle_id) \
            .order_by('id')

        related_results = TaxLotProperty.get_related(taxlot_views, show_columns,
                                                     columns_from_database)

        unit_collapsed_results = [
            apply_display_unit_preferences(org, x) for x in related_results
        ]

        results[cycle_id] = unit_collapsed_results

    return results
def test_columns_extra_tag(self):
    """An extra-data taxlot column is returned by retrieve_all with the expected shape."""
    retrieved = Column.retrieve_all(self.fake_org.pk, 'taxlot', False)

    # Strip the per-run identifier fields so the dicts can be compared literally.
    for col in retrieved:
        for key in ('id', 'name', 'organization_id'):
            del col[key]

    expected = {
        'table_name': 'TaxLotState',
        'column_name': 'Gross Floor Area',
        'display_name': 'Gross Floor Area',
        'data_type': 'None',
        'is_extra_data': True,
        'merge_protection': 'Favor New',
        'sharedFieldType': 'None',
        'related': False,
        'unit_name': None,
        'unit_type': None,
    }
    self.assertIn(expected, retrieved)
def test_column_retrieve_db_fields(self):
    """retrieve_db_fields returns exactly the canonical set of database field names."""
    retrieved = Column.retrieve_db_fields(self.fake_org.pk)

    # Canonical list of DB-backed (non-extra-data) field names.
    expected = [
        'address_line_1', 'address_line_2', 'analysis_end_time', 'analysis_start_time',
        'analysis_state', 'analysis_state_message', 'block_number', 'building_certification',
        'building_count', 'campus', 'city', 'conditioned_floor_area', 'created',
        'custom_id_1', 'district', 'energy_alerts', 'energy_score', 'generation_date',
        'geocoding_confidence', 'gross_floor_area', 'home_energy_score_id',
        'jurisdiction_property_id', 'jurisdiction_tax_lot_id', 'latitude', 'longitude',
        'lot_number', 'normalized_address', 'number_properties', 'occupied_floor_area',
        'owner', 'owner_address', 'owner_city_state', 'owner_email', 'owner_postal_code',
        'owner_telephone', 'pm_parent_property_id', 'pm_property_id', 'postal_code',
        'property_footprint', 'property_name', 'property_notes', 'property_type',
        'recent_sale_date', 'release_date', 'site_eui', 'site_eui_modeled',
        'site_eui_weather_normalized', 'source_eui', 'source_eui_modeled',
        'source_eui_weather_normalized', 'space_alerts', 'state', 'taxlot_footprint',
        'ubid', 'ulid', 'updated', 'use_description', 'year_built', 'year_ending'
    ]
    # Order-insensitive comparison; both sides must contain the same elements.
    self.assertCountEqual(retrieved, expected)
def find_missing_columns_based_on_extra_data(self, org):
    """Look through all the extra_data fields of the TaxLot and Property
    State objects and make sure there are columns that point to them.

    :param org: organization whose states are scanned and whose Columns are created
    :return: None
    """
    logging_info("Creating any columns for non-mapped extra data fields for organization {}".format(org))

    def _extra_data_keys(states):
        # Union of every extra_data key across the given state queryset.
        return set(itertools.chain.from_iterable(state.extra_data.keys() for state in states))

    property_keys = _extra_data_keys(PropertyState.objects.filter(organization=org).all())
    taxlot_keys = _extra_data_keys(TaxLotState.objects.filter(organization=org).all())

    def _create_missing(keys, table_name, missing_msg):
        # For each extra-data key, ensure a Column row exists for this org;
        # create one pointing at `table_name` when absent. `.exists()` avoids
        # the full COUNT the original `.count()` issued.
        for key in keys:
            if not Column.objects.filter(organization=org, column_name=key).exists():
                logging_info(missing_msg.format(key))
                logging_info("Creating missing column '{}'".format(key))
                col = Column(organization=org, column_name=key, is_extra_data=True,
                             table_name=table_name)
                col.save()

    # Check the org's PropertyState extra-data keys, then the TaxLotState ones.
    # (Log messages kept byte-identical to the originals, including punctuation.)
    _create_missing(property_keys, "PropertyState",
                    "Missing column '{}' found in PropertyState extra_data keys")
    _create_missing(taxlot_keys, "TaxLotState",
                    "Missing column '{}' found in TaxLotState extra_data keys.")
def check_data(self, record_type, rows):
    """
    Send in data as a queryset from the Property/Taxlot ids.

    :param record_type: one of PropertyState | TaxLotState
    :param rows: rows of data to be checked for data quality
    :return: None
    """
    # grab the columns so we can grab the display names
    columns = Column.retrieve_all(self.organization, record_type)

    # create lookup tuple for the display name
    for c in columns:
        self.column_lookup[(c['table'], c['name'])] = c['displayName']

    # grab all the rules once, save query time
    rules = self.rules.filter(enabled=True, table_name=record_type).order_by(
        'field', 'severity')

    # Get the list of the field names that will show in every result
    fields = self.get_fieldnames(record_type)
    for row in rows:
        # Initialize the ID if it does not exist yet. Add in the other
        # fields that are of interest to the GUI
        if row.id not in self.results:
            self.results[row.id] = {}
            for field in fields:
                self.results[row.id][field] = getattr(row, field)
            self.results[row.id]['data_quality_results'] = []

        # Run the checks
        self._check(rules, row)

    # Prune entries that have zero data_quality_results. Fix: iterate over a
    # snapshot of the keys — deleting from a dict while iterating its items()
    # raises RuntimeError on Python 3 (and is fragile on Python 2).
    for row_id in list(self.results.keys()):
        if not self.results[row_id]['data_quality_results']:
            del self.results[row_id]
def test_csv_export_with_notes(self):
    """CSV export includes a 'Property Notes' column whose cell concatenates
    all of a property's notes (timestamp + text, separated by a dashed line)."""
    # Two notes on the same property: one multi-line, one single-line.
    multi_line_note = self.property_view.notes.create(
        name='Manually Created', note_type=Note.NOTE, text='multi\nline\nnote')
    single_line_note = self.property_view.notes.create(
        name='Manually Created', note_type=Note.NOTE, text='single line')

    self.properties.append(self.property_view.id)

    # Export every available property column.
    columns = []
    for c in Column.retrieve_all(self.org.id, 'property', False):
        columns.append(c['name'])

    # call the API
    url = reverse_lazy('api:v2.1:tax_lot_properties-export')
    response = self.client.post(
        url + '?{}={}&{}={}&{}={}'.format(
            'organization_id', self.org.pk,
            'cycle_id', self.cycle,
            'inventory_type', 'properties'),
        data=json.dumps({
            'columns': columns,
            'export_type': 'csv'
        }),
        content_type='application/json')

    # parse the content as array
    data = response.content.decode('utf-8').split('\r\n')

    # Expected notes cell: "<timestamp>\n<text>" per note, joined by a
    # "----------" separator line.
    notes_string = (multi_line_note.created.astimezone().strftime(
        "%Y-%m-%d %I:%M:%S %p") + "\n" + multi_line_note.text +
        "\n----------\n" + single_line_note.created.astimezone().strftime(
            "%Y-%m-%d %I:%M:%S %p") + "\n" + single_line_note.text)

    # Header row + one data row + trailing empty string from the final \r\n.
    self.assertEqual(len(data), 3)
    self.assertTrue('Property Notes' in data[0].split(','))
    self.assertTrue(notes_string in data[1])
def _merge_log_states(org_id, state_1, state_2, log_name, ignore_merge_protection):
    """Merge two -State records into a fresh state and record the audit trail.

    Works for both PropertyState and TaxLotState pairs (the class is inferred
    from ``state_1``). Returns the newly created merged state.
    """
    # Pick the model pair matching the kind of state being merged.
    if isinstance(state_1, PropertyState):
        StateClass, AuditLogClass = PropertyState, PropertyAuditLog
    else:
        StateClass, AuditLogClass = TaxLotState, TaxLotAuditLog

    priorities = Column.retrieve_priorities(org_id)

    # Merge into a brand-new state row for this organization.
    merged_state = merging.merge_state(
        StateClass.objects.create(organization_id=org_id),
        state_1,
        state_2,
        priorities[StateClass.__name__],
        ignore_merge_protection)

    # Record the merge in the audit log, linking both parents.
    AuditLogClass.objects.create(
        organization_id=org_id,
        parent1=AuditLogClass.objects.filter(state=state_1).first(),
        parent2=AuditLogClass.objects.filter(state=state_2).first(),
        parent_state1=state_1,
        parent_state2=state_2,
        state=merged_state,
        name=log_name,
        description='Automatic Merge',
        import_filename=None,
        record_type=AUDIT_IMPORT)

    # Set the merged_state to merged; reset both parents to unknown.
    merged_state.data_state = DATA_STATE_MATCHING
    merged_state.merge_state = MERGE_STATE_MERGED
    merged_state.save()
    for parent in (state_1, state_2):
        parent.merge_state = MERGE_STATE_UNKNOWN
        parent.save()

    return merged_state
def columns(self, request):
    """
    List all tax lot columns
    ---
    parameters:
        - name: organization_id
          description: The organization_id for this user's organization
          required: true
          paramType: query
        - name: only_used
          description: Determine whether or not to show only the used fields. Ones that have been mapped
          type: boolean
          required: false
          paramType: query
    """
    # Fix: the docstring previously advertised the parameter as `used_only`,
    # but the code has always read `only_used` — the docs now match the code.
    # NOTE(review): organization_id is effectively required — int(None) raises
    # a TypeError (HTTP 500) when the query param is missing; confirm whether
    # a 400 guard is wanted here.
    organization_id = int(request.query_params.get('organization_id'))
    organization = Organization.objects.get(pk=organization_id)

    only_used = request.query_params.get('only_used', False)
    columns = Column.retrieve_all(organization_id, 'taxlot', only_used)

    # Append display-unit suffixes per the org's preferences.
    unitted_columns = [add_pint_unit_suffix(organization, x) for x in columns]

    return JsonResponse({'status': 'success', 'columns': unitted_columns})
def test_json_export(self):
    """Test to make sure get_related returns the fields"""
    # Create 50 property views to export.
    for i in range(50):
        p = self.property_view_factory.get_property_view()
        self.properties.append(p.id)

    # Export every available property column.
    columns = []
    for c in Column.retrieve_all(self.org.id, 'property', False):
        columns.append(c['name'])

    # call the API
    url = reverse_lazy('api:v2.1:tax_lot_properties-export')
    response = self.client.post(
        url + '?{}={}&{}={}&{}={}'.format(
            'organization_id', self.org.pk,
            'cycle_id', self.cycle,
            'inventory_type', 'properties'),
        data=json.dumps({
            'columns': columns,
            'export_type': 'geojson'
        }),
        content_type='application/json')

    # parse the content as dictionary
    data = json.loads(response.content)

    # Top-level GeoJSON structure.
    first_level_keys = list(data.keys())
    self.assertIn("type", first_level_keys)
    self.assertIn("crs", first_level_keys)
    self.assertIn("features", first_level_keys)

    record_level_keys = list(data['features'][0]['properties'].keys())
    self.assertIn('Address Line 1', record_level_keys)
    # Fix: this was `self.assertTrue('Gross Floor Area', record_level_keys)`,
    # which always passes because the second argument is just the failure
    # message — the membership check was never performed.
    self.assertIn('Gross Floor Area', record_level_keys)

    # ids 52 up to and including 102
    self.assertEqual(len(data['features']), 51)
def test_cleanse(self):
    """Map a Portfolio Manager import and verify the Cleansing results:
    one record missing its PM Property ID (error) and one with an
    out-of-range Site EUI (warning)."""
    # Import the file and run mapping

    # Year Ending,Energy Score,Total GHG Emissions (MtCO2e),Weather Normalized Site EUI (kBtu/ft2),
    # National Median Site EUI (kBtu/ft2),Source EUI (kBtu/ft2),Weather Normalized Source EUI (kBtu/ft2),
    # National Median Source EUI (kBtu/ft2),Parking - Gross Floor Area (ft2),Organization
    # Release Date
    # Map the PM CSV headers onto PropertyState fields.
    fake_mappings = [
        {"from_field": u'Property Id', "to_table_name": u'PropertyState', "to_field": u'pm_property_id', },
        {"from_field": u'Property Name', "to_table_name": u'PropertyState', "to_field": u'property_name', },
        {"from_field": u'Address 1', "to_table_name": u'PropertyState', "to_field": u'address_line_1', },
        {"from_field": u'Address 2', "to_table_name": u'PropertyState', "to_field": u'address_line_2', },
        {"from_field": u'City', "to_table_name": u'PropertyState', "to_field": u'city', },
        {"from_field": u'State/Province', "to_table_name": u'PropertyState', "to_field": u'state_province', },
        {"from_field": u'Postal Code', "to_table_name": u'PropertyState', "to_field": u'postal_code', },
        {"from_field": u'Year Built', "to_table_name": u'PropertyState', "to_field": u'year_built', },
        {"from_field": u'Property Floor Area (Buildings and Parking) (ft2)', "to_table_name": u'PropertyState', "to_field": u'gross_floor_area', },
        {"from_field": u'Site EUI (kBtu/ft2)', "to_table_name": u'PropertyState', "to_field": u'site_eui', },
        {"from_field": u'Generation Date', "to_table_name": u'PropertyState', "to_field": u'generation_date', }
    ]

    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.id)

    qs = PropertyState.objects.filter(
        import_file=self.import_file,
        source_type=PORTFOLIO_BS,
    ).iterator()

    c = Cleansing(self.org)
    c.cleanse('property', qs)
    _log.debug(c.results)

    # Exactly two records should have cleansing findings.
    self.assertEqual(len(c.results), 2)

    # Record 1: missing PM Property ID -> error.
    result = [v for v in c.results.values() if v['address_line_1'] == '120243 E True Lane']
    if len(result) == 1:
        result = result[0]
    else:
        raise RuntimeError('Non unity results')

    res = [{
        'field': u'pm_property_id',
        'formatted_field': u'PM Property ID',
        'value': u'',
        'message': u'PM Property ID is missing',
        'detailed_message': u'PM Property ID is missing',
        'severity': u'error'
    }]
    self.assertEqual(res, result['cleansing_results'])

    # Record 2: Site EUI below the minimum threshold -> warning.
    result = [v for v in c.results.values() if v['address_line_1'] == '95373 E Peach Avenue']
    if len(result) == 1:
        result = result[0]
    else:
        raise RuntimeError('Non unity results')

    res = [{
        'field': u'site_eui',
        'formatted_field': u'Site EUI',
        'value': 0.1,
        'message': u'Site EUI out of range',
        'detailed_message': u'Site EUI [0.1] < 10.0',
        'severity': u'warning'
    }]
    self.assertEqual(res, result['cleansing_results'])
def test_check_multiple_text_match(self):
    """Three custom text-match rules on different fields each apply their own
    status label; verify each label lands on the expected properties."""
    d = DataQualityCheck.retrieve(self.org)
    d.remove_all_rules()

    # Rule 1: label properties whose 'meters_present' field matches 'OK'.
    sl_data = {'name': 'No meters present', 'super_organization': self.org}
    sl_ok_1, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'meters_present',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': 'OK',
        'severity': SEVERITY_ERROR,
        'status_label': sl_ok_1,
    }
    d.add_rule(new_rule)

    # Rule 2: same shape, on the '12 Consectutive Months' extra-data field.
    # (The 'Consectutive' spelling matches the fixture data — do not "fix" it.)
    sl_data = {
        'name': 'No 12 Consectutive Months',
        'super_organization': self.org
    }
    sl_ok_2, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': '12 Consectutive Months',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': 'OK',
        'severity': SEVERITY_ERROR,
        'status_label': sl_ok_2,
    }
    d.add_rule(new_rule)

    # Rule 3: on the 'Monthly Data' extra-data field.
    sl_data = {'name': 'No Monthly Data', 'super_organization': self.org}
    sl_ok_3, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'Monthly Data',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': 'OK',
        'severity': SEVERITY_ERROR,
        'status_label': sl_ok_3,
    }
    d.add_rule(new_rule)

    # import data
    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(self.fake_mappings, self.org, self.user,
                           self.import_file.pk)
    tasks.map_data(self.import_file.id)
    tasks.match_buildings(self.import_file.id)

    qs = PropertyState.objects.filter(
        import_file=self.import_file,
        source_type=ASSESSED_BS,
    ).iterator()

    d.reset_results()
    d.check_data('PropertyState', qs)

    # Check multiple strings
    props = PropertyView.objects.filter(
        property__labels=sl_ok_1).select_related('state')
    addresses = sorted([p.state.address_line_1 for p in props])
    expected = [
        u'1 International Road', u'17246 Esch Drive',
        u'2581 Schiller Parkway', u'3 Northport Place', u'84807 Buell Trail'
    ]
    self.assertListEqual(expected, addresses)

    props = PropertyView.objects.filter(
        property__labels=sl_ok_2).select_related('state')
    addresses = sorted([p.state.address_line_1 for p in props])
    expected = [
        u'1 International Road', u'2581 Schiller Parkway',
        u'49705 Harper Crossing'
    ]
    self.assertListEqual(expected, addresses)

    props = PropertyView.objects.filter(
        property__labels=sl_ok_3).select_related('state')
    addresses = sorted([p.state.address_line_1 for p in props])
    expected = [
        u'1 International Road', u'17246 Esch Drive', u'84807 Buell Trail',
        u'88263 Scoville Park'
    ]
    self.assertListEqual(expected, addresses)
def test_check(self):
    """End-to-end data quality check: install range/date/year/text rules with
    status labels, import and match the fixture data, then verify both the
    per-record results and the labels applied to Property/TaxLot views."""
    # data quality check
    d = DataQualityCheck.retrieve(self.org)
    d.remove_all_rules()

    # Default range rule on gross_floor_area (no label).
    d.add_rule({
        'table_name': 'PropertyState',
        'field': 'gross_floor_area',
        'data_type': TYPE_NUMBER,
        'rule_type': RULE_TYPE_DEFAULT,
        'min': 100,
        'max': 7000000,
        'severity': SEVERITY_ERROR,
        'units': 'square feet',
    })
    # Default date-range rule (min/max as YYYYMMDD ints).
    d.add_rule({
        'table_name': 'PropertyState',
        'field': 'recent_sale_date',
        'data_type': TYPE_DATE,
        'rule_type': RULE_TYPE_DEFAULT,
        'min': 18890101,
        'max': 20201231,
        'severity': SEVERITY_ERROR,
    })

    # create some status labels for testing
    sl_data = {
        'name': 'year - old or future',
        'super_organization': self.org
    }
    sl_year, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'year_built',
        'data_type': TYPE_YEAR,
        'rule_type': RULE_TYPE_DEFAULT,
        'min': 1700,
        'max': 2019,
        'severity': SEVERITY_ERROR,
        'status_label': sl_year,
    }
    d.add_rule(new_rule)

    # NOTE(review): the label names below look swapped — `sl_string` (used for
    # the text-match rule) carries the name 'extra data ps float error', and
    # `sl_float` carries 'extra data ps string error'. The assertions later
    # match this arrangement, so it is preserved as-is; confirm intent.
    sl_data = {
        'name': 'extra data ps float error',
        'super_organization': self.org
    }
    sl_string, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'extra_data_ps_alpha',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': 'alpha',
        'severity': SEVERITY_ERROR,
        'units': 'square feet',
        'status_label': sl_string,
    }
    d.add_rule(new_rule)

    sl_data = {
        'name': 'extra data ps string error',
        'super_organization': self.org
    }
    sl_float, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'extra_data_ps_float',
        'data_type': TYPE_NUMBER,
        'rule_type': RULE_TYPE_CUSTOM,
        'min': 9999,
        'max': 10001,
        'severity': SEVERITY_ERROR,
        'status_label': sl_float,
    }
    d.add_rule(new_rule)

    # TaxLotState text-match rule.
    sl_data = {
        'name': 'jurisdiction id does not match',
        'super_organization': self.org
    }
    sl_jurid, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'TaxLotState',
        'field': 'jurisdiction_tax_lot_id',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': '1235',
        'severity': SEVERITY_ERROR,
        'status_label': sl_jurid,
    }
    d.add_rule(new_rule)

    # Three extra-data text-match rules mirroring test_check_multiple_text_match.
    sl_data = {'name': 'No meters present', 'super_organization': self.org}
    sl_ok_1, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'Meters Present',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': 'OK',
        'severity': SEVERITY_ERROR,
        'status_label': sl_ok_1,
    }
    d.add_rule(new_rule)

    sl_data = {
        'name': 'No 12 Consectutive Months',
        'super_organization': self.org
    }
    sl_ok_2, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': '12 Consectutive Months',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': 'OK',
        'severity': SEVERITY_ERROR,
        'status_label': sl_ok_2,
    }
    d.add_rule(new_rule)

    sl_data = {'name': 'No Monthly Data', 'super_organization': self.org}
    sl_ok_3, _ = StatusLabel.objects.get_or_create(**sl_data)
    new_rule = {
        'table_name': 'PropertyState',
        'field': 'Monthly Data',
        'data_type': TYPE_STRING,
        'rule_type': RULE_TYPE_CUSTOM,
        'text_match': 'OK',
        'severity': SEVERITY_ERROR,
        'status_label': sl_ok_3,
    }
    d.add_rule(new_rule)

    # import data
    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(self.fake_mappings, self.org, self.user,
                           self.import_file.pk)
    tasks.map_data(self.import_file.id)
    tasks.match_buildings(self.import_file.id)

    qs = PropertyState.objects.filter(
        import_file=self.import_file,
        source_type=ASSESSED_BS,
    ).iterator()

    d.reset_results()
    d.check_data('PropertyState', qs)

    # Per-record results, in (field, severity) order.
    result = d.retrieve_result_by_address('4 Myrtle Parkway')
    res = [{
        "severity": "error",
        "value": "27.0",
        "field": "extra_data_ps_float",
        "table_name": "PropertyState",
        "message": "Extra Data Ps Float out of range",
        "detailed_message": "Extra Data Ps Float [27.0] < 9999.0",
        "formatted_field": "Extra Data Ps Float"
    }, {
        "severity": "error",
        "value": "5.0",
        "field": "gross_floor_area",
        "table_name": "PropertyState",
        "message": "Gross Floor Area out of range",
        "detailed_message": "Gross Floor Area [5.0] < 100.0",
        "formatted_field": "Gross Floor Area"
    }]
    self.assertListEqual(result['data_quality_results'], res)

    result = d.retrieve_result_by_address('94 Oxford Hill')
    res = [{
        "severity": "error",
        "value": "20000.0",
        "field": "extra_data_ps_float",
        "table_name": "PropertyState",
        "message": "Extra Data Ps Float out of range",
        "detailed_message": "Extra Data Ps Float [20000.0] > 10001.0",
        "formatted_field": "Extra Data Ps Float"
    }, {
        "severity": "error",
        "value": "1888-01-01 08:00:00",
        "field": "recent_sale_date",
        "table_name": "PropertyState",
        "message": "Recent Sale Date out of range",
        "detailed_message": "Recent Sale Date [1888-01-01 08:00:00] < 1889-01-01 00:00:00",
        "formatted_field": "Recent Sale Date"
    }]
    self.assertListEqual(result['data_quality_results'], res)

    result = d.retrieve_result_by_address("3 Portage Alley")
    res = [{
        'severity': u'error',
        'value': 'beta',
        'field': u'extra_data_ps_alpha',
        'table_name': u'PropertyState',
        'message': u'Extra Data Ps Alpha does not match expected value',
        'detailed_message': u'Extra Data Ps Alpha [beta] does not contain "alpha"',
        'formatted_field': u'Extra Data Ps Alpha'
    }]
    self.assertListEqual(result['data_quality_results'], res)

    # make sure that the label has been applied
    props = PropertyView.objects.filter(
        property__labels=sl_year).select_related('state')
    addresses = sorted([p.state.address_line_1 for p in props])
    expected = sorted([u'84807 Buell Trail', u'1 International Road'])
    self.assertListEqual(expected, addresses)

    props = PropertyView.objects.filter(
        property__labels=sl_float).select_related('state')
    addresses = sorted([p.state.address_line_1 for p in props])
    expected = sorted([u'4 Myrtle Parkway', u'94 Oxford Hill'])
    self.assertListEqual(expected, addresses)

    props = PropertyView.objects.filter(
        property__labels=sl_string).select_related('state')
    addresses = [p.state.address_line_1 for p in props]
    expected = [u'3 Portage Alley']
    self.assertListEqual(expected, addresses)

    # Check tax lots
    qs = TaxLotState.objects.filter(
        import_file=self.import_file,
    ).iterator()

    d.reset_results()
    d.check_data('TaxLotState', qs)

    result = d.retrieve_result_by_tax_lot_id("1234")
    res = [{
        "severity": "error",
        "value": "1234",
        "field": "jurisdiction_tax_lot_id",
        "table_name": "TaxLotState",
        "message": "Jurisdiction Tax Lot ID does not match expected value",
        "detailed_message": "Jurisdiction Tax Lot ID [1234] does not contain \"1235\"",
        "formatted_field": "Jurisdiction Tax Lot ID"
    }]
    self.assertListEqual(result['data_quality_results'], res)

    # verify labels
    taxlots = TaxLotView.objects.filter(
        taxlot__labels=sl_jurid).select_related('state')
    ids = [t.state.jurisdiction_tax_lot_id for t in taxlots]
    expected = '1234'
    self.assertEqual(expected, ids[0])

    # Check multiple strings
    # NOTE(review): `addresses` is computed here but never asserted within the
    # visible range — possibly truncated or leftover; confirm against VCS history.
    props = PropertyView.objects.filter(
        property__labels=sl_ok_1).select_related('state')
    addresses = [p.state.address_line_1 for p in props]
def test_cleanse(self):
    """Map an assessed import covering (nearly) every PropertyState field and
    verify the Cleansing run flags exactly 34 records."""
    # Import the file and run mapping

    # This is silly, the mappings are backwards from what you would expect.
    # The key is the BS field, and the value is the value in the CSV
    # NOTE(review): 'conditioned_floor_area' and 'generation_date' each appear
    # twice in this list — presumably harmless duplicates; confirm before removing.
    fake_mappings = [
        {"from_field": u'block_number', "to_table_name": u'PropertyState', "to_field": u'block_number', },
        {"from_field": u'error_type', "to_table_name": u'PropertyState', "to_field": u'error_type', },
        {"from_field": u'building_count', "to_table_name": u'PropertyState', "to_field": u'building_count', },
        {"from_field": u'conditioned_floor_area', "to_table_name": u'PropertyState', "to_field": u'conditioned_floor_area', },
        {"from_field": u'energy_score', "to_table_name": u'PropertyState', "to_field": u'energy_score', },
        {"from_field": u'gross_floor_area', "to_table_name": u'PropertyState', "to_field": u'gross_floor_area', },
        {"from_field": u'lot_number', "to_table_name": u'PropertyState', "to_field": u'lot_number', },
        {"from_field": u'occupied_floor_area', "to_table_name": u'PropertyState', "to_field": u'occupied_floor_area', },
        {"from_field": u'conditioned_floor_area', "to_table_name": u'PropertyState', "to_field": u'conditioned_floor_area', },
        {"from_field": u'postal_code', "to_table_name": u'PropertyState', "to_field": u'postal_code', },
        {"from_field": u'site_eui', "to_table_name": u'PropertyState', "to_field": u'site_eui', },
        {"from_field": u'site_eui_weather_normalized', "to_table_name": u'PropertyState', "to_field": u'site_eui_weather_normalized', },
        {"from_field": u'source_eui', "to_table_name": u'PropertyState', "to_field": u'source_eui', },
        {"from_field": u'source_eui_weather_normalized', "to_table_name": u'PropertyState', "to_field": u'source_eui_weather_normalized', },
        {"from_field": u'address_line_1', "to_table_name": u'PropertyState', "to_field": u'address_line_1', },
        {"from_field": u'address_line_2', "to_table_name": u'PropertyState', "to_field": u'address_line_2', },
        {"from_field": u'building_certification', "to_table_name": u'PropertyState', "to_field": u'building_certification', },
        {"from_field": u'city', "to_table_name": u'PropertyState', "to_field": u'city', },
        {"from_field": u'custom_id_1', "to_table_name": u'PropertyState', "to_field": u'custom_id_1', },
        {"from_field": u'district', "to_table_name": u'PropertyState', "to_field": u'district', },
        {"from_field": u'energy_alerts', "to_table_name": u'PropertyState', "to_field": u'energy_alerts', },
        {"from_field": u'owner_address', "to_table_name": u'PropertyState', "to_field": u'owner_address', },
        {"from_field": u'owner_city_state', "to_table_name": u'PropertyState', "to_field": u'owner_city_state', },
        {"from_field": u'owner_email', "to_table_name": u'PropertyState', "to_field": u'owner_email', },
        {"from_field": u'owner_postal_code', "to_table_name": u'PropertyState', "to_field": u'owner_postal_code', },
        {"from_field": u'owner_telephone', "to_table_name": u'PropertyState', "to_field": u'owner_telephone', },
        {"from_field": u'pm_property_id', "to_table_name": u'PropertyState', "to_field": u'pm_property_id', },
        {"from_field": u'property_name', "to_table_name": u'PropertyState', "to_field": u'property_name', },
        {"from_field": u'property_notes', "to_table_name": u'PropertyState', "to_field": u'property_notes', },
        {"from_field": u'space_alerts', "to_table_name": u'PropertyState', "to_field": u'space_alerts', },
        {"from_field": u'state_province', "to_table_name": u'PropertyState', "to_field": u'state_province', },
        {"from_field": u'tax_lot_id', "to_table_name": u'PropertyState', "to_field": u'tax_lot_id', },
        {"from_field": u'use_description', "to_table_name": u'PropertyState', "to_field": u'use_description', },
        {"from_field": u'generation_date', "to_table_name": u'PropertyState', "to_field": u'generation_date', },
        {"from_field": u'recent_sale_date', "to_table_name": u'PropertyState', "to_field": u'recent_sale_date', },
        {"from_field": u'generation_date', "to_table_name": u'PropertyState', "to_field": u'generation_date', },
        {"from_field": u'release_date', "to_table_name": u'PropertyState', "to_field": u'release_date', },
        {"from_field": u'year_built', "to_table_name": u'PropertyState', "to_field": u'year_built', },
        {"from_field": u'year_ending', "to_table_name": u'PropertyState', "to_field": u'year_ending', }
    ]

    tasks.save_raw_data(self.import_file.id)
    Column.create_mappings(fake_mappings, self.org, self.user)
    tasks.map_data(self.import_file.id)

    qs = PropertyState.objects.filter(
        import_file=self.import_file,
        source_type=ASSESSED_BS,
    ).iterator()

    c = Cleansing(self.org)
    c.cleanse('property', qs)
    # _log.debug(c.results)

    # This only checks to make sure the 34 errors have occurred.
    self.assertEqual(len(c.results), 34)
def import_exported_data(self, filename):
    """
    Import test files from Stephen for many-to-many testing. This imports
    and maps the data accordingly. Presently these files are missing a couple
    of attributes to make them valid:

        1) the master campus record to define the pm_property_id
        2) the joins between propertystate and taxlotstate

    NOTE(review): this helper is Python-2-only as written (``dict.iteritems``,
    csv on a ``'rb'`` file handle) — confirm before running under Python 3.
    """
    # Do a bunch of work to flatten out this temp file that has extra_data
    # as a string representation of a dict
    data = []
    keys = None
    new_keys = set()

    f = os.path.join(os.path.dirname(__file__), 'data', filename)
    with open(f, 'rb') as csvfile:
        reader = csv.DictReader(csvfile)
        keys = reader.fieldnames
        for row in reader:
            # Explode the JSON-encoded extra_data cell into real columns.
            ed = json.loads(row.pop('extra_data'))
            for k, v in ed.iteritems():
                new_keys.add(k)
                row[k] = v
            data.append(row)

    # remove the extra_data column and add in the new columns
    keys.remove('extra_data')
    for k in new_keys:
        keys.append(k)

    # save the new file
    new_file_name = 'tmp_{}_flat.csv'.format(
        os.path.splitext(os.path.basename(filename))[0]
    )
    f_new = os.path.join(os.path.dirname(__file__), 'data', new_file_name)
    with open(f_new, 'w') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=keys)
        writer.writeheader()
        for d in data:
            writer.writerow(d)

    # save the keys. This doesn't appear to be used anywhere
    new_file_name = 'tmp_{}_keys.csv'.format(
        os.path.splitext(os.path.basename(filename))[0]
    )
    f_new = os.path.join(os.path.dirname(__file__), 'data', new_file_name)
    with open(f_new, 'w') as outfile:
        outfile.writelines([str(key) + '\n' for key in keys])

    # Continue saving the raw data
    # NOTE(review): the file handle passed to File() is never explicitly
    # closed; relies on GC — confirm acceptable for this test helper.
    new_file_name = "tmp_{}_flat.csv".format(
        os.path.splitext(os.path.basename(filename))[0]
    )
    f_new = os.path.join(os.path.dirname(__file__), 'data', new_file_name)
    self.import_file.file = File(open(f_new))
    self.import_file.save()

    save_raw_data(self.import_file.id)

    # the mapping is just the 'keys' repeated since the file
    # was created as a database dump
    mapping = []
    for k in keys:
        if k == 'id':
            # 'id' is a database artifact, not a mappable field.
            continue
        mapping.append(
            {
                "from_field": k,
                "to_table_name": "PropertyState",
                "to_field": k
            }
        )
    Column.create_mappings(mapping, self.org, self.user)

    # call the mapping function from the tasks file
    map_data(self.import_file.id)
def test_demo_v2(self):
    """Import tax lot data then property data, matching after each import,
    and verify counts plus one spot-checked merged property."""
    # Save and map the tax lot file first; both mapping sets are registered
    # up front so the later property import maps without re-registering.
    tasks._save_raw_data(self.import_file_tax_lot.pk, 'fake_cache_key', 1)
    Column.create_mappings(self.fake_taxlot_mappings, self.org, self.user)
    Column.create_mappings(self.fake_portfolio_mappings, self.org, self.user)
    tasks.map_data(self.import_file_tax_lot.pk)

    # make sure that the new data was loaded correctly
    # Check to make sure the taxlots were imported
    ts = TaxLotState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_tax_lot,
    )

    ps = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_property,
    )

    # Only tax lots have been mapped at this point.
    self.assertEqual(len(ps), 0)
    self.assertEqual(len(ts), 9)

    tasks.match_buildings(self.import_file_tax_lot.id, self.user.id)

    # Check a single case of the taxlotstate
    self.assertEqual(TaxLotState.objects.filter(address_line_1='050 Willow Ave SE').count(), 1)
    self.assertEqual(
        TaxLotView.objects.filter(state__address_line_1='050 Willow Ave SE').count(), 1
    )
    self.assertEqual(TaxLotView.objects.count(), 9)

    # Import the property data
    tasks._save_raw_data(self.import_file_property.pk, 'fake_cache_key', 1)
    tasks.map_data(self.import_file_property.pk)

    ts = TaxLotState.objects.filter(
        # data_state=DATA_STATE_MAPPING,  # Look at all taxlotstates
        organization=self.org,
        import_file=self.import_file_tax_lot,
    )

    ps = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_property,
    )

    self.assertEqual(len(ts), 9)
    self.assertEqual(len(ps), 14)

    tasks.match_buildings(self.import_file_property.id, self.user.id)

    ps = PropertyState.objects.filter(
        data_state=DATA_STATE_MAPPING,
        organization=self.org,
        import_file=self.import_file_property,
    )

    # there shouldn't be any properties left in the mapping state
    self.assertEqual(len(ps), 0)

    # Historical assertions kept for reference; view counts were previously
    # checked here directly.
    # psv = PropertyView.objects.filter(state__organization=self.org)
    # self.assertEqual(len(psv), 12)

    # tlv = TaxLotView.objects.filter(state__organization=self.org)
    # self.assertEqual(len(tlv), 9)

    # Spot-check one merged property by its PM property id.
    self.assertEqual(PropertyView.objects.filter(state__organization=self.org,
                                                 state__pm_property_id='2264').count(), 1)
    pv = PropertyView.objects.filter(state__organization=self.org,
                                     state__pm_property_id='2264').first()
    self.assertEqual(pv.state.property_name, 'University Inn')
    self.assertEqual(pv.state.address_line_1, '50 Willow Ave SE')