def test_map_row_dynamic_mapping_with_cleaner(self):
    """Type-based cleaners on dynamic fields based on reverse-mapping."""
    mapper.build_column_mapping(self.raw_columns, self.dest_columns)
    sample_row = {
        u'Property Id': u'234,235,423',
        u'heading1': u'value1',
    }
    # The cleaner should strip the commas and coerce the id to a float.
    mapped = mapper.map_row(
        sample_row,
        self.fake_mapping,
        FakeModel,
        cleaner=self.test_cleaner,
    )
    self.assertEqual(mapped.property_id, 234235423.0)
def test_build_column_mapping_w_no_match(self):
    """We return the raw column name if there's no good match."""
    source_headers = [u'Address', u'Name', u'City', u'BBL', u'Building ID']
    candidate_fields = [
        (u'PropertyState', u'address_line_1'),
        (u'PropertyState', u'name'),
        (u'PropertyState', u'city'),
        (u'TaxLotState', u'jurisdiction_tax_lot_id'),
    ]

    result = mapper.build_column_mapping(source_headers, candidate_fields, thresh=50)

    # 'BBL' has no match above the threshold, so it echoes back as-is.
    self.assertDictEqual(result, {
        u'Address': [u'PropertyState', u'address_line_1', 90],
        u'BBL': [u'PropertyState', u'BBL', 100],
        u'Building ID': [u'TaxLotState', u'jurisdiction_tax_lot_id', 59],
        u'City': [u'PropertyState', u'city', 100],
        u'Name': [u'PropertyState', u'name', 100],
    })
def test_build_column_mapping(self):
    """Create a useful set of suggestions for mappings."""
    suggestions = mapper.build_column_mapping(self.raw_columns, self.dest_columns)
    self.assertDictEqual(suggestions, self.expected)
def test_map_row_dynamic_mapping_with_cleaner(self):
    """Type-based cleaners on dynamic fields based on reverse-mapping."""
    mapper.build_column_mapping(self.raw_columns, self.dest_columns)

    row = {u'Property Id': u'234,235,423', u'heading1': u'value1'}

    # Mapping with the cleaner attached should de-comma and float the id.
    model = mapper.map_row(row, self.fake_mapping, FakeModel,
                           cleaner=self.test_cleaner)
    self.assertEqual(model.property_id, 234235423.0)
def test_build_column_mapping_w_no_match(self):
    """We return None if there's no good match."""
    expected = copy.deepcopy(self.expected)
    # Mirrors what the "previous_mapping" hook would hand back.
    expected[u'BBL'] = [None, 0]

    result = mapper.build_column_mapping(
        self.raw_columns, self.dest_columns, thresh=48)

    self.assertDictEqual(result, expected)
def test_build_column_mapping_w_no_match(self):
    """We return None if there's no good match."""
    # 'BBL' falls below the threshold, so it maps to the null result
    # that a "previous_mapping" lookup would have produced.
    anticipated = copy.deepcopy(self.expected)
    anticipated[u'BBL'] = [None, 0]

    self.assertDictEqual(
        mapper.build_column_mapping(
            self.raw_columns,
            self.dest_columns,
            thresh=48,
        ),
        anticipated,
    )
def test_mapping(self):
    """Test objects in database can be converted to mapped fields.

    Builds a raw PropertyState held entirely in extra_data, generates and
    saves column-mapping suggestions, then verifies that mapping promotes
    both known fields (year_built) and unknown fields (random_extra) onto
    the newly created state.
    """
    # For mapping, you have to create an import file, even if it is just one
    # record. This is more of an ID to track import state.
    state = self.property_state_factory.get_property_state_as_extra_data(
        import_file_id=self.import_file.id,
        source_type=ASSESSED_RAW,
        data_state=DATA_STATE_IMPORT,
        random_extra=42)

    # Mark the raw save as done so mapping is allowed to proceed.
    self.import_file.raw_save_done = True
    self.import_file.save()

    # Create mappings from the new states
    # TODO #239: Convert this to a single helper method to suggest and save
    suggested_mappings = mapper.build_column_mapping(
        list(state.extra_data.keys()),
        Column.retrieve_all_by_tuple(self.org),
        previous_mapping=get_column_mapping,
        map_args=[self.org],
        thresh=80)

    # Convert mapping suggestions to the format needed for saving. A single
    # suggestion looks like: 'lot_number': ['PropertyState', 'lot_number', 100]
    mappings = [
        {
            "from_field": raw_column,
            "from_units": None,
            "to_table_name": suggestion[0],
            "to_field": suggestion[1],
            "to_field_display_name": suggestion[1],
        }
        for raw_column, suggestion in suggested_mappings.items()
    ]

    # Now save the mappings
    Column.create_mappings(mappings, self.org, self.user, self.import_file.id)
    # END TODO

    tasks.map_data(self.import_file.id)

    props = self.import_file.find_unmatched_property_states()
    self.assertEqual(len(props), 1)
    self.assertEqual(state.extra_data['year_built'], props.first().year_built)
    self.assertEqual(state.extra_data['random_extra'],
                     props.first().extra_data['random_extra'])
def suggestions(self, request):
    """
    Retrieves suggestions given raw column headers.

    parameters:
       - name: organization_id
         description: The organization_id for this user's organization
         required: true (at least, nothing will be returned if not provided)
         paramType: query
       - headers: raw column headers, read from the request body
    """
    try:
        org_id = request.query_params.get('organization_id', None)
        raw_headers = request.data.get('headers', [])

        suggested_mappings = mapper.build_column_mapping(
            raw_headers,
            Column.retrieve_all_by_tuple(org_id),
            previous_mapping=None,
            map_args=None,
            thresh=80  # percentage match that we require. 80% is random value for now.
        )

        # Normalize every suggestion in one pass.
        # TODO #239: Move these fixes into build_column_mapping
        for header in suggested_mappings:
            table, destination_field, _confidence = suggested_mappings[header]

            # Replace None with empty string for column names.
            if destination_field is None:
                suggested_mappings[header][1] = ''

            # Fix the table name: do not return the campus, created, updated
            # fields... that is, force them to be in the property state.
            if not table or table == 'Property':
                suggested_mappings[header][0] = 'PropertyState'
            elif table == 'TaxLot':
                suggested_mappings[header][0] = 'TaxLotState'

        return JsonResponse({
            'status': 'success',
            'data': suggested_mappings,
        })
    except Exception as e:
        return JsonResponse({
            'status': 'error',
            'data': str(e),
        }, status=HTTP_400_BAD_REQUEST)
def test_build_column_mapping_w_null_saved(self):
    """We handle explicit saves of null, and return those dutifully."""
    # Pretend a query against previously-saved mappings returns an
    # explicit null result for this one column.
    def previous(raw, *args, **kwargs):
        if raw == u'Building ID':
            return [None, 1]

    expected = copy.deepcopy(self.expected)
    expected[u'Building ID'] = [None, 1]

    self.assertDictEqual(
        mapper.build_column_mapping(
            self.raw_columns,
            self.dest_columns,
            previous_mapping=previous,
        ),
        expected,
    )
def test_build_column_mapping_w_callable(self):
    """Callable result at the beginning of the list."""
    # Pretend a query against saved mappings hits for 'Building ID' only.
    def previous(raw, *args, **kwargs):
        if raw == u'Building ID':
            return [u'custom_id_1', 27]

    expected = copy.deepcopy(self.expected)
    expected[u'Building ID'] = [u'custom_id_1', 27]

    result = mapper.build_column_mapping(
        self.raw_columns,
        self.dest_columns,
        previous_mapping=previous,
    )
    self.assertDictEqual(result, expected)
def test_build_column_mapping_w_callable_and_ignored_column(self):
    """An ignored column (saved as `['', 100]`) must not get a suggestion."""
    # Pretend the callable finds the column saved as '', i.e. ignored.
    def previous(raw, *args, **kwargs):
        if raw == u'Building ID':
            return [u'', 100]

    expected = copy.deepcopy(self.expected)
    expected[u'Building ID'] = [u'', 100]

    result = mapper.build_column_mapping(
        self.raw_columns,
        self.dest_columns,
        previous_mapping=previous,
    )
    self.assertDictEqual(result, expected)
def test_build_column_mapping_w_callable(self):
    """Callable result at the beginning of the list."""
    # Pretend a saved-mapping lookup hits for 'Building ID' only; the
    # callable's (table, field, confidence) triple wins for that column.
    def previous(raw, *args, **kwargs):
        if raw == u'Building ID':
            return [u'PropertyState', u'custom_id_1', 27]

    result = mapper.build_column_mapping(
        self.raw_columns,
        self.dest_columns,
        previous_mapping=previous,
    )

    self.assertDictEqual(result, {
        u'Address': [u'PropertyState', u'address_line_1', 90],
        u'BBL': [u'TaxLotState', u'jurisdiction_tax_lot_id', 0],
        u'Building ID': [u'PropertyState', u'custom_id_1', 27],
        u'City': [u'PropertyState', u'city', 100],
        u'Name': [u'PropertyState', u'name', 100],
    })
def mapping_suggestions(self, request, pk):
    """
    Returns suggested mappings from an uploaded file's headers to known data fields.

    :param request: DRF request; `organization_id` is read from the query params.
    :param pk: primary key of the ImportFile whose headers are being mapped.
    :return: JsonResponse with suggested_column_mappings plus the property and
        taxlot columns available as mapping targets, or a 400 error payload
        when the import file cannot be found.
    """
    organization_id = request.query_params.get('organization_id', None)

    result = {'status': 'success'}

    # Resolving the org through OrganizationUser also verifies the requesting
    # user actually belongs to it.
    membership = OrganizationUser.objects.select_related('organization') \
        .get(organization_id=organization_id, user=request.user)
    organization = membership.organization

    # For now, each organization holds their own mappings. This is non-ideal, but it is the
    # way it is for now. In order to move to parent_org holding, then we need to be able to
    # dynamically match columns based on the names and not the db id (or support many-to-many).
    # parent_org = organization.get_parent()

    try:
        import_file = ImportFile.objects.get(
            pk=pk, import_record__super_organization_id=organization.pk)
    except ImportFile.DoesNotExist:
        return JsonResponse(
            {
                'status': 'error',
                'message': 'Could not find import file with pk=' + str(pk)
            }, status=status.HTTP_400_BAD_REQUEST)

    # Get a list of the database fields in a list, these are the db columns and
    # the extra_data columns
    property_columns = Column.retrieve_mapping_columns(
        organization.pk, 'property')
    taxlot_columns = Column.retrieve_mapping_columns(
        organization.pk, 'taxlot')

    # If this is a portfolio manager file, then load in the PM mappings and if
    # the column_mappings are not in the original mappings, default to PM
    if import_file.from_portfolio_manager:
        pm_mappings = simple_mapper.get_pm_mapping(
            import_file.first_row_columns, resolve_duplicates=True)
        suggested_mappings = mapper.build_column_mapping(
            import_file.first_row_columns,
            Column.retrieve_all_by_tuple(organization_id),
            previous_mapping=get_column_mapping,
            map_args=[organization],
            default_mappings=pm_mappings,
            thresh=80)
    elif import_file.from_buildingsync:
        # BuildingSync files likewise supply their own default mappings.
        bsync_mappings = xml_mapper.build_column_mapping()
        suggested_mappings = mapper.build_column_mapping(
            import_file.first_row_columns,
            Column.retrieve_all_by_tuple(organization_id),
            previous_mapping=get_column_mapping,
            map_args=[organization],
            default_mappings=bsync_mappings,
            thresh=80)
    else:
        # All other input types
        suggested_mappings = mapper.build_column_mapping(
            import_file.first_row_columns,
            Column.retrieve_all_by_tuple(organization.pk),
            previous_mapping=get_column_mapping,
            map_args=[organization],
            thresh=80  # percentage match that we require. 80% is random value for now.
        )

    # replace None with empty string for column names and PropertyState for tables
    # TODO #239: Move this fix to build_column_mapping
    for m in suggested_mappings:
        table, destination_field, _confidence = suggested_mappings[m]
        if destination_field is None:
            suggested_mappings[m][1] = ''

    # Fix the table name, eventually move this to the build_column_mapping
    for m in suggested_mappings:
        table, _destination_field, _confidence = suggested_mappings[m]
        # Do not return the campus, created, updated fields... that is force
        # them to be in the property state
        if not table or table == 'Property':
            suggested_mappings[m][0] = 'PropertyState'
        elif table == 'TaxLot':
            suggested_mappings[m][0] = 'TaxLotState'

    result['suggested_column_mappings'] = suggested_mappings
    result['property_columns'] = property_columns
    result['taxlot_columns'] = taxlot_columns

    return JsonResponse(result)
def test_mapping_takes_into_account_selected_units(self):
    """Unit-aware mappings convert raw values into the destination units.

    Maps 'Site EUI' with source units kWh/m**2/year and 'Gross Floor Area'
    with source units m**2 onto PropertyState fields, then asserts the
    stored values were converted to kBtu/ft**2/year and ft**2 respectively.
    """
    # Just as in the previous test, build extra_data PropertyState
    raw_state = self.property_state_factory.get_property_state_as_extra_data(
        import_file_id=self.import_file.id,
        source_type=ASSESSED_RAW,
        data_state=DATA_STATE_IMPORT,
    )

    # Replace the site_eui and gross_floor_area key-value that gets
    # autogenerated by get_property_state_as_extra_data
    del raw_state.extra_data['site_eui']
    raw_state.extra_data['Site EUI'] = 100
    del raw_state.extra_data['gross_floor_area']
    raw_state.extra_data['Gross Floor Area'] = 100
    raw_state.save()

    self.import_file.raw_save_done = True
    self.import_file.save()

    # Build mappings - with unit-aware destinations and non-default unit choices
    suggested_mappings = mapper.build_column_mapping(
        list(raw_state.extra_data.keys()),
        Column.retrieve_all_by_tuple(self.org),
        previous_mapping=get_column_mapping,
        map_args=[self.org],
        thresh=80)

    mappings = []
    for raw_column, suggestion in suggested_mappings.items():
        if raw_column == 'Site EUI':
            # Non-default source units: raw values arrive as kWh/m**2/year.
            mappings.append({
                "from_field": raw_column,
                "from_units": 'kWh/m**2/year',
                "to_table_name": 'PropertyState',
                "to_field": 'site_eui',
                "to_field_display_name": 'Site EUI',
            })
        elif raw_column == 'Gross Floor Area':
            # Non-default source units: raw values arrive in square meters.
            mappings.append({
                "from_field": raw_column,
                "from_units": 'm**2',
                "to_table_name": 'PropertyState',
                "to_field": 'gross_floor_area',
                "to_field_display_name": 'Gross Floor Area',
            })
        else:
            # Everything else takes the suggestion as-is, with no units.
            other_mapping = {
                "from_field": raw_column,
                "from_units": None,
                "to_table_name": suggestion[0],
                "to_field": suggestion[1],
                "to_field_display_name": suggestion[1],
            }
            mappings.append(other_mapping)

    # Perform mapping, creating the initial PropertyState records.
    Column.create_mappings(mappings, self.org, self.user, self.import_file.id)
    tasks.map_data(self.import_file.id)

    # Verify that the values have been converted appropriately
    state = self.import_file.find_unmatched_property_states().get()
    self.assertAlmostEqual(state.site_eui,
                           (100 * ureg('kWh/m**2/year')).to('kBtu/ft**2/year'))
    self.assertAlmostEqual(state.gross_floor_area,
                           (100 * ureg('m**2')).to('ft**2'))
def test_remapping_with_and_without_unit_aware_columns_doesnt_lose_data(
        self):
    """
    During import, when the initial -State objects are created from the
    extra_data values, ColumnMapping objects are used to take the extra_data
    dictionary values and create the -State objects, setting the DB-level
    values as necessary - e.g. taking a raw "Site EUI (kBtu/ft2)" value and
    inserting it into the DB field "site_eui".

    Previously, remapping could cause extra Column objects to be created,
    and subsequently, this created extra ColumnMapping objects. These extra
    ColumnMapping objects could cause raw values to be inserted into the
    wrong DB field on -State creation.
    """
    # Just as in the previous test, build extra_data PropertyState
    state = self.property_state_factory.get_property_state_as_extra_data(
        import_file_id=self.import_file.id,
        source_type=ASSESSED_RAW,
        data_state=DATA_STATE_IMPORT,
        random_extra=42,
    )

    # Replace the site_eui key-value that gets autogenerated by
    # get_property_state_as_extra_data
    del state.extra_data['site_eui']
    state.extra_data['Site EUI (kBtu/ft2)'] = 123
    state.save()

    self.import_file.raw_save_done = True
    self.import_file.save()

    # Build 2 sets of mappings - with and without a unit-aware destination
    # site_eui data
    suggested_mappings = mapper.build_column_mapping(
        list(state.extra_data.keys()),
        Column.retrieve_all_by_tuple(self.org),
        previous_mapping=get_column_mapping,
        map_args=[self.org],
        thresh=80)

    ed_site_eui_mappings = []
    unit_aware_site_eui_mappings = []
    for raw_column, suggestion in suggested_mappings.items():
        if raw_column == 'Site EUI (kBtu/ft2)':
            # Make this an extra_data field (without from_units)
            ed_site_eui_mappings.append({
                "from_field": raw_column,
                "from_units": None,
                "to_table_name": 'PropertyState',
                "to_field": raw_column,
                "to_field_display_name": raw_column,
            })
            # The unit-aware variant targets the DB-level site_eui column.
            unit_aware_site_eui_mappings.append({
                "from_field": raw_column,
                "from_units": 'kBtu/ft**2/year',
                "to_table_name": 'PropertyState',
                "to_field": 'site_eui',
                "to_field_display_name": 'Site EUI',
            })
        else:
            # All other columns map identically in both sets.
            other_mapping = {
                "from_field": raw_column,
                "from_units": None,
                "to_table_name": suggestion[0],
                "to_field": suggestion[1],
                "to_field_display_name": suggestion[1],
            }
            ed_site_eui_mappings.append(other_mapping)
            unit_aware_site_eui_mappings.append(other_mapping)

    # Map and remap the file multiple times with different mappings each time.

    # Round 1 - Map site_eui data into Extra Data
    Column.create_mappings(ed_site_eui_mappings, self.org, self.user,
                           self.import_file.id)
    tasks.map_data(self.import_file.id)

    # There should only be one raw 'Site EUI (kBtu/ft2)' Column object
    self.assertEqual(
        1,
        self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)',
                                   table_name='').count())

    # The one propertystate should have site eui info in extra_data
    prop = self.import_file.find_unmatched_property_states().get()
    self.assertIsNone(prop.site_eui)
    self.assertIsNotNone(prop.extra_data.get('Site EUI (kBtu/ft2)'))

    # Round 2 - Map site_eui data into the PropertyState attribute "site_eui"
    Column.create_mappings(unit_aware_site_eui_mappings, self.org, self.user,
                           self.import_file.id)
    tasks.map_data(self.import_file.id, remap=True)

    self.assertEqual(
        1,
        self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)',
                                   table_name='').count())

    # The one propertystate should have site eui info in site_eui
    prop = self.import_file.find_unmatched_property_states().get()
    self.assertIsNotNone(prop.site_eui)
    self.assertIsNone(prop.extra_data.get('Site EUI (kBtu/ft2)'))

    # Round 3 - Map site_eui data into Extra Data
    Column.create_mappings(ed_site_eui_mappings, self.org, self.user,
                           self.import_file.id)
    tasks.map_data(self.import_file.id, remap=True)

    self.assertEqual(
        1,
        self.org.column_set.filter(column_name='Site EUI (kBtu/ft2)',
                                   table_name='').count())

    # The one propertystate should have site eui info in extra_data
    prop = self.import_file.find_unmatched_property_states().get()
    self.assertIsNone(prop.site_eui)
    self.assertIsNotNone(prop.extra_data.get('Site EUI (kBtu/ft2)'))
def _mapping_suggestions(import_file_id, org_id, user):
    """
    Temp function for allowing both API versions for mapping suggestions to
    return the same data. Move this to mapping_suggestions once we can
    deprecate the old get_column_mapping_suggestion method.

    :param import_file_id: import file id
    :param org_id: organization id of user
    :param user: user object from request
    :return: dict
    """
    result = {'status': 'success'}

    # Resolving the org through OrganizationUser also verifies the user
    # belongs to it.
    membership = OrganizationUser.objects.select_related('organization') \
        .get(organization_id=org_id, user=user)
    organization = membership.organization

    import_file = ImportFile.objects.get(
        pk=import_file_id, import_record__super_organization_id=organization.pk)

    # Get a list of the database fields in a list
    md = mapping_data.MappingData()

    # TODO: Move this to the MappingData class and remove calling add_extra_data
    # Check if there are any DB columns that are not defined in the
    # list of mapping data.
    # NL 12/2/2016: Removed 'organization__isnull' Query because we only want the
    # the ones belonging to the organization
    columns = list(
        Column.objects.select_related('unit').filter(
            mapped_mappings__super_organization_id=org_id).exclude(
            column_name__in=md.keys))
    md.add_extra_data(columns)

    # Portfolio manager files have their own mapping scheme - yuck, really?
    if import_file.from_portfolio_manager:
        _log.debug("map Portfolio Manager input file")
        suggested_mappings = simple_mapper.get_pm_mapping(
            import_file.first_row_columns, resolve_duplicates=True)
    else:
        _log.debug("custom mapping of input file")
        # All other input types
        suggested_mappings = mapper.build_column_mapping(
            import_file.first_row_columns,
            md.keys_with_table_names,
            previous_mapping=get_column_mapping,
            map_args=[organization],
            thresh=80  # percentage match that we require. 80% is random value for now.
        )

    # replace None with empty string for column names and PropertyState for tables
    for m in suggested_mappings:
        _table, field, _conf = suggested_mappings[m]
        if field is None:
            suggested_mappings[m][1] = u''

    # Fix the table name, eventually move this to the build_column_mapping and
    # build_pm_mapping
    for m in suggested_mappings:
        table, _dest, _conf = suggested_mappings[m]
        if not table:
            suggested_mappings[m][0] = 'PropertyState'

    result['suggested_column_mappings'] = suggested_mappings
    result['column_names'] = md.building_columns
    result['columns'] = md.data

    return result