def test_map_row_w_bad_concat_config(self):
    """Test expected behavior with bad concat config data."""
    fake_row = {
        u'street number': u'1232',
        u'Property Id': u'23423423',
        u'street name': u'Fanfare St.',
        u'quadrant': u'NE',
    }
    # No target defined.
    bad_concat1 = {
        'concat_columns': ['street number', 'quadrant', 'street name'],
    }
    modified_model = mapper.map_row(
        fake_row, self.fake_mapping, FakeModel, concat=bad_concat1
    )
    expected = u'1232 NE Fanfare St.'
    # We default to saving it to an attribute that won't get serialized.
    self.assertEqual(modified_model.__broken_target__, expected)

    # Now with a target, but including unknown column headers.
    bad_concat2 = {
        'concat_columns': ['face', 'thing', 'street number', 'quadrant'],
        'target': 'address_1',
    }
    modified_model = mapper.map_row(
        fake_row, self.fake_mapping, FakeModel, concat=bad_concat2
    )
    # All of our nonsense headers were simply ignored.
    self.assertEqual(modified_model.address_1, u'1232 NE')

    # Target defined, but no columns to concatenate.
    bad_concat3 = {'target': 'address_1'}
    modified_model = mapper.map_row(
        fake_row, self.fake_mapping, FakeModel, concat=bad_concat3
    )
    # If we don't specify any columns to concatenate, do nothing.
    self.assertEqual(getattr(modified_model, 'address_1', None), None)
def test_map_row(self):
    """Test the mapping between csv values and python objects."""
    fake_row = {
        u'Property Id': u'234235423',
        u'Year Ending': u'2013/03/13',
        u'heading1': u'value1',
        u'heading2': u'value2',
        u'heading3': u'value3',
    }
    fake_model_class = FakeModel

    modified_model = mapper.map_row(
        fake_row, self.fake_mapping, fake_model_class
    )

    expected_extra = {u'heading3': u'value3'}

    self.assertEqual(getattr(modified_model, u'property_id'), u'234235423')
    self.assertEqual(
        getattr(modified_model, u'year_ending'), u'2013/03/13'
    )
    self.assertEqual(getattr(modified_model, u'heading_1'), u'value1')
    self.assertEqual(getattr(modified_model, u'heading_2'), u'value2')
    self.assertTrue(
        isinstance(getattr(modified_model, 'extra_data'), dict)
    )
    self.assertEqual(modified_model.extra_data, expected_extra)
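# NOTE: the fixtures referenced throughout these tests (``FakeModel``,
# ``self.fake_mapping``, ``self.test_cleaner``) come from the test case's
# setUp, which is not shown here. A minimal sketch of what they plausibly
# look like, reconstructed only from the assertions in these tests (an
# assumption, not the original definitions):
#
#     class FakeModel(object):
#         """Bare model stand-in: arbitrary attributes plus an extra_data dict."""
#         def save(self):
#             pass
#
#     self.fake_mapping = {
#         u'Property Id': u'property_id',
#         u'Year Ending': u'year_ending',
#         u'heading1': u'heading_1',
#         u'heading2': u'heading_2',
#     }
#
#     # self.test_cleaner is assumed to coerce typed columns, e.g. turning
#     # u'234,235,423' into the float 234235423.0 for property_id.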
def test_map_w_apply_func(self):
    """Make sure that our ``apply_func`` is run against specified items."""
    fake_model_class = FakeModel
    fake_row = {
        u'Property Id': u'234,235,423',
        u'heading1': u'value1',
        u'Space Warning': 'Something to do with space.',
    }

    def test_apply_func(model, item, value):
        if not getattr(model, 'mapped_extra_data', None):
            model.mapped_extra_data = {}
        model.mapped_extra_data[item] = value

    modified_model = mapper.map_row(
        fake_row,
        self.fake_mapping,
        fake_model_class,
        cleaner=self.test_cleaner,
        apply_func=test_apply_func,
        apply_columns=['Property Id', 'heading1']
    )

    # Assert that our function was called only on our specified columns
    # and that their values were set as expected.
    self.assertDictEqual(
        modified_model.mapped_extra_data,
        {
            u'heading_1': u'value1',  # Saved correct column name.
            u'property_id': 234235423.0  # Also saved correct type.
        }
    )
    # Still maintain that things which aren't mapped, even by apply_func,
    # go to the extra_data bucket.
    self.assertDictEqual(
        modified_model.extra_data,
        {'Space Warning': 'Something to do with space.'}
    )
def test_map_row(self):
    """Test the mapping between csv values and python objects."""
    fake_row = {
        u'Property Id': u'234235423',
        u'Year Ending': u'2013/03/13',
        u'heading1': u'value1',
        u'heading2': u'value2',
        u'heading3': u'value3',
        u'heading4': u'',
        u'heading5': None,
    }
    fake_model_class = FakeModel

    modified_model = mapper.map_row(
        fake_row, self.fake_mapping, fake_model_class
    )

    # Empty columns should not result in entries in extra_data.
    expected_extra = {u'heading3': u'value3'}

    self.assertEqual(getattr(modified_model, u'property_id'), u'234235423')
    self.assertEqual(
        getattr(modified_model, u'year_ending'), u'2013/03/13'
    )
    self.assertEqual(getattr(modified_model, u'heading_1'), u'value1')
    self.assertEqual(getattr(modified_model, u'heading_2'), u'value2')
    self.assertTrue(
        isinstance(getattr(modified_model, 'extra_data'), dict)
    )
    self.assertEqual(modified_model.extra_data, expected_extra)
def test_map_row_w_concat(self):
    """Make sure that concatenation works."""
    test_mapping = copy.deepcopy(self.fake_mapping)
    concat = {
        'target': 'address_1',
        # Reconstruct in this precise order.
        'concat_columns': ['street number', 'quadrant', 'street name']
        # No need to specify a delimiter here; our default is a space.
    }
    fake_row = {
        u'street number': u'1232',
        u'street name': u'Fanfare St.',
        u'quadrant': u'NE',
    }

    modified_model = mapper.map_row(
        fake_row, test_mapping, FakeModel, concat=concat
    )

    # Note: the address_1 mapping was dynamically defined by the concat
    # config.
    self.assertEqual(modified_model.address_1, u'1232 NE Fanfare St.')
def map_row_chunk(chunk, file_pk, source_type, prog_key, increment,
                  *args, **kwargs):
    """Does the work of matching a mapping to a source type and saving.

    :param chunk: list of dict of str. One row's worth of parse data.
    :param file_pk: int, the PK for an ImportFile obj.
    :param source_type: int, either ASSESSED_RAW or PORTFOLIO_RAW.
    :param cleaner: (optional), the cleaner class you want to send to
        mapper.map_row (e.g. to turn numbers into floats).
    :param raw_ids: (optional kwarg), the list of ids in chunk order.

    """
    import_file = ImportFile.objects.get(pk=file_pk)
    save_type = PORTFOLIO_BS
    if source_type == ASSESSED_RAW:
        save_type = ASSESSED_BS

    concats = []

    org = Organization.objects.get(
        pk=import_file.import_record.super_organization.pk
    )

    mapping, concats = get_column_mappings(org)
    map_cleaner = _build_cleaner(org)

    # For those column mappings which are not db columns, we
    # need to let MCM know to apply our mapping function to them.
    apply_columns = []

    mappable_columns = get_mappable_columns()
    for item in mapping:
        if mapping[item] not in mappable_columns:
            apply_columns.append(item)

    apply_func = apply_data_func(mappable_columns)

    model = None  # Guard against an empty chunk.
    for row in chunk:
        model = mapper.map_row(
            row,
            mapping,
            BuildingSnapshot,
            cleaner=map_cleaner,
            concat=concats,
            apply_columns=apply_columns,
            apply_func=apply_func,
            *args,
            **kwargs
        )
        model.import_file = import_file
        model.source_type = save_type
        model.clean()
        model.super_organization = import_file.import_record.super_organization
        model.save()

    if model:
        # Make sure that we've saved all of the extra_data column names.
        save_column_names(model, mapping=mapping)

    increment_cache(prog_key, increment)
def map_rows(self, mapping, model_class):
    """Convenience method to call ``mapper.map_row`` on all rows.

    :param mapping: dict, keys map columns to model_class attrs.
    :param model_class: class, reference to model class.

    """
    for row in self.next():
        # Figure out if this is an insert or update,
        # e.g. model.objects.get('some canonical id') or model_class().
        yield mapper.map_row(row, mapping, model_class)
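# Illustrative usage of ``map_rows`` (a sketch; ``reader`` and
# ``column_mapping`` are assumed names, not from the original source):
#
#     for snapshot in reader.map_rows(column_mapping, BuildingSnapshot):
#         snapshot.save()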
def update_building(old_snapshot, updated_values, user, *args, **kwargs):
    """Creates a new snapshot with updated values."""
    from seed.mappings import seed_mappings, mapper as seed_mapper

    mappable, meta, sources = _get_filtered_values(updated_values)

    canon = old_snapshot.canonical_building or None
    # Need to hydrate sources.
    sources = {
        k: BuildingSnapshot.objects.get(pk=v) for k, v in sources.items() if v
    }

    # Handle the mapping of "normal" attributes.
    new_snapshot = mapper.map_row(
        mappable,
        dict(seed_mappings.BuildingSnapshot_to_BuildingSnapshot),
        BuildingSnapshot,
        initial_data=sources  # Copy parent's source attributes.
    )

    diff_sources = _get_diff_sources(mappable, old_snapshot)
    for diff in diff_sources:
        setattr(new_snapshot, '{0}_source'.format(diff), new_snapshot)

    # Convert dates to something Django likes.
    new_snapshot.clean()
    new_snapshot.canonical_building = canon
    new_snapshot.save()

    # Add all the orgs the old snapshot had.
    new_snapshot.super_organization = old_snapshot.super_organization

    # Move the meta data over.
    for meta_val in meta:
        setattr(new_snapshot, meta_val, meta[meta_val])

    # Insert new_snapshot into the inheritance chain.
    old_snapshot.children.add(new_snapshot)
    new_snapshot.import_file = old_snapshot.import_file

    # Update/override anything in extra data.
    extra, sources = seed_mapper.merge_extra_data(
        new_snapshot, old_snapshot, default=new_snapshot
    )
    new_snapshot.extra_data = extra
    new_snapshot.extra_data_sources = sources
    new_snapshot.save()

    # If we had a canonical building and its canonical_snapshot was the old
    # snapshot, update it.
    if canon and canon.canonical_snapshot == old_snapshot:
        canon.canonical_snapshot = new_snapshot
        canon.save()

    return new_snapshot
def map_row_chunk(chunk, file_pk, source_type, prog_key, increment,
                  *args, **kwargs):
    """Does the work of matching a mapping to a source type and saving.

    :param chunk: list of dict of str. One row's worth of parse data.
    :param file_pk: int, the PK for an ImportFile obj.
    :param source_type: int, either ASSESSED_RAW or PORTFOLIO_RAW.
    :param cleaner: (optional), the cleaner class you want to send to
        mapper.map_row (e.g. to turn numbers into floats).
    :param raw_ids: (optional kwarg), the list of ids in chunk order.

    """
    import_file = ImportFile.objects.get(pk=file_pk)
    save_type = PORTFOLIO_BS
    mapping = espm_mapping.MAP
    map_cleaner = PORTFOLIO_CLEANER
    # Default to PM so we don't unnecessarily query for a mapping.
    if source_type == ASSESSED_RAW:
        org = Organization.objects.filter(
            users=import_file.import_record.owner
        )[0]
        mapping = get_column_mappings(org)
        save_type = ASSESSED_BS
        map_cleaner = ASSESSED_CLEANER

    # Pull out any columns meant to be concatenated together.
    mapping, concats = filter_concat_configs(mapping)

    for row in chunk:
        model = mapper.map_row(
            row,
            mapping,
            BuildingSnapshot,
            cleaner=map_cleaner,
            concat=concats,
            *args,
            **kwargs
        )
        model.import_file = import_file
        model.source_type = save_type
        model.clean()
        model.super_organization = import_file.import_record.super_organization
        model.save()

    increment_cache(prog_key, increment)
def test_map_row_dynamic_mapping_with_cleaner(self):
    """Type-based cleaners on dynamic fields based on reverse-mapping."""
    mapper.build_column_mapping(self.raw_columns, self.dest_columns)
    fake_row = {
        u'Property Id': u'234,235,423',
        u'heading1': u'value1',
    }
    fake_model_class = FakeModel

    modified_model = mapper.map_row(
        fake_row,
        self.fake_mapping,
        fake_model_class,
        cleaner=self.test_cleaner
    )

    self.assertEqual(modified_model.property_id, 234235423.0)
def test_map_row_handle_unmapped_columns(self):
    """No KeyError when we check mappings for our column."""
    test_mapping = copy.deepcopy(self.fake_mapping)
    del test_mapping[u'Property Id']
    fake_row = {
        u'Property Id': u'234,235,423',
        u'heading1': u'value1',
    }
    fake_model_class = FakeModel

    modified_model = mapper.map_row(
        fake_row,
        test_mapping,
        fake_model_class,
        cleaner=self.test_cleaner
    )

    self.assertEqual(getattr(modified_model, 'property_id', None), None)
    self.assertEqual(getattr(modified_model, 'heading_1'), u'value1')
def test_map_row_w_initial_data(self):
    """Make sure that we apply initial data before mapping."""
    test_mapping = copy.deepcopy(self.fake_mapping)
    initial_data = {'property_name': 'Example'}
    fake_row = {
        u'Property Id': u'234,235,423',
        u'heading1': u'value1',
    }
    fake_model_class = FakeModel

    modified_model = mapper.map_row(
        fake_row,
        test_mapping,
        fake_model_class,
        cleaner=self.test_cleaner,
        initial_data=initial_data
    )

    # Our data is set by initial_data...
    self.assertEqual(
        getattr(modified_model, 'property_name', None), 'Example'
    )
    # ...even though we have no explicit mapping for it.
    self.assertTrue('property_name' not in test_mapping)
def test_map_row_w_concat_and_delimiter(self):
    """Make sure we honor the delimiter."""
    concat = {
        'target': 'address_1',
        # Reconstruct in this precise order.
        'concat_columns': ['street number', 'quadrant', 'street name'],
        # Override the default delimiter (a space) with a slash.
        'delimiter': '/',
    }
    fake_row = {
        u'street number': u'1232',
        u'street name': u'Fanfare St.',
        u'quadrant': u'NE',
    }

    modified_model = mapper.map_row(
        fake_row, self.fake_mapping, FakeModel, concat=concat
    )

    self.assertEqual(modified_model.address_1, u'1232/NE/Fanfare St.')
def test_concat_multiple_targets(self):
    """Make sure we're able to create multiple concatenation targets."""
    fake_row = {
        u'street number': u'1232',
        u'Property Id': u'23423423',
        u'street name': u'Fanfare St.',
        u'quadrant': u'NE',
        u'sale_month': '01',
        u'sale_day': '23',
        u'sale_year': '2012',
    }
    # Two concat configs, each with its own target.
    concat = [
        # For our street data.
        {
            'target': 'address1',
            'concat_columns': ['street number', 'quadrant', 'street name'],
        },
        # For our sale data.
        {
            'target': 'sale_date',
            'concat_columns': ['sale_month', 'sale_day', 'sale_year'],
            'delimiter': '/'
        }
    ]

    modified_model = mapper.map_row(
        fake_row, self.fake_mapping, FakeModel, concat=concat
    )

    st_expected = u'1232 NE Fanfare St.'
    sale_expected = u'01/23/2012'
    self.assertEqual(modified_model.address1, st_expected)
    self.assertEqual(modified_model.sale_date, sale_expected)
def update_building(old_snapshot, updated_values, user, *args, **kwargs):
    """Creates a new snapshot with updated values."""
    from seed.mappings import seed_mappings, mapper as seed_mapper

    mappable, meta, sources = _get_filtered_values(updated_values)

    # Extra data will get filtered.
    extra_data = updated_values['extra_data']
    extra_data = extra_data or old_snapshot.extra_data or {}

    canon = old_snapshot.canonical_building or None
    # Need to hydrate sources.
    sources = {
        k: BuildingSnapshot.objects.get(pk=v) for k, v in sources.items() if v
    }

    # Handle the mapping of "normal" attributes.
    new_snapshot = mapper.map_row(
        mappable,
        dict(seed_mappings.BuildingSnapshot_to_BuildingSnapshot),
        BuildingSnapshot,
        initial_data=sources  # Copy parent's source attributes.
    )

    diff_sources = _get_diff_sources(mappable, old_snapshot)
    for diff in diff_sources:
        setattr(new_snapshot, '{0}_source'.format(diff), new_snapshot)

    # Convert dates to something Django likes.
    new_snapshot.clean()
    new_snapshot.canonical_building = canon
    new_snapshot.save()

    # Add all the orgs the old snapshot had.
    new_snapshot.super_organization = old_snapshot.super_organization

    # Move the meta data over.
    for meta_val in meta:
        setattr(new_snapshot, meta_val, meta[meta_val])

    # Insert new_snapshot into the inheritance chain.
    old_snapshot.children.add(new_snapshot)
    new_snapshot.import_file_id = old_snapshot.import_file_id

    new_snapshot.extra_data = extra_data

    # Update/override anything in extra data.
    extra, sources = seed_mapper.merge_extra_data(
        new_snapshot, old_snapshot, default=new_snapshot
    )
    new_snapshot.extra_data = extra
    new_snapshot.extra_data_sources = sources
    new_snapshot.save()

    # If we had a canonical building and its canonical_snapshot was the old
    # snapshot, update it.
    if canon and canon.canonical_snapshot == old_snapshot:
        canon.canonical_snapshot = new_snapshot
        canon.save()

    # If the old snapshot was in any project, the ProjectBuilding set
    # needs to be updated to point to the new snapshot. We might want
    # to refactor ProjectBuildings to contain a CanonicalBuilding
    # foreign key in the future.
    old_snapshot.project_building_snapshots.all().update(
        building_snapshot=new_snapshot
    )

    # Check to see if there are any new ``extra_data`` fields added for this
    # org.
    save_column_names(new_snapshot)

    return new_snapshot