def cache_first_rows(import_record, parser):
    """Cache headers, and rows 2-6 for validation/viewing.

    :param import_record: ImportRecord inst.
    :param parser: unicode-csv.Reader instance.

    Unfortunately, this is duplicated logic from data_importer,
    but since data_importer makes many faulty assumptions we need
    to do it differently.

    """
    parser.csvfile.seek(0)
    rows = parser.next()

    first_row = rows.next().values()
    if first_row:
        first_row = ROW_DELIMITER.join(first_row)
    import_record.cached_first_row = first_row or ''

    validation_rows = []
    for i in range(5):
        row = rows.next()
        if row:
            validation_rows.append(row)

    import_record.cached_second_to_fifth_row = "\n".join(
        [ROW_DELIMITER.join(r.values()) for r in validation_rows]
    )
    import_record.save()

    # Reset our file pointer for mapping.
    parser.csvfile.seek(0)
def cache_first_rows(import_file, parser):
    """Cache headers, and rows 2-6 for validation/viewing.

    :param import_file: ImportFile inst.
    :param parser: unicode-csv.Reader instance.

    Unfortunately, this is duplicated logic from data_importer,
    but since data_importer makes many faulty assumptions we need
    to do it differently.

    """
    parser.seek_to_beginning()
    rows = parser.next()

    validation_rows = []
    for i in range(5):
        row = rows.next()
        if row:
            validation_rows.append(row)

    import_file.cached_second_to_fifth_row = "\n".join(
        [
            ROW_DELIMITER.join(map(lambda x: str(x), r.values()))
            for r in validation_rows
        ]
    )

    first_row = rows.next().keys()
    if first_row:
        first_row = ROW_DELIMITER.join(first_row)
    import_file.cached_first_row = first_row or ''

    import_file.save()

    # Reset our file pointer for mapping.
    parser.seek_to_beginning()
def cache_first_rows(import_file, parser):
    """Cache headers, and rows 2-6 for validation/viewing.

    :param import_file: ImportFile inst.
    :param parser: unicode-csv.Reader instance.

    Unfortunately, this is duplicated logic from data_importer,
    but since data_importer makes many faulty assumptions we need
    to do it differently.

    """
    parser.seek_to_beginning()
    rows = parser.next()

    validation_rows = []
    for i in range(5):
        try:
            row = rows.next()
            if row:
                validation_rows.append(row)
        except StopIteration:
            # Fewer than 5 rows in the file.
            break

    # This is a fix for issue #24 to use the original field order when
    # importing. This is ultimately not the correct place for this fix;
    # the correct fix is to update the mcm code to a newer version where
    # the readers in mcm/reader.py have a headers() function defined,
    # and then just do:
    #     first_row = parser.headers()
    # But until we can patch the mcm code, this should fix the issue.
    local_reader = parser.reader
    if isinstance(local_reader, reader.ExcelParser):
        first_row = local_reader.sheet.row_values(local_reader.header_row)
    elif isinstance(local_reader, reader.CSVParser):
        first_row = local_reader.csvreader.fieldnames
        first_row = [local_reader._clean_super(x) for x in first_row]
    else:
        # Default to the original behavior for an unrecognized parser
        # type, for lack of anything better.
        first_row = rows.next().keys()

    tmp = []
    for r in validation_rows:
        tmp.append(ROW_DELIMITER.join([str(r[x]) for x in first_row]))

    import_file.cached_second_to_fifth_row = "\n".join(tmp)

    if first_row:
        first_row = ROW_DELIMITER.join(first_row)
    import_file.cached_first_row = first_row or ''

    import_file.save()

    # Reset our file pointer for mapping.
    parser.seek_to_beginning()
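# Hypothetical usage sketch, not taken from the original codebase: one way
# cache_first_rows might be driven for a freshly uploaded CSV. The MCMParser
# import path, its constructor signature, and the cache_rows_for_upload
# helper are assumptions for illustration only.
from mcm.reader import MCMParser  # assumed import path


def cache_rows_for_upload(import_file_pk, local_path):
    """Look up the ImportFile and cache its header plus rows 2-6."""
    import_file = ImportFile.objects.get(pk=import_file_pk)
    with open(local_path, 'rb') as f:
        # Assumed: MCMParser wraps the file object and exposes .reader,
        # .seek_to_beginning(), and .next() as used by cache_first_rows.
        parser = MCMParser(f)
        cache_first_rows(import_file, parser)
    return import_file.cached_first_row, import_file.cached_second_to_fifth_row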
def test_get_first_five_rows(self):
    """Make sure we get our first five rows back correctly."""
    import_record = ImportRecord.objects.create()
    expected_raw_columns = ['tax id', 'name', 'etc.']
    expected_raw_rows = [
        ['02023', '12 Jefferson St.', 'etc.'],
        ['12433', '23 Washington St.', 'etc.'],
        ['04422', '4 Adams St.', 'etc.'],
    ]
    expected = [
        dict(zip(expected_raw_columns, row)) for row in expected_raw_rows
    ]
    expected_saved_format = '\n'.join([
        ROW_DELIMITER.join(row) for row in expected_raw_rows
    ])
    import_file = ImportFile.objects.create(
        import_record=import_record,
        cached_first_row=ROW_DELIMITER.join(expected_raw_columns),
        cached_second_to_fifth_row=expected_saved_format
    )

    # Just make sure we were saved correctly.
    self.assertEqual(
        import_file.cached_second_to_fifth_row, expected_saved_format
    )

    url = reverse_lazy("seed:get_first_five_rows")
    resp = self.client.post(
        url,
        data=json.dumps({'import_file_id': import_file.pk}),
        content_type='application/json'
    )

    body = json.loads(resp.content)

    self.assertEqual(body.get('first_five_rows', []), expected)
def setUp(self):
    self.maxDiff = None
    self.org = Organization.objects.create()
    user_details = {
        'username': '******',
        'password': '******',
        'email': '*****@*****.**',
    }
    self.user = User.objects.create_superuser(**user_details)
    OrganizationUser.objects.create(user=self.user, organization=self.org)
    self.client.login(**user_details)

    self.import_record = ImportRecord.objects.create(owner=self.user)
    self.import_record.super_organization = self.org
    self.import_record.save()
    self.import_file = ImportFile.objects.create(
        import_record=self.import_record,
        cached_first_row=ROW_DELIMITER.join(
            [u'name', u'address', u'year built', u'building id']
        )
    )
def test_get_raw_column_names(self):
    """Make sure we get column names back in a format we expect."""
    import_record = ImportRecord.objects.create()
    expected_raw_columns = ['tax id', 'name', 'etc.']
    expected_saved_format = ROW_DELIMITER.join(expected_raw_columns)
    import_file = ImportFile.objects.create(
        import_record=import_record,
        cached_first_row=expected_saved_format
    )

    # Just make sure we were saved correctly.
    self.assertEqual(import_file.cached_first_row, expected_saved_format)

    url = reverse_lazy("seed:get_raw_column_names")
    resp = self.client.post(
        url,
        data=json.dumps({'import_file_id': import_file.pk}),
        content_type='application/json'
    )

    body = json.loads(resp.content)

    self.assertEqual(body.get('raw_columns', []), expected_raw_columns)
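# Illustrative sketch, not the actual seed view code: the two tests above
# rely on cached_first_row being ROW_DELIMITER-joined column names and
# cached_second_to_fifth_row being newline-separated, ROW_DELIMITER-joined
# rows. A minimal unpacking helper for that cached format; the helper name
# is an assumption.
def unpack_cached_rows(import_file):
    """Rebuild column names and row dicts from the cached strings."""
    raw_columns = import_file.cached_first_row.split(ROW_DELIMITER)
    first_five_rows = [
        dict(zip(raw_columns, line.split(ROW_DELIMITER)))
        for line in import_file.cached_second_to_fifth_row.splitlines()
    ]
    return raw_columns, first_five_rows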
def create_models(data, import_file):
    """
    Create a BuildingSnapshot, a CanonicalBuilding, and a Meter. Then,
    create TimeSeries models for each meter reading in data.

    :params data: dictionary of building data from a green button xml file
        in the form returned by xml_importer.building_data
    :params import_file: ImportFile referencing the original xml file;
        needed for linking to BuildingSnapshot and for determining
        super_organization
    :returns: the created CanonicalBuilding
    """
    # cache data on import_file; this is a proof of concept and we
    # only have two example files available so we hardcode the only
    # heading present.
    import_file.cached_first_row = ROW_DELIMITER.join(["address"])
    import_file.cached_second_to_fifth_row = ROW_DELIMITER.join(
        [data['address']]
    )
    import_file.save()

    raw_bs = BuildingSnapshot()
    raw_bs.import_file = import_file
    # We require a save to get our PK.
    # We save here to set our initial source PKs.
    raw_bs.save()
    super_org = import_file.import_record.super_organization
    raw_bs.super_organization = super_org

    set_initial_sources(raw_bs)
    raw_bs.address_line_1 = data['address']
    raw_bs.source_type = GREEN_BUTTON_BS
    raw_bs.save()

    # create canonical building
    cb = CanonicalBuilding.objects.create(canonical_snapshot=raw_bs)
    raw_bs.canonical_building = cb
    raw_bs.save()

    # log building creation
    AuditLog.objects.create(
        organization=import_file.import_record.super_organization,
        user=import_file.import_record.owner,
        content_object=cb,
        action="create_building",
        action_note="Created building",
    )

    # create meter for this dataset (each dataset is a single energy type)
    e_type = energy_type(data['service_category'])
    e_type_string = next(
        pair[1] for pair in seed.models.ENERGY_TYPES if pair[0] == e_type
    )
    m_name = "gb_{0}[{1}]".format(str(raw_bs.id), e_type_string)
    m_energy_units = energy_units(data['meter']['uom'])
    meter = Meter.objects.create(
        name=m_name, energy_type=e_type, energy_units=m_energy_units
    )
    meter.building_snapshot.add(raw_bs)
    meter.save()

    # now timeseries data for the meter
    for reading in data['interval']['readings']:
        start_time = int(reading['start_time'])
        duration = int(reading['duration'])

        begin_time = datetime.fromtimestamp(start_time)
        end_time = datetime.fromtimestamp(start_time + duration)
        value = reading['value']
        cost = reading['cost']

        new_ts = TimeSeries.objects.create(
            begin_time=begin_time,
            end_time=end_time,
            reading=value,
            cost=cost
        )
        new_ts.meter = meter
        new_ts.save()

    return cb
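# Hypothetical usage sketch, not from the original codebase: create_models
# expects the dict produced by xml_importer.building_data (see the docstring
# above). The xml_importer import path, the building_data call signature,
# and the import_green_button_file helper are assumptions for illustration.
from seed.green_button import xml_importer  # assumed import path


def import_green_button_file(import_file):
    """Parse a Green Button XML ImportFile and build its models."""
    with open(import_file.file.path, 'rb') as f:
        xml_data = f.read()
    # Assumed: building_data returns the dict shape documented above
    # (address, service_category, meter, interval readings, ...).
    data = xml_importer.building_data(xml_data)
    return create_models(data, import_file)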