Example #1
def cache_first_rows(import_record, parser):
    """Cache headers, and rows 2-6 for validation/viewing.

    :param import_record: ImportRecord inst.
    :param parser: unicode-csv.Reader instance.

    Unfortunately, this is duplicated logic from data_importer,
    but since data_importer makes many faulty assumptions we need to do
    it differently.

    """
    parser.csvfile.seek(0)
    rows = parser.next()
    first_row = rows.next().values()

    if first_row:
        first_row = ROW_DELIMITER.join(first_row)
    import_record.cached_first_row = first_row or ''
    validation_rows = []
    for i in range(5):
        row = rows.next()
        if row:
            validation_rows.append(row)

    import_record.cached_second_to_fifth_row = "\n".join(
        [ROW_DELIMITER.join(r.values()) for r in validation_rows]
    )
    import_record.save()
    # Reset our file pointer for mapping.
    parser.csvfile.seek(0)
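
For orientation, here is a minimal sketch of the round trip these cache_first_rows variants rely on: each row's cells are joined into a single delimited string for caching and split back apart when read out. The delimiter value below is invented for illustration; the real ROW_DELIMITER is whatever constant the importing app defines.

# Hypothetical delimiter; assumed to be any token that cannot occur in a cell value.
ROW_DELIMITER = "|#*#|"

def cache_row(values):
    """Join one row's cell values into the cached string form."""
    return ROW_DELIMITER.join(str(v) for v in values)

def uncache_row(cached):
    """Split a cached string back into its cell values."""
    return cached.split(ROW_DELIMITER)

# Round trip: caching and un-caching a row returns the original cells.
assert uncache_row(cache_row(['02023', '12 Jefferson St.'])) == ['02023', '12 Jefferson St.']
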
Example #2
def cache_first_rows(import_file, parser):
    """Cache headers, and rows 2-6 for validation/viewing.

    :param import_file: ImportFile inst.
    :param parser: unicode-csv.Reader instance.

    Unfortunately, this is duplicated logic from data_importer,
    but since data_importer makes many faulty assumptions we need to do
    it differently.

    """
    parser.seek_to_beginning()
    rows = parser.next()

    validation_rows = []
    for i in range(5):
        row = rows.next()
        if row:
            validation_rows.append(row)

    import_file.cached_second_to_fifth_row = "\n".join(
        [
            ROW_DELIMITER.join(map(str, r.values()))
            for r in validation_rows
        ]
    )
    first_row = rows.next().keys()
    if first_row:
        first_row = ROW_DELIMITER.join(first_row)
    import_file.cached_first_row = first_row or ''

    import_file.save()
    # Reset our file pointer for mapping.
    parser.seek_to_beginning()
Example #3
def cache_first_rows(import_file, parser):
    """Cache headers, and rows 2-6 for validation/viewing.

    :param import_file: ImportFile inst.
    :param parser: unicode-csv.Reader instance.

    Unfortunately, this is duplicated logic from data_importer,
    but since data_importer makes many faulty assumptions we need to do
    it differently.

    """
    parser.seek_to_beginning()
    rows = parser.next()

    validation_rows = []
    for i in range(5):
        try:
            row = rows.next()
            if row:
                validation_rows.append(row)
        except StopIteration:
            """Less than 5 rows in file"""
            break

    # This is a fix for issue #24: use the original field order when importing.
    # This is ultimately not the correct place for the fix. The correct fix is
    # to update the mcm code to a newer version where the readers in
    # mcm/reader.py define a headers() function, and then simply do
    #     first_row = parser.headers()
    # Until we can patch the mcm code, this works around the issue.
    local_reader = parser.reader
    if isinstance(local_reader, reader.ExcelParser):
        first_row = local_reader.sheet.row_values(local_reader.header_row)
    elif isinstance(local_reader, reader.CSVParser):
        first_row = local_reader.csvreader.fieldnames
        first_row = [local_reader._clean_super(x) for x in first_row]
    else:
        # For an unrecognized parser type, fall back to the original behavior
        # for lack of anything better.
        first_row = rows.next().keys()

    tmp = []
    for r in validation_rows:
        tmp.append(ROW_DELIMITER.join([str(r[x]) for x in first_row]))

    import_file.cached_second_to_fifth_row = "\n".join(tmp)

    if first_row:
        first_row = ROW_DELIMITER.join(first_row)
    import_file.cached_first_row = first_row or ''

    import_file.save()
    # Reset our file pointer for mapping.
    parser.seek_to_beginning()
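
The comment block above names the intended long-term fix: a headers() method on the reader so callers do not have to branch on parser type. Below is a sketch of what such a helper could look like, reusing the same attributes the isinstance() branches touch; the mcm classes and their attributes are taken from the snippet above, but the helper itself is hypothetical, not the actual mcm API.

def headers(parser):
    """Hypothetical helper mirroring the branches above; once mcm/reader.py
    grows an equivalent method, callers could simply write
    first_row = parser.headers()."""
    local_reader = parser.reader
    if isinstance(local_reader, reader.ExcelParser):
        # Excel sheet: header row values in their original column order.
        return local_reader.sheet.row_values(local_reader.header_row)
    if isinstance(local_reader, reader.CSVParser):
        # DictReader-style parser: fieldnames preserve the file's column order.
        return [local_reader._clean_super(x)
                for x in local_reader.csvreader.fieldnames]
    raise TypeError("Unsupported reader type: %r" % type(local_reader))
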
Example #4
    def test_get_first_five_rows(self):
        """Make sure we get our first five rows back correctly."""
        import_record = ImportRecord.objects.create()
        expected_raw_columns = ['tax id', 'name', 'etc.']
        expected_raw_rows = [
            ['02023', '12 Jefferson St.', 'etc.'],
            ['12433', '23 Washington St.', 'etc.'],
            ['04422', '4 Adams St.', 'etc.'],
        ]

        expected = [
            dict(zip(expected_raw_columns, row)) for row in expected_raw_rows
        ]
        expected_saved_format = '\n'.join([
            ROW_DELIMITER.join(row) for row in expected_raw_rows
        ])
        import_file = ImportFile.objects.create(
            import_record=import_record,
            cached_first_row=ROW_DELIMITER.join(expected_raw_columns),
            cached_second_to_fifth_row=expected_saved_format
        )

        # Just make sure we were saved correctly
        self.assertEqual(
            import_file.cached_second_to_fifth_row, expected_saved_format
        )

        url = reverse_lazy("seed:get_first_five_rows")
        resp = self.client.post(
            url, data=json.dumps(
                {'import_file_id': import_file.pk}
            ), content_type='application/json'
        )

        body = json.loads(resp.content)

        self.assertEqual(body.get('first_five_rows', []), expected)
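
For context, a minimal sketch of the reconstruction this test expects from the seed:get_first_five_rows endpoint: split the cached header and the cached rows on ROW_DELIMITER and zip them back into dicts. The helper below is illustrative, not the actual SEED view code.

def first_five_rows(import_file):
    # Recover column names and the cached validation rows, then pair them up.
    columns = import_file.cached_first_row.split(ROW_DELIMITER)
    rows = [
        line.split(ROW_DELIMITER)
        for line in import_file.cached_second_to_fifth_row.splitlines()
    ]
    return [dict(zip(columns, row)) for row in rows]
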
Example #5
    def setUp(self):
        self.maxDiff = None
        self.org = Organization.objects.create()
        user_details = {
            'username': '******',
            'password': '******',
            'email': '*****@*****.**',
        }
        self.user = User.objects.create_superuser(**user_details)
        OrganizationUser.objects.create(user=self.user, organization=self.org)
        self.client.login(**user_details)
        self.import_record = ImportRecord.objects.create(
            owner=self.user
        )
        self.import_record.super_organization = self.org
        self.import_record.save()
        self.import_file = ImportFile.objects.create(
            import_record=self.import_record,
            cached_first_row=ROW_DELIMITER.join(
                [u'name', u'address', u'year built', u'building id']
            )
        )
Example #6
    def test_get_raw_column_names(self):
        """Make sure we get column names back in a format we expect."""
        import_record = ImportRecord.objects.create()
        expected_raw_columns = ['tax id', 'name', 'etc.']
        expected_saved_format = ROW_DELIMITER.join(expected_raw_columns)
        import_file = ImportFile.objects.create(
            import_record=import_record,
            cached_first_row=expected_saved_format
        )

        # Just make sure we were saved correctly
        self.assertEqual(import_file.cached_first_row, expected_saved_format)

        url = reverse_lazy("seed:get_raw_column_names")
        resp = self.client.post(
            url, data=json.dumps(
                {'import_file_id': import_file.pk}
            ), content_type='application/json'
        )

        body = json.loads(resp.content)

        self.assertEqual(body.get('raw_columns', []), expected_raw_columns)
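
The inverse operation this test exercises is simpler still: the raw column names come straight from splitting cached_first_row on ROW_DELIMITER. Again an illustrative sketch, not the actual view code.

def raw_column_names(import_file):
    # cached_first_row holds the header cells joined with ROW_DELIMITER.
    return import_file.cached_first_row.split(ROW_DELIMITER)
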
Example #7
def create_models(data, import_file):
    """
    Create a BuildingSnapshot, a CanonicalBuilding, and a Meter. Then, create
    TimeSeries models for each meter reading in data.

    :param data: dictionary of building data from a Green Button xml file
    in the form returned by xml_importer.building_data
    :param import_file: ImportFile referencing the original xml file; needed
    for linking to BuildingSnapshot and for determining super_organization
    :returns: the created CanonicalBuilding
    """
    # cache data on import_file; this is a proof of concept and we
    # only have two example files available so we hardcode the only
    # heading present.
    import_file.cached_first_row = ROW_DELIMITER.join(["address"])
    import_file.cached_second_to_fifth_row = ROW_DELIMITER.join(
        [data['address']]
    )
    import_file.save()

    raw_bs = BuildingSnapshot()
    raw_bs.import_file = import_file

    # We require a save to get our PK
    # We save here to set our initial source PKs.
    raw_bs.save()
    super_org = import_file.import_record.super_organization
    raw_bs.super_organization = super_org

    set_initial_sources(raw_bs)
    raw_bs.address_line_1 = data['address']
    raw_bs.source_type = GREEN_BUTTON_BS

    raw_bs.save()

    # create canonical building
    cb = CanonicalBuilding.objects.create(canonical_snapshot=raw_bs)

    raw_bs.canonical_building = cb
    raw_bs.save()

    # log building creation
    AuditLog.objects.create(
        organization=import_file.import_record.super_organization,
        user=import_file.import_record.owner,
        content_object=cb,
        action="create_building",
        action_note="Created building",
    )

    # create meter for this dataset (each dataset is a single energy type)
    e_type = energy_type(data['service_category'])
    e_type_string = next(
        pair[1] for pair in seed.models.ENERGY_TYPES if pair[0] == e_type
    )

    m_name = "gb_{0}[{1}]".format(str(raw_bs.id), e_type_string)
    m_energy_units = energy_units(data['meter']['uom'])
    meter = Meter.objects.create(
        name=m_name, energy_type=e_type, energy_units=m_energy_units
    )

    meter.building_snapshot.add(raw_bs)
    meter.save()

    # now timeseries data for the meter
    for reading in data['interval']['readings']:

        start_time = int(reading['start_time'])
        duration = int(reading['duration'])

        begin_time = datetime.fromtimestamp(start_time)
        end_time = datetime.fromtimestamp(start_time + duration)
        value = reading['value']
        cost = reading['cost']

        new_ts = TimeSeries.objects.create(
            begin_time=begin_time,
            end_time=end_time,
            reading=value,
            cost=cost
        )

        new_ts.meter = meter
        new_ts.save()

    return cb
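
A small worked example of the reading-to-time-window conversion used in the loop above, assuming each Green Button reading carries an epoch-second start_time and a duration in seconds; the values below are invented for illustration.

from datetime import datetime, timedelta

reading = {'start_time': '1388534400', 'duration': '900', 'value': 123.4, 'cost': 0}
start_time = int(reading['start_time'])
duration = int(reading['duration'])

begin_time = datetime.fromtimestamp(start_time)
end_time = datetime.fromtimestamp(start_time + duration)

# The stored window spans exactly the reported duration (a 15-minute interval here).
assert end_time - begin_time == timedelta(seconds=duration)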