def extract_equipment_sheet(sheet, col_range, eq_type, first_col, volume_unit_col): equipment_ids = [] for row in range(6, sheet.nrows): eq = get_equipment(sheet, row, col_range, eq_type) if not isinstance(eq[4], int) and not isinstance(eq[4], float): continue psycopg2.extras.execute_values(cursor, INSERT_EQUIPMENT, [tuple(eq)]) equipment_id = cursor.fetchall()[0] equipment_ids.append(equipment_id) allocations = [] for col in range(first_col + 7, first_col + 20): allocation = none_if_not_number( get_sheet_value(sheet, row, col)) if allocation is not None: allocations.append(( application_id, equipment_id, get_sheet_value(sheet, 5, col), process_name_id.get( get_sheet_value(sheet, 5, col) ), # get the id of the process from the header of the column none_if_not_number(get_sheet_value(sheet, row, col)))) psycopg2.extras.execute_values( cursor, '''insert into ciip_2018_load.equipment_consumption (application_id, equipment_id, processing_unit_name, processing_unit_id, consumption_allocation) values %s''', allocations) return equipment_ids
def extract(ciip_book): emissions_sheet = ciip_book.sheet_by_name('Emissions') emissions = [] current_emission_cat = None for row in range(2, emissions_sheet.nrows - 1, 2) : # ignore the last row if get_sheet_value(emissions_sheet, row, 5) is None: # it's the category header current_emission_cat = ciip_swim_emissions_categories.get(get_sheet_value(emissions_sheet, row, 1)) if current_emission_cat is None: # we went too far, there's probably some junk at the bottom of the sheet break else : quantity = zero_if_not_number(get_sheet_value(emissions_sheet, row, 4)) gas_description = get_sheet_value(emissions_sheet, row, 1) emissions.append( { "sourceTypeName" : current_emission_cat, "gasType" : ciip_swim_gas_types.get(gas_description), "gasDescription" : gas_description, "annualEmission" : quantity, "annualC02e" : zero_if_not_number(get_sheet_value(emissions_sheet, row, 8)) } ) non_zero_emissions = filter(lambda e : e['annualEmission'] != 0 or e['annualC02e'] != 0, emissions) return {sourceType:list(gases) for sourceType,gases in itertools.groupby(non_zero_emissions, lambda emission : emission['sourceTypeName'])}
def extract(ciip_book): products = [] if 'Production' in ciip_book.sheet_names(): # In the SFO applications, associated emissions are in a separate sheet # Make a dict of the associated emissions associated_emissions_sheet = ciip_book.sheet_by_name( 'Emissions Allocation') associated_emissions = {} for row in range(4, 26, 2): product = get_sheet_value(associated_emissions_sheet, row, 2) emission = none_if_not_number( get_sheet_value(associated_emissions_sheet, row, 6)) if emission is None: # in case a unit was entered in the 'tonnes CO2' column emission = none_if_not_number( get_sheet_value(associated_emissions_sheet, row, 4)) if product is not None: associated_emissions[product.strip().lower()] = emission production_sheet = ciip_book.sheet_by_name('Production') for row in range(3, 42, 2): product = get_sheet_value(production_sheet, row, 4) if product is not None: emission = associated_emissions.get(product.strip().lower( )) if len(associated_emissions) > 1 else list( associated_emissions.values())[0] products.append({ "productName": product, "quantity": none_if_not_number( get_sheet_value(production_sheet, row, 6)), "units": none_if_not_number( get_sheet_value(production_sheet, row, 8)), "associatedEmissions": emission }) else: production_sheet = ciip_book.sheet_by_name( 'Module GHGs and production') for row in range(5, 18): q = none_if_not_number(get_sheet_value(production_sheet, row, 1)) e = none_if_not_number(get_sheet_value(production_sheet, row, 3)) if q is not None or e is not None: products.append({ "productName": get_sheet_value(production_sheet, row, 0), "quantity": q, "units": get_sheet_value(production_sheet, row, 2), "associatedEmissions": e, }) return products
def extract(ciip_book, cursor, operator): facility = Facility(operator) admin_sheet = ciip_book.sheet_by_name('Administrative Info') bcghg_id = str(get_sheet_value( admin_sheet, 8, 3)).replace('.0', '').strip() if get_sheet_value( admin_sheet, 8, 3) is not None else None facility.name = get_sheet_value(admin_sheet, 30, 1, operator.trade_name) facility.bcghg_id = str(bcghg_id) facility.type = get_sheet_value(admin_sheet, 32, 1) facility.naics = int( get_sheet_value(admin_sheet, 32, 3, get_sheet_value(admin_sheet, 10, 3, 0))) facility.description = get_sheet_value( admin_sheet, 42, 1) if admin_sheet.nrows >= 43 else None cursor.execute( ''' select distinct swrs_facility_id from swrs.identifier where identifier_value = %s ''', (facility.bcghg_id, )) res = cursor.fetchone() if res is not None and res[0] not in SWRS_FACILITY_ID_EXCULDE_VALUES: facility.swrs_facility_id = res[0] else: # try using the facility name cursor.execute( ''' select distinct swrs_facility_id from swrs.facility where lower(facility_name) = %s ''', (str(facility.name).lower(), )) res = cursor.fetchall() if res is not None and len(res) == 1: facility.swrs_facility_id = res[0][0] # Outlier: This partial facility name match is the only way to match this row in the ciip_2018 data to a swrs_id in the swrs data. if 'oda cre' in facility.name.lower(): facility.swrs_facility_id = 14235 if 'Production' in ciip_book.sheet_names(): production_sheet = ciip_book.sheet_by_name('Production') facility.production_calculation_explanation = get_sheet_value( production_sheet, 47, 2) facility.production_additional_info = get_sheet_value( production_sheet, 51, 2) facility.production_public_info = get_sheet_value( production_sheet, 55, 2) return facility
def extract(ciip_book, cursor): operator = Operator() admin_sheet = ciip_book.sheet_by_name('Administrative Info') duns = get_sheet_value(admin_sheet, 8, 1) if type(duns) is str: duns = duns.replace('-', '') duns = duns.replace(' ', '') elif duns is not None: duns = str(int(duns)) if duns in DUNS_EXCLUDE_VALUES: duns = None bc_corp_reg = get_sheet_value(admin_sheet, 10, 1) if bc_corp_reg is not None: bc_corp_reg = str(bc_corp_reg).replace(" ", "").replace('.0', '') operator.legal_name = get_sheet_value(admin_sheet, 4, 1) operator.trade_name = get_sheet_value(admin_sheet, 6, 1) operator.duns = duns operator.bc_corp_reg = bc_corp_reg operator.is_bc_cop_reg_valid = False # overwritten below if true if bc_corp_reg is not None: orgbook_req = requests.get( 'https://orgbook.gov.bc.ca/api/v2/topic/ident/registration/' + bc_corp_reg + '/formatted') if orgbook_req.status_code == 200: orgbook_resp = orgbook_req.json() operator.is_bc_cop_reg_valid = True operator.orgbook_legal_name = orgbook_resp['names'][0]['text'] operator.is_registration_active = not orgbook_resp['names'][0][ 'inactive'] get_swrs_id_by_name(operator, cursor) if operator.swrs_operator_id is None and duns is not None: cursor.execute( """ select distinct swrs_organisation_id from swrs.organisation where duns = %s """, (duns, )) res = cursor.fetchone() if res is not None: operator.swrs_operator_id = res[0] return operator
def get_equipment(sheet, row, col_range, eq_type): eq = [application_id, eq_type] numeric_cols = [4, 5, 6, 7, 8, 12, 13, 14, 15, 16] # tuple index for col in col_range: if col is None: eq.append(None) else: val = get_sheet_value(sheet, row, col) if val is not None: if isinstance(val, str) and val.strip().endswith('%'): try: val = float(val.replace('%', '')) except: print('Failed to convert ' + val + ' to float') val = None elif len(eq) in numeric_cols: try: val = float(val) except: print('Failed to convert ' + str(val) + ' to float') val = None elif len( eq ) == 11: # inlet_sales_compression_same_engine, should be Y or N if not isinstance(val, str) or val.strip().upper() not in [ 'Y', 'N' ]: val = None eq.append(val) eq += [operator_id, facility_id] return eq
def extract(ciip_book): admin_sheet = ciip_book.sheet_by_name('Administrative Info') cert_sheet = ciip_book.sheet_by_name('Statement of Certification') co_header_idx = search_row_index(cert_sheet, 1, 'Signature of Certifying Official') rep_addr = { 'streetAddress': get_sheet_value(admin_sheet, 24, 1), 'municipality': get_sheet_value(admin_sheet, 24, 3), 'province': get_sheet_value(admin_sheet, 26, 1), 'postalCode': get_sheet_value(admin_sheet, 26, 3) } co_addr = { 'streetAddress': get_sheet_value(cert_sheet, co_header_idx + 11, 1), 'municipality': get_sheet_value(cert_sheet, co_header_idx + 11, 4), 'province': get_sheet_value(cert_sheet, co_header_idx + 13, 1), 'postalCode': get_sheet_value(cert_sheet, co_header_idx + 13, 4) } contacts = { "certifying_official_contact": { 'firstName': get_sheet_value(cert_sheet, co_header_idx + 7, 1), 'lastName': get_sheet_value(cert_sheet, co_header_idx + 7, 4), 'position': get_sheet_value(cert_sheet, co_header_idx + 9, 1), 'email': get_sheet_value(cert_sheet, co_header_idx + 9, 4), 'phone': get_sheet_value(cert_sheet, co_header_idx + 9, 6), 'address': co_addr }, "operational_representative_contact": { 'firstName': admin_sheet.cell_value(18, 1), 'lastName': admin_sheet.cell_value(18, 3), 'position': admin_sheet.cell_value(20, 1), 'email': admin_sheet.cell_value(20, 3), 'phone': admin_sheet.cell_value(22, 1), 'address': rep_addr } } return contacts
def extract(ciip_book): elec_sheet = None row_range = None col_range = None if 'Electricity' in ciip_book.sheet_names(): elec_sheet = ciip_book.sheet_by_name('Electricity') row_range = range(4, 7, 2) col_range = range(1, 10, 2) else: elec_sheet = ciip_book.sheet_by_name('Electricity and Heat') row_range = range(5, 7) col_range = range(1, 6) elec_and_heat = filter( lambda product: not (product['quantity'] is None and product[ 'associatedEmissions'] is None), [{ "productName": "Purchased Electricity", "quantity": none_if_not_number( get_sheet_value(elec_sheet, row_range[0], col_range[0])), "units": "MWh", "associatedEmissions": None }, { "productName": "Generated Electricity", "quantity": none_if_not_number( get_sheet_value(elec_sheet, row_range[0], col_range[1])), "units": "MWh", "associatedEmissions": none_if_not_number( get_sheet_value(elec_sheet, row_range[0], col_range[4])) }, { "productName": "Consumed Electricity", "quantity": none_if_not_number( get_sheet_value(elec_sheet, row_range[0], col_range[2])), "units": "MWh", "associatedEmissions": None }, { "productName": "Sold Electricity", "quantity": none_if_not_number( get_sheet_value(elec_sheet, row_range[0], col_range[3])), "units": "MWh", "associatedEmissions": None }, { "productName": "Purchased Heat", "quantity": none_if_not_number( get_sheet_value(elec_sheet, row_range[1], col_range[0])), "units": "GJ", "associatedEmissions": None }, { "productName": "Generated Heat", "quantity": none_if_not_number( get_sheet_value(elec_sheet, row_range[1], col_range[1])), "units": "GJ", "associatedEmissions": none_if_not_number( get_sheet_value(elec_sheet, row_range[1], col_range[4])) }, { "productName": "Consumed Heat", "quantity": none_if_not_number( get_sheet_value(elec_sheet, row_range[1], col_range[2])), "units": "GJ", "associatedEmissions": None }, { "productName": "Sold Heat", "quantity": none_if_not_number( get_sheet_value(elec_sheet, row_range[1], col_range[3])), "units": "GJ", "associatedEmissions": None }]) return list(elec_and_heat)
def extract(ciip_book, cursor, application_id, operator_id, facility_id): INSERT_EQUIPMENT = '''insert into ciip_2018_load.equipment ( application_id, equipment_category, equipment_identifier, equipment_type, power_rating, load_factor, utilization, runtime_hours, design_efficiency, electrical_source, consumption_allocation_method, inlet_sales_compression_same_engine, inlet_suction_pressure, inlet_discharge_pressure, sales_suction_pressure, sales_discharge_pressure, volume_throughput, volume_units, volume_estimation_method, comments, operator_id, facility_id ) values %s returning id''' cursor.execute( ''' select id, product from ciip_2018_load.production where application_id = %s ''', (application_id, )) processes = cursor.fetchall() process_name_id = {} for p in processes: process_name_id[p[1]] = p[0] def get_equipment(sheet, row, col_range, eq_type): eq = [application_id, eq_type] numeric_cols = [4, 5, 6, 7, 8, 12, 13, 14, 15, 16] # tuple index for col in col_range: if col is None: eq.append(None) else: val = get_sheet_value(sheet, row, col) if val is not None: if isinstance(val, str) and val.strip().endswith('%'): try: val = float(val.replace('%', '')) except: print('Failed to convert ' + val + ' to float') val = None elif len(eq) in numeric_cols: try: val = float(val) except: print('Failed to convert ' + str(val) + ' to float') val = None elif len( eq ) == 11: # inlet_sales_compression_same_engine, should be Y or N if not isinstance(val, str) or val.strip().upper() not in [ 'Y', 'N' ]: val = None eq.append(val) eq += [operator_id, facility_id] return eq def extract_equipment_sheet(sheet, col_range, eq_type, first_col, volume_unit_col): equipment_ids = [] for row in range(6, sheet.nrows): eq = get_equipment(sheet, row, col_range, eq_type) if not isinstance(eq[4], int) and not isinstance(eq[4], float): continue psycopg2.extras.execute_values(cursor, INSERT_EQUIPMENT, [tuple(eq)]) equipment_id = cursor.fetchall()[0] equipment_ids.append(equipment_id) allocations = [] for col in range(first_col + 7, first_col + 20): allocation = none_if_not_number( get_sheet_value(sheet, row, col)) if allocation is not None: allocations.append(( application_id, equipment_id, get_sheet_value(sheet, 5, col), process_name_id.get( get_sheet_value(sheet, 5, col) ), # get the id of the process from the header of the column none_if_not_number(get_sheet_value(sheet, row, col)))) psycopg2.extras.execute_values( cursor, '''insert into ciip_2018_load.equipment_consumption (application_id, equipment_id, processing_unit_name, processing_unit_id, consumption_allocation) values %s''', allocations) return equipment_ids if 'Electrical Equipment' in ciip_book.sheet_names(): sheet = ciip_book.sheet_by_name('Electrical Equipment') first_col = search_col_index(sheet, 5, FIRST_COL_HEADER) volume_unit_col = search_col_index(sheet, 5, VOLUME_UNITS_COL_HEADER) col_range = list(range(first_col + 0, first_col + 7)) + list( range(first_col + 21, first_col + 29)) + list( range(volume_unit_col, volume_unit_col + 3)) extract_equipment_sheet(sheet, col_range, 'Electrical', first_col, volume_unit_col) if 'Gas Fired Equipment' in ciip_book.sheet_names(): sheet = ciip_book.sheet_by_name('Gas Fired Equipment') first_col = search_col_index(sheet, 5, FIRST_COL_HEADER) volume_unit_col = search_col_index(sheet, 5, VOLUME_UNITS_COL_HEADER) col_range = list(range(first_col + 0, first_col + 7)) col_range.append(None) col_range += list(range(first_col + 21, first_col + 28)) col_range += [volume_unit_col, volume_unit_col + 1] col_range.append(volume_unit_col + 16) ids = extract_equipment_sheet(sheet, col_range, 'Gas Fired', first_col, volume_unit_col) ids.reverse() # extract emissions allocations for row in range(len(ids)): equipment_id = ids[row] allocations = [] for col in range(volume_unit_col + 2, volume_unit_col + 15): allocation = none_if_not_number( get_sheet_value(sheet, HEADER_ROW + 1 + row, col)) if allocation is not None: allocations.append(( application_id, equipment_id, get_sheet_value(sheet, HEADER_ROW, col), process_name_id.get( get_sheet_value(sheet, HEADER_ROW, col) ), # get the id of the process from the header of the column none_if_not_number( get_sheet_value(sheet, HEADER_ROW + 1 + row, col)))) psycopg2.extras.execute_values( cursor, '''insert into ciip_2018_load.equipment_emission (application_id, equipment_id, processing_unit_name, processing_unit_id, emission_allocation) values %s''', allocations) # extract output streams output_streams = [] for col in range(first_col + 28, volume_unit_col): output_stream_value = get_sheet_value(sheet, HEADER_ROW + 1 + row, col) if output_stream_value is not None: output_streams.append( (application_id, equipment_id, get_sheet_value(sheet, HEADER_ROW, col), output_stream_value)) psycopg2.extras.execute_values( cursor, '''insert into ciip_2018_load.equipment_output_stream (application_id, equipment_id, output_stream_label, output_stream_value) values %s''', output_streams)
def extract(ciip_book): fuel_sheet = ciip_book.sheet_by_name('Fuel Usage') if 'Fuel Usage' in ciip_book.sheet_names() else ciip_book.sheet_by_name('Fuel Usage ') fuels = [] use_alt_fuel_format = get_sheet_value(fuel_sheet, 3, 0) != 'Fuel Type ' row_range = range(4, fuel_sheet.nrows) if not use_alt_fuel_format else range(5, fuel_sheet.nrows - 1, 2) for row in row_range: fuel = {} if use_alt_fuel_format: fuel = { 'fuel_type' : get_sheet_value(fuel_sheet, row, 1), 'fuel_type_alt' : get_sheet_value(fuel_sheet, row, 3), 'fuel_description' : get_sheet_value(fuel_sheet, row, 5), 'quantity' : get_sheet_value(fuel_sheet, row, 7), 'fuel_units' : get_sheet_value(fuel_sheet, row, 9), 'carbon_emissions' : get_sheet_value(fuel_sheet, row, 11), } else: fuel = { 'fuel_type' : get_sheet_value(fuel_sheet, row, 0), 'fuel_type_alt' : None, 'fuel_description' : get_sheet_value(fuel_sheet, row, 1), 'quantity' : get_sheet_value(fuel_sheet, row, 2), 'fuel_units' : get_sheet_value(fuel_sheet, row, 3), 'carbon_emissions' : get_sheet_value(fuel_sheet, row, 4), } if not (fuel['fuel_type'] is None and fuel['fuel_type_alt'] is None and fuel['fuel_description'] is None): # skip rows without any label try: if fuel['quantity'] is not None: fuel['quantity'] = float(fuel['quantity']) if fuel['carbon_emissions'] is not None: fuel['carbon_emissions'] = float(fuel['carbon_emissions']) fuels.append(fuel) except: print('Could not parse Fuel row: ' + ','.join(str(e) for e in fuel.values())) return fuels