wb923_2013 = xlrd.open_workbook(RAW_FILE_NAME_923_2_2013) ws923_2013 = wb923_2013.sheet_by_name(TAB_NAME_923_2_2013) print("Loading Form 860-2") wb860_2 = xlrd.open_workbook(RAW_FILE_NAME_860_2) ws860_2 = wb860_2.sheet_by_name(TAB_NAME_860_2) print("Loading Form 860-3") wb860_3 = xlrd.open_workbook(RAW_FILE_NAME_860_3) ws860_3 = wb860_3.sheet_by_name(TAB_NAME_860_3) # read in plants from File 2 of EIA-860 print("Reading in plants...") plants_dictionary = {} for row_id in xrange(2, ws860_2.nrows): rv = ws860_2.row_values(row_id) # row value name = pw.format_string(rv[COLS_860_2['name']]) idnr = pw.make_id(SAVE_CODE, int(rv[COLS_860_2['idnr']])) capacity = 0.0 generation = pw.PlantGenerationObject() owner = pw.format_string(str(rv[COLS_860_2['owner']])) try: latitude = float(rv[COLS_860_2['lat']]) except: latitude = pw.NO_DATA_NUMERIC try: longitude = float(rv[COLS_860_2['lng']]) except: longitude = pw.NO_DATA_NUMERIC location = pw.LocationObject(u"", latitude, longitude) new_plant = pw.PowerPlant(idnr, name, plant_country=COUNTRY_NAME, plant_location=location, plant_coord_source=SOURCE_NAME,
latitude_col = headers.index(COLNAMES[14]) longitude_col = headers.index(COLNAMES[15]) geolocation_source_col = headers.index(COLNAMES[16]) year_of_data_col = headers.index(COLNAMES[17]) except: print(u"- ERROR: One or more columns missing in {0}, skipping...".format(afile)) continue # read each row in the file for row in datareader: # skip plants that aren't operational status = row[status_col] if status not in ['Operational', 'Operating', '']: continue try: name = pw.format_string(row[name_col]) if not name: # ignore accidental blank lines continue except: print(u"-Error: Can't read plant name.") continue # must have plant name - don't read plant if not try: idnr = str(row[id_col]) if not idnr: # must have plant ID - don't read plant if not print(u"-Error: Null ID for plant {0}.".format(name)) continue except: print(u"-Error: Can't read ID for plant {0}.".format(name)) continue # must have plant ID - don't read plant if not try: capacity = float(pw.format_string(row[capacity_col].replace(",", ""))) # note: may need to convert to MW
name_col = rv.index(COLNAMES[1]) unit_col = rv.index(COLNAMES[2]) year_col = rv.index(COLNAMES[3]) capacity_col = rv.index(COLNAMES[4]) type_col = rv.index(COLNAMES[5]) fuel1_col = rv.index(COLNAMES[6]) fuel2_col = rv.index(COLNAMES[7]) generation_col = rv.index(COLNAMES[8]) for i in xrange(1, sheet.nrows): # read in row rv = sheet.row_values(i) try: name = pw.format_string(rv[name_col]) if not name: continue # don't read rows that lack a plant name (footnotes, etc) except: print(u"-Error: Can't read plant name for plant on row {0}.".format(i)) continue try: id_val = int(rv[id_col]) if not id_val: continue # don't read rows that lack an ID (footnotes, etc) except: print(u"-Error: Can't read ID for plant on row {0}.".format(i)) continue try:
year_built_list) # TODO: fix this new_location = pw.LocationObject(latitude=0.0, longitude=0.0) new_plant = pw.PowerPlant(plant_idnr=plant_idnr, plant_name=name, plant_country=COUNTRY_NAME, plant_capacity=total_capacity, plant_fuel=fuel_type_set, plant_source=URL, plant_location=new_location) plants_dictionary[plant_idnr] = new_plant print("Recording plant {0} with ID: {1}, capacity: {2}, fuel: {3}". format(name, plant_idnr, total_capacity, fuel_type_set)) # next process this plant name = pw.format_string(name_str) plant_idnr = pw.make_id(SAVE_CODE, i) capacity_list = [capacity] year_built_list = [year_built] fuel_type_set = fuel_type else: # not a new plant, just a new line if capacity_val: capacity_list.append(capacity) if year_built_val: year_built_list.append(year_built) if fuel_type and fuel_type not in fuel_type_set: fuel_type_set.update(fuel_type) # complete loop, add final plant total_capacity = sum(capacity_list)
if operational_status != "En Servicio": continue except: print(u"- Error: Can't evaluate operational status.") continue # get id try: id_val = int(p_dict['id']) except: print(u"- Error: Can't get ID.") continue # get name try: name = pw.format_string(p_dict['generador'].encode(ENCODING), ENCODING) except: print(u"- Error: Can't get name for plant {0}".format(id_val)) continue # get capacity in MW try: capacity = float(p_dict['potenciaInstalada'].strip(" MW")) except: print(u"- Error: Can't get capacity for plant {0}".format(id_val)) continue # get fuel type try: fuel_string_raw = p_dict['icon'] primary_fuel_string, other_fuel_set = parse_fuel_URY(
headers = [x.lower() for x in datareader.next()] name_col = headers.index(COLNAMES[0]) capacity_col = headers.index(COLNAMES[1]) fuel_col = headers.index(COLNAMES[2]) latitude_col = headers.index(COLNAMES[3]) longitude_col = headers.index(COLNAMES[4]) # additional columns here # if data source is for a single country country = SOURCE_NAME # read each row in the file count = 1 for row in datareader: try: name = pw.format_string(row[name_col]) except: print(u"Error: Can't read plant name.") continue # must have plant name - don't read plant if not try: capacity = float(pw.format_string( row[capacity_col])) # note: may need to convert to MW except: print(u"Error: Can't read capacity for plant {0}.".format(name)) try: fuel = pw.standardize_fuel(row[fuel_col], fuel_thesaurus) except: print(u"Error: Can't read fuel type for plant {0}.".format(name)) try: latitude = float(row[latitude_col]) longitude = float(row[longitude_col])
placemarks = child.findall(u"kml:Placemark", ns) for pm in placemarks: description = pm.find("kml:description", ns).text # html content content = html.fromstring(description) rows = content.findall("body/table")[1 + shift].findall( "tr")[1].find("td").find("table").findall("tr") status = u"N/A" plant_id = u"" for row in rows: left = row.findall("td")[0].text right = row.findall("td")[1].text # find CEG ID if left == u"CEG": plant_id = pw.format_string(right.strip(), None) # make ID string formatting consistent (is not consistent in raw data) # use only leading alpha chars and 6-digit number; drop trailing digits after "-" if plant_id and u'Null' not in plant_id: if u'.' not in plant_id: plant_id = plant_id[0:3] + u'.' + plant_id[ 3:5] + u'.' + plant_id[ 5:7] + u'.' + plant_id[7:13] elif u'-' in plant_id: plant_id = plant_id[0:16] # remove non-operating plants #if status != u"Operação":
owner_col = rv.index(COLNAMES_1[2]) latitude_col = rv.index(COLNAMES_1[3]) longitude_col = rv.index(COLNAMES_1[4]) capacity_col = rv.index(COLNAMES_1[5]) fuel_col = rv.index(COLNAMES_1[6]) source_col = rv.index(COLNAMES_1[7]) date_col = rv.index(COLNAMES_1[8]) print(u"Reading file 1...") for i in xrange(1, sheet.nrows): # read in row row = sheet.row_values(i) if pw.format_string(row[country_col]) != COUNTRY_NAME: continue try: name = pw.format_string(row[name_col], None) # already in unicode if not name: print(u"-Error: No name on row {0}".format(i + 1)) continue except: print(u"-Error: Can't read name of plant on row {0}".format(i + 1)) name = pw.NO_DATA_UNICODE # without this, next pass thru loop uses old name continue try: owner = pw.format_string(row[owner_col], None) except:
countries_col = rv.index(COLNAMES[5]) capacity_col = rv.index(COLNAMES[6]) owner_col = rv.index(COLNAMES[7]) for i in xrange(1, sheet.nrows): rv = sheet.row_values(i) try: ref = int(rv[ref_col]) if not ref: print("-Error reading ref from: {0}".format(rv[ref_col])) continue except: continue try: project_type = pw.format_string(rv[type_col]) if project_type not in PROJECT_TYPES_TO_READ: # don't read all project types continue try: fuel = pw.standardize_fuel(project_type, fuel_thesaurus) except: print("-Error reading fuel: {0}".format(project_type)) fuel = pw.NO_DATA_SET except: print(u"-Error: Can't read project type for project {0}.".format(ref)) continue try: status = pw.format_string(rv[status_col]) if status != u"Registered":
# optional raw file(s) download
downloaded = pw.download(COUNTRY_NAME, {RAW_FILE_NAME: SOURCE_URL})

# set up fuel type thesaurus
fuel_thesaurus = pw.make_fuel_thesaurus()

# create dictionary for power plant objects
plants_dictionary = {}

# extract powerplant information from file(s)
print(u"Reading in plants...")

# read auxiliary plant information, keyed by the formatted value of each
# row's 'name' column; a later row with the same formatted name replaces an
# earlier one
with open(PLANT_AUX_FILE, 'r') as f:
    reader = csv.DictReader(f)
    aux_plant_info = dict(
        (pw.format_string(row['name']), row) for row in reader)

# open the raw workbook and select the sheet to parse
wb = xlrd.open_workbook(RAW_FILE_NAME)
ws = wb.sheet_by_name(TAB)

# state carried from one sheet row to the next
count = 1
previous_owner = u'None'
previous_name = u'None'
plant_names = {}

# walk the sheet from START_ROW to the last row
for row_id in range(START_ROW, ws.nrows):
    rv = ws.row_values(row_id)
# set up fuel type thesaurus
fuel_thesaurus = pw.make_fuel_thesaurus()

# create dictionary for power plant objects
plants_dictionary = {}

# extract powerplant information from file(s)
print(u"Reading in plants...")

# read locations: formatted value of column 0 -> [float(column 1), float(column 2)]
# NOTE(review): columns 1 and 2 are presumably latitude and longitude, in
# that order - confirm against the location file
locations_dictionary = {}
with open(LOCATION_FILE_NAME, 'r') as f:
    datareader = csv.reader(f)
    # consume the first row so it is not parsed as data; the builtin next()
    # works on Python 2.6+ and Python 3, whereas the .next() method exists
    # only on Python 2 iterators
    headers = next(datareader)
    for row in datareader:
        locations_dictionary[pw.format_string(row[0])] = [
            float(row[1]), float(row[2])]

# read commissioning years: formatted value of column 0 -> column 1,
# stored as the raw string from the file (no numeric conversion here)
commissioning_years_dictionary = {}
with open(COMMISSIONING_YEAR_FILE_NAME, 'r') as f:
    datareader = csv.reader(f)
    headers = next(datareader)  # first row is consumed, not parsed as data
    for row in datareader:
        commissioning_years_dictionary[pw.format_string(row[0])] = row[1]

# open the raw workbook and select the sheet to parse
count = 1
wb = xlrd.open_workbook(RAW_FILE_NAME)
ws = wb.sheet_by_name(TAB)
print("Reading in plants...") count_unit = 1 header_row = True for row_id in xrange(0, ws.nrows): if header_row == True: try: if ws.cell(row_id, 0).value == "Name": header_row = False else: continue except: continue else: # data rows rv = ws.row_values(row_id) try: name = pw.format_string(rv[COLS["name"]]) except: print(u"-Error: Can't read plant name.") continue try: owner = pw.format_string(rv[COLS["owner"]], None) except: owner = pw.NO_DATA_UNICODE print(u"-Error: Can't read plant owner.") try: capacity_max = float(rv[COLS["capacity_max"]]) except: capacity_max = pw.NO_DATA_NUMERIC print( u"-Error: Can't read capacity_max for plant {0}.".format(name)) try:
# look up coordinates using the first 16 characters of the CEG code
ceg_code_short = ceg_code[:16]
if ceg_code_short not in plant_coordinates_keys:
    # no coordinates on record for this code; fall back to the no-data
    # markers without reporting an error
    latitude = pw.NO_DATA_NUMERIC
    longitude = pw.NO_DATA_NUMERIC
    geolocation_source = pw.NO_DATA_UNICODE
else:
    matched_coordinates = plant_coordinates[ceg_code_short]
    latitude = matched_coordinates['latitude']
    longitude = matched_coordinates['longitude']
    found_coordinates_count += 1
    geolocation_source = SOURCE_NAME

# get plant name from the first "font/a" element of the second cell
raw_name = cells[1].findall("font/a")[0].text.strip()
name = pw.format_string(raw_name, None)

# get operational date from the first "font" element of the third cell
op_date = cells[2].findall("font")[0].text.strip()
if not op_date:
    op_year = pw.NO_DATA_NUMERIC
else:
    try:
        d = parse_date(op_date)
        op_year = d.year
        found_operational_year_count += 1
    except:
        # NOTE(review): the bare except treats any failure while parsing
        # the date as "year unknown"; narrowing it needs knowledge of what
        # parse_date raises
        op_year = pw.NO_DATA_NUMERIC

# get plant capacity: locale-aware parse of the fifth cell, scaled to MW
raw_capacity = locale.atof(cells[4].findall("font")[0].text)
capacity = CAPACITY_CONVERSION_TO_MW * raw_capacity
name_col = headers.index(COLNAMES[0]) country_col = headers.index(COLNAMES[1]) plant_id_col = headers.index(COLNAMES[2]) company_col = headers.index(COLNAMES[3]) latitude_col = headers.index(COLNAMES[4]) longitude_col = headers.index(COLNAMES[5]) # read each row in the file count = 1 capacity = pw.NO_DATA_NUMERIC # no capacity data in EPTR fuel = pw.NO_DATA_SET # no fuel data in EPTR for row in datareader: try: name = pw.format_string(row[name_col], encoding=DATA_ENCODING) except: print(u"Error: Can't read plant name.") continue # must have plant name - don't read plant if not try: idnr = int(row[plant_id_col]) except: print(u"Error: Can't read ID for plant {0}.".format(name)) continue try: latitude = float(row[latitude_col]) longitude = float(row[longitude_col]) except: latitude, longitude = 0.0, 0.0 try: owner = pw.format_string(row[company_col])
nger_1415 = list(csv.DictReader(open(NGER_FILENAME_1415))) nger_1314 = list(csv.DictReader(open(NGER_FILENAME_1314))) nger_1213 = list(csv.DictReader(open(NGER_FILENAME_1213))) # create a dictinary of namespaces ns = {"gml": "http://www.opengis.net/gml", "Electricity_Infrastructure": "WFS"} # read data from XML file and parse count = 1 with open(RAW_FILE_NAME, "rU") as f: tree = ET.parse(f) root = tree.getroot() for station in tree.findall("gml:featureMember", ns): plant = station.find( "Electricity_Infrastructure:National_Major_Power_Stations", ns) name = pw.format_string( plant.find("Electricity_Infrastructure:NAME", ns).text) # get object id from AREMI (variable through time) plant_oid = plant.find("Electricity_Infrastructure:OBJECTID", ns).text # check if plant is already known, and skip if there is not a record (includes cases where AREMI has duplicated plants) if plant_oid not in linking_table: print(u"Error: Don't have prescribed ID for plant {0}; OID={1}.". format(name, plant_oid)) continue # get the assigned GPPD IDNR as an int, stripping the 'AUS' prefix plant_id = int(linking_table[plant_oid]['gppd_idnr'][3:]) try: owner = pw.format_string( plant.find("Electricity_Infrastructure:OWNER", ns).text) except:
subFolder=SAVE_CODE, filename=dataset["filename"]) with open(dataset_filename, "rbU") as f: datareader = csv.DictReader(f) for row in datareader: try: idval = int(row["gid"]) except: print("-Error: Can't read ID for line {0}, skipping.".format( count)) continue try: name = pw.format_string( row.get("nombre", row.get("comuna", pw.NO_DATA_UNICODE))) except: print(u"-Error: Can't read name for ID {0}, skipping.".format( idval)) continue try: fuel_string = row.get( "Tipo", row.get("Tipo ", row.get("Combustible", pw.NO_DATA_UNICODE))) fuel = pw.standardize_fuel(fuel_string, fuel_thesaurus) except: print(u"-Error: Can't read fuel for plant {0}.".format(name)) fuel = pw.NO_DATA_SET
owner_col = colnames.index("owners1") latitude_col = colnames.index("latitude_start") longitude_col = colnames.index("longitude_start") location_col = colnames.index("location") owner_col = colnames.index("owners1") generation_col = colnames.index("expected_annual_generation_gwh_nbr") generation_col2 = colnames.index("average_annual_generation_rng1_nbr_gwh") status_col = colnames.index("status_of_plant_itf") # extract data rows = c.fetchall() conn.close() for row in rows: try: name = pw.format_string(row[name_col]) except: print(u"-Error: Can't read plant name.") continue # must have plant name - don't read plant if not try: idnr = int(row[id_col]) except: print(u"-Error: Can't read plant ID: {0}".format(row[id_col])) continue # must have ID number # skip non operational statuses if row[status_col] in NON_OPERATIONAL_STATUSES: continue try: capacity = float(row[capacity_col])
id_col = headers.index(COLNAMES_REPD[0]) name_col = headers.index(COLNAMES_REPD[1]) fuel_col = headers.index(COLNAMES_REPD[2]) capacity_col = headers.index(COLNAMES_REPD[3]) status_col = headers.index(COLNAMES_REPD[4]) x_coordinate_col = headers.index(COLNAMES_REPD[5]) y_coordinate_col = headers.index(COLNAMES_REPD[6]) owner_col = headers.index(COLNAMES_REPD[7]) # read each row in the file count = 1 for row in datareader: if u"Operational" not in row[status_col]: continue # don't load non-operatioal plants try: name = pw.format_string(row[name_col]) except: print(u"-Error: Can't read plant name.") continue # must have plant name - don't read plant if not try: idnr = int(row[id_col]) except: print(u"-Error: Can't read ref id.") continue # must have ID number try: capacity = float(pw.format_string( row[capacity_col])) # note: may need to convert to MW except: print(u"-Error: Can't read capacity for plant {0}.".format(name)) capacity = 0.0 try:
print(u"Reading in plants...") coord_skip_count = 0 # read file line-by-line with open(RAW_FILE_NAME, 'rU') as f: datareader = csv.reader(f) headers = [x.lower() for x in datareader.next()] id_col = headers.index(COLNAMES[0]) name_col = headers.index(COLNAMES[1]) latitude_col = headers.index(COLNAMES[2]) longitude_col = headers.index(COLNAMES[3]) country_col = headers.index(COLNAMES[4]) for row in datareader: idval = int(row[id_col]) name = pw.format_string(row[name_col]) try: latitude = float(row[latitude_col]) longitude = float(row[longitude_col]) except: coord_skip_count += 1 continue country = row[ country_col] # note: this is the ISO3 code so no need to convert # assign ID number idnr = pw.make_id(SAVE_CODE, idval) new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude) new_plant = pw.PowerPlant(plant_idnr=idnr, plant_name=name,