print(u"Reading in plants...") coord_skip_count = 0 # read file line-by-line with open(RAW_FILE_NAME, 'rU') as f: datareader = csv.reader(f) headers = [x.lower() for x in datareader.next()] id_col = headers.index(COLNAMES[0]) name_col = headers.index(COLNAMES[1]) latitude_col = headers.index(COLNAMES[2]) longitude_col = headers.index(COLNAMES[3]) country_col = headers.index(COLNAMES[4]) for row in datareader: idval = int(row[id_col]) name = pw.format_string(row[name_col]) try: latitude = float(row[latitude_col]) longitude = float(row[longitude_col]) except: coord_skip_count += 1 continue country = row[ country_col] # note: this is the ISO3 code so no need to convert # assign ID number idnr = pw.make_id(SAVE_CODE, idval) new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude) new_plant = pw.PowerPlant(plant_idnr=idnr, plant_name=name,
print(u"Reading in plants...") with open(RAW_FILE_NAME, 'r') as f: data = json.load(f) # select main content of page wiki = data['query']['pages']['85380']['revisions'][0]['*'] # get plants with location from map (first part of raw file) plant_filename = 'http://www.sourcewatch.org/index.php/Category:Existing_coal_plants_in_China' count = 0 for line in wiki.split('\n'): if '~[[' in line: count += 1 idnr = pw.make_id(SAVE_CODE, count) plant = line.translate({ord(k): None for k in u'[];'}).split('~') name = pw.format_string(plant[1], encoding=None) ## if name == pw.NO_DATA_UNICODE: print("-Error: Name problem with {0}".format(idnr) + "at {0}".format(plant[0].split(','))) ## coordinates = plant[0].split(',') lat = coordinates[0] lng = coordinates[1] latitude_number = float(lat) longitude_number = float(lng) new_location = pw.LocationObject(description=pw.NO_DATA_UNICODE, latitude=latitude_number, longitude=longitude_number) new_plant = pw.PowerPlant(idnr, name,
country_thesaurus = pw.make_country_names_thesaurus() # create dictionary for power plant objects plants_dictionary = {} # extract powerplant information from file(s) print(u"Reading in plants...") with open(RAW_FILE_NAME, 'r') as f: raw_plant_list = json.loads(f.read()) for plant in raw_plant_list: idval = int(plant['id']) name = pw.format_string(plant['name']) latitude = float(plant['location']['latitude']) longitude = float(plant['location']['longitude']) country = pw.standardize_country(plant['location']['country']['value'], country_thesaurus) # assign ID number idnr = pw.make_id(SAVE_CODE, idval) new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude) new_plant = pw.PowerPlant(plant_idnr=idnr, plant_name=name, plant_country=country, plant_location=new_location,
countries_col = rv.index(COLNAMES[5]) capacity_col = rv.index(COLNAMES[6]) owner_col = rv.index(COLNAMES[7]) for i in range(1, sheet.nrows): rv = sheet.row_values(i) try: ref = int(rv[ref_col]) if not ref: print("-Error reading ref from: {0}".format(rv[ref_col])) continue except: continue try: project_type = pw.format_string(rv[type_col]) if project_type not in PROJECT_TYPES_TO_READ: # don't read all project types continue try: fuel = pw.standardize_fuel(project_type, fuel_thesaurus) except: print("-Error reading fuel: {0}".format(project_type)) fuel = pw.NO_DATA_SET except: print(u"-Error: Can't read project type for project {0}.".format(ref)) continue try: status = pw.format_string(rv[status_col]) if status != u"Registered":
# Extract the fields of one plant record from its CEG code and table cells.

# Numeric plant ID: the six characters at positions -11..-6 of the CEG code.
plant_id = int(ceg_code[-11:-5])
fuel = standardize_fuel_BRA(ceg_code)

# use CEG code to lookup coordinates (keyed by the first 16 characters)
ceg_code_short = ceg_code[0:16]
if ceg_code_short in plant_coordinates_keys:
    latitude = plant_coordinates[ceg_code_short]['latitude']
    longitude = plant_coordinates[ceg_code_short]['longitude']
    found_coordinates_count += 1
else:
    #print(u"-Error: No coordinates for CEG ID: {0}".format(ceg_code))
    latitude = pw.NO_DATA_NUMERIC
    longitude = pw.NO_DATA_NUMERIC

# get plant name
name = pw.format_string(cells[1].findall("font/a")[0].text.strip(), None)

# get operational date
op_date = cells[2].findall("font")[0].text.strip()
if op_date:
    try:
        d = parse_date(op_date)
        op_year = d.year
        found_operational_year_count += 1
    except Exception:
        # Best effort: an unparseable date leaves the year unset. Catching
        # Exception (not a bare except) lets KeyboardInterrupt and SystemExit
        # propagate instead of being silently swallowed.
        op_year = pw.NO_DATA_NUMERIC
else:
    op_year = pw.NO_DATA_NUMERIC

# get plant capacity (locale-aware number parsing, then scaled to MW)
capacity = CAPACITY_CONVERSION_TO_MW * locale.atof(cells[4].findall("font")[0].text)
# set up fuel type thesaurus
fuel_thesaurus = pw.make_fuel_thesaurus()

# create dictionary for power plant objects
plants_dictionary = {}

# extract powerplant information from file(s)
print(u"Reading in plants...")

# read locations: formatted first column -> [second column, third column]
# (presumably plant name -> [latitude, longitude]; confirm against the file)
locations_dictionary = {}
with open(LOCATION_FILE_NAME, 'r') as f:
    datareader = csv.reader(f)
    # Consume the first row so it is not loaded as data. The builtin next()
    # works on Python 2.6+ and 3, unlike the Python-2-only .next() method.
    headers = next(datareader)
    for row in datareader:
        locations_dictionary[pw.format_string(row[0])] = [row[1], row[2]]

# read commissioning years: formatted first column -> second column
commissioning_years_dictionary = {}
with open(COMMISSIONING_YEAR_FILE_NAME, 'r') as f:
    datareader = csv.reader(f)
    headers = next(datareader)  # skip the first row, as above
    for row in datareader:
        commissioning_years_dictionary[pw.format_string(row[0])] = row[1]

# read data from the workbook (opened with xlrd) and parse
count = 1
wb = xlrd.open_workbook(RAW_FILE_NAME)
ws = wb.sheet_by_name(TAB)
print("Loading Form 923-2") wb1 = xlrd.open_workbook(RAW_FILE_NAME_923_2) ws1 = wb1.sheet_by_name(TAB_NAME_923_2) print("Loading Form 860-2") wb2 = xlrd.open_workbook(RAW_FILE_NAME_860_2) ws2 = wb2.sheet_by_name(TAB_NAME_860_2) print("Loading Form 860-3") wb3 = xlrd.open_workbook(RAW_FILE_NAME_860_3) ws3 = wb3.sheet_by_name(TAB_NAME_860_3) # read in plants from File 2 of EIA-860 print("Reading in plants...") plants_dictionary = {} for row_id in range(2, ws2.nrows): rv = ws2.row_values(row_id) # row value name = pw.format_string(rv[COLS_860_2['name']]) idnr = pw.make_id(SAVE_CODE, int(rv[COLS_860_2['idnr']])) capacity = 0.0 generation = pw.PlantGenerationObject() owner = pw.format_string(str(rv[COLS_860_2['owner']])) try: latitude = float(rv[COLS_860_2['lat']]) except: latitude = pw.NO_DATA_NUMERIC try: longitude = float(rv[COLS_860_2['lng']]) except: longitude = pw.NO_DATA_NUMERIC location = pw.LocationObject(u"", latitude, longitude) new_plant = pw.PowerPlant(idnr, name,
print("Reading in plants...") count_unit = 1 header_row = True for row_id in range(0, ws.nrows): if header_row == True: try: if ws.cell(row_id, 0).value == "Name": header_row = False else: continue except: continue else: # data rows rv = ws.row_values(row_id) try: name = pw.format_string(rv[COLS["name"]]) except: print(u"-Error: Can't read plant name.") continue try: owner = pw.format_string(rv[COLS["owner"]], None) except: owner = pw.NO_DATA_UNICODE print(u"-Error: Can't read plant owner.") try: capacity_max = float(rv[COLS["capacity_max"]]) except: capacity_max = pw.NO_DATA_NUMERIC print(u"-Error: Can't read capacity_max for plant {0}.".format(name)) try: gen_type = pw.format_string(rv[COLS["gen_type"]]) # generation technology type
fuel_col = headers.index(COLNAMES[2]) capacity_col = headers.index(COLNAMES[3]) location_col = headers.index(COLNAMES[4]) commissioning_year_col = headers.index(COLNAMES[6]) owner_col = headers.index(COLNAMES[8]) generation_col = headers.index(COLNAMES[9]) source_col = headers.index(COLNAMES[10]) url_col = headers.index(COLNAMES[11]) country_col = headers.index(COLNAMES[12]) latitude_col = headers.index(COLNAMES[13]) longitude_col = headers.index(COLNAMES[14]) # read each row in the file for row in datareader: try: name = pw.format_string(row[name_col]) if not name: # ignore accidental blank lines continue except: print(u"-Error: Can't read plant name.") continue # must have plant name - don't read plant if not try: idnr = str(row[id_col]) if not idnr: # must have plant ID - don't read plant if not print(u"-Error: Null ID for plant {0}.".format(name)) continue except: print(u"-Error: Can't read ID for plant {0}.".format(name)) continue # must have plant ID - don't read plant if not try: capacity = float(
year_built_list) # TODO: fix this new_location = pw.LocationObject(latitude=0.0, longitude=0.0) new_plant = pw.PowerPlant(plant_idnr=plant_idnr, plant_name=name, plant_country=COUNTRY_NAME, plant_capacity=total_capacity, plant_fuel=fuel_type_set, plant_source=URL, plant_location=new_location) plants_dictionary[plant_idnr] = new_plant print("Recording plant {0} with ID: {1}, capacity: {2}, fuel: {3}". format(name, plant_idnr, total_capacity, fuel_type_set)) # next process this plant name = pw.format_string(name_str) plant_idnr = pw.make_id(SAVE_CODE, i) capacity_list = [capacity] year_built_list = [year_built] fuel_type_set = fuel_type else: # not a new plant, just a new line if capacity_val: capacity_list.append(capacity) if year_built_val: year_built_list.append(year_built) if fuel_type and fuel_type not in fuel_type_set: fuel_type_set.update(fuel_type) # complete loop, add final plant total_capacity = sum(capacity_list)
# Locate each column by its header text. `rv` is presumably the header row
# at this point (it is reassigned per data row below) -- confirm upstream.
# list.index raises ValueError if an expected header is missing.
name_col = rv.index(COLNAMES[1])
unit_col = rv.index(COLNAMES[2])
year_col = rv.index(COLNAMES[3])
capacity_col = rv.index(COLNAMES[4])
type_col = rv.index(COLNAMES[5])
fuel1_col = rv.index(COLNAMES[6])
fuel2_col = rv.index(COLNAMES[7])
generation_col = rv.index(COLNAMES[8])

# Iteration starts at row 1, so row 0 is never read as data.
for i in range(1, sheet.nrows):
    # read in row
    rv = sheet.row_values(i)

    try:
        name = pw.format_string(rv[name_col])
        if not name:
            continue  # don't read rows that lack a plant name (footnotes, etc)
    except Exception:
        # Catch Exception rather than using a bare except so that
        # KeyboardInterrupt / SystemExit are not swallowed.
        print(
            u"-Error: Can't read plant name for plant on row {0}.".format(i))
        continue

    try:
        id_val = int(rv[id_col])
        if not id_val:
            continue  # don't read rows that lack an ID (footnotes, etc)
    except Exception:
        print(u"-Error: Can't read ID for plant on row {0}.".format(i))
        continue
placemarks = child.findall(u"kml:Placemark", ns) for pm in placemarks: description = pm.find("kml:description", ns).text # html content content = html.fromstring(description) rows = content.findall("body/table")[1 + shift].findall( "tr")[1].find("td").find("table").findall("tr") status = u"N/A" plant_id = u"" for row in rows: left = row.findall("td")[0].text right = row.findall("td")[1].text # find CEG ID if left == u"CEG": plant_id = pw.format_string(right.strip(), None) # make ID string formatting consistent (is not consistent in raw data) # use only leading alpha chars and 6-digit number; drop trailing digits after "-" if plant_id and u'Null' not in plant_id: if u'.' not in plant_id: plant_id = plant_id[0:3] + u'.' + plant_id[ 3:5] + u'.' + plant_id[ 5:7] + u'.' + plant_id[7:13] elif u'-' in plant_id: plant_id = plant_id[0:16] # remove non-operating plants #if status != u"Operação":
ns = { "gml": "http://www.opengis.net/gml", "Electricity_Infrastructure": "http://win-amap-ext03:6080/arcgis/services/Electricity_Infrastructure/MapServer/WFSServer" } # read data from XML file and parse count = 1 with open(RAW_FILE_NAME, "rU") as f: tree = ET.parse(f) root = tree.getroot() for station in tree.findall("gml:featureMember", ns): plant = station.find( "Electricity_Infrastructure:National_Major_Power_Stations", ns) name = pw.format_string( plant.find("Electricity_Infrastructure:NAME", ns).text) plant_id = int( plant.find("Electricity_Infrastructure:OBJECTID", ns).text) try: owner = pw.format_string( plant.find("Electricity_Infrastructure:OWNER", ns).text) except: owner = pw.NO_DATA_UNICODE fuel = pw.standardize_fuel( plant.find("Electricity_Infrastructure:PRIMARYFUELTYPE", ns).text, fuel_thesaurus) try: capacity = plant.find("Electricity_Infrastructure:GENERATIONMW", ns).text capacity = float(capacity) except:
id_col = headers.index(COLNAMES_REPD[0]) name_col = headers.index(COLNAMES_REPD[1]) fuel_col = headers.index(COLNAMES_REPD[2]) capacity_col = headers.index(COLNAMES_REPD[3]) status_col = headers.index(COLNAMES_REPD[4]) x_coordinate_col = headers.index(COLNAMES_REPD[5]) y_coordinate_col = headers.index(COLNAMES_REPD[6]) owner_col = headers.index(COLNAMES_REPD[7]) # read each row in the file count = 1 for row in datareader: if u"Operational" not in row[status_col]: continue # don't load non-operatioal plants try: name = pw.format_string(row[name_col]) except: print(u"-Error: Can't read plant name.") continue # must have plant name - don't read plant if not try: idnr = int(row[id_col]) except: print(u"-Error: Can't read ref id.") continue # must have ID number try: capacity = float(pw.format_string( row[capacity_col])) # note: may need to convert to MW except: print(u"-Error: Can't read capacity for plant {0}.".format(name)) capacity = 0.0 try:
owner_col = rv.index(COLNAMES_1[2]) latitude_col = rv.index(COLNAMES_1[3]) longitude_col = rv.index(COLNAMES_1[4]) capacity_col = rv.index(COLNAMES_1[5]) fuel_col = rv.index(COLNAMES_1[6]) source_col = rv.index(COLNAMES_1[7]) date_col = rv.index(COLNAMES_1[8]) print(u"Reading file 1...") for i in range(1, sheet.nrows): # read in row row = sheet.row_values(i) if pw.format_string(row[country_col]) != COUNTRY_NAME: continue try: name = pw.format_string(row[name_col], None) # already in unicode if not name: print(u"-Error: No name on row {0}".format(i + 1)) continue except: print(u"-Error: Can't read name of plant on row {0}".format(i + 1)) name = pw.NO_DATA_UNICODE # without this, next pass thru loop uses old name continue try: owner = pw.format_string(row[owner_col], None) except:
subFolder=SAVE_CODE, filename=dataset["filename"]) with open(dataset_filename, "rbU") as f: datareader = csv.DictReader(f) for row in datareader: try: idval = int(row["gid"]) except: print("-Error: Can't read ID for line {0}, skipping.".format( count)) continue try: name = pw.format_string( row.get("nombre", row.get("comuna", pw.NO_DATA_UNICODE))) except: print(u"-Error: Can't read name for ID {0}, skipping.".format( idval)) continue try: fuel_string = row.get( "Tipo", row.get("Tipo ", row.get("Combustible", pw.NO_DATA_UNICODE))) fuel = pw.standardize_fuel(fuel_string, fuel_thesaurus) except: print(u"-Error: Can't read fuel for plant {0}.".format(name)) fuel = pw.NO_DATA_SET