# Build one PowerPlant per row yielded by `datareader`. Rows whose latitude or
# longitude cannot be parsed are counted in `coord_skip_count` and skipped.
for row in datareader:
    idval = int(row[id_col])
    name = pw.format_string(row[name_col])
    try:
        latitude = float(row[latitude_col])
        longitude = float(row[longitude_col])
    except (IndexError, TypeError, ValueError):
        # Short row, or a blank / non-numeric coordinate cell: skip the plant.
        # Only these errors are caught so that Ctrl-C and unrelated bugs are
        # no longer silently swallowed (the original used a bare `except:`).
        coord_skip_count += 1
        continue
    country = row[country_col]  # note: this is the ISO3 code so no need to convert

    # assign ID number
    idnr = pw.make_id(SAVE_CODE, idval)
    new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude)
    new_plant = pw.PowerPlant(plant_idnr=idnr,
                              plant_name=name,
                              plant_country=country,
                              plant_location=new_location,
                              plant_source=SOURCE_NAME,
                              plant_source_url=SOURCE_URL)
    plants_dictionary[idnr] = new_plant

# report on plants read from file
print(u"...read {0} plants.".format(len(plants_dictionary)))
print("Skipped {0} plants because of missing lat/long coordinates.".format(
    coord_skip_count))
owner_full = pw.NO_DATA_UNICODE break owner_share = owner.text.strip().replace(u"para",u"by") + u" " owner_name = owner.findall("a")[0].text.strip() owner_conjunction = u"; " if owner_full else u"" owner_full = owner_full + owner_conjunction + owner_share + owner_name #owner_full.append(u" " + owner_name.text) if u"50" in owner_full: print(u"Plant: {0}; Owner(s): {1}".format(name,owner_full)) #print(u"Plant: {0}; Owner(s): {1}".format(name,owner_full)) """ # assign ID number idnr = pw.make_id(SAVE_CODE,plant_id) new_location = pw.LocationObject(pw.NO_DATA_UNICODE,latitude,longitude) new_plant = pw.PowerPlant(plant_idnr=idnr,plant_name=name,plant_country=COUNTRY_NAME, plant_location=new_location,plant_fuel=fuel,plant_capacity=capacity, plant_source=SOURCE_NAME,plant_source_url=SOURCE_URL,plant_cap_year=SOURCE_YEAR, plant_commissioning_year=op_year) plants_dictionary[idnr] = new_plant # report on plants read from file print(u"...read {0} plants.".format(len(plants_dictionary))) print(u"Found coordinates for {0} plants.".format(found_coordinates_count)) print(u"Found operational year for {0} plants.".format(found_operational_year_count)) # write database to csv format pw.write_csv_file(plants_dictionary,CSV_FILE_NAME)
# read in plants
print(u"Reading in plants...")
with open(RAW_FILE_NAME, 'r') as f:
    data = json.load(f)

# select main content of page
wiki = data['query']['pages']['85380']['revisions'][0]['*']

# get plants with location from map (first part of raw file)
plant_filename = 'http://www.sourcewatch.org/index.php/Category:Existing_coal_plants_in_China'
count = 0
for line in wiki.split('\n'):
    if '~[[' in line:
        count += 1
        idnr = pw.make_id(SAVE_CODE, count)
        # Strip the "[", "]" and ";" characters, then split the entry on "~":
        # field 0 is read as "lat,lng" and field 1 as the plant name.
        plant = line.translate({ord(k): None for k in u'[];'}).split('~')
        name = pw.format_string(plant[1], encoding=None)
        # Split the coordinate field once; it is needed both for the
        # diagnostic below and for the numeric conversion.
        coordinates = plant[0].split(',')
        if name == pw.NO_DATA_UNICODE:
            # Single format string: the original concatenated two strings
            # without a separator, printing "...with <id>at [...]".
            print("-Error: Name problem with {0} at {1}".format(
                idnr, coordinates))
        lat = coordinates[0]
        lng = coordinates[1]
        latitude_number = float(lat)
        longitude_number = float(lng)
        new_location = pw.LocationObject(description=pw.NO_DATA_UNICODE,
                                         latitude=latitude_number,
                                         longitude=longitude_number)
wb1 = xlrd.open_workbook(RAW_FILE_NAME_923_2) ws1 = wb1.sheet_by_name(TAB_NAME_923_2) print("Loading Form 860-2") wb2 = xlrd.open_workbook(RAW_FILE_NAME_860_2) ws2 = wb2.sheet_by_name(TAB_NAME_860_2) print("Loading Form 860-3") wb3 = xlrd.open_workbook(RAW_FILE_NAME_860_3) ws3 = wb3.sheet_by_name(TAB_NAME_860_3) # read in plants from File 2 of EIA-860 print("Reading in plants...") plants_dictionary = {} for row_id in range(2, ws2.nrows): rv = ws2.row_values(row_id) # row value name = pw.format_string(rv[COLS_860_2['name']]) idnr = pw.make_id(SAVE_CODE, int(rv[COLS_860_2['idnr']])) capacity = 0.0 generation = pw.PlantGenerationObject() owner = pw.format_string(str(rv[COLS_860_2['owner']])) try: latitude = float(rv[COLS_860_2['lat']]) except: latitude = pw.NO_DATA_NUMERIC try: longitude = float(rv[COLS_860_2['lng']]) except: longitude = pw.NO_DATA_NUMERIC location = pw.LocationObject(u"", latitude, longitude) new_plant = pw.PowerPlant(idnr, name, plant_country=COUNTRY_NAME,
try: countries = pw.format_string(rv[countries_col]) country_list_iso2 = countries.split(";") country_list = [] for iso2 in country_list_iso2: country_list.append(iso2_to_country_names[iso2.strip()]) country = " ".join(country_list) except: print( u"-Error: Can't read countries from string {0}; list: {1}.".format( countries, country_list_iso2)) country = pw.NO_DATA_UNICODE # assign ID number idnr = pw.make_id(SAVE_CODE, ref) new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude) new_plant = pw.PowerPlant(plant_idnr=idnr, plant_name=name, plant_country=country, plant_location=new_location, plant_fuel=fuel, plant_capacity=capacity, plant_source=SOURCE_NAME, plant_source_url=SOURCE_URL, plant_owner=owner) plants_dictionary[idnr] = new_plant # report on plants read from file print(u"Loaded {0} plants to database.".format(len(plants_dictionary)))
commissioning_year = 0.5 * ( commissioning_year_1 + commissioning_year_2 ) # todo: need a better method else: commissioning_year = float(commissioning_year_string) if (commissioning_year < 1900) or (commissioning_year > 2020): # sanity check commissioning_year = pw.NO_DATA_NUMERIC except: print( u"-Error: Can't read commissioning year for plant {0} {1}." .format(country, str(idnr))) commissioning_year = pw.NO_DATA_NUMERIC # assign ID number idnr_full = pw.make_id(SAVE_CODE, int(idnr)) # check if this ID is already in the dictionary - if so, this is a unit if idnr_full in plants_dictionary: # update plant existing_plant = plants_dictionary[idnr_full] existing_plant.capacity += capacity existing_plant.fuel.update(fuel) # append generation object - may want to sum generation instead? if generation: existing_plant.generation.append(generation) # if lat/long for this unit, overwrite previous data - may want to change this if latitude and longitude: new_location = pw.LocationObject( location, latitude, longitude) existing_plant.location = new_location
fuel = pw.standardize_fuel(plant_type, fuel_thesaurus) elif plant_type == u"THERMAL": fuel = pw.standardize_fuel(rv[fuel1_col], fuel_thesaurus) if rv[fuel2_col] and rv[fuel2_col] != 'n/a': fuel2 = pw.standardize_fuel(rv[fuel2_col], fuel_thesaurus) fuel = fuel.union(fuel2) else: print("Can't identify plant type {0}".format(plant_type)) except: print(u"Can't identify plant type for plant {0}".format(name)) latitude = pw.NO_DATA_NUMERIC longitude = pw.NO_DATA_NUMERIC # assign ID number idnr = pw.make_id(SAVE_CODE, id_val) new_location = pw.LocationObject("", latitude, longitude) new_plant = pw.PowerPlant(plant_idnr=idnr, plant_name=name, plant_country=COUNTRY_NAME, plant_location=new_location, plant_fuel=fuel, plant_capacity=capacity, plant_cap_year=DATA_YEAR, plant_source=SOURCE_NAME, plant_source_url=SOURCE_URL, plant_generation=generation) plants_dictionary[idnr] = new_plant # now find average commissioning year weighted by capacity for id_val, units in unit_list.iteritems():
plants_dictionary = {} # load GEO and CARMA for matching coordinates geo_database = pw.load_database(GEO_DATABASE_FILE) print("Loaded {0} plants from GEO database.".format(len(geo_database))) carma_database = pw.load_database(CARMA_DATABASE_FILE) print("Loaded {0} plants from CARMA database.".format(len(carma_database))) # read in plant matches file with open(PLANT_MATCHES, "rbU") as f: f.readline() # skip headers csvreader = csv.reader(f) plant_matches = {} for row in csvreader: dukes_name = str(row[0]) geo_id = pw.make_id(SAVE_CODE_GEO, int(row[1])) if row[1] else "" carma_id = pw.make_id(SAVE_CODE_CARMA, int(row[2])) if row[2] else "" repd_id = int(row[3]) if row[3] else "" plant_matches[dukes_name] = { "geo_id": geo_id, "carma_id": carma_id, "repd_id": repd_id } # load and process Renewable Energy Planning Database print(u"Reading in plants...") country = COUNTRY_NAME with open(RAW_FILE_NAME_REPD, "rU") as f: datareader = csv.reader(f) headers = datareader.next() while "Ref ID" not in headers: # find header row
source = pw.format_string(row[source_col], None) except: print(u"-Error: Can't read data source for plant with name {0}".format( name)) source = pw.NO_DATA_UNICODE try: data_date = (int(str(row[date_col])[0:4])) except: print( u"-Error:Can't read reference date for plant with name {0}".format( name)) data_date = pw.NO_DATA_NUMERIC # assign ID number idnr = pw.make_id(SAVE_CODE, i) new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude) new_plant = pw.PowerPlant(plant_idnr=idnr, plant_name=name, plant_country=COUNTRY_NAME, plant_owner=owner, plant_cap_year=data_date, plant_location=new_location, plant_coord_source=SOURCE_URL_1, plant_fuel=fuel, plant_capacity=capacity, plant_source=source, plant_source_url=SOURCE_URL_1) plants_dictionary[idnr] = new_plant # use this for id incrementing in next file
name)) capacity = pw.NO_DATA_NUMERIC try: owner_string = row.get( "Propietario", row.get("Propiedad", pw.NO_DATA_UNICODE)) if not owner_string: print("-Error: No owner string for plant {0}".format(name)) owner = pw.NO_DATA_UNICODE owner = pw.format_string(owner_string) except: print(u"-Error: Can't read owner for plant {0}.".format(name)) owner = pw.NO_DATA_UNICODE # assign ID number, make PowerPlant object, add to dictionary idnr = pw.make_id(SAVE_CODE, dataset["idstart"] + idval) new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude) new_plant = pw.PowerPlant(plant_idnr=idnr, plant_name=name, plant_country=COUNTRY_NAME, plant_owner=owner, plant_location=new_location, plant_fuel=fuel, plant_capacity=capacity, plant_cap_year=YEAR_POSTED, plant_source=SOURCE_NAME, plant_source_url=SOURCE_URL) plants_dictionary[idnr] = new_plant # report on plants read from file