try: latitude = float(row[latitude_col]) longitude = float(row[longitude_col]) except: coord_skip_count += 1 continue country = row[ country_col] # note: this is the ISO3 code so no need to convert # assign ID number idnr = pw.make_id(SAVE_CODE, idval) new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude) new_plant = pw.PowerPlant(plant_idnr=idnr, plant_name=name, plant_country=country, plant_location=new_location, plant_source=SOURCE_NAME, plant_source_url=SOURCE_URL) plants_dictionary[idnr] = new_plant # report on plants read from file print(u"...read {0} plants.".format(len(plants_dictionary))) print("Skipped {0} plants because of missing lat/long coordinates.".format( coord_skip_count)) # write database to csv format pw.write_csv_file(plants_dictionary, CSV_FILE_NAME) # # save database pw.save_database(plants_dictionary, SAVE_CODE, SAVE_DIRECTORY) print(u"Pickled database to {0}".format(SAVE_DIRECTORY))
# NOTE(review): whitespace-collapsed fragment, kept verbatim because its
# original line breaks and indentation cannot be recovered with confidence.
# The text up to the `"""` looks like disabled owner-parsing code that sits
# inside a triple-quoted string opened outside this view -- confirm.
# The statements after the `"""` build an ID with pw.make_id(SAVE_CODE, plant_id),
# create a pw.LocationObject and a pw.PowerPlant, store the plant in
# plants_dictionary[idnr], print three counters, write the CSV file with
# pw.write_csv_file and save the database with pw.save_database.
owner_name = owner.findall("a")[0].text.strip() owner_conjunction = u"; " if owner_full else u"" owner_full = owner_full + owner_conjunction + owner_share + owner_name #owner_full.append(u" " + owner_name.text) if u"50" in owner_full: print(u"Plant: {0}; Owner(s): {1}".format(name,owner_full)) #print(u"Plant: {0}; Owner(s): {1}".format(name,owner_full)) """ # assign ID number idnr = pw.make_id(SAVE_CODE,plant_id) new_location = pw.LocationObject(pw.NO_DATA_UNICODE,latitude,longitude) new_plant = pw.PowerPlant(plant_idnr=idnr,plant_name=name,plant_country=COUNTRY_NAME, plant_location=new_location,plant_fuel=fuel,plant_capacity=capacity, plant_source=SOURCE_NAME,plant_source_url=SOURCE_URL,plant_cap_year=SOURCE_YEAR, plant_commissioning_year=op_year) plants_dictionary[idnr] = new_plant # report on plants read from file print(u"...read {0} plants.".format(len(plants_dictionary))) print(u"Found coordinates for {0} plants.".format(found_coordinates_count)) print(u"Found operational year for {0} plants.".format(found_operational_year_count)) # write database to csv format pw.write_csv_file(plants_dictionary,CSV_FILE_NAME) # save database pw.save_database(plants_dictionary,SAVE_CODE,SAVE_DIRECTORY) print(u"Pickled database to {0}".format(SAVE_DIRECTORY))
# NOTE(review): this fragment was recovered from a whitespace-collapsed line.
# The plant-building statements presumably sit inside a loop over `plant`
# entries whose header is outside this view, and the wiki-table section
# presumably starts after that loop -- restore the indentation when merging.

if name == pw.NO_DATA_UNICODE:
    # The message is built from two concatenated pieces; the second now starts
    # with a space so the ID and the coordinates no longer run together.
    print("-Error: Name problem with {0}".format(idnr) +
          " at {0}".format(plant[0].split(',')))

# plant[0] is split on ',' and read as latitude followed by longitude
coordinates = plant[0].split(',')
lat = coordinates[0]
lng = coordinates[1]
latitude_number = float(lat)
longitude_number = float(lng)
new_location = pw.LocationObject(description=pw.NO_DATA_UNICODE,
                                 latitude=latitude_number,
                                 longitude=longitude_number)
new_plant = pw.PowerPlant(idnr,
                          name,
                          plant_country=COUNTRY_NAME,
                          plant_location=new_location,
                          plant_fuel=set([u'Coal']),
                          plant_source=SOURCE_NAME,
                          plant_source_url=SOURCE_URL,
                          plant_cap_year=2017)
plants_dictionary[idnr] = new_plant

# use wikimarkup to detect the main table and transform to html code
html = str(PyQuery(wikimarkup.parse(wiki)))

# read html code into pandas dataframe
frames = pd.read_html(html, header=0)
df = frames[0]

# make changes to Units column to include only the plant name, not the unit
for i, unit in enumerate(frames[0]['Unit']):
    # DataFrame.set_value() is deprecated and was removed in pandas 1.0;
    # `.at` is the label-based scalar setter that replaces it.
    df.at[i, 'Unit'] = unit.strip('[]').split('|')[0]
# NOTE(review): this fragment was recovered from a whitespace-collapsed line.
# The plant-building statements presumably run inside a loop over worksheet
# rows (`rv`) whose header is outside this view, and the "loaded" report and
# the capacity section presumably follow that loop -- restore the indentation
# when merging. The final `for` loop continues beyond this fragment.

generation = pw.PlantGenerationObject()
owner = pw.format_string(str(rv[COLS_860_2['owner']]))

# Coordinates that cannot be converted fall back to the numeric no-data value.
# Only lookup/conversion errors are caught; a bare `except:` would also trap
# KeyboardInterrupt and SystemExit.
try:
    latitude = float(rv[COLS_860_2['lat']])
except (IndexError, TypeError, ValueError):
    latitude = pw.NO_DATA_NUMERIC
try:
    longitude = float(rv[COLS_860_2['lng']])
except (IndexError, TypeError, ValueError):
    longitude = pw.NO_DATA_NUMERIC

location = pw.LocationObject(u"", latitude, longitude)
new_plant = pw.PowerPlant(idnr,
                          name,
                          plant_country=COUNTRY_NAME,
                          plant_location=location,
                          plant_owner=owner,
                          plant_capacity=capacity,
                          plant_generation=generation,
                          plant_cap_year=YEAR,
                          plant_source=SOURCE,
                          plant_source_url=SOURCE_URL)
plants_dictionary[idnr] = new_plant

print("...loaded {0} plants.".format(len(plants_dictionary)))

# read in capacities from File 3 of EIA-860
print("Reading in capacities...")
# temporary method until PowerPlant object includes unit-level information
commissioning_year_by_unit = {}
for row_id in range(2, ws3.nrows):
    rv = ws3.row_values(row_id)  # row value
# NOTE(review): whitespace-collapsed fragment, kept verbatim. It begins inside
# a `try` body whose header (and any enclosing loop) is outside this view, so
# the original indentation cannot be restored safely.
# Visible behaviour: ISO2 codes are looked up in iso2_to_country_names and the
# names are joined with a single space; the bare `except:` prints an error and
# falls back to pw.NO_DATA_UNICODE. The plant is then created with
# pw.make_id(SAVE_CODE, ref), stored in plants_dictionary[idnr], and the
# dictionary is reported, written with pw.write_csv_file and saved with
# pw.save_database.
# NOTE(review): the bare `except:` also traps KeyboardInterrupt/SystemExit --
# consider narrowing it once the full `try` body is visible.
country_list.append(iso2_to_country_names[iso2.strip()]) country = " ".join(country_list) except: print( u"-Error: Can't read countries from string {0}; list: {1}.".format( countries, country_list_iso2)) country = pw.NO_DATA_UNICODE # assign ID number idnr = pw.make_id(SAVE_CODE, ref) new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude) new_plant = pw.PowerPlant(plant_idnr=idnr, plant_name=name, plant_country=country, plant_location=new_location, plant_fuel=fuel, plant_capacity=capacity, plant_source=SOURCE_NAME, plant_source_url=SOURCE_URL, plant_owner=owner) plants_dictionary[idnr] = new_plant # report on plants read from file print(u"Loaded {0} plants to database.".format(len(plants_dictionary))) # write database to csv format pw.write_csv_file(plants_dictionary, CSV_FILE_NAME) # save database pw.save_database(plants_dictionary, SAVE_CODE, SAVE_DIRECTORY) print(u"Pickled database to {0}".format(SAVE_DIRECTORY))
# NOTE(review): whitespace-collapsed fragment, kept verbatim. It cannot be told
# from the collapsed text whether the `else:` pairs with the visible
# `if latitude and longitude:` or with an outer `if` that is outside this
# view, so the code is not re-indented here.
# Visible behaviour: when both coordinates are truthy, existing_plant.location
# is replaced by a new pw.LocationObject; the `else` branch builds a new
# pw.PowerPlant and stores it under plants_dictionary[idnr_full]. Afterwards
# the plant count is printed, the CSV is written and the database is saved.
# NOTE(review): `if latitude and longitude` treats a coordinate of exactly 0.0
# as missing -- confirm that this is intended.
# NOTE(review): the plant is created with plant_idnr=idnr but stored under the
# key idnr_full -- confirm that the two are meant to differ.
if latitude and longitude: new_location = pw.LocationObject( location, latitude, longitude) existing_plant.location = new_location # unclear how to handle owner, source, url, commissioning year else: new_location = pw.LocationObject(location, latitude, longitude) new_plant = pw.PowerPlant( plant_idnr=idnr, plant_name=name, plant_country=country, plant_location=new_location, plant_fuel=fuel, plant_capacity=capacity, plant_owner=owner, plant_generation=generation, plant_source=source, plant_source_url=url, plant_commissioning_year=commissioning_year) plants_dictionary[idnr_full] = new_plant # report on plants read from file print(u"...read {0} plants.".format(len(plants_dictionary))) # write database to csv format pw.write_csv_file(plants_dictionary, CSV_FILE_NAME) # save database pw.save_database(plants_dictionary, SAVE_CODE, SAVE_DIRECTORY)
# NOTE(review): whitespace-collapsed fragment, kept verbatim. The nesting of
# `count += 1` relative to `if current_plant_name:` cannot be determined from
# the collapsed text, and the fragment ends on an `except:` whose body is
# outside this view, so the code is not re-indented here.
# Visible behaviour: when a row carries a name and a plant is already being
# accumulated, that plant is given an ID from pw.make_id(SAVE_CODE, count), a
# pw.PlantGenerationObject is built from current_generation_sum, and the
# resulting pw.PowerPlant is stored in plants_dictionary[idnr]. The current_*
# accumulators are then reset from the new row; the capacity is converted with
# float() and multiplied by CAPACITY_CONVERSION_TO_MW inside a `try`.
if row_name: if current_plant_name: # assign ID number, make PowerPlant object, add to dictionary idnr = pw.make_id(SAVE_CODE, count) annual_generation = pw.PlantGenerationObject( gwh=current_generation_sum, start_date=gen_start, end_date=gen_stop, source=SOURCE_NAME) new_plant = pw.PowerPlant(plant_idnr=idnr, plant_name=current_plant_name, plant_owner=current_owner, plant_fuel=current_fuel_types, plant_country=COUNTRY_NAME, plant_capacity=current_capacity_sum, plant_cap_year=YEAR_OF_DATA, plant_source=SOURCE_NAME, plant_source_url=SOURCE_URL, plant_generation=annual_generation) plants_dictionary[idnr] = new_plant count += 1 # reset all current values to this row current_plant_name = row_name current_fuel_types = pw.standardize_fuel(row_fuel, fuel_thesaurus) current_owner = pw.format_string(rv[COLS['owner']], None) try: current_capacity_sum = float( rv[COLS['capacity']]) * CAPACITY_CONVERSION_TO_MW except:
# NOTE(review): whitespace-collapsed fragment, kept verbatim. It starts on an
# `elif` whose `if` is outside this view, and it cannot be told whether the
# `for i in COLS["fuel_type"]` loop belongs to the `else:` branch or follows
# the whole if/elif/else chain, so the code is not re-indented here. The
# trailing `while` loop continues beyond this fragment.
# Visible behaviour: a fuel set is chosen from gen_type, fuel columns are
# standardized with pw.standardize_fuel and merged in with fuels.update(); a
# unit-level pw.PowerPlant with an ID formatted from "REF" and count_unit is
# stored in units_dictionary. The units are then sorted by name and compared
# pairwise with regex_match to pick a plant name.
# NOTE(review): `fuels = pw.NO_DATA_SET` followed by `fuels.update(...)` would
# mutate the shared pw.NO_DATA_SET object if it is a mutable set -- confirm,
# and copy it (e.g. set(pw.NO_DATA_SET)) if so.
# NOTE(review): the bare `except: continue` hides every error raised while
# reading a fuel column, including KeyboardInterrupt.
elif gen_type.lower() == u"wind power": fuels = set([u"Wind"]) else: fuels = pw.NO_DATA_SET for i in COLS["fuel_type"]: try: if rv[i] == "None": continue fuel = pw.standardize_fuel(rv[i], fuel_thesaurus) fuels.update(fuel) except: continue new_location = pw.LocationObject(pw.NO_DATA_UNICODE,pw.NO_DATA_NUMERIC,pw.NO_DATA_NUMERIC) idnr = u"{:4}{:06d}".format("REF", count_unit) new_unit = pw.PowerPlant(plant_idnr=idnr, plant_name=name, plant_owner=owner, plant_fuel=fuels, plant_country=unicode(COUNTRY_NAME), plant_capacity=capacity_max, plant_cap_year=year_updated, plant_source=SOURCE_NAME, plant_source_url=DATASET_URL, plant_location=new_location) units_dictionary[idnr] = new_unit count_unit += 1 # Aggregate units to plant level sorted_units = sorted(units_dictionary.values(), key = lambda x: x.name) # units are sorted by name count_plant = 1 i = 0 while i < len(sorted_units)-1: j = i + 1 idnr = pw.make_id(SAVE_CODE,count_plant) matched_name = regex_match(sorted_units[i].name, sorted_units[j].name) # return a string if there is a match, otherwise False plant_name = matched_name if matched_name else sorted_units[i].name owner = sorted_units[i].owner fuels = sorted_units[i].fuel
# NOTE(review): this fragment was recovered from a whitespace-collapsed line
# and presumably runs inside a loop over rows indexed by `i` whose header is
# outside this view. The nesting below follows the original comments ("first
# process the previous plant ...", "next process this plant") -- confirm
# against the original file when merging.

fuel_type = pw.standardize_fuel(fuel_type_str, fuel_thesaurus)

if name_str:  # if true, this row begins a new plant
    # first process the previous plant unless this is the first entry
    if i > START_ROW:
        # The leftover debugging statements (`print i`, `print fuel_type_set`
        # and a blocking `raw_input()`) were removed: they paused the script
        # for a key press before every recorded plant.
        total_capacity = sum(capacity_list)
        average_year_built = sum(year_built_list) / len(
            year_built_list)  # TODO: fix this
        # NOTE(review): the location is a fixed 0.0/0.0 placeholder here.
        new_location = pw.LocationObject(latitude=0.0, longitude=0.0)
        new_plant = pw.PowerPlant(plant_idnr=plant_idnr,
                                  plant_name=name,
                                  plant_country=COUNTRY_NAME,
                                  plant_capacity=total_capacity,
                                  plant_fuel=fuel_type_set,
                                  plant_source=URL,
                                  plant_location=new_location)
        plants_dictionary[plant_idnr] = new_plant
        print("Recording plant {0} with ID: {1}, capacity: {2}, fuel: {3}".
              format(name, plant_idnr, total_capacity, fuel_type_set))

    # next process this plant
    name = pw.format_string(name_str)
    plant_idnr = pw.make_id(SAVE_CODE, i)
    capacity_list = [capacity]
    year_built_list = [year_built]
    fuel_type_set = fuel_type
# NOTE(review): this fragment was recovered from a whitespace-collapsed line.
# The plant-building statements presumably run inside a loop over `plant`
# elements whose header is outside this view, and the report/write/save
# statements presumably follow that loop -- restore the indentation when
# merging.

# The year is the first four characters of the date part (before "T") of the
# REVISED element's text.
try:
    year_updated = int(
        plant.find("Electricity_Infrastructure:REVISED",
                   ns).text.split("T")[0][0:4])
except (AttributeError, TypeError, ValueError):
    # A missing element or empty text raises AttributeError; a non-numeric
    # year raises ValueError. A bare `except:` would also trap
    # KeyboardInterrupt and SystemExit.
    year_updated = pw.NO_DATA_NUMERIC

# assign ID number
idnr = pw.make_id(SAVE_CODE, plant_id)
new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude)
new_plant = pw.PowerPlant(plant_idnr=idnr,
                          plant_name=name,
                          plant_owner=owner,
                          plant_country=COUNTRY_NAME,
                          plant_location=new_location,
                          plant_fuel=fuel,
                          plant_capacity=capacity,
                          plant_source=SOURCE_NAME,
                          plant_cap_year=year_updated,
                          plant_source_url=SOURCE_URL)
plants_dictionary[idnr] = new_plant
count += 1

# report on plants read from file
print(u"...read {0} plants.".format(len(plants_dictionary)))

# write database to csv format
pw.write_csv_file(plants_dictionary, CSV_FILE_NAME)

# save database
pw.save_database(plants_dictionary, SAVE_CODE, SAVE_DIRECTORY)
try: owner = pw.format_string(row[owner_col]) except: print(u"-Error: Can't read owner for plant {0}.".format(name)) owner = u"" # now process plant plant_idnr = pw.make_id(SAVE_CODE_GBR, idnr) new_location = pw.LocationObject(latitude=latitude, longitude=longitude) new_plant = pw.PowerPlant(plant_idnr=plant_idnr, plant_name=name, plant_owner=owner, plant_country=country, plant_capacity=capacity, plant_cap_year=REPD_YEAR, plant_source=SOURCE_NAME, plant_source_url=SOURCE_URL, plant_location=new_location, plant_coord_source=coord_source, plant_fuel=fuel_type) plants_dictionary[plant_idnr] = new_plant # load and process DUKES file book = xlrd.open_workbook(RAW_FILE_NAME_DUKES) sheet = book.sheet_by_index(TAB_NUMBER_DUKES) count = 1000001 # use this starting number for IDs when plant is not matched to REPD plant first_data_row = 0 for i in range(sheet.nrows): # find headers
# NOTE(review): whitespace-collapsed fragment, kept verbatim. It begins inside
# a `try` body whose header (and any enclosing loop) is outside this view, so
# the original indentation cannot be restored safely.
# Visible behaviour: data_date is the integer value of the first four
# characters of str(row[date_col]); the bare `except:` prints an error and
# falls back to pw.NO_DATA_NUMERIC. The plant is created with
# pw.make_id(SAVE_CODE, i) and stored in plants_dictionary[idnr]; SOURCE_URL_1
# is used for both plant_coord_source and plant_source_url. max_id is then set
# from i, and the second workbook is opened with xlrd and its first row read.
# NOTE(review): the bare `except:` also traps KeyboardInterrupt/SystemExit --
# consider narrowing it once the full `try` body is visible.
data_date = (int(str(row[date_col])[0:4])) except: print( u"-Error:Can't read reference date for plant with name {0}".format( name)) data_date = pw.NO_DATA_NUMERIC # assign ID number idnr = pw.make_id(SAVE_CODE, i) new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude) new_plant = pw.PowerPlant(plant_idnr=idnr, plant_name=name, plant_country=COUNTRY_NAME, plant_owner=owner, plant_cap_year=data_date, plant_location=new_location, plant_coord_source=SOURCE_URL_1, plant_fuel=fuel, plant_capacity=capacity, plant_source=source, plant_source_url=SOURCE_URL_1) plants_dictionary[idnr] = new_plant # use this for id incrementing in next file max_id = i # 2: read in NACEI renewable plants book = xlrd.open_workbook(RAW_FILE_NAME_2, encoding_override=ENCODING) sheet = book.sheet_by_name(TAB_NAME_2) rv = sheet.row_values(0)