# Build one PowerPlant per row of `datareader`, keyed by its generated ID.
# Rows whose latitude/longitude cannot be read as floats are counted in
# `coord_skip_count` and skipped.
for row in datareader:
    idval = int(row[id_col])
    name = pw.format_string(row[name_col])
    try:
        latitude = float(row[latitude_col])
        longitude = float(row[longitude_col])
    except (ValueError, TypeError, IndexError, KeyError):
        # Missing or non-numeric coordinate: skip this row. Only lookup and
        # conversion errors are caught (rather than a bare `except`), so
        # KeyboardInterrupt/SystemExit and unrelated bugs still propagate.
        coord_skip_count += 1
        continue
    country = row[country_col]  # note: this is the ISO3 code so no need to convert

    # assign ID number
    idnr = pw.make_id(SAVE_CODE, idval)
    new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude)
    new_plant = pw.PowerPlant(
        plant_idnr=idnr,
        plant_name=name,
        plant_country=country,
        plant_location=new_location,
        plant_source=SOURCE_NAME,
        plant_source_url=SOURCE_URL)
    plants_dictionary[idnr] = new_plant

# report on plants read from file
print(u" {0} plants.".format(len(plants_dictionary)))
print("Skipped {0} plants because of missing lat/long coordinates.".format(
    coord_skip_count))

# write database to csv format
pw.write_csv_file(plants_dictionary, CSV_FILE_NAME)
# Create one PowerPlant per row of sheet `ws2`, starting at row index 2
# (the first two rows are not read), and store it in `plants_dictionary`
# under its generated ID.
for row_id in range(2, ws2.nrows):
    rv = ws2.row_values(row_id)  # row value
    name = pw.format_string(rv[COLS_860_2['name']])
    idnr = pw.make_id(SAVE_CODE, int(rv[COLS_860_2['idnr']]))
    # Capacity and generation start out empty here; presumably they are
    # filled in from other files later in the script -- confirm.
    capacity = 0.0
    generation = pw.PlantGenerationObject()
    owner = pw.format_string(str(rv[COLS_860_2['owner']]))

    # A coordinate cell that cannot be converted to float falls back to the
    # numeric no-data marker. Only conversion/index errors are caught
    # (rather than a bare `except`), so KeyboardInterrupt/SystemExit and
    # unrelated bugs are not silently swallowed.
    try:
        latitude = float(rv[COLS_860_2['lat']])
    except (ValueError, TypeError, IndexError):
        latitude = pw.NO_DATA_NUMERIC
    try:
        longitude = float(rv[COLS_860_2['lng']])
    except (ValueError, TypeError, IndexError):
        longitude = pw.NO_DATA_NUMERIC

    location = pw.LocationObject(u"", latitude, longitude)
    new_plant = pw.PowerPlant(
        idnr,
        name,
        plant_country=COUNTRY_NAME,
        plant_location=location,
        plant_owner=owner,
        plant_capacity=capacity,
        plant_generation=generation,
        plant_cap_year=YEAR,
        plant_source=SOURCE,
        plant_source_url=SOURCE_URL)
    plants_dictionary[idnr] = new_plant

print("...loaded {0} plants.".format(len(plants_dictionary)))

# read in capacities from File 3 of EIA-860
count += 1 idnr = pw.make_id(SAVE_CODE, count) plant = line.translate({ord(k): None for k in u'[];'}).split('~') name = pw.format_string(plant[1], encoding=None) ## if name == pw.NO_DATA_UNICODE: print("-Error: Name problem with {0}".format(idnr) + "at {0}".format(plant[0].split(','))) ## coordinates = plant[0].split(',') lat = coordinates[0] lng = coordinates[1] latitude_number = float(lat) longitude_number = float(lng) new_location = pw.LocationObject(description=pw.NO_DATA_UNICODE, latitude=latitude_number, longitude=longitude_number) new_plant = pw.PowerPlant(idnr, name, plant_country=COUNTRY_NAME, plant_location=new_location, plant_fuel=set([u'Coal']), plant_source=SOURCE_NAME, plant_source_url=SOURCE_URL, plant_cap_year=2017) plants_dictionary[idnr] = new_plant # use wikimarkup to detect the main table and transform to html code html = str(PyQuery(wikimarkup.parse(wiki))) # read html code into pandas dataframe
current_owner = pw.NO_DATA_UNICODE current_capacity_sum = 0.0 current_generation_sum = 0.0 else: continue # now assign locations and commissioning years location_not_found = [] year_not_found = [] for idnr, plant in plants_dictionary.iteritems(): if in locations_dictionary.keys(): coords = locations_dictionary[] plant.location = pw.LocationObject(pw.NO_DATA_UNICODE, coords[0], coords[1]) else: location_not_found.append(plant) if in commissioning_years_dictionary.keys(): plant.commissioning_year = commissioning_years_dictionary[] else: year_not_found.append(plant) """ print("Locations not found for these plants:") location_not_found.sort(key = lambda x:x.capacity, reverse=True) for plant in location_not_found: if 'MOVIL' not in print(u"{0}, {1} MW".format(, plant.capacity))
# assign ID number idnr_full = pw.make_id(SAVE_CODE, int(idnr)) # check if this ID is already in the dictionary - if so, this is a unit if idnr_full in plants_dictionary: # update plant existing_plant = plants_dictionary[idnr_full] existing_plant.capacity += capacity existing_plant.fuel.update(fuel) # append generation object - may want to sum generation instead? if generation: existing_plant.generation.append(generation) # if lat/long for this unit, overwrite previous data - may want to change this if latitude and longitude: new_location = pw.LocationObject( location, latitude, longitude) existing_plant.location = new_location # unclear how to handle owner, source, url, commissioning year else: new_location = pw.LocationObject(location, latitude, longitude) new_plant = pw.PowerPlant( plant_idnr=idnr, plant_name=name, plant_country=country, plant_location=new_location, plant_fuel=fuel, plant_capacity=capacity, plant_owner=owner,
print u"-Error: Can't read plant generation technology." if gen_type.lower() == u"hydro power": fuels = set([u"Hydro"]) elif gen_type.lower() == u"wind power": fuels = set([u"Wind"]) else: fuels = pw.NO_DATA_SET for i in COLS["fuel_type"]: try: if rv[i] == "None": continue fuel = pw.standardize_fuel(rv[i], fuel_thesaurus) fuels.update(fuel) except: continue new_location = pw.LocationObject(pw.NO_DATA_UNICODE,pw.NO_DATA_NUMERIC,pw.NO_DATA_NUMERIC) idnr = u"{:4}{:06d}".format("REF", count_unit) new_unit = pw.PowerPlant(plant_idnr=idnr, plant_name=name, plant_owner=owner, plant_fuel=fuels, plant_country=unicode(COUNTRY_NAME), plant_capacity=capacity_max, plant_cap_year=year_updated, plant_source=SOURCE_NAME, plant_source_url=DATASET_URL, plant_location=new_location) units_dictionary[idnr] = new_unit count_unit += 1 # Aggregate units to plant level sorted_units = sorted(units_dictionary.values(), key = lambda x: # units are sorted by name count_plant = 1 i = 0 while i < len(sorted_units)-1: j = i + 1 idnr = pw.make_id(SAVE_CODE,count_plant) matched_name = regex_match(sorted_units[i].name, sorted_units[j].name) # return a string if there is a match, otherwise False
if fuel_type_str: if '+' in fuel_type_str: fuel_type = None else: fuel_type = pw.standardize_fuel(fuel_type_str, fuel_thesaurus) if name_str: # if true, this row begins a new plant # first process the previous plant unless this is the first entry if i > START_ROW: print i print fuel_type_set raw_input() total_capacity = sum(capacity_list) average_year_built = sum(year_built_list) / len( year_built_list) # TODO: fix this new_location = pw.LocationObject(latitude=0.0, longitude=0.0) new_plant = pw.PowerPlant(plant_idnr=plant_idnr, plant_name=name, plant_country=COUNTRY_NAME, plant_capacity=total_capacity, plant_fuel=fuel_type_set, plant_source=URL, plant_location=new_location) plants_dictionary[plant_idnr] = new_plant print("Recording plant {0} with ID: {1}, capacity: {2}, fuel: {3}". format(name, plant_idnr, total_capacity, fuel_type_set)) # next process this plant name = pw.format_string(name_str) plant_idnr = pw.make_id(SAVE_CODE, i)
y_coordinate = float(row[y_coordinate_col].replace(",", "")) longitude, latitude = pyproj.transform(osgb36, wgs84, x_coordinate, y_coordinate) except: print(u"-Error: Can't read location for plant {0}.".format(name)) latitude, longitude = 0.0, 0.0 coord_source = COUNTRY_NAME + u"national data" try: owner = pw.format_string(row[owner_col]) except: print(u"-Error: Can't read owner for plant {0}.".format(name)) owner = u"" # now process plant plant_idnr = pw.make_id(SAVE_CODE_GBR, idnr) new_location = pw.LocationObject(latitude=latitude, longitude=longitude) new_plant = pw.PowerPlant(plant_idnr=plant_idnr, plant_name=name, plant_owner=owner, plant_country=country, plant_capacity=capacity, plant_cap_year=REPD_YEAR, plant_source=SOURCE_NAME, plant_source_url=SOURCE_URL, plant_location=new_location, plant_coord_source=coord_source, plant_fuel=fuel_type) plants_dictionary[plant_idnr] = new_plant # load and process DUKES file book = xlrd.open_workbook(RAW_FILE_NAME_DUKES)