# Open the 2013 workbook and select the worksheet named by TAB_NAME_923_2_2013.
wb923_2013 = xlrd.open_workbook(RAW_FILE_NAME_923_2_2013)
ws923_2013 = wb923_2013.sheet_by_name(TAB_NAME_923_2_2013)

# Open the 860-2 and 860-3 workbooks, announcing each load on stdout, and
# select the worksheet named by the matching TAB_NAME_* constant.
print("Loading Form 860-2")
wb860_2 = xlrd.open_workbook(RAW_FILE_NAME_860_2)
ws860_2 = wb860_2.sheet_by_name(TAB_NAME_860_2)
print("Loading Form 860-3")
wb860_3 = xlrd.open_workbook(RAW_FILE_NAME_860_3)
ws860_3 = wb860_3.sheet_by_name(TAB_NAME_860_3)

# read in plants from File 2 of EIA-860
print("Reading in plants...")
plants_dictionary = {}
# Sheet rows 0 and 1 are skipped; each later row is processed in turn.
for row_id in xrange(2, ws860_2.nrows):
	rv = ws860_2.row_values(row_id) # row value
	name = pw.format_string(rv[COLS_860_2['name']])
	idnr = pw.make_id(SAVE_CODE, int(rv[COLS_860_2['idnr']]))
	capacity = 0.0
	generation = pw.PlantGenerationObject()
	owner = pw.format_string(str(rv[COLS_860_2['owner']]))
	# Coordinates that cannot be converted fall back to the no-data sentinel.
	# `except Exception` (rather than a bare `except`) lets KeyboardInterrupt
	# and SystemExit propagate instead of being swallowed on every row.
	try:
		latitude = float(rv[COLS_860_2['lat']])
	except Exception:
		latitude = pw.NO_DATA_NUMERIC
	try:
		longitude = float(rv[COLS_860_2['lng']])
	except Exception:
		longitude = pw.NO_DATA_NUMERIC
	location = pw.LocationObject(u"", latitude, longitude)
	new_plant = pw.PowerPlant(idnr, name, plant_country=COUNTRY_NAME,
		plant_location=location, plant_coord_source=SOURCE_NAME,
                latitude_col = headers.index(COLNAMES[14])
                longitude_col = headers.index(COLNAMES[15])
                geolocation_source_col = headers.index(COLNAMES[16])
                year_of_data_col = headers.index(COLNAMES[17])
            except:
                print(u"- ERROR: One or more columns missing in {0}, skipping...".format(afile))
                continue

            # read each row in the file
            for row in datareader:
                # skip plants that aren't operational; an empty status string
                # is accepted alongside 'Operational' and 'Operating'
                status = row[status_col]
                if status not in ['Operational', 'Operating', '']:
                    continue
                # `except Exception` is used instead of a bare `except` so that
                # KeyboardInterrupt/SystemExit are not swallowed by `continue`.
                try:
                    name = pw.format_string(row[name_col])
                    if not name:  # ignore accidental blank lines
                        continue
                except Exception:
                    print(u"-Error: Can't read plant name.")
                    continue  # must have plant name - don't read plant if not
                try:
                    idnr = str(row[id_col])
                    if not idnr:  # must have plant ID - don't read plant if not
                        print(u"-Error: Null ID for plant {0}.".format(name))
                        continue
                except Exception:
                    print(u"-Error: Can't read ID for plant {0}.".format(name))
                    continue  # must have plant ID - don't read plant if not
                try:
                    capacity = float(pw.format_string(row[capacity_col].replace(",", "")))   # note: may need to convert to MW
# Example #3
# 0
    # Locate each column by looking its expected title up in the row held in
    # `rv` (set before this excerpt).
    name_col = rv.index(COLNAMES[1])
    unit_col = rv.index(COLNAMES[2])
    year_col = rv.index(COLNAMES[3])
    capacity_col = rv.index(COLNAMES[4])
    type_col = rv.index(COLNAMES[5])
    fuel1_col = rv.index(COLNAMES[6])
    fuel2_col = rv.index(COLNAMES[7])
    generation_col = rv.index(COLNAMES[8])

    # Sheet row 0 is skipped; `rv` is rebound to each subsequent row.
    for i in xrange(1, sheet.nrows):

        # read in row
        rv = sheet.row_values(i)

        # `except Exception` replaces the bare `except` so KeyboardInterrupt
        # and SystemExit are not swallowed by the `continue` below.
        try:
            name = pw.format_string(rv[name_col])
            if not name:
                continue        # don't read rows that lack a plant name (footnotes, etc)
        except Exception:
            print(u"-Error: Can't read plant name for plant on row {0}.".format(i))
            continue

        try:
            id_val = int(rv[id_col])
            if not id_val:
                continue        # don't read rows that lack an ID (footnotes, etc)
        except Exception:
            print(u"-Error: Can't read ID for plant on row {0}.".format(i))
            continue

        try:
# Example #4
# 0
                year_built_list)  # TODO: fix this
            # Build the PowerPlant from the values accumulated so far and
            # store it under its ID.
            # NOTE(review): the location is hard-coded to latitude 0.0 /
            # longitude 0.0 here - presumably a placeholder; confirm.
            new_location = pw.LocationObject(latitude=0.0, longitude=0.0)
            new_plant = pw.PowerPlant(plant_idnr=plant_idnr,
                                      plant_name=name,
                                      plant_country=COUNTRY_NAME,
                                      plant_capacity=total_capacity,
                                      plant_fuel=fuel_type_set,
                                      plant_source=URL,
                                      plant_location=new_location)
            plants_dictionary[plant_idnr] = new_plant
            # Log what was recorded.
            print("Recording plant {0} with ID: {1}, capacity: {2}, fuel: {3}".
                  format(name, plant_idnr, total_capacity, fuel_type_set))

        # next process this plant

        name = pw.format_string(name_str)
        plant_idnr = pw.make_id(SAVE_CODE, i)
        capacity_list = [capacity]
        year_built_list = [year_built]
        fuel_type_set = fuel_type

    else:  # not a new plant, just a new line
        if capacity_val:
            capacity_list.append(capacity)
        if year_built_val:
            year_built_list.append(year_built)
        if fuel_type and fuel_type not in fuel_type_set:
            fuel_type_set.update(fuel_type)

# complete loop, add final plant
total_capacity = sum(capacity_list)
# Example #5
# 0
        if operational_status != "En Servicio":
            continue
    except:
        print(u"- Error: Can't evaluate operational status.")
        continue

    # get id
    try:
        id_val = int(p_dict['id'])
    except:
        print(u"- Error: Can't get ID.")
        continue

    # get name
    try:
        name = pw.format_string(p_dict['generador'].encode(ENCODING), ENCODING)
    except:
        print(u"- Error: Can't get name for plant {0}".format(id_val))
        continue

    # get capacity in MW
    try:
        capacity = float(p_dict['potenciaInstalada'].strip(" MW"))
    except:
        print(u"- Error: Can't get capacity for plant {0}".format(id_val))
        continue

    # get fuel type
    try:
        fuel_string_raw = p_dict['icon']
        primary_fuel_string, other_fuel_set = parse_fuel_URY(
    # Read the header row (lower-cased) and locate each expected column.
    # The builtin next() is used instead of the Python-2-only .next() method.
    headers = [x.lower() for x in next(datareader)]
    name_col = headers.index(COLNAMES[0])
    capacity_col = headers.index(COLNAMES[1])
    fuel_col = headers.index(COLNAMES[2])
    latitude_col = headers.index(COLNAMES[3])
    longitude_col = headers.index(COLNAMES[4])
    # additional columns here

    # if data source is for a single country
    country = SOURCE_NAME

    # read each row in the file
    count = 1
    for row in datareader:
        try:
            name = pw.format_string(row[name_col])
        except Exception:
            print(u"Error: Can't read plant name.")
            continue  # must have plant name - don't read plant if not
        try:
            capacity = float(pw.format_string(
                row[capacity_col]))  # note: may need to convert to MW
        except Exception:
            print(u"Error: Can't read capacity for plant {0}.".format(name))
            # Reset explicitly: otherwise the previous row's capacity (or an
            # unbound name on the first row) would be used for this plant.
            capacity = pw.NO_DATA_NUMERIC
        try:
            fuel = pw.standardize_fuel(row[fuel_col], fuel_thesaurus)
        except Exception:
            print(u"Error: Can't read fuel type for plant {0}.".format(name))
            # Same reasoning as for capacity: do not leak the previous row's fuel.
            fuel = pw.NO_DATA_SET
        try:
            latitude = float(row[latitude_col])
            longitude = float(row[longitude_col])
# Example #7
# 0
                # Walk every KML Placemark under `child`; each placemark's
                # description is HTML containing nested tables of
                # (label, value) rows.
                placemarks = child.findall(u"kml:Placemark", ns)
                for pm in placemarks:
                    description = pm.find("kml:description",
                                          ns).text  # html content
                    content = html.fromstring(description)
                    # Select the attribute rows: table number (1 + shift) under
                    # <body>, its second <tr>, then the rows of the table inside
                    # that row's first <td>.
                    rows = content.findall("body/table")[1 + shift].findall(
                        "tr")[1].find("td").find("table").findall("tr")
                    # Defaults used when the corresponding row is not found.
                    status = u"N/A"
                    plant_id = u""
                    for row in rows:
                        # first cell is the label, second cell is the value
                        left = row.findall("td")[0].text
                        right = row.findall("td")[1].text

                        # find CEG ID
                        if left == u"CEG":
                            plant_id = pw.format_string(right.strip(), None)

                        # make ID string formatting consistent (is not consistent in raw data)
                        # use only leading alpha chars and 6-digit number; drop trailing digits after "-"
                        # NOTE: this runs on every row once plant_id is set, not
                        # only on the CEG row.
                        if plant_id and u'Null' not in plant_id:

                            # no dots yet: insert them after characters 3, 5
                            # and 7, keeping characters up to index 13
                            if u'.' not in plant_id:
                                plant_id = plant_id[0:3] + u'.' + plant_id[
                                    3:5] + u'.' + plant_id[
                                        5:7] + u'.' + plant_id[7:13]

                            # already dotted but has a "-": keep the first 16 characters
                            elif u'-' in plant_id:
                                plant_id = plant_id[0:16]

                    # remove non-operating plants
                    #if status != u"Operação":
# Example #8
# 0
# Locate the remaining columns by title in the row held in `rv`.
owner_col = rv.index(COLNAMES_1[2])
latitude_col = rv.index(COLNAMES_1[3])
longitude_col = rv.index(COLNAMES_1[4])
capacity_col = rv.index(COLNAMES_1[5])
fuel_col = rv.index(COLNAMES_1[6])
source_col = rv.index(COLNAMES_1[7])
date_col = rv.index(COLNAMES_1[8])

print(u"Reading file 1...")

# Sheet row 0 is skipped; error messages report the 1-based row number (i + 1).
for i in xrange(1, sheet.nrows):

    # read in row
    row = sheet.row_values(i)

    # only keep rows for the configured country
    if pw.format_string(row[country_col]) != COUNTRY_NAME:
        continue

    # `except Exception` replaces the bare `except` so KeyboardInterrupt and
    # SystemExit are not swallowed by the `continue`.
    try:
        name = pw.format_string(row[name_col], None)  # already in unicode
        if not name:
            print(u"-Error: No name on row {0}".format(i + 1))
            continue
    except Exception:
        print(u"-Error: Can't read name of plant on row {0}".format(i + 1))
        name = pw.NO_DATA_UNICODE  # without this, next pass thru loop uses old name
        continue

    try:
        owner = pw.format_string(row[owner_col], None)
    except:
# Locate the remaining columns by title in the row held in `rv`.
countries_col = rv.index(COLNAMES[5])
capacity_col = rv.index(COLNAMES[6])
owner_col = rv.index(COLNAMES[7])

# Sheet row 0 is skipped; `rv` is rebound to each subsequent row.
# All handlers use `except Exception` rather than a bare `except` so that
# KeyboardInterrupt/SystemExit propagate out of the loop.
for i in xrange(1, sheet.nrows):
    rv = sheet.row_values(i)
    try:
        ref = int(rv[ref_col])
        if not ref:
            print("-Error reading ref from: {0}".format(rv[ref_col]))
            continue
    except Exception:
        # rows whose reference cannot be converted to int are skipped silently
        continue

    try:
        project_type = pw.format_string(rv[type_col])
        if project_type not in PROJECT_TYPES_TO_READ:  # don't read all project types
            continue

        # an unrecognised fuel is not fatal: record it as "no data"
        try:
            fuel = pw.standardize_fuel(project_type, fuel_thesaurus)
        except Exception:
            print("-Error reading fuel: {0}".format(project_type))
            fuel = pw.NO_DATA_SET
    except Exception:
        print(u"-Error: Can't read project type for project {0}.".format(ref))
        continue

    try:
        status = pw.format_string(rv[status_col])
        if status != u"Registered":
# optional raw file(s) download
downloaded = pw.download(COUNTRY_NAME, {RAW_FILE_NAME: SOURCE_URL})

# set up fuel type thesaurus
fuel_thesaurus = pw.make_fuel_thesaurus()

# create dictionary for power plant objects
plants_dictionary = {}

# extract powerplant information from file(s)
print(u"Reading in plants...")

# read auxiliary plant information: one CSV row (as a dict) per plant,
# keyed by the formatted value of its 'name' column
with open(PLANT_AUX_FILE, 'r') as f:
    reader = csv.DictReader(f)
    aux_plant_info = {pw.format_string(row['name']): row for row in reader}

# read data from the workbook and parse
count = 1

wb = xlrd.open_workbook(RAW_FILE_NAME)
ws = wb.sheet_by_name(TAB)

# state carried between rows; initialised to the literal string u'None'
previous_owner = u'None'
previous_name = u'None'
plant_names = {}

# rows before START_ROW are skipped
for row_id in range(START_ROW, ws.nrows):

    rv = ws.row_values(row_id)
# Example #11
# 0
# set up fuel type thesaurus
fuel_thesaurus = pw.make_fuel_thesaurus()

# create dictionary for power plant objects
plants_dictionary = {}

# extract powerplant information from file(s)
print(u"Reading in plants...")

# read locations: formatted first column -> [float(col 1), float(col 2)]
locations_dictionary = {}
with open(LOCATION_FILE_NAME, 'r') as f:
    datareader = csv.reader(f)
    # The builtin next() replaces the Python-2-only .next() method; it
    # consumes the header row so the loop below sees data rows only.
    headers = next(datareader)
    for row in datareader:
        locations_dictionary[pw.format_string(
            row[0])] = [float(row[1]), float(row[2])]

# read commissioning years: formatted first column -> raw second column
commissioning_years_dictionary = {}
with open(COMMISSIONING_YEAR_FILE_NAME, 'r') as f:
    datareader = csv.reader(f)
    headers = next(datareader)
    for row in datareader:
        commissioning_years_dictionary[pw.format_string(row[0])] = row[1]

# read data from the workbook and parse
count = 1

wb = xlrd.open_workbook(RAW_FILE_NAME)
ws = wb.sheet_by_name(TAB)
# Example #12
# 0
# Locate the remaining columns by title in the row held in `rv`.
owner_col = rv.index(COLNAMES_1[2])
latitude_col = rv.index(COLNAMES_1[3])
longitude_col = rv.index(COLNAMES_1[4])
capacity_col = rv.index(COLNAMES_1[5])
fuel_col = rv.index(COLNAMES_1[6])
source_col = rv.index(COLNAMES_1[7])
date_col = rv.index(COLNAMES_1[8])

print(u"Reading file 1...")

# Sheet row 0 is skipped; error messages report the 1-based row number (i + 1).
for i in xrange(1, sheet.nrows):

    # read in row
    row = sheet.row_values(i)

    # only keep rows for the configured country
    if pw.format_string(row[country_col]) != COUNTRY_NAME:
        continue

    # `except Exception` replaces the bare `except` so KeyboardInterrupt and
    # SystemExit are not swallowed by the `continue`.
    try:
        name = pw.format_string(row[name_col], None)  # already in unicode
        if not name:
            print(u"-Error: No name on row {0}".format(i + 1))
            continue
    except Exception:
        print(u"-Error: Can't read name of plant on row {0}".format(i + 1))
        name = pw.NO_DATA_UNICODE  # without this, next pass thru loop uses old name
        continue

    try:
        owner = pw.format_string(row[owner_col], None)
    except:
print("Reading in plants...")
count_unit = 1
header_row = True
for row_id in xrange(0, ws.nrows):
    if header_row:
        # Still looking for the header: skip rows until the first cell reads
        # "Name". The header row itself is consumed here, so data handling
        # starts on the following row.
        # `except Exception` replaces the bare `except` so that
        # KeyboardInterrupt/SystemExit are not swallowed by `continue`.
        try:
            if ws.cell(row_id, 0).value == "Name":
                header_row = False
            else:
                continue
        except Exception:
            continue
    else:  # data rows
        rv = ws.row_values(row_id)
        try:
            name = pw.format_string(rv[COLS["name"]])
        except Exception:
            print(u"-Error: Can't read plant name.")
            continue
        # owner and capacity are optional: fall back to the no-data sentinels
        try:
            owner = pw.format_string(rv[COLS["owner"]], None)
        except Exception:
            owner = pw.NO_DATA_UNICODE
            print(u"-Error: Can't read plant owner.")
        try:
            capacity_max = float(rv[COLS["capacity_max"]])
        except Exception:
            capacity_max = pw.NO_DATA_NUMERIC
            print(
                u"-Error: Can't read capacity_max for plant {0}.".format(name))
        try:
    # use CEG code to lookup coordinates (only the first 16 characters of the
    # code are used as the lookup key)
    ceg_code_short = ceg_code[0:16]
    if ceg_code_short in plant_coordinates_keys:
        latitude = plant_coordinates[ceg_code_short]['latitude']
        longitude = plant_coordinates[ceg_code_short]['longitude']
        found_coordinates_count += 1
        geolocation_source = SOURCE_NAME
    else:
        #print(u"-Error: No coordinates for CEG ID: {0}".format(ceg_code))
        latitude = pw.NO_DATA_NUMERIC
        longitude = pw.NO_DATA_NUMERIC
        geolocation_source = pw.NO_DATA_UNICODE

    # get plant name
    name = pw.format_string(cells[1].findall("font/a")[0].text.strip(),None)

    # get operational date; an empty or unparseable date yields the numeric
    # no-data sentinel
    op_date = cells[2].findall("font")[0].text.strip()
    if op_date:
        try:
            d = parse_date(op_date)
            op_year = d.year
            found_operational_year_count += 1
        except Exception:
            # `except Exception` (not bare `except`) so KeyboardInterrupt and
            # SystemExit are not mistaken for a bad date
            op_year = pw.NO_DATA_NUMERIC
    else:
        op_year = pw.NO_DATA_NUMERIC

    # get plant capacity (number parsed with the current locale, then scaled
    # by CAPACITY_CONVERSION_TO_MW)
    capacity = CAPACITY_CONVERSION_TO_MW * locale.atof(cells[4].findall("font")[0].text)
    # Locate each expected column in the header row.
    name_col = headers.index(COLNAMES[0])
    country_col = headers.index(COLNAMES[1])
    plant_id_col = headers.index(COLNAMES[2])
    company_col = headers.index(COLNAMES[3])
    latitude_col = headers.index(COLNAMES[4])
    longitude_col = headers.index(COLNAMES[5])

    # read each row in the file
    count = 1
    capacity = pw.NO_DATA_NUMERIC  # no capacity data in EPTR
    fuel = pw.NO_DATA_SET  # no fuel data in EPTR

    # `except Exception` replaces the bare `except` throughout so that
    # KeyboardInterrupt/SystemExit are not swallowed.
    for row in datareader:

        try:
            name = pw.format_string(row[name_col], encoding=DATA_ENCODING)
        except Exception:
            print(u"Error: Can't read plant name.")
            continue  # must have plant name - don't read plant if not
        try:
            idnr = int(row[plant_id_col])
        except Exception:
            print(u"Error: Can't read ID for plant {0}.".format(name))
            continue
        try:
            latitude = float(row[latitude_col])
            longitude = float(row[longitude_col])
        except Exception:
            # NOTE(review): unreadable coordinates become (0.0, 0.0) here, not
            # pw.NO_DATA_NUMERIC - confirm this is intended.
            latitude, longitude = 0.0, 0.0
        try:
            owner = pw.format_string(row[company_col])
# Example #16
# 0
# Load each NGER CSV fully into memory as a list of row dicts. The files are
# opened in `with` blocks so each handle is closed as soon as it has been
# read, instead of being left open until garbage collection.
with open(NGER_FILENAME_1415) as f:
    nger_1415 = list(csv.DictReader(f))
with open(NGER_FILENAME_1314) as f:
    nger_1314 = list(csv.DictReader(f))
with open(NGER_FILENAME_1213) as f:
    nger_1213 = list(csv.DictReader(f))

# create a dictionary of namespaces (prefix -> URI) for the XML lookups below
ns = {"gml": "http://www.opengis.net/gml", "Electricity_Infrastructure": "WFS"}

# read data from XML file and parse
count = 1
with open(RAW_FILE_NAME, "rU") as f:
    tree = ET.parse(f)
    root = tree.getroot()
    # one gml:featureMember element per power station
    for station in tree.findall("gml:featureMember", ns):
        plant = station.find(
            "Electricity_Infrastructure:National_Major_Power_Stations", ns)
        name = pw.format_string(
            plant.find("Electricity_Infrastructure:NAME", ns).text)

        # get object id from AREMI (variable through time)
        plant_oid = plant.find("Electricity_Infrastructure:OBJECTID", ns).text
        # check if plant is already known, and skip if there is not a record (includes cases where AREMI has duplicated plants)
        if plant_oid not in linking_table:
            print(u"Error: Don't have prescribed ID for plant {0}; OID={1}.".
                  format(name, plant_oid))
            continue
        # get the assigned GPPD IDNR as an int, stripping the 'AUS' prefix
        # (the first three characters are dropped before conversion)
        plant_id = int(linking_table[plant_oid]['gppd_idnr'][3:])

        try:
            owner = pw.format_string(
                plant.find("Electricity_Infrastructure:OWNER", ns).text)
        except:
                                         subFolder=SAVE_CODE,
                                         filename=dataset["filename"])

    # Read the dataset CSV as dict rows. ID and name are mandatory; fuel
    # falls back to the no-data sentinel. `except Exception` replaces the bare
    # `except` so KeyboardInterrupt/SystemExit are not swallowed.
    with open(dataset_filename, "rbU") as f:
        datareader = csv.DictReader(f)
        for row in datareader:

            try:
                idval = int(row["gid"])
            except Exception:
                print("-Error: Can't read ID for line {0}, skipping.".format(
                    count))
                continue

            # the name comes from "nombre", else "comuna", else the no-data value
            try:
                name = pw.format_string(
                    row.get("nombre", row.get("comuna", pw.NO_DATA_UNICODE)))
            except Exception:
                print(u"-Error: Can't read name for ID {0}, skipping.".format(
                    idval))
                continue

            # the fuel column is looked up as "Tipo", then "Tipo " (with a
            # trailing space), then "Combustible"
            try:
                fuel_string = row.get(
                    "Tipo",
                    row.get("Tipo ", row.get("Combustible",
                                             pw.NO_DATA_UNICODE)))
                fuel = pw.standardize_fuel(fuel_string, fuel_thesaurus)
            except Exception:
                print(u"-Error: Can't read fuel for plant {0}.".format(name))
                fuel = pw.NO_DATA_SET
# Example #18
# 0
# Locate the remaining columns by name (the duplicated owner_col lookup has
# been removed; it assigned the same value twice).
owner_col = colnames.index("owners1")
latitude_col = colnames.index("latitude_start")
longitude_col = colnames.index("longitude_start")
location_col = colnames.index("location")
generation_col = colnames.index("expected_annual_generation_gwh_nbr")
generation_col2 = colnames.index("average_annual_generation_rng1_nbr_gwh")
status_col = colnames.index("status_of_plant_itf")

# extract data: fetch every row, then release the connection before parsing
rows = c.fetchall()
conn.close()

# `except Exception` replaces the bare `except` so KeyboardInterrupt and
# SystemExit are not swallowed by `continue`.
for row in rows:
    try:
        name = pw.format_string(row[name_col])
    except Exception:
        print(u"-Error: Can't read plant name.")
        continue  # must have plant name - don't read plant if not
    try:
        idnr = int(row[id_col])
    except Exception:
        print(u"-Error: Can't read plant ID: {0}".format(row[id_col]))
        continue  # must have ID number

    # skip non operational statuses
    if row[status_col] in NON_OPERATIONAL_STATUSES:
        continue

    try:
        capacity = float(row[capacity_col])
    # Locate each expected REPD column in the header row.
    id_col = headers.index(COLNAMES_REPD[0])
    name_col = headers.index(COLNAMES_REPD[1])
    fuel_col = headers.index(COLNAMES_REPD[2])
    capacity_col = headers.index(COLNAMES_REPD[3])
    status_col = headers.index(COLNAMES_REPD[4])
    x_coordinate_col = headers.index(COLNAMES_REPD[5])
    y_coordinate_col = headers.index(COLNAMES_REPD[6])
    owner_col = headers.index(COLNAMES_REPD[7])

    # read each row in the file
    # (`except Exception` replaces the bare `except` so KeyboardInterrupt and
    # SystemExit are not swallowed)
    count = 1
    for row in datareader:
        if u"Operational" not in row[status_col]:
            continue  # don't load non-operational plants
        try:
            name = pw.format_string(row[name_col])
        except Exception:
            print(u"-Error: Can't read plant name.")
            continue  # must have plant name - don't read plant if not
        try:
            idnr = int(row[id_col])
        except Exception:
            print(u"-Error: Can't read ref id.")
            continue  # must have ID number
        try:
            capacity = float(pw.format_string(
                row[capacity_col]))  # note: may need to convert to MW
        except Exception:
            # unreadable capacity is not fatal: record it as 0.0
            print(u"-Error: Can't read capacity for plant {0}.".format(name))
            capacity = 0.0
        try:
# Example #20
# 0
print(u"Reading in plants...")
coord_skip_count = 0  # rows dropped because their coordinates were unreadable
# read file line-by-line
with open(RAW_FILE_NAME, 'rU') as f:
    datareader = csv.reader(f)
    # The builtin next() replaces the Python-2-only .next() method; header
    # titles are lower-cased before the column lookups.
    headers = [x.lower() for x in next(datareader)]
    id_col = headers.index(COLNAMES[0])
    name_col = headers.index(COLNAMES[1])
    latitude_col = headers.index(COLNAMES[2])
    longitude_col = headers.index(COLNAMES[3])
    country_col = headers.index(COLNAMES[4])

    for row in datareader:

        idval = int(row[id_col])
        name = pw.format_string(row[name_col])
        # rows without usable coordinates are counted and skipped;
        # `except Exception` (not bare `except`) keeps KeyboardInterrupt and
        # SystemExit from being swallowed
        try:
            latitude = float(row[latitude_col])
            longitude = float(row[longitude_col])
        except Exception:
            coord_skip_count += 1
            continue
        country = row[
            country_col]  # note: this is the ISO3 code so no need to convert

        # assign ID number
        idnr = pw.make_id(SAVE_CODE, idval)
        new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude,
                                         longitude)
        new_plant = pw.PowerPlant(plant_idnr=idnr,
                                  plant_name=name,