Ejemplo n.º 1
0
print(u"Reading in plants...")
# Counts rows dropped because latitude/longitude could not be parsed.
coord_skip_count = 0
# read file line-by-line
# NOTE(review): the 'U' open-mode flag is rejected by Python 3.11+; confirm
# the target interpreter before changing it.
with open(RAW_FILE_NAME, 'rU') as f:
    datareader = csv.reader(f)
    # The next() builtin works on Python 2.6+ and 3.x, whereas the iterator's
    # .next() method exists only on Python 2.
    headers = [x.lower() for x in next(datareader)]
    # Resolve each column position from the lower-cased header row.
    id_col = headers.index(COLNAMES[0])
    name_col = headers.index(COLNAMES[1])
    latitude_col = headers.index(COLNAMES[2])
    longitude_col = headers.index(COLNAMES[3])
    country_col = headers.index(COLNAMES[4])

    for row in datareader:

        idval = int(row[id_col])
        name = pw.format_string(row[name_col])
        try:
            latitude = float(row[latitude_col])
            longitude = float(row[longitude_col])
        except (ValueError, IndexError):
            # Blank/non-numeric coordinate or a short row: skip this plant.
            # Narrowed from a bare except so Ctrl-C and SystemExit propagate.
            coord_skip_count += 1
            continue
        country = row[
            country_col]  # note: this is the ISO3 code so no need to convert

        # assign ID number
        idnr = pw.make_id(SAVE_CODE, idval)
        new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude,
                                         longitude)
        new_plant = pw.PowerPlant(plant_idnr=idnr,
                                  plant_name=name,
Ejemplo n.º 2
0
print(u"Reading in plants...")
with open(RAW_FILE_NAME, 'r') as f:
    data = json.load(f)

# select main content of page (the page id is hard-coded)
wiki = data['query']['pages']['85380']['revisions'][0]['*']

# get plants with location from map (first part of raw file)
plant_filename = 'http://www.sourcewatch.org/index.php/Category:Existing_coal_plants_in_China'
count = 0
for line in wiki.split('\n'):
    # Only lines containing '~[[' are treated as plant entries.
    if '~[[' in line:
        count += 1
        idnr = pw.make_id(SAVE_CODE, count)
        # Drop '[', ']' and ';', then split on '~': field 0 is the
        # "lat,lng" pair and field 1 is the plant name.
        plant = line.translate({ord(k): None for k in u'[];'}).split('~')
        name = pw.format_string(plant[1], encoding=None)
        # Split the coordinate field once; it is reused in the error message.
        coordinates = plant[0].split(',')
        if name == pw.NO_DATA_UNICODE:
            # A space now separates the ID from "at" (previously "...<id>at ...").
            print("-Error: Name problem with {0} ".format(idnr) +
                  "at {0}".format(coordinates))
        lat = coordinates[0]
        lng = coordinates[1]
        latitude_number = float(lat)
        longitude_number = float(lng)
        new_location = pw.LocationObject(description=pw.NO_DATA_UNICODE,
                                         latitude=latitude_number,
                                         longitude=longitude_number)
        new_plant = pw.PowerPlant(idnr,
                                  name,
Ejemplo n.º 3
0
country_thesaurus = pw.make_country_names_thesaurus()

# dictionary for power plant objects
plants_dictionary = {}

# load the raw plant records from the JSON file
print(u"Reading in plants...")

with open(RAW_FILE_NAME, 'r') as f:
    raw_plant_list = json.load(f)

for plant in raw_plant_list:

    idval = int(plant['id'])
    name = pw.format_string(plant['name'])

    # latitude, longitude and country all live under the 'location' key
    raw_location = plant['location']
    latitude = float(raw_location['latitude'])
    longitude = float(raw_location['longitude'])
    country = pw.standardize_country(raw_location['country']['value'],
                                     country_thesaurus)

    # build the plant ID and its location object
    idnr = pw.make_id(SAVE_CODE, idval)
    new_location = pw.LocationObject(pw.NO_DATA_UNICODE, latitude, longitude)
    new_plant = pw.PowerPlant(plant_idnr=idnr,
                              plant_name=name,
                              plant_country=country,
                              plant_location=new_location,
Ejemplo n.º 4
0
# Column positions are looked up in `rv`, which is assigned before this
# excerpt begins.
countries_col = rv.index(COLNAMES[5])
capacity_col = rv.index(COLNAMES[6])
owner_col = rv.index(COLNAMES[7])

# Walk the sheet rows starting at index 1; row 0 is not read by this loop.
for i in range(1, sheet.nrows):
    rv = sheet.row_values(i)
    try:
        ref = int(rv[ref_col])
        if not ref:
            print("-Error reading ref from: {0}".format(rv[ref_col]))
            continue
    except (ValueError, TypeError):
        # Cell does not convert to an integer: skip the row silently, as
        # before. Narrowed from a bare except so Ctrl-C/SystemExit propagate.
        continue

    try:
        project_type = pw.format_string(rv[type_col])
        if project_type not in PROJECT_TYPES_TO_READ:  # don't read all project types
            continue

        try:
            fuel = pw.standardize_fuel(project_type, fuel_thesaurus)
        except Exception:
            # Unrecognised fuel: report it and fall back to the no-data set.
            print("-Error reading fuel: {0}".format(project_type))
            fuel = pw.NO_DATA_SET
    except Exception:
        print(u"-Error: Can't read project type for project {0}.".format(ref))
        continue

    try:
        status = pw.format_string(rv[status_col])
        if status != u"Registered":
Ejemplo n.º 5
0
    # Numeric plant ID: the six characters ending five from the end of the
    # CEG code.
    plant_id = int(ceg_code[-11:-5])
    fuel = standardize_fuel_BRA(ceg_code)

    # use CEG code to lookup coordinates
    # (the lookup key is the first 16 characters of the code)
    ceg_code_short = ceg_code[0:16]
    if ceg_code_short in plant_coordinates_keys:
        latitude = plant_coordinates[ceg_code_short]['latitude']
        longitude = plant_coordinates[ceg_code_short]['longitude']
        found_coordinates_count += 1
    else:
        #print(u"-Error: No coordinates for CEG ID: {0}".format(ceg_code))
        latitude = pw.NO_DATA_NUMERIC
        longitude = pw.NO_DATA_NUMERIC

    # get plant name
    name = pw.format_string(cells[1].findall("font/a")[0].text.strip(),None)

    # get operational date
    op_date = cells[2].findall("font")[0].text.strip()
    if op_date:
        try:
            d = parse_date(op_date)
            op_year = d.year
            found_operational_year_count += 1
        except Exception:
            # Unparseable date: record "no data" instead of aborting.
            # Narrowed from a bare except so Ctrl-C/SystemExit propagate.
            op_year = pw.NO_DATA_NUMERIC
    else:
        op_year = pw.NO_DATA_NUMERIC

    # get plant capacity
    # (locale.atof parses the number according to the current locale settings)
    capacity = CAPACITY_CONVERSION_TO_MW * locale.atof(cells[4].findall("font")[0].text)
Ejemplo n.º 6
0
# set up fuel type thesaurus
fuel_thesaurus = pw.make_fuel_thesaurus()

# create dictionary for power plant objects
plants_dictionary = {}

# extract powerplant information from file(s)
print(u"Reading in plants...")

# read locations: formatted first column -> [second column, third column]
locations_dictionary = {}
with open(LOCATION_FILE_NAME, 'r') as f:
    datareader = csv.reader(f)
    # Consume the header row. The next() builtin works on Python 2.6+ and
    # 3.x, whereas the iterator's .next() method exists only on Python 2.
    headers = next(datareader)
    for row in datareader:
        locations_dictionary[pw.format_string(row[0])] = [row[1], row[2]]

# read commissioning years: formatted first column -> second column
commissioning_years_dictionary = {}
with open(COMMISSIONING_YEAR_FILE_NAME, 'r') as f:
    datareader = csv.reader(f)
    headers = next(datareader)  # consume the header row
    for row in datareader:
        commissioning_years_dictionary[pw.format_string(row[0])] = row[1]

# read data from csv and parse
count = 1

wb = xlrd.open_workbook(RAW_FILE_NAME)
ws = wb.sheet_by_name(TAB)
Ejemplo n.º 7
0
# Open the three source workbooks and select the relevant tab in each.
print("Loading Form 923-2")
wb1 = xlrd.open_workbook(RAW_FILE_NAME_923_2)
ws1 = wb1.sheet_by_name(TAB_NAME_923_2)
print("Loading Form 860-2")
wb2 = xlrd.open_workbook(RAW_FILE_NAME_860_2)
ws2 = wb2.sheet_by_name(TAB_NAME_860_2)
print("Loading Form 860-3")
wb3 = xlrd.open_workbook(RAW_FILE_NAME_860_3)
ws3 = wb3.sheet_by_name(TAB_NAME_860_3)

# read in plants from File 2 of EIA-860
print("Reading in plants...")
plants_dictionary = {}
# Rows 0 and 1 are skipped; data is read from row index 2 onwards.
for row_id in range(2, ws2.nrows):
    rv = ws2.row_values(row_id)  # row value
    name = pw.format_string(rv[COLS_860_2['name']])
    idnr = pw.make_id(SAVE_CODE, int(rv[COLS_860_2['idnr']]))
    capacity = 0.0  # starts at zero for every plant read here
    generation = pw.PlantGenerationObject()
    owner = pw.format_string(str(rv[COLS_860_2['owner']]))
    # A blank or non-numeric coordinate cell becomes the no-data marker.
    # Narrowed from bare excepts so Ctrl-C/SystemExit are not swallowed.
    try:
        latitude = float(rv[COLS_860_2['lat']])
    except (ValueError, TypeError):
        latitude = pw.NO_DATA_NUMERIC
    try:
        longitude = float(rv[COLS_860_2['lng']])
    except (ValueError, TypeError):
        longitude = pw.NO_DATA_NUMERIC
    location = pw.LocationObject(u"", latitude, longitude)
    new_plant = pw.PowerPlant(idnr,
                              name,
Ejemplo n.º 8
0
print("Reading in plants...")
count_unit = 1
# True until the header row (first cell equal to "Name") has been seen.
header_row = True
for row_id in range(0, ws.nrows):
    if header_row:
        # Still searching for the header; every row before it is ignored,
        # and the header row itself is not parsed as data.
        try:
            if ws.cell(row_id, 0).value == "Name":
                header_row = False
            else:
                continue
        except Exception:
            continue
    else:   # data rows
        rv = ws.row_values(row_id)
        # Bare excepts below were narrowed to Exception (or to the concrete
        # conversion errors) so Ctrl-C and SystemExit are no longer swallowed.
        try:
            name = pw.format_string(rv[COLS["name"]])
        except Exception:
            print(u"-Error: Can't read plant name.")
            continue
        try:
            owner = pw.format_string(rv[COLS["owner"]], None)
        except Exception:
            owner = pw.NO_DATA_UNICODE
            print(u"-Error: Can't read plant owner.")
        try:
            capacity_max = float(rv[COLS["capacity_max"]])
        except (ValueError, TypeError, IndexError):
            capacity_max = pw.NO_DATA_NUMERIC
            print(u"-Error: Can't read capacity_max for plant {0}.".format(name))
        try:
            gen_type = pw.format_string(rv[COLS["gen_type"]]) # generation technology type
Ejemplo n.º 9
0
            # Resolve column positions from the header row by name.
            fuel_col = headers.index(COLNAMES[2])
            capacity_col = headers.index(COLNAMES[3])
            location_col = headers.index(COLNAMES[4])
            commissioning_year_col = headers.index(COLNAMES[6])
            owner_col = headers.index(COLNAMES[8])
            generation_col = headers.index(COLNAMES[9])
            source_col = headers.index(COLNAMES[10])
            url_col = headers.index(COLNAMES[11])
            country_col = headers.index(COLNAMES[12])
            latitude_col = headers.index(COLNAMES[13])
            longitude_col = headers.index(COLNAMES[14])

            # read each row in the file
            # Bare excepts were narrowed to Exception so Ctrl-C and
            # SystemExit are no longer swallowed while reading rows.
            for row in datareader:
                try:
                    name = pw.format_string(row[name_col])
                    if not name:  # ignore accidental blank lines
                        continue
                except Exception:
                    print(u"-Error: Can't read plant name.")
                    continue  # must have plant name - don't read plant if not
                try:
                    idnr = str(row[id_col])
                    if not idnr:  # must have plant ID - don't read plant if not
                        print(u"-Error: Null ID for plant {0}.".format(name))
                        continue
                except Exception:
                    print(u"-Error: Can't read ID for plant {0}.".format(name))
                    continue  # must have plant ID - don't read plant if not
                try:
                    capacity = float(
Ejemplo n.º 10
0
                year_built_list)  # TODO: fix this
            # Record the plant accumulated so far.
            # NOTE(review): the location is hard-coded to latitude 0.0 /
            # longitude 0.0 here — confirm this placeholder is intended.
            new_location = pw.LocationObject(latitude=0.0, longitude=0.0)
            new_plant = pw.PowerPlant(plant_idnr=plant_idnr,
                                      plant_name=name,
                                      plant_country=COUNTRY_NAME,
                                      plant_capacity=total_capacity,
                                      plant_fuel=fuel_type_set,
                                      plant_source=URL,
                                      plant_location=new_location)
            plants_dictionary[plant_idnr] = new_plant
            print("Recording plant {0} with ID: {1}, capacity: {2}, fuel: {3}".
                  format(name, plant_idnr, total_capacity, fuel_type_set))

        # next process this plant
        # Reset the per-plant accumulators with the current line's values.

        name = pw.format_string(name_str)
        plant_idnr = pw.make_id(SAVE_CODE, i)
        capacity_list = [capacity]
        year_built_list = [year_built]
        # NOTE(review): this binds fuel_type_set to the same object as
        # fuel_type (no copy); if that object is mutable, the update() below
        # mutates it too — confirm that is intended.
        fuel_type_set = fuel_type

    else:  # not a new plant, just a new line
        # Fold this line's values into the current plant's accumulators.
        if capacity_val:
            capacity_list.append(capacity)
        if year_built_val:
            year_built_list.append(year_built)
        # NOTE(review): `not in` tests fuel_type as a single element while
        # update() adds each item of fuel_type; verify the type of fuel_type.
        if fuel_type and fuel_type not in fuel_type_set:
            fuel_type_set.update(fuel_type)

# complete loop, add final plant
total_capacity = sum(capacity_list)
Ejemplo n.º 11
0
    # Column positions are looked up in `rv`, which is assigned before this
    # excerpt begins.
    name_col = rv.index(COLNAMES[1])
    unit_col = rv.index(COLNAMES[2])
    year_col = rv.index(COLNAMES[3])
    capacity_col = rv.index(COLNAMES[4])
    type_col = rv.index(COLNAMES[5])
    fuel1_col = rv.index(COLNAMES[6])
    fuel2_col = rv.index(COLNAMES[7])
    generation_col = rv.index(COLNAMES[8])

    # Row 0 is not read by this loop; data starts at row index 1.
    for i in range(1, sheet.nrows):

        # read in row
        rv = sheet.row_values(i)

        try:
            name = pw.format_string(rv[name_col])
            if not name:
                continue  # don't read rows that lack a plant name (footnotes, etc)
        except Exception:
            # Narrowed from a bare except so Ctrl-C/SystemExit propagate.
            print(
                u"-Error: Can't read plant name for plant on row {0}.".format(
                    i))
            continue

        try:
            id_val = int(rv[id_col])
            if not id_val:
                continue  # don't read rows that lack an ID (footnotes, etc)
        except (ValueError, TypeError):
            # Cell does not convert to an integer.
            print(u"-Error: Can't read ID for plant on row {0}.".format(i))
            continue
Ejemplo n.º 12
0
                # Each Placemark's description holds HTML; the attribute rows
                # are dug out of a nested table inside it.
                placemarks = child.findall(u"kml:Placemark", ns)
                for pm in placemarks:
                    description = pm.find("kml:description",
                                          ns).text  # html content
                    content = html.fromstring(description)
                    # Rows of the inner table: the (1 + shift)-th body table,
                    # its second <tr>, first <td>, first nested <table>.
                    rows = content.findall("body/table")[1 + shift].findall(
                        "tr")[1].find("td").find("table").findall("tr")
                    status = u"N/A"
                    plant_id = u""
                    for row in rows:
                        # First cell is the field label, second is its value.
                        left = row.findall("td")[0].text
                        right = row.findall("td")[1].text

                        # find CEG ID
                        if left == u"CEG":
                            plant_id = pw.format_string(right.strip(), None)

                        # make ID string formatting consistent (is not consistent in raw data)
                        # use only leading alpha chars and 6-digit number; drop trailing digits after "-"
                        # NOTE(review): this normalisation runs on every row
                        # once plant_id is set, not only on the CEG row.
                        if plant_id and u'Null' not in plant_id:

                            # No dots yet: insert them after characters 3, 5
                            # and 7 and keep up to character 13.
                            if u'.' not in plant_id:
                                plant_id = plant_id[0:3] + u'.' + plant_id[
                                    3:5] + u'.' + plant_id[
                                        5:7] + u'.' + plant_id[7:13]

                            # Already dotted but has a "-" suffix: keep only
                            # the first 16 characters.
                            elif u'-' in plant_id:
                                plant_id = plant_id[0:16]

                    # remove non-operating plants
                    #if status != u"Operação":
Ejemplo n.º 13
0
# XML namespace prefixes used in the find()/findall() paths below.
ns = {
    "gml":
    "http://www.opengis.net/gml",
    "Electricity_Infrastructure":
    "http://win-amap-ext03:6080/arcgis/services/Electricity_Infrastructure/MapServer/WFSServer"
}

# read data from XML file and parse
count = 1
# NOTE(review): the 'U' open-mode flag is rejected by Python 3.11+; confirm
# the target interpreter before changing it.
with open(RAW_FILE_NAME, "rU") as f:
    tree = ET.parse(f)
    root = tree.getroot()
    # One gml:featureMember element per power station.
    for station in tree.findall("gml:featureMember", ns):
        plant = station.find(
            "Electricity_Infrastructure:National_Major_Power_Stations", ns)
        name = pw.format_string(
            plant.find("Electricity_Infrastructure:NAME", ns).text)
        plant_id = int(
            plant.find("Electricity_Infrastructure:OBJECTID", ns).text)
        try:
            owner = pw.format_string(
                plant.find("Electricity_Infrastructure:OWNER", ns).text)
        except Exception:
            # Missing or unreadable OWNER element: fall back to no-data.
            # Narrowed from a bare except so Ctrl-C/SystemExit propagate.
            owner = pw.NO_DATA_UNICODE
        fuel = pw.standardize_fuel(
            plant.find("Electricity_Infrastructure:PRIMARYFUELTYPE", ns).text,
            fuel_thesaurus)
        try:
            capacity = plant.find("Electricity_Infrastructure:GENERATIONMW",
                                  ns).text
            capacity = float(capacity)
        except:
Ejemplo n.º 14
0
    # Resolve column positions from the header row by name.
    id_col = headers.index(COLNAMES_REPD[0])
    name_col = headers.index(COLNAMES_REPD[1])
    fuel_col = headers.index(COLNAMES_REPD[2])
    capacity_col = headers.index(COLNAMES_REPD[3])
    status_col = headers.index(COLNAMES_REPD[4])
    x_coordinate_col = headers.index(COLNAMES_REPD[5])
    y_coordinate_col = headers.index(COLNAMES_REPD[6])
    owner_col = headers.index(COLNAMES_REPD[7])

    # read each row in the file
    # Bare excepts below were narrowed so Ctrl-C and SystemExit are no longer
    # swallowed while reading rows.
    count = 1
    for row in datareader:
        if u"Operational" not in row[status_col]:
            continue  # don't load non-operational plants
        try:
            name = pw.format_string(row[name_col])
        except Exception:
            print(u"-Error: Can't read plant name.")
            continue  # must have plant name - don't read plant if not
        try:
            idnr = int(row[id_col])
        except (ValueError, IndexError):
            print(u"-Error: Can't read ref id.")
            continue  # must have ID number
        try:
            capacity = float(pw.format_string(
                row[capacity_col]))  # note: may need to convert to MW
        except Exception:
            # Unreadable capacity is reported and recorded as 0.0.
            print(u"-Error: Can't read capacity for plant {0}.".format(name))
            capacity = 0.0
        try:
Ejemplo n.º 15
0
# Column positions are looked up in `rv`, which is assigned before this
# excerpt begins.
owner_col = rv.index(COLNAMES_1[2])
latitude_col = rv.index(COLNAMES_1[3])
longitude_col = rv.index(COLNAMES_1[4])
capacity_col = rv.index(COLNAMES_1[5])
fuel_col = rv.index(COLNAMES_1[6])
source_col = rv.index(COLNAMES_1[7])
date_col = rv.index(COLNAMES_1[8])

print(u"Reading file 1...")

# Row 0 is not read by this loop; data starts at row index 1.
for i in range(1, sheet.nrows):

    # read in row
    row = sheet.row_values(i)

    # Only rows for the configured country are processed.
    if pw.format_string(row[country_col]) != COUNTRY_NAME:
        continue

    try:
        name = pw.format_string(row[name_col], None)  # already in unicode
        if not name:
            print(u"-Error: No name on row {0}".format(i + 1))
            continue
    except Exception:
        # Narrowed from a bare except so Ctrl-C/SystemExit propagate.
        print(u"-Error: Can't read name of plant on row {0}".format(i + 1))
        name = pw.NO_DATA_UNICODE  # without this, next pass thru loop uses old name
        continue

    try:
        owner = pw.format_string(row[owner_col], None)
    except:
Ejemplo n.º 16
0
                                         subFolder=SAVE_CODE,
                                         filename=dataset["filename"])

    # Each row is a dict keyed by the CSV header names.
    # NOTE(review): the 'U' open-mode flag is rejected by Python 3.11+, and
    # csv on Python 3 expects text mode — confirm the target interpreter.
    with open(dataset_filename, "rbU") as f:
        datareader = csv.DictReader(f)
        for row in datareader:

            try:
                idval = int(row["gid"])
            except (KeyError, TypeError, ValueError):
                # Missing "gid" column, empty cell, or non-numeric value.
                # NOTE(review): `count` is not updated within this excerpt;
                # verify it tracks the line number.
                print("-Error: Can't read ID for line {0}, skipping.".format(
                    count))
                continue

            try:
                # Prefer "nombre", then "comuna", then the no-data marker.
                name = pw.format_string(
                    row.get("nombre", row.get("comuna", pw.NO_DATA_UNICODE)))
            except Exception:
                print(u"-Error: Can't read name for ID {0}, skipping.".format(
                    idval))
                continue

            try:
                # The fuel column name varies between datasets: "Tipo",
                # "Tipo " (trailing space) or "Combustible".
                fuel_string = row.get(
                    "Tipo",
                    row.get("Tipo ", row.get("Combustible",
                                             pw.NO_DATA_UNICODE)))
                fuel = pw.standardize_fuel(fuel_string, fuel_thesaurus)
            except Exception:
                # Narrowed from a bare except so Ctrl-C/SystemExit propagate.
                print(u"-Error: Can't read fuel for plant {0}.".format(name))
                fuel = pw.NO_DATA_SET