def _fetch_row(cursor, stmt, params):
    """Execute `stmt` with bound `params` and return the first result row."""
    cursor.execute(stmt, params)
    return cursor.fetchone()


def _mw_to_gw(capacity_mw):
    """Convert megawatts to gigawatts; a SQL NULL sum (None) becomes 0."""
    if capacity_mw is None:
        return 0
    # float divisor so integer sums are not truncated under Python 2
    return capacity_mw / 1000.0


def country_summary(db_conn, country, iso_code):
    """
    Get a country-level summary of the database.

    All values supplied by the caller or by the fuel thesaurus are passed to
    SQLite as bound parameters; only hard-coded column names are formatted
    into the SQL text.

    Parameters
    ----------
    db_conn : sqlite3.Connection
        Open database connection.
    country : str
        Standard country name used in the database.
    iso_code : str
        3 character country code.

    Returns
    -------
    Dict holding the summarized metrics for the country.  It always contains
    'country', 'iso_code' and 'count'.  If the country has no powerplants,
    nothing else is added.  Otherwise it also holds:
        'total_capacity_gw', 'max_capacity_mw', 'count_distinct_fuel',
        'count_fuel_<fuel>' and 'capacity_gw_fuel_<fuel>' for every key of
        pw.make_fuel_thesaurus() (lower-cased, whitespace -> '_'),
        'count_distinct_<field>' for name/owner/source,
        'count_null_<field>' for a fixed list of columns,
        'count_null_fuel', 'count_null_generation_gwh_all' and
        'count_generation_gwh_<year>' for 2012-2016.
    Capacity sums that are NULL in the database are reported as 0.

    """
    summary = {'country': country, 'iso_code': iso_code}
    by_country = {'country': country}
    fuel_columns = ('fuel1', 'fuel2', 'fuel3', 'fuel4')
    generation_fields = [
        'generation_gwh_{0}'.format(year) for year in range(2012, 2017)
    ]

    c = db_conn.cursor()
    try:
        # plant count, total capacity and largest single plant in one pass
        stmt = '''SELECT COUNT(*), SUM(capacity_mw), MAX(capacity_mw)
                    FROM powerplants
                    WHERE (country=:country)'''
        count, total_capacity_mw, max_capacity_mw = _fetch_row(
            c, stmt, by_country)
        summary['count'] = count

        # skip rest of summary if there aren't any powerplants
        if not count:
            return summary

        # SUM() is NULL when every capacity is NULL; report that as 0,
        # matching the fuel-specific capacities below
        summary['total_capacity_gw'] = _mw_to_gw(total_capacity_mw)
        summary['max_capacity_mw'] = max_capacity_mw

        # count distinct fuel types across the four fuel columns
        # (UNION removes duplicates between and within the sub-selects)
        fuel_union = ' UNION '.join(
            'SELECT {0} FROM powerplants '
            'WHERE (country=:country AND {0} IS NOT NULL)'.format(column)
            for column in fuel_columns
        )
        stmt = 'SELECT COUNT(*) FROM ({0}) AS temp'.format(fuel_union)
        summary['count_distinct_fuel'], = _fetch_row(c, stmt, by_country)

        # fuel-specific summaries: plant count and capacity per fuel
        fuel_match = ' OR '.join(
            '{0}=:fuel'.format(column) for column in fuel_columns
        )
        stmt = '''SELECT COUNT(*), SUM(capacity_mw) FROM powerplants
                    WHERE (country=:country AND ({0}))'''.format(fuel_match)
        for fuel in pw.make_fuel_thesaurus().keys():
            fuel_column_name = '_'.join(fuel.lower().split())
            fuel_count, fuel_capacity_mw = _fetch_row(
                c, stmt, {'country': country, 'fuel': fuel})
            summary['count_fuel_{0}'.format(fuel_column_name)] = fuel_count
            summary['capacity_gw_fuel_{0}'.format(fuel_column_name)] = \
                _mw_to_gw(fuel_capacity_mw)

        # count distinct fields
        # (column names cannot be bound as parameters; these are constants)
        for field in ('name', 'owner', 'source'):
            stmt = '''SELECT COUNT(DISTINCT({field})) FROM powerplants
                        WHERE (country=:country
                            AND {field} IS NOT NULL)'''.format(field=field)
            summary['count_distinct_{0}'.format(field)], = _fetch_row(
                c, stmt, by_country)

        # count null fields
        count_null_list = [
            'name', 'pw_idnr', 'capacity_mw', 'year_of_capacity_data',
            'owner', 'source', 'url', 'latitude', 'longitude'
        ]
        for field in count_null_list:
            stmt = '''SELECT COUNT(*) FROM powerplants
                        WHERE (country=:country
                            AND {field} IS NULL)'''.format(field=field)
            summary['count_null_{0}'.format(field)], = _fetch_row(
                c, stmt, by_country)

        # count plants with no fuel recorded in any fuel column
        all_fuels_null = ' AND '.join(
            '{0} IS NULL'.format(column) for column in fuel_columns
        )
        stmt = '''SELECT COUNT(*) FROM powerplants
                    WHERE (country=:country AND {0})'''.format(all_fuels_null)
        summary['count_null_fuel'], = _fetch_row(c, stmt, by_country)

        # count plants with no generation data for any year
        all_generation_null = ' AND '.join(
            '{0} IS NULL'.format(field) for field in generation_fields
        )
        stmt = '''SELECT COUNT(*) FROM powerplants
                    WHERE (country=:country AND {0})'''.format(
            all_generation_null)
        summary['count_null_generation_gwh_all'], = _fetch_row(
            c, stmt, by_country)

        # count plants that do have generation data, per year
        for field in generation_fields:
            stmt = '''SELECT COUNT(*) FROM powerplants
                        WHERE (country=:country
                            AND {field} IS NOT NULL)'''.format(field=field)
            summary['count_{0}'.format(field)], = _fetch_row(
                c, stmt, by_country)
    finally:
        c.close()

    return summary
Beispiel #2
0
# Module-level setup for reading the SourceWatch "Existing coal plants in
# China" category page.  This fragment relies on RAW_FILE_NAME and SOURCE_NAME,
# which are defined outside the lines shown here.

# presumably the directory where this script's output is saved -- the actual
# path is whatever pw.make_file_path returns for "src_bin"; TODO confirm
SAVE_DIRECTORY = pw.make_file_path(fileType="src_bin")
# human-readable page for the category whose content is fetched below
SOURCE_URL = u"http://www.sourcewatch.org/index.php/Category:Existing_coal_plants_in_China"
# NOTE(review): looks like the reference year of the data -- confirm
YEAR = 2017

# other params
# API endpoint plus a query string asking for the revision content of the
# category page, returned as JSON
URL_BASE = "http://www.sourcewatch.org/api.php?"
URL_END = "action=query&titles=Category:Existing_coal_plants_in_China&prop=revisions"\
 +"&rvprop=content&format=json"

# optional raw file(s) download
URL = URL_BASE + URL_END
# maps the local raw file name to the URL it is fetched from
FILES = {RAW_FILE_NAME: URL}
# runs at import time; the meaning of the return value depends on pw.download
DOWNLOAD_FILES = pw.download(SOURCE_NAME, FILES)

# set up fuel type thesaurus
fuel_thesaurus = pw.make_fuel_thesaurus()

# set up country name thesaurus
country_thesaurus = pw.make_country_names_thesaurus()

# create dictionary for power plant objects
plants_dictionary = {}

# read in plants
print(u"Reading in plants...")
# the raw file is parsed as a single JSON document
with open(RAW_FILE_NAME, 'r') as f:
    data = json.load(f)

# select main content of page
# '85380' is a hard-coded key under 'pages' (presumably the wiki page id --
# verify); the text is taken from the '*' field of the first listed revision
wiki = data['query']['pages']['85380']['revisions'][0]['*']
Beispiel #3
0
	def setUp(self):
		self.fuel_thesaurus = pw.make_fuel_thesaurus()