def country_summary(db_conn, country, iso_code):
    """
    Get a country-level summary of the database.

    All values (country name, fuel names) are bound as SQL parameters; only
    column names taken from the hard-coded lists below are formatted into
    the statements.

    Parameters
    ----------
    db_conn : sqlite3.Connection
        Open database connection.
    country : str
        Standard country name used in the database.
    iso_code : str
        3 character country code.

    Returns
    -------
    Dict holding the summarized metrics for the country. If the country has
    no rows in `powerplants`, only 'country', 'iso_code' and 'count' are set.

    """
    summary = {'country': country, 'iso_code': iso_code}
    cursor = db_conn.cursor()
    params = {'country': country}

    def scalar(stmt, bindings):
        """Execute a query that yields one row with one column; return it."""
        value, = cursor.execute(stmt, bindings).fetchone()
        return value

    def mw_to_gw(capacity_mw):
        """Convert MW to GW; SUM() is NULL (None) when no capacity is known."""
        if capacity_mw is None:
            return 0
        return capacity_mw / 1000.0

    # plant count, total capacity and largest single capacity in one pass
    stmt = '''SELECT COUNT(*), SUM(capacity_mw), MAX(capacity_mw)
              FROM powerplants
              WHERE (country=:country)'''
    count, total_capacity_mw, max_capacity_mw = cursor.execute(stmt, params).fetchone()
    summary['count'] = count

    # skip rest of summary if there aren't any powerplants
    if not count:
        return summary

    summary['total_capacity_gw'] = mw_to_gw(total_capacity_mw)
    summary['max_capacity_mw'] = max_capacity_mw

    fuel_columns = ('fuel1', 'fuel2', 'fuel3', 'fuel4')

    # count distinct fuel types across all four fuel columns
    union = ' UNION '.join(
        'SELECT DISTINCT({0}) FROM powerplants '
        'WHERE (country=:country AND {0} IS NOT NULL)'.format(column)
        for column in fuel_columns
    )
    stmt = 'SELECT COUNT(*) FROM ({0}) AS temp'.format(union)
    summary['count_distinct_fuel'] = scalar(stmt, params)

    # fuel-specific summaries; the statement is loop-invariant, only the
    # bound fuel value changes
    fuel_match = ' OR '.join('{0}=:fuel'.format(column) for column in fuel_columns)
    stmt = '''SELECT COUNT(*), SUM(capacity_mw)
              FROM powerplants
              WHERE (country=:country AND ({0}))'''.format(fuel_match)
    for fuel in pw.make_fuel_thesaurus().keys():
        fuel_column_name = '_'.join(fuel.lower().split())
        bindings = {'country': country, 'fuel': fuel}
        fuel_count, fuel_capacity_mw = cursor.execute(stmt, bindings).fetchone()
        summary['count_fuel_{0}'.format(fuel_column_name)] = fuel_count
        summary['capacity_gw_fuel_{0}'.format(fuel_column_name)] = mw_to_gw(fuel_capacity_mw)

    # count distinct fields
    count_distinct_list = ['name', 'owner', 'source']
    for field in count_distinct_list:
        stmt = '''SELECT COUNT(DISTINCT({field}))
                  FROM powerplants
                  WHERE (country=:country AND {field} IS NOT NULL)'''.format(field=field)
        summary['count_distinct_{0}'.format(field)] = scalar(stmt, params)

    # count null fields
    count_null_list = [
        'name', 'pw_idnr', 'capacity_mw', 'year_of_capacity_data',
        'owner', 'source', 'url', 'latitude', 'longitude',
    ]
    for field in count_null_list:
        stmt = '''SELECT COUNT(*)
                  FROM powerplants
                  WHERE (country=:country AND {field} IS NULL)'''.format(field=field)
        summary['count_null_{0}'.format(field)] = scalar(stmt, params)

    # count plants with no fuel recorded in any fuel column
    all_fuel_null = ' AND '.join('{0} IS NULL'.format(column) for column in fuel_columns)
    stmt = '''SELECT COUNT(*)
              FROM powerplants
              WHERE (country=:country AND {0})'''.format(all_fuel_null)
    summary['count_null_fuel'] = scalar(stmt, params)

    generation_fields = ['generation_gwh_{0}'.format(year) for year in range(2012, 2017)]

    # count plants with null generation data for all years
    all_generation_null = ' AND '.join(
        '{0} IS NULL'.format(field) for field in generation_fields
    )
    stmt = '''SELECT COUNT(*)
              FROM powerplants
              WHERE (country=:country AND {0})'''.format(all_generation_null)
    summary['count_null_generation_gwh_all'] = scalar(stmt, params)

    # count plants that DO have generation data, per year
    for field in generation_fields:
        stmt = '''SELECT COUNT(*)
                  FROM powerplants
                  WHERE (country=:country AND {field} IS NOT NULL)'''.format(field=field)
        summary['count_{0}'.format(field)] = scalar(stmt, params)

    return summary
# output location, human-readable source page and data year
SAVE_DIRECTORY = pw.make_file_path(fileType="src_bin")
SOURCE_URL = u"http://www.sourcewatch.org/index.php/Category:Existing_coal_plants_in_China"
YEAR = 2017

# other params: API endpoint plus the query string requesting the page's
# revision content as JSON
URL_BASE = "http://www.sourcewatch.org/api.php?"
URL_END = (
    "action=query&titles=Category:Existing_coal_plants_in_China&prop=revisions"
    + "&rvprop=content&format=json"
)

# optional raw file(s) download
URL = URL_BASE + URL_END
FILES = {RAW_FILE_NAME: URL}
DOWNLOAD_FILES = pw.download(SOURCE_NAME, FILES)

# set up fuel type thesaurus
fuel_thesaurus = pw.make_fuel_thesaurus()

# set up country name thesaurus
country_thesaurus = pw.make_country_names_thesaurus()

# create dictionary for power plant objects
plants_dictionary = {}

# read in plants
print(u"Reading in plants...")
with open(RAW_FILE_NAME) as f:
    data = json.loads(f.read())

# select main content of page ('85380' is presumably the page id used as
# the key in the API response -- confirm against the raw file)
wiki = data['query']['pages']['85380']['revisions'][0]['*']
def setUp(self):
    """Attach the result of pw.make_fuel_thesaurus() to the instance."""
    thesaurus = pw.make_fuel_thesaurus()
    self.fuel_thesaurus = thesaurus