# set up projection transformation
# thanks to John A. Stevenson: http://all-geo.org/volcan01010/2012/11/change-coordinates-with-pyproj/
wgs84 = pyproj.Proj(
    "+init=EPSG:4326"
)  # LatLon with WGS84 datum used by GPS units and Google Earth
osgb36 = pyproj.Proj("+init=EPSG:27700")  # UK Ordnance Survey, 1936 datum

# raw files download
# True if specified --download, otherwise False
FILES = {RAW_FILE_NAME_REPD: URL_REPD, RAW_FILE_NAME_DUKES: URL_DUKES}
DOWNLOAD_FILES = pw.download(
    u"UK Renewable Energy Planning Database and DUKES", FILES)

# set up fuel type thesaurus
fuel_thesaurus = pw.make_fuel_thesaurus()

# set up country name thesaurus
country_thesaurus = pw.make_country_names_thesaurus()

# create dictionary for power plant objects
plants_dictionary = {}

# load GEO and CARMA for matching coordinates
geo_database = pw.load_database(GEO_DATABASE_FILE)
print("Loaded {0} plants from GEO database.".format(len(geo_database)))
carma_database = pw.load_database(CARMA_DATABASE_FILE)
print("Loaded {0} plants from CARMA database.".format(len(carma_database)))

# read in plant matches file
with open(PLANT_MATCHES, "rbU") as f:
def country_summary(db_conn, country, iso_code):
    """
	Get a country-level summary of the database.

	Parameters
	----------
	db_conn : sqlite3.Connection
		Open database connection.
	country : str
		Standard country name used in the database.
	iso_code : str
		3 character country code.

	Returns
	-------
	Dict holding the summarized metrics for the country.

	"""
    summary = {'country': country, 'iso_code': iso_code}
    c = db_conn.cursor()

    # count number of powerplants
    stmt = '''SELECT COUNT(*) FROM powerplants
				WHERE (country=?)'''
    query = c.execute(stmt, (iso_code, ))
    summary['count'], = query.fetchone()

    # skip rest of summary if there aren't any powerplants
    if not summary['count']:
        return summary

    # compute total capacity
    stmt = '''SELECT SUM(capacity_mw) FROM powerplants
				WHERE (country=?)'''
    query = c.execute(stmt, (iso_code, ))
    total_capacity_mw, = query.fetchone()
    summary['total_capacity_gw'] = total_capacity_mw / 1000

    # compute maximum single capacity
    stmt = '''SELECT MAX(capacity_mw) FROM powerplants
				WHERE (country=?)'''
    query = c.execute(stmt, (iso_code, ))
    summary['max_capacity_mw'], = query.fetchone()

    # count distinct fuel types
    stmt = '''SELECT COUNT(*) FROM (
				SELECT DISTINCT(primary_fuel) from powerplants
					WHERE (country="{0}" AND primary_fuel IS NOT NULL)
				UNION
				SELECT DISTINCT(other_fuel1) from powerplants
					WHERE (country="{0}" AND other_fuel1 IS NOT NULL)
				UNION
				SELECT DISTINCT(other_fuel2) from powerplants
					WHERE (country="{0}" AND other_fuel2 IS NOT NULL)
				UNION
				SELECT DISTINCT(other_fuel3) from powerplants
					WHERE (country="{0}" AND other_fuel3 IS NOT NULL)
				) AS temp'''.format(iso_code)
    query = c.execute(stmt)
    summary['count_distinct_fuel'], = query.fetchone()

    # fuel-specific summaries
    fuel_list = pw.make_fuel_thesaurus().keys()
    for fuel in fuel_list:
        fuel_column_name = '_'.join(fuel.lower().split())
        stmt = '''SELECT COUNT(*) FROM powerplants
					WHERE (country=?
						AND (primary_fuel="{fuel}"
							OR other_fuel1="{fuel}"
							OR other_fuel2="{fuel}"
							OR other_fuel3="{fuel}"))'''.format(fuel=fuel)
        query = c.execute(stmt, (iso_code, ))
        summary['count_fuel_{0}'.format(fuel_column_name)], = query.fetchone()

        stmt = '''SELECT SUM(capacity_mw) FROM powerplants
					WHERE (country=?
						AND (primary_fuel="{fuel}"
							OR other_fuel1="{fuel}"
							OR other_fuel2="{fuel}"
							OR other_fuel3="{fuel}"))'''.format(fuel=fuel)
        query = c.execute(stmt, (iso_code, ))
        fuel_capacity_mw, = query.fetchone()
        summary_name = 'capacity_gw_fuel_{0}'.format(fuel_column_name)
        if fuel_capacity_mw is None:
            summary[summary_name] = 0
        else:
            summary[summary_name] = fuel_capacity_mw / 1000

    # count distinct fields
    count_distinct_list = ['name', 'owner', 'source']
    for field in count_distinct_list:
        stmt = '''SELECT COUNT(DISTINCT({field})) FROM powerplants
					WHERE (country=?
						AND {field} IS NOT NULL)'''.format(field=field)
        query = c.execute(stmt, (iso_code, ))
        summary['count_distinct_{0}'.format(field)], = query.fetchone()

    # count null fields
    count_null_list = [
        'name', 'gppd_idnr', 'capacity_mw', 'year_of_capacity_data', 'owner',
        'source', 'url', 'latitude', 'longitude'
    ]
    for field in count_null_list:
        stmt = '''SELECT COUNT(*) FROM powerplants
					WHERE (country=?
						AND {field} IS NULL)'''.format(field=field)
        query = c.execute(stmt, (iso_code, ))
        summary['count_null_{0}'.format(field)], = query.fetchone()

    # count null fuel occurrences
    stmt = '''SELECT COUNT(*) FROM powerplants
				WHERE (country=?
					AND primary_fuel IS NULL
					AND other_fuel1 IS NULL
					AND other_fuel2 IS NULL
					AND other_fuel3 is NULL)'''
    query = c.execute(stmt, (iso_code, ))
    summary['count_null_fuel'], = query.fetchone()

    # count plants with a wepp_id
    stmt = '''SELECT COUNT(*) FROM powerplants
				WHERE (country=?
					AND wepp_id IS NOT NULL)'''
    query = c.execute(stmt, (iso_code, ))
    summary['count_wepp_id'], = query.fetchone()

    # count plants with a wepp_id
    stmt = '''SELECT SUM(capacity_mw) FROM powerplants
				WHERE (country=?
					AND wepp_id IS NOT NULL)'''
    query = c.execute(stmt, (iso_code, ))
    wepp_id_mw, = query.fetchone()
    if wepp_id_mw is None:
        summary['capacity_gw_wepp_id'] = 0
    else:
        summary['capacity_gw_wepp_id'] = wepp_id_mw / 1000.

    # count null generation data for all years
    stmt = '''SELECT COUNT(*) FROM powerplants
				WHERE (country=?
					AND generation_gwh_2015 IS NULL
					AND generation_gwh_2016 IS NULL
					AND generation_gwh_2017 IS NULL
					AND estimated_generation_gwh IS NULL
					)'''
    query = c.execute(stmt, (iso_code, ))
    summary['count_null_generation_gwh_all'], = query.fetchone()

    # count non-null generation years
    for year in range(2013, 2018):
        field = 'generation_gwh_{0}'.format(year)
        stmt = '''SELECT COUNT(*) FROM powerplants
					WHERE (country=?
						AND {field} IS NOT NULL)'''.format(field=field)
        query = c.execute(stmt, (iso_code, ))
        summary['count_{0}'.format(field)], = query.fetchone()

    # count non-null generation sources
    stmt = '''SELECT COUNT(*) FROM powerplants
				WHERE (country=?
					AND generation_data_source IS NOT NULL)'''
    query = c.execute(stmt, (iso_code, ))
    summary['count_generation_data_source'], = query.fetchone()

    # count null estimated generation
    field = 'estimated_generation_gwh'
    stmt = '''SELECT COUNT(*) FROM powerplants
				WHERE (country=?
					AND {field} IS NOT NULL)'''.format(field=field)
    query = c.execute(stmt, (iso_code, ))
    summary['count_{0}'.format(field)], = query.fetchone()

    return summary