from owslib.csw import CatalogueServiceWeb
from utilities import normalize_service_urn

# One dict per service reference found, across every variable and CSW server
# queried below. Keys: variable, scheme, url, server, title.
var_results = []

# variables_to_query and cf_name_filters are used as parallel sequences: the
# filter at a given position selects the records for the variable name at the
# same position.
for var_name, single_var_filter in zip(variables_to_query, cf_name_filters):
    for url in known_csw_servers:
        try:
            csw = CatalogueServiceWeb(url, timeout=20)
            # A single request for up to 1000 records; no paging is done here,
            # so a server that returns fewer per request yields a partial list.
            csw.getrecords2(constraints=[single_var_filter], maxrecords=1000, esn='full')
            # csw.records maps record identifier -> record object. The title
            # lives on the record object; calling .title() on the identifier
            # key would only title-case the identifier string.
            for item in csw.records.values():
                for d in item.references:
                    var_results.append(dict(variable=var_name,
                                            scheme=normalize_service_urn(d['scheme']),
                                            url=d['url'],
                                            server=url,
                                            title=item.title))
        except Exception as e:
            # Best effort: report the failing server and move on to the next.
            # Exception (not BaseException) so Ctrl-C / SystemExit still stop
            # the run instead of being reported as a server failure.
            print("- FAILED: %s - %s" % (url, e))

# <markdowncell>

# <div class="error"><strong>Paginating CSW Records</strong> - Some servers have a maximum number of records you can retrieve at once. See: https://github.com/ioos/system-test/issues/126</div>

# <markdowncell>

# #### Load results into a Pandas DataFrame
# Horizontal bar chart of total_services, ordered by 'Number of services'
# in descending order.
# (Alternative tabular view: HTML(total_services.to_html()))
total_services.sort(
    'Number of services', ascending=False
).plot(kind="barh", figsize=(10, 8))

# <markdowncell>

# <div class="error"><strong>Service Types</strong> - URNs for the same service type are being identified differently.  There should be a consistent way of representing each service, or a complete mapping needs to be made available. See: https://github.com/ioos/system-test/issues/57</div>

# <markdowncell>

# #### Attempt to normalize the services manually

# <codecell>

from utilities import normalize_service_urn
# Work on a deep copy so `df` keeps its original scheme values, then replace
# every scheme with the value normalize_service_urn returns for it.
normalized_urns = df.copy(deep=True)
normalized_urns["scheme"] = normalized_urns["scheme"].map(normalize_service_urn)

# <codecell>

# Count the rows for each normalized scheme into a one-column frame, then
# chart those counts in descending order as horizontal bars.
normalized_urns_summary = pd.DataFrame(
    normalized_urns.groupby("scheme").size(),
    columns=("Number of services",)
)
normalized_urns_summary.sort(
    'Number of services', ascending=False
).plot(kind="barh", figsize=(10, 6))

# <markdowncell>

# #### The number of service types for each model type

# <codecell>

import math

# Count the rows for every (model, scheme) pair, with groups in sorted key
# order, and hold the counts in a single 'Number of services' column.
model_service_summary = pd.DataFrame(
    normalized_urns.groupby(["model", "scheme"], sort=True).size(),
    columns=("Number of services",)
)
# Horizontal bar chart of total_services, ordered by 'Number of services'
# in descending order.
# (Alternative tabular view: HTML(total_services.to_html()))
total_services.sort(
    'Number of services', ascending=False
).plot(kind="barh", figsize=(10, 8))

# <markdowncell>

# <div class="error"><strong>Service Types</strong> - URNs for the same service type are being identified differently.  There should be a consistent way of representing each service, or a complete mapping needs to be made available. See: https://github.com/ioos/system-test/issues/57</div>

# <markdowncell>

# #### Attempt to normalize the services manually

# <codecell>

from utilities import normalize_service_urn
# Work on a deep copy so `df` keeps its original scheme values, then replace
# every scheme with the value normalize_service_urn returns for it.
normalized_urns = df.copy(deep=True)
normalized_urns["scheme"] = normalized_urns["scheme"].map(normalize_service_urn)

# <codecell>

# Count the rows for each normalized scheme into a one-column frame, then
# chart those counts in descending order as horizontal bars.
normalized_urns_summary = pd.DataFrame(
    normalized_urns.groupby("scheme").size(),
    columns=("Number of services",)
)
normalized_urns_summary.sort(
    'Number of services', ascending=False
).plot(kind="barh", figsize=(10, 6))

# <markdowncell>

# #### The number of service types for each model type

# <codecell>

import math

# Count the rows for every (model, scheme) pair, with groups in sorted key
# order, and hold the counts in a single 'Number of services' column.
model_service_summary = pd.DataFrame(
    normalized_urns.groupby(["model", "scheme"], sort=True).size(),
    columns=("Number of services",)
)