Example #1
0
CKAN API documentation: http://docs.ckan.org/en/latest/api/
Python CKAN library: https://github.com/ckan/ckanapi

Started by David Megginson, 2017-09-17
"""

import config  # common configuration
import ckancrawler, csv, logging, sys

# Configure root logging at INFO level and grab this script's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("hxl-datasets")
"""Logger for this script's messages"""

# HDX connection: the CKAN URL and user agent come from the shared config
crawler = ckancrawler.Crawler(
    config.CKAN_URL,
    delay=0,
    user_agent=config.USER_AGENT,
)

# CSV rows are written to standard output, starting with the header row
output = csv.writer(sys.stdout)

header_row = [
    'Dataset name', 'Dataset title',
    'HDX org', 'Source',
    'Date created', 'Date updated',
]
output.writerow(header_row)

# Iterate through all the datasets ("packages") on HDX tagged as HXL
Example #2
0
# Configure root logging at INFO level and grab this script's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("add-quickcharts")
"""Logger for this script's progress messages"""

# Exactly three command-line arguments are required
if len(sys.argv) != 4:
    print("Usage: python add-quick-charts.py <model> <pattern> <org>")
    sys.exit(2)

# The length check above guarantees exactly three values here
model, pattern, org = sys.argv[1:]

# CKAN connection, using the URL and API key from the configuration
crawler = ckancrawler.Crawler(
    ckan_url=config.CONFIG.get("ckanurl"),
    apikey=config.CONFIG.get("apikey"),
    delay=0,
)

# The model view is looked up on the first resource of the model dataset
package = crawler.ckan.action.package_show(id=model)
resource_id = package["resources"][0]["id"]
views = crawler.ckan.action.resource_view_list(id=resource_id)

qc_configuration = None
for view in views:
    if view["view_type"] != "hdx_hxl_preview":
        # Not a Quick Charts view; keep looking
        continue
    qc_configuration = view["hxl_preview_config"]
    logger.info("Loaded Quick Charts configuration")
    break

if qc_configuration is None:
    print("Failed to find Quick Charts configuration for {}".format(model))
# Configure root logging at INFO level and grab this script's logger
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("SADD scan")

SEX_ATTRIBUTES = ["f", "m", "i"]
""" HXL attributes that mark sex-disaggregated data """

AGE_ATTRIBUTES = [
    "infants",
    "children",
    "adolescents",
    "adults",
    "elderly",
]
""" HXL attributes that mark age-disaggregated data """

# Result accumulators, one set per kind of disaggregation
sex_disaggregated_results = set()
age_disaggregated_results = set()

crawler = ckancrawler.Crawler(
    "https://data.humdata.org",
    user_agent="HDX-Developer-2015",
    delay=0,
)

counter = 0

# Iterate through all packages/datasets tagged "hxl"
for package in crawler.packages('vocab_Topics:hxl'):

    # note what we've found in this dataset
    has_sex_atts = False
    has_age_atts = False

    # Iterate through resources until we find a match (or fail to)
    for resource in package['resources']:

        # Is the resource HXLated?
Example #4
0
 def setUp(self):
     """Create a Crawler with delay=0 and store it as self.crawler."""
     # no delay, since we'll look at only a small number of packages
     self.crawler = ckancrawler.Crawler(delay=0)