CKAN API documentation: http://docs.ckan.org/en/latest/api/ Python CKAN library: https://github.com/ckan/ckanapi Started by David Megginson, 2017-09-17 """ import config # common configuration import ckancrawler, csv, logging, sys logging.basicConfig(level=logging.INFO) logger = logging.getLogger("hxl-datasets") """Set up a logger""" # Open a connection to HDX crawler = ckancrawler.Crawler(config.CKAN_URL, delay=0, user_agent=config.USER_AGENT) # Open a CSV output stream output = csv.writer(sys.stdout) output.writerow([ 'Dataset name', 'Dataset title', 'HDX org', 'Source', 'Date created', 'Date updated', ]) # Iterate through all the datasets ("packages") on HDX tagged as HXL
logging.basicConfig(level=logging.INFO)

logger = logging.getLogger("add-quickcharts")
"""Python logging object"""

# The script needs exactly three positional arguments after its own name.
if len(sys.argv) != 4:
    print("Usage: python add-quick-charts.py <model> <pattern> <org>")
    sys.exit(2)

# Safe to unpack: the length check above guarantees three values.
model, pattern, org = sys.argv[1:4]

# Connect to CKAN using the URL and API key from the shared configuration
crawler = ckancrawler.Crawler(
    ckan_url=config.CONFIG.get("ckanurl"),
    apikey=config.CONFIG.get("apikey"),
    delay=0,
)

# Look up the model dataset and list the views attached to its first resource
package = crawler.ckan.action.package_show(id=model)
resource_id = package["resources"][0]["id"]
views = crawler.ckan.action.resource_view_list(id=resource_id)

# Take the configuration from the first Quick Charts view, if there is one
qc_configuration = None
for view in views:
    if view["view_type"] != "hdx_hxl_preview":
        # Not a Quick Charts view; keep looking
        continue
    qc_configuration = view["hxl_preview_config"]
    logger.info("Loaded Quick Charts configuration")
    break

if qc_configuration is None:
    print("Failed to find Quick Charts configuration for {}".format(model))
logging.basicConfig(level=logging.INFO) logger = logging.getLogger("SADD scan") SEX_ATTRIBUTES = ['f', 'm', 'i'] """ HXL attributes associated with sex-disaggregated data """ AGE_ATTRIBUTES = ['infants', 'children', 'adolescents', 'adults', 'elderly'] """ HXL attributes associated with age-disaggregated data """ # Result accumulators sex_disaggregated_results = set() age_disaggregated_results = set() crawler = ckancrawler.Crawler('https://data.humdata.org', user_agent='HDX-Developer-2015', delay=0) counter = 0 # Iterate through all packages/datasets tagged "hxl" for package in crawler.packages('vocab_Topics:hxl'): # note what we've found in this dataset has_sex_atts = False has_age_atts = False # Iterate through resources until we find a match (or fail to) for resource in package['resources']: # Is the resource HXLated?
def setUp(self):
    """Create a crawler and store it on the instance as ``self.crawler``.

    The crawler is built with ``delay=0``: only a small number of packages
    are looked at, so no pause between requests is needed.
    """
    crawler = ckancrawler.Crawler(delay=0)
    self.crawler = crawler