def dcat_to_ckan(dcat_dict):
    """Build a CKAN package dict from a DCAT dataset dict.

    Top-level fields are copied as ``title`` -> ``title``,
    ``description`` -> ``notes`` and ``landingPage`` -> ``url``.  Every
    entry of ``keyword`` becomes a ``{'name': ...}`` tag, and every entry
    of ``distribution`` becomes a resource dict.  ``issued``, ``modified``,
    ``identifier`` (as ``guid``), the publisher details and ``language``
    are stored as ``{'key': ..., 'value': ...}`` extras.

    ``owner_org`` is only set when ``publisher`` is a non-dict string or a
    dict with a truthy ``name``.

    :param dcat_dict: dict describing one DCAT dataset.
    :returns: dict with the keys ``title``, ``notes``, ``url``, ``tags``,
        ``extras``, ``resources`` and, when a publisher is given,
        ``owner_org``.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` where it
    # is not defined so the publisher check does not raise NameError.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    package_dict = {
        'title': dcat_dict.get('title'),
        'notes': dcat_dict.get('description'),
        'url': dcat_dict.get('landingPage'),
    }

    # ``or []`` (rather than a ``get`` default) also covers a key that is
    # present but null, which would otherwise fail when iterated.
    package_dict['tags'] = [
        {'name': keyword} for keyword in (dcat_dict.get('keyword') or [])
    ]

    extras = [
        {'key': 'dcat_{0}'.format(key), 'value': dcat_dict.get(key)}
        for key in ('issued', 'modified')
    ]
    extras.append({'key': 'guid', 'value': dcat_dict.get('identifier')})
    package_dict['extras'] = extras

    dcat_publisher = dcat_dict.get('publisher')
    if isinstance(dcat_publisher, string_types):
        # Publisher given as a plain name.
        package_dict['owner_org'] = munge.munge_name(dcat_publisher)
        extras.append({'key': 'dcat_publisher_name',
                       'value': dcat_publisher})
    elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
        # Publisher given as an object; a missing name means no owner.
        publisher_name = dcat_publisher.get('name')
        package_dict['owner_org'] = munge.munge_name(publisher_name)
        extras.append({'key': 'dcat_publisher_name',
                       'value': publisher_name})
        extras.append({'key': 'dcat_publisher_email',
                       'value': dcat_publisher.get('mbox')})

    extras.append({'key': 'language',
                   'value': dcat_dict.get('language', [])})

    package_dict['resources'] = []
    for distribution in (dcat_dict.get('distribution') or []):
        # The format is the last segment of the media type, e.g.
        # 'text/csv' -> 'csv'; anything that cannot be split yields ''.
        media_type = distribution.get('mediaType')
        if hasattr(media_type, 'split'):
            resource_format = media_type.split('/')[-1]
        else:
            resource_format = ''

        resource = {
            'name': distribution.get('title'),
            'description': distribution.get('description'),
            'url': (distribution.get('downloadURL') or
                    distribution.get('accessURL')),
            'format': resource_format,
        }

        byte_size = distribution.get('byteSize')
        if byte_size:
            try:
                resource['size'] = int(byte_size)
            except (TypeError, ValueError):
                # Best effort: a size that is not an integer (bad string,
                # list, dict, ...) is left out instead of aborting.
                pass

        package_dict['resources'].append(resource)

    return package_dict
def dcat_to_ckan(dcat_dict):
    """Build a CKAN package dict from a DCAT dataset dict.

    Top-level fields are copied as ``title`` -> ``title``,
    ``description`` -> ``notes`` and ``landingPage`` -> ``url``.  Every
    entry of ``keyword`` becomes a ``{'name': ...}`` tag, and every entry
    of ``distribution`` becomes a resource dict.  ``issued``, ``modified``,
    ``identifier`` (as ``guid``), the publisher details and ``language``
    are stored as ``{'key': ..., 'value': ...}`` extras.

    ``owner_org`` is only set when ``publisher`` is a non-dict string or a
    dict with a truthy ``name``.

    :param dcat_dict: dict describing one DCAT dataset.
    :returns: dict with the keys ``title``, ``notes``, ``url``, ``tags``,
        ``extras``, ``resources`` and, when a publisher is given,
        ``owner_org``.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` where it
    # is not defined so the publisher check does not raise NameError.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    package_dict = {
        'title': dcat_dict.get('title'),
        'notes': dcat_dict.get('description'),
        'url': dcat_dict.get('landingPage'),
    }

    # ``or []`` (rather than a ``get`` default) also covers a key that is
    # present but null, which would otherwise fail when iterated.
    package_dict['tags'] = [
        {'name': keyword} for keyword in (dcat_dict.get('keyword') or [])
    ]

    extras = [
        {'key': 'dcat_{0}'.format(key), 'value': dcat_dict.get(key)}
        for key in ('issued', 'modified')
    ]
    extras.append({'key': 'guid', 'value': dcat_dict.get('identifier')})
    package_dict['extras'] = extras

    dcat_publisher = dcat_dict.get('publisher')
    if isinstance(dcat_publisher, string_types):
        # Publisher given as a plain name.
        package_dict['owner_org'] = munge.munge_name(dcat_publisher)
        extras.append({'key': 'dcat_publisher_name',
                       'value': dcat_publisher})
    elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
        # Publisher given as an object; a missing name means no owner.
        publisher_name = dcat_publisher.get('name')
        package_dict['owner_org'] = munge.munge_name(publisher_name)
        extras.append({'key': 'dcat_publisher_name',
                       'value': publisher_name})
        extras.append({'key': 'dcat_publisher_email',
                       'value': dcat_publisher.get('mbox')})

    extras.append({'key': 'language',
                   'value': dcat_dict.get('language', [])})

    package_dict['resources'] = []
    for distribution in (dcat_dict.get('distribution') or []):
        # The format is the last segment of the media type, e.g.
        # 'text/csv' -> 'csv'; anything that cannot be split yields ''.
        media_type = distribution.get('mediaType')
        if hasattr(media_type, 'split'):
            resource_format = media_type.split('/')[-1]
        else:
            resource_format = ''

        resource = {
            'name': distribution.get('title'),
            'description': distribution.get('description'),
            'url': (distribution.get('downloadURL') or
                    distribution.get('accessURL')),
            'format': resource_format,
        }

        byte_size = distribution.get('byteSize')
        if byte_size:
            try:
                resource['size'] = int(byte_size)
            except (TypeError, ValueError):
                # Best effort: a size that is not an integer (bad string,
                # list, dict, ...) is left out instead of aborting.
                pass

        package_dict['resources'].append(resource)

    return package_dict
def dcat_to_ckan(dcat_dict):
    """Build a CKAN package dict from a DCAT dataset dict.

    Top-level fields are copied as ``title`` -> ``title``,
    ``description`` -> ``notes`` and ``landingPage`` -> ``url``.  Every
    entry of ``keyword`` becomes a ``{'name': ...}`` tag and every entry of
    ``distribution`` becomes a resource dict.  A truthy ``govType`` adds a
    capitalized vocabulary tag and the ``gov_type`` field.

    The remaining metadata is stored as ``{'key': ..., 'value': ...}``
    extras, in this order: ``dcat_issued``, ``dcat_modified``, ``guid``,
    publisher details, ``theme``, ``frequency``, ``temporal_start`` /
    ``temporal_end``, ``spatial_text``, ``version_notes``,
    ``dataDictionary``, ``quality`` and ``language``.  Except for the
    first three, ``frequency`` and ``language``, an extra is only added
    when its source value is truthy.

    ``owner_org`` is only set when ``publisher`` is a non-dict string or a
    dict with a truthy ``name``.

    :param dcat_dict: dict describing one DCAT dataset.
    :returns: dict with the keys ``title``, ``notes``, ``url``, ``tags``,
        ``extras``, ``resources`` and, depending on the input,
        ``gov_type`` and ``owner_org``.
    """
    # ``basestring`` only exists on Python 2; fall back to ``str`` where it
    # is not defined so the publisher check does not raise NameError.
    try:
        string_types = basestring
    except NameError:
        string_types = str

    package_dict = {
        'title': dcat_dict.get('title'),
        'notes': dcat_dict.get('description'),
        'url': dcat_dict.get('landingPage'),
    }

    # ``or []`` (rather than a ``get`` default) also covers a key that is
    # present but null, which would otherwise fail when iterated.
    package_dict['tags'] = [
        {'name': keyword} for keyword in (dcat_dict.get('keyword') or [])
    ]

    # Government level, expressed through the tag vocabulary.  The
    # vocabulary id is read from the VOCABULARY_GOV_TYPE_ID environment
    # variable, with a hard-coded id as fallback.
    gov_type = dcat_dict.get('govType')
    if gov_type:
        gov_type = gov_type.capitalize()
        package_dict['tags'].append({
            'name': gov_type,
            'vocabulary_id': os.environ.get(
                'VOCABULARY_GOV_TYPE_ID',
                '910b5e72-2723-466d-a892-4be1e4129120'),
        })
        package_dict['gov_type'] = gov_type

    extras = [
        {'key': 'dcat_{0}'.format(key), 'value': dcat_dict.get(key)}
        for key in ('issued', 'modified')
    ]
    extras.append({'key': 'guid', 'value': dcat_dict.get('identifier')})
    package_dict['extras'] = extras

    dcat_publisher = dcat_dict.get('publisher')
    if isinstance(dcat_publisher, string_types):
        # Publisher given as a plain name.
        package_dict['owner_org'] = munge.munge_name(dcat_publisher)
        extras.append({'key': 'dcat_publisher_name',
                       'value': dcat_publisher})
    elif isinstance(dcat_publisher, dict) and dcat_publisher.get('name'):
        # Publisher given as an object; a missing name means no owner.
        publisher_name = dcat_publisher.get('name')
        package_dict['owner_org'] = munge.munge_name(publisher_name)
        extras.append({'key': 'dcat_publisher_name',
                       'value': publisher_name})
        extras.append({'key': 'dcat_publisher_email',
                       'value': dcat_publisher.get('mbox')})
        extras.append({'key': 'publisher_type',
                       'value': dcat_publisher.get('position')})

    theme = dcat_dict.get('theme')
    if theme:
        extras.append({'key': 'theme', 'value': theme.title()})

    extras.append({'key': 'frequency',
                   'value': dcat_dict.get('accrualPeriodicity', '')})

    temporal = dcat_dict.get('temporal')
    if temporal:
        # Expected shape is 'start/end'.  Any other shape (no slash,
        # several slashes, not a string) cannot be mapped onto the two
        # extras, so they are skipped instead of aborting the conversion.
        bounds = temporal.split('/') if hasattr(temporal, 'split') else []
        if len(bounds) == 2:
            extras.append({'key': 'temporal_start', 'value': bounds[0]})
            extras.append({'key': 'temporal_end', 'value': bounds[1]})

    # Optional fields copied verbatim: (DCAT key, extras key).
    optional_fields = (
        ('spatial', 'spatial_text'),
        ('comments', 'version_notes'),
        ('dataDictionary', 'dataDictionary'),
        ('quality', 'quality'),
    )
    for dcat_key, extra_key in optional_fields:
        value = dcat_dict.get(dcat_key)
        if value:
            extras.append({'key': extra_key, 'value': value})

    extras.append({'key': 'language',
                   'value': dcat_dict.get('language', [])})

    package_dict['resources'] = []
    for distribution in (dcat_dict.get('distribution') or []):
        # The format is the last segment of the media type, e.g.
        # 'text/csv' -> 'csv'; anything that cannot be split yields ''.
        media_type = distribution.get('mediaType')
        if hasattr(media_type, 'split'):
            resource_format = media_type.split('/')[-1]
        else:
            resource_format = ''

        resource = {
            'name': distribution.get('title'),
            'description': distribution.get('description'),
            'url': (distribution.get('downloadURL') or
                    distribution.get('accessURL')),
            'format': resource_format,
        }

        byte_size = distribution.get('byteSize')
        if byte_size:
            try:
                resource['size'] = int(byte_size)
            except (TypeError, ValueError):
                # Best effort: a size that is not an integer (bad string,
                # list, dict, ...) is left out instead of aborting.
                pass

        package_dict['resources'].append(resource)

    return package_dict
sys.exit(2) for opt, arg in opts: if opt in ("-h", "--help"): usage() sys.exit() elif opt in ("-d", "--datasets"): datasets = remote.action.package_list() for d in datasets: print d elif opt in ("-s", "--harvest", "-p", "--purge-harvest"): catalog = dcat_to_utf8_dict(arg) # If purge mode is activated, then delete all org's datasets if opt in ("-p", "--purge-harvest"): org_name = catalog['dataset'][0]['publisher']['name'] org_id = munge.munge_name(org_name) delete_organization_datasets(remote, org_id) for dcat_dataset in catalog.get('dataset', []): ckan_dataset = converters.dcat_to_ckan(dcat_dataset) ckan_dataset['name'] = munge.munge_title_to_name(ckan_dataset['title']) ckan_dataset['state'] = 'active' print 'Dataset "%s"' % ckan_dataset['title'], 'with %d resources' % len(ckan_dataset['resources']) print datetime.datetime.utcnow() new_dataset = upsert_dataset(remote, ckan_dataset) if new_dataset: print 'Dataset upserted' else: print 'Something went wrong' elif opt in ("-q", "--find-datasets"): datasets = find_datasets_with_query(remote, arg)