def archive_package(package_id, context, consecutive_errors=0):
    """Download every resource of a dataset and report the first failure.

    The dataset is loaded through the ``package_show`` action and each of
    its resources is handed to ``download``. Processing stops at the first
    resource that has no URL, or whose download raises a link-checker or
    download error; that problem is reported through ``save_package_issue``.

    :param package_id: id passed to the ``package_show`` action.
    :param context: action context, forwarded to ``package_show``,
        ``download`` and ``save_package_issue``.
    :param consecutive_errors: not read anywhere in this function body.
    :return: whatever ``save_package_issue`` returns for the first failing
        resource, or ``None`` when no resource triggered one of the
        handled failures.
    """
    # Imported here rather than at module level, as in the original code.
    from ckanext.archiver import tasks

    package = toolkit.get_action('package_show')(context, {'id': package_id})
    extras_dict = extras_to_dict(package)
    resources = package.get('resources', [])

    # NOTE(review): is_activity_package, old_hash and result are assigned
    # but never read in this function -- confirm whether they can go.
    is_activity_package = extras_dict.get('filetype') == 'activity'

    log.debug('Archiving dataset: {0} ({1} resources)'.format(
        package.get('name'), len(resources)))

    for resource in resources:
        if not resource.get('url', ''):
            return save_package_issue(context, package, extras_dict,
                                      'no-url', 'URL missing')

        old_hash = resource.get('hash')
        try:
            result = download(context, resource, data_formats=DATA_FORMATS)
        except tasks.LinkCheckerError as e:
            message = str(e)
            if 'URL unobtainable: HTTP' in message:
                # Keep only the text before the first ' on'. partition()
                # returns the whole message when ' on' is absent, whereas
                # slicing with find() would cut off the last character
                # (find() returns -1 in that case).
                message = message.partition(' on')[0]
            return save_package_issue(context, package, extras_dict,
                                      'url-error', message)
        except tasks.DownloadError as e:
            message = str(e)
            if 'exceeds maximum allowed value' in message:
                message = 'File too big, not downloading'
            return save_package_issue(context, package, extras_dict,
                                      'download-error', message)
def archive_package(package_id, context, consecutive_errors=0):
    """Download every resource of a dataset and report the first failure.

    The dataset is loaded through the ``package_show`` action and each of
    its resources is handed to ``download``. Processing stops at the first
    resource that has no URL, or whose download raises a link-checker or
    download error; that problem is reported through ``save_package_issue``.

    :param package_id: id passed to the ``package_show`` action.
    :param context: action context, forwarded to ``package_show``,
        ``download`` and ``save_package_issue``.
    :param consecutive_errors: not read anywhere in this function body.
    :return: whatever ``save_package_issue`` returns for the first failing
        resource, or ``None`` when no resource triggered one of the
        handled failures.
    """
    # Imported here rather than at module level, as in the original code.
    from ckanext.archiver import tasks

    package = toolkit.get_action('package_show')(context, {'id': package_id})
    extras_dict = extras_to_dict(package)
    resources = package.get('resources', [])

    # NOTE(review): is_activity_package, old_hash and result are assigned
    # but never read in this function -- confirm whether they can go.
    is_activity_package = extras_dict.get('filetype') == 'activity'

    log.info('Archiving dataset: {0} ({1} resources)'.format(
        package.get('name'), len(resources)))

    for resource in resources:
        if not resource.get('url', ''):
            return save_package_issue(context, package, extras_dict,
                                      'no-url', 'URL missing')

        old_hash = resource.get('hash')
        try:
            result = download(context, resource, data_formats=DATA_FORMATS)
        except tasks.LinkCheckerError as e:
            message = str(e)
            if 'URL unobtainable: HTTP' in message:
                # Keep only the text before the first ' on'. partition()
                # returns the whole message when ' on' is absent, whereas
                # slicing with find() would cut off the last character
                # (find() returns -1 in that case).
                message = message.partition(' on')[0]
            return save_package_issue(context, package, extras_dict,
                                      'url-error', message)
        except tasks.DownloadError as e:
            message = str(e)
            if 'exceeds maximum allowed value' in message:
                message = 'File too big, not downloading'
            return save_package_issue(context, package, extras_dict,
                                      'download-error', message)
def write_to_csv(self, publisher):
    """Build a CSV export of datasets and return it as a byte string.

    :param publisher: ``'all'`` exports every dataset listed by
        ``package_list`` that the current user is authorized to read;
        ``'template'`` produces only the header row; any other value is
        passed to ``self._get_packages_for_org``.
    :return: the CSV document with every field quoted. Columns come from
        ``self.CSV_MAPPING``, minus the ``state`` column.

    An ``ObjectNotFound`` raised anywhere during the export is turned into
    ``abort(404, 'Organization not found')``.
    """
    context = {'model': model, 'user': c.user or c.author}
    try:
        if publisher == 'all':
            package_ids = p.toolkit.get_action('package_list')(context, {})
            package_show = p.toolkit.get_action('package_show')
            packages = []
            for pkg_id in package_ids:
                try:
                    package = package_show(context, {'id': pkg_id})
                except p.toolkit.NotAuthorized:
                    # Datasets the user may not read are left out of the
                    # export instead of failing the whole request.
                    log.warning(
                        'User %s not authorized to read package %s',
                        c.user, pkg_id)
                    continue
                package.pop('state', None)
                packages.append(package)
        elif publisher == 'template':
            # Just return an empty CSV file with just the headers
            packages = []
        else:
            packages = self._get_packages_for_org(context, publisher)

        f = io.BytesIO()
        # try/finally so the buffer is released even if a row fails to
        # serialize.
        try:
            fieldnames = [n[0] for n in self.CSV_MAPPING if n[0] != 'state']
            writer = csv.DictWriter(f, fieldnames=fieldnames,
                                    quoting=csv.QUOTE_ALL)
            # Header row: every column name mapped to itself.
            writer.writerow(dict(zip(fieldnames, fieldnames)))

            for package in packages:
                if not package:
                    continue
                extras_dict = extras_to_dict(package)
                row = {}
                for fieldname, entity, key in self.CSV_MAPPING:
                    if key == 'state':
                        continue
                    value = None
                    if entity == 'organization':
                        # A dataset without an organization carries None
                        # here; calling len() on it would raise TypeError.
                        organization = package.get('organization')
                        if organization:
                            value = organization['name']
                    elif entity == 'resources':
                        # Only the first resource is exported.
                        resources = package.get('resources')
                        if resources and key in resources[0]:
                            value = resources[0][key]
                    elif key in package:
                        value = package[key]
                    elif key in extras_dict:
                        # Fall back to the dataset extras.
                        value = extras_dict[key]
                    # Only these two columns are encoded to UTF-8 bytes
                    # before being written.
                    if fieldname in ('title', 'description') and value:
                        value = value.encode('utf-8')
                    row[fieldname] = value
                writer.writerow(row)
            return f.getvalue()
        finally:
            f.close()
    except p.toolkit.ObjectNotFound:
        p.toolkit.abort(404, 'Organization not found')
def write_csv_file(self, publisher):
    """Build a CSV export of datasets and return it as a string.

    :param publisher: ``'all'`` exports every dataset listed by
        ``package_list`` that the current user is authorized to read;
        ``'template'`` produces only the header row; any other value is
        passed to ``get_packages_for_org``.
    :return: the CSV document with every field quoted. Columns come from
        ``CSV_MAPPING``, minus the ``state`` column.

    An ``ObjectNotFound`` raised while collecting the datasets is turned
    into ``abort(404, 'Organization not found')``.
    """
    context = {'model': model, 'user': c.user or c.author}
    try:
        if publisher == 'all':
            package_ids = p.toolkit.get_action('package_list')(context, {})
            package_show = p.toolkit.get_action('package_show')
            packages = []
            for pkg_id in package_ids:
                try:
                    package = package_show(context, {'id': pkg_id})
                except p.toolkit.NotAuthorized:
                    # Datasets the user may not read are left out of the
                    # export instead of failing the whole request.
                    log.warning(
                        'User %s not authorized to read package %s',
                        c.user, pkg_id)
                    continue
                package.pop('state', None)
                packages.append(package)
        elif publisher == 'template':
            # Just return an empty CSV file with just the headers
            packages = []
        else:
            packages = get_packages_for_org(context, publisher)
    except p.toolkit.ObjectNotFound:
        p.toolkit.abort(404, 'Organization not found')

    f = StringIO.StringIO()
    output = ''
    try:
        fieldnames = [n[0] for n in CSV_MAPPING if n[0] != 'state']
        writer = csv.DictWriter(f, fieldnames=fieldnames,
                                quoting=csv.QUOTE_ALL)
        # Header row: every column name mapped to itself.
        writer.writerow(dict(zip(fieldnames, fieldnames)))

        # NOTE(review): this sorts the dataset dicts themselves, with no
        # key function, so it depends on Python 2 dict comparison --
        # confirm the intended row order.
        packages.sort()

        for package in packages:
            if not package:
                continue
            extras_dict = extras_to_dict(package)
            row = {}
            for fieldname, entity, key in CSV_MAPPING:
                if key == 'state':
                    continue
                value = None
                if entity == 'organization':
                    # A dataset without an organization carries None
                    # here; calling len() on it would raise TypeError.
                    organization = package.get('organization')
                    if organization:
                        value = organization['name']
                elif entity == 'resources':
                    # Only the first resource is exported.
                    resources = package.get('resources')
                    if resources and key in resources[0]:
                        value = resources[0][key]
                elif key in package:
                    value = package[key]
                elif key in extras_dict:
                    # Fall back to the dataset extras.
                    value = extras_dict[key]
                # Only these two columns are encoded to UTF-8 bytes before
                # being written.
                if fieldname in ('title', 'description') and value:
                    value = value.encode('utf-8')
                row[fieldname] = value
            writer.writerow(row)
        output = f.getvalue()
    finally:
        f.close()
    return output