def fetch(cls, site_url_filter, since_datetime):
    """Copy ODI certificate badges from the Atom feed onto matching packages.

    Walks every entry yielded by ``client.generate_entries`` and, for each
    entry whose ``about`` link matches this site and points at a dataset,
    stores the badge data as JSON in the package's ``odi-certificate``
    extra. Every outcome is tallied in a ``StatsList`` and the summary is
    logged once the feed has been consumed.

    Args:
        site_url_filter: Object exposing ``search(text)`` (presumably a
            compiled regex - confirm against callers). Entries whose
            ``about`` value it does not match are ignored.
        since_datetime: Forwarded as ``since=`` to
            ``client.generate_entries``.

    Returns:
        None. Results are reported through logging only.
    """
    import ckan.model as model
    from running_stats import StatsList
    import ckanext.certificates.client as client

    log = logging.getLogger(__name__)
    stats = StatsList()

    # Use the generate_entries generator to get all of the entries from the
    # ODI Atom feed. This should correctly handle all of the pages within
    # the feed.
    for entry in client.generate_entries(since=since_datetime):
        # We have to handle the case where the rel='about' might be
        # missing, if so we'll ignore it and catch it next time
        about = entry.get('about', '')
        if not about:
            log.debug(stats.add(
                'Ignore - no rel="about" specifying the dataset',
                '%s "%s" %s' % (about, entry['title'], entry['id'])))
            continue

        if not site_url_filter.search(about):
            log.debug(stats.add(
                'Ignore - "about" field does not reference this site',
                '%s "%s" %s' % (about, entry['title'], entry['id'])))
            continue

        if '/dataset/' not in about:
            # NOTE(review): this detail prints the URL twice where the other
            # "Ignore" messages show entry['title'] - confirm it is intended.
            log.debug(stats.add(
                'Ignore - is "about" DGU but not a dataset',
                '%s "%s" %s' % (about, about, entry['id'])))
            continue

        pkg = cls._get_package_from_url(about)
        if not pkg:
            log.error(stats.add(
                'Unable to find the package',
                '%s "%s" %s %r' % (about, about, entry['id'], about)))
            continue

        # Build the JSON subset we want to describe the certificate
        badge_data = client.get_badge_data(entry['alternate'])
        if not badge_data:
            log.info(stats.add(
                'Error fetching badge data - skipped',
                '%s "%s" %s' % (about, entry['title'], entry['id'])))
            continue
        # e.g. 'Basic Level Certificate'
        badge_data['cert_title'] = entry.get('content', '')

        # Serialise once: the string that is compared is the string stored.
        badge_json = json.dumps(badge_data)
        if pkg.extras.get('odi-certificate') == badge_json:
            log.debug(stats.add('Certificate unchanged',
                                badge_data['certificate_url']))
            continue

        # Decide the label before the extra is written; afterwards the key
        # is always present.
        operation = 'updated' if 'odi-certificate' in pkg.extras else 'added'
        model.repo.new_revision()
        pkg.extras['odi-certificate'] = badge_json
        log.debug(stats.add(
            'Certificate %s' % operation,
            '"%s" %s' % (badge_data['title'],
                         badge_data['certificate_url'])))
        # Each changed package gets its own revision and commit.
        model.Session.commit()

    log.info('Summary:\n' + stats.report())
def command(self):
    """Fetch ODI certificates from the Atom feed and store them on packages.

    Loads configuration, rebinds the database session, sets up logging and
    then walks every entry yielded by ``client.generate_entries``. For each
    entry whose ``about`` link starts with this site's URL (with or without
    ``www.``) and points at a dataset, the badge data is stored as JSON in
    the package's ``odi-certificate`` extra. Outcomes are tallied in a
    ``StatsList`` and the summary is logged at the end.

    Returns:
        None. Results are reported through ``self.log`` only.
    """
    # Load configuration
    self._load_config()

    # Initialise database access
    import ckan.model as model
    model.Session.remove()
    model.Session.configure(bind=model.meta.engine)

    # Logging, post-config
    self.setup_logging()

    from pylons import config
    from running_stats import StatsList

    site_url = config.get('ckan.site_url')

    # Handling of sites that support www. but don't use it. Only the first
    # '//' (the scheme separator) is rewritten, so a later '//' in the URL
    # is left untouched.
    full_site_url = site_url
    if '//www.' not in full_site_url:
        full_site_url = full_site_url.replace('//', '//www.', 1)
    site_prefixes = (site_url, full_site_url)

    stats = StatsList()

    # Use the generate_entries generator to get all of the entries from the
    # ODI Atom feed. This should correctly handle all of the pages within
    # the feed.
    for entry in client.generate_entries(self.log):
        # We have to handle the case where the rel='about' might be missing,
        # if so we'll ignore it and catch it next time
        about = entry.get('about', '')
        if not about:
            self.log.debug(stats.add(
                'Ignore - no rel="about" specifying the dataset',
                '%s "%s" %s' % (about, entry['title'], entry['id'])))
            continue

        if not about.startswith(site_prefixes):
            self.log.debug(stats.add(
                'Ignore - "about" field does not reference this site',
                '%s "%s" %s' % (about, entry['title'], entry['id'])))
            continue

        if '/dataset/' not in about:
            self.log.debug(stats.add(
                'Ignore - is "about" DGU but not a dataset',
                '%s "%s" %s' % (about, entry['title'], entry['id'])))
            continue

        pkg = self._get_package_from_url(about)
        if not pkg:
            self.log.error(stats.add(
                'Unable to find the package',
                '%s "%s" %s' % (about, entry['title'], entry['id'])))
            continue

        # Build the JSON subset we want to describe the certificate
        badge_data = client.get_badge_data(self.log, entry['alternate'])
        if not badge_data:
            # Without this guard a failed fetch would raise on the item
            # assignment below instead of being counted and skipped.
            self.log.info(stats.add(
                'Error fetching badge data - skipped',
                '%s "%s" %s' % (about, entry['title'], entry['id'])))
            continue
        badge_data['cert_title'] = entry.get('content', '')

        # Serialise once: the string that is compared is the string stored.
        badge_json = json.dumps(badge_data)
        if pkg.extras.get('odi-certificate') == badge_json:
            self.log.debug(stats.add('Certificate unchanged',
                                     badge_data['certificate_url']))
            continue

        # The label must be chosen BEFORE the extra is written; checking
        # afterwards always finds the key and reports 'updated'.
        operation = 'updated' if 'odi-certificate' in pkg.extras else 'added'
        model.repo.new_revision()
        pkg.extras['odi-certificate'] = badge_json
        self.log.debug(stats.add(
            'Certificate %s' % operation,
            '"%s" %s' % (badge_data['title'],
                         badge_data['certificate_url'])))
        # Each changed package gets its own revision and commit.
        model.Session.commit()

    self.log.info('Summary:\n' + stats.report())
def command(self):
    """Sync ODI certificate badges from the Atom feed into package extras.

    Steps: load configuration, rebind the database session, set up logging,
    then iterate the entries produced by ``client.generate_entries``. An
    entry is applied only when its ``about`` link starts with the configured
    ``ckan.site_url`` (or its ``www.`` variant), contains ``/dataset/`` and
    resolves to a package. The badge data is then saved as JSON under the
    ``odi-certificate`` extra. All outcomes are counted in a ``StatsList``
    whose report is logged at the end.

    Returns:
        None. Results are reported through ``self.log`` only.
    """
    # Load configuration
    self._load_config()

    # Initialise database access
    import ckan.model as model
    model.Session.remove()
    model.Session.configure(bind=model.meta.engine)

    # Logging, post-config
    self.setup_logging()

    from pylons import config
    from running_stats import StatsList

    site_url = config.get('ckan.site_url')

    # Handling of sites that support www. but don't use it. The count of 1
    # restricts the rewrite to the scheme separator.
    full_site_url = site_url
    if '//www.' not in full_site_url:
        full_site_url = full_site_url.replace('//', '//www.', 1)
    accepted_prefixes = (site_url, full_site_url)

    stats = StatsList()

    def skip_detail(feed_entry, url):
        # Detail text shared by every "skipped entry" statistic.
        return '%s "%s" %s' % (url, feed_entry['title'], feed_entry['id'])

    # Use the generate_entries generator to get all of the entries from the
    # ODI Atom feed. This should correctly handle all of the pages within
    # the feed.
    for entry in client.generate_entries(self.log):
        # We have to handle the case where the rel='about' might be missing,
        # if so we'll ignore it and catch it next time
        about = entry.get('about', '')
        if not about:
            self.log.debug(stats.add(
                'Ignore - no rel="about" specifying the dataset',
                skip_detail(entry, about)))
            continue

        if not about.startswith(accepted_prefixes):
            self.log.debug(stats.add(
                'Ignore - "about" field does not reference this site',
                skip_detail(entry, about)))
            continue

        if '/dataset/' not in about:
            self.log.debug(stats.add(
                'Ignore - is "about" DGU but not a dataset',
                skip_detail(entry, about)))
            continue

        pkg = self._get_package_from_url(about)
        if not pkg:
            self.log.error(stats.add(
                'Unable to find the package',
                skip_detail(entry, about)))
            continue

        # Build the JSON subset we want to describe the certificate
        badge_data = client.get_badge_data(self.log, entry['alternate'])
        if not badge_data:
            # Count and skip a failed fetch rather than raising on the item
            # assignment below.
            self.log.info(stats.add(
                'Error fetching badge data - skipped',
                skip_detail(entry, about)))
            continue
        badge_data['cert_title'] = entry.get('content', '')

        # Serialise once: the string that is compared is the string stored.
        badge_json = json.dumps(badge_data)
        if pkg.extras.get('odi-certificate') == badge_json:
            self.log.debug(stats.add('Certificate unchanged',
                                     badge_data['certificate_url']))
            continue

        # Work out the label while the old state is still visible; once the
        # extra has been written the key is always present.
        operation = 'updated' if 'odi-certificate' in pkg.extras else 'added'
        model.repo.new_revision()
        pkg.extras['odi-certificate'] = badge_json
        self.log.debug(stats.add(
            'Certificate %s' % operation,
            '"%s" %s' % (badge_data['title'],
                         badge_data['certificate_url'])))
        # Each changed package gets its own revision and commit.
        model.Session.commit()

    self.log.info('Summary:\n' + stats.report())
def fetch(cls, site_url_filter, since_datetime):
    """Sync ODI certificate badges from the Atom feed into package extras.

    For every entry yielded by ``client.generate_entries`` the ``about``
    link is checked: it must be present, match ``site_url_filter``, contain
    ``/dataset/`` and resolve to a package through
    ``cls._get_package_from_url``. The badge data for accepted entries is
    saved as JSON under the package's ``odi-certificate`` extra when it
    differs from what is already stored. Each outcome is counted in a
    ``StatsList`` and the report is logged after the loop.

    Args:
        site_url_filter: Object exposing ``search(text)`` (presumably a
            compiled regex - confirm against callers).
        since_datetime: Forwarded as ``since=`` to
            ``client.generate_entries``.

    Returns:
        None. Results are reported through logging only.
    """
    import ckan.model as model
    from running_stats import StatsList
    import ckanext.certificates.client as client

    log = logging.getLogger(__name__)
    stats = StatsList()

    # Use the generate_entries generator to get all of the entries from the
    # ODI Atom feed. This should correctly handle all of the pages within
    # the feed.
    for entry in client.generate_entries(since=since_datetime):
        # We have to handle the case where the rel='about' might be
        # missing, if so we'll ignore it and catch it next time
        about = entry.get('about', '')
        if not about:
            log.debug(stats.add(
                'Ignore - no rel="about" specifying the dataset',
                '%s "%s" %s' % (about, entry['title'], entry['id'])))
            continue

        if not site_url_filter.search(about):
            log.debug(stats.add(
                'Ignore - "about" field does not reference this site',
                '%s "%s" %s' % (about, entry['title'], entry['id'])))
            continue

        if '/dataset/' not in about:
            # NOTE(review): this detail prints the URL twice where the other
            # "Ignore" messages show entry['title'] - confirm it is intended.
            log.debug(stats.add(
                'Ignore - is "about" DGU but not a dataset',
                '%s "%s" %s' % (about, about, entry['id'])))
            continue

        pkg = cls._get_package_from_url(about)
        if not pkg:
            log.error(stats.add(
                'Unable to find the package',
                '%s "%s" %s %r' % (about, about, entry['id'], about)))
            continue

        # Build the JSON subset we want to describe the certificate
        badge_data = client.get_badge_data(entry['alternate'])
        if not badge_data:
            log.info(stats.add(
                'Error fetching badge data - skipped',
                '%s "%s" %s' % (about, entry['title'], entry['id'])))
            continue
        # e.g. 'Basic Level Certificate'
        badge_data['cert_title'] = entry.get('content', '')

        # Serialise once: the string that is compared is the string stored.
        badge_json = json.dumps(badge_data)
        if pkg.extras.get('odi-certificate') == badge_json:
            log.debug(stats.add('Certificate unchanged',
                                badge_data['certificate_url']))
            continue

        # Decide the label before the extra is written; afterwards the key
        # is always present.
        operation = 'updated' if 'odi-certificate' in pkg.extras else 'added'
        model.repo.new_revision()
        pkg.extras['odi-certificate'] = badge_json
        log.debug(stats.add(
            'Certificate %s' % operation,
            '"%s" %s' % (badge_data['title'],
                         badge_data['certificate_url'])))
        # Each changed package gets its own revision and commit.
        model.Session.commit()

    log.info('Summary:\n' + stats.report())