def term_handler(self, signum, frame):
    """Signal handler: roll back, release this worker's records and exit.

    The current session is rolled back first. If a document was claimed
    (``self.rdfdoc_to_do`` is set and its ``worked_on`` flag does not equal
    ``False``), the flag is reset to ``False`` and the stat result is
    deleted. In every case the worker process record is deleted when one
    exists, the session is committed and the process exits with status 0.

    NOTE(review): a second ``term_handler`` definition appears later in this
    file; if both live in the same class the later one wins -- confirm.

    Args:
        signum: Number of the received signal (unused).
        frame: Stack frame active when the signal arrived (unused).

    Raises:
        SystemExit: always, with exit status 0.
    """
    log.debug("exiting through term handler")
    Session.rollback()

    doc = self.rdfdoc_to_do
    # `== False` (rather than `not ...`) is kept so that a None flag still
    # takes the cleanup branch, exactly as the comparison did before.
    in_progress = doc is not None and not (doc.worked_on == False)  # noqa: E712

    if in_progress:
        doc.worked_on = False
        # The signal may arrive before a stat result was attached to the
        # instance; Session.delete() must not be handed a missing object.
        stat_result = getattr(self, "stat_result", None)
        if stat_result is not None:
            Session.delete(stat_result)

    if self.worker_proc is not None:
        Session.delete(self.worker_proc)

    Session.commit()
    sys.exit(0)
def command(self):
    """Synchronise the local RDFDoc table with the CKAN package list.

    Installs ``self.term_handler`` for SIGINT and SIGTERM, bails out when two
    or more worker processes are already registered, logs local documents
    whose package no longer appears in the package list, and then creates or
    updates one ``RDFDoc`` per package with the preferred resource format
    and URL.

    NOTE(review): ``config_file``, ``ckan`` and ``package_list`` are not
    defined inside this method; they are presumably provided at module level
    or by code outside this view -- confirm.

    Returns:
        0 when two or more workers already exist, otherwise ``None``.
    """
    self.logging_file_config(config_file)
    log = logging.getLogger(__name__)

    self.worker_proc = None
    self.rdfdoc_to_do = None

    signal.signal(signal.SIGINT, self.term_handler)
    signal.signal(signal.SIGTERM, self.term_handler)

    # do not spawn more than two workers
    number_of_workers = Session.query(model.WorkerProc).with_lockmode('read').count()
    if number_of_workers >= 2:
        return 0

    # check for orphaned local packages
    all_local_pkgs = Session.query(model.RDFDoc).all()
    for pkg in all_local_pkgs:
        if pkg.name not in package_list:
            log.debug("%s is gone and will be deleted", pkg.name)
            # NOTE: the deletion itself is disabled; orphans are only logged.
            #Session.delete(pkg)
            #Session.commit()

    # Serialisations in order of preference: (stored format, accepted
    # lower-cased CKAN resource format strings).
    format_priority = (
        ("nt", ("application/x-ntriples", "nt", "gzip:ntriples")),
        ("nq", ("application/x-nquads", "nquads")),
        ("rdf", ("application/rdf+xml", "rdf")),
        ("ttl", ("text/turtle", "rdf/turtle", "ttl")),
        ("n3", ("text/n3", "n3")),
    )

    def pick_resource(resources):
        """Return ``(format, url)`` for the preferred resource, or None."""
        for doc_format, accepted in format_priority:
            for resource in resources:
                if resource['format'].lower() in accepted:
                    return doc_format, resource['url']

        picked = None
        for resource in resources:
            if resource['format'].lower() in ("api/sparql", "sparql"):
                # prefer a sitemap.xml over sparql, if any
                for sitemap_resource in resources:
                    if sitemap_resource['format'].lower() in ("meta/sitemap",):
                        return "sitemap", sitemap_resource['url']
                # No early exit here: when several SPARQL resources exist
                # the last one wins, as in the previous implementation.
                picked = ("sparql", resource['url'])
        return picked

    for package_name in package_list:
        try:
            package = ckan.package_entity_get(package_name)
        except Exception as errorstr:
            # Best effort: a package that cannot be fetched is skipped.
            log.debug("ERROR with %s: %s", package_name, errorstr)
            continue

        rdfdoc = Session.query(model.RDFDoc).filter(
            model.RDFDoc.name == package['name']).first()
        if rdfdoc is None:
            rdfdoc = model.RDFDoc()
            Session.add(rdfdoc)
            rdfdoc.name = package['name']

        selection = pick_resource(package['resources'])
        if selection is not None:
            rdfdoc.format, rdfdoc.uri = selection

        # A document without any known format is not persisted.
        if rdfdoc.format is not None:
            Session.commit()
        else:
            Session.rollback()
def term_handler(self, signum, frame):
    """Signal handler: discard the pending transaction and exit with 0.

    Args:
        signum: Number of the received signal (unused).
        frame: Stack frame active when the signal arrived (unused).

    Raises:
        SystemExit: always, with exit status 0.
    """
    Session.rollback()
    # Same effect as sys.exit(0), which raises SystemExit itself.
    raise SystemExit(0)