def run_command(self, args):
    """Run the transform of every collection that has one, then exit the process.

    ``args.runforseconds`` is an optional time budget in seconds. When it is
    positive, the resulting deadline is passed to each transform and no further
    collection is started once the deadline has passed. ``args.quiet``
    suppresses the per-collection progress line.

    This method never returns normally: it always terminates the process with
    ``os._exit(0)``.
    """
    import sys  # local import: only needed for the flush before the hard exit

    run_until_timestamp = None
    run_for_seconds = int(args.runforseconds) if args.runforseconds else 0
    if run_for_seconds > 0:
        # NOTE(review): utcnow() is naive, so .timestamp() interprets it as local
        # time. Kept as is because the same expression is used for the comparison
        # below and the value is handed to the transforms - confirm before changing.
        run_until_timestamp = datetime.datetime.utcnow().timestamp() + run_for_seconds

        # This is a safeguard - the process should stop itself but this will kill it if it does not.
        # It deliberately does nothing except exit, so it cannot block on a stuck stream.
        def exitfunc():
            os._exit(0)

        Timer(run_for_seconds + 60, exitfunc).start()

    for collection in self.database.get_all_collections():
        if collection.transform_type:
            if not args.quiet:
                print("Collection " + str(collection.database_id))
            transform = get_transform_instance(
                collection.transform_type,
                self.config,
                self.database,
                collection,
                run_until_timestamp=run_until_timestamp,
            )
            transform.process()
        # Early return?
        if run_until_timestamp and run_until_timestamp < datetime.datetime.utcnow().timestamp():
            break

    # If the code above took less than 60 seconds the process will stay open, waiting for the Timer to execute.
    # So just kill it to make sure.
    # os._exit() skips the interpreter's normal shutdown, including flushing stdio
    # buffers, so flush first or the progress output above can be lost when stdout
    # is a pipe or a file. The finally guarantees the exit even if a flush fails.
    try:
        for stream in (sys.stdout, sys.stderr):
            if stream is not None:
                stream.flush()
    finally:
        os._exit(0)
def run_collection(self, collection, run_until_timestamp, args):
    """Run the transform for one collection, reporting (not raising) failures.

    Does nothing when ``run_until_timestamp`` is set and already in the past,
    or when the collection has no ``transform_type``. Otherwise the start is
    logged, a progress line is printed unless ``args.quiet`` is set, and the
    transform is processed. Any exception raised by ``transform.process()`` is
    printed to stderr and sent to Sentry tagged with the collection id; it is
    not re-raised, so the caller can carry on with the next collection.
    """
    # Early return?
    if run_until_timestamp and run_until_timestamp < datetime.datetime.utcnow().timestamp():
        return

    if not collection.transform_type:
        return

    logger = logging.getLogger('ocdskingfisher.cli.transform-collections')
    logger.info("Starting to transform collection %s", collection.database_id)
    if not args.quiet:
        print("Collection " + str(collection.database_id))

    transform = get_transform_instance(
        collection.transform_type,
        self.config,
        self.database,
        collection,
        run_until_timestamp=run_until_timestamp,
    )
    try:
        transform.process()
    except Exception as e:
        # print_tb() would show only the stack frames; print_exception() also
        # includes the exception type and message, which is what explains the failure.
        traceback.print_exception(type(e), e, e.__traceback__)
        with sentry_sdk.push_scope() as scope:
            scope.set_tag("transform_collection", collection.database_id)
            sentry_sdk.capture_exception(e)
def run_command(self, args):
    """Run the transform configured for the collection selected via ``args``.

    ``run_command_for_selecting_existing_collection`` is called first; it is
    presumably what populates ``self.collection`` (defined outside this block -
    verify). If that collection has no ``transform_type`` a message is printed
    and the process exits with status -1.

    Raises:
        SystemExit: with code -1 when the collection has no transform.
    """
    self.run_command_for_selecting_existing_collection(args)

    if not self.collection.transform_type:
        print("That collection does not have any transforms!")
        # quit() is a helper the site module adds for interactive sessions and is
        # not guaranteed to exist (e.g. python -S, frozen builds). Raising
        # SystemExit directly gives the same exit status without that dependency.
        raise SystemExit(-1)

    transform = get_transform_instance(
        self.collection.transform_type,
        self.collection.store_end_at is None,
        self.config,
        self.database,
        self.collection,
    )
    transform.process()
def run_command(self, args):
    """Consume "collection store finished" messages from Redis and act on them.

    Returns immediately (after printing a notice) when Redis is not configured.
    Otherwise it repeatedly pops from the ``kingfisher_work_collection_store_finished``
    list with a 10 second blocking timeout. For each message it updates the
    cached columns of the collection named by ``collection_id`` and runs every
    transform that depends on that collection.

    ``args.runforseconds`` is an optional time budget in seconds; once it is
    used up the loop stops after the current iteration. Without it the loop
    runs forever. ``args.quiet`` suppresses progress printing.

    When the loop ends the process is terminated with ``os._exit(0)``, so this
    method only returns normally on the "no Redis" path.
    """
    import sys  # local import: only needed for the flush before the hard exit

    if not self.config.is_redis_available():
        print("No Redis is configured!")
        return

    run_until_timestamp = None
    run_for_seconds = int(args.runforseconds) if args.runforseconds else 0
    if run_for_seconds > 0:
        run_until_timestamp = datetime.datetime.utcnow().timestamp() + run_for_seconds

        # This is a safeguard - the process should stop itself but this will kill it if it does not.
        def exitfunc():
            os._exit(0)

        Timer(run_for_seconds + 60, exitfunc).start()

    redis_conn = redis.Redis(
        host=self.config.redis_host,
        port=self.config.redis_port,
        db=self.config.redis_database,
    )
    logger = logging.getLogger('ocdskingfisher.redis-queue-collection-store-finished')

    while True:
        data = redis_conn.blpop("kingfisher_work_collection_store_finished", timeout=10)
        if data:
            # data is indexed as (key, value). Decode as UTF-8 (a superset of
            # ASCII) so a non-ASCII character in the JSON payload does not crash
            # the worker with UnicodeDecodeError.
            message = json.loads(data[1].decode('utf-8'))
            collection_id = message.get('collection_id')
            if not args.quiet:
                print("Got Collection: " + str(collection_id))
            logger.info("Got Collection: %s", collection_id)

            # Update Cache Columns
            self.database.update_collection_cached_columns(collection_id)

            # Run any transforms that depend on this collection
            for collection in self.database.get_collections_that_transform_this_collection(collection_id):
                transform = get_transform_instance(
                    collection.transform_type,
                    self.config,
                    self.database,
                    collection,
                )
                transform.process()

            if not args.quiet:
                print("Processed!")

        # Early return?
        if run_until_timestamp and run_until_timestamp < datetime.datetime.utcnow().timestamp():
            break

    # If the code above took less than 60 seconds the process will stay open, waiting for the Timer to execute.
    # So just kill it to make sure.
    # os._exit() does not flush stdio buffers, so flush first or the printed
    # progress can be lost when stdout is a pipe or a file. The finally
    # guarantees the exit even if a flush fails.
    try:
        for stream in (sys.stdout, sys.stderr):
            if stream is not None:
                stream.flush()
    finally:
        os._exit(0)