def execute(self):
    """Upload batch files pass after pass until the process is stopped.

    Each pass uploads either the explicitly requested ``self.files``
    (after resolving their file dates via the source database) or
    whatever the source database lists for ``self.progression``.  In
    daemon mode the loop repeats, pausing five minutes whenever a pass
    found nothing; otherwise the first pass ends by raising
    ``SystemExit``.

    Raises:
        SystemExit: after a single pass when ``self.daemon_mode`` is
            false.  Any other exception is logged and re-raised.
    """
    phinms = PHINMS_DB()
    uploader = Batchfile_Feeder(verbosity=self.verbosity, source_db=phinms)
    uploader.copy_tempdir = self.copy_tempdir

    while True:
        # Long running process: every interruption (including
        # KeyboardInterrupt and SystemExit) is logged, then re-raised.
        try:
            if systemUnderLoad():
                logging.info("system under load - continue anyhow")

            if not self.files:
                self.files = phinms.filelist(self.progression)
                # Nothing came back, so we have caught up; take this
                # opportunity to sleep for a while.
                if self.daemon_mode and not self.files:
                    logging.debug("no files found, sleeping")
                    sleep(5 * 60)
            else:
                # Explicit files were requested; look up their filedates.
                self.files = phinms.name_dates(self.files)

            for name, stamp in self.files:
                uploader.upload(name, stamp)
            self.files = None  # done with that batch

            if not self.daemon_mode:
                raise SystemExit('non daemon-mode exit')
        except BaseException:  # same reach as a bare ``except:``
            logging.info("Shutting down")
            raise  # now exit
        finally:
            # NOTE(review): this runs after every pass, not only on
            # shutdown -- presumably PHINMS_DB reconnects on demand;
            # confirm against its implementation.
            phinms.close()
def execute(self):
    """Drive the batch-file upload loop.

    A pass works on ``self.files`` when the caller supplied any (their
    filedates are looked up first); otherwise it asks the source
    database for the file list matching ``self.progression``.  Every
    ``(batch_file, filedate)`` pair is handed to the feeder's
    ``upload``.  Outside daemon mode the method leaves through
    ``SystemExit`` after one pass; in daemon mode it keeps looping and
    sleeps five minutes when the database had nothing new.

    Raises:
        SystemExit: when ``self.daemon_mode`` is false, once the pass
            is complete.  Other exceptions propagate after being logged.
    """
    db = PHINMS_DB()
    batch_feeder = Batchfile_Feeder(verbosity=self.verbosity, source_db=db)
    batch_feeder.copy_tempdir = self.copy_tempdir

    while True:
        try:  # long running process, capture interrupt
            if systemUnderLoad():
                logging.info("system under load - continue anyhow")

            if not self.files:
                self.files = db.filelist(self.progression)
                caught_up = not self.files
                # An empty answer means we are up to date, so this is
                # a good moment to sleep for a while.
                if self.daemon_mode and caught_up:
                    logging.debug("no files found, sleeping")
                    sleep(5 * 60)
            else:
                # Resolve the filedates of the files we were given.
                self.files = db.name_dates(self.files)

            for batch_file, filedate in self.files:
                batch_feeder.upload(batch_file, filedate)
            # That batch is finished.
            self.files = None

            if not self.daemon_mode:
                raise SystemExit('non daemon-mode exit')
        except BaseException:  # equivalent to the bare ``except:``
            logging.info("Shutting down")
            raise  # now exit
        finally:
            # NOTE(review): executed at the end of each pass as well as
            # on shutdown; later passes reuse ``db`` after this close --
            # verify that PHINMS_DB tolerates that.
            db.close()
def execute(self):
    """Run one deduplication pass while holding the manager lock.

    Configures logging, refuses to run when ``self.lock`` is already
    held, prepares the deduplication tables (unless ``self.skip_prep``
    is set) and collects the visits to process.  When there is at least
    one visit, ``self.NUM_PROCS`` ``LongitudinalWorker`` processes are
    started, every visit is put on ``self.queue`` and the method waits
    for the queue to be joined.  Finally ``self.tearDown()`` and
    ``self.datePersistence.bump_date()`` are called.

    Returns:
        None.  Returns early, before doing any work, when the lock is
        already held.
    """
    # Initialize logging now (verbosity is now set regardless of
    # invocation method)
    configure_logging(verbosity=self.verbosity, logfile="longitudinal-manager.log")
    logging.info("Initiate deduplication for %s", self.reportDate or "whole database")

    # Only allow one instance of the manager to run at a time.
    if self.lock.is_locked():
        logging.warning("Can't continue, %s is locked ", LOCKFILE)
        return
    if systemUnderLoad():
        logging.warning("system under load - continue anyhow")

    # Acquire *before* entering the try block: if acquiring fails, the
    # ``finally`` below must not try to release a lock we never held.
    self.lock.acquire()
    try:
        self.access = DirectAccess(
            database=self.data_warehouse,
            port=self.warehouse_port,
            user=self.database_user,
            password=self.database_password,
        )
        self.data_warehouse_access = AlchemyAccess(
            database=self.data_warehouse,
            port=self.warehouse_port,
            user=self.database_user,
            password=self.database_password,
        )
        self.data_mart_access = AlchemyAccess(
            database=self.data_mart,
            port=self.mart_port,
            user=self.database_user,
            password=self.database_password,
        )

        startTime = time.time()
        if not self.skip_prep:
            self._prepDeduplicateTables()
        visits_to_process = self._visitsToProcess()

        # Now done with db access needs at the manager level
        # free up resources:
        self.data_mart_access.disconnect()
        self.data_warehouse_access.disconnect()
        self.access.close()

        # Set of locks used, one for each table needing protection
        # from asynchronous inserts.  Names should match table
        # minus 'dim_' prefix, plus '_lock' suffix
        # i.e. dim_location -> 'location_lock'
        protected_tables = (
            "admission_source",
            "admission_o2sat",
            "admission_temp",
            "assigned_location",
            "admit_reason",
            "chief_complaint",
            "diagnosis",
            "disposition",
            "flu_vaccine",
            "h1n1_vaccine",
            "lab_flag",
            "lab_result",
            "location",
            "note",
            "order_number",
            "performing_lab",
            "pregnancy",
            "race",
            "reference_range",
            "service_area",
            "specimen_source",
        )
        table_locks = {table + "_lock": Lock() for table in protected_tables}

        # If we have visits to process, fire up the workers...
        # (The previous ``> 1`` test started no workers for a single
        # visit, so that visit was never processed -- and if it was
        # queued anyway, nothing was left to drain the queue.)
        if len(visits_to_process) > 0:
            for i in range(self.NUM_PROCS):
                dw = Process(
                    target=LongitudinalWorker,
                    kwargs={
                        "queue": self.queue,
                        "procNumber": i,
                        "data_warehouse": self.data_warehouse,
                        "warehouse_port": self.warehouse_port,
                        "data_mart": self.data_mart,
                        "mart_port": self.mart_port,
                        "dbUser": self.database_user,
                        "dbPass": self.database_password,
                        "table_locks": table_locks,
                        "verbosity": self.verbosity,
                    },
                )
                dw.daemon = True
                dw.start()

            # Populate the queue
            for v in visits_to_process:
                self.queue.put(v)

            # Wait on the queue until empty
            self.queue.join()

        # Common cleanup
        self.tearDown()
        self.datePersistence.bump_date()
        logging.info("Queue is empty - done in %s", time.time() - startTime)
    finally:
        self.lock.release()
def execute(self):
    """Start the deduplication process.

    Steps, in order:

    1. Configure logging and bail out if ``self.lock`` is already held.
    2. Take the lock, open the warehouse / mart connections, optionally
       prepare the deduplication tables (skipped when ``self.skip_prep``
       is set) and fetch the visits to process, then close those
       connections again.
    3. If any visits were found, start ``self.NUM_PROCS``
       ``LongitudinalWorker`` processes sharing one set of table locks,
       queue every visit and wait on ``self.queue.join()``.
    4. Call ``self.tearDown()`` and ``self.datePersistence.bump_date()``.

    The lock is released on the way out, whether or not an error
    occurred after it was acquired.

    Returns:
        None.
    """
    # Initialize logging now (verbosity is now set regardless of
    # invocation method)
    configure_logging(verbosity=self.verbosity,
                      logfile="longitudinal-manager.log")
    logging.info("Initiate deduplication for %s",
                 self.reportDate or "whole database")

    # Only allow one instance of the manager to run at a time.
    if self.lock.is_locked():
        logging.warning("Can't continue, %s is locked ", LOCKFILE)
        return
    if systemUnderLoad():
        logging.warning("system under load - continue anyhow")

    # The acquire sits outside the try so that a failure to acquire
    # does not reach the ``finally`` and release a lock that is not ours.
    self.lock.acquire()
    try:
        warehouse_login = {'database': self.data_warehouse,
                           'port': self.warehouse_port,
                           'user': self.database_user,
                           'password': self.database_password}
        self.access = DirectAccess(**warehouse_login)
        self.data_warehouse_access = AlchemyAccess(**warehouse_login)
        self.data_mart_access = AlchemyAccess(
            database=self.data_mart, port=self.mart_port,
            user=self.database_user, password=self.database_password)

        startTime = time.time()
        if not self.skip_prep:
            self._prepDeduplicateTables()
        visits_to_process = self._visitsToProcess()

        # Now done with db access needs at the manager level
        # free up resources:
        self.data_mart_access.disconnect()
        self.data_warehouse_access.disconnect()
        self.access.close()

        # Set of locks used, one for each table needing protection
        # from asynchronous inserts.  Names should match table
        # minus 'dim_' prefix, plus '_lock' suffix
        # i.e. dim_location -> 'location_lock'
        table_locks = {'admission_source_lock': Lock(),
                       'admission_o2sat_lock': Lock(),
                       'admission_temp_lock': Lock(),
                       'assigned_location_lock': Lock(),
                       'admit_reason_lock': Lock(),
                       'chief_complaint_lock': Lock(),
                       'diagnosis_lock': Lock(),
                       'disposition_lock': Lock(),
                       'flu_vaccine_lock': Lock(),
                       'h1n1_vaccine_lock': Lock(),
                       'lab_flag_lock': Lock(),
                       'lab_result_lock': Lock(),
                       'location_lock': Lock(),
                       'note_lock': Lock(),
                       'order_number_lock': Lock(),
                       'performing_lab_lock': Lock(),
                       'pregnancy_lock': Lock(),
                       'race_lock': Lock(),
                       'reference_range_lock': Lock(),
                       'service_area_lock': Lock(),
                       'specimen_source_lock': Lock(),
                       }

        # If we have visits to process, fire up the workers...
        # A lone visit counts too: the former ``> 1`` comparison left
        # it without any worker to handle it.
        if len(visits_to_process) > 0:
            # Everything the workers share; only procNumber differs.
            shared_kwargs = {'queue': self.queue,
                             'data_warehouse': self.data_warehouse,
                             'warehouse_port': self.warehouse_port,
                             'data_mart': self.data_mart,
                             'mart_port': self.mart_port,
                             'dbUser': self.database_user,
                             'dbPass': self.database_password,
                             'table_locks': table_locks,
                             'verbosity': self.verbosity}
            for i in range(self.NUM_PROCS):
                dw = Process(target=LongitudinalWorker,
                             kwargs=dict(shared_kwargs, procNumber=i))
                dw.daemon = True
                dw.start()

            # Populate the queue
            for v in visits_to_process:
                self.queue.put(v)

            # Wait on the queue until empty
            self.queue.join()

        # Common cleanup
        self.tearDown()
        self.datePersistence.bump_date()
        logging.info("Queue is empty - done in %s",
                     time.time() - startTime)
    finally:
        self.lock.release()