def swestore(self):
    """Entry point for tasks that package runs and send them to swestore.

    The work done is selected by flags on ``self.pargs``:

    * ``package_run``: requires ``flowcell``; calls ``package_run`` with the
      ``swestore_staging`` value of the ``archive`` config section and stores
      the result in ``self.pargs.tarball``. With ``clean``, ``rm_run`` is
      then called on the archive ``root``; with ``clean_from_staging``,
      ``rm_run`` is called on the staging location, but only when the
      flowcell is listed with storage status ``NAS_nosync``.
    * ``remote_upload``: calls ``upload_tarball`` (and ``rm_tarball`` when
      ``clean`` is set).
    * ``send_to_swestore``: calls ``send_to_swestore`` (and ``rm_tarball``
      when ``clean`` is set), then sets the storage status of the flowcell
      to ``swestore_archived``.
    * ``log_to_db``: not implemented.

    The function returns ``None``. It logs an error and returns early when
    no tarball could be created, no tarball was specified, the tarball does
    not exist on disk, or the remote upload reports failure.

    Raises:
        SystemExit: with exit code 1 when ``send_to_swestore`` returns a
            falsy result, so that a remote caller can detect the failure.
        NotImplementedError: when ``log_to_db`` is requested.
    """
    db_info = self.app.config.get_section_dict('db')
    f_conn = FlowcellRunMetricsConnection(username=db_info.get('user'),
                                          password=db_info.get('password'),
                                          url=db_info.get('url'))

    def archive_kwargs():
        # 'archive' config values overlaid with the command line arguments
        # (command line wins on duplicate keys). Built with update() because
        # adding two .items() results only works on Python 2.
        kwargs = dict(self.config.get_section_dict('archive').items())
        kwargs.update(vars(self.pargs))
        return kwargs

    # Create a tarball out of the run folder
    if self.pargs.package_run:
        # We require a flowcell argument
        if not self._check_pargs(["flowcell"]):
            return

        self.pargs.tarball = package_run(self,
                                         self.config.get('archive', 'swestore_staging'),
                                         **vars(self.pargs))
        if not self.pargs.tarball:
            self.log.error("No tarball was created, exiting")
            return
        if self.pargs.clean:
            rm_run(self, self.config.get('archive', 'root'), flowcell=self.pargs.flowcell)
        if self.pargs.clean_from_staging:
            # Check that the run has been archived on the NAS before removing
            # it, otherwise it will keep synching
            if self.pargs.flowcell in f_conn.get_storage_status('NAS_nosync').keys():
                rm_run(self, self.config.get('archive', 'swestore_staging'),
                       flowcell=self.pargs.flowcell)
            else:
                self.log.warn("Run storage status is not NAS_nosync, not removing run from swestore_stage!")

    if not self.pargs.tarball:
        self.log.error("Required argument --tarball was not specified")
        return

    if not os.path.exists(self.pargs.tarball):
        self.log.error("Tarball {} does not exist".format(self.pargs.tarball))
        return

    # Upload a tarball to a remote host
    if self.pargs.remote_upload:
        result = upload_tarball(self, **archive_kwargs())
        if not result:
            return
        if self.pargs.clean:
            rm_tarball(self, tarball=self.pargs.tarball)

    # Send the tarball to Swestore using irods
    if self.pargs.send_to_swestore:
        result = send_to_swestore(self, **archive_kwargs())
        if not result:
            # If archiving failed, we need to give a non-zero exit code in
            # order for a remote instance to detect the failure
            sys.exit(1)
        if self.pargs.clean:
            rm_tarball(self, tarball=self.pargs.tarball)
        # Set the run as archived in StatusDB. When only --tarball was given
        # there is no flowcell argument, so fall back to the tarball file name.
        # NOTE(review): assumes the tarball file name starts with the flowcell
        # id followed by a dot -- confirm against package_run.
        fc_id = self.pargs.flowcell or os.path.basename(self.pargs.tarball).split('.')[0]
        fc_db_id = f_conn.id_view.get(fc_id)
        if fc_db_id:
            f_conn.set_storage_status(fc_db_id, 'swestore_archived')
        else:
            self.log.warn("Flowcell {} not found in the database, not changing status.".format(fc_id))

    # Log to statusdb
    if self.pargs.log_to_db:
        # implement this
        raise NotImplementedError("logging to db functionality not implemented")
def storage_cleanup(self):
    """Move finished runs to ``nosync`` and delete runs that expired there.

    The short, lower-cased host name (``platform.node()``) is looked up in
    the ``storage`` config section; its value is a comma separated list of
    directories to clean. If the host is not configured, a warning is logged
    and nothing else happens.

    For every configured directory:

    * Run folders are the entries matching ``1*``.
    * Runs under ``<dir>/nosync`` whose ``RTAComplete.txt`` was modified 30
      or more days ago are candidates for removal, unless the run contains a
      ``no_remove.txt`` file.

    On hosts whose name contains ``nas``, runs with an ``RTAComplete.txt``
    file are moved to ``nosync`` once the last line of the lsyncd log
    contains ``Finished``; the modification time of ``RTAComplete.txt`` is
    refreshed and the storage status of the flowcell is set to
    ``NAS_nosync``. Old runs are only deleted when their storage status is
    ``swestore_archived``.

    On any other configured host, runs containing
    ``second_read_processing_completed.txt`` are moved to ``nosync`` and old
    runs are deleted unconditionally.

    Returns None.
    """
    storage_conf = self.app.config.get_section_dict('storage')
    db_info = self.app.config.get_section_dict('db')
    f_conn = FlowcellRunMetricsConnection(username=db_info.get('user'),
                                          password=db_info.get('password'),
                                          url=db_info.get('url'))
    server = platform.node().split('.')[0].lower()
    if server not in storage_conf.keys():
        self.app.log.warn("You're running the cleanup functionality in {}. But this "
                          "server doen't seem to be on your pm.conf file. Are you on the correct server?".format(server))
        return

    self.app.log.info("Performing cleanup on production server \"{}\"...".format(server))
    dirs = [d.lstrip() for d in storage_conf.get(server).split(',')]

    # Collect old runs (30 days or more in the nosync folder) to remove
    old_runs = []
    for d in dirs:
        nosync_dir = os.path.join(d, 'nosync')
        for fc in glob.iglob(os.path.join(nosync_dir, '1*')):
            if not os.path.isdir(fc):
                continue
            fc_name = os.path.basename(fc)
            # A no_remove.txt check file protects the run from removal
            if os.path.exists(os.path.join(fc, 'no_remove.txt')):
                self.app.log.warn("no_remove.txt file found in {}, skipping run".format(fc_name))
                continue
            try:
                stats = os.stat(os.path.join(fc, 'RTAComplete.txt'))
            except OSError:
                # Without the file the age of the run is unknown; skip this
                # run instead of aborting the cleanup of all the others.
                self.app.log.warn("No RTAComplete.txt file was found for run {} in nosync, "
                                  "cannot determine its age. Skipping run".format(fc_name))
                continue
            age = datetime.now() - datetime.fromtimestamp(stats.st_mtime)
            if age.days >= 30:
                old_runs.append(fc)

    # NAS servers
    if 'nas' in server:
        # Collect newly finished runs
        fc_list = []
        for d in dirs:
            for fc in glob.glob(os.path.join(d, '1*')):
                if os.path.exists(os.path.join(fc, 'RTAComplete.txt')):
                    fc_list.append(fc)

        # Move to nosync
        # NOTE(review): the retry budget is shared by all runs, so once it is
        # used up every remaining run is skipped -- confirm this is intended.
        retries = 5
        for fc in fc_list:
            fc_name = os.path.basename(fc)
            while retries:
                if 'Finished' in last_lines(storage_conf.get('lsyncd_log'), 1)[0]:
                    break
                retries -= 1
                time.sleep(3)
            if not retries:
                self.app.log.warn("lsyncd process doesn't seem to be finished. "
                                  "Skipping run {}".format(fc_name))
                continue

            self.app.log.info("lsyncd process seems to be up to speed, and run {} "
                              "is finished, moving it to nosync".format(fc_name))
            nosync_dir = os.path.join(os.path.dirname(fc), 'nosync')
            shutil.move(fc, nosync_dir)
            # Touch RTAComplete.txt so that the modification date is the date
            # when the run was moved to nosync. os.utime only updates the
            # timestamps; opening the file in 'w' mode would also truncate it
            # (or silently create it when missing).
            try:
                os.utime(os.path.join(nosync_dir, fc_name, 'RTAComplete.txt'), None)
            except (IOError, OSError):
                self.app.log.warn("No RTAComplete.txt file was found for run {}."
                                  " Please check".format(fc_name))
            fc_db_id = f_conn.id_view.get(fc_name)
            if fc_db_id:
                f_conn.set_storage_status(fc_db_id, 'NAS_nosync')
            else:
                self.app.log.warn("Flowcell {} not found in the database, not changing status.".format(fc_name))

        # Remove old runs. The archived flowcells are fetched once, and only
        # when there is something to remove.
        archived = set(f_conn.get_storage_status('swestore_archived').keys()) if old_runs else set()
        for fc in old_runs:
            fc_name = os.path.basename(fc)
            # Check that the run has been archived in swestore before
            # removing it permanently
            if fc_name in archived:
                self.app.log.info("Run {} has been in nosync for more than 30 days "
                                  "and is archived in swestore. Permanently removing it from the NAS".format(fc_name))
                shutil.rmtree(fc)
            else:
                self.app.log.warn("Run {} has been in nosync for more than 30 "
                                  "days, but has not yet been archived in swestore. "
                                  "Not removing, please check it".format(fc_name))

    # Processing servers (b5)
    else:
        # Collect finished runs
        fc_list = []
        for d in dirs:
            for fc in glob.glob(os.path.join(d, '1*')):
                if os.path.exists(os.path.join(fc, 'second_read_processing_completed.txt')):
                    fc_list.append(fc)

        # Move to nosync
        for fc in fc_list:
            fc_name = os.path.basename(fc)
            self.app.log.info("Moving run {} to nosync".format(fc_name))
            shutil.move(fc, os.path.join(os.path.dirname(fc), 'nosync'))

        # Remove old runs
        for fc in old_runs:
            fc_name = os.path.basename(fc)
            self.app.log.info("Run {} has been in nosync for more than 30 "
                              "days, permanently removing it from {}".format(fc_name, server))
            shutil.rmtree(fc)
def storage_cleanup(self):
    """Move finished runs to ``nosync`` and delete runs that expired there.

    The short, lower-cased host name (``platform.node()``) must be a key of
    the ``storage`` config section; the matching value is a comma separated
    list of directories to clean. On an unconfigured host only a warning is
    logged.

    Within each configured directory, run folders are the entries matching
    ``1*``. A run under ``<dir>/nosync`` becomes a removal candidate when its
    ``RTAComplete.txt`` was last modified 30 or more days ago and the run
    does not contain a ``no_remove.txt`` file.

    * Hosts with ``nas`` in their name: runs containing ``RTAComplete.txt``
      are moved to ``nosync`` when the last line of the lsyncd log contains
      ``Finished``; afterwards the modification time of ``RTAComplete.txt``
      is refreshed and the flowcell storage status is set to ``NAS_nosync``.
      Removal candidates are deleted only if their storage status is
      ``swestore_archived``.
    * Other configured hosts: runs containing
      ``second_read_processing_completed.txt`` are moved to ``nosync`` and
      all removal candidates are deleted.

    Returns None.
    """
    storage_conf = self.app.config.get_section_dict('storage')
    db_info = self.app.config.get_section_dict('db')
    f_conn = FlowcellRunMetricsConnection(username=db_info.get('user'),
                                          password=db_info.get('password'),
                                          url=db_info.get('url'))
    server = platform.node().split('.')[0].lower()
    if server not in storage_conf.keys():
        self.app.log.warn("You're running the cleanup functionality in {}. But this "
                          "server doen't seem to be on your pm.conf file. Are you on the correct server?".format(server))
        return

    self.app.log.info(
        "Performing cleanup on production server \"{}\"...".format(server))
    dirs = [d.lstrip() for d in storage_conf.get(server).split(',')]

    # Collect old runs (30 days or more in the nosync folder) to remove
    old_runs = []
    for d in dirs:
        nosync_dir = os.path.join(d, 'nosync')
        for fc in glob.iglob(os.path.join(nosync_dir, '1*')):
            if not os.path.isdir(fc):
                continue
            fc_name = os.path.basename(fc)
            # A no_remove.txt check file protects the run from removal
            if os.path.exists(os.path.join(fc, 'no_remove.txt')):
                self.app.log.warn(
                    "no_remove.txt file found in {}, skipping run".format(fc_name))
                continue
            try:
                stats = os.stat(os.path.join(fc, 'RTAComplete.txt'))
            except OSError:
                # The age of the run cannot be determined without the file;
                # skip it rather than abort the cleanup of every other run.
                self.app.log.warn(
                    "No RTAComplete.txt file was found for run {} in nosync, "
                    "cannot determine its age. Skipping run".format(fc_name))
                continue
            age = datetime.now() - datetime.fromtimestamp(stats.st_mtime)
            if age.days >= 30:
                old_runs.append(fc)

    # NAS servers
    if 'nas' in server:
        # Collect newly finished runs
        fc_list = []
        for d in dirs:
            for fc in glob.glob(os.path.join(d, '1*')):
                if os.path.exists(os.path.join(fc, 'RTAComplete.txt')):
                    fc_list.append(fc)

        # Move to nosync
        # NOTE(review): the retry budget is shared by all runs, so once it is
        # exhausted every remaining run is skipped -- confirm this is intended.
        retries = 5
        for fc in fc_list:
            fc_name = os.path.basename(fc)
            while retries:
                if 'Finished' in last_lines(
                        storage_conf.get('lsyncd_log'), 1)[0]:
                    break
                retries -= 1
                time.sleep(3)
            if not retries:
                self.app.log.warn("lsyncd process doesn't seem to be finished. "
                                  "Skipping run {}".format(fc_name))
                continue

            self.app.log.info("lsyncd process seems to be up to speed, and run {} "
                              "is finished, moving it to nosync".format(fc_name))
            nosync_dir = os.path.join(os.path.dirname(fc), 'nosync')
            shutil.move(fc, nosync_dir)
            # Touch RTAComplete.txt so that the modification date is the date
            # when the run was moved to nosync. os.utime refreshes the
            # timestamps only, whereas open(..., 'w') would truncate the file
            # (or silently create it when missing).
            try:
                os.utime(
                    os.path.join(nosync_dir, fc_name, 'RTAComplete.txt'),
                    None)
            except (IOError, OSError):
                self.app.log.warn("No RTAComplete.txt file was found for run {}."
                                  " Please check".format(fc_name))
            fc_db_id = f_conn.id_view.get(fc_name)
            if fc_db_id:
                f_conn.set_storage_status(fc_db_id, 'NAS_nosync')
            else:
                self.app.log.warn(
                    "Flowcell {} not found in the database, not changing status."
                    .format(fc_name))

        # Remove old runs. Query the archived flowcells a single time, and
        # only when there is at least one candidate.
        if old_runs:
            archived = set(
                f_conn.get_storage_status('swestore_archived').keys())
        else:
            archived = set()
        for fc in old_runs:
            fc_name = os.path.basename(fc)
            # Check that the run has been archived in swestore before
            # removing it permanently
            if fc_name in archived:
                self.app.log.info("Run {} has been in nosync for more than 30 days "
                                  "and is archived in swestore. Permanently removing it from the NAS".format(fc_name))
                shutil.rmtree(fc)
            else:
                self.app.log.warn("Run {} has been in nosync for more than 30 "
                                  "days, but has not yet been archived in swestore. "
                                  "Not removing, please check it".format(fc_name))

    # Processing servers (b5)
    else:
        # Collect finished runs
        fc_list = []
        for d in dirs:
            for fc in glob.glob(os.path.join(d, '1*')):
                if os.path.exists(
                        os.path.join(
                            fc, 'second_read_processing_completed.txt')):
                    fc_list.append(fc)

        # Move to nosync
        for fc in fc_list:
            fc_name = os.path.basename(fc)
            self.app.log.info(
                "Moving run {} to nosync".format(fc_name))
            shutil.move(fc, os.path.join(os.path.dirname(fc), 'nosync'))

        # Remove old runs
        for fc in old_runs:
            fc_name = os.path.basename(fc)
            self.app.log.info("Run {} has been in nosync for more than 30 "
                              "days, permanently removing it from {}".format(fc_name, server))
            shutil.rmtree(fc)
def swestore(self):
    """Entry point for tasks that package runs and send them to swestore.

    The ``swestore_staging`` value of the ``archive`` config section is a
    comma separated list of paths; ``get_path_swestore_staging`` is called
    with the tarball (or, when none was given, the flowcell) and those paths
    to obtain the staging directory used below.

    The work done is selected by flags on ``self.pargs``:

    * ``package_run``: requires ``flowcell``; calls ``package_run`` with the
      staging directory and stores the result in ``self.pargs.tarball``.
      With ``clean``, ``rm_run`` is then called on the archive ``root``;
      with ``clean_from_staging``, ``rm_run`` is called on the staging
      directory, but only when the flowcell is listed with storage status
      ``NAS_nosync``.
    * ``remote_upload``: calls ``upload_tarball`` (and ``rm_tarball`` when
      ``clean`` is set).
    * ``send_to_swestore``: calls ``send_to_swestore`` (and ``rm_tarball``
      when ``clean`` is set), then sets the storage status of the flowcell
      to ``swestore_archived`` if the flowcell is found in the database.
    * ``log_to_db``: not implemented.

    The function returns ``None``. It logs an error and returns early when
    no tarball could be created, no tarball was specified, the tarball does
    not exist under the staging directory, or the remote upload reports
    failure.

    Raises:
        SystemExit: with exit code 1 when ``send_to_swestore`` returns a
            falsy result, so that a remote caller can detect the failure.
        NotImplementedError: when ``log_to_db`` is requested.
    """
    db_info = self.app.config.get_section_dict('db')
    f_conn = FlowcellRunMetricsConnection(username=db_info.get('user'),
                                          password=db_info.get('password'),
                                          url=db_info.get('url'))
    swestore_paths = set(self.config.get('archive', 'swestore_staging').split(','))
    run = self.pargs.tarball if self.pargs.tarball else self.pargs.flowcell
    swestore_dir = get_path_swestore_staging(run, swestore_paths)

    def archive_kwargs():
        # 'archive' config values overlaid with the command line arguments
        # (command line wins on duplicate keys). Built with update() because
        # adding two .items() results only works on Python 2.
        kwargs = dict(self.config.get_section_dict('archive').items())
        kwargs.update(vars(self.pargs))
        return kwargs

    # Create a tarball out of the run folder
    if self.pargs.package_run:
        # We require a flowcell argument
        if not self._check_pargs(["flowcell"]):
            return

        self.pargs.tarball = package_run(self, swestore_dir, **vars(self.pargs))
        if not self.pargs.tarball:
            self.log.error("No tarball was created, exiting")
            return
        if self.pargs.clean:
            rm_run(self, self.config.get('archive', 'root'), flowcell=self.pargs.flowcell)
        if self.pargs.clean_from_staging:
            # Check that the run has been archived on the NAS before removing
            # it, otherwise it will keep synching
            if self.pargs.flowcell in f_conn.get_storage_status('NAS_nosync').keys():
                rm_run(self, swestore_dir, flowcell=self.pargs.flowcell)
            else:
                self.log.warn("Run storage status is not NAS_nosync, not removing run from swestore_stage!")

    if not self.pargs.tarball:
        self.log.error("Required argument --tarball was not specified")
        return

    if not os.path.exists(os.path.join(swestore_dir, self.pargs.tarball)):
        self.log.error("Tarball {} does not exist".format(self.pargs.tarball))
        return

    # Upload a tarball to a remote host
    if self.pargs.remote_upload:
        result = upload_tarball(self, **archive_kwargs())
        if not result:
            return
        if self.pargs.clean:
            rm_tarball(self, tarball=self.pargs.tarball)

    # Send the tarball to Swestore using irods
    if self.pargs.send_to_swestore:
        result = send_to_swestore(self, **archive_kwargs())
        if not result:
            # If archiving failed, we need to give a non-zero exit code in
            # order for a remote instance to detect the failure
            sys.exit(1)
        if self.pargs.clean:
            rm_tarball(self, tarball=self.pargs.tarball)
        # Set the run as archived in StatusDB. Without a flowcell argument the
        # id is taken from the tarball file name; basename() keeps directory
        # components (which may themselves contain dots) out of the id.
        # NOTE(review): assumes the tarball file name starts with the flowcell
        # id followed by a dot -- confirm against package_run.
        if self.pargs.flowcell:
            fc_id = self.pargs.flowcell
        else:
            fc_id = os.path.basename(self.pargs.tarball).split('.')[0]
        fc_db_id = f_conn.id_view.get(fc_id)
        if fc_db_id:
            f_conn.set_storage_status(fc_db_id, 'swestore_archived')
        else:
            self.log.warn("Flowcell {} not found in the database, not changing status.".format(fc_id))

    # Log to statusdb
    if self.pargs.log_to_db:
        # implement this
        raise NotImplementedError("logging to db functionality not implemented")