def __is_complete__(self,*args,**kwargs):
    """
    Check for the complete file of the zcat process and handle
    notifications (if any).

    Returns True when the generic process reports completion or when the
    zcat complete file exists; False when the complete file is missing.
    """
    if GenericProcess.__is_complete__(self,*args,**kwargs):
        return True
    if not os.path.isfile(self.complete_file):
        return False
    # If the process is complete, check to make sure that the sizes of the
    # files are adequate.  The smaller of the two read files is the one we
    # would compare against the expected size.
    size = min(int(disk_usage(self.r1_path)), int(disk_usage(self.r2_path)))
    # TODO: the undersized-fastq email notification below is currently
    # disabled; `size` is computed only for its eventual use here.
    #if size < int(configs['pipeline'].get('Storage','expected_fastq_size')):
    #    template_subject = os.path.join(configs['system'].get('Common_directories','template'),configs['pipeline'].get('Zcat_email_templates','size_subject'))
    #    template_body = os.path.join(configs['system'].get('Common_directories','template'),configs['pipeline'].get('Zcat_email_templates','size_body'))
    #    dictionary = {}
    #    for k,v in self.__dict__.iteritems():
    #        dictionary.update({k:str(v)})
    #    dictionary.update({'size':size})
    #    subject = fill_template(template_subject,dictionary)
    #    body = fill_template(template_body, dictionary)
    #    send_email(subject,body)
    return True
def storage_currently_used_by_pipeline(config,mockdb,pipeline):
    """
    Determine the amount of storage currently used by the pipeline's
    output directory.

    Checks the pipeline's bcbio output first, then its zcat output, and
    returns the disk usage of whichever is attached; returns 0 when the
    pipeline has neither sub-process key set.
    """
    # PEP 8: compare against None with `is not`, not `!=`.
    if pipeline.bcbio_key is not None:
        bcbio = mockdb['Bcbio'].__get__(config,pipeline.bcbio_key)
        return disk_usage(bcbio.output_dir)
    if pipeline.zcat_key is not None:
        zcat = mockdb['Zcat'].__get__(config,pipeline.zcat_key)
        return disk_usage(zcat.output_dir)
    return 0
def __finish__(self,*args,**kwargs):
    """
    Finishes the bcltofastq pipeline.  This is separated out due to the
    consolidation of multiple directories into a single email and to
    isolate it for specific pipelines.

    Collects every sample directory whose disk usage falls below the
    200MB threshold and builds a warning message (email sending is
    currently disabled) before delegating to the generic finish.
    """
    dirs_by_sample = list_sample_dirs(self.output_dir.split(":"))
    undersized = [d
                  for sample in dirs_by_sample
                  for d in dirs_by_sample[sample]
                  if int(disk_usage(d)) < 200000]
    if undersized:
        message = "The following directory(ies) is(are) less than 200MB:\n"
        message += "".join("\t" + d + "\n" for d in undersized)
        message += "Please check.\n"
        #send_email("Small sample directory",message,recipients='[email protected],[email protected]')
    GenericPipeline.__finish__(self,*args,**kwargs)
    return 1
def __is_complete__(self,configs,mockdb,*args,**kwargs): """ Due to the inclusion of sub-processes (bclto fastq pipeline, illuminate, and launched pipelines), this function contains the logic to check to makes sure all of these processes have completed successfully. """ if configs["system"].get("Logging","debug") is "True": print "Checking to see if seq run is complete (and advancing post-seq run pipeline)" if not os.path.isfile(self.complete_file): if configs["system"].get("Logging","debug") is "True": print " Missing complete file " + self.complete_file return False if not hasattr(self,"interop_archived") or self.interop_archived is False: output_name = os.path.basename(self.output_dir) if not self.__archive_sequencing_run_data__(configs,self.output_dir,os.path.join(configs["system"].get('Common_directories','hiseq_run_log'),output_name)): if configs["system"].get("Logging","debug") is "True": print " Sequencing run data not archived (InterOp and so forth)." return False if not hasattr(self,"bcltofastq_pipeline_key") or self.bcltofastq_pipeline_key is None or not hasattr(self,"illuminate_key") or self.illuminate_key is None: if not hasattr(self,"bcltofastq_pipeline_key") or self.bcltofastq_pipeline_key is None: self.__start_bcltofastq_pipeline__(configs,mockdb) if configs["system"].get("Logging","debug") is "True": print " Starting bcltofastq pipeline." if not hasattr(self,"illuminate_key") or self.illuminate_key is None: self.__launch_illuminate__(configs,mockdb) if configs["system"].get("Logging","debug") is "True": print " Starting illuminate." 
return False illuminate = mockdb['Illuminate'].__get__(configs['system'],self.illuminate_key) if not illuminate.__is_complete__(configs,mockdb=mockdb,*args,**kwargs): if configs["system"].get("Logging","debug") is "True": print " Illuminate not done" return False bcl2fastq_pipeline = mockdb['BclToFastqPipeline'].__get__(configs['system'],self.bcltofastq_pipeline_key) if not bcl2fastq_pipeline.__is_complete__(configs,mockdb=mockdb,*args,**kwargs): if configs["system"].get("Logging","debug") is "True": print " bcltofastq not done" return False if not hasattr(self,"generic_copy_key") or self.generic_copy_key is None: self.__launch_archive_fastq__(configs,mockdb) if configs["system"].get("Logging","debug") is "True": print " Launching archive" return False archive = mockdb['GenericCopy'].__get__(configs['system'],self.generic_copy_key) if archive.__is_complete__(*args,**kwargs): if not disk_usage(self.fastq_archive) > 30000000: if not hasattr(self,'fastq_archive_reported') or self.fastq_archive_reported is None: message = "The flowcell "+self.flowcell_key+" has finished casava, but the archive is not as large as expected.\n" message += "\nPlease check.\n\n" recipients = configs["system"].get('Email','recipients_for_fastq') send_email("Flowcell size problem.",message,recipients=recipients) self.fastq_archive_reported = True return False fastq_check = check_fastq_output(self.fastq_archive) if fastq_check["md5"] == [] and fastq_check["fastqc"] == [] and fastq_check["index"] is True and fastq_check["sample_sheet"] is True: if not hasattr(self,"fastq_check_report") or self.fastq_check_report is None: message = "Just informing you of the completion of the flowcell.\n" recipients = configs["system"].get('Email','recipients_for_fastq') send_email("The fastq have been successully generated for " + self.flowcell_key + ".",message,recipients=recipients) self.fastq_check_report = True else: if not hasattr(self,"fastq_check_report") or self.fastq_check_report is None: message = 
"Report detailing the issues with the flowcell directory for flowcell " + self.flowcell_key + ".\n" if not fastq_check["sample_sheet"] is True: message += "Sample sheet missing from " + self.archive_fastq + ".\n" else: if not fastq_check["index"]: message += "Index counts not generated.\n" if len(fastq_check["fastqc"]) != 0: message += "The following directories do not have fastqc results:" message += "\n\t".join(fastq_check["fastqc"]) + "\n" if len(fastq_check["md5"]) != 0: message += "The following directories do not have md5 checksums:" message += "\n\t".join(fastq_check["md5"]) + "\n" recipients = configs["system"].get('Email','recipients_for_fastq') send_email("Problem with fastq generation for " + self.flowcell_key + ".",message,recipients=recipients) self.fastq_check_report = True return False if not hasattr(self,"generic_clean_key") or self.generic_clean_key is None: if hasattr(self,'fastq_archive_reported') and self.fastq_archive_reported is True: message = "The flowcell "+self.flowcell_key+" has finished casava, and is now big enough.\n" message += "\nContinuing.\n\n" recipients = configs["system"].get('Email','recipients_for_fastq') send_email("Flowcell size problem resolved.",message,recipients=recipients) self.__launch_clean__(configs,mockdb) self.__link_to_web_portal__(configs['system']) if configs["system"].get("Logging","debug") is "True": print " Filling stats" flowcell = mockdb['Flowcell'].__get__(configs['system'],self.flowcell_key) machine = mockdb['HiSeqMachine'].__get__(configs['system'],self.machine_key) fill_demultiplex_stats(configs['system'],mockdb,self.output_dir,flowcell,machine) #return False else: if configs["system"].get("Logging","debug") is "True": print " Fastq archive not complete" return False clean = mockdb['GenericClean'].__get__(configs['system'],self.generic_clean_key) if clean.__is_complete__(*args,**kwargs): self.__finish__(*args,**kwargs) return True return False
# NOTE(review): the block below (through `return best_location`) is the tail
# of a location-selection function whose `def` line precedes this chunk;
# `configs`, `storage_devices`, and `best_location` are presumably bound by
# the unseen signature/header -- confirm against the full file.
    largest_available = None
    # Storage (in the same units as StorageDevice.available) that the chosen
    # location must be able to accommodate.
    needed_storage = int(configs['pipeline'].get('Storage','needed'))
    for location in configs['system'].get('Location_options','list').split(','):
        storage_device = storage_devices[location]
        # Capacity not yet claimed by pending (waiting) reservations.
        current_available = storage_device.available - storage_device.waiting
        if best_location == None:
            # First candidate seen becomes the provisional best.
            best_location = location
            largest_available = current_available
            continue
        if current_available > largest_available:
            best_location = location
            largest_available = current_available
    # Reserve the needed storage on the winner so concurrent selections
    # account for it before the space is actually consumed.
    storage_devices[best_location].waiting += needed_storage
    return best_location

def initiate_storage_devices(config):
    """
    Load the storage device objects into memory and initializes import attributes.
    """
    location_dirs = get_location_dictionary_from_config(config)
    storage_devices = {}
    # One StorageDevice per configured location, all sharing the same
    # configured storage limit.  (iteritems: this file is Python 2.)
    for name, directory in location_dirs.iteritems():
        storage_devices.update({name:StorageDevice(directory=directory,name=name,limit=config.get('Storage','limit'))})
    return storage_devices

# Script entry point: report the disk usage of the path given as argv[1].
if __name__ == '__main__':
    print disk_usage(sys.argv[1])