def __init__(self,config,key=-1,input_dir=None,run_qc_metrics_dir=None,flowcell=None,process_name='dnanexusuploadpipeline',running_location='Speed',pipeline_config=None,**kwargs):
    """
    Set up the DNANexus upload pipeline and announce the upload by email.

    Nothing is initialized when input_dir is None.  Otherwise the
    GenericPipeline initializer is called with input_dir serving as both
    the input and the output directory, the flowcell key and the running
    location are stored on the object, storage_needed is set to 0, and a
    notification is mailed to the "standard_recipients" option of the
    "Email" section of pipeline_config.

    run_qc_metrics_dir is accepted but not used by this method.
    """
    # Without an input directory there is nothing to initialize.
    if input_dir is None:
        return

    GenericPipeline.__init__(
        self,
        config,
        key=key,
        input_dir=input_dir,
        output_dir=input_dir,
        process_name=process_name,
        **kwargs
    )
    # The upload works in place: input and output directory are the same.
    self.input_dir = self.output_dir = input_dir
    self.flowcell_key = flowcell.key
    self.running_location = running_location
    self.storage_needed = 0

    send_email(
        "Uploading " + self.flowcell_key + " to DNANexus.",
        "Uploading has been initiated.",
        pipeline_config.safe_get("Email", "standard_recipients"),
    )
def __is_complete__(self,configs,mockdb,*args,**kwargs):
    """
    Report whether the upload has finished, mailing once if it failed.

    Returns True as soon as GenericProcess.__is_complete__ says so and
    False while the stderr file does not exist.  If the stderr file is
    non-empty and no failure has been recorded yet, an error email is sent
    to the "standard_recipients" of the pipeline config, upload_failed is
    set to True and False is returned.  Every other case returns True.

    mockdb is accepted but not used by this method.
    """
    if GenericProcess.__is_complete__(self, *args, **kwargs):
        return True
    if not os.path.isfile(self.stderr):
        return False

    stderr_is_empty = os.stat(self.stderr).st_size == 0
    # NOTE(review): once upload_failed has been set, a non-empty stderr
    # falls through to this `return True`, so a failed upload is reported
    # as complete on later checks -- confirm this is intended.
    if stderr_is_empty or self.upload_failed is not False:
        return True

    error_lines = [
        "DNANexus uploading has encountered an error.  This error is detailed here:\n\t" + self.stderr,
        "\nThe process has been halted, and the qsub script may be found here:\n\t" + self.qsub_file,
    ]
    send_email(
        "DNANexus uploading error for " + self.flowcell_key,
        "".join(error_lines),
        configs["pipeline"].safe_get("Email", "standard_recipients"),
    )
    self.upload_failed = True
    return False
def __is_complete__(self,configs,storage_device):
    """
    Check the complete file of the backup process, retry copying files where
    the keys for the input and output files are not the same, and handle
    notifications (if any).

    Returns True when GenericProcess.__is_complete__ already reports
    completion, or when the complete file exists and __failed_files__
    returns nothing.  Returns False while the complete file is missing and
    whenever a retry had to be launched.

    Each time failed files are found the qsub file is refilled with them,
    the job is relaunched on storage_device and self.retry is incremented.
    Once self.retry has reached the "retry_threshold" option of the
    "Backup" section, the repeated-error text is emailed before retrying.
    """
    if GenericProcess.__is_complete__(self):
        return True
    if not os.path.isfile(self.complete_file):
        return False

    failed_files = self.__failed_files__(configs['pipeline'])
    if len(failed_files) == 0:
        return True

    # The option is presumably read as text (ConfigParser-style get).  In
    # Python 2 an int never compares >= a str, so without the conversion the
    # notification below could never be sent.
    retry_threshold = int(configs['pipeline'].get('Backup','retry_threshold'))
    if self.retry >= retry_threshold:
        send_email(self.__generate_repeated_error_text__(configs,failed_files))

    # Retry only the files that failed.
    self.__fill_qsub_file__(configs,r_list=failed_files)
    self.__launch__(configs,storage_device)
    self.retry += 1
    return False
def __send_reports__(self,config,mockdb):
    """
    Email every flowcell report that is finished but has not been sent yet.

    The report numbers come from the comma-separated "numbers" option of
    the "Flowcell_reports" section.  For each number the key stored in
    self.flowcell_report_<number>_key is looked up in
    mockdb['FlowcellStatisticReport']; reports with no key, reports already
    sent and reports whose __is_complete__() is false are skipped.

    Report 16 of a 'RapidRun' and report 64 of a 'HighThroughputRun' are
    treated as the last report: they go to "last_recipients", the
    completed samples are written to the "all_samples" list file and this
    object is finished.  All other reports go to "subset_recipients".

    The report pdf, full report and current report are attached to the
    email; afterwards the report is finished and flagged as sent.

    Always returns 1.
    """
    last_report_numbers = (('RapidRun', '16'), ('HighThroughputRun', '64'))

    for number in config.get('Flowcell_reports','numbers').split(','):
        report_key = getattr(self, 'flowcell_report_' + str(number) + '_key')
        if report_key is None:
            continue
        report = mockdb['FlowcellStatisticReport'].objects[report_key]
        # Skip reports that were already sent and reports whose qsub
        # script is still running.
        if report.report_sent is True or not report.__is_complete__():
            continue

        is_last = (self.sequencing_run_type, str(number)) in last_report_numbers
        recipients = config.get('Flowcell_reports', 'last_recipients' if is_last else 'subset_recipients')
        subject, body = report.__generate_flowcell_report_text__(
            config, mockdb, report_type="last_report" if is_last else "subset_report")

        if is_last:
            # Add samples to the all sample list
            sample_keys = self.__completed_samples_list__(mockdb)
            all_samples_file = config.get('Filenames','all_samples')
            write_list_file(sample_keys, all_samples_file, original_list_file=all_samples_file)
            self.__finish__()

        attachments = [report.report_pdf, report.full_report, report.current_report]
        send_email(subject, body, recipients=recipients, files=attachments)
        report.__finish__()
        report.report_sent = True
    return 1
def __init__(self,config,key=int(-1),input_dir=None,process_name='dnanexus_upload',pipeline_config=None,pipeline=None,**kwargs):
    """
    Initializes the upload process object.

    Nothing is initialized when pipeline_config is None.  Otherwise:

    * QsubProcess.__init__ is called with the pipeline's input directory and
      an output directory named after the flowcell directory inside the
      "dnanexus_storage" common directory of pipeline_config.
    * flowcell_key is copied from the pipeline.
    * run_qc_metrics_path and hiseq_run_log_dir are placed under the
      "hiseq_run_log" common directory of config.  If the run qc metrics
      file does not exist, an email is sent to the standard recipients.
    * upload_failed starts out False.

    The input_dir parameter is accepted but not used; the input directory
    is taken from pipeline.input_dir.
    """
    if pipeline_config is None:
        return

    # Strip trailing slashes before every basename: os.path.basename("a/b/")
    # is "", which would collapse the output directory to the storage root.
    flowcell_dir = os.path.basename(pipeline.input_dir.rstrip('/'))
    output_dir = os.path.join(pipeline_config.safe_get("Common_directories","dnanexus_storage"),flowcell_dir)
    QsubProcess.__init__(self,config,key=key,input_dir=pipeline.input_dir,output_dir=output_dir,process_name=process_name,**kwargs)
    self.flowcell_key = pipeline.flowcell_key

    hiseq_run_log = config.get('Common_directories','hiseq_run_log')
    self.run_qc_metrics_path = os.path.join(hiseq_run_log,flowcell_dir,"run_qc_metrics.txt")
    if not os.path.isfile(self.run_qc_metrics_path):
        # Send an email that the run qc metrics file is missing.
        subject = "Missing run_qc_metrics for " + self.flowcell_key
        message = "The run qc metrics file in the following path is missing:\n\t" + self.run_qc_metrics_path
        message += "\nUploading to DNANexus failed."
        recipients = pipeline_config.safe_get("Email","standard_recipients")
        send_email(subject,message,recipients)

    self.flowcell_dir_name = os.path.basename(self.input_dir.rstrip('/'))
    self.hiseq_run_log_dir = os.path.join(hiseq_run_log,self.flowcell_dir_name)
    self.upload_failed = False
def __launch__(self,configs,storage_device,node_list=None):
    """
    Launch the job through SampleQsubProcess.__launch__ unless storage is
    short.

    Returns False, after emailing the matching error text, when the storage
    device is full, or when it is not available and that failure has not
    been reported yet (fail_reported is then set).  Otherwise the job is
    launched on the 'single' queue, using the "nodes" option of the
    "Backup" section when node_list is None, and True is returned.
    """
    pipeline_config = configs['pipeline']
    required_size = pipeline_config.get('Storage','required_fastq_size')

    # A full device aborts the launch outright.
    if storage_device.__is_full__(required_size):
        send_email(self.__generate_full_error_text__(configs,storage_device))
        return False

    # Unlike the check above, availability also accounts for jobs that are
    # currently being copied.  It is less restrictive because
    # required_fastq_size should be larger than the actual fastq size, which
    # leaves additional storage once the copy completes.
    # NOTE(review): if the device is unavailable but fail_reported is
    # already set, execution continues to the launch below -- confirm this
    # is intended.
    unavailable = not storage_device.__is_available__(required_size)
    if unavailable and self.fail_reported == False:
        send_email(self.__generate_storage_error_text__(configs,storage_device))
        self.fail_reported = True
        return False

    nodes = node_list
    if nodes is None:
        nodes = pipeline_config.get('Backup','nodes')
    SampleQsubProcess.__launch__(self, configs['system'], node_list=nodes, queue_name='single')
    return True
def __archive_sequencing_run_data__(self,configs,input_dir,output_dir):
    """
    Copy the small set of sequencing run files from input_dir to output_dir.

    The copy is done directly (not through qsub).  output_dir is created if
    needed.  The "InterOp" directory is copied unless it already exists in
    output_dir, and First_Base_Report.htm, RunInfo.xml and
    runParameters.xml are copied on every call.

    Returns True when every expected path exists in input_dir.  Otherwise
    returns False; the first time this happens the missing paths are mailed
    to the "recipients_for_fastq" of the system config and
    flowcell_content_reported is set so the email is not repeated.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    missing_paths = []
    self.flowcell_content_found = True
    # Objects created before this flag existed (or holding None) have not
    # reported anything yet.
    if getattr(self, "flowcell_content_reported", None) is None:
        self.flowcell_content_reported = False

    for dir_name in ["InterOp"]:
        source = os.path.join(input_dir, dir_name)
        destination = os.path.join(output_dir, dir_name)
        if not os.path.isdir(source):
            missing_paths.append(source)
            self.flowcell_content_found = False
        elif not os.path.exists(destination):
            shutil.copytree(source, destination)

    for file_name in ["First_Base_Report.htm", "RunInfo.xml", "runParameters.xml"]:
        source = os.path.join(input_dir, file_name)
        if os.path.isfile(source):
            shutil.copy(source, output_dir)
        else:
            missing_paths.append(source)
            self.flowcell_content_found = False

    if self.flowcell_content_found:
        return True

    # Report the missing content only once.
    if not self.flowcell_content_reported:
        text = "The flowcell "+self.flowcell_key+" has finished copying over but is missing the following paths:\n"
        text += "\n".join(missing_paths)
        text += "\nPlease check.\n\n"
        to = configs["system"].get('Email','recipients_for_fastq')
        send_email("Missing flowcell data.", text, recipients=to)
        self.flowcell_content_reported = True
    return False
def __is_complete__(self,configs,mockdb,*args,**kwargs):
    """
    Due to the inclusion of sub-processes (bcl to fastq pipeline, illuminate,
    and launched pipelines), this function contains the logic to check that
    all of these processes have completed successfully.

    Each call advances the post-sequencing work by at most one stage and
    returns False until the final clean process is complete:

    1. the complete file must exist;
    2. the sequencing run data is archived to the hiseq run log directory;
    3. the bcltofastq pipeline and illuminate are started if they have no
       key yet, then both must report completion;
    4. the fastq archive copy is launched, then must report completion;
    5. the archive must be larger than 30000000 (as measured by disk_usage)
       and pass check_fastq_output; problems are emailed once;
    6. the clean process is launched (together with the web portal link and
       the demultiplex stats) and, once it is complete, this object is
       finished and True is returned.

    Progress messages are printed when the "debug" option of the "Logging"
    section of the system config equals "True".
    """
    system_config = configs["system"]
    # Compare by value: `is "True"` tests object identity, which does not
    # hold for a string read from a configuration file, so debug output
    # never appeared.
    debug = system_config.get("Logging","debug") == "True"

    def log(text):
        # Single-argument parenthesised form is valid as a Python 2 print
        # statement as well.
        if debug:
            print(text)

    def unset(name):
        # True when the attribute is missing or None.
        return getattr(self, name, None) is None

    def fastq_recipients():
        return system_config.get('Email','recipients_for_fastq')

    log("Checking to see if seq run is complete (and advancing post-seq run pipeline)")
    if not os.path.isfile(self.complete_file):
        log("  Missing complete file " + self.complete_file)
        return False

    # NOTE(review): nothing in this method sets interop_archived, so unless
    # it is set elsewhere the archive step runs on every call -- confirm.
    if not hasattr(self,"interop_archived") or self.interop_archived is False:
        output_name = os.path.basename(self.output_dir)
        run_log_dir = os.path.join(system_config.get('Common_directories','hiseq_run_log'),output_name)
        if not self.__archive_sequencing_run_data__(configs,self.output_dir,run_log_dir):
            log("  Sequencing run data not archived (InterOp and so forth).")
            return False

    # Start whichever of the two sub-processes has no key yet, then wait for
    # the next call.
    started = False
    if unset("bcltofastq_pipeline_key"):
        self.__start_bcltofastq_pipeline__(configs,mockdb)
        log("  Starting bcltofastq pipeline.")
        started = True
    if unset("illuminate_key"):
        self.__launch_illuminate__(configs,mockdb)
        log("  Starting illuminate.")
        started = True
    if started:
        return False

    illuminate = mockdb['Illuminate'].__get__(system_config,self.illuminate_key)
    if not illuminate.__is_complete__(configs,mockdb=mockdb,*args,**kwargs):
        log("  Illuminate not done")
        return False

    bcl2fastq_pipeline = mockdb['BclToFastqPipeline'].__get__(system_config,self.bcltofastq_pipeline_key)
    if not bcl2fastq_pipeline.__is_complete__(configs,mockdb=mockdb,*args,**kwargs):
        log("  bcltofastq not done")
        return False

    if unset("generic_copy_key"):
        self.__launch_archive_fastq__(configs,mockdb)
        log("  Launching archive")
        return False

    archive = mockdb['GenericCopy'].__get__(system_config,self.generic_copy_key)
    if not archive.__is_complete__(*args,**kwargs):
        log("  Fastq archive not complete")
        return False

    # The archive copy is done; make sure it is at least the expected size.
    if not disk_usage(self.fastq_archive) > 30000000:
        if unset('fastq_archive_reported'):
            message = "The flowcell "+self.flowcell_key+" has finished casava, but the archive is not as large as expected.\n"
            message += "\nPlease check.\n\n"
            send_email("Flowcell size problem.",message,recipients=fastq_recipients())
            self.fastq_archive_reported = True
        return False

    fastq_check = check_fastq_output(self.fastq_archive)
    fastq_ok = (fastq_check["md5"] == [] and fastq_check["fastqc"] == []
                and fastq_check["index"] is True and fastq_check["sample_sheet"] is True)
    if not fastq_ok:
        # Mail the list of problems once, then keep reporting incomplete.
        if unset("fastq_check_report"):
            message = "Report detailing the issues with the flowcell directory for flowcell " + self.flowcell_key + ".\n"
            if not fastq_check["sample_sheet"] is True:
                # Uses fastq_archive, the directory that was just checked.
                # The original read self.archive_fastq, a name no other
                # line of this method refers to.
                message += "Sample sheet missing from " + self.fastq_archive + ".\n"
            else:
                if not fastq_check["index"]:
                    message += "Index counts not generated.\n"
                if len(fastq_check["fastqc"]) != 0:
                    message += "The following directories do not have fastqc results:"
                    message += "\n\t".join(fastq_check["fastqc"]) + "\n"
                if len(fastq_check["md5"]) != 0:
                    message += "The following directories do not have md5 checksums:"
                    message += "\n\t".join(fastq_check["md5"]) + "\n"
            send_email("Problem with fastq generation for " + self.flowcell_key + ".",message,recipients=fastq_recipients())
            self.fastq_check_report = True
        return False

    if unset("fastq_check_report"):
        message = "Just informing you of the completion of the flowcell.\n"
        send_email("The fastq have been successully generated for " + self.flowcell_key + ".",message,recipients=fastq_recipients())
        self.fastq_check_report = True

    if unset("generic_clean_key"):
        # A size problem was reported earlier; say that it is resolved.
        if hasattr(self,'fastq_archive_reported') and self.fastq_archive_reported is True:
            message = "The flowcell "+self.flowcell_key+" has finished casava, and is now big enough.\n"
            message += "\nContinuing.\n\n"
            send_email("Flowcell size problem resolved.",message,recipients=fastq_recipients())
        self.__launch_clean__(configs,mockdb)
        self.__link_to_web_portal__(system_config)
        log("  Filling stats")
        flowcell = mockdb['Flowcell'].__get__(system_config,self.flowcell_key)
        machine = mockdb['HiSeqMachine'].__get__(system_config,self.machine_key)
        fill_demultiplex_stats(system_config,mockdb,self.output_dir,flowcell,machine)
        # No early return here (the original carried a commented-out
        # `return False`); the clean process is checked in the same call.

    clean = mockdb['GenericClean'].__get__(system_config,self.generic_clean_key)
    if clean.__is_complete__(*args,**kwargs):
        self.__finish__(*args,**kwargs)
        return True
    return False
def send_missing_sample_sheet_email(sample_sheet_file):
    """
    Email a notice that the given sample sheet is missing.

    sample_sheet_file is converted with str() and used in both the subject
    and the body of the message.  The recipients are fixed in this function.
    """
    # The body previously read an undefined name, `sample_sheet`, instead of
    # the parameter.
    sample_sheet = str(sample_sheet_file)
    message = sample_sheet + " is missing.  Casava cannot run.\n"
    send_email("Missing " + sample_sheet,message,recipients='[email protected],[email protected],[email protected]')