def __init__(self,config,key=-1,input_dir=None,fastq_archive=None,flowcell=None,machine=None,date='dummy',run_number='dummy',side='dummy',operator=None,run_type=None,process_name='sequencing_run',no_delete=False,sample_sheet=None,**kwargs):
    """
    Initializes the sequencing run process, deriving the output and
    archive directories from the date, machine, run number, side,
    and flowcell.
    """
    if input_dir is not None:
        GenericProcess.__init__(self,config,key=key,**kwargs)
        self.input_dir = input_dir
        self.flowcell_key = flowcell.key
        self.machine_key = machine.key
        self.date = date
        self.run_number = run_number
        self.side = side
        self.state = "Running"
        self.operator = operator
        self.run_type = run_type
        self.bcltofastq_pipeline_key = None
        #self.input_amount = input_amount
        #self.yield_from_library = yield_from_library
        #self.average_bp = average_bp
        #The output name follows the DATE_MACHINE_RUNNUMBER_SIDEFLOWCELL convention.
        output_name_pieces = []
        output_name_pieces.append(str(date))
        output_name_pieces.append(str(machine.key))
        output_name_pieces.append(str(run_number))
        output_name_pieces.append(str(side)+str(flowcell.key))
        output_name = "_".join(output_name_pieces)
        self.output_dir = os.path.join(config.get('Common_directories','hiseq_output'),output_name)
        if fastq_archive is None:
            self.fastq_archive = os.path.join(config.get('Common_directories','casava_output'),output_name)
        else:
            self.fastq_archive = fastq_archive
        self.complete_file = os.path.join(config.get('Common_directories','hiseq_output'),output_name,config.get('Filenames','bcls_ready'))
        self.no_delete = no_delete
        self.interop_archived = False
        self.sample_sheet = sample_sheet
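#A hedged sketch of the run-folder naming convention the constructor
#assembles above (DATE_MACHINE_RUNNUMBER_SIDEFLOWCELL).  The helper name
#and the example values are hypothetical; this exists only to illustrate
#the composition.
def _compose_run_folder_name(date, machine_key, run_number, side, flowcell_key):
    """Return e.g. '130101_HS2000_0042_AFC001' from its pieces."""
    return "_".join([str(date), str(machine_key), str(run_number),
                     str(side) + str(flowcell_key)])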
def __is_complete__(self,configs,mockdb,*args,**kwargs): """ Handles the merging of casava directories run in parallel and launching pipelines as well as the normal check. """ if configs["system"].get("Logging","debug") is "True": print "Checking to see if casava is done for " + self.flowcell_key if GenericProcess.__is_complete__(self,*args,**kwargs): return True for complete_file in self.complete_file.split(":"): if not os.path.isfile(complete_file): return False ##Handle merging split_categories = [] if self.merged is False: if configs["system"].get("Logging","debug") is "True": print "Merging casava results for " + self.flowcell_key if self.split_by_index_length is True: split_categories.append("Index_length") if self.split_by_lane is True: split_categories.append("Lane") split_dir = os.path.join(self.output_dir,"split") merge_split_casava_results(split_dir,self.output_dir,split_categories) #exit("Just merged. Stopping.") shutil.rmtree(split_dir) self.merged = True ##Launch pipelines self.__push_samples_into_relevant_pipelines__(configs,mockdb) self.__push_flowcells_into_relevant_pipelines__(configs,mockdb) self.__finish__(*args,**kwargs) return True
def __is_complete__(self,configs,mockdb,*args,**kwargs): """ Since concordance search is an optional sub-process, this function checks for both the completeness of the self concordance program and the necessity, and if necessary the completeness, of the concordance search. Then, once complete, relevant statistics are stored. """ if GenericProcess.__is_complete__(self,*args,**kwargs): return True elif not os.path.isfile(self.complete_file): return False store_snp_stats_in_db(self) if self.percentage_concordance > configs['pipeline'].get('Concordance','threshold'): self.__finish__(*args,**kwargs) return True #If the concordance is below the threshold, we need to conduct a concordance search against the database #First we split the search across processors if self.search_key is None: sample = mockdb['Sample'].objects[self.sample_key] concord_search = mockdb['ConcordanceSearch'].__new__(configs['system'],sample=sample,snp_stats=self) self.search_key = concord_search.key concord_search.__launch_split_searches__(configs) return False concord_search = mockdb['ConcordanceSearch'].objects[self.search_key] if concord_search.__is_complete__(configs['system'],*args,**kwargs): self.__finish__(*args,**kwargs) return True #Now we gather if concord_search.__are_split_searches_complete__(configs['pipeline']): if os.path.isfile(concord_search.qsub_file): return False concord_search.__fill_qsub_file__(configs) concord_search.__launch__(configs['system']) return False return False
def __is_complete__(self,*args,**kwargs): """ Check to the complete file of the zcat process and handles notifications (if any). """ if GenericProcess.__is_complete__(self,*args,**kwargs): return True elif not os.path.isfile(self.complete_file): #print self.complete_file return False #If the process is complete, check to make sure that the sizes of the file are adequate. If not, send email. size1 = int(disk_usage(self.r1_path)) size2 = int(disk_usage(self.r2_path)) size = size2 if size1 < size2: size = size1 #Send an email if the size of the fastq is smaller than the expected size. #if size < int(configs['pipeline'].get('Storage','expected_fastq_size')): #template_subject = os.path.join(configs['system'].get('Common_directories','template'),configs['pipeline'].get('Zcat_email_templates','size_subject')) #template_body = os.path.join(configs['system'].get('Common_directories','template'),configs['pipeline'].get('Zcat_email_templates','size_body')) #dictionary = {} #for k,v in self.__dict__.iteritems(): # dictionary.update({k:str(v)}) #dictionary.update({'size':size}) #subject = fill_template(template_subject,dictionary) #body = fill_template(template_body, dictionary) #send_email(subject,body) return True
def __is_complete__(self,configs,*args,**kwargs):
    """
    Due to the inclusion of sub-processes (snp_stats and concordance
    search), this function contains the logic to make sure that all of
    these processes have completed successfully.  If complete, the
    relevant statistics are stored.
    """
    current_dir = self.output_dir
    if GenericProcess.__is_complete__(self,*args,**kwargs):
        return True
    elif not os.path.isfile(self.complete_file):
        if hasattr(self,"upload_dir"):
            #If the output directory has already been cleaned, check the upload dir.
            current_dir = self.upload_dir
            if not os.path.isfile(self.complete_file.replace(self.output_dir,self.upload_dir)):
                return False
        else:
            return False
    if hasattr(self,"snp_path") and self.snp_path is not None and hasattr(self,"analysis_ready_bam_path") and self.analysis_ready_bam_path is not None:
        if not os.path.isdir(os.path.dirname(self.snp_path)) or not os.path.isdir(os.path.dirname(self.analysis_ready_bam_path)):
            return False
        if not os.path.isfile(self.snp_path) or not os.path.isfile(self.analysis_ready_bam_path):
            return False
        #Verify the uploaded copies; initialize the flags so they are
        #defined even when upload_dir is unset.
        snp_file = False
        bam_file = False
        if self.upload_dir is not None:
            for filename in os.listdir(os.path.join(self.upload_dir,self.description)):
                if filename.endswith('.vcf'):
                    snp_file = True
                if filename.endswith('.bam'):
                    bam_file = True
        if not snp_file or not bam_file:
            if configs["system"].get("Logging","debug") == "True":
                print "At least one of the output files is missing for sample " + str(self.sample_key) + ":"
                if not os.path.isfile(self.snp_path):
                    print "Missing " + self.snp_path
                if not os.path.isfile(self.analysis_ready_bam_path):
                    print "Missing " + self.analysis_ready_bam_path
            #os.remove(self.complete_file)
            #template_dir = configs['system'].get('Common_directories','template')
            #qsub_template = os.path.join(template_dir,configs['pipeline'].get('Template_files','bcbio_no_postprocess'))
            #self.__fill_template__(qsub_template,os.path.join(self.output_dir,"bcbio_no_postprocess.sh"))
            #self.__launch__(configs['system'],os.path.join(self.output_dir,"bcbio_no_postprocess.sh"))
            return False
    else:
        #If the process is complete, check to make sure that the check
        #file was created.  If not, relaunch the no-postprocess script
        #(the email notification is currently disabled).
        check_file = os.path.join(current_dir,'project-summary.csv')
        if not os.path.isfile(check_file) and configs['pipeline'].has_option('Template_files','bcbio_no_postprocess') and current_dir == self.output_dir:
            #subject, body = self.__generate_general_error_text__(config)
            #send_email(subject,body)
            #self.fail_reported = True
            os.remove(self.complete_file)
            template_dir = configs['system'].get('Common_directories','template')
            qsub_template = os.path.join(template_dir,configs['pipeline'].get('Template_files','bcbio_no_postprocess'))
            self.__fill_template__(qsub_template,os.path.join(self.output_dir,"bcbio_no_postprocess.sh"))
            self.__launch__(configs['system'],os.path.join(self.output_dir,"bcbio_no_postprocess.sh"))
            return False
    #store_stats_in_db(self)
    self.__finish__(*args,**kwargs)
    return True
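#A hedged sketch of the ``__fill_template__`` step used above; the real
#method lives on the process base class.  The assumption is that it
#substitutes the process's attributes into a qsub template file, roughly:
import string

def fill_template_sketch(template_path, output_path, dictionary):
    """Write ``template_path`` to ``output_path`` with ``$name`` fields filled."""
    with open(template_path) as template_file:
        template = string.Template(template_file.read())
    with open(output_path, 'w') as output_file:
        output_file.write(template.safe_substitute(dictionary))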
def __init__(self,config,key=-1,flowcell=None,seq_run=None,base_output_dir=None,process_name='flowcell_reports',**kwargs):
    """
    Initiates the report object attached to the flowcell and the
    sequencing run, but not yet attached to any pipelines.
    """
    if flowcell is not None:
        GenericProcess.__init__(self,config,key=key,process_name=process_name,**kwargs)
        if base_output_dir is None:
            self.base_output_dir = config.get('Common_directories','flowcell_reports')
        else:
            self.base_output_dir = base_output_dir
        self.flowcell_key = flowcell.key
        self.sequencing_run_key = seq_run.key
        self.sequencing_run_type = seq_run.run_type
        self.pipelines = None
        #Create one report-key attribute per configured report number.
        numbers = config.get('Flowcell_reports','numbers').split(',')
        for number in numbers:
            setattr(self,'flowcell_report_' + str(number) + '_key',None)
        self.state = 'Running'
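#Why the ``setattr`` loop: the report numbers are configuration-driven, so
#the attribute names cannot be hard-coded.  A self-contained illustration
#(the value '1,4,16' is an invented example, not taken from any config):
class _ReportKeysDemo(object):
    def __init__(self, numbers_csv):
        for number in numbers_csv.split(','):
            setattr(self, 'flowcell_report_' + number.strip() + '_key', None)

_demo = _ReportKeysDemo('1,4,16')
assert _demo.flowcell_report_4_key is None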
def __is_complete__(self,config,*args,**kwargs):
    """
    Checks to see if the gathering process is complete.  If so, the
    top 5 "scoring" results of the search are stored.
    """
    if GenericProcess.__is_complete__(self,*args,**kwargs):
        return True
    elif not os.path.isfile(self.complete_file):
        return False
    return True
def __is_complete__(self,config,mockdb):
    """
    Return True if all pipelines in the report object have completed.
    """
    if GenericProcess.__is_complete__(self):
        return True
    if self.pipelines is None:
        return False
    for pipeline in self.__current_pipeline_list__(mockdb):
        if not pipeline.__is_complete__():
            return False
    return True
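#A hedged sketch of ``__current_pipeline_list__``; the real method is
#defined elsewhere on the report class.  The assumption, from how
#``self.pipelines`` and ``mockdb`` are used above, is that stored
#(key, class-name) pairs are resolved into live objects:
def current_pipeline_list_sketch(self, mockdb):
    """Yield the pipeline objects recorded on this report."""
    for key, name in self.pipelines:
        yield mockdb[name].objects[key]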
def __is_complete__(self,configs,mockdb,*args,**kwargs):
    """
    Checks the stderr of the DNANexus upload and reports a failure by
    email (once) if the upload wrote any error output.
    """
    if GenericProcess.__is_complete__(self,*args,**kwargs):
        return True
    if not os.path.isfile(self.stderr):
        return False
    #A non-empty stderr file means the upload hit an error.
    if os.stat(self.stderr).st_size != 0 and self.upload_failed is False:
        subject = "DNANexus uploading error for " + self.flowcell_key
        message = "DNANexus uploading has encountered an error. This error is detailed here:\n\t" + self.stderr
        message += "\nThe process has been halted, and the qsub script may be found here:\n\t" + self.qsub_file
        recipients = configs["pipeline"].safe_get("Email","standard_recipients")
        send_email(subject,message,recipients)
        self.upload_failed = True
        return False
    return True
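#A hedged sketch of the ``send_email`` helper called above; the real helper
#is imported elsewhere in the codebase.  The sender address and SMTP host
#here are assumptions for illustration only.
import smtplib
from email.mime.text import MIMEText

def send_email_sketch(subject, message, recipients,
                      sender="pipeline@localhost", host="localhost"):
    """Send a plain-text notification to a comma-separated recipient list."""
    msg = MIMEText(message)
    msg["Subject"] = subject
    msg["From"] = sender
    msg["To"] = recipients
    server = smtplib.SMTP(host)
    server.sendmail(sender, recipients.split(","), msg.as_string())
    server.quit()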
def __is_complete__(self,*args,**kwargs): """ Check to the complete file of the zcat process and handles notifications (if any). """ try: if GenericProcess.__is_complete__(self): return True elif not os.path.isfile(self.complete_file): #print self.complete_file return False for filename in os.listdir(self.output_dir): if os.path.isfile(os.path.join(self.output_dir,filename)): if (filename.endswith('.zip')): if os.path.getsize(os.path.join(self.output_dir,filename)) > 0: return True return False except: #sys.stderr.write("Error with fastq = " + str(self)) return False
def __is_complete__(self,configs,storage_device):
    """
    Checks the complete file of the backup process, retries copying
    any files whose input and output keys do not match, and handles
    notifications (if any).
    """
    if GenericProcess.__is_complete__(self):
        return True
    elif not os.path.isfile(self.complete_file):
        return False
    failed_files = self.__failed_files__(configs['pipeline'])
    if len(failed_files) > 0:
        if self.retry >= int(configs['pipeline'].get('Backup','retry_threshold')):
            send_email(self.__generate_repeated_error_text__(configs,failed_files))
        self.__fill_qsub_file__(configs,r_list=failed_files)
        self.__launch__(configs,storage_device)
        self.retry += 1
        return False
    return True
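#A hedged sketch of the ``__failed_files__`` check; the real method is
#defined elsewhere.  Based on the docstring above ("whose input and output
#keys do not match"), a plausible reading is a checksum comparison between
#each source file and its backup copy:
import hashlib

def failed_files_sketch(pairs):
    """Return source paths whose backup copy has a different MD5 digest.

    ``pairs`` is an iterable of (source_path, backup_path) tuples.
    """
    def md5(path):
        digest = hashlib.md5()
        with open(path, 'rb') as handle:
            for chunk in iter(lambda: handle.read(1 << 20), b''):
                digest.update(chunk)
        return digest.hexdigest()

    return [src for src, dst in pairs if md5(src) != md5(dst)]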
def __is_complete__(self,configs,mockdb,*args,**kwargs): """ Since concordance search is an optional sub-process, this function checks for both the completeness of the self concordance program and the necessity, and if necessary the completeness, of the concordance search. Then, once complete, relevant statistics are stored. """ if GenericProcess.__is_complete__(self): return True elif not os.path.isfile(self.complete_file): return False if os.stat(self.summary_stats_path)[6]==0: os.remove(self.complete_file) self.__launch__(configs['system']) return False if configs["system"].get("Logging","debug") is "True": print " Storing stats" store_summary_stats_in_db(self) if configs["system"].get("Logging","debug") is "True": print " Finishing." self.__finish__(*args,**kwargs) return True