Beispiel #1
0
 def __init__(self,config,key=int(-1),input_dir=None,fastq_archive=None,flowcell=None,machine=None,date='dummy',run_number='dummy',side='dummy',operator=None,run_type=None,process_name='sequencing_run',no_delete=False,sample_sheet=None,**kwargs):
     """
     Set up the record describing one sequencing run.

     When input_dir is None nothing at all is initialised (the parent
     constructor is not called either).  Otherwise the parent
     GenericProcess is initialised with config, key and **kwargs, and the
     run's attributes, output directory, fastq archive and "complete" file
     path are derived from the arguments and from config.

     Note: process_name is accepted here but is not passed on to
     GenericProcess.__init__ by this method.
     flowcell and machine must expose a ``key`` attribute whenever
     input_dir is given.
     """
     if input_dir is None:
         return
     GenericProcess.__init__(self,config,key=key,**kwargs)
     self.input_dir = input_dir
     self.flowcell_key = flowcell.key
     self.machine_key = machine.key
     self.date = date
     self.run_number = run_number
     self.side = side
     self.state = "Running"
     self.operator = operator
     self.run_type = run_type
     self.bcltofastq_pipeline_key = None
     # Run folder name: <date>_<machine key>_<run number>_<side><flowcell key>
     run_folder = "_".join([
         str(date),
         str(machine.key),
         str(run_number),
         str(side)+str(flowcell.key),
     ])
     hiseq_root = config.get('Common_directories','hiseq_output')
     self.output_dir = os.path.join(hiseq_root,run_folder)
     # An explicit archive location wins; otherwise use the casava output area.
     if fastq_archive is not None:
         self.fastq_archive = fastq_archive
     else:
         self.fastq_archive = os.path.join(config.get('Common_directories','casava_output'),run_folder)
     # Marker file whose presence is used as the "bcls ready" signal.
     ready_name = config.get('Filenames','bcls_ready')
     self.complete_file = os.path.join(hiseq_root,run_folder+"/"+ready_name)
     self.no_delete = no_delete
     self.interop_archived = False
     self.sample_sheet = sample_sheet
Beispiel #2
0
 def __is_complete__(self,configs,mockdb,*args,**kwargs):
     """
     Check whether the casava step has finished and, if so, wrap it up.

     Besides the normal completeness check this merges casava directories
     that were run in parallel and pushes samples and flowcells into their
     pipelines.

     configs -- mapping with at least a "system" config object.
     mockdb  -- handed through to the pipeline-launching helpers.
     Returns True when the process is complete, False otherwise.
     """
     # Compare by value: an identity test ("is") against a string literal is
     # not a reliable way to compare text and would normally never match.
     debug = configs["system"].get("Logging","debug") == "True"
     if debug:
         print("Checking to see if casava is done for " + self.flowcell_key)
     if GenericProcess.__is_complete__(self,*args,**kwargs):
         return True
     # complete_file may hold several ":"-separated paths; all must exist.
     for complete_file in self.complete_file.split(":"):
         if not os.path.isfile(complete_file):
             return False
     ##Handle merging
     if self.merged is False:
         if debug:
             print("Merging casava results for " + self.flowcell_key)
         split_categories = []
         if self.split_by_index_length is True:
             split_categories.append("Index_length")
         if self.split_by_lane is True:
             split_categories.append("Lane")
         split_dir = os.path.join(self.output_dir,"split")
         merge_split_casava_results(split_dir,self.output_dir,split_categories)
         # The split directory is removed once its contents have been merged.
         shutil.rmtree(split_dir)
         self.merged = True
     ##Launch pipelines
     self.__push_samples_into_relevant_pipelines__(configs,mockdb)
     self.__push_flowcells_into_relevant_pipelines__(configs,mockdb)
     self.__finish__(*args,**kwargs)
     return True
Beispiel #3
0
 def __is_complete__(self,configs,mockdb,*args,**kwargs):
     """
     Check the self-concordance step and, when needed, the concordance search.

     Once the complete file exists the SNP statistics are stored.  If the
     concordance is above the configured threshold the process finishes
     immediately.  Otherwise a concordance search against the database is
     launched (split across processors first, then gathered) and this
     process only finishes once that search is complete.

     configs -- mapping with "system" and "pipeline" config objects.
     mockdb  -- mapping giving access to 'Sample' and 'ConcordanceSearch'.
     Returns True when complete, False while work is still outstanding.
     """
     if GenericProcess.__is_complete__(self,*args,**kwargs):
         return True
     elif not os.path.isfile(self.complete_file):
         return False
     store_snp_stats_in_db(self)
     # Compare as numbers.  The threshold is read from the config and is
     # presumably text; comparing a number with a string does not compare
     # their values (and is always False for ">" in Python 2).
     threshold = float(configs['pipeline'].get('Concordance','threshold'))
     try:
         concordance = float(self.percentage_concordance)
     except (TypeError, ValueError):
         # No usable concordance value: treat as "not above threshold".
         concordance = None
     if concordance is not None and concordance > threshold:
         self.__finish__(*args,**kwargs)
         return True
     #If the concordance is below the threshold, we need to conduct a concordance search against the database
     #First we split the search across processors
     if self.search_key is None:
         sample = mockdb['Sample'].objects[self.sample_key]
         concord_search = mockdb['ConcordanceSearch'].__new__(configs['system'],sample=sample,snp_stats=self)
         self.search_key = concord_search.key
         concord_search.__launch_split_searches__(configs)
         return False
     concord_search = mockdb['ConcordanceSearch'].objects[self.search_key]
     if concord_search.__is_complete__(configs['system'],*args,**kwargs):
         self.__finish__(*args,**kwargs)
         return True
     #Now we gather
     if concord_search.__are_split_searches_complete__(configs['pipeline']):
         # An existing qsub file means the gather job was already launched.
         if not os.path.isfile(concord_search.qsub_file):
             concord_search.__fill_qsub_file__(configs)
             concord_search.__launch__(configs['system'])
     return False
Beispiel #4
0
 def __is_complete__(self,*args,**kwargs):
     """
     Report whether the zcat process has produced its complete file.

     Returns True if the parent check already says complete, False if the
     complete file is missing, and True otherwise.  The sizes of the two
     read files are still measured, but the value is currently unused.
     """
     if GenericProcess.__is_complete__(self,*args,**kwargs):
         return True
     if not os.path.isfile(self.complete_file):
         return False
     # Size of the smaller of the two fastq files.
     # The notification that used this value is disabled: it compared the
     # size with the 'Storage'/'expected_fastq_size' pipeline option and sent
     # an email built from the 'Zcat_email_templates' size_subject/size_body
     # templates.  It referenced a `configs` object that is not a parameter
     # of this method.
     size = min(int(disk_usage(self.r1_path)), int(disk_usage(self.r2_path)))
     return True
Beispiel #5
0
 def __is_complete__(self,configs,*args,**kwargs):
     """
     Check whether this process has completed and its outputs are in place.

     Order of checks:
       1. the parent GenericProcess check;
       2. the complete file, looked up in output_dir and, when the object
          has an upload_dir, at the equivalent path under upload_dir;
       3. if snp_path and analysis_ready_bam_path are both set, their
          directories and the files themselves must exist;
       4. otherwise 'project-summary.csv' must exist; if it does not (and
          the 'bcbio_no_postprocess' template is configured and we are
          still looking at output_dir) the complete file is removed and the
          no-postprocess script is filled in and launched.

     configs -- mapping with "system" and "pipeline" config objects.
     Returns True (after calling __finish__) when complete, else False.
     """
     current_dir = self.output_dir
     if GenericProcess.__is_complete__(self,*args,**kwargs):
         return True
     elif not os.path.isfile(self.complete_file):
         if hasattr(self,"upload_dir"):
             current_dir = self.upload_dir
             #If the output directory has already been cleaned, check the upload dir.
             if not os.path.isfile(self.complete_file.replace(self.output_dir,self.upload_dir)):
                 return False
         else:
             return False
     snp_path = getattr(self,"snp_path",None)
     bam_path = getattr(self,"analysis_ready_bam_path",None)
     if snp_path is not None and bam_path is not None:
         # Both containing directories must exist.  (The bam check used to
         # pass the boolean result of isfile() to dirname(), which raised
         # whenever the snp directory was present.)
         if not os.path.isdir(os.path.dirname(snp_path)) or not os.path.isdir(os.path.dirname(bam_path)):
             return False
         missing = [path for path in (snp_path,bam_path) if not os.path.isfile(path)]
         if missing:
             # NOTE(review): the original returned False here unconditionally,
             # which left a fallback that scanned upload_dir/description for
             # .vcf/.bam files unreachable.  The "not complete" result is
             # kept; only the diagnostics that sat behind that return are
             # emitted now.  Reinstate the fallback if it was intended.
             if configs["system"].get("Logging","debug") == "True":
                 print("At least one of the output files is missing for sample " + str(self.sample_key) + ":")
                 for path in missing:
                     print("Missing "+ path)
             return False
     else:
         check_file = os.path.join(current_dir,'project-summary.csv')
         #If the process is complete, check to make sure that the check file is created.
         #If not, rerun without post-processing.
         if not os.path.isfile(check_file) and configs['pipeline'].has_option('Template_files','bcbio_no_postprocess') and current_dir==self.output_dir:
             os.remove(self.complete_file)
             template_dir = configs['system'].get('Common_directories','template')
             qsub_template = os.path.join(template_dir,configs['pipeline'].get('Template_files','bcbio_no_postprocess'))
             script_path = os.path.join(self.output_dir,"bcbio_no_postprocess.sh")
             self.__fill_template__(qsub_template,script_path)
             self.__launch__(configs['system'],script_path)
             return False
     self.__finish__(*args,**kwargs)
     return True
Beispiel #6
0
 def __init__(self,config,key=int(-1),flowcell=None,seq_run=None,base_output_dir=None,process_name='flowcell_reports',**kwargs):
     """
     Create the report object for a flowcell and its sequencing run.

     No pipelines are attached yet (self.pipelines starts as None).  When
     flowcell is None nothing is initialised at all.  seq_run must expose
     ``key`` and ``run_type`` whenever flowcell is given.
     """
     if flowcell is None:
         return
     GenericProcess.__init__(self,config,key=key,process_name=process_name,**kwargs)
     # Fall back to the configured reports directory when none is supplied.
     if base_output_dir is None:
         base_output_dir = config.get('Common_directories','flowcell_reports')
     self.base_output_dir = base_output_dir
     self.flowcell_key = flowcell.key
     self.sequencing_run_key = seq_run.key
     self.sequencing_run_type = seq_run.run_type
     self.pipelines = None
     # One empty "flowcell_report_<number>_key" slot per configured number.
     for report_number in config.get('Flowcell_reports','numbers').split(','):
         setattr(self,'flowcell_report_' + str(report_number) + '_key',None)
     self.state = 'Running'
Beispiel #7
0
 def __is_complete__(self,config,*args,**kwargs):
     """
     Report whether the gathering process is complete.

     Complete means either the parent check says so or the complete file
     exists.  No results are stored by this method itself; config is
     accepted but not used here.
     """
     if GenericProcess.__is_complete__(self,*args,**kwargs):
         return True
     return os.path.isfile(self.complete_file)
Beispiel #8
0
 def __is_complete__(self,config,mockdb):
     """
     Tell whether every pipeline attached to this report has completed.

     Returns True if the parent check already reports completion, False
     while no pipelines are attached, and otherwise True only when each
     pipeline in the current pipeline list reports complete.
     """
     if GenericProcess.__is_complete__(self):
         return True
     if self.pipelines is None:
         return False
     # Stops at the first pipeline that is not yet complete.
     return all(pipeline.__is_complete__() for pipeline in self.__current_pipeline_list__(mockdb))
Beispiel #9
0
 def __is_complete__(self,configs,mockdb,*args,**kwargs):
     """
     Check the DNANexus upload by looking at its stderr file.

     Returns True if the parent check reports completion.  Returns False
     while the stderr file does not exist.  The first time a non-empty
     stderr file is seen, an error email is sent to the standard
     recipients, upload_failed is set, and False is returned.  In every
     other case True is returned.  mockdb is not used here.
     """
     if GenericProcess.__is_complete__(self,*args,**kwargs):
         return True
     if not os.path.isfile(self.stderr):
         return False
     stderr_has_output = os.stat(self.stderr).st_size != 0
     # NOTE(review): once upload_failed is set, a non-empty stderr no longer
     # blocks completion and this method returns True -- confirm intended.
     if stderr_has_output and self.upload_failed is False:
         subject = "DNANexus uploading error for " + self.flowcell_key
         message = "".join([
             "DNANexus uploading has encountered an error.  This error is detailed here:\n\t",
             self.stderr,
             "\nThe process has been halted, and the qsub script may be found here:\n\t",
             self.qsub_file,
         ])
         recipients = configs["pipeline"].safe_get("Email","standard_recipients")
         send_email(subject,message,recipients)
         self.upload_failed = True
         return False
     return True
Beispiel #10
0
 def __is_complete__(self,*args,**kwargs):
     """
     Check the complete file and that a non-empty .zip output exists.

     Returns True if the parent check reports completion, or if the
     complete file exists and output_dir contains at least one regular
     file ending in '.zip' with a size greater than zero.  Returns False
     otherwise, including when any error occurs during the check.
     """
     try:
         if GenericProcess.__is_complete__(self):
             return True
         elif not os.path.isfile(self.complete_file):
             return False
         for filename in os.listdir(self.output_dir):
             if not filename.endswith('.zip'):
                 continue
             path = os.path.join(self.output_dir,filename)
             if os.path.isfile(path) and os.path.getsize(path) > 0:
                 return True
         return False
     except Exception:
         # Best effort: any failure while checking counts as "not complete".
         # Exception (rather than a bare except) lets KeyboardInterrupt and
         # SystemExit propagate instead of being silently swallowed.
         return False
Beispiel #11
0
 def __is_complete__(self,configs,storage_device):
     """
     Check the complete file of the backup process and retry failed copies.

     Files reported by __failed_files__ are copied again: the qsub file is
     refilled with just those files and relaunched, and the retry counter
     is incremented.  Once the counter has reached the configured
     'Backup'/'retry_threshold', an error email is sent as well.

     configs        -- mapping with at least a "pipeline" config object.
     storage_device -- passed through to __launch__.
     Returns True when complete with no failed files, False otherwise.
     """
     if GenericProcess.__is_complete__(self):
         return True
     elif not os.path.isfile(self.complete_file):
         return False
     failed_files = self.__failed_files__(configs['pipeline'])
     if len(failed_files) > 0:
         # Compare as integers.  The threshold is read from the config and is
         # presumably text; an int counter compared with a string never
         # compares by value (and ">=" is always False in Python 2), which
         # would mean the notification is never sent.
         retry_threshold = int(configs['pipeline'].get('Backup','retry_threshold'))
         if self.retry >= retry_threshold:
             # NOTE(review): send_email receives a single argument here while
             # other call sites pass subject/body separately -- verify the
             # return shape of __generate_repeated_error_text__.
             send_email(self.__generate_repeated_error_text__(configs,failed_files))
         self.__fill_qsub_file__(configs,r_list=failed_files)
         self.__launch__(configs,storage_device)
         self.retry += 1
         return False
     return True
Beispiel #12
0
 def __is_complete__(self,configs,mockdb,*args,**kwargs):
     """
     Check the complete file, validate the summary stats file and finish.

     If the complete file exists but the summary stats file is empty, the
     complete file is removed and the process is relaunched.  Otherwise the
     summary statistics are stored and the process is finished.

     configs -- mapping with at least a "system" config object.
     mockdb  -- not used by this method.
     Returns True when complete, False otherwise.
     """
     if GenericProcess.__is_complete__(self):
         return True
     elif not os.path.isfile(self.complete_file):
         return False
     # A zero-byte stats file means the run produced nothing usable: rerun.
     if os.stat(self.summary_stats_path).st_size == 0:
         os.remove(self.complete_file)
         self.__launch__(configs['system'])
         return False
     # Compare by value: an identity test ("is") against a string literal is
     # not a reliable way to compare text and would normally never match.
     debug = configs["system"].get("Logging","debug") == "True"
     if debug:
         print("  Storing stats")
     store_summary_stats_in_db(self)
     if debug:
         print("  Finishing.")
     self.__finish__(*args,**kwargs)
     return True