Example #1
 def __init__(self,config,key=-1,seq_run=None,**kwargs):
     if seq_run is not None:
         self.flowcell_key = seq_run.flowcell_key
         input_dir = seq_run.output_dir
         output_dir = os.path.join(config.get("Common_directories","hiseq_run_log"),os.path.basename(seq_run.output_dir))
         QsubProcess.__init__(self,config,key=key,output_dir=output_dir,input_dir=input_dir,process_name="illuminate",**kwargs)
         self.run_qc_metrics_file = os.path.join(output_dir,config.get("Filenames","run_qc_metrics"))
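A minimal, hypothetical sketch of the path derivation used above: the run-log output directory is the configured hiseq_run_log directory plus the basename of the sequencing run's output directory, and the QC metrics filename comes from the Filenames section. All values below are invented stand-ins.

import os

#Invented stand-in values; in the snippet above they come from config.get() and seq_run.
hiseq_run_log = "/logs/hiseq"                                 #config.get("Common_directories","hiseq_run_log")
run_qc_metrics_name = "run_qc_metrics.txt"                    #config.get("Filenames","run_qc_metrics")
seq_run_output_dir = "/seq/runs/150101_SN001_0001_AC0FFEACXX" #seq_run.output_dir
output_dir = os.path.join(hiseq_run_log,os.path.basename(seq_run_output_dir))
run_qc_metrics_file = os.path.join(output_dir,run_qc_metrics_name)
print(run_qc_metrics_file) #/logs/hiseq/150101_SN001_0001_AC0FFEACXX/run_qc_metrics.txt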
Example #2
 def __init__(self,config,key=-1,pipeline_config=None,process_name='md5_check_sum',pipeline=None,input_dir=None,sample_key=None,**kwargs):
     if pipeline is not None:
         self.flowcell_key = pipeline.flowcell_key
         self.md5_file = os.path.join(pipeline.input_dir,pipeline.sample_key + "_checksum.txt")
         QsubProcess.__init__(self,config,key=key,output_dir=pipeline.input_dir,input_dir=pipeline.input_dir,process_name=process_name,**kwargs)
     elif input_dir is not None and sample_key is not None:
         self.md5_file = os.path.join(input_dir,sample_key + "_checksum.txt")
         QsubProcess.__init__(self,config,key=key,output_dir=input_dir,input_dir=input_dir,process_name=process_name,**kwargs)
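A small sketch of the checksum-file naming convention above (<sample_key>_checksum.txt inside the input directory) and of the two entry points, one driven by a pipeline-like object and one by explicit arguments. All names and paths below are invented.

import os

def checksum_path(input_dir,sample_key):
    #Mirrors the naming convention above: <sample_key>_checksum.txt in input_dir.
    return os.path.join(input_dir,sample_key + "_checksum.txt")

class PipelineStub(object):
    #Minimal stand-in exposing the two attributes the first branch reads.
    input_dir = "/seq/samples/SAMPLE_0001"
    sample_key = "SAMPLE_0001"

print(checksum_path(PipelineStub.input_dir,PipelineStub.sample_key)) #First branch: pipeline object
print(checksum_path("/seq/samples/SAMPLE_0002","SAMPLE_0002"))       #Second branch: explicit arguments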
Example #3
 def __init__(self,config,key=-1,pipeline_config=None,prev_step=None,process_name='index_report',pipeline=None,flowcell_key=None,**kwargs):
     if prev_step is not None:
         self.flowcell_key = pipeline.flowcell_key
         output_dir = os.path.join(prev_step.output_dir,"Undetermined_indices")
         QsubProcess.__init__(self,config,key=key,output_dir=output_dir,input_dir="None",process_name=process_name,**kwargs)
     elif flowcell_key is not None:
         self.flowcell_key = flowcell_key
         QsubProcess.__init__(self,config,key=key,input_dir="None",process_name=process_name,**kwargs)
Example #4
 def __init__(self,config,key=-1,prev_step=None,pipeline=None,split_by_lane=True,split_by_index_length=True,process_name="casava",**kwargs):
     """
     In addition to initialization, several other setup steps are performed; these are commented below.
     """
     if prev_step is not None:
         input_dir = os.path.join(pipeline.output_dir,"Data/Intensities/BaseCalls")
         output_dir = os.path.join(pipeline.output_dir,os.path.basename(pipeline.output_dir))
         if not os.path.exists(output_dir):
             os.makedirs(output_dir)
         if pipeline.sample_sheet is None:
             original_sample_sheet_file = os.path.join(pipeline.input_dir,"SampleSheet.csv")
         else:
             original_sample_sheet_file = pipeline.sample_sheet
         if not os.path.isfile(original_sample_sheet_file):#Check to make sure original sample sheet exists
             send_missing_sample_sheet_email(original_sample_sheet_file)
             raise SampleSheetFormatException("No sample sheet found: "+str(original_sample_sheet_file))
         sample_sheet_obj_list = SampleSheetObjList(sample_sheet_file=original_sample_sheet_file)
         sample_sheet_obj_list.list[0].sample_sheet_table.__write_file__(os.path.join(output_dir,"SampleSheet.csv"))#Copy sample sheet to final output dir.
         self.merged = True
         split_categories = []
         self.split_by_lane = split_by_lane
         if split_by_lane: #Split by lane (speeds things up, especially for high-throughput runs)
             sample_sheet_obj_list = sample_sheet_obj_list.__partition_sample_sheet_objects__("Lane")
             split_categories.append("Lane")
             self.merged = False
         self.split_by_index_length = split_by_index_length
         if split_by_index_length: #Split by index length (prevents casava from breaking when pooled samples have different index lengths)
             for sample_sheet_obj in sample_sheet_obj_list.list:
                 sample_sheet_obj.__attach_max_column_number__("Index")
             sample_sheet_obj_list = sample_sheet_obj_list.__partition_sample_sheet_objects__("Index",use_length=True)
             split_categories.append("Index_length")
             self.merged = False
         number_tasks = len(sample_sheet_obj_list.list)
         temporary_output_directories = sample_sheet_obj_list.__create_meta_directories_and_write_files__(os.path.join(output_dir,"split"),split_categories)
         self.temporary_output_dir = ":".join(temporary_output_directories)
         sample_sheets = [os.path.join(d,"SampleSheet.csv") for d in temporary_output_directories]
         self.sample_sheet = ":".join(sample_sheets)
         sample_sheet_obj_list.__attach_masks__(run_parameters_path=os.path.join(pipeline.input_dir,"runParameters.xml"))
         masks = []
         for sample_sheet_obj in sample_sheet_obj_list.list:
             mask = sample_sheet_obj.__get_meta_datum__("mask")
             mask = mask.replace(',','-')
             masks.append(mask)
         self.mask = ":".join(masks)
         QsubProcess.__init__(self,config,key=key,output_dir=output_dir,input_dir=input_dir,number_tasks=number_tasks,process_name=process_name,**kwargs)
         self.flowcell_key = pipeline.flowcell_key
         self.seq_run_key = pipeline.seq_run_key
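The per-task values computed above (temporary output directories, sample sheets, masks) are each packed into a single colon-delimited string, presumably so they can be split back out one value per task. Commas inside a mask are replaced with dashes first so each mask survives as a single token. A standalone sketch of that mask handling, using invented mask strings:

#Invented base masks; the comma-to-dash substitution and the ":"-joined
#packing of one value per task mirror the snippet above.
masks = []
for mask in ["Y101,I6n,Y101","Y101,I8,Y101"]:
    masks.append(mask.replace(",","-"))
print(":".join(masks)) #Y101-I6n-Y101:Y101-I8-Y101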
Example #5
 def __init__(self,config,sample_keys=None,number=None,key=-1,flowcell=None,input_dir=None,base_output_dir=None,output_dir=None,date=strftime("%Y%m%d",localtime()),time=strftime("%H:%M:%S",localtime()),process_name='flowcell_report',complete_file=None,**kwargs):
     """
     Initializes flowcell statistic report.
     """
     if flowcell is None:
         flowcell = Flowcell(config,key="dummy_flowcell_key")
     if flowcell.__class__.__name__ != "Flowcell":
         raise Exception("Trying to start a flowcell statistics reports object on a non-flowcell.")
     if output_dir is None:
         if base_output_dir is None:
             base_output_dir = config.get('Common_directories','flowcell_reports')
         self.output_dir = os.path.join(base_output_dir,flowcell.key + "_reports",str(number))
     else:
         self.output_dir = output_dir
     if complete_file is None:
         self.complete_file = os.path.join(self.output_dir,"report_" + str(number) + ".complete")
     else:
         self.complete_file = complete_file
     QsubProcess.__init__(self,config,key=key,input_dir=input_dir,base_output_dir=base_output_dir,output_dir=self.output_dir,date=date,time=time,process_name=process_name,complete_file=self.complete_file,**kwargs)
     self.flowcell_key = flowcell.key
     if sample_keys is None:
         self.sample_keys = ""
     else:
         self.sample_keys = ";".join(sample_keys)
     self.number = number
     #List of samples from the project
     self.all_samples_file = os.path.join(self.output_dir,'all_samples.ls')
     if self.key != -1:
         write_list_file(sample_keys,self.all_samples_file,original_list_file=config.get('Filenames','all_samples'))
     self.current_samples_file = os.path.join(self.output_dir,'current_samples.ls')
     if self.key != -1:
         write_list_file(sample_keys,self.current_samples_file)
     #Output files
     self.full_report = os.path.join(self.output_dir,'all_samples_report.csv')
     self.current_report = os.path.join(self.output_dir,'current_samples_report.csv')
     self.concordance_jpeg = os.path.join(self.output_dir,'concordance_vs_depth.jpeg')
     self.dbsnp_jpeg = os.path.join(self.output_dir,'dbsnp_vs_depth.jpeg')
     self.greater_than_10x_jpeg = os.path.join(self.output_dir,'greater_than_10x_vs_depth.jpeg')
     self.zero_coverage_jpeg = os.path.join(self.output_dir,'zero_coverage_vs_depth.jpeg')
     self.hethomratio_jpeg = os.path.join(self.output_dir,'hethomratio_vs_depth.jpeg')
     self.reads_jpeg = os.path.join(self.output_dir,'reads_vs_depth.jpeg')
     self.report_pdf = os.path.join(self.output_dir,self.flowcell_key + '_report.pdf')
     #Flag to keep track if report has been sent
     self.report_sent = False
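A sketch of the report directory and file layout built above, with invented values standing in for the configured base directory, the flowcell key, and the report number:

import os

base_output_dir = "/reports/flowcells" #Stand-in for config.get('Common_directories','flowcell_reports')
flowcell_key = "C0FFEACXX"             #Invented flowcell key
number = 3                             #Invented report number
output_dir = os.path.join(base_output_dir,flowcell_key + "_reports",str(number))
complete_file = os.path.join(output_dir,"report_" + str(number) + ".complete")
report_pdf = os.path.join(output_dir,flowcell_key + "_report.pdf")
print(output_dir)    #/reports/flowcells/C0FFEACXX_reports/3
print(complete_file) #/reports/flowcells/C0FFEACXX_reports/3/report_3.complete
print(report_pdf)    #/reports/flowcells/C0FFEACXX_reports/3/C0FFEACXX_report.pdf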
Example #6
    def __init__(self,config,key=-1,input_dir=None,process_name='dnanexus_upload',pipeline_config=None,pipeline=None,**kwargs):
        """
        Initializes the upload process object.
        """
        if pipeline_config is not None:
            output_name = os.path.basename(pipeline.input_dir)
            output_dir = os.path.join(pipeline_config.safe_get("Common_directories","dnanexus_storage"),output_name)
            QsubProcess.__init__(self,config,key=key,input_dir=pipeline.input_dir,output_dir=output_dir,process_name=process_name,**kwargs)
            self.flowcell_key = pipeline.flowcell_key
            flowcell_dir = os.path.basename(pipeline.input_dir.rstrip('/'))
            self.run_qc_metrics_path = os.path.join(config.get('Common_directories','hiseq_run_log'),flowcell_dir,"run_qc_metrics.txt")
            if not os.path.isfile(self.run_qc_metrics_path):
                #Send an email that run qc metrics file is missing.
                subject = "Missing run_qc_metrics for " + self.flowcell_key
                message = "The run qc metrics file in the following path is missing:\n\t" + self.run_qc_metrics_path
                message += "\nUploading to DNANexus failed." 
                recipients = pipeline_config.safe_get("Email","standard_recipients")
                send_email(subject,message,recipients)
            self.flowcell_dir_name = os.path.basename(self.input_dir)
            self.hiseq_run_log_dir = os.path.join(config.get("Common_directories","hiseq_run_log"),self.flowcell_dir_name) #See the other step objects for how values are pulled from the system config.
            self.upload_failed = False
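The rstrip('/') above matters because os.path.basename returns an empty string for a path that ends in a slash; stripping the trailing slash first recovers the run directory name. A short illustration with an invented path:

import os

print(os.path.basename("/seq/runs/150101_RUN0001/"))             #'' -- trailing slash leaves nothing
print(os.path.basename("/seq/runs/150101_RUN0001/".rstrip("/"))) #'150101_RUN0001'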
Example #7
 def __init__(self,config,key=-1,rm_dir=None,process_name='clean',**kwargs):
     if rm_dir is not None:
         QsubProcess.__init__(self,config,key=key,input_dir=rm_dir,process_name=process_name,**kwargs)
Example #8
 def __init__(self,config,key=-1,output_dir=None,input_dir=None,process_name='generic_copy',**kwargs):
     if input_dir is not None:
         QsubProcess.__init__(self,config,key=key,output_dir=output_dir,input_dir=input_dir,process_name=process_name,**kwargs)
Example #9
 def __finish__(self,*args,**kwargs):
     #Run the parent's finish steps, then delete this step's output directory.
     QsubProcess.__finish__(self,*args,**kwargs)
     shutil.rmtree(self.output_dir)
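A standalone sketch of the finish-then-clean-up pattern above; QsubProcess is not available here, so a throwaway temporary directory stands in for self.output_dir.

import os
import shutil
import tempfile

output_dir = tempfile.mkdtemp(prefix="qsub_step_")     #Stand-in for self.output_dir
open(os.path.join(output_dir,"dummy.log"),"w").close() #Pretend the step wrote some output
shutil.rmtree(output_dir)                              #Remove the whole tree once the step has finished
print(os.path.exists(output_dir))                      #False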