コード例 #1
0
ファイル: models.py プロジェクト: billyziege/pipeline_project
 def __init__(self,config,key=int(-1),pipeline_config=None,prev_step=None,process_name='bwa_mem',pipeline=None,**kwargs):
     """
     Initializes the bwa mem process object from a preceding ZcatMultiple step.

     Nothing is initialized (not even the base class) unless prev_step is an
     object whose class is named "ZcatMultiple".

     Args:
         config: Passed through to SampleQsubProcess.__init__.
         key: Passed through to SampleQsubProcess.__init__.
         pipeline_config: Object providing get(section, option); read for
             'bwa_threads' and 'genome_fasta'.  Required when prev_step is a
             ZcatMultiple (presumably a ConfigParser -- confirm with caller).
         prev_step: The preceding step.  Its output_dir, number_tasks,
             multi_fastq_file, r1_copied and r2_copied attributes are read.
         process_name: Passed through to SampleQsubProcess.__init__.
         pipeline: Object whose project attribute is copied to self.project.
             Required when prev_step is a ZcatMultiple.
         **kwargs: Passed through to SampleQsubProcess.__init__.
     """
     if prev_step is None:
         return
     if prev_step.__class__.__name__ != "ZcatMultiple":
         return

     # Alignment output goes into an "align" directory below the previous step's output.
     SampleQsubProcess.__init__(self,config,key=key,process_name=process_name,input_dir=prev_step.output_dir,output_dir=os.path.join(prev_step.output_dir,"align"),number_tasks=prev_step.number_tasks,**kwargs)

     def strip_gz(path):
         # Literal suffix test: the previous regex r".gz$" also matched any
         # character followed by "gz" because the dot was not escaped.
         if path.endswith(".gz"):
             return path[:-3]
         return path

     multi_fastq_file = prev_step.multi_fastq_file
     if multi_fastq_file is not None:
         self.multi_fastq_file = multi_fastq_file
         multi_fastq = grab_yaml(self.multi_fastq_file)
         lane_numbers = list_from_multi_fastq_object(multi_fastq,"lane")
         flowcells = list_from_multi_fastq_object(multi_fastq,"flowcell")
         # The inputs are the copied fastqs of the previous step, named as they
         # will be once uncompressed.
         input_r1_fastqs = [strip_gz(fastq) for fastq in prev_step.r1_copied.split(":")]
         input_r2_fastqs = [strip_gz(fastq) for fastq in prev_step.r2_copied.split(":")]
         self.lane_number = ":".join(lane_numbers)
         self.flowcell_key = ":".join(flowcells)
         self.input_fastq1 = ":".join(input_r1_fastqs)
         self.input_fastq2 = ":".join(input_r2_fastqs)
         output_sams = []
         for input_r1_fastq in input_r1_fastqs:
             # Plain string replacement: directory names are not regular
             # expressions, so characters such as "+", "(" or "\" in a path
             # must be taken literally.
             output_sam = input_r1_fastq.replace(".fastq",".sam")
             output_sam = output_sam.replace(prev_step.output_dir,self.output_dir)
             # NOTE(review): this removes every "_R1" in the path, not only the
             # read suffix -- kept as in the original behavior.
             output_sam = output_sam.replace("_R1","")
             output_sams.append(output_sam)
         self.output_sam = ":".join(output_sams)
     self.bwa_threads = pipeline_config.get('Program specific parameters','bwa_threads')
     self.ref_fa = pipeline_config.get('References','genome_fasta')
     self.project = pipeline.project
コード例 #2
0
ファイル: models.py プロジェクト: billyziege/pipeline_project
    def __init__(self,config,key=int(-1),process_name='zcat_multiple',multi_fastq_file=None,**kwargs):
        """
        Initializes the zcat multiple process object.

        Reads the r1/r2 input filenames from the multi fastq yaml file (when one
        is given) and derives, per input file, the copied and uncompressed
        destination paths inside self.output_dir, plus one tmp directory and one
        completion-marker file per task.  All per-file lists are stored as
        ":"-joined strings.

        Args:
            config: Passed through to SampleQsubProcess.__init__.
            key: Passed through to SampleQsubProcess.__init__.
            process_name: Passed through to SampleQsubProcess.__init__.
            multi_fastq_file: Path handed to grab_yaml; when None both input
                lists are empty and number_tasks is 0.
            **kwargs: Passed through to SampleQsubProcess.__init__.

        Raises:
            Exception: If the number of r1 files differs from the number of
                r2 files.

        Note: self.sample_key, self.output_dir and self.process_name are read
        after the base initializer runs (presumably set there -- confirm).
        """
        SampleQsubProcess.__init__(self,config,key=key,process_name=process_name,**kwargs)

        def plan_destinations(in_list,read_label):
            # Returns (copied_paths, uncompressed_paths) for one read direction.
            # The copied name keeps a ".gz" suffix when the source has one.
            copied = []
            uncompressed = []
            for i, in_name in enumerate(in_list):
                filename = self.sample_key + "_" + str(i) + "_" + read_label + ".fastq"
                uncompressed.append(os.path.join(self.output_dir,filename))
                if in_name[-3:] == '.gz':
                    filename += ".gz"
                copied.append(os.path.join(self.output_dir,filename)) ##For the copy process
            return copied, uncompressed

        r1_in_list = []
        r2_in_list = []
        if multi_fastq_file is not None:
            self.multi_fastq_file = multi_fastq_file
            multi_fastq = grab_yaml(self.multi_fastq_file)
            r1_in_list = list_from_multi_fastq_object(multi_fastq,"r1_filename")
            r2_in_list = list_from_multi_fastq_object(multi_fastq,"r2_filename")

        #Grab the first read files.
        self.r1_input = ":".join(r1_in_list)
        r1_out_list, r1_uncompressed_list = plan_destinations(r1_in_list,"R1")
        self.r1_copied = ":".join(r1_out_list)
        self.r1_uncompressed = ":".join(r1_uncompressed_list)

        #Grab the paired read files.
        self.r2_input = ":".join(r2_in_list)
        r2_out_list, r2_uncompressed_list = plan_destinations(r2_in_list,"R2")
        self.r2_copied = ":".join(r2_out_list)
        self.r2_uncompressed = ":".join(r2_uncompressed_list)

        if len(r1_in_list) != len(r2_in_list):
            # The original message referenced an undefined name (sample.key),
            # which raised NameError instead of this exception.
            raise Exception("The number of read and matched-pair read files for sample " + self.sample_key + " are not the same")

        self.number_tasks = len(r1_in_list)
        tmp_dirs = []
        complete_files = []
        # Task numbers are 1-based.
        for task_number in range(1,self.number_tasks + 1):
            complete_files.append(os.path.join(self.output_dir, self.process_name + '.' + str(task_number) + '.complete'))
            tmp_dir = os.path.join(self.output_dir, 'tmp.' + str(task_number))
            # Output directories containing "dummy" never get tmp directories created on disk.
            if not os.path.isdir(tmp_dir) and not re.search('dummy',self.output_dir):
                os.makedirs(tmp_dir)
            tmp_dirs.append(tmp_dir)
        self.tmp_dir = ":".join(tmp_dirs)
        self.complete_file = ":".join(complete_files)
コード例 #3
0
ファイル: models.py プロジェクト: billyziege/pipeline_project
    def __init__(self,config,key=int(-1),sample=None,process_name='bwa_sampe',multi_fastq_file=None,ref_fa='/mnt/speed/qc/sequencing/biodata/genomes/Hsapiens/GRCh37/bwa/GRCh37.fa',prev_step=None,pipeline=None,**kwargs):
        """
        Initializes the bwa sampe process object from a preceding BwaAln step.

        Nothing is initialized (not even the base class) unless prev_step is an
        object whose class is named "BwaAln".  The previous step's ":"-joined
        fastq and sai lists are split in half: the first number_tasks entries
        are treated as read 1 and the remainder as read 2.

        Args:
            config: Passed through to SampleQsubProcess.__init__ (and to Sample
                when a dummy sample is created).
            key: Passed through to SampleQsubProcess.__init__.
            sample: The sample being processed; a dummy Sample is created when
                None.
            process_name: Passed through to SampleQsubProcess.__init__.
            multi_fastq_file: Optional path handed to grab_yaml to obtain lane
                and flowcell lists.
            ref_fa: Accepted for interface compatibility but not used here;
                self.ref_fa is taken from prev_step.ref_fa.
            prev_step: The preceding step.  Its output_dir, number_tasks,
                ref_fa, input_fastq and output_sai attributes are read.
            pipeline: Object whose project attribute is copied to self.project.
                Required when prev_step is a BwaAln.
            **kwargs: Passed through to SampleQsubProcess.__init__.

        Raises:
            Exception: If sample is not an instance of a class named "Sample".
        """
        if prev_step is None or prev_step.__class__.__name__ != "BwaAln":
            return
        if sample is None:
            sample = Sample(config,key="dummy_sample_key")
        if sample.__class__.__name__ != "Sample":
            raise Exception("Trying to start a qcpipeline process on a non-sample.")
        # Floor division keeps the task count an integer (it is used as a slice
        # index below) even when "/" means true division.
        SampleQsubProcess.__init__(self,config,key=key,sample=sample,process_name=process_name,input_dir=prev_step.output_dir,output_dir=prev_step.output_dir,number_tasks=prev_step.number_tasks//2,**kwargs)
        self.project = pipeline.project
        self.sample_key = sample.key
        self.ref_fa = prev_step.ref_fa
        if multi_fastq_file is not None:
            self.multi_fastq_file = multi_fastq_file
            multi_fastq = grab_yaml(self.multi_fastq_file)
            lane_numbers = list_from_multi_fastq_object(multi_fastq,"lane")
            flowcells = list_from_multi_fastq_object(multi_fastq,"flowcell")
            self.lane_number = ":".join(lane_numbers)
            self.flowcell_key = ":".join(flowcells)

        def split_reads(joined):
            # Splits a ":"-joined list into (read 1 entries, read 2 entries).
            entries = joined.split(":")
            return entries[:self.number_tasks], entries[self.number_tasks:]

        input_r1_fastqs, input_r2_fastqs = split_reads(prev_step.input_fastq)
        self.input_fastq1 = ":".join(input_r1_fastqs)
        self.input_fastq2 = ":".join(input_r2_fastqs)

        input_r1_sais, input_r2_sais = split_reads(prev_step.output_sai)
        self.input_sai1 = ":".join(input_r1_sais)
        self.input_sai2 = ":".join(input_r2_sais)

        # One sam per read pair, named after the read 1 fastq.  Literal
        # replacement: the previous regex left the "." unescaped, so it matched
        # any character in that position.
        output_sams = [input_r1_fastq.replace("_R1.fastq",".sam") for input_r1_fastq in input_r1_fastqs]
        self.output_sam = ":".join(output_sams)