Example #1
0
 def __init__(self,config,key=int(-1),pipeline_config=None,prev_step=None,process_name='cp',pipeline=None,**kwargs):
     """
     Set up a copy ("cp") step: locate the input directory that holds the
     pipeline's results and the client-facing output directory, then
     delegate to SampleQsubProcess.__init__.

     Initialization only happens when prev_step is provided; pipeline
     supplies the input/output directory roots.
     """
     if prev_step is not None:
         if pipeline_config is None:
             pipeline_config = MyConfigParser()
             pipeline_config.read(config.get('Pipeline',pipeline.obj_type))
         cp_input_dir_name = pipeline_config.safe_get('Common_directories','cp_subdir')
         if cp_input_dir_name is None:
             cp_input_dir_name = ""
             if prev_step.obj_type == "CleanBcbio":
                 # Fall back to the directory containing a .vcf file produced
                 # by the CleanBcbio step (the last one found wins).
                 for root, dirs, files in os.walk(prev_step.output_dir,topdown=False):
                     for filename in files:
                         if filename.endswith(".vcf"):
                             full_path = os.path.join(root,filename)
                             # BUGFIX: was assigned to the misspelled name
                             # `cp_indput_dir`, so the located directory was
                             # silently discarded.  Since this dirname is
                             # absolute, os.path.join below returns it as-is.
                             cp_input_dir_name = os.path.dirname(full_path)
         cp_input_dir = os.path.join(pipeline.output_dir,cp_input_dir_name)
         output_subdir_name = pipeline_config.safe_get('Common_directories','output_subdir','ngv3')
         cp_dir = os.path.join(pipeline.input_dir,output_subdir_name)
         if not os.path.exists(cp_dir):
             os.makedirs(cp_dir)
         self.cp_dir = cp_dir
         SampleQsubProcess.__init__(self,config,key=key,input_dir=cp_input_dir,output_dir=pipeline.output_dir,process_name=process_name,**kwargs)
         # sample_key is set by SampleQsubProcess.__init__ above.
         if self.sample_key is not None:
             self.md5_file = os.path.join(cp_dir,self.sample_key + "_exome_md5checksums.txt")
         else:
             self.md5_file = "exome_md5checksums.txt"
Example #2
0
 def __init__(self,config,key=int(-1),sample=None,flowcell=None,description=None,recipe=None,input_dir=None,pipeline_config=None,pipeline_key=None,process_name='qcpipeline',running_location='Speed',storage_needed=500000000,project=None,flowcell_dir_name=None,seq_run_key=None,date=strftime("%Y%m%d",localtime()),*args,**kwargs):
     """
     Initialize a QC pipeline process for a sample from a flowcell.

     Initialization only happens when a pipeline_config or pipeline_key is
     supplied.  The output directory is derived from the archive directory,
     the (dash-normalized) project name, the sample key and the date.

     NOTE(review): the `date` default is evaluated once at import time,
     not per call -- confirm that is intended.
     """
     if pipeline_config is not None or pipeline_key is not None:
         if sample is None:
             sample = Sample(config,key="dummy_sample_key")
         if sample.__class__.__name__ != "Sample":
             raise Exception("Trying to start a qcpipeline process on a non-sample.")
         if flowcell is None:
             # BUGFIX: this previously assigned the dummy Flowcell to
             # `sample`, clobbering the sample and leaving `flowcell` as
             # None, which guaranteed the exception below.
             flowcell = Flowcell(config,key="dummy_flowcell_key")
         if flowcell.__class__.__name__ != "Flowcell":
             raise Exception("Trying to start a qcpipeline process on a sample not from a flowcell.")
         automation_parameters_config = MyConfigParser()
         automation_parameters_config.read(config.get("Filenames","automation_config"))
         #Specific information about this pipeline
         self.description = description
         self.recipe = recipe
         self.storage_needed = storage_needed
         self.input_dir = input_dir
         self.running_location = running_location
         self.seq_run_key = seq_run_key
         capture_target_bed = automation_parameters_config.safe_get("Target",pipeline_key)
         if capture_target_bed is not None:
             self.capture_target_bed = capture_target_bed
         if pipeline_config is None:
             pipeline_name = automation_parameters_config.safe_get("Pipeline",pipeline_key)
             pipeline_config = MyConfigParser()
             pipeline_config.read(config.get('Pipeline',pipeline_name))
         # Create a <step>_key attribute (initially None) for every step
         # listed in the pipeline configuration.
         pipeline_steps = pipeline_config.get('Pipeline','steps').split(',')
         for step in pipeline_steps:
             setattr(self,step+"_key",None)
         base_output_dir = pipeline_config.get('Common_directories','archive_directory')
         if flowcell_dir_name is None:
             self.client_dir = self.input_dir
         else:
             sample_dir_name = sample.key
             if not str(sample_dir_name).startswith("Sample_"):
                 sample_dir_name = "Sample_" + sample_dir_name
             self.client_dir = os.path.join(config.get('Common_directories','casava_output'),flowcell_dir_name+"/Project_"+str(project)+"/"+sample_dir_name)
         self.flowcell_key = flowcell.key
         base_client_dir = config.get('Common_directories','casava_output')
         if project is None:
             if base_output_dir is None:
                 base_output_dir = ""
             self.output_dir = os.path.join(base_output_dir,sample.key + '_' + str(date))
         else:
             # Dashes, not underscores, in the project portion of the path.
             project_out = re.sub('_','-',project)
             self.project = project_out
             # Directory names must not start with a digit.
             if re.search("[0-9]",project_out[0:1]):
                 project_out = "Project-" + project_out
             if base_output_dir is None:
                 base_output_dir = ""
             self.output_dir = os.path.join(base_output_dir,project_out + "_" + sample.key + '_' + str(date))
         # Dummy samples never get a real output directory on disk.
         if not os.path.exists(self.output_dir) and not re.search('dummy',sample.key):
             os.makedirs(self.output_dir)
         GenericProcess.__init__(self,config,key=key,process_name=process_name,**kwargs)
         self.date = date
         self.sample_key = sample.key
         self.altered_parameters = None
Example #3
0
 def __push_flowcells_into_relevant_pipelines__(self,configs,mockdb):
     """
     Provides the interface from which all post casava flowcell pipelines are run.
     """
     if configs["system"].get("Logging","debug") is "True":
         print "  Starting post casava flowcell pipelines for " + self.flowcell_key
     flowcell_dir_name = os.path.basename(self.output_dir)
     automation_parameters_config = MyConfigParser()
     automation_parameters_config.read(configs["system"].get("Filenames","automation_config"))
     running_location = "Speed"
     parsed = parse_sample_sheet(configs['system'],mockdb,self.output_dir)
     description = parsed['description'].replace(parsed['SampleID']+'_','')
     description_dict = parse_description_into_dictionary(description)
     if 'Pipeline' in description_dict:
         pipeline_key =  description_dict['Pipeline']
     else:
         description_pieces = parsed['description'].split('-')
         pipeline_key = description_pieces[-1]
     if pipeline_key.startswith('CCGL'):
         pipeline_key='CCGL'
     pipeline_name = automation_parameters_config.safe_get("Flowcell pipeline",pipeline_key)
     if pipeline_name is None:
         return 1
     if configs["system"].get("Logging","debug") is "True":
         print "Starting " + pipeline_name
     pipeline_config = MyConfigParser()
     pipeline_config.read(configs["system"].get('Pipeline',pipeline_name))
     pipeline = mockdb[pipeline_name].__new__(configs['system'],input_dir=self.output_dir,pipeline_key=pipeline_key,seq_run_key=self.seq_run_key,project=parsed['project_name'],flowcell_dir_name=flowcell_dir_name,pipeline_config=pipeline_config,**parsed)
     return 1
Example #4
0
 def __push_samples_into_relevant_pipelines__(self,configs,mockdb):
     """
     Provides the interface from which all post casava sample pipelines are run.
     """
     if configs["system"].get("Logging","debug") is "True":
         print "  Starting post casava sample pipelines for " + self.flowcell_key
         print "  Determining Sample dirs"
     sample_dirs = list_project_sample_dirs(self.output_dir.split(":"))
     if configs["system"].get("Logging","debug") is "True":
        print "  Samples: " + str(sample_dirs) 
     flowcell_dir_name = os.path.basename(self.output_dir)
     automation_parameters_config = MyConfigParser()
     automation_parameters_config.read(configs["system"].get("Filenames","automation_config"))
     fastqc_pipeline_config = MyConfigParser()
     fastqc_pipeline_config.read(configs["system"].get("Pipeline","FastQCPipeline"))
     for project in sample_dirs:
         for sample in sample_dirs[project]:
             #running_location = identify_running_location_with_most_currently_available(configs,storage_devices)
             running_location = "Speed"
             parsed = parse_sample_sheet(configs['system'],mockdb,sample_dirs[project][sample][0])
             if configs["system"].get("Logging","debug") is "True":
                print "    Pushing fastqc pipeline for " + sample
             fastqc_pipeline = mockdb["FastQCPipeline"].__new__(configs['system'],input_dir=sample_dirs[project][sample][0],flowcell_dir_name=flowcell_dir_name,project=parsed['project_name'],pipeline_config=fastqc_pipeline_config,seq_run_key=self.seq_run_key,**parsed)
             description_dict = parse_description_into_dictionary(parsed['description'])
             if 'Pipeline' in description_dict:
                 pipeline_key =  description_dict['Pipeline']
             else:
                 description_pieces = parsed['description'].split('-')
                 pipeline_key = description_pieces[-1]
             pipeline_name = automation_parameters_config.safe_get("Pipeline",pipeline_key)
             if pipeline_name is None:
                 continue
             if configs["system"].get("Logging","debug") is "True":
                 print "Starting " + pipeline_name + " for " + sample
             pipeline = mockdb[pipeline_name].__new__(configs['system'],input_dir=sample_dirs[project][sample][0],pipeline_key=pipeline_key,seq_run_key=self.seq_run_key,project=parsed['project_name'],flowcell_dir_name=flowcell_dir_name,**parsed)
def things_to_do_if_initializing_flowcell_pipeline_with_input_directory(configs,storage_devices,mockdb,source_dir,pipeline_name=None,base_output_dir=None):
    """
    Starts pipelines that read the entire flowcell data.
    """
    if configs["system"].get("Logging","debug") is "True":
        print "  Starting post casava flowcell pipelines"
    flowcell_dir_name = os.path.basename(source_dir)
    automation_parameters_config = MyConfigParser()
    automation_parameters_config.read(configs["system"].get("Filenames","automation_config"))
    running_location = "Speed"
    parsed = parse_sample_sheet(configs['system'],mockdb,source_dir)
    description = parsed['description'].replace(parsed['SampleID']+'_','')
    description_dict = parse_description_into_dictionary(description)
    if configs["system"].get("Logging","debug") is "True":
        print "        Description = " + str(parsed['description'])
    if 'Pipeline' in description_dict:
        pipeline_key =  description_dict['Pipeline']
    else:
        description_pieces = parsed['description'].split('_')
        pipeline_key = description_pieces[-1]
    if pipeline_key.startswith('CCGL'):
        pipeline_key='CCGL'
    pipeline_name_check = automation_parameters_config.safe_get("Flowcell pipeline",pipeline_key)
    if pipeline_name_check != pipeline_name:
        return 1
    if pipeline_name is None:
        return 1
    if configs["system"].get("Logging","debug") is "True":
        print "Starting " + pipeline_name
    pipeline = mockdb[pipeline_name].__new__(configs['system'],input_dir=source_dir,pipeline_key=pipeline_key,seq_run_key=None,project=parsed['project_name'],flowcell_dir_name=flowcell_dir_name,running_location='Speed',pipeline_config=configs["pipeline"],**parsed)
    return 1
def things_to_do_if_initializing_pipeline_with_input_directory(configs,storage_devices,mockdb,source_dir,pipeline_name=None,base_output_dir=None,combine_projects=True):
    """
    Starts `pipeline_name` for every sample directory found under source_dir
    whose sample-sheet description maps to that pipeline, registering a
    SequencingRun for the flowcell when one does not already exist.
    Returns 1 in all cases.
    """
    # BUGFIX: this referenced the undefined name `combine_project` and
    # assigned into `sample_dirs` before the dict existed.
    if combine_projects:
        sample_dirs = {"dummy_project": list_sample_dirs(source_dir)}
    else:
        sample_dirs = list_project_sample_dirs(source_dir)
    target_config = MyConfigParser()
    target_config.read(configs["system"].get("Filenames","target_config"))
    # Loop-invariant configuration, hoisted out of the per-sample loop.
    automation_parameters_config = MyConfigParser()
    automation_parameters_config.read(configs["system"].get("Filenames","automation_config"))
    for project in sample_dirs:
        for sample in sample_dirs[project]:
            running_location = identify_running_location_with_most_currently_available(configs,storage_devices)
            parsed = parse_sample_sheet(configs['system'],mockdb,sample_dirs[project][sample][0])
            if base_output_dir is None:
                base_output_dir = configs['pipeline'].get('Common_directories','archive_directory')
            description_dict = parse_description_into_dictionary(parsed['description'])
            if 'Pipeline' in description_dict:
                pipeline_key = description_dict['Pipeline']
            else:
                # Fall back to the last underscore-separated token.
                description_pieces = parsed['description'].split('_')
                pipeline_key = description_pieces[-1]
            pipeline_name_for_sample = automation_parameters_config.safe_get("Pipeline",pipeline_key)
            if pipeline_name_for_sample != pipeline_name:
                continue
            mockdb[pipeline_name].__new__(configs['system'],input_dir=sample_dirs[project][sample][0],pipeline_config=configs["pipeline"],project=parsed['project_name'],pipeline_key=pipeline_key,**parsed)
            # BUGFIX: this dict was built twice in a row; once is enough.
            flowcell_dict = mockdb['SequencingRun'].__attribute_value_to_object_dict__('flowcell_key')
            # BUGFIX: the original try-block below used the undefined name
            # `flowcell`; the flowcell object comes from the parsed sample
            # sheet (it is already used via parsed['flowcell'].key here).
            flowcell = parsed['flowcell']
            if flowcell.key in flowcell_dict:
                seq_run = flowcell_dict[flowcell.key]
            else:
                try:
                    base_dir = get_sequencing_run_base_dir(source_dir)
                    [date,machine_key,run_number,side,flowcell_key] = parse_sequencing_run_dir(base_dir)
                    machine = mockdb['HiSeqMachine'].__get__(configs['system'],machine_key)
                    run_type = determine_run_type(base_dir)
                    seq_run = mockdb['SequencingRun'].__new__(configs['system'],flowcell,machine,date,run_number,output_dir=base_dir,side=side,run_type=run_type)
                    fill_demultiplex_stats(configs['system'],mockdb,seq_run.output_dir,flowcell,machine)
                except Exception:
                    # Best-effort: a malformed run directory must not abort
                    # the remaining samples.  NOTE(review): consider logging
                    # this instead of silently passing.
                    pass
    return 1