def __init__(self, config, key=-1, pipeline_config=None, prev_step=None,
             process_name='cp', pipeline=None, **kwargs):
    """
    Set up a copy ('cp') step that stages pipeline output for delivery.

    Reads the cp subdirectory name from the pipeline config, creates the
    destination directory under the pipeline's input dir, and records the
    md5 checksum file path for the sample.

    Only runs its setup when prev_step is provided (i.e. when constructing
    a new object rather than re-loading one).  `pipeline` is assumed
    non-None whenever prev_step is non-None — TODO confirm against callers.
    """
    if prev_step is not None:
        if pipeline_config is None:
            pipeline_config = MyConfigParser()
            pipeline_config.read(config.get('Pipeline', pipeline.obj_type))
        cp_input_dir_name = pipeline_config.safe_get('Common_directories', 'cp_subdir')
        if cp_input_dir_name is None:
            cp_input_dir_name = ""
        if prev_step.obj_type == "CleanBcbio":
            # Locate the directory containing the clean step's .vcf output.
            # NOTE(review): the original stored this in a misspelled variable
            # ('cp_indput_dir') that was never read, and the value is
            # unconditionally overwritten by the join below either way, so
            # this walk currently has no effect — TODO confirm the intent
            # was to use the vcf directory as cp_input_dir.
            for root, dirs, files in os.walk(prev_step.output_dir, topdown=False):
                for filename in files:
                    if filename.endswith(".vcf"):
                        full_path = os.path.join(root, filename)
                        cp_input_dir = os.path.dirname(full_path)
        cp_input_dir = os.path.join(pipeline.output_dir, cp_input_dir_name)
        output_subdir_name = pipeline_config.safe_get('Common_directories', 'output_subdir', 'ngv3')
        cp_dir = os.path.join(pipeline.input_dir, output_subdir_name)
        if not os.path.exists(cp_dir):
            os.makedirs(cp_dir)
        self.cp_dir = cp_dir
        SampleQsubProcess.__init__(self, config, key=key, input_dir=cp_input_dir,
                                   output_dir=pipeline.output_dir,
                                   process_name=process_name, **kwargs)
        # Per-sample checksum file lives alongside the copied results.
        if self.sample_key is not None:
            self.md5_file = os.path.join(cp_dir, self.sample_key + "_exome_md5checksums.txt")
        else:
            self.md5_file = "exome_md5checksums.txt"
def __init__(self, config, key=-1, sample=None, flowcell=None, description=None,
             recipe=None, input_dir=None, pipeline_config=None, pipeline_key=None,
             process_name='qcpipeline', running_location='Speed',
             storage_needed=500000000, project=None, flowcell_dir_name=None,
             seq_run_key=None, date=None, *args, **kwargs):
    """
    Set up a QC pipeline process for a sample from a flowcell.

    Validates the sample/flowcell objects, records pipeline metadata on
    self, resolves the pipeline config, initializes a *_key attribute per
    pipeline step, and computes client/output directories.  Setup only
    runs when pipeline_config or pipeline_key is given (i.e. when
    constructing a new object rather than re-loading one).

    Raises Exception when `sample` is not a Sample or `flowcell` is not a
    Flowcell.
    """
    # BUG FIX: the default used to be date=strftime(...) evaluated once at
    # import time, so every object created by a long-running process got a
    # stale date.  Compute it per call instead.
    if date is None:
        date = strftime("%Y%m%d", localtime())
    if pipeline_config is not None or pipeline_key is not None:
        if sample is None:
            sample = Sample(config, key="dummy_sample_key")
        if sample.__class__.__name__ != "Sample":
            raise Exception("Trying to start a qcpipeline process on a non-sample.")
        if flowcell is None:
            # BUG FIX: the original assigned the dummy Flowcell to `sample`,
            # leaving `flowcell` None and crashing on the check below.
            flowcell = Flowcell(config, key="dummy_flowcell_key")
        if flowcell.__class__.__name__ != "Flowcell":
            raise Exception("Trying to start a qcpipeline process on a sample not from a flowcell.")
        automation_parameters_config = MyConfigParser()
        automation_parameters_config.read(config.get("Filenames", "automation_config"))
        # Specific information about this pipeline.
        self.description = description
        self.recipe = recipe
        self.storage_needed = storage_needed
        self.input_dir = input_dir
        self.running_location = running_location
        self.seq_run_key = seq_run_key
        capture_target_bed = automation_parameters_config.safe_get("Target", pipeline_key)
        if capture_target_bed is not None:
            self.capture_target_bed = capture_target_bed
        if pipeline_config is None:
            pipeline_name = automation_parameters_config.safe_get("Pipeline", pipeline_key)
            pipeline_config = MyConfigParser()
            pipeline_config.read(config.get('Pipeline', pipeline_name))
        # One tracking attribute per configured pipeline step.
        pipeline_steps = pipeline_config.get('Pipeline', 'steps').split(',')
        for step in pipeline_steps:
            setattr(self, step + "_key", None)
        base_output_dir = pipeline_config.get('Common_directories', 'archive_directory')
        if flowcell_dir_name is None:
            self.client_dir = self.input_dir
        else:
            sample_dir_name = sample.key
            if not str(sample_dir_name).startswith("Sample_"):
                sample_dir_name = "Sample_" + sample_dir_name
            self.client_dir = os.path.join(
                config.get('Common_directories', 'casava_output'),
                flowcell_dir_name + "/Project_" + str(project) + "/" + sample_dir_name)
        self.flowcell_key = flowcell.key
        base_client_dir = config.get('Common_directories', 'casava_output')
        if project is None:
            if base_output_dir is None:
                base_output_dir = ""
            self.output_dir = os.path.join(base_output_dir, sample.key + '_' + str(date))
        else:
            # Underscores in project names are delimiters elsewhere; swap for dashes.
            project_out = re.sub('_', '-', project)
            self.project = project_out
            if re.search("[0-9]", project_out[0:1]):
                project_out = "Project-" + project_out
            if base_output_dir is None:
                base_output_dir = ""
            self.output_dir = os.path.join(base_output_dir,
                                           project_out + "_" + sample.key + '_' + str(date))
        # Don't create directories for dummy placeholder samples.
        if not os.path.exists(self.output_dir) and not re.search('dummy', sample.key):
            os.makedirs(self.output_dir)
        GenericProcess.__init__(self, config, key=key, process_name=process_name, **kwargs)
        self.date = date
        self.sample_key = sample.key
        self.altered_parameters = None
def __push_flowcells_into_relevant_pipelines__(self,configs,mockdb): """ Provides the interface from which all post casava flowcell pipelines are run. """ if configs["system"].get("Logging","debug") is "True": print " Starting post casava flowcell pipelines for " + self.flowcell_key flowcell_dir_name = os.path.basename(self.output_dir) automation_parameters_config = MyConfigParser() automation_parameters_config.read(configs["system"].get("Filenames","automation_config")) running_location = "Speed" parsed = parse_sample_sheet(configs['system'],mockdb,self.output_dir) description = parsed['description'].replace(parsed['SampleID']+'_','') description_dict = parse_description_into_dictionary(description) if 'Pipeline' in description_dict: pipeline_key = description_dict['Pipeline'] else: description_pieces = parsed['description'].split('-') pipeline_key = description_pieces[-1] if pipeline_key.startswith('CCGL'): pipeline_key='CCGL' pipeline_name = automation_parameters_config.safe_get("Flowcell pipeline",pipeline_key) if pipeline_name is None: return 1 if configs["system"].get("Logging","debug") is "True": print "Starting " + pipeline_name pipeline_config = MyConfigParser() pipeline_config.read(configs["system"].get('Pipeline',pipeline_name)) pipeline = mockdb[pipeline_name].__new__(configs['system'],input_dir=self.output_dir,pipeline_key=pipeline_key,seq_run_key=self.seq_run_key,project=parsed['project_name'],flowcell_dir_name=flowcell_dir_name,pipeline_config=pipeline_config,**parsed) return 1
def __push_samples_into_relevant_pipelines__(self,configs,mockdb): """ Provides the interface from which all post casava sample pipelines are run. """ if configs["system"].get("Logging","debug") is "True": print " Starting post casava sample pipelines for " + self.flowcell_key print " Determining Sample dirs" sample_dirs = list_project_sample_dirs(self.output_dir.split(":")) if configs["system"].get("Logging","debug") is "True": print " Samples: " + str(sample_dirs) flowcell_dir_name = os.path.basename(self.output_dir) automation_parameters_config = MyConfigParser() automation_parameters_config.read(configs["system"].get("Filenames","automation_config")) fastqc_pipeline_config = MyConfigParser() fastqc_pipeline_config.read(configs["system"].get("Pipeline","FastQCPipeline")) for project in sample_dirs: for sample in sample_dirs[project]: #running_location = identify_running_location_with_most_currently_available(configs,storage_devices) running_location = "Speed" parsed = parse_sample_sheet(configs['system'],mockdb,sample_dirs[project][sample][0]) if configs["system"].get("Logging","debug") is "True": print " Pushing fastqc pipeline for " + sample fastqc_pipeline = mockdb["FastQCPipeline"].__new__(configs['system'],input_dir=sample_dirs[project][sample][0],flowcell_dir_name=flowcell_dir_name,project=parsed['project_name'],pipeline_config=fastqc_pipeline_config,seq_run_key=self.seq_run_key,**parsed) description_dict = parse_description_into_dictionary(parsed['description']) if 'Pipeline' in description_dict: pipeline_key = description_dict['Pipeline'] else: description_pieces = parsed['description'].split('-') pipeline_key = description_pieces[-1] pipeline_name = automation_parameters_config.safe_get("Pipeline",pipeline_key) if pipeline_name is None: continue if configs["system"].get("Logging","debug") is "True": print "Starting " + pipeline_name + " for " + sample pipeline = 
mockdb[pipeline_name].__new__(configs['system'],input_dir=sample_dirs[project][sample][0],pipeline_key=pipeline_key,seq_run_key=self.seq_run_key,project=parsed['project_name'],flowcell_dir_name=flowcell_dir_name,**parsed)
def things_to_do_if_initializing_flowcell_pipeline_with_input_directory(configs,storage_devices,mockdb,source_dir,pipeline_name=None,base_output_dir=None): """ Starts pipelines that read the entire flowcell data. """ if configs["system"].get("Logging","debug") is "True": print " Starting post casava flowcell pipelines" flowcell_dir_name = os.path.basename(source_dir) automation_parameters_config = MyConfigParser() automation_parameters_config.read(configs["system"].get("Filenames","automation_config")) running_location = "Speed" parsed = parse_sample_sheet(configs['system'],mockdb,source_dir) description = parsed['description'].replace(parsed['SampleID']+'_','') description_dict = parse_description_into_dictionary(description) if configs["system"].get("Logging","debug") is "True": print " Description = " + str(parsed['description']) if 'Pipeline' in description_dict: pipeline_key = description_dict['Pipeline'] else: description_pieces = parsed['description'].split('_') pipeline_key = description_pieces[-1] if pipeline_key.startswith('CCGL'): pipeline_key='CCGL' pipeline_name_check = automation_parameters_config.safe_get("Flowcell pipeline",pipeline_key) if pipeline_name_check != pipeline_name: return 1 if pipeline_name is None: return 1 if configs["system"].get("Logging","debug") is "True": print "Starting " + pipeline_name pipeline = mockdb[pipeline_name].__new__(configs['system'],input_dir=source_dir,pipeline_key=pipeline_key,seq_run_key=None,project=parsed['project_name'],flowcell_dir_name=flowcell_dir_name,running_location='Speed',pipeline_config=configs["pipeline"],**parsed) return 1
def things_to_do_if_initializing_pipeline_with_input_directory(configs, storage_devices, mockdb, source_dir, pipeline_name=None, base_output_dir=None, combine_projects=True):
    """
    Starts per-sample pipelines for every sample directory under source_dir,
    then registers the SequencingRun for the flowcell if it is not already
    known.  Returns 1.
    """
    # BUG FIX: the original tested the undefined name `combine_project`
    # (missing 's') and assigned into `sample_dirs` before creating the
    # dict, both of which raised NameError on this code path.
    if combine_projects:
        sample_dirs = {"dummy_project": list_sample_dirs(source_dir)}
    else:
        sample_dirs = list_project_sample_dirs(source_dir)
    target_config = MyConfigParser()
    target_config.read(configs["system"].get("Filenames", "target_config"))
    for project in sample_dirs:
        for sample in sample_dirs[project]:
            running_location = identify_running_location_with_most_currently_available(configs, storage_devices)
            parsed = parse_sample_sheet(configs['system'], mockdb, sample_dirs[project][sample][0])
            if base_output_dir is None:
                base_output_dir = configs['pipeline'].get('Common_directories', 'archive_directory')
            automation_parameters_config = MyConfigParser()
            automation_parameters_config.read(configs["system"].get("Filenames", "automation_config"))
            description_dict = parse_description_into_dictionary(parsed['description'])
            if 'Pipeline' in description_dict:
                pipeline_key = description_dict['Pipeline']
            else:
                description_pieces = parsed['description'].split('_')
                pipeline_key = description_pieces[-1]
            # Only start the pipeline actually requested by the caller.
            pipeline_name_for_sample = automation_parameters_config.safe_get("Pipeline", pipeline_key)
            if pipeline_name_for_sample != pipeline_name:
                continue
            mockdb[pipeline_name].__new__(configs['system'],
                                          input_dir=sample_dirs[project][sample][0],
                                          pipeline_config=configs["pipeline"],
                                          project=parsed['project_name'],
                                          pipeline_key=pipeline_key,
                                          **parsed)
            # (The original computed this dict twice in a row; once suffices.)
            flowcell_dict = mockdb['SequencingRun'].__attribute_value_to_object_dict__('flowcell_key')
            if parsed['flowcell'].key in flowcell_dict:
                seq_run = flowcell_dict[parsed['flowcell'].key]
            else:
                try:
                    base_dir = get_sequencing_run_base_dir(source_dir)
                    [date, machine_key, run_number, side, flowcell_key] = parse_sequencing_run_dir(base_dir)
                    machine = mockdb['HiSeqMachine'].__get__(configs['system'], machine_key)
                    run_type = determine_run_type(base_dir)
                    # NOTE(review): `flowcell` is not defined in this scope —
                    # presumably parsed['flowcell'] was intended; the bare
                    # except below has been silently swallowing the resulting
                    # NameError, so SequencingRun registration never happens
                    # here.  TODO confirm intent before activating this path.
                    seq_run = mockdb['SequencingRun'].__new__(configs['system'], flowcell, machine, date, run_number, output_dir=base_dir, side=side, run_type=run_type)
                    fill_demultiplex_stats(configs['system'], mockdb, seq_run.output_dir, flowcell, machine)
                except:
                    pass
    return 1