Example No. 1
def things_to_do_if_initializing_flowcell_pipeline_with_input_directory(configs,storage_devices,mockdb,source_dir,pipeline_name=None,base_output_dir=None):
    """
    Starts pipelines that read the entire flowcell data.
    """
    if configs["system"].get("Logging","debug") == "True":
        print "  Starting post casava flowcell pipelines"
    flowcell_dir_name = os.path.basename(source_dir)
    automation_parameters_config = MyConfigParser()
    automation_parameters_config.read(configs["system"].get("Filenames","automation_config"))
    running_location = "Speed"
    parsed = parse_sample_sheet(configs['system'],mockdb,source_dir)
    description = parsed['description'].replace(parsed['SampleID']+'_','')
    description_dict = parse_description_into_dictionary(description)
    if configs["system"].get("Logging","debug") == "True":
        print "        Description = " + str(parsed['description'])
    if 'Pipeline' in description_dict:
        pipeline_key = description_dict['Pipeline']
    else:
        description_pieces = parsed['description'].split('_')
        pipeline_key = description_pieces[-1]
    if pipeline_key.startswith('CCGL'):
        pipeline_key='CCGL'
    if pipeline_name is None:
        return 1
    pipeline_name_check = automation_parameters_config.safe_get("Flowcell pipeline",pipeline_key)
    if pipeline_name_check != pipeline_name:
        return 1
    if configs["system"].get("Logging","debug") == "True":
        print "Starting " + pipeline_name
    pipeline = mockdb[pipeline_name].__new__(configs['system'],input_dir=source_dir,pipeline_key=pipeline_key,seq_run_key=None,project=parsed['project_name'],flowcell_dir_name=flowcell_dir_name,running_location='Speed',pipeline_config=configs["pipeline"],**parsed)
    return 1
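The pipeline key above comes from the SampleSheet description: an explicit Pipeline entry wins, otherwise the trailing token of the description is used. A minimal sketch of how a description parser of this kind might behave, assuming "Key-Value" pairs separated by underscores (the real parse_description_into_dictionary may differ):

def parse_description_into_dictionary(description):
    """Split a description such as 'Capture-NGv3_Pipeline-QCPipeline' into a dict."""
    description_dict = {}
    for piece in description.split('_'):
        if '-' in piece:
            key, value = piece.split('-',1)
            description_dict[key] = value
    return description_dict

print parse_description_into_dictionary("Capture-NGv3_Pipeline-QCPipeline")
#{'Capture': 'NGv3', 'Pipeline': 'QCPipeline'}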
Example No. 2
 def __is_complete__(self,configs,mockdb,*args,**kwargs):
     """
     Checks to see if the pipeline is complete.  If not, and it is ready to advance, then
     the pipeline is advanced.
     """
     if GenericProcess.__is_complete__(self,*args,**kwargs):
         return True
     if not hasattr(self,"generic_copy_key") or self.generic_copy_key is None:
         if configs["system"].get("Logging","debug") == "True":
             print "Copying bcls"
         self.__launch_copy_bcls__(configs,mockdb)
         return False
     current_configs = {}
     current_configs["system"] = configs["system"]
     pipeline_config = MyConfigParser()
     current_configs["pipeline"] = pipeline_config
     pipeline_config.read(configs["system"].get('Pipeline','BclToFastqPipeline'))
     if self.__handle_linear_steps__(current_configs,mockdb,skip_finish=True,*args,**kwargs):
         casava = mockdb['Casava'].__get__(configs['system'],self.casava_key)
         if configs["system"].get("Logging","debug") == "True":
             print "Checking the pipeline first step results"
         if casava.__do_all_relevant_pipelines_have_first_step_complete__(current_configs,mockdb):
             self.__finish__(*args,**kwargs)
             return True
     return False
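This method is a poll-and-advance state machine: each call either reports completion, launches the outstanding bcl copy, or checks whether the pipelines behind it have cleared their first step. A stripped-down sketch of the same control flow, with hypothetical callables standing in for the mockdb processes:

class TwoStepProcess(object):
    """Poll-and-advance: launch the copy first, then finish once the check passes."""
    def __init__(self):
        self.copy_step = None
        self.finished = False
    def is_complete(self, launch_copy, first_step_done):
        if self.finished:
            return True
        if self.copy_step is None:
            self.copy_step = launch_copy()  #first poll kicks off the copy
            return False
        if first_step_done():
            self.finished = True
            return True
        return False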
Example No. 3
 def __init__(self,config,key=-1,pipeline_config=None,prev_step=None,process_name='cp',pipeline=None,**kwargs):
     if prev_step is not None:
         if pipeline_config is None:
             pipeline_config = MyConfigParser()
             pipeline_config.read(config.get('Pipeline',pipeline.obj_type))
         cp_input_dir = None
         cp_input_dir_name = pipeline_config.safe_get('Common_directories','cp_subdir')
         if cp_input_dir_name is None:
             cp_input_dir_name = ""
             if prev_step.obj_type == "CleanBcbio":
                 #Fall back to the directory that holds the vcf output.
                 for root, dirs, files in os.walk(prev_step.output_dir,topdown=False):
                     for filename in files:
                         if filename.endswith(".vcf"):
                             full_path = os.path.join(root,filename)
                             cp_input_dir = os.path.dirname(full_path)
         if cp_input_dir is None:
             cp_input_dir = os.path.join(pipeline.output_dir,cp_input_dir_name)
         output_subdir_name = pipeline_config.safe_get('Common_directories','output_subdir','ngv3')
         cp_dir = os.path.join(pipeline.input_dir,output_subdir_name)
         if not os.path.exists(cp_dir):
             os.makedirs(cp_dir)
         self.cp_dir = cp_dir
         SampleQsubProcess.__init__(self,config,key=key,input_dir=cp_input_dir,output_dir=pipeline.output_dir,process_name=process_name,**kwargs)
         if self.sample_key is not None:
             self.md5_file = os.path.join(cp_dir,self.sample_key + "_exome_md5checksums.txt")
         else:
             self.md5_file = "exome_md5checksums.txt"
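The fallback branch above walks the previous step's output and keeps the directory of the last .vcf it encounters. The same search, factored into a standalone helper (find_vcf_dir is illustrative, not part of the pipeline code):

import os

def find_vcf_dir(top_dir):
    """Return the directory of the last .vcf found under top_dir, or None."""
    vcf_dir = None
    for root, dirs, files in os.walk(top_dir, topdown=False):
        for filename in files:
            if filename.endswith(".vcf"):
                vcf_dir = root  #dirname(join(root, filename)) is just root
    return vcf_dir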
Example No. 4
 def __init__(self,config,key=-1,sample=None,flowcell=None,description=None,recipe=None,input_dir=None,pipeline_config=None,pipeline_key=None,process_name='qcpipeline',running_location='Speed',storage_needed=500000000,project=None,flowcell_dir_name=None,seq_run_key=None,date=strftime("%Y%m%d",localtime()),*args,**kwargs):
     if pipeline_config is not None or pipeline_key is not None:
         if sample is None:
             sample = Sample(config,key="dummy_sample_key")
         if sample.__class__.__name__ != "Sample":
             raise Exception("Trying to start a qcpipeline process on a non-sample.")
         if flowcell is None:
             flowcell = Flowcell(config,key="dummy_flowcell_key")
         if flowcell.__class__.__name__ != "Flowcell":
             raise Exception("Trying to start a qcpipeline process on a sample not from a flowcell.")
         automation_parameters_config = MyConfigParser()
         automation_parameters_config.read(config.get("Filenames","automation_config"))
         #Specific information about this pipeline
         self.description = description
         self.recipe = recipe
         self.storage_needed = storage_needed
         self.input_dir = input_dir
         self.running_location = running_location
         self.seq_run_key = seq_run_key
         capture_target_bed = automation_parameters_config.safe_get("Target",pipeline_key)
         if capture_target_bed is not None:
             self.capture_target_bed = capture_target_bed
         if pipeline_config is None:
             pipeline_name = automation_parameters_config.safe_get("Pipeline",pipeline_key)
             pipeline_config = MyConfigParser()
             pipeline_config.read(config.get('Pipeline',pipeline_name))
         pipeline_steps = pipeline_config.get('Pipeline','steps').split(',')
         for step in pipeline_steps:
             setattr(self,step+"_key",None)
         base_output_dir = pipeline_config.get('Common_directories','archive_directory')
         if flowcell_dir_name is None:
             self.client_dir = self.input_dir
         else:
             sample_dir_name = sample.key
             if not str(sample_dir_name).startswith("Sample_"):
                 sample_dir_name = "Sample_" + sample_dir_name
             self.client_dir = os.path.join(config.get('Common_directories','casava_output'),flowcell_dir_name,"Project_"+str(project),sample_dir_name)
         self.flowcell_key = flowcell.key
         base_client_dir = config.get('Common_directories','casava_output')
         if project is None:
             if base_output_dir is None:
                 base_output_dir = ""
             self.output_dir = os.path.join(base_output_dir,sample.key + '_' + str(date))
         else:
             project_out = re.sub('_','-',project)
             self.project = project_out
             if re.search("[0-9]",project_out[0:1]):
                 project_out = "Project-" + project_out
             if base_output_dir is None:
                 base_output_dir = ""
             self.output_dir = os.path.join(base_output_dir,project_out + "_" + sample.key + '_' + str(date))
         if not os.path.exists(self.output_dir) and not re.search('dummy',sample.key):
             os.makedirs(self.output_dir)
         GenericProcess.__init__(self,config,key=key,process_name=process_name,**kwargs)
         self.date = date
         self.sample_key = sample.key
         self.altered_parameters = None
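The output directory name built above encodes project, sample, and date; underscores in the project become hyphens, and a name that would start with a digit gets a "Project-" prefix. A worked example of just that naming logic (values are hypothetical):

import os, re
from time import strftime, localtime

project_out = re.sub('_','-',"2013_exomes")    #underscores become hyphens
if re.search("[0-9]",project_out[0:1]):        #leading digit triggers the prefix
    project_out = "Project-" + project_out
date = strftime("%Y%m%d",localtime())
print os.path.join("/archive",project_out + "_" + "S001" + "_" + date)
#e.g. /archive/Project-2013-exomes_S001_20130115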
Example No. 5
 def __push_flowcells_into_relevant_pipelines__(self,configs,mockdb):
     """
     Provides the interface from which all post casava flowcell pipelines are run.
     """
     if configs["system"].get("Logging","debug") == "True":
         print "  Starting post casava flowcell pipelines for " + self.flowcell_key
     flowcell_dir_name = os.path.basename(self.output_dir)
     automation_parameters_config = MyConfigParser()
     automation_parameters_config.read(configs["system"].get("Filenames","automation_config"))
     running_location = "Speed"
     parsed = parse_sample_sheet(configs['system'],mockdb,self.output_dir)
     description = parsed['description'].replace(parsed['SampleID']+'_','')
     description_dict = parse_description_into_dictionary(description)
     if 'Pipeline' in description_dict:
         pipeline_key = description_dict['Pipeline']
     else:
         description_pieces = parsed['description'].split('-')
         pipeline_key = description_pieces[-1]
     if pipeline_key.startswith('CCGL'):
         pipeline_key='CCGL'
     pipeline_name = automation_parameters_config.safe_get("Flowcell pipeline",pipeline_key)
     if pipeline_name is None:
         return 1
     if configs["system"].get("Logging","debug") == "True":
         print "Starting " + pipeline_name
     pipeline_config = MyConfigParser()
     pipeline_config.read(configs["system"].get('Pipeline',pipeline_name))
     pipeline = mockdb[pipeline_name].__new__(configs['system'],input_dir=self.output_dir,pipeline_key=pipeline_key,seq_run_key=self.seq_run_key,project=parsed['project_name'],flowcell_dir_name=flowcell_dir_name,pipeline_config=pipeline_config,**parsed)
     return 1
Example No. 6
 def __push_samples_into_relevant_pipelines__(self,configs,mockdb):
     """
     Provides the interface from which all post casava sample pipelines are run.
     """
     if configs["system"].get("Logging","debug") == "True":
         print "  Starting post casava sample pipelines for " + self.flowcell_key
         print "  Determining Sample dirs"
     sample_dirs = list_project_sample_dirs(self.output_dir.split(":"))
     if configs["system"].get("Logging","debug") == "True":
         print "  Samples: " + str(sample_dirs)
     flowcell_dir_name = os.path.basename(self.output_dir)
     automation_parameters_config = MyConfigParser()
     automation_parameters_config.read(configs["system"].get("Filenames","automation_config"))
     fastqc_pipeline_config = MyConfigParser()
     fastqc_pipeline_config.read(configs["system"].get("Pipeline","FastQCPipeline"))
     for project in sample_dirs:
         for sample in sample_dirs[project]:
             #running_location = identify_running_location_with_most_currently_available(configs,storage_devices)
             running_location = "Speed"
             parsed = parse_sample_sheet(configs['system'],mockdb,sample_dirs[project][sample][0])
             if configs["system"].get("Logging","debug") == "True":
                 print "    Pushing fastqc pipeline for " + sample
             fastqc_pipeline = mockdb["FastQCPipeline"].__new__(configs['system'],input_dir=sample_dirs[project][sample][0],flowcell_dir_name=flowcell_dir_name,project=parsed['project_name'],pipeline_config=fastqc_pipeline_config,seq_run_key=self.seq_run_key,**parsed)
             description_dict = parse_description_into_dictionary(parsed['description'])
             if 'Pipeline' in description_dict:
                 pipeline_key = description_dict['Pipeline']
             else:
                 description_pieces = parsed['description'].split('-')
                 pipeline_key = description_pieces[-1]
             pipeline_name = automation_parameters_config.safe_get("Pipeline",pipeline_key)
             if pipeline_name is None:
                 continue
             if configs["system"].get("Logging","debug") == "True":
                 print "Starting " + pipeline_name + " for " + sample
             pipeline = mockdb[pipeline_name].__new__(configs['system'],input_dir=sample_dirs[project][sample][0],pipeline_key=pipeline_key,seq_run_key=self.seq_run_key,project=parsed['project_name'],flowcell_dir_name=flowcell_dir_name,**parsed)
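The loops above rely on list_project_sample_dirs returning a nested mapping of project -> sample -> list of directories. A toy illustration of that traversal (the dictionary contents are made up):

sample_dirs = {"ProjectA": {"S001": ["/casava/ProjectA/Sample_S001"]},
               "ProjectB": {"S002": ["/casava/ProjectB/Sample_S002"]}}
for project in sample_dirs:
    for sample in sample_dirs[project]:
        print project, sample, sample_dirs[project][sample][0]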
Example No. 7
 def __launch_copy_bcls__(self,configs,mockdb):
     """
     This launches the process that will archive the fastq directories.
     """
     input_dir = os.path.join(self.input_dir,"Data/Intensities")
     output_dir = os.path.join(self.output_dir,"Data/Intensities")
     if not os.path.exists(output_dir):
         os.makedirs(output_dir)
     copy_all_xml(self.input_dir,self.output_dir)
     copy_all_xml(os.path.join(self.input_dir,"Data"),os.path.join(self.output_dir,"Data"))
     current_configs = {}
     current_configs["system"] = configs["system"]
     pipeline_config = MyConfigParser()
     current_configs["pipeline"] = pipeline_config
     pipeline_config.read(configs["system"].get('Pipeline','BclToFastqPipeline'))
     copy_bcls = mockdb['GenericCopy'].__new__(configs['system'],input_dir=input_dir,output_dir=output_dir)
     self.generic_copy_key = copy_bcls.key
     copy_bcls.__fill_qsub_file__(current_configs)
     copy_bcls.__launch__(configs['system'])
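copy_all_xml is not shown in these examples; from its call sites it copies run-level XML metadata (RunInfo.xml and the like) from one directory to another. A plausible implementation under that assumption:

import glob, os, shutil

def copy_all_xml(input_dir, output_dir):
    """Copy every top-level .xml file from input_dir into output_dir."""
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    for xml_path in glob.glob(os.path.join(input_dir,"*.xml")):
        shutil.copy(xml_path, output_dir)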
Example No. 8
 def __do_all_relevant_pipelines_have_first_step_complete__(self,configs,mockdb):
     """
     Since the first step of post casava pipelines is to copy the data,
     moving the data after these pipelines are started must wait for
     this step to complete.  This is only a concern when everything is 
     automated.  This checks that step and whether the FastQCPipeline is finished.
     """
     pipeline_names = configs["system"].get('Pipeline','post_casava_automated').split(',')
     for pipeline_name in pipeline_names:
         try:
             seq_run_key_dict = mockdb[pipeline_name].__attribute_value_to_object_dict__('seq_run_key')
             pipeline_config = MyConfigParser()
             pipeline_config.read(configs["system"].get('Pipeline',pipeline_name))
             for pipeline in seq_run_key_dict[self.seq_run_key]:
                 if pipeline_name == "FastQCPipeline":
                     if not pipeline.__is_complete__():
                         return False
                 if not pipeline.__check_first_step__(pipeline_config):
                     return False
         except: #skip pipeline types with no entries for this run
             continue
     return True
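__attribute_value_to_object_dict__ evidently groups every stored object by the value of one attribute, so seq_run_key_dict[self.seq_run_key] yields all pipelines attached to this sequencing run. A guess at the grouping logic (names are illustrative):

def attribute_value_to_object_dict(objects, attribute):
    """Group objects into lists keyed by the value of the given attribute."""
    grouped = {}
    for obj in objects:
        grouped.setdefault(getattr(obj, attribute), []).append(obj)
    return grouped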
Example No. 9
def things_to_do_if_initializing_pipeline_with_input_directory(configs,storage_devices,mockdb,source_dir,pipeline_name=None,base_output_dir=None,combine_projects=True):
    if combine_projects:
        sample_dirs = {}
        sample_dirs["dummy_project"] = list_sample_dirs(source_dir)
    else:
        sample_dirs = list_project_sample_dirs(source_dir)
    target_config = MyConfigParser()
    target_config.read(configs["system"].get("Filenames","target_config"))
    for project in sample_dirs:
        for sample in sample_dirs[project]:
            running_location = identify_running_location_with_most_currently_available(configs,storage_devices)
            parsed = parse_sample_sheet(configs['system'],mockdb,sample_dirs[project][sample][0])
            if base_output_dir is None:
                base_output_dir = configs['pipeline'].get('Common_directories','archive_directory')
            automation_parameters_config = MyConfigParser()
            automation_parameters_config.read(configs["system"].get("Filenames","automation_config"))
            description_dict = parse_description_into_dictionary(parsed['description'])
            if 'Pipeline' in description_dict:
                pipeline_key = description_dict['Pipeline']
            else:
                description_pieces = parsed['description'].split('_')
                pipeline_key = description_pieces[-1]
            pipeline_name_for_sample = automation_parameters_config.safe_get("Pipeline",pipeline_key)
            if pipeline_name_for_sample != pipeline_name:
                continue
            mockdb[pipeline_name].__new__(configs['system'],input_dir=sample_dirs[project][sample][0],pipeline_config=configs["pipeline"],project=parsed['project_name'],pipeline_key=pipeline_key,**parsed)
            flowcell_dict = mockdb['SequencingRun'].__attribute_value_to_object_dict__('flowcell_key')
            if parsed['flowcell'].key in flowcell_dict:
                seq_run = flowcell_dict[parsed['flowcell'].key]
            else:
                try:
                    base_dir = get_sequencing_run_base_dir(source_dir)
                    [date,machine_key,run_number,side,flowcell_key] = parse_sequencing_run_dir(base_dir)
                    machine = mockdb['HiSeqMachine'].__get__(configs['system'],machine_key)
                    run_type = determine_run_type(base_dir)
                    seq_run = mockdb['SequencingRun'].__new__(configs['system'],parsed['flowcell'],machine,date,run_number,output_dir=base_dir,side=side,run_type=run_type)
                    fill_demultiplex_stats(configs['system'],mockdb,seq_run.output_dir,parsed['flowcell'],machine)
                except:
                    pass
    return 1
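parse_sequencing_run_dir is unpacked into five fields above, which matches the standard Illumina run-folder naming of date_machine_runNumber_sideFlowcell. A sketch under that assumption (the real helper may differ):

import os

def parse_sequencing_run_dir(base_dir):
    """Split a run dir name like 130115_SN1234_0042_AH0XXXADXX into its fields."""
    date, machine_key, run_number, side_flowcell = os.path.basename(base_dir).split('_')
    side = side_flowcell[0]           #flowcell position, A or B
    flowcell_key = side_flowcell[1:]
    return [date, machine_key, run_number, side, flowcell_key]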
parser = argparse.ArgumentParser(description='Manages data and submits new jobs.')
parser.add_argument('-i', dest='source_dir', nargs='+', help='fastq source', default=None)
parser.add_argument('-o', dest='dest_dir', help='vcf destination', default=None)
parser.add_argument('-p', '--pipeline', dest='pipeline', help='The version of the pipeline', default='QualityControlPipeline')
parser.add_argument('--analyze_sequencing_run', dest='seq_run', action='store_true', help='Reanalyze the given sequencing run.', default=False)
parser.add_argument('--system_config', dest='system_config_file', help='The system configuration file', default='/home/sequencing/src/pipeline_project/pipeline/config/ihg_system.cfg')
parser.add_argument('-d', '--debug', dest='debug', action='store_true', help='Turn debugging on', default=False)
parser.add_argument('--sample_sheet', dest='sample_sheet', type=str, help='Used when re-initializing a sequencing run.  Specifies the sample sheet to be used for casava.', default=None)
options = parser.parse_args()
if options.debug:
    print "Options are " + str(options)

#Load configs
configs = {}
system_config = MyConfigParser()
system_config.read(options.system_config_file)
system_config.add_section("Logging")
if options.debug:
    system_config.set("Logging","debug","True")
else:
    system_config.set("Logging","debug","False")
configs.update({'system':system_config})

config_instance = MyConfigParser()
configs.update({"seq_run":config_instance})
config_instance.read(system_config.get('Pipeline',"seq_run"))

pipelines = system_config.get('Pipeline','opts').split(',')
pipeline_config = {}
for pipeline_name in pipelines: