def things_to_do_if_initializing_pipeline_with_input_directory(configs,storage_devices,mockdb,source_dir,pipeline_name=None,base_output_dir=None,combine_projects=True):
    """
    Scan source_dir for sample directories and register a new pipeline object
    (and, when needed, a SequencingRun) for every sample whose SampleSheet
    description maps to pipeline_name via the automation config.

    configs          -- dict of config parsers ('system', 'pipeline').
    storage_devices  -- candidate storage locations for running.
    mockdb           -- object store keyed by class name.
    source_dir       -- directory containing the sequencing run output.
    pipeline_name    -- only samples whose description resolves to this
                        pipeline are registered; others are skipped.
    base_output_dir  -- defaults to the pipeline archive_directory.
    combine_projects -- if True, all samples are grouped under one
                        "dummy_project" key instead of per-project.

    Returns 1 unconditionally (legacy convention of this module).
    """
    if combine_projects:
        # BUGFIX: was "if combine_project:" (undefined name) and assigned into
        # a sample_dirs dict that was never initialized in this branch.
        sample_dirs = {"dummy_project": list_sample_dirs(source_dir)}
    else:
        sample_dirs = list_project_sample_dirs(source_dir)
    # NOTE(review): target_config is read but never used below — kept in case
    # MyConfigParser.read has side effects relied on elsewhere; confirm.
    target_config = MyConfigParser()
    target_config.read(configs["system"].get("Filenames","target_config"))
    for project in sample_dirs:
        for sample in sample_dirs[project]:
            # Called for its selection logic; result currently unused here.
            running_location = identify_running_location_with_most_currently_available(configs,storage_devices)
            parsed = parse_sample_sheet(configs['system'],mockdb,sample_dirs[project][sample][0])
            if base_output_dir is None:
                base_output_dir = configs['pipeline'].get('Common_directories','archive_directory')
            automation_parameters_config = MyConfigParser()
            automation_parameters_config.read(configs["system"].get("Filenames","automation_config"))
            # The pipeline key comes from an explicit 'Pipeline' entry in the
            # description, else from the last underscore-separated piece.
            description_dict = parse_description_into_dictionary(parsed['description'])
            if 'Pipeline' in description_dict:
                pipeline_key = description_dict['Pipeline']
            else:
                description_pieces = parsed['description'].split('_')
                pipeline_key = description_pieces[-1]
            pipeline_name_for_sample = automation_parameters_config.safe_get("Pipeline",pipeline_key)
            if not pipeline_name_for_sample == pipeline_name:
                continue  # sample belongs to a different pipeline
            mockdb[pipeline_name].__new__(configs['system'],input_dir=sample_dirs[project][sample][0],pipeline_config=configs["pipeline"],project=parsed['project_name'],pipeline_key=pipeline_key,**parsed)
            # BUGFIX: this lookup was duplicated verbatim in the original.
            flowcell_dict = mockdb['SequencingRun'].__attribute_value_to_object_dict__('flowcell_key')
            if parsed['flowcell'].key in flowcell_dict:
                seq_run = flowcell_dict[parsed['flowcell'].key]
            else:
                # Best effort: if the run directory cannot be parsed, skip
                # creating the SequencingRun rather than aborting the scan.
                try:
                    base_dir = get_sequencing_run_base_dir(source_dir)
                    [date,machine_key,run_number,side,flowcell_key] = parse_sequencing_run_dir(base_dir)
                    machine = mockdb['HiSeqMachine'].__get__(configs['system'],machine_key)
                    run_type = determine_run_type(base_dir)
                    # BUGFIX: original referenced an undefined name 'flowcell';
                    # the flowcell object in scope is parsed['flowcell'].
                    seq_run = mockdb['SequencingRun'].__new__(configs['system'],parsed['flowcell'],machine,date,run_number,output_dir=base_dir,side=side,run_type=run_type)
                    fill_demultiplex_stats(configs['system'],mockdb,seq_run.output_dir,parsed['flowcell'],machine)
                except Exception:
                    # Was a bare "except: pass"; narrowed so SystemExit and
                    # KeyboardInterrupt are no longer swallowed.
                    pass
    return 1
def __is_complete__(self,configs,mockdb,*args,**kwargs): """ Due to the inclusion of sub-processes (bclto fastq pipeline, illuminate, and launched pipelines), this function contains the logic to check to makes sure all of these processes have completed successfully. """ if configs["system"].get("Logging","debug") is "True": print "Checking to see if seq run is complete (and advancing post-seq run pipeline)" if not os.path.isfile(self.complete_file): if configs["system"].get("Logging","debug") is "True": print " Missing complete file " + self.complete_file return False if not hasattr(self,"interop_archived") or self.interop_archived is False: output_name = os.path.basename(self.output_dir) if not self.__archive_sequencing_run_data__(configs,self.output_dir,os.path.join(configs["system"].get('Common_directories','hiseq_run_log'),output_name)): if configs["system"].get("Logging","debug") is "True": print " Sequencing run data not archived (InterOp and so forth)." return False if not hasattr(self,"bcltofastq_pipeline_key") or self.bcltofastq_pipeline_key is None or not hasattr(self,"illuminate_key") or self.illuminate_key is None: if not hasattr(self,"bcltofastq_pipeline_key") or self.bcltofastq_pipeline_key is None: self.__start_bcltofastq_pipeline__(configs,mockdb) if configs["system"].get("Logging","debug") is "True": print " Starting bcltofastq pipeline." if not hasattr(self,"illuminate_key") or self.illuminate_key is None: self.__launch_illuminate__(configs,mockdb) if configs["system"].get("Logging","debug") is "True": print " Starting illuminate." 
return False illuminate = mockdb['Illuminate'].__get__(configs['system'],self.illuminate_key) if not illuminate.__is_complete__(configs,mockdb=mockdb,*args,**kwargs): if configs["system"].get("Logging","debug") is "True": print " Illuminate not done" return False bcl2fastq_pipeline = mockdb['BclToFastqPipeline'].__get__(configs['system'],self.bcltofastq_pipeline_key) if not bcl2fastq_pipeline.__is_complete__(configs,mockdb=mockdb,*args,**kwargs): if configs["system"].get("Logging","debug") is "True": print " bcltofastq not done" return False if not hasattr(self,"generic_copy_key") or self.generic_copy_key is None: self.__launch_archive_fastq__(configs,mockdb) if configs["system"].get("Logging","debug") is "True": print " Launching archive" return False archive = mockdb['GenericCopy'].__get__(configs['system'],self.generic_copy_key) if archive.__is_complete__(*args,**kwargs): if not disk_usage(self.fastq_archive) > 30000000: if not hasattr(self,'fastq_archive_reported') or self.fastq_archive_reported is None: message = "The flowcell "+self.flowcell_key+" has finished casava, but the archive is not as large as expected.\n" message += "\nPlease check.\n\n" recipients = configs["system"].get('Email','recipients_for_fastq') send_email("Flowcell size problem.",message,recipients=recipients) self.fastq_archive_reported = True return False fastq_check = check_fastq_output(self.fastq_archive) if fastq_check["md5"] == [] and fastq_check["fastqc"] == [] and fastq_check["index"] is True and fastq_check["sample_sheet"] is True: if not hasattr(self,"fastq_check_report") or self.fastq_check_report is None: message = "Just informing you of the completion of the flowcell.\n" recipients = configs["system"].get('Email','recipients_for_fastq') send_email("The fastq have been successully generated for " + self.flowcell_key + ".",message,recipients=recipients) self.fastq_check_report = True else: if not hasattr(self,"fastq_check_report") or self.fastq_check_report is None: message = 
"Report detailing the issues with the flowcell directory for flowcell " + self.flowcell_key + ".\n" if not fastq_check["sample_sheet"] is True: message += "Sample sheet missing from " + self.archive_fastq + ".\n" else: if not fastq_check["index"]: message += "Index counts not generated.\n" if len(fastq_check["fastqc"]) != 0: message += "The following directories do not have fastqc results:" message += "\n\t".join(fastq_check["fastqc"]) + "\n" if len(fastq_check["md5"]) != 0: message += "The following directories do not have md5 checksums:" message += "\n\t".join(fastq_check["md5"]) + "\n" recipients = configs["system"].get('Email','recipients_for_fastq') send_email("Problem with fastq generation for " + self.flowcell_key + ".",message,recipients=recipients) self.fastq_check_report = True return False if not hasattr(self,"generic_clean_key") or self.generic_clean_key is None: if hasattr(self,'fastq_archive_reported') and self.fastq_archive_reported is True: message = "The flowcell "+self.flowcell_key+" has finished casava, and is now big enough.\n" message += "\nContinuing.\n\n" recipients = configs["system"].get('Email','recipients_for_fastq') send_email("Flowcell size problem resolved.",message,recipients=recipients) self.__launch_clean__(configs,mockdb) self.__link_to_web_portal__(configs['system']) if configs["system"].get("Logging","debug") is "True": print " Filling stats" flowcell = mockdb['Flowcell'].__get__(configs['system'],self.flowcell_key) machine = mockdb['HiSeqMachine'].__get__(configs['system'],self.machine_key) fill_demultiplex_stats(configs['system'],mockdb,self.output_dir,flowcell,machine) #return False else: if configs["system"].get("Logging","debug") is "True": print " Fastq archive not complete" return False clean = mockdb['GenericClean'].__get__(configs['system'],self.generic_clean_key) if clean.__is_complete__(*args,**kwargs): self.__finish__(*args,**kwargs) return True return False
"""
One-off driver: re-fill demultiplex stats for a single sequencing run
directory (passed as argv[1]) and persist the updated mock database.
"""
import sys
import ConfigParser

from mockdb.initiate_mockdb import initiate_mockdb, save_mockdb
from processes.parsing import parse_sequencing_run_dir
from demultiplex_stats.fill_demultiplex_stats import fill_demultiplex_stats


def main():
    """Parse the run directory, look up its machine/flowcell, fill stats."""
    directory = sys.argv[1]
    config = ConfigParser.ConfigParser()
    config.read('/mnt/iscsi_space/zerbeb/qc_pipeline_project/qc_pipeline/config/qc.cfg')
    mockdb = initiate_mockdb(config)
    # Run-dir name encodes date, machine, run number, side and flowcell.
    [date, machine_key, run_number, side, flowcell_key] = parse_sequencing_run_dir(directory)
    machine = mockdb['HiSeqMachine'].__get__(config, machine_key)
    flowcell = mockdb['Flowcell'].__get__(config, flowcell_key)
    fill_demultiplex_stats(config, mockdb, directory, flowcell, machine)
    save_mockdb(config, mockdb)


# Guarded so importing this module no longer triggers the argv-dependent
# side effects the original ran at import time.
if __name__ == "__main__":
    main()