def maintain_sequencing_run_objects(config,mockdb):
    """
    Creates SequencingRun objects for run directories not yet in the database.

    Lists the directories under the configured hiseq output directory
    ('Common_directories', 'hiseq_output'), parses each directory name and
    compares the flowcell keys found on disk with the flowcell keys already
    present in mockdb['SequencingRun'].  For every flowcell that is on disk but
    not in the database, add_sequencing_run_object is called with the matching
    directory.

    Directories whose names cannot be parsed are skipped.  If two directories
    share a flowcell key, the one listed last wins.

    Args:
        config: configuration object providing get(section, option).
        mockdb: dictionary-like database containing a 'SequencingRun' entry.

    Returns:
        1, always.
    """
    monitoring_dirs = list_monitoring_dirs(config.get('Common_directories','hiseq_output'))

    # Created up front so that an empty (or entirely unparseable) listing means
    # "no new flowcells" rather than an unbound-name error further down.
    monitoring_dirs_keyed_by_flowcell = {}
    for monitoring_dir in monitoring_dirs:
        try:
            date, _machine_key, _run_number, _side, flowcell_key = parse_sequencing_run_dir(monitoring_dir)
        except Exception:
            # Best effort: the parser's failure mode is not visible here, so any
            # ordinary error is treated as "not a sequencing run directory".
            continue
        if date is None:
            continue
        monitoring_dirs_keyed_by_flowcell[flowcell_key] = monitoring_dir

    db_flowcells = set(mockdb['SequencingRun'].__attribute_value_to_key_dict__('flowcell_key').keys())
    new_flowcells = set(monitoring_dirs_keyed_by_flowcell).difference(db_flowcells)

    # Compare by value: the option comes back as a freshly built string, so an
    # identity test against the literal "True" would not reliably match.
    if config.get("Logging","debug") == "True":
        print("new flowcells: " + str(new_flowcells))

    for new_flowcell in new_flowcells:
        add_sequencing_run_object(config,mockdb,input_dir=monitoring_dirs_keyed_by_flowcell[new_flowcell])
    return 1
def add_sequencing_run_object(config,mockdb,input_dir,output_dir=None,sample_sheet=None):
    """
    Creates a SequencingRun object for input_dir and sets its state to Running.

    The directory name is parsed into date, machine key, run number, side and
    flowcell key.  The machine and flowcell objects are obtained through the
    __get__ method of mockdb['HiSeqMachine'] and mockdb['Flowcell'], and the
    run type comes from determine_run_type(input_dir).

    Args:
        config: configuration object handed through to the mockdb calls.
        mockdb: dictionary-like database with 'HiSeqMachine', 'Flowcell' and
            'SequencingRun' entries.
        input_dir: sequencing run directory to register.
        output_dir: passed to the new object as fastq_archive (default None).
        sample_sheet: passed to the new object as sample_sheet (default None).

    Returns:
        1, always.
    """
    date, machine_key, run_number, side, flowcell_key = parse_sequencing_run_dir(input_dir)

    hiseq_machine = mockdb['HiSeqMachine'].__get__(config, key=machine_key)
    flowcell_obj = mockdb['Flowcell'].__get__(config, key=flowcell_key)
    detected_run_type = determine_run_type(input_dir)

    new_run = mockdb['SequencingRun'].__new__(
        config,
        input_dir=input_dir,
        fastq_archive=output_dir,
        flowcell=flowcell_obj,
        machine=hiseq_machine,
        date=date,
        run_number=run_number,
        side=side,
        run_type=detected_run_type,
        sample_sheet=sample_sheet,
    )
    new_run.state = 'Running'
    return 1
def _file_ctime_or_none(filename):
    """
    Returns the ctime of filename as a naive local datetime, or None.

    The value is truncated to whole seconds.  None is returned when filename
    is not an existing regular file.
    """
    if not os.path.isfile(filename):
        return None
    # Built straight from the epoch value instead of formatting with
    # time.ctime and re-parsing with strptime("%a %b ..."), which depends on
    # the process locale's day and month names.
    return datetime.datetime.fromtimestamp(int(os.path.getctime(filename)))


def fill_run_stats_from_directory_name(config,mockdb,directory,add=True):
    """
    Records begin/end timestamps for the sequencing run named by directory.

    The directory name is parsed into date, machine key, run number, side and
    flowcell key.  The run folder is then looked up under the configured
    hiseq output directory, and the ctimes of the 'basecalling_initialized'
    and 'basecalling_complete' marker files (names taken from the 'Filenames'
    config section) are used as begin and end timestamps.  A missing marker
    file gives a timestamp of None.

    If add is falsy and the flowcell already has sequencing runs in mockdb,
    each of those runs is updated in place with the timestamps and the
    flowcell and machine keys.  Otherwise a new SequencingRun object is
    created and its state is set to 'Complete'.

    Args:
        config: configuration object providing get(section, option).
        mockdb: dictionary-like database with 'HiSeqMachine', 'Flowcell' and
            'SequencingRun' entries.
        directory: sequencing run directory; parsed for its name components
            and stored as output_dir on a newly created run.
        add: when truthy (the default), always create a new SequencingRun.

    Returns:
        1, always.
    """
    date, machine_key, run_number, side, flowcell_key = parse_sequencing_run_dir(directory)

    # There is no separator between the side letter and the flowcell key.
    last_name = date + "_" + machine_key + "_" + run_number + "_" + side + flowcell_key
    run_dir = os.path.join(config.get('Common_directories','hiseq_output'), last_name)
    orig_filename = os.path.join(run_dir, config.get('Filenames','basecalling_initialized'))
    done_filename = os.path.join(run_dir, config.get('Filenames','basecalling_complete'))

    start_timestamp = _file_ctime_or_none(orig_filename)
    end_timestamp = _file_ctime_or_none(done_filename)

    machine = mockdb['HiSeqMachine'].__get__(config,key=machine_key)
    flowcell = mockdb['Flowcell'].__get__(config,key=flowcell_key)
    seq_run_flowcell_dict = mockdb['SequencingRun'].__attribute_value_to_object_dict__('flowcell_key')

    if not add and flowcell_key in seq_run_flowcell_dict:
        # A flowcell may map to several runs; all of them are updated.
        for seq_run in seq_run_flowcell_dict[flowcell_key]:
            seq_run.begin_timestamp = start_timestamp
            seq_run.end_timestamp = end_timestamp
            seq_run.flowcell_key = flowcell.key
            seq_run.machine_key = machine.key
    else:
        seq_run = mockdb['SequencingRun'].__new__(config,flowcell=flowcell,machine=machine,date=date,run_number=run_number,output_dir=directory,side=side,begin_timestamp=start_timestamp,end_timestamp=end_timestamp)
        seq_run.state = 'Complete'
    return 1
    # NOTE(review): the statements down to ``return 1`` are the tail of a
    # function whose ``def`` line is outside this view; the indentation shown
    # here is reconstructed -- confirm against the full file.
    string += "," + flowcell.key
    string += "," + machine.key
    string += "," + number
    print string
    return 1

# Command-line entry point.  Takes a casava results directory and either
# prints a report (--report) or stores the demultiplex statistics in the
# mock database, optionally registering a sequencing run first
# (--add_seq_run).
# NOTE(review): block nesting below is reconstructed from a whitespace-
# collapsed source; verify the placement of seq_run.__finish__(),
# fill_demultiplex_stats(...) and save_mockdb(...) against the original file.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(description='Places the statistics in Demultiplex_Stats.html into the database')
    parser.add_argument('directory', type=str, help='The directory name where the casava results are placed.')
    parser.add_argument('--report', dest='report', action='store_true', default=False, help='Prints data insead of inputting into database')
    parser.add_argument('--add_seq_run', dest='add_seq_run', action='store_true', default=False, help='Adds a completed sequencing run for the flowcell')
    args = parser.parse_args()

    # The configuration is read from a fixed, site-specific path.
    config = ConfigParser.ConfigParser()
    config.read('/home/sequencing/src/pipeline_project/pipeline/config/ihg_system.cfg')
    mockdb = initiate_mockdb(config)

    # The flowcell and machine objects are looked up from the keys parsed out
    # of the directory name.
    [date,machine_key,run_number,side,flowcell_key] = parse_sequencing_run_dir(args.directory)
    flowcell = mockdb['Flowcell'].__get__(config,key=flowcell_key)
    machine = mockdb['HiSeqMachine'].__get__(config,key=machine_key)
    if args.report is True:
        report_demultiplex_stats(config,mockdb,args.directory,flowcell,machine)
    else:
        if args.add_seq_run is True:
            try:
                # Use the first sequencing run already recorded for this flowcell.
                flowcell_seq_run_dict = mockdb['SequencingRun'].__attribute_value_to_object_dict__('flowcell_key')
                seq_run = flowcell_seq_run_dict[flowcell.key][0]
            except KeyError:
                # No run is recorded under this flowcell key, so create one.
                run_type = determine_run_type(args.directory)
                seq_run=mockdb['SequencingRun'].__new__(config,flowcell=flowcell,machine=machine,date=date,run_number=run_number,output_dir=args.directory,side=side,run_type=run_type)
            seq_run.__finish__()
        fill_demultiplex_stats(config,mockdb,args.directory,flowcell,machine)
    save_mockdb(config,mockdb)