def test_fastq_screen_finished(self):
    """Detecting finished state of fastq_screen
    """
    # Assert that an empty directory doesn't indicate finished state
    self.assertFalse(sq.fastq_screen_finished(self.rootdir),
                     "Fastq screen should not be considered finished without output files")

    # Create an output file and corresponding png but no rows in output
    sample_file = os.path.join(self.rootdir, "{}_fastq_screen.txt".format(td.generate_sample()))
    png_file = "{}.png".format(os.path.splitext(sample_file)[0])
    utils.touch_file(sample_file)
    utils.touch_file(png_file)
    self.assertFalse(sq.fastq_screen_finished(self.rootdir),
                     "Fastq screen should not be considered finished with empty output file")

    # Write some output and assert fastq_screen is detected as finished
    with open(sample_file, "w") as fh:
        for n in range(5):
            fh.write("{}\n".format(str(n)))
    self.assertTrue(sq.fastq_screen_finished(self.rootdir),
                    "Fastq screen should be considered finished with non-empty output file and corresponding png")

    # Remove the png and assert fastq_screen is not finished
    os.unlink(png_file)
    self.assertFalse(sq.fastq_screen_finished(self.rootdir),
                     "Fastq screen should not be considered finished with non-empty output file but without corresponding png")
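
# For reference, a minimal sketch of the fastq_screen_finished helper that the test
# above exercises. The glob pattern, function placement and signature are assumptions
# based solely on the behaviour asserted in the test, not the project's actual
# implementation behind sq.fastq_screen_finished.
import glob
import os


def fastq_screen_finished(fastq_screen_dir):
    """Return True if fastq_screen appears to have finished in the given directory:
    at least one non-empty *_fastq_screen.txt output file exists, and each such
    file has a corresponding .png plot.
    """
    outputs = glob.glob(os.path.join(fastq_screen_dir, "*_fastq_screen.txt"))
    if not outputs:
        return False
    for txt in outputs:
        # The output file must contain some rows
        if os.path.getsize(txt) == 0:
            return False
        # A plot must have been generated alongside the text output
        if not os.path.exists("{}.png".format(os.path.splitext(txt)[0])):
            return False
    return True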
def status_query(archive_dir, analysis_dir, flowcell, project, brief=False):
    """Get a status report of the progress of flowcells based on a snapshot
    of the file system
    """
    last_step = 14
    status = []
    # Process each flowcell in the archive directory
    for fcdir in bcbio.get_flowcelldirs(archive_dir, flowcell):
        fc_status = {}
        fc_status['flowcell'] = os.path.basename(fcdir)

        # Locate the samplesheet
        samplesheet = bcbio.get_samplesheet(fcdir)
        if samplesheet is None:
            print("***ERROR***: Could not locate samplesheet in flowcell directory {}. Skipping..".format(fcdir))
            continue
        fc_status['samplesheet'] = samplesheet

        # Get a list of the projects in the samplesheet
        projects = bcbio.get_projects(samplesheet, project)
        if len(projects) == 0:
            print("\t***WARNING***: No projects matched your filter [{}] for flowcell. Skipping..".format(project))
            continue
        fc_status['projects'] = []

        # Iterate over the projects in the flowcell
        for proj in projects:
            proj = proj.replace("__", ".")
            proj_status = {}
            proj_status['project'] = proj

            pdir = bcbio.get_project_analysis_dir(analysis_dir, proj)
            if not pdir:
                continue
            proj_status['project_dir'] = pdir
            proj_status['samples'] = []
            proj_status['no_finished_samples'] = 0

            samples = bcbio.get_project_samples(samplesheet, proj)
            for smpl in samples:
                smpl = smpl.replace("__", ".")
                sample_status = {}
                proj_status['samples'].append(sample_status)
                sample_status['sample_id'] = smpl

                sdir = bcbio.get_sample_analysis_dir(pdir, smpl)
                if not sdir:
                    continue
                sample_status['sample_dir'] = sdir

                # Match the flowcell we're processing to the sample flowcell directories
                sample_fc = [d for d in bcbio.get_flowcelldirs(sdir)
                             if d.split("_")[-1] == fcdir.split("_")[-1]]
                if len(sample_fc) == 0:
                    continue
                sample_fc = sample_fc[0]
                sample_status['sample_fc_dir'] = sample_fc

                # Check whether fastq_screen has been run and has finished
                fastq_screen = bcbio.get_fastq_screen_folder(sample_fc)
                if fastq_screen:
                    sample_status['fastq_screen'] = [fastq_screen,
                                                     bcbio.fastq_screen_finished(fastq_screen)]

                now = datetime.datetime.now()
                # The first pipeline indicator file marks that the pipeline has started
                pipeline_start_indicator = bcbio.get_pipeline_indicator(sample_fc, [1])
                if len(pipeline_start_indicator) == 0:
                    continue
                pipeline_start_indicator = pipeline_start_indicator[0]

                most_recent, _ = bcbio.get_most_recent_indicator([pipeline_start_indicator])
                sample_status['pipeline_started'] = [pipeline_start_indicator, most_recent]

                # The most recent indicator file reflects how far the pipeline has progressed
                most_recent, ifile = bcbio.get_most_recent_indicator(
                    bcbio.get_pipeline_indicator(sample_fc))
                sample_status['pipeline_progress'] = [ifile, most_recent]

                sample_log = bcbio.get_sample_pipeline_log(sample_fc, smpl)
                if not sample_log:
                    continue
                st = os.stat(sample_log)
                sample_status['pipeline_log'] = [sample_log,
                                                 datetime.datetime.fromtimestamp(st.st_mtime)]

                # Collect any SLURM jobs running for the sample
                jobids = slurm.get_slurm_jobid(smpl)
                sample_status['slurm_job'] = []
                for jobid in jobids:
                    sample_status['slurm_job'].append([jobid, slurm.get_slurm_jobstatus(jobid)])

                # A sample is finished when the last pipeline step indicator exists
                # and fastq_screen has finished
                most_recent, ifile = bcbio.get_most_recent_indicator(
                    bcbio.get_pipeline_indicator(sample_fc, [last_step]))
                if ifile is not None and sample_status.get('fastq_screen', [None, False])[1]:
                    sample_status['finished'] = True
                    proj_status['no_finished_samples'] += 1

            if proj_status['no_finished_samples'] == len(samples):
                proj_status['finished'] = True
            fc_status['projects'].append(proj_status)
        status.append(fc_status)
    print_status(status, brief)
def status_query(archive_dir, analysis_dir, flowcell, project, brief):
    """Get a status report of the progress of flowcells based on a snapshot
    of the file system
    """
    last_step = 14
    status = []
    # Process each flowcell in the archive directory
    for fcdir in IlluminaRun.get_flowcell(archive_dir, flowcell):
        fc_status = {}
        fc_status['flowcell'] = os.path.basename(fcdir)

        # Locate the samplesheet
        samplesheet = IlluminaRun.get_samplesheet(fcdir)
        if samplesheet is None:
            print("***ERROR***: Could not locate samplesheet in flowcell directory {}. Skipping..".format(fcdir))
            continue
        fc_status['samplesheet'] = samplesheet

        # Get a list of the projects in the samplesheet
        projects = HiSeqRun.get_project_names(samplesheet)
        if len(projects) == 0:
            print("\t***WARNING***: No projects matched your filter [{}] for flowcell. Skipping..".format(project))
            continue
        fc_status['projects'] = []

        # Iterate over the projects in the flowcell
        for proj in projects:
            proj = proj.replace("__", ".")
            proj_status = {}
            proj_status['project'] = proj

            pdir = bcbio.get_project_analysis_dir(analysis_dir, proj)
            if not pdir:
                continue
            proj_status['project_dir'] = pdir
            proj_status['samples'] = []
            proj_status['no_finished_samples'] = 0

            samples = HiSeqRun.get_project_sample_ids(samplesheet, proj)
            for smpl in samples:
                smpl = smpl.replace("__", ".")
                sample_status = {}
                proj_status['samples'].append(sample_status)
                sample_status['sample_id'] = smpl

                sdir = bcbio.get_sample_analysis_dir(pdir, smpl)
                if not sdir:
                    continue
                sample_status['sample_dir'] = sdir

                # Match the flowcell we're processing to the sample flowcell directories
                sample_fc = [d for d in IlluminaRun.get_flowcell(sdir)
                             if d.split("_")[-1] == fcdir.split("_")[-1]]
                if len(sample_fc) == 0:
                    continue
                sample_fc = sample_fc[0]
                sample_status['sample_fc_dir'] = sample_fc

                # Check whether fastq_screen has been run and has finished
                fastq_screen = bcbio.get_fastq_screen_folder(sample_fc)
                if fastq_screen:
                    sample_status['fastq_screen'] = [fastq_screen,
                                                     bcbio.fastq_screen_finished(fastq_screen)]

                now = datetime.datetime.now()
                # The first pipeline indicator file marks that the pipeline has started
                pipeline_start_indicator = bcbio.get_pipeline_indicator(sample_fc, [1])
                if len(pipeline_start_indicator) == 0:
                    continue
                pipeline_start_indicator = pipeline_start_indicator[0]

                most_recent, _ = bcbio.get_most_recent_indicator([pipeline_start_indicator])
                sample_status['pipeline_started'] = [pipeline_start_indicator, most_recent]

                # The most recent indicator file reflects how far the pipeline has progressed
                most_recent, ifile = bcbio.get_most_recent_indicator(
                    bcbio.get_pipeline_indicator(sample_fc))
                sample_status['pipeline_progress'] = [ifile, most_recent]

                sample_log = bcbio.get_sample_pipeline_log(sample_fc, smpl)
                if not sample_log:
                    continue
                st = os.stat(sample_log)
                sample_status['pipeline_log'] = [sample_log,
                                                 datetime.datetime.fromtimestamp(st.st_mtime)]

                # Collect any SLURM jobs running for the sample
                jobids = slurm.get_slurm_jobid(smpl)
                sample_status['slurm_job'] = []
                for jobid in jobids:
                    sample_status['slurm_job'].append([jobid, slurm.get_slurm_jobstatus(jobid)])

                # A sample is finished when the last pipeline step indicator exists
                # and fastq_screen has finished
                most_recent, ifile = bcbio.get_most_recent_indicator(
                    bcbio.get_pipeline_indicator(sample_fc, [last_step]))
                if ifile is not None and sample_status.get('fastq_screen', [None, False])[1]:
                    sample_status['finished'] = True
                    proj_status['no_finished_samples'] += 1

            if proj_status['no_finished_samples'] == len(samples):
                proj_status['finished'] = True
            fc_status['projects'].append(proj_status)
        status.append(fc_status)
    print_status(status, brief)
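
# A minimal sketch of how status_query might be invoked from the command line.
# The argument names, defaults and help texts are assumptions for illustration,
# not the project's actual CLI.
import argparse

if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Report pipeline progress for flowcells")
    parser.add_argument("archive_dir", help="root directory containing flowcell run folders")
    parser.add_argument("analysis_dir", help="root directory containing analysis output")
    parser.add_argument("--flowcell", default=None, help="restrict the report to this flowcell")
    parser.add_argument("--project", default=None, help="restrict the report to matching projects")
    parser.add_argument("--brief", action="store_true", help="print a condensed report")
    args = parser.parse_args()

    status_query(args.archive_dir, args.analysis_dir, args.flowcell, args.project, args.brief)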