def test_get_flowcell(self):
    """Get flowcell from analysis directory.

    Creates three temporary flowcell folders in a scratch subdirectory of
    self.rootdir and checks what IlluminaRun.get_flowcell returns for: an id
    matching nothing, exact folder names, no id at all, a partial id
    matching a single folder, an id matching several folders, and an id
    that is the exact name of one folder while also occurring in the names
    of the others.
    """
    # Work in a separate subdirectory
    subdir = os.path.join(self.rootdir, "test_get_flowcell")
    os.mkdir(subdir)
    try:
        # Create some folders
        fc_dirs = [
            tempfile.mkdtemp(suffix="{}_fc_dir".format(i), dir=subdir)
            for i in range(3)
        ]

        # Assert nothing is returned for non-existing flowcell
        self.assertListEqual(
            IlluminaRun.get_flowcell(subdir, 'flowcell_id'),
            [],
            "Nothing should be returned for a non-existing flowcell id")

        # Assert that the correct folders are returned for exact matches
        for fc_dir in fc_dirs:
            self.assertListEqual(
                [fc_dir],
                IlluminaRun.get_flowcell(subdir, os.path.basename(fc_dir)),
                "Did not return the correct folder for exact match")

        # Assert that an empty match returns all folders
        self.assertListEqual(
            sorted(fc_dirs),
            sorted(IlluminaRun.get_flowcell(subdir)),
            "Did not return the correct folders for empty matches")

        # Assert that a partial match is resolved to the correct folder
        self.assertListEqual(
            [fc_dirs[-1]],
            IlluminaRun.get_flowcell(
                subdir, "{}_fc_dir".format(len(fc_dirs) - 1)),
            "Did not return the correct folder for partial match")

        # Assert that an ambiguous match returns the matching folders
        self.assertListEqual(
            sorted(fc_dirs),
            sorted(IlluminaRun.get_flowcell(subdir, "_fc_dir")),
            "Did not return the correct folders for ambiguous matches")

        # Assert that the correct folder is returned for an exact match that
        # matches ambiguously when allowing wildcards
        ambig_dir = os.path.join(subdir, "_fc_dir")
        utils.safe_makedir(ambig_dir)
        self.assertListEqual(
            [ambig_dir],
            sorted(
                IlluminaRun.get_flowcell(subdir,
                                         os.path.basename(ambig_dir))),
            "Did not return the correct folder for specific non-wildcard match")
    finally:
        # Clean up even when an assertion above fails, so a failed run does
        # not leave the scratch directory (and its folders) behind.
        shutil.rmtree(subdir)
def test_get_flowcell(self):
    """Get flowcell from analysis directory.

    Builds a scratch subdirectory under self.rootdir containing three
    temporary flowcell folders, then verifies the lists returned by
    IlluminaRun.get_flowcell for a non-existing id, for exact folder names,
    for a call without an id, for a partial id, for an ambiguous id, and for
    an id that exactly names one folder while also being part of the other
    folder names.
    """
    # Work in a separate subdirectory
    subdir = os.path.join(self.rootdir, "test_get_flowcell")
    os.mkdir(subdir)
    try:
        # Create some folders
        fc_dirs = []
        for i in range(3):
            fc_dirs.append(
                tempfile.mkdtemp(suffix="{}_fc_dir".format(i), dir=subdir))

        # Assert nothing is returned for non-existing flowcell
        found = IlluminaRun.get_flowcell(subdir, 'flowcell_id')
        self.assertListEqual(
            found, [],
            "Nothing should be returned for a non-existing flowcell id")

        # Assert that the correct folders are returned for exact matches
        for fc_dir in fc_dirs:
            found = IlluminaRun.get_flowcell(subdir, os.path.basename(fc_dir))
            self.assertListEqual(
                [fc_dir], found,
                "Did not return the correct folder for exact match")

        # Assert that an empty match returns all folders
        found = sorted(IlluminaRun.get_flowcell(subdir))
        self.assertListEqual(
            sorted(fc_dirs), found,
            "Did not return the correct folders for empty matches")

        # Assert that a partial match is resolved to the correct folder
        last_index = len(fc_dirs) - 1
        found = IlluminaRun.get_flowcell(
            subdir, "{}_fc_dir".format(last_index))
        self.assertListEqual(
            [fc_dirs[-1]], found,
            "Did not return the correct folder for partial match")

        # Assert that an ambiguous match returns the matching folders
        found = sorted(IlluminaRun.get_flowcell(subdir, "_fc_dir"))
        self.assertListEqual(
            sorted(fc_dirs), found,
            "Did not return the correct folders for ambiguous matches")

        # Assert that the correct folder is returned for an exact match that
        # matches ambiguously when allowing wildcards
        ambig_dir = os.path.join(subdir, "_fc_dir")
        utils.safe_makedir(ambig_dir)
        found = sorted(
            IlluminaRun.get_flowcell(subdir, os.path.basename(ambig_dir)))
        self.assertListEqual(
            [ambig_dir], found,
            "Did not return the correct folder for specific non-wildcard match")
    finally:
        # Always remove the scratch directory; without this a failing
        # assertion would skip the cleanup and leave the folders on disk.
        shutil.rmtree(subdir)
def status_query(archive_dir, analysis_dir, flowcell, project, brief):
    """Get a status report of the progress of flowcells based on a snapshot of the file system.

    For every flowcell directory returned by IlluminaRun.get_flowcell, the
    samplesheet is located, its projects and samples are enumerated, and the
    per-sample analysis state is collected from the analysis directory. The
    collected report is handed to print_status; nothing is returned.

    Args:
        archive_dir: directory passed to IlluminaRun.get_flowcell to locate
            the flowcell directories.
        analysis_dir: directory passed to bcbio.get_project_analysis_dir to
            locate each project's analysis folder.
        flowcell: flowcell identifier passed to IlluminaRun.get_flowcell.
        project: project filter; in this function it is only interpolated
            into the "no projects matched" warning.
        brief: forwarded unchanged to print_status.

    The report is a list with one dict per processed flowcell:
        'flowcell', 'samplesheet', 'projects' -> list of project dicts with
        'project', 'project_dir', 'samples', 'no_finished_samples' and, when
        every sample is finished, 'finished'. Each sample dict always has
        'sample_id'; the remaining keys ('sample_dir', 'sample_fc_dir',
        'fastq_screen', 'pipeline_started', 'pipeline_progress',
        'pipeline_log', 'slurm_job', 'finished') are only present if the
        corresponding lookup succeeded, since collection for a sample stops
        at the first missing piece.
    """
    # Indicator step whose presence marks a sample as finished
    # (presumably the last pipeline step -- TODO confirm the numbering).
    last_step = 14
    status = []

    # Process each flowcell in the archive directory
    for fcdir in IlluminaRun.get_flowcell(archive_dir, flowcell):
        fc_status = {}
        fc_status['flowcell'] = os.path.basename(fcdir)
        # Last "_"-separated field of the flowcell path, used below to pair
        # this flowcell with the sample-level flowcell directories.
        fc_tag = fcdir.split("_")[-1]

        # Locate the samplesheet
        samplesheet = IlluminaRun.get_samplesheet(fcdir)
        if samplesheet is None:
            # The message used to start with an unformatted "{}" placeholder
            # that was printed literally; use the same tab prefix as the
            # warning below instead.
            print("\t***ERROR***: Could not locate samplesheet in flowcell directory. Skipping..")
            continue
        fc_status['samplesheet'] = samplesheet

        # Get a list of the projects in the samplesheet
        # NOTE(review): `project` is not passed to get_project_names, so the
        # list is not filtered here even though the warning mentions a
        # filter -- confirm whether that is intended.
        projects = HiSeqRun.get_project_names(samplesheet)
        if len(projects) == 0:
            print("\t***WARNING***: No projects matched your filter [{}] for flowcell. Skipping..".format(project))
            continue
        fc_status['projects'] = []

        # Iterate over the projects in the flowcell
        for proj in projects:
            proj = proj.replace("__", ".")
            proj_status = {}
            proj_status['project'] = proj

            # Projects without an analysis directory are left out of the report
            pdir = bcbio.get_project_analysis_dir(analysis_dir, proj)
            if not pdir:
                continue
            proj_status['project_dir'] = pdir
            proj_status['samples'] = []
            proj_status['no_finished_samples'] = 0

            samples = HiSeqRun.get_project_sample_ids(samplesheet, proj)
            for smpl in samples:
                smpl = smpl.replace("__", ".")
                # Register the sample up front so it shows up in the report
                # even if collection stops early at one of the checks below.
                sample_status = {}
                proj_status['samples'].append(sample_status)
                sample_status['sample_id'] = smpl

                sdir = bcbio.get_sample_analysis_dir(pdir, smpl)
                if not sdir:
                    continue
                sample_status['sample_dir'] = sdir

                # Match the flowcell we're processing to the sample flowcell directories
                sample_fc = [d for d in IlluminaRun.get_flowcell(sdir)
                             if d.split("_")[-1] == fc_tag]
                if len(sample_fc) == 0:
                    continue
                sample_fc = sample_fc[0]
                sample_status['sample_fc_dir'] = sample_fc

                fastq_screen = bcbio.get_fastq_screen_folder(sample_fc)
                if fastq_screen:
                    sample_status['fastq_screen'] = [
                        fastq_screen,
                        bcbio.fastq_screen_finished(fastq_screen)]

                # Indicator for step 1 shows whether the pipeline was started
                pipeline_start_indicator = bcbio.get_pipeline_indicator(sample_fc, [1])
                if len(pipeline_start_indicator) == 0:
                    continue
                pipeline_start_indicator = pipeline_start_indicator[0]
                most_recent, _ = bcbio.get_most_recent_indicator([pipeline_start_indicator])
                sample_status['pipeline_started'] = [pipeline_start_indicator, most_recent]

                # Most recent indicator over all steps gives the current progress
                most_recent, ifile = bcbio.get_most_recent_indicator(
                    bcbio.get_pipeline_indicator(sample_fc))
                sample_status['pipeline_progress'] = [ifile, most_recent]

                sample_log = bcbio.get_sample_pipeline_log(sample_fc, smpl)
                if not sample_log:
                    continue
                st = os.stat(sample_log)
                sample_status['pipeline_log'] = [
                    sample_log,
                    datetime.datetime.fromtimestamp(st.st_mtime)]

                jobids = slurm.get_slurm_jobid(smpl)
                sample_status['slurm_job'] = [
                    [jobid, slurm.get_slurm_jobstatus(jobid)]
                    for jobid in jobids]

                # A sample counts as finished when the last-step indicator
                # exists and the fastq_screen entry reports completion.
                _, ifile = bcbio.get_most_recent_indicator(
                    bcbio.get_pipeline_indicator(sample_fc, [last_step]))
                if ifile is not None and sample_status.get('fastq_screen', [None, False])[1]:
                    sample_status['finished'] = True
                    proj_status['no_finished_samples'] += 1

            if proj_status['no_finished_samples'] == len(samples):
                proj_status['finished'] = True

            fc_status['projects'].append(proj_status)

        status.append(fc_status)

    print_status(status, brief)