Esempio n. 1
0
 def test__get_project_analysis_dir(self):
     """Verify the lookup of a project's analysis folder below the root directory.

     The lookup is expected to yield None while no folder named after the
     project exists (nothing created yet, only other folders present, or a
     plain file carrying the project name) and to yield the folder path once
     the folder has been created.
     """
     project_name = td.generate_project()

     def lookup():
         # Re-run the query under test against the current state of the root directory
         return sq.get_project_analysis_dir(self.rootdir, project_name)

     # Nothing has been created yet, so no folder can match
     outcome = lookup()
     self.assertIsNone(
         outcome,
         "Did not return empty result for non-existing folders")

     # Populate the root directory with five folders for other generated projects
     for _ in range(5):
         other_project = td.generate_project()
         os.mkdir(os.path.join(self.rootdir, other_project))
     outcome = lookup()
     self.assertIsNone(
         outcome,
         "Did not return empty result for mismatching folders")

     # A regular file named like the project must not be reported as a folder
     expected_dir = os.path.join(self.rootdir, project_name)
     utils.touch_file(expected_dir)
     outcome = lookup()
     self.assertIsNone(
         outcome,
         "Returned a file with matching name. Should only return folders")
     os.unlink(expected_dir)

     # Once the real folder exists, its path must be returned
     os.mkdir(expected_dir)
     outcome = lookup()
     self.assertEqual(
         expected_dir,
         outcome,
         "The expected project folder was not returned")
Esempio n. 2
0
 def test__get_project_analysis_dir(self):
     """Verify the lookup of a project's analysis folder below the root directory.

     The lookup is expected to yield None while no folder named after the
     project exists (nothing created yet, only other folders present, or a
     plain file carrying the project name) and to yield the folder path once
     the folder has been created.
     """
     project_name = td.generate_project()

     def lookup():
         # Re-run the query under test against the current state of the root directory
         return sq.get_project_analysis_dir(self.rootdir, project_name)

     # Nothing has been created yet, so no folder can match
     outcome = lookup()
     self.assertIsNone(
         outcome,
         "Did not return empty result for non-existing folders")

     # Populate the root directory with five folders for other generated projects
     for _ in range(5):
         other_project = td.generate_project()
         os.mkdir(os.path.join(self.rootdir, other_project))
     outcome = lookup()
     self.assertIsNone(
         outcome,
         "Did not return empty result for mismatching folders")

     # A regular file named like the project must not be reported as a folder
     expected_dir = os.path.join(self.rootdir, project_name)
     utils.touch_file(expected_dir)
     outcome = lookup()
     self.assertIsNone(
         outcome,
         "Returned a file with matching name. Should only return folders")
     os.unlink(expected_dir)

     # Once the real folder exists, its path must be returned
     os.mkdir(expected_dir)
     outcome = lookup()
     self.assertEqual(
         expected_dir,
         outcome,
         "The expected project folder was not returned")
Esempio n. 3
0
def status_query(archive_dir, analysis_dir, flowcell, project, brief=False):
    """Get a status report of the progress of flowcells based on a snapshot of the file system

    For every flowcell directory found, a dict is built with the keys
    'flowcell', 'samplesheet' and 'projects'. Each project entry holds
    'project', 'project_dir', 'samples', 'no_finished_samples' and, when all
    samples are finished, 'finished'. Each sample entry is filled in
    progressively; as soon as a piece of information cannot be located the
    sample is left with whatever has been collected so far.

    Flowcells without a samplesheet or without matching projects are reported
    on stdout and left out of the result. Projects without an analysis
    directory are skipped silently.

    Args:
        archive_dir: passed to bcbio.get_flowcelldirs to locate flowcell directories.
        analysis_dir: passed to bcbio.get_project_analysis_dir to locate project folders.
        flowcell: passed to bcbio.get_flowcelldirs together with archive_dir.
        project: project filter passed to bcbio.get_projects.
        brief: forwarded unchanged to print_status.

    Returns:
        None. The collected status is handed to print_status.
    """

    # Pipeline indicator number used below to decide whether a sample is done
    # (presumably the final pipeline step -- confirm against the pipeline definition)
    last_step = 14
    status = []
    # Process each flowcell in the archive directory
    for fcdir in bcbio.get_flowcelldirs(archive_dir, flowcell):
        fc_status = {}
        fc_status['flowcell'] = os.path.basename(fcdir)

        # Locate the samplesheet
        samplesheet = bcbio.get_samplesheet(fcdir)
        if samplesheet is None:
            # The message previously started with an unformatted "{}" placeholder
            # that was printed literally; use the same tab prefix as the warning below
            print(
                "\t***ERROR***: Could not locate samplesheet in flowcell directory. Skipping.."
            )
            continue
        fc_status['samplesheet'] = samplesheet

        # Get a list of the projects in the samplesheet
        projects = bcbio.get_projects(samplesheet, project)
        if len(projects) == 0:
            print(
                "\t***WARNING***: No projects matched your filter [{}] for flowcell. Skipping.."
                .format(project))
            continue

        fc_status['projects'] = []

        # Iterate over the projects in the flowcell
        for proj in projects:
            proj = proj.replace("__", ".")
            proj_status = {}
            proj_status['project'] = proj

            pdir = bcbio.get_project_analysis_dir(analysis_dir, proj)
            if not pdir:
                continue

            proj_status['project_dir'] = pdir
            proj_status['samples'] = []
            proj_status['no_finished_samples'] = 0
            samples = bcbio.get_project_samples(samplesheet, proj)
            for smpl in samples:
                smpl = smpl.replace("__", ".")
                # Register the sample up front so that it shows up in the report
                # even when one of the lookups below bails out early
                sample_status = {}
                proj_status['samples'].append(sample_status)
                sample_status['sample_id'] = smpl
                sdir = bcbio.get_sample_analysis_dir(pdir, smpl)
                if not sdir:
                    continue
                sample_status['sample_dir'] = sdir

                # Match the flowcell we're processing to the sample flowcell directories
                # by comparing the last "_"-separated component of the paths
                fc_suffix = fcdir.split("_")[-1]
                sample_fc = [
                    d for d in bcbio.get_flowcelldirs(sdir)
                    if d.split("_")[-1] == fc_suffix
                ]
                if not sample_fc:
                    continue
                sample_fc = sample_fc[0]
                sample_status['sample_fc_dir'] = sample_fc

                fastq_screen = bcbio.get_fastq_screen_folder(sample_fc)
                if fastq_screen:
                    sample_status['fastq_screen'] = [
                        fastq_screen,
                        bcbio.fastq_screen_finished(fastq_screen)
                    ]

                # Indicator for step 1 tells whether the pipeline was started at all
                pipeline_start_indicator = bcbio.get_pipeline_indicator(
                    sample_fc, [1])
                if len(pipeline_start_indicator) == 0:
                    continue
                pipeline_start_indicator = pipeline_start_indicator[0]

                most_recent, _ = bcbio.get_most_recent_indicator(
                    [pipeline_start_indicator])
                sample_status['pipeline_started'] = [
                    pipeline_start_indicator, most_recent
                ]

                # Most recent indicator among all indicators found for the sample
                most_recent, ifile = bcbio.get_most_recent_indicator(
                    bcbio.get_pipeline_indicator(sample_fc))
                sample_status['pipeline_progress'] = [ifile, most_recent]

                sample_log = bcbio.get_sample_pipeline_log(sample_fc, smpl)
                if not sample_log:
                    continue
                # Record the log file together with its last modification time
                st = os.stat(sample_log)
                sample_status['pipeline_log'] = [
                    sample_log,
                    datetime.datetime.fromtimestamp(st.st_mtime)
                ]

                jobids = slurm.get_slurm_jobid(smpl)
                sample_status['slurm_job'] = []
                for jobid in jobids:
                    sample_status['slurm_job'].append(
                        [jobid, slurm.get_slurm_jobstatus(jobid)])

                # A sample counts as finished when an indicator for last_step exists
                # and the fastq_screen entry (if any) is flagged as finished
                most_recent, ifile = bcbio.get_most_recent_indicator(
                    bcbio.get_pipeline_indicator(sample_fc, [last_step]))
                if ifile is not None and sample_status.get(
                        'fastq_screen', [None, False])[1]:
                    sample_status['finished'] = True
                    proj_status['no_finished_samples'] += 1

            if proj_status['no_finished_samples'] == len(samples):
                proj_status['finished'] = True

            fc_status['projects'].append(proj_status)

        status.append(fc_status)
    print_status(status, brief)
Esempio n. 4
0
def status_query(archive_dir, analysis_dir, flowcell, project, brief):
    """Get a status report of the progress of flowcells based on a snapshot of the file system

    For every flowcell directory found, a dict is built with the keys
    'flowcell', 'samplesheet' and 'projects'. Each project entry holds
    'project', 'project_dir', 'samples', 'no_finished_samples' and, when all
    samples are finished, 'finished'. Each sample entry is filled in
    progressively; as soon as a piece of information cannot be located the
    sample is left with whatever has been collected so far.

    Flowcells without a samplesheet or without any projects are reported on
    stdout and left out of the result. Projects without an analysis directory
    are skipped silently.

    Args:
        archive_dir: passed to IlluminaRun.get_flowcell to locate flowcell directories.
        analysis_dir: passed to bcbio.get_project_analysis_dir to locate project folders.
        flowcell: passed to IlluminaRun.get_flowcell together with archive_dir.
        project: only used in the "no projects" warning message here.
        brief: forwarded unchanged to print_status.

    Returns:
        None. The collected status is handed to print_status.
    """
    
    # Pipeline indicator number used below to decide whether a sample is done
    # (presumably the final pipeline step -- confirm against the pipeline definition)
    last_step = 14
    status = []
    # Process each flowcell in the archive directory
    for fcdir in IlluminaRun.get_flowcell(archive_dir,flowcell):
        fc_status = {}
        fc_status['flowcell'] = os.path.basename(fcdir)
        
        # Locate the samplesheet
        samplesheet = IlluminaRun.get_samplesheet(fcdir)
        if samplesheet is None:
            # The message previously started with an unformatted "{}" placeholder
            # that was printed literally; use the same tab prefix as the warning below
            print("\t***ERROR***: Could not locate samplesheet in flowcell directory. Skipping..")
            continue
        fc_status['samplesheet'] = samplesheet

        # Get a list of the projects in the samplesheet
        # NOTE(review): the project filter is not passed to get_project_names, yet the
        # warning below talks about a filter -- confirm whether filtering is intended
        projects = HiSeqRun.get_project_names(samplesheet)
        if len(projects) == 0:
            print("\t***WARNING***: No projects matched your filter [{}] for flowcell. Skipping..".format(project))
            continue
        
        fc_status['projects'] = []
        
        # Iterate over the projects in the flowcell
        for proj in projects:
            proj = proj.replace("__",".")
            proj_status = {}
            proj_status['project'] = proj
            
            pdir = bcbio.get_project_analysis_dir(analysis_dir, proj)
            if not pdir:
                continue
            
            proj_status['project_dir'] = pdir
            proj_status['samples'] = []
            proj_status['no_finished_samples'] = 0
            samples = HiSeqRun.get_project_sample_ids(samplesheet, proj)
            for smpl in samples:
                smpl = smpl.replace("__",".")
                # Register the sample up front so that it shows up in the report
                # even when one of the lookups below bails out early
                sample_status = {}
                proj_status['samples'].append(sample_status)
                sample_status['sample_id'] = smpl
                sdir = bcbio.get_sample_analysis_dir(pdir, smpl)
                if not sdir:
                    continue
                sample_status['sample_dir'] = sdir
                
                # Match the flowcell we're processing to the sample flowcell directories
                # by comparing the last "_"-separated component of the paths
                fc_suffix = fcdir.split("_")[-1]
                sample_fc = [d for d in IlluminaRun.get_flowcell(sdir) if d.split("_")[-1] == fc_suffix]
                if not sample_fc:
                    continue
                sample_fc = sample_fc[0]
                sample_status['sample_fc_dir'] = sample_fc
                
                fastq_screen = bcbio.get_fastq_screen_folder(sample_fc)
                if fastq_screen:
                    sample_status['fastq_screen'] = [fastq_screen,bcbio.fastq_screen_finished(fastq_screen)]
                
                # Indicator for step 1 tells whether the pipeline was started at all
                pipeline_start_indicator = bcbio.get_pipeline_indicator(sample_fc,[1])
                if len(pipeline_start_indicator) == 0:
                    continue
                pipeline_start_indicator = pipeline_start_indicator[0]
                
                most_recent, _ = bcbio.get_most_recent_indicator([pipeline_start_indicator])
                sample_status['pipeline_started'] = [pipeline_start_indicator,most_recent]
                
                # Most recent indicator among all indicators found for the sample
                most_recent, ifile = bcbio.get_most_recent_indicator(bcbio.get_pipeline_indicator(sample_fc))
                sample_status['pipeline_progress'] = [ifile,most_recent]
                
                sample_log = bcbio.get_sample_pipeline_log(sample_fc,smpl)
                if not sample_log:
                    continue
                # Record the log file together with its last modification time
                st = os.stat(sample_log)
                sample_status['pipeline_log'] = [sample_log,datetime.datetime.fromtimestamp(st.st_mtime)]
                
                jobids = slurm.get_slurm_jobid(smpl)
                sample_status['slurm_job'] = []
                for jobid in jobids:
                    sample_status['slurm_job'].append([jobid,slurm.get_slurm_jobstatus(jobid)])
                
                # A sample counts as finished when an indicator for last_step exists
                # and the fastq_screen entry (if any) is flagged as finished
                most_recent, ifile = bcbio.get_most_recent_indicator(bcbio.get_pipeline_indicator(sample_fc,[last_step]))
                if ifile is not None and sample_status.get('fastq_screen',[None,False])[1]:
                    sample_status['finished'] = True
                    proj_status['no_finished_samples'] += 1
                
            
            if proj_status['no_finished_samples'] == len(samples):
                proj_status['finished'] = True
                
            fc_status['projects'].append(proj_status)
            
        status.append(fc_status) 
    print_status(status,brief)