def get_analysis_projects(self, pattern=None):
    """
    List the analysis projects recorded in the project metadata

    Every project named in the project metadata file
    (typically 'projects.info') is considered, plus the
    'undetermined' project (which is not listed in that file).

    A project is only returned if its name matches 'pattern'
    (matching is delegated to bcf_utils.name_matches) and a
    directory with the same name exists under the analysis
    directory. Metadata entries whose name starts with '#' are
    treated as commented out and skipped. A matching project
    with no directory is reported via a warning and omitted.

    Arguments:
      pattern (str): optional pattern used to select a subset
        of projects (default: None, which is treated as '*'
        i.e. select all projects)

    Returns:
      List: list of AnalysisProject instances.
    """
    metadata = self.load_project_metadata(
        self.params.project_metadata)
    if pattern is None:
        pattern = '*'
    selected = []
    # Work through the entries in the metadata file
    for entry in metadata:
        project_name = entry['Project']
        # Skip commented-out entries and names that fail to match
        if project_name.startswith('#') or \
           not bcf_utils.name_matches(project_name, pattern):
            continue
        # The project must have a directory to be included
        dirn = os.path.join(self.analysis_dir, project_name)
        if not os.path.exists(dirn):
            logging.warning("Matching project '%s': no associated "
                            "directory" % project_name)
            continue
        selected.append(AnalysisProject(dirn))
    # 'undetermined' isn't in the metadata so handle it separately
    if not bcf_utils.name_matches('undetermined', pattern):
        return selected
    undetermined = self.undetermined()
    if undetermined is None:
        return selected
    # Don't add it twice if it was already picked up above
    if any(p.name == 'undetermined' for p in selected):
        return selected
    selected.append(undetermined)
    return selected
def get_projects(seq_data, pattern=None):
    """Get list of projects

    Returns a list of IlluminaProject objects from the supplied
    IlluminaData object, restricted to those whose names match
    'pattern'.

    Arguments:
      seq_data: object exposing the projects via its 'projects'
        attribute (each project must have a 'name')
      pattern: optional pattern passed to bcf_utils.name_matches
        to select projects by name (default: None, which is
        treated as '*' i.e. select all projects)

    Returns:
      List of the matching projects, in the order they appear in
      'seq_data.projects' (empty if nothing matches).
    """
    if pattern is None:
        # name_matches is always handed a real pattern string;
        # '*' is the wildcard used to select every name
        pattern = '*'
    return [project for project in seq_data.projects
            if bcf_utils.name_matches(project.name, pattern)]
def get_samples(seq_data, project_pattern=None, sample_pattern=None):
    """Get list of samples

    Returns a list of IlluminaSample objects from the supplied
    IlluminaData object. Projects are selected first (via
    get_projects) and then the samples within each selected
    project are filtered by name.

    Arguments:
      seq_data: object exposing the projects via its 'projects'
        attribute
      project_pattern: optional pattern used to select projects
        by name (default: None, treated as '*' i.e. all projects)
      sample_pattern: optional pattern used to select samples by
        name (default: None, treated as '*' i.e. all samples)

    Returns:
      List of matching samples, grouped in project order (empty
      if nothing matches).
    """
    # Normalise the defaults here so that the name matching is
    # always handed a real pattern string rather than None
    if project_pattern is None:
        project_pattern = '*'
    if sample_pattern is None:
        sample_pattern = '*'
    samples = []
    for project in get_projects(seq_data, pattern=project_pattern):
        samples.extend(
            sample for sample in project.samples
            if bcf_utils.name_matches(sample.name, sample_pattern))
    return samples
def get_samples(self, pattern):
    """Return the samples whose names match a pattern

    Each sample in 'self.samples' is tested by passing its name
    and the supplied pattern to bcf_utils.name_matches.

    Arguments:
      pattern: simple 'glob' style pattern

    Returns:
      Python list holding the samples with matching names, in
      their original order (an empty list if no names match).
    """
    return [candidate for candidate in self.samples
            if bcf_utils.name_matches(candidate.name, pattern)]
def get_analysis_projects_from_dirs(self, pattern=None, strict=False):
    """
    Discover AnalysisProjects by scanning the analysis directory

    Each top-level subdirectory of the analysis directory is
    examined in turn. A subdirectory is discarded if it loads
    successfully as CASAVA/bcl2fastq output, or if it fails the
    project checks described below; otherwise it is wrapped in
    an AnalysisProject and, provided its name matches 'pattern',
    included in the result.

    In contrast to `get_analysis_projects`, the project metadata
    (typically 'projects.info') is not consulted.

    Arguments:
      pattern (str): optional pattern used to select a subset
        of projects by name, see bcf_utils.name_matches
        (default: None, which is treated as '*' i.e. select all
        projects)
      strict (bool): if True then a candidate is only accepted
        when its 'is_analysis_dir' property is true; otherwise
        the only requirement is that it has at least one sample
        (default: False)

    Returns:
      List: list of AnalysisProject instances.
    """
    logging.debug("Testing subdirectories to determine analysis projects")
    if pattern is None:
        pattern = '*'
    found = []
    for subdir in bcf_utils.list_dirs(self.analysis_dir):
        # A subdirectory that loads as bcl2fastq output isn't a project
        try:
            IlluminaData.IlluminaData(self.analysis_dir,
                                      unaligned_dir=subdir)
        except IlluminaData.IlluminaDataError:
            # Not bcl2fastq output: carry on to the project checks
            pass
        except Exception as ex:
            # Unexpected failures are noted but otherwise ignored
            logging.debug("Exception when attempting to load "
                          "subdir '%s' as CASAVA/bcl2fastq output "
                          "(ignored): %s" % (subdir, ex))
        else:
            logging.debug("* %s: rejected" % subdir)
            continue
        # Attempt to load the subdirectory as a project
        candidate = AnalysisProject(
            subdir, os.path.join(self.analysis_dir, subdir))
        if strict and not candidate.is_analysis_dir:
            logging.debug("* %s: rejected (failed strict checks)" % subdir)
            continue
        if not strict and len(candidate.samples) == 0:
            # Non-strict mode only requires that samples exist
            logging.debug("* %s: rejected (no samples)" % subdir)
            continue
        # All checks passed; name filtering is applied last
        logging.debug("* %s: analysis directory" % subdir)
        if bcf_utils.name_matches(candidate.name, pattern):
            found.append(candidate)
    return found
print "%s\t%s\t%d" % (fastq, bcf_utils.format_file_size(fsize), nreads) # Copy fastq.gz files to the current directory if options.copy_pattern is not None: # Extract project and sample names/patterns try: project_pattern, sample_pattern = options.copy_pattern.split("/") print "Copy: look for samples matching pattern %s" % options.copy_pattern print "Data files will be copied to %s" % os.getcwd() except ValueError: logging.error("ERROR invalid pattern '%s'" % options.copy_pattern) sys.exit(1) # Loop through projects and samples looking for matches for project in illumina_data.projects: if bcf_utils.name_matches(project.name, project_pattern): # Loop through samples for sample in project.samples: if bcf_utils.name_matches(sample.name, sample_pattern): for fastq in sample.fastq: fastq_file = os.path.join(sample.dirn, fastq) print "\tCopying .../%s" % os.path.basename( fastq_file) dst = os.path.abspath(os.path.basename(fastq_file)) if os.path.exists(dst): logging.error( "File %s already exists! Skipped" % dst) else: shutil.copy(fastq_file, dst) # Verify against sample sheet
bcf_utils.format_file_size(fsize), nreads) # Copy fastq.gz files to the current directory if options.copy_pattern is not None: # Extract project and sample names/patterns try: project_pattern,sample_pattern = options.copy_pattern.split("/") print "Copy: look for samples matching pattern %s" % options.copy_pattern print "Data files will be copied to %s" % os.getcwd() except ValueError: logging.error("ERROR invalid pattern '%s'" % options.copy_pattern) sys.exit(1) # Loop through projects and samples looking for matches for project in illumina_data.projects: if bcf_utils.name_matches(project.name,project_pattern): # Loop through samples for sample in project.samples: if bcf_utils.name_matches(sample.name,sample_pattern): for fastq in sample.fastq: fastq_file = os.path.join(sample.dirn,fastq) print "\tCopying .../%s" % os.path.basename(fastq_file) dst = os.path.abspath(os.path.basename(fastq_file)) if os.path.exists(dst): logging.error("File %s already exists! Skipped" % dst) else: shutil.copy(fastq_file,dst) # Verify against sample sheet if options.sample_sheet is not None: if IlluminaData.verify_run_against_sample_sheet(illumina_data,options.sample_sheet):