コード例 #1
0
    def get_analysis_projects(self, pattern=None):
        """
        List the analysis projects recorded in the project metadata

        Each entry of the project metadata (loaded from
        'self.params.project_metadata') is considered in turn:
        entries whose name starts with '#' are treated as
        commented out and skipped, as are entries whose name
        doesn't match 'pattern'. Of the remainder, those with a
        same-named path under the analysis directory are wrapped
        in AnalysisProject instances; the others are reported
        with a warning and left out of the results.

        The 'undetermined' project (which isn't taken from the
        metadata) is appended when its name matches the pattern,
        'self.undetermined()' returns something other than None,
        and no project called 'undetermined' has already been
        collected.

        Arguments:
          pattern (str): optional simple pattern used to pick a
            subset of projects by name (see
            bcf_utils.name_matches); None is treated as '*'

        Returns:
          List: the selected projects, metadata entries first
            (in metadata order) followed by 'undetermined'.
        """
        # No pattern supplied means "match on everything"
        name_filter = '*' if pattern is None else pattern
        metadata = self.load_project_metadata(
            self.params.project_metadata)
        selected = []
        for entry in metadata:
            project_name = entry['Project']
            # Drop commented-out entries and names outside the pattern
            if project_name.startswith('#'):
                continue
            if not bcf_utils.name_matches(project_name, name_filter):
                continue
            # Only keep entries which exist in the analysis directory
            candidate_dir = os.path.join(self.analysis_dir, project_name)
            if not os.path.exists(candidate_dir):
                logging.warning("Matching project '%s': no associated "
                                "directory" % project_name)
                continue
            selected.append(AnalysisProject(candidate_dir))
        # 'undetermined' is handled separately from the metadata entries
        if bcf_utils.name_matches('undetermined', name_filter):
            undetermined = self.undetermined()
            if undetermined is not None:
                collected_names = [p.name for p in selected]
                if 'undetermined' not in collected_names:
                    selected.append(undetermined)
        return selected
コード例 #2
0
def get_projects(seq_data,pattern=None):
    """Get list of projects

    Returns a list of IlluminaProject objects from the supplied
    IlluminaData object, optionally restricted to those projects
    whose names match a pattern.

    Arguments:
      seq_data: object exposing a 'projects' attribute which can
        be iterated over to give the project objects; each
        project must have a 'name' attribute
      pattern: optional simple pattern used to select a subset of
        projects by name (see bcf_utils.name_matches). If None
        (the default) then the '*' wildcard is used, with the
        intention of selecting all projects

    Returns:
      List of the projects with matching names, in the same order
      as in 'seq_data.projects' (empty if nothing matches).

    """
    if pattern is None:
        # Never hand None to the matcher: substitute the
        # match-everything wildcard for a missing pattern
        pattern = '*'
    return [p for p in seq_data.projects
            if bcf_utils.name_matches(p.name,pattern)]
コード例 #3
0
def get_samples(seq_data,project_pattern=None,sample_pattern=None):
    """Get list of samples

    Returns a list of IlluminaSample objects from the supplied
    IlluminaData object, optionally restricted by project name
    and/or sample name.

    Arguments:
      seq_data: object exposing a 'projects' attribute (passed on
        to get_projects); each project must have 'name' and
        'samples' attributes, and each sample a 'name' attribute
      project_pattern: optional simple pattern used to select the
        projects to take samples from (see
        bcf_utils.name_matches). If None (the default) then the
        '*' wildcard is used, with the intention of selecting all
        projects
      sample_pattern: optional simple pattern used to select
        samples by name within the selected projects. If None
        (the default) then the '*' wildcard is used, with the
        intention of selecting all samples

    Returns:
      List of the matching samples, grouped by project in the
      order the projects are returned by get_projects (empty if
      nothing matches).

    """
    # Never hand None to the matcher: substitute the
    # match-everything wildcard for any missing pattern
    if project_pattern is None:
        project_pattern = '*'
    if sample_pattern is None:
        sample_pattern = '*'
    samples = []
    for project in get_projects(seq_data,pattern=project_pattern):
        samples.extend(s for s in project.samples
                       if bcf_utils.name_matches(s.name,sample_pattern))
    return samples
コード例 #4
0
ファイル: analysis.py プロジェクト: nandr0id/auto_process_ngs
    def get_samples(self, pattern):
        """Select the samples whose names match a pattern

        Each sample in 'self.samples' has its name tested against
        the pattern using bcf_utils.name_matches.

        Arguments:
          pattern: simple 'glob' style pattern

        Returns:
          New Python list holding the samples with matching names,
          in their original order (empty if no names match).

        """
        return [candidate for candidate in self.samples
                if bcf_utils.name_matches(candidate.name, pattern)]
コード例 #5
0
    def get_analysis_projects_from_dirs(self, pattern=None, strict=False):
        """
        Discover AnalysisProjects by scanning the analysis directory

        Every subdirectory reported by bcf_utils.list_dirs for the
        analysis directory is examined in turn:

        - if it can be loaded as CASAVA/bcl2fastq output (i.e. as
          an IlluminaData instance) then it is rejected;
        - otherwise it is loaded as an AnalysisProject and checked:
          with 'strict' set the project's 'is_analysis_dir'
          attribute must be true, otherwise the project only needs
          to have at least one sample;
        - projects passing the checks are kept if their name
          matches 'pattern'.

        Unlike the `get_analysis_projects` method, the project
        metadata (typically in 'projects.info') is not consulted.

        Arguments:
          pattern (str): optional simple pattern used to pick a
            subset of projects by name (see
            bcf_utils.name_matches); None is treated as '*'
          strict (bool): if True then apply the strict check to
            each candidate project instead of the basic
            "has samples" check (default: basic check only)

        Returns:
          List: list of AnalysisProject instances.
        """
        logging.debug("Testing subdirectories to determine analysis projects")
        # No pattern supplied means "match on everything"
        name_filter = '*' if pattern is None else pattern
        found = []
        for dirn in bcf_utils.list_dirs(self.analysis_dir):
            # Anything which loads as bcl2fastq output isn't a project
            try:
                IlluminaData.IlluminaData(self.analysis_dir,
                                          unaligned_dir=dirn)
                logging.debug("* %s: rejected" % dirn)
                continue
            except IlluminaData.IlluminaDataError:
                # Not bcl2fastq output: carry on to the project checks
                pass
            except Exception as ex:
                # Unexpected failures are only logged; the directory
                # is still examined as a potential project
                logging.debug("Exception when attempting to load "
                              "subdir '%s' as CASAVA/bcl2fastq output "
                              "(ignored): %s" % (dirn, ex))
            # Load the directory as a candidate project
            candidate = AnalysisProject(
                dirn, os.path.join(self.analysis_dir, dirn))
            # Work out whether (and why) the candidate is rejected
            if strict:
                if candidate.is_analysis_dir:
                    rejection = None
                else:
                    rejection = "* %s: rejected (failed strict checks)"
            elif len(candidate.samples) == 0:
                rejection = "* %s: rejected (no samples)"
            else:
                rejection = None
            if rejection is not None:
                logging.debug(rejection % dirn)
                continue
            # Candidate is acceptable: keep it if the name is wanted
            logging.debug("* %s: analysis directory" % dirn)
            if bcf_utils.name_matches(candidate.name, name_filter):
                found.append(candidate)
        return found
コード例 #6
0
                print "%s\t%s\t%d" % (fastq, bcf_utils.format_file_size(fsize),
                                      nreads)

    # Copy fastq.gz files to the current directory
    if options.copy_pattern is not None:
        # Extract project and sample names/patterns
        try:
            project_pattern, sample_pattern = options.copy_pattern.split("/")
            print "Copy: look for samples matching pattern %s" % options.copy_pattern
            print "Data files will be copied to %s" % os.getcwd()
        except ValueError:
            logging.error("ERROR invalid pattern '%s'" % options.copy_pattern)
            sys.exit(1)
        # Loop through projects and samples looking for matches
        for project in illumina_data.projects:
            if bcf_utils.name_matches(project.name, project_pattern):
                # Loop through samples
                for sample in project.samples:
                    if bcf_utils.name_matches(sample.name, sample_pattern):
                        for fastq in sample.fastq:
                            fastq_file = os.path.join(sample.dirn, fastq)
                            print "\tCopying .../%s" % os.path.basename(
                                fastq_file)
                            dst = os.path.abspath(os.path.basename(fastq_file))
                            if os.path.exists(dst):
                                logging.error(
                                    "File %s already exists! Skipped" % dst)
                            else:
                                shutil.copy(fastq_file, dst)

    # Verify against sample sheet
コード例 #7
0
                                  bcf_utils.format_file_size(fsize),
                                  nreads)

    # Copy fastq.gz files to the current directory
    if options.copy_pattern is not None:
        # Extract project and sample names/patterns
        try:
            project_pattern,sample_pattern = options.copy_pattern.split("/")
            print "Copy: look for samples matching pattern %s" % options.copy_pattern
            print "Data files will be copied to %s" % os.getcwd()
        except ValueError:
            logging.error("ERROR invalid pattern '%s'" % options.copy_pattern)
            sys.exit(1)
        # Loop through projects and samples looking for matches
        for project in illumina_data.projects:
            if bcf_utils.name_matches(project.name,project_pattern):
                # Loop through samples
                for sample in project.samples:
                    if bcf_utils.name_matches(sample.name,sample_pattern):
                        for fastq in sample.fastq:
                            fastq_file = os.path.join(sample.dirn,fastq)
                            print "\tCopying .../%s" % os.path.basename(fastq_file)
                            dst = os.path.abspath(os.path.basename(fastq_file))
                            if os.path.exists(dst):
                                logging.error("File %s already exists! Skipped" % dst)
                            else:
                                shutil.copy(fastq_file,dst)

    # Verify against sample sheet
    if options.sample_sheet is not None:
        if IlluminaData.verify_run_against_sample_sheet(illumina_data,options.sample_sheet):