def test_locate_flowcell(self):
        """Exercise ``locate_flowcell`` with a flowcell name and a full path.

        Covers four cases against a scratch ``flowcell_inbox`` directory:
        a name that cannot be found, a path that does not exist, a path that
        exists, and a name that is resolved by searching ``flowcell_inbox``.
        """
        # Local import: only needed here, to remove the scratch directory
        import shutil

        flowcell_name = "temp_flowcell"
        tmp_dir = tempfile.mkdtemp()
        # mkdtemp() never deletes what it creates; remove the scratch
        # directory when the test ends, whether it passes or fails
        self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)
        config = {"environment": {"flowcell_inbox": tmp_dir}}
        with self.assertRaises(ValueError):
            # Should raise ValueError if flowcell can't be found
            locate_flowcell(flowcell=flowcell_name, config=config)

        tmp_flowcell_path = os.path.join(tmp_dir, flowcell_name)
        with self.assertRaises(ValueError):
            # Should raise ValueError as path given doesn't exist
            locate_flowcell(flowcell=tmp_flowcell_path, config=config)

        os.makedirs(tmp_flowcell_path)
        # Should return the path passed in
        self.assertEqual(locate_flowcell(flowcell=tmp_flowcell_path, config=config), tmp_flowcell_path)

        # Should return the full path after searching flowcell_inbox
        self.assertEqual(locate_flowcell(flowcell=flowcell_name, config=config), tmp_flowcell_path)
Example #2
0
    def test_locate_flowcell(self):
        """Exercise ``locate_flowcell`` with a flowcell name and a full path.

        Covers four cases against a scratch ``flowcell_inbox`` directory:
        a name that cannot be found, a path that does not exist, a path that
        exists, and a name that is resolved by searching ``flowcell_inbox``.
        """
        # Local import: only needed here, to remove the scratch directory
        import shutil

        flowcell_name = "temp_flowcell"
        tmp_dir = tempfile.mkdtemp()
        # mkdtemp() never deletes what it creates; remove the scratch
        # directory when the test ends, whether it passes or fails
        self.addCleanup(shutil.rmtree, tmp_dir, ignore_errors=True)
        config = {'environment': {'flowcell_inbox': tmp_dir}}
        with self.assertRaises(ValueError):
            # Should raise ValueError if flowcell can't be found
            locate_flowcell(flowcell=flowcell_name, config=config)

        tmp_flowcell_path = os.path.join(tmp_dir, flowcell_name)
        with self.assertRaises(ValueError):
            # Should raise ValueError as path given doesn't exist
            locate_flowcell(flowcell=tmp_flowcell_path, config=config)

        os.makedirs(tmp_flowcell_path)
        # Should return the path passed in
        self.assertEqual(locate_flowcell(flowcell=tmp_flowcell_path, config=config),
                         tmp_flowcell_path)

        # Should return the full path after searching flowcell_inbox
        self.assertEqual(locate_flowcell(flowcell=flowcell_name, config=config),
                         tmp_flowcell_path)
Example #3
0
def organize_projects_from_flowcell(demux_fcid_dirs, restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    fallback_libprep=None, quiet=False,
                                    create_files=True,
                                    config=None, config_file_path=None):
    """Sort demultiplexed Illumina flowcells into projects and return a list of them,
    creating the project/sample/libprep/seqrun dir tree on disk via symlinks.

    Each distinct entry of ``demux_fcid_dirs`` is resolved with
    ``locate_flowcell`` and handed to ``setup_analysis_directory_structure``,
    which accumulates the projects across flowcells. Flowcells that cannot be
    located are logged and skipped. Duplicates are dropped and flowcells are
    processed in the order they were given.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be
                                      restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be
                                     restricted to these. Optional.
    :param str fallback_libprep: If libprep cannot be determined, use this value if supplied (default None)
    :param bool quiet: Don't send notification emails
    :param bool create_files: Alter the filesystem (as opposed to just parsing flowcells) (default True)
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional.
                                 Not referenced in the body of this function.

    :returns: A list of NGIProject objects.
    :rtype: list
    :raises RuntimeError: If no (valid) projects are found in the flowcell dirs
    """
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []

    # De-duplicate while keeping the caller's order, so that processing order
    # and the error message below are deterministic
    unique_fcid_dirs = []
    seen_fcid_dirs = set()
    for fcid_dir in demux_fcid_dirs:
        if fcid_dir not in seen_fcid_dirs:
            seen_fcid_dirs.add(fcid_dir)
            unique_fcid_dirs.append(fcid_dir)

    # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready"
    projects_to_analyze = {}
    for fcid_dir in unique_fcid_dirs:
        try:
            # Get the full path to the flowcell if it was passed in as just a
            # name; use the same config as the rest of this call
            fc_path = locate_flowcell(fcid_dir, config=config)
        except ValueError as e:
            # Flowcell path couldn't be found/doesn't exist; skip it
            LOG.error('Skipping flowcell "{}": {}'.format(fcid_dir, e))
            continue
        # These will be a bunch of Project objects each containing Samples, FCIDs, lists of fastq files
        projects_to_analyze = setup_analysis_directory_structure(
            fc_dir=fc_path,
            projects_to_analyze=projects_to_analyze,
            restrict_to_projects=restrict_to_projects,
            restrict_to_samples=restrict_to_samples,
            create_files=create_files,
            fallback_libprep=fallback_libprep,
            config=config,
            quiet=quiet)

    if not projects_to_analyze:
        if restrict_to_projects:
            error_message = ("No projects found to process: the specified flowcells "
                             "({fcid_dirs}) do not contain the specified project(s) "
                             "({restrict_to_projects}) or there was an error "
                             "gathering required information.").format(
                                    fcid_dirs=",".join(unique_fcid_dirs),
                                    restrict_to_projects=",".join(restrict_to_projects))
        else:
            error_message = ("No projects found to process in flowcells {} "
                             "or there was an error gathering required "
                             "information.".format(",".join(unique_fcid_dirs)))
        raise RuntimeError(error_message)

    # dict.values() is a view on Python 3; return a real list as documented
    return list(projects_to_analyze.values())
Example #4
0
def organize_projects_from_flowcell(demux_fcid_dirs,
                                    restrict_to_projects=None,
                                    restrict_to_samples=None,
                                    fallback_libprep=None,
                                    quiet=False,
                                    create_files=True,
                                    config=None,
                                    config_file_path=None):
    """Sort demultiplexed Illumina flowcells into projects and return a list of them,
    creating the project/sample/libprep/seqrun dir tree on disk via symlinks.

    Each distinct entry of ``demux_fcid_dirs`` is resolved with
    ``locate_flowcell`` and handed to ``setup_analysis_directory_structure``,
    which accumulates the projects across flowcells. Flowcells that cannot be
    located are logged and skipped. Duplicates are dropped and flowcells are
    processed in the order they were given.

    :param list demux_fcid_dirs: The CASAVA-produced demux directory/directories.
    :param list restrict_to_projects: A list of projects; analysis will be
                                      restricted to these. Optional.
    :param list restrict_to_samples: A list of samples; analysis will be
                                     restricted to these. Optional.
    :param str fallback_libprep: If libprep cannot be determined, use this value if supplied (default None)
    :param bool quiet: Don't send notification emails
    :param bool create_files: Alter the filesystem (as opposed to just parsing flowcells) (default True)
    :param dict config: The parsed NGI configuration file; optional.
    :param str config_file_path: The path to the NGI configuration file; optional.
                                 Not referenced in the body of this function.

    :returns: A list of NGIProject objects.
    :rtype: list
    :raises RuntimeError: If no (valid) projects are found in the flowcell dirs
    """
    restrict_to_projects = restrict_to_projects or []
    restrict_to_samples = restrict_to_samples or []

    # De-duplicate while keeping the caller's order, so that processing order
    # and the error message below are deterministic
    unique_fcid_dirs = []
    seen_fcid_dirs = set()
    for fcid_dir in demux_fcid_dirs:
        if fcid_dir not in seen_fcid_dirs:
            seen_fcid_dirs.add(fcid_dir)
            unique_fcid_dirs.append(fcid_dir)

    # Sort/copy each raw demux FC into project/sample/fcid format -- "analysis-ready"
    projects_to_analyze = {}
    for fcid_dir in unique_fcid_dirs:
        try:
            # Get the full path to the flowcell if it was passed in as just a
            # name; use the same config as the rest of this call
            fc_path = locate_flowcell(fcid_dir, config=config)
        except ValueError as e:
            # Flowcell path couldn't be found/doesn't exist; skip it
            LOG.error('Skipping flowcell "{}": {}'.format(fcid_dir, e))
            continue
        # These will be a bunch of Project objects each containing Samples, FCIDs, lists of fastq files
        projects_to_analyze = setup_analysis_directory_structure(
            fc_dir=fc_path,
            projects_to_analyze=projects_to_analyze,
            restrict_to_projects=restrict_to_projects,
            restrict_to_samples=restrict_to_samples,
            create_files=create_files,
            fallback_libprep=fallback_libprep,
            config=config,
            quiet=quiet)

    if not projects_to_analyze:
        if restrict_to_projects:
            error_message = (
                "No projects found to process: the specified flowcells "
                "({fcid_dirs}) do not contain the specified project(s) "
                "({restrict_to_projects}) or there was an error "
                "gathering required information.").format(
                    fcid_dirs=",".join(unique_fcid_dirs),
                    restrict_to_projects=",".join(restrict_to_projects))
        else:
            error_message = ("No projects found to process in flowcells {} "
                             "or there was an error gathering required "
                             "information.".format(
                                 ",".join(unique_fcid_dirs)))
        raise RuntimeError(error_message)

    # Materialize the dict view so callers get a real list, as documented
    return list(projects_to_analyze.values())