Esempio n. 1
0
    if modulefiles is not None:
        for modulefile in modulefiles.split(','):
            envmod.load(modulefile)

    # Deal with job runners
    runners = dict()
    for runner in args.runners:
        try:
            stage, runner_spec = runner.split('=')
        except ValueError:  # too few values to unpack
            stage = 'default'
            runner_spec = runner
        if stage not in stages:
            logger.fatal("Bad stage for --runner option: %s" % stage)
            sys.exit(1)
        runners[stage] = fetch_runner(runner_spec)
    try:
        default_runner = runners['default']
    except KeyError:
        default_runner = __settings.runners.icell8
    for stage in stages:
        if stage not in runners:
            if stage == 'qc':
                # Use the general QC settings
                stage_runner = __settings.runners['qc']
            else:
                # Look for Icell8-specific runner
                try:
                    stage_runner = __settings.runners['icell8_%s' % stage]
                except KeyError:
                    stage_runner = default_runner
Esempio n. 2
0
 # Set number of threads for QC jobs
 if args.nthreads:
     nthreads = args.nthreads
 else:
     nthreads = __settings.qc.nprocessors
 # Cellranger settings
 cellranger_jobmode = cellranger_settings.cellranger_jobmode
 cellranger_mempercore = cellranger_settings.cellranger_mempercore
 cellranger_jobinterval = cellranger_settings.cellranger_jobinterval
 cellranger_localcores = cellranger_settings.cellranger_localcores
 cellranger_localmem = cellranger_settings.cellranger_localmem
 # Set up runners
 if args.runner is not None:
     # Runner explicitly supplied on the command line
     print("Setting up runners supplied on command line")
     default_runner = fetch_runner(args.runner)
     runners = {
         'cellranger_runner': default_runner,
         'fastqc_runner': default_runner,
         'fastq_screen_runner': default_runner,
         'star_runner': default_runner,
         'verify_runner': default_runner,
         'report_runner': default_runner,
     }
 else:
     # Runners from configuration
     print("Setting up runners from configuration")
     default_runner = __settings.general.default_runner
     runners = {
         'cellranger_runner': __settings.runners.cellranger,
         'fastqc_runner': __settings.runners.fastqc,
Esempio n. 3
0
    # Parse the command line
    args = p.parse_args()

    # Set up environment
    if args.modulefiles is None:
        modulefiles = __modulefiles
    else:
        modulefiles = args.modulefiles
    if modulefiles is not None:
        announce("Setting up environment")
        for modulefile in modulefiles.split(','):
            envmod.load(modulefile)

    # Job runner
    qc_runner = fetch_runner(args.runner)

    # Load the project
    announce("Loading project data")
    project_dir = os.path.abspath(args.project_dir)
    project_name = os.path.basename(project_dir)
    project = AnalysisProject(project_name,project_dir)

    # Get list of samples
    project = AnalysisProject(project_name,project_dir,
                              fastq_dir=args.fastq_dir)
    print "Subdirectories with Fastqs:"
    for fastq_dir in project.fastq_dirs:
        print "- %s" % fastq_dir
    print "Gathering Fastqs from %s" % project.fastq_dir
    if args.sample_pattern is not None:
Esempio n. 4
0
def archive(ap,archive_dir=None,platform=None,year=None,
            perms=None,group=None,include_bcl2fastq=False,
            read_only_fastqs=True,runner=None,
            final=False,force=False,dry_run=False):
    """
    Copy an analysis directory and contents to an archive area

    Copies the contents of the analysis directory to an archive
    area, which can be on a local or remote system.

    The archive directory is constructed in the form

    <TOP_DIR>/<YEAR>/<PLATFORM>/<DIR>/...

    The YEAR and PLATFORM can be overridden using the appropriate
    arguments.

    By default the data is copied to a 'staging' directory
    called '__ANALYSIS_DIR.pending' in the archive directory.
    The archiving can be finalised by setting the 'final'
    argument to 'True', which performs a last update of the
    staging area before moving the data to its final location.

    Once the archive has been finalised any further archiving
    attempts will be refused.

    Copying of the data is performed using 'rsync'; multiple
    archive operations mirror the contents of the analysis
    directory (so any data removed from the source will also
    be removed from the archive).

    By default the 'bcl2fastq' directory is omitted from the
    archive, unless the fastq files in any projects are links to
    the data. Inclusion of this directory can be forced by
    setting the appropriate argument.

    The fastqs will be switched to be read-only in the archive
    by default.

    Arguments:
      ap (AutoProcessor): autoprocessor pointing to the
        analysis directory to be archived
      archive_dir (str): top level archive directory, of the
        form '[[user@]host:]dir' (if not set then use the value
        from the settings.ini file).
      platform (str): set the value of the <PLATFORM> level in
        the archive (if not set then taken from the supplied
        autoprocessor instance).
      year (str): set the value of the <YEAR> level in the
        archive (if not set then defaults to the current year)
        (4 digits)
      perms (str): change the permissions of the destination
        files and directories according to the supplied
        argument (e.g. 'g+w') (if not set then use the value
        from the settings.ini file).
      group (str): set the group of the destination files to
        the supplied argument (if not set then use the value
        from the settings.ini file).
      include_bcl2fastq (bool): if True then force inclusion
        of the 'bcl2fastq' subdirectory; otherwise only include
        it if fastq files in project subdirectories are symlinks.
      read_only_fastqs (bool): if True then make the fastqs
        read-only in the destination directory; otherwise keep
        the original permissions.
      runner: (optional) specify a non-default job runner to use
        for primary data rsync
      final (bool): if True then finalize the archive by
        moving the '.pending' temporary archive to the final
        location
      force (bool): if True then do archiving even if key
        metadata items are not set; otherwise abort archiving
        operation.
      dry_run (bool): report what would be done but don't
        perform any operations.

    Returns:
      UNIX-style integer returncode: 0 = successful termination,
        non-zero indicates an error occurred.
    """
    # Return value
    retval = 0
    # Check if analysis dir is actually staging directory
    analysis_dir = os.path.basename(ap.analysis_dir)
    is_staging = False
    if analysis_dir.startswith("__") and analysis_dir.endswith(".pending"):
        logger.warning("Operating directly on staged directory")
        if not final:
            raise Exception("Cannot re-stage already staged "
                            "analysis directory")
        else:
            is_staging = True
    # Fetch archive location
    if archive_dir is None:
        archive_dir = ap.settings.archive.dirn
    if archive_dir is None:
        raise Exception("No archive directory specified (use "
                        "--archive_dir option?)")
    # Construct subdirectory structure i.e. platform and year
    if platform is None:
        platform = ap.metadata.platform
    if platform is None:
        raise Exception("No platform specified (use --platform "
                        "option?)")
    if year is None:
        # Derive 4-digit year from the first two digits of the
        # instrument datestamp (assumes a 20xx YYMMDD datestamp)
        year = "20%s" % str(ap.metadata.instrument_datestamp)[0:2]
    archive_dir = os.path.join(archive_dir,year,platform)
    if not fileops.exists(archive_dir):
        raise OSError("Archive directory '%s' doesn't exist" %
                      archive_dir)
    # Determine target directory
    if not is_staging:
        final_dest = analysis_dir
        staging = "__%s.pending" % analysis_dir
    else:
        # Strip the '__' prefix and '.pending' suffix to recover
        # the final destination name
        final_dest = analysis_dir[len("__"):-len(".pending")]
        staging = analysis_dir
    if final:
        dest = final_dest
    else:
        dest = staging
    print "Copying to archive directory: %s" % archive_dir
    print "Platform   : %s" % platform
    print "Year       : %s" % year
    print "Destination: %s %s" % (dest,
                                  "(final)" if final else
                                  "(staging)")
    # Check if final archive already exists
    if fileops.exists(os.path.join(archive_dir,final_dest)):
        logging.fatal("Final archive already exists, stopping")
        return 1
    # Check metadata
    check_metadata = ap.check_metadata(('source','run_number'))
    if not check_metadata:
        # NOTE(review): archiving proceeds despite missing metadata
        # only when 'force' is set AND we are operating on an
        # already-staged directory — confirm that 'force' alone
        # shouldn't be sufficient here
        if not force or not is_staging:
            logging.fatal("Some metadata items not set, stopping")
            return 1
        logging.warning("Some metadata items not set, proceeding")
    if not is_staging:
        # Are there any projects to archive?
        projects = ap.get_analysis_projects()
        if not projects:
            raise Exception("No project directories found, nothing "
                            "to archive")
        # Determine which directories to exclude
        excludes = ['--exclude=primary_data',
                    '--exclude=save.*',
                    '--exclude=*.bak',
                    '--exclude=tmp.*']
        if not include_bcl2fastq:
            # Determine whether bcl2fastq dir should be included implicitly
            # because there are links from the analysis directories
            for project in projects:
                if project.fastqs_are_symlinks:
                    print "Found at least one project with fastq " \
                        "symlinks (%s)" % project.name
                    include_bcl2fastq = True
                    break
        if not include_bcl2fastq:
            print "Excluding '%s' directory from archive" % \
                ap.params.unaligned_dir
            excludes.append('--exclude=%s' % ap.params.unaligned_dir)
        # 10xgenomics products to exclude
        excludes.append('--exclude=*.mro')
        excludes.append('--exclude="%s*"' %
                        tenx_genomics_utils.flow_cell_id(ap.run_name))
        # Log dir
        log_dir = 'archive%s' % ('_final' if final else '_staging')
        if dry_run:
            log_dir += '_dry_run'
        ap.set_log_dir(ap.get_log_subdir(log_dir))
        # Set up runner
        if runner is not None:
            runner = fetch_runner(runner)
        else:
            runner = ap.settings.runners.rsync
        runner.set_log_dir(ap.log_dir)
        # Setup a scheduler for multiple rsync jobs
        sched = simple_scheduler.SimpleScheduler(
            runner=runner,
            max_concurrent=ap.settings.general.max_concurrent_jobs)
        sched.start()
        # Keep track of jobs
        archiving_jobs = []
        # If making fastqs read-only then transfer them separately
        if read_only_fastqs and final:
            rsync_fastqs = applications.general.rsync(
                "%s/" % ap.analysis_dir,
                os.path.join(archive_dir,staging),
                prune_empty_dirs=True,
                dry_run=dry_run,
                chmod='ugo-w',
                extra_options=(
                    '--include=*/',
                    '--include=fastqs/**',
                    '--exclude=*',))
            print "Running %s" % rsync_fastqs
            rsync_fastqs_job = sched.submit(rsync_fastqs,
                                            name="rsync.archive_fastqs")
            # Exclude fastqs from main rsync
            excludes.append('--exclude=fastqs')
            # Main rsync must wait for the Fastq-only rsync to finish
            wait_for = [rsync_fastqs_job.job_name]
            # Add to list of jobs
            archiving_jobs.append(rsync_fastqs_job)
        else:
            # No separate Fastq rsync
            rsync_fastqs_job = None
            wait_for = ()
        # Main rsync command
        rsync = applications.general.rsync(
            "%s/" % ap.analysis_dir,
            os.path.join(archive_dir,staging),
            prune_empty_dirs=True,
            mirror=True,
            dry_run=dry_run,
            chmod=perms,
            extra_options=excludes)
        print "Running %s" % rsync
        rsync_job = sched.submit(rsync,name="rsync.archive",
                                 wait_for=wait_for)
        archiving_jobs.append(rsync_job)
        # Wait for jobs to complete
        rsync_job.wait()
        # Check exit status on jobs
        for job in archiving_jobs:
            print "%s completed: exit code %s" % (job.name,
                                                  job.exit_code)
        # Non-zero sum means at least one job failed
        retval = sum([j.exit_code for j in archiving_jobs])
        if retval != 0:
            logger.warning("One or more archiving jobs failed "
                           "(non-zero exit code returned)")
        else:
            # Set the group
            if group is not None:
                print "Setting group of archived files to '%s'" % group
                if not dry_run:
                    set_group = fileops.set_group_command(
                        group,
                        os.path.join(archive_dir,staging),
                        verbose=True)
                    print "Running %s" % set_group
                    set_group_job = sched.submit(
                        set_group,
                        name="set_group.archive")
                    set_group_job.wait()
                    # Check exit status
                    exit_code = set_group_job.exit_code
                    print "%s completed: exit code %s" % (
                        set_group_job.name,
                        exit_code)
                    if exit_code != 0:
                        logger.warning("Setting group failed (non-zero "
                                       "exit status code returned)")
                    retval = retval + exit_code
        # Finish with scheduler
        sched.wait()
        sched.stop()
        # Bail out if there was a problem
        if retval != 0:
            raise Exception("Staging to archive failed")
    # Move to final location
    if final:
        print "Moving to final location: %s" % final_dest
        if not dry_run:
            fileops.rename(os.path.join(archive_dir,staging),
                           os.path.join(archive_dir,final_dest))
    # Finish
    return retval
Esempio n. 5
0
def main():
    """
    Transfer copies of Fastq data from an analysis project to a
    destination for sharing.

    Parses the command line, resolves the target (either a
    pre-defined destination from the configuration file, or an
    arbitrary '[[USER@]HOST:]DIR' location), then uses a scheduler
    to run the copy jobs: the Fastqs themselves (via
    'manage_fastqs.py'), plus optional extras (templated README,
    'download_fastqs.py' utility, zipped QC reports, and tar.gz'ed
    10xGenomics pipeline outputs).

    Returns:
      Integer exit code: 0/None on success, non-zero on error
      (NB some error paths use a bare 'return', i.e. None).
    """
    # Load configuration
    settings = Settings()

    # Collect defaults
    default_runner = settings.runners.rsync

    # Get pre-defined destinations
    destinations = [name for name in settings.destination]

    # Command line
    p = argparse.ArgumentParser(
        description="Transfer copies of Fastq data from an analysis "
        "project to an arbitrary destination for sharing with other "
        "people")
    p.add_argument('--version',
                   action='version',
                   version=("%%(prog)s %s" % get_version()))
    p.add_argument('--subdir',
                   action='store',
                   choices=('random_bin', 'run_id'),
                   default=None,
                   help="subdirectory naming scheme: 'random_bin' "
                   "locates a random pre-existing empty subdirectory "
                   "under the target directory; 'run_id' creates a "
                   "new subdirectory "
                   "'PLATFORM_DATESTAMP.RUN_ID-PROJECT'. If this "
                   "option is not set then no subdirectory will be "
                   "used")
    p.add_argument('--readme',
                   action='store',
                   metavar='README_TEMPLATE',
                   dest='readme_template',
                   help="template file to generate README file from; "
                   "can be full path to a template file, or the name "
                   "of a file in the 'templates' directory")
    p.add_argument('--weburl',
                   action='store',
                   help="base URL for webserver (sets the value of "
                   "the WEBURL variable in the template README)")
    p.add_argument('--include_downloader',
                   action='store_true',
                   help="copy the 'download_fastqs.py' utility to the "
                   "final location")
    p.add_argument('--include_qc_report',
                   action='store_true',
                   help="copy the zipped QC reports to the final "
                   "location")
    p.add_argument('--include_10x_outputs',
                   action='store_true',
                   help="copy outputs from 10xGenomics pipelines (e.g. "
                   "'cellranger count') to the final location")
    p.add_argument('--link',
                   action='store_true',
                   help="hard link files instead of copying")
    p.add_argument('--runner',
                   action='store',
                   help="specify the job runner to use for executing "
                   "the checksumming, Fastq copy and tar gzipping "
                   "operations (defaults to job runner defined for "
                   "copying in config file [%s])" % default_runner)
    p.add_argument('dest',
                   action='store',
                   metavar="DEST",
                   help="destination to copy Fastqs to; can be the "
                   "name of a destination defined in the configuration "
                   "file, or an arbitrary location of the form "
                   "'[[USER@]HOST:]DIR' (%s)" %
                   (("available destinations: %s" %
                     (','.join("'%s'" % d for d in sorted(destinations))))
                    if destinations else "no destinations currently defined"))
    p.add_argument('project',
                   action='store',
                   metavar="PROJECT",
                   help="path to project directory (or to a Fastqs "
                   "subdirectory in a project) to copy Fastqs from")

    # Process command line
    args = p.parse_args()

    # Check if target is pre-defined destination
    if args.dest in destinations:
        print("Loading settings for destination '%s'" % args.dest)
        dest = settings.destination[args.dest]
        target_dir = dest.directory
        readme_template = dest.readme_template
        subdir = dest.subdir
        include_downloader = dest.include_downloader
        include_qc_report = dest.include_qc_report
        hard_links = dest.hard_links
        weburl = dest.url
    else:
        # Arbitrary location: no destination-specific extras
        target_dir = args.dest
        readme_template = None
        subdir = None
        include_downloader = False
        include_qc_report = False
        hard_links = False
        weburl = None

    # Update defaults with command line values
    if args.readme_template:
        readme_template = args.readme_template
    if args.subdir:
        subdir = args.subdir
    if args.include_downloader:
        include_downloader = True
    if args.include_qc_report:
        include_qc_report = True
    if args.weburl:
        weburl = args.weburl
    if args.link:
        hard_links = args.link

    # Sort out project directory
    project = AnalysisProject(args.project)
    if not project.is_analysis_dir:
        # Assume it's the Fastq dir
        fastq_dir = os.path.basename(args.project)
        project = AnalysisProject(os.path.dirname(args.project))
    else:
        fastq_dir = None
    if not project.is_analysis_dir:
        logger.error("'%s': project not found" % args.project)
        return 1
    project_name = project.name

    # Parent analysis directory
    analysis_dir = AnalysisDir(os.path.dirname(project.dirn))

    # Fastqs directory
    try:
        project.use_fastq_dir(fastq_dir)
    except Exception as ex:
        logger.error("'%s': failed to load Fastq set '%s': %s" %
                     (project.name, fastq_dir, ex))
        return 1

    # Report
    print("Transferring data from '%s' (%s)" % (project.name, project.dirn))
    print("Fastqs in %s" % project.fastq_dir)

    # Summarise samples and Fastqs
    samples = set()
    nfastqs = 0
    fsize = 0
    for sample in project.samples:
        samples.add(sample.name)
        for fq in sample.fastq:
            # NB os.lstat doesn't follow symlinks, so symlinked
            # Fastqs contribute the link size, not the target size
            fsize += os.lstat(fq).st_size
            nfastqs += 1
    nsamples = len(samples)
    dataset = "%s%s dataset" % ("%s " % project.info.single_cell_platform
                                if project.info.single_cell_platform else '',
                                project.info.library_type)
    endedness = "paired-end" if project.info.paired_end else "single-end"
    print("%s with %d Fastqs from %d %s sample%s totalling %s" %
          (dataset, nfastqs, nsamples, endedness, 's' if nsamples != 1 else '',
           format_file_size(fsize)))

    # Check target dir
    if not Location(target_dir).is_remote:
        target_dir = os.path.abspath(target_dir)
    if not exists(target_dir):
        print("'%s': target directory not found" % target_dir)
        # NOTE(review): bare 'return' exits with None rather than a
        # non-zero code — confirm the caller treats this as failure
        return
    else:
        print("Target directory %s" % target_dir)

    # Locate downloader
    if include_downloader:
        print("Locating downloader for inclusion")
        downloader = find_program("download_fastqs.py")
        if downloader is None:
            logging.error("Unable to locate download_fastqs.py")
            return 1
        print("... found %s" % downloader)
    else:
        downloader = None

    # Locate zipped QC report
    if include_qc_report:
        print("Locating zipped QC reports for inclusion")
        qc_zips = list()
        # Check QC directories and look for zipped reports
        for qc_dir in project.qc_dirs:
            # Get the associated Fastq set
            # NB only compare the basename of the Fastq dir
            # in case full paths weren't updated
            fq_set = os.path.basename(project.qc_info(qc_dir).fastq_dir)
            if fq_set == os.path.basename(project.fastq_dir):
                # Try both naming conventions for the zipped report
                # (run name and analysis dir name suffixes)
                for qc_base in (
                        "%s_report.%s.%s" %
                    (qc_dir, project.name, project.info.run),
                        "%s_report.%s.%s" %
                    (qc_dir, project.name,
                     os.path.basename(analysis_dir.analysis_dir)),
                ):
                    qc_zip = os.path.join(project.dirn, "%s.zip" % qc_base)
                    if os.path.exists(qc_zip):
                        print("... found %s" % qc_zip)
                        qc_zips.append(qc_zip)
        if not qc_zips:
            logger.error("No zipped QC reports found")
            return 1
    else:
        qc_zips = None

    # Locate 10xGenomics outputs
    if args.include_10x_outputs:
        print("Locating outputs from 10xGenomics pipelines for " "inclusion")
        cellranger_dirs = list()
        for d in (
                'cellranger_count',
                'cellranger_multi',
        ):
            cellranger_dir = os.path.join(project.dirn, d)
            if os.path.isdir(cellranger_dir):
                print("... found %s" % cellranger_dir)
                cellranger_dirs.append(cellranger_dir)
        if not cellranger_dirs:
            logger.error("No outputs from 10xGenomics pipelines found")
            return 1
    else:
        cellranger_dirs = None

    # Determine subdirectory
    if subdir == "random_bin":
        # Find a random empty directory under the
        # target directory
        print("Locating random empty bin")
        subdirs = [
            d for d in os.listdir(target_dir)
            if os.path.isdir(os.path.join(target_dir, d))
        ]
        if not subdirs:
            print("Failed to locate subdirectories")
            # NOTE(review): bare 'return' (None) on error — see above
            return
        shuffle(subdirs)
        subdir = None
        for d in subdirs:
            if not os.listdir(os.path.join(target_dir, d)):
                # Empty bin
                subdir = d
                break
        if subdir is None:
            print("Failed to locate empty subdirectory")
            return
        print("... found '%s'" % subdir)
        # Update target dir
        target_dir = os.path.join(target_dir, subdir)
    elif subdir == "run_id":
        # Construct subdirectory name based on the
        # run ID
        subdir = "{platform}_{datestamp}.{run_number}-{project}".format(
            platform=analysis_dir.metadata.platform.upper(),
            datestamp=analysis_dir.metadata.instrument_datestamp,
            run_number=analysis_dir.metadata.run_number,
            project=project.name)
        # Check it doesn't already exist
        if exists(os.path.join(target_dir, subdir)):
            logger.error("'%s': subdirectory already exists" % subdir)
            return
        print("Using subdirectory '%s'" % subdir)
        # Update target dir
        target_dir = os.path.join(target_dir, subdir)

    # Make target directory
    if not exists(target_dir):
        mkdir(target_dir)

    # Get runner for copy job
    if args.runner:
        runner = fetch_runner(args.runner)
    else:
        runner = default_runner

    # Set identifier for jobs
    job_id = "%s%s" % (project_name,
                       (".%s" % fastq_dir if fastq_dir is not None else ''))

    # Set the working directory
    working_dir = os.path.abspath("transfer.%s.%s" %
                                  (job_id, int(time.time())))
    mkdir(working_dir)
    print("Created working dir %s" % working_dir)

    # Construct the README
    if readme_template:
        # Check that template file exists
        print("Locating README template")
        template = None
        for filen in (
                readme_template,
                os.path.join(get_templates_dir(), readme_template),
        ):
            if os.path.exists(filen):
                template = filen
                break
        if template is None:
            logger.error("'%s': template file not found" % readme_template)
            return 1
        else:
            readme_template = template
        print("... found %s" % readme_template)
        # Read in template
        with open(readme_template, 'rt') as fp:
            readme = fp.read()
        # Substitute template variables
        template_vars = {
            'PLATFORM': analysis_dir.metadata.platform.upper(),
            'RUN_NUMBER': analysis_dir.metadata.run_number,
            'DATESTAMP': analysis_dir.metadata.instrument_datestamp,
            'PROJECT': project_name,
            'WEBURL': weburl,
            'BIN': subdir,
            'DIR': target_dir,
            'TODAY': date.today().strftime("%d/%m/%Y"),
        }
        for var in template_vars:
            value = template_vars[var]
            if value is None:
                value = '?'
            else:
                value = str(value)
            # Replace '%VAR%' placeholders in the template; var names
            # are plain words so no regex escaping is needed
            readme = re.sub(r"%{var}%".format(var=var), value, readme)
        # Write out a temporary README file
        readme_file = os.path.join(working_dir, "README")
        with open(readme_file, 'wt') as fp:
            fp.write(readme)
    else:
        # No README
        readme_file = None

    # Start a scheduler to run jobs
    sched = SimpleScheduler(runner=runner,
                            reporter=TransferDataSchedulerReporter(),
                            poll_interval=settings.general.poll_interval)
    sched.start()

    # Build command to run manage_fastqs.py
    copy_cmd = Command("manage_fastqs.py")
    if hard_links:
        copy_cmd.add_args("--link")
    copy_cmd.add_args(analysis_dir.analysis_dir, project_name)
    if fastq_dir is not None:
        copy_cmd.add_args(fastq_dir)
    copy_cmd.add_args("copy", target_dir)
    print("Running %s" % copy_cmd)
    copy_job = sched.submit(copy_cmd.command_line,
                            name="copy.%s" % job_id,
                            wd=working_dir)

    # Copy README
    if readme_file is not None:
        print("Copying README file")
        copy_cmd = copy_command(readme_file,
                                os.path.join(target_dir, "README"))
        # Small local copies run under a plain SimpleJobRunner
        # rather than the main (possibly cluster) runner
        sched.submit(copy_cmd.command_line,
                     name="copy.%s.readme" % job_id,
                     runner=SimpleJobRunner(),
                     wd=working_dir)

    # Copy download_fastqs.py
    if downloader:
        print("Copying downloader")
        copy_cmd = copy_command(
            downloader, os.path.join(target_dir, os.path.basename(downloader)))
        sched.submit(copy_cmd.command_line,
                     name="copy.%s.downloader" % job_id,
                     runner=SimpleJobRunner(),
                     wd=working_dir)

    # Copy QC reports
    if qc_zips:
        for qc_zip in qc_zips:
            print("Copying '%s'" % os.path.basename(qc_zip))
            copy_cmd = copy_command(qc_zip,
                                    os.path.join(target_dir,
                                                 os.path.basename(qc_zip)),
                                    link=hard_links)
            sched.submit(copy_cmd.command_line,
                         name="copy.%s.%s" %
                         (job_id, os.path.basename(qc_zip)),
                         runner=SimpleJobRunner(),
                         wd=working_dir)

    # Tar and copy 10xGenomics outputs
    if cellranger_dirs:
        for cellranger_dir in cellranger_dirs:
            print("Tar gzipping and copying '%s'" %
                  os.path.basename(cellranger_dir))
            # Tar & gzip data
            targz = os.path.join(
                working_dir,
                "%s.%s.%s.tgz" % (os.path.basename(cellranger_dir),
                                  project_name, project.info.run))
            targz_cmd = Command("tar", "czvhf", targz, "-C",
                                os.path.dirname(cellranger_dir),
                                os.path.basename(cellranger_dir))
            print("Running %s" % targz_cmd)
            targz_job = sched.submit(
                targz_cmd.command_line,
                name="targz.%s.%s" %
                (job_id, os.path.basename(cellranger_dir)),
                wd=working_dir)
            # Copy the targz file
            copy_cmd = copy_command(
                targz, os.path.join(target_dir, os.path.basename(targz)))
            print("Running %s" % copy_cmd)
            # NOTE(review): this rebinds 'copy_job' (originally the
            # main Fastq copy job) — confirm this shadowing is intended
            copy_job = sched.submit(copy_cmd.command_line,
                                    name="copytgz.%s.%s" %
                                    (job_id, os.path.basename(cellranger_dir)),
                                    runner=SimpleJobRunner(),
                                    wd=working_dir,
                                    wait_for=(targz_job.job_name, ))

    # Wait for scheduler jobs to complete
    sched.wait()

    # Check exit code for Fastq copying
    # NOTE(review): if 10x outputs were copied, 'copy_job' now refers
    # to the last targz copy, not the main Fastq copy — only that
    # job's exit code is checked here
    exit_code = copy_job.exit_code
    if exit_code != 0:
        logger.error("File copy exited with an error")
        return exit_code
    else:
        print("Files now at %s" % target_dir)
        if weburl:
            url = weburl
            if subdir is not None:
                url = os.path.join(url, subdir)
            print("URL: %s" % url)
        print("Done")
Esempio n. 6
0
 def getrunner(self, section, option, default='SimpleJobRunner'):
     """
     Fetch a job runner instance for a configuration option.

     Reads the value of 'option' in 'section' (falling back to
     'default') and converts it via fetch_runner(). Any failure
     (missing option, bad runner specification, etc.) yields
     None instead of raising.
     """
     try:
         runner_spec = self.get(section, option, default)
         return fetch_runner(runner_spec)
     except Exception:
         return None