Beispiel #1
0
def process_lane(info, fc_name, fc_date, dirs, config):
    """Build the per-barcode work items for a single lane.

    ``config`` is first passed through ``_update_config_w_custom`` together
    with ``info``. The lane's fastq files are then looked up and handed to
    ``split_by_barcode``; every split it yields becomes one output tuple.

    Returns a list of tuples
    ``(fastq1, fastq2, genome_build, lane_name, sample_name, dirs, config)``.
    """
    config = _update_config_w_custom(config, info)

    # The sample label is the description, optionally prefixed by the
    # short name when the algorithm settings allow it.
    sample_name = info.get("description", "")
    short_name = info.get("name", "")
    if config["algorithm"].get("include_short_name", True) and short_name:
        sample_name = "%s---%s" % (short_name, sample_name)
    genome_build = info.get("genome_build", None)
    multiplex = info.get("multiplex", None)

    summary = ("Processing sample: %s; lane %s; reference genome %s; "
               "researcher %s; analysis method %s") % (
                   sample_name, info["lane"], genome_build,
                   info.get("researcher", ""), info.get("analysis", ""))
    log.info(summary)
    if multiplex:
        log.debug("Sample %s multiplexed as: %s" % (sample_name, multiplex))

    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], info, fc_name)
    lane_name = "%s_%s_%s" % (info['lane'], fc_date, fc_name)
    splits = split_by_barcode(full_fastq1, full_fastq2, multiplex, lane_name,
                              dirs, config)
    lane_items = []
    for bc_name, bc_sample, fastq1, fastq2 in splits:
        # Only extend the lane name when the split carries a barcode name.
        if bc_name:
            item_lane_name = "%s_%s" % (lane_name, bc_name)
        else:
            item_lane_name = lane_name
        if bc_sample is None:
            bc_sample = "%s---%s" % (sample_name, bc_name)
        lane_items.append((fastq1, fastq2, genome_build, item_lane_name,
                           bc_sample, dirs, config))
    return lane_items
Beispiel #2
0
def long_term_storage(remote_info, config_file):
    """Load the configuration and copy a run's data to remote storage.

    The copy (``_copy_for_storage``) and the two log messages run inside the
    ``applicationbound()`` context of the handler returned by
    ``create_log_handler``.
    """
    config = load_config(config_file)
    handler = create_log_handler(config, log.name)
    with handler.applicationbound():
        start_msg = ("Copying run data over to remote storage: %s" %
                     config["store_host"])
        log.info(start_msg)
        contents_msg = ("The contents from AMQP for this dataset are:\n %s" %
                        remote_info)
        log.debug(contents_msg)
        _copy_for_storage(remote_info, config)
Beispiel #3
0
def _remove_transferred_files(remote_info, config):
    """Delete the files left behind by an earlier transfer test.

    Issues ``rm -r`` through fabric, with the host string set to
    ``store_user@store_host`` from ``config``, on the entry under
    ``../transfer_data/copy_to`` named after the last path component of
    ``remote_info["directory"]``.
    """
    copy_to = os.path.realpath("../transfer_data/copy_to")
    host_string = "%s@%s" % (config["store_user"], config["store_host"])
    with fabric.settings(host_string=host_string):
        run_name = os.path.split(remote_info["directory"])[1]
        rm_str = "rm -r %s/%s" % (copy_to, run_name)
        log.debug(rm_str)
        fabric.run(rm_str)
Beispiel #4
0
def _copy_from_sequencer(remote_info, config):
    """Get local directory of flowcell info, or copy from sequencer.

    If ``remote_info`` already has an ``fc_dir`` entry, that path is checked
    for existence and returned as is. Otherwise ``_remote_copy`` is run under
    fabric settings whose host string comes from ``_config_hosts(config)``,
    and its result is returned.

    Raises AssertionError when a supplied ``fc_dir`` does not exist.
    """
    # Membership test instead of dict.has_key(): has_key no longer exists on
    # Python 3 dicts, while ``in`` works on both Python 2 and 3.
    if "fc_dir" in remote_info:
        fc_dir = remote_info["fc_dir"]
        assert os.path.exists(fc_dir), \
            "Flowcell directory does not exist: %s" % fc_dir
    else:
        log.debug("Remote host information: %s" % remote_info)
        c_host_str = _config_hosts(config)
        with fabric.settings(host_string=c_host_str):
            fc_dir = _remote_copy(remote_info, config)
    return fc_dir
Beispiel #5
0
def _copy_from_sequencer(remote_info, config):
    """Get local directory of flowcell info, or copy from sequencer.

    If ``remote_info`` already has an ``fc_dir`` entry, that path is checked
    for existence and returned as is. Otherwise ``remote_copy`` is run under
    fabric settings whose host string comes from ``_config_hosts(config)``,
    copying into ``config["store_dir"]`` with the optional
    ``config["transfer_protocol"]`` (``None`` when the key is absent).

    Raises AssertionError when a supplied ``fc_dir`` does not exist, and
    KeyError when a copy is needed but ``config`` has no ``store_dir``.
    """
    if "fc_dir" in remote_info:
        fc_dir = remote_info["fc_dir"]
        assert os.path.exists(fc_dir), \
            "Flowcell directory does not exist: %s" % fc_dir
    else:
        log.debug("Remote host information: %s" % remote_info)
        c_host_str = _config_hosts(config)
        with fabric.settings(host_string=c_host_str):
            base_dir = config["store_dir"]
            # The protocol is optional; a missing key means None.
            protocol = config.get("transfer_protocol")
            fc_dir = remote_copy(remote_info, base_dir, protocol)

    return fc_dir
Beispiel #6
0
def process_lane(info, config, dirs):
    """Models bcbio process lane.

    Looks up the barcoded fastq files for the lane described by ``info`` and
    delivers them into a ``<lane>_<fc_date>_<fc_name>_barcode`` directory
    under ``config['data_delivery_dir']``. Unless ``options.only_fastq`` is
    set, the lane's analysis results are delivered to
    ``config['data_delivery_dir']`` as well.

    ``options`` is not a parameter; it is read from the enclosing module
    scope. The function returns nothing.
    """
    sample_name = info.get("description", "")
    genome_build = info.get("genome_build", None)
    multiplex = info.get('multiplex', None)
    log.info("Processing sample: %s; lane %s; reference genome %s" %
             (sample_name, info["lane"], genome_build))
    if multiplex:
        log.debug("Sample %s is multiplexed as: %s" % (sample_name, multiplex))
    fastq_pairs = get_barcoded_fastq_files(multiplex, info, dirs['fc_dir'],
                                           config['fc_name'], config['fc_date'])

    ## Move data along with fastq files
    barcode_dir_name = "%s_%s_%s_barcode" % (info['lane'], config['fc_date'],
                                             config['fc_name'])
    fc_bc_dir = os.path.join(config['data_delivery_dir'], barcode_dir_name)
    _make_dir(fc_bc_dir, "fastq.txt barcode directory")
    if not options.only_fastq:
        data, fastqc = _get_analysis_results(config, dirs, info['lane'])
        _deliver_data(data, fastqc, config['data_delivery_dir'])

    # Plain loops: the deliveries are side effects, so there is no point in
    # collecting their return values into a throwaway list.
    for fqpair in fastq_pairs:
        for fq_src in fqpair:
            _deliver_fastq_file(fq_src, os.path.basename(fq_src), fc_bc_dir)
Beispiel #7
0
def make_lane_items(info, fc_date, fc_name, dirs, config):
    """Build the work-item tuples for one lane from its multiplex entries.

    Returns a list of tuples
    ``(fastq1, fastq2, genome_build, lane_name, sample_name, dirs, config)``,
    one per entry produced by ``get_multiplex_items``. A lane without
    multiplex information yields an empty list.
    """
    short_name = info.get("name", "")
    sample_name = info.get("description", "")
    if config["algorithm"].get("include_short_name", True) and short_name:
        sample_name = "%s---%s" % (short_name, sample_name)
    genome_build = info.get("genome_build", None)
    multiplex = info.get("multiplex", "")

    summary = ("Processing sample: %s; lane %s; reference genome %s; "
               "researcher %s; analysis method %s") % (
                   sample_name, info["lane"], genome_build,
                   info.get("researcher", ""), info.get("analysis", ""))
    log.info(summary)

    if not multiplex:
        # TODO: Not multiplex: what to do?
        return []

    log.debug("Sample %s is multiplexed as: %s" % (sample_name, multiplex))
    mitems = get_multiplex_items(multiplex, info['lane'], dirs['fc_dir'],
                                 fc_name, fc_date)
    return [(fastq1, fastq2, genome_build, mlane_name, msample, dirs, config)
            for fastq1, fastq2, mlane_name, msample in mitems]
Beispiel #8
0
def remote_copy(remote_info, base_dir, protocol):
    """Securely copy files between servers.

    Every command is issued through ``fabric.run``. ``remote_info`` supplies
    the source (``user``, ``hostname``, ``directory``) and the list of
    entries ``to_copy``; ``base_dir`` is the destination and is created with
    ``mkdir`` when ``fabric_files.exists`` reports it missing.

    ``protocol`` selects the transfer tool:

    * ``"scp"`` or ``None`` -- one ``scp -r`` per entry, skipping entries
      whose target already exists.
    * ``"rsync"`` -- a single rsync call with include patterns per entry.
    * ``"rdiff-backup"`` -- a single rdiff-backup call with one
      ``--include`` per entry.

    Any other value copies nothing and is reported with a warning.

    Returns the destination directory (``base_dir``).
    """
    fc_dir = base_dir

    if not fabric_files.exists(fc_dir):
        fabric.run("mkdir %s" % fc_dir)

    if protocol is None or protocol == "scp":
        for fcopy in remote_info["to_copy"]:
            target_loc = os.path.join(fc_dir, os.path.basename(remote_info['directory']), fcopy)
            if not fabric_files.exists(target_loc):
                target_dir = os.path.dirname(target_loc)
                if not fabric_files.exists(target_dir):
                    fabric.run("mkdir -p %s" % target_dir)

                cl = ["scp", "-r", "%s@%s:%s/%s" %
                      (remote_info["user"], remote_info["hostname"],
                       remote_info["directory"], fcopy),
                      target_loc]

                log.debug(cl)
                fabric.run(" ".join(cl))

    elif protocol == "rsync":
        include = []
        for fcopy in remote_info['to_copy']:
            # By including both these patterns we get the entire directory
            # if a directory is given, or a single file if a single file is
            # given.
            include.append("--include='%s**/*'" % (fcopy,))
            include.append("--include='%s'" % (fcopy,))

        cl = ["rsync", "--checksum", "--archive",
              "--compress", "--partial", "--progress",
              "--prune-empty-dirs", "--verbose", "--include='*/'",
              " ".join(include), "--exclude='*'",
              "%s@%s:%s" % (remote_info["user"], remote_info["hostname"],
                            remote_info["directory"]), fc_dir]

        log.debug(cl)
        fabric.run(" ".join(cl))

    elif protocol == "rdiff-backup":
        # Note: rdiff-backup doesn't have the ability to resume a partial
        # transfer, and will instead transfer the backup from the beginning
        # if it detects a partial transfer.
        include = []
        for fcopy in remote_info['to_copy']:
            include.append("--include %s/%s" %
                           (remote_info["directory"], fcopy))

        cl = ["rdiff-backup", " ".join(include), "--exclude '**'",
              "%s@%s::%s" % (remote_info["user"], remote_info["hostname"],
                             remote_info["directory"]), fc_dir]

        log.debug(cl)
        fabric.run(" ".join(cl))

    else:
        # An unrecognised protocol transfers nothing; make that visible in
        # the log instead of returning as if the copy had happened.
        log.warning("Unknown transfer protocol %r; nothing was copied to %s" %
                    (protocol, fc_dir))

    return fc_dir
Beispiel #9
0
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    The lane name and fastq files are derived from the first entry of
    ``lane_items``; ``split_by_barcode`` then returns a mapping from barcode
    id to a ``(fastq1, fastq2)`` pair. Items whose ``barcode_id`` is missing
    from that mapping are skipped.

    Returns a list of tuples
    ``(fastq1, fastq2, item, lane_name, lane_description, dirs, config)``.
    """
    lane_name = "%s_%s_%s" % (lane_items[0]["lane"], fc_date, fc_name)
    log.debug("Demulitplexing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], lane_items[0], fc_name)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items, lane_name, dirs, config)
    out = []
    for item in lane_items:
        # NOTE(review): config is rebound on every iteration, so each item's
        # update is applied on top of the previous item's result -- confirm
        # this is intended rather than starting from the original config.
        config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        # Membership test instead of has_key(), which Python 3 mappings
        # no longer provide.
        if item["barcode_id"] in bc_files:
            fastq1, fastq2 = bc_files[item["barcode_id"]]
            cur_lane_name = lane_name
            cur_lane_desc = item["description"]
            if item.get("name", ""):
                cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
            if item["barcode_id"] is not None:
                cur_lane_name += "_%s" % (item["barcode_id"])
            out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc, dirs, config))
    return out
Beispiel #10
0
def _copy_for_storage(remote_info, config):
    """Copy a run's files from the remote directory onto the storage server.

    Commands are issued through fabric against ``store_user@store_host``,
    which is assigned to ``fabric.env.host_string``. Password-less ssh keys
    need to be in place, since nothing here supplies a password.

    Entries of ``remote_info['to_copy']`` that already exist at the target
    are left alone; the rest are fetched with ``scp -r``.
    """
    log.info("Copying run data over to remote storage: %s" % config["store_host"])
    log.debug("The contents from AMQP for this dataset are:\n %s" % remote_info)
    base_dir = config["store_dir"]
    fabric.env.host_string = "%s@%s" % (config["store_user"], config["store_host"])

    run_name = os.path.basename(remote_info['directory'])
    fc_dir = os.path.join(base_dir, run_name)
    if not fabric_files.exists(fc_dir):
        fabric.run("mkdir %s" % fc_dir)

    for fcopy in remote_info['to_copy']:
        target_loc = os.path.join(fc_dir, fcopy)
        if fabric_files.exists(target_loc):
            # Already present on the storage server.
            continue
        target_dir = os.path.dirname(target_loc)
        if not fabric_files.exists(target_dir):
            fabric.run("mkdir -p %s" % target_dir)
        source = "%s@%s:%s/%s" % (remote_info["user"],
                                  remote_info["hostname"],
                                  remote_info["directory"], fcopy)
        fabric.run("scp -r %s %s" % (source, target_loc))
Beispiel #11
0
def remote_copy(remote_info, base_dir, protocol):
    """Securely copy files between servers.

    Every command is issued through ``fabric.run``. ``remote_info`` supplies
    the source (``user``, ``hostname``, ``directory``) and the list of
    entries ``to_copy``. The destination is ``base_dir`` joined with the
    base name of the source directory; it is created with ``mkdir`` when
    ``fabric_files.exists`` reports it missing.

    ``protocol`` selects the transfer tool:

    * ``"scp"`` or ``None`` -- one ``scp -r`` per entry, skipping entries
      whose target already exists.
    * ``"rsync"`` -- one rsync call per entry.
    * ``"rdiff-backup"`` -- a single rdiff-backup call with one
      ``--include`` per entry.

    Any other value copies nothing and is reported with a warning.

    Returns the destination directory.
    """
    fc_dir = os.path.join(base_dir, os.path.basename(remote_info['directory']))

    if not fabric_files.exists(fc_dir):
        fabric.run("mkdir %s" % fc_dir)

    if protocol is None or protocol == "scp":
        for fcopy in remote_info['to_copy']:
            target_loc = os.path.join(fc_dir, fcopy)
            if not fabric_files.exists(target_loc):
                target_dir = os.path.dirname(target_loc)
                if not fabric_files.exists(target_dir):
                    fabric.run("mkdir -p %s" % target_dir)

                cl = ["scp", "-r", "%s@%s:%s/%s" %
                      (remote_info["user"], remote_info["hostname"],
                       remote_info["directory"], fcopy),
                      target_loc]

                log.debug(cl)
                fabric.run(" ".join(cl))

    elif protocol == "rsync":
        for fcopy in remote_info['to_copy']:
            target_loc = os.path.join(fc_dir, fcopy)
            target_dir = os.path.dirname(target_loc)

            if not fabric_files.exists(target_dir):
                fabric.run("mkdir -p %s" % target_dir)

            # NOTE(review): os.path.isdir inspects the filesystem of the
            # machine running this function, while the same path is used
            # below as the rsync source on remote_info["hostname"] --
            # confirm both see the same directory tree.
            # endswith() rather than fcopy[-1] so an empty entry cannot
            # raise IndexError.
            if os.path.isdir("%s/%s" % (remote_info["directory"], fcopy)) \
                    and not fcopy.endswith("/"):
                fcopy += "/"

            # Option -P --append should enable resuming progress on
            # partial transfers.
            cl = ["rsync", "-craz", "-P", "--append", "%s@%s:%s/%s" %
                  (remote_info["user"], remote_info["hostname"],
                   remote_info["directory"], fcopy), fc_dir]

            log.debug(cl)
            fabric.run(" ".join(cl))

    elif protocol == "rdiff-backup":
        # Note: rdiff-backup doesn't have the ability to resume a partial
        # transfer, and will instead transfer the backup from the beginning
        # if it detects a partial transfer.
        include = []
        for fcopy in remote_info['to_copy']:
            include.append("--include %s/%s" %
                           (remote_info["directory"], fcopy))

        cl = ["rdiff-backup", " ".join(include), "--exclude '**'",
              "%s@%s::%s" % (remote_info["user"], remote_info["hostname"],
                             remote_info["directory"]), fc_dir]

        log.debug(cl)
        fabric.run(" ".join(cl))

    else:
        # An unrecognised protocol transfers nothing; make that visible in
        # the log instead of returning as if the copy had happened.
        log.warning("Unknown transfer protocol %r; nothing was copied to %s" %
                    (protocol, fc_dir))

    return fc_dir