Ejemplo n.º 1
0
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Args:
        lane_items: Sequence of per-sample dicts belonging to one lane. Each
            is read for "lane" (first item only), "barcode_id" and
            "description", and optionally "name".
        fc_name: Flowcell name; becomes part of the lane name.
        fc_date: Flowcell date; becomes part of the lane name.
        dirs: Dict of directories; "fastq" and "work" are read here and the
            whole dict is forwarded to the helpers and the output.
        config: Base configuration. It is customized per item before use and
            is never rebound, so every item starts from the same base.

    Returns:
        A list of ``(fastq1, fastq2, item, lane_name, lane_description, dirs,
        item_config)`` tuples, one per fastq set produced for an item whose
        barcode is present in the split output. Empty when ``lane_items`` is
        empty.
    """
    if not lane_items:
        # Nothing to prepare; also avoids an IndexError on lane_items[0].
        return []
    first_item = lane_items[0]
    lane_name = "%s_%s_%s" % (first_item['lane'], fc_date, fc_name)
    logger.info("Preparing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(
        dirs["fastq"],
        dirs["work"],
        first_item,
        fc_name,
        dirs=dirs,
        config=shared.update_config_w_custom(config, first_item))
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    out = []
    for item in lane_items:
        # Derive each item's configuration from the unchanged base config.
        # Rebinding ``config`` here would feed one item's customized
        # configuration into the customization of the next item.
        item_config = shared.update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] in bc_files:
            for fastq1, fastq2, lane_ext in _prep_fastq_files(
                    item, bc_files, dirs, item_config):
                cur_lane_name = lane_name
                cur_lane_desc = item["description"]
                if item.get("name", "") and item_config["algorithm"].get(
                        "include_short_name", True):
                    cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
                if item["barcode_id"] is not None:
                    cur_lane_name += "_%s" % (item["barcode_id"])
                if lane_ext is not None:
                    cur_lane_name += "_s{0}".format(lane_ext)
                out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                            dirs, item_config))
    return out
Ejemplo n.º 2
0
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Args:
        lane_items: Sequence of per-sample dicts belonging to one lane. Each
            is read for "lane" (first item only), "barcode_id" and
            "description", and optionally "name".
        fc_name: Flowcell name; becomes part of the lane name.
        fc_date: Flowcell date; becomes part of the lane name.
        dirs: Dict of directories; "fastq" and "work" are read here and the
            whole dict is forwarded to the helpers and the output.
        config: Base configuration. It is customized per item before use and
            is never rebound, so every item starts from the same base.

    Returns:
        A list of ``(fastq1, fastq2, item, lane_name, lane_description, dirs,
        item_config)`` tuples, one per fastq set produced for an item whose
        barcode is present in the split output. Empty when ``lane_items`` is
        empty.
    """
    if not lane_items:
        # Nothing to prepare; also avoids an IndexError on lane_items[0].
        return []
    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    logger.info("Demultiplexing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], dirs["work"],
                                               lane_items[0], fc_name, config=config)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    out = []
    for item in lane_items:
        # Derive each item's configuration from the unchanged base config.
        # Rebinding ``config`` here would feed one item's customized
        # configuration into the customization of the next item.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        # ``in`` instead of ``dict.has_key``, which no longer exists in Python 3.
        if item["barcode_id"] in bc_files:
            for fastq1, fastq2, lane_ext in _prep_fastq_files(item, bc_files, dirs, item_config):
                cur_lane_name = lane_name
                cur_lane_desc = item["description"]
                if item.get("name", "") and item_config["algorithm"].get("include_short_name", True):
                    cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
                if item["barcode_id"] is not None:
                    cur_lane_name += "_%s" % (item["barcode_id"])
                if lane_ext is not None:
                    cur_lane_name += "_s{0}".format(lane_ext)
                if item_config["algorithm"].get("trim_reads", False):
                    # Only existing files are handed to the trimmer; its
                    # results replace fastq1 and, for paired input, fastq2.
                    trim_info = brun_trim_fastq([x for x in [fastq1, fastq2] if x is not None],
                                                dirs, item_config)
                    fastq1 = trim_info[0]
                    if fastq2 is not None:
                        fastq2 = trim_info[1]
                out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                            dirs, item_config))
    return out
Ejemplo n.º 3
0
Archivo: lane.py Proyecto: kdaily/bcbb
def process_lane(info, fc_name, fc_date, dirs, config):
    """Prepare a lane for processing, splitting it by barcode if multiplexed.

    Args:
        info: Dict describing the lane. "lane" is required; "description",
            "name", "genome_build", "multiplex", "researcher" and "analysis"
            are read when present.
        fc_name: Flowcell name; becomes part of the lane name.
        fc_date: Flowcell date; becomes part of the lane name.
        dirs: Dict of directories; "fastq" is read here and the whole dict is
            forwarded to the barcode splitter and the output.
        config: Base configuration, customized for this lane before use.

    Returns:
        A list of ``(fastq1, fastq2, genome_build, lane_name, sample_name,
        dirs, config)`` tuples, one per entry produced by the barcode split.
    """
    config = _update_config_w_custom(config, info)

    # Build the display name, optionally prefixed with the short name.
    sample_name = info.get("description", "")
    wants_short_name = config["algorithm"].get("include_short_name", True)
    if wants_short_name and info.get("name", ""):
        sample_name = "%s---%s" % (info.get("name", ""), sample_name)

    genome_build = info.get("genome_build", None)
    multiplex = info.get("multiplex", None)

    summary = ("Processing sample: %s; lane %s; reference genome %s; "
               "researcher %s; analysis method %s")
    log.info(summary % (sample_name, info["lane"], genome_build,
                        info.get("researcher", ""), info.get("analysis", "")))
    if multiplex:
        log.debug("Sample %s multiplexed as: %s" % (sample_name, multiplex))

    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], info, fc_name)
    lane_name = "%s_%s_%s" % (info['lane'], fc_date, fc_name)

    prepared = []
    splits = split_by_barcode(full_fastq1, full_fastq2, multiplex,
                              lane_name, dirs, config)
    for bc_name, bc_sample, bc_fastq1, bc_fastq2 in splits:
        # Only extend the lane name when the split carries a barcode name.
        if bc_name:
            bc_lane_name = "%s_%s" % (lane_name, bc_name)
        else:
            bc_lane_name = lane_name
        # Fall back to a name derived from the lane sample when the split
        # does not supply one.
        if bc_sample is None:
            bc_sample = "%s---%s" % (sample_name, bc_name)
        prepared.append((bc_fastq1, bc_fastq2, genome_build, bc_lane_name,
                         bc_sample, dirs, config))
    return prepared
Ejemplo n.º 4
0
Archivo: lane.py Proyecto: aminmg/bcbb
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    When the first item's customized configuration enables the algorithm
    option "filter_phix" and does not mark the input as "demultiplexed", the
    lane fastq files are first passed through ``remove_contaminants``; the
    fastq files and lane name used afterwards come from its first result.

    Args:
        lane_items: Sequence of per-sample dicts belonging to one lane. Each
            is read for "lane" (first item only), "barcode_id" and
            "description", and optionally "name".
        fc_name: Flowcell name; becomes part of the lane name.
        fc_date: Flowcell date; becomes part of the lane name.
        dirs: Dict of directories; "fastq" and "work" are read here and the
            whole dict is forwarded to the helpers and the output.
        config: Base configuration. It is customized per item before use and
            is never rebound, so every item starts from the same base.

    Returns:
        A list of ``(fastq1, fastq2, item, lane_name, lane_description, dirs,
        item_config)`` tuples, one per item whose barcode is present in the
        split output. Empty when ``lane_items`` is empty.
    """
    if not lane_items:
        # Nothing to prepare; also avoids an IndexError on lane_items[0].
        return []

    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], dirs["work"],
                                               lane_items[0], fc_name, config=config)

    # Filter phiX
    custom_config = _update_config_w_custom(config, lane_items[0])
    if custom_config["algorithm"].get("filter_phix", False):
        # If we are starting from demultiplexed material, we will skip a lane-wise screening
        # Screening will be performed on a sample basis
        if custom_config["algorithm"].get("demultiplexed", False):
            logger.warn("Will not filter phix lane-wise on already demultiplexed files. "
                        "You will have to specify genomes_filter_out option for each sample")

        else:
            logger.info("Filtering phiX from %s" % lane_name)
            info = {"genomes_filter_out": "spiked_phix", "description": lane_name}
            processed = remove_contaminants(full_fastq1, full_fastq2, info, lane_name,
                                            info["description"], dirs, custom_config)
            # Continue with the filtered files and the lane name reported
            # by the first result.
            (full_fastq1, full_fastq2, _, lane_name) = processed[0][0:4]

    logger.info("Demultiplexing %s" % lane_name)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)

    out = []
    for item in lane_items:
        # Derive each item's configuration from the unchanged base config.
        # Rebinding ``config`` here would feed one item's customized
        # configuration into the customization of the next item.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] in bc_files:
            fastq1, fastq2 = bc_files[item["barcode_id"]]
            cur_lane_name = lane_name
            cur_lane_desc = item["description"]
            if item.get("name", "") and item_config["algorithm"].get("include_short_name", True):
                cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)

            if item["barcode_id"] is not None:
                cur_lane_name += "_%s" % (item["barcode_id"])

            if item_config["algorithm"].get("trim_reads", False):
                # Only existing files are handed to the trimmer; its results
                # replace fastq1 and, for paired input, fastq2.
                trim_info = brun_trim_fastq([x for x in [fastq1, fastq2] if x is not None],
                                            dirs, item_config)
                fastq1 = trim_info[0]
                if fastq2 is not None:
                    fastq2 = trim_info[1]

            out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                        dirs, item_config))

    return out
Ejemplo n.º 5
0
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Args:
        lane_items: Sequence of per-sample dicts belonging to one lane. Each
            is read for "lane" (first item only), "barcode_id" and
            "description", and optionally "name".
        fc_name: Flowcell name; becomes part of the lane name.
        fc_date: Flowcell date; becomes part of the lane name.
        dirs: Dict of directories; "fastq" is read here and the whole dict is
            forwarded to the barcode splitter and the output.
        config: Base configuration. It is customized per item before use and
            is never rebound, so every item starts from the same base.

    Returns:
        A list of ``(fastq1, fastq2, item, lane_name, lane_description, dirs,
        item_config)`` tuples, one per item whose barcode is present in the
        split output. Empty when ``lane_items`` is empty.
    """
    if not lane_items:
        # Nothing to prepare; also avoids an IndexError on lane_items[0].
        return []
    lane_name = "%s_%s_%s" % (lane_items[0]["lane"], fc_date, fc_name)
    log.debug("Demultiplexing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], lane_items[0], fc_name)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items, lane_name, dirs, config)
    out = []
    for item in lane_items:
        # Derive each item's configuration from the unchanged base config.
        # Rebinding ``config`` here would feed one item's customized
        # configuration into the customization of the next item.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        # ``in`` instead of ``dict.has_key``, which no longer exists in Python 3.
        if item["barcode_id"] in bc_files:
            fastq1, fastq2 = bc_files[item["barcode_id"]]
            cur_lane_name = lane_name
            cur_lane_desc = item["description"]
            if item.get("name", ""):
                cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
            if item["barcode_id"] is not None:
                cur_lane_name += "_%s" % (item["barcode_id"])
            out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc, dirs, item_config))
    return out
Ejemplo n.º 6
0
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Args:
        lane_items: Sequence of per-sample dicts belonging to one lane. Each
            is read for "barcode_id" and ``["rgnames"]["lane"]``. The input
            dicts are not modified.
        fc_name: Flowcell name; forwarded to ``get_fastq_files``.
        fc_date: Flowcell date; not used by this implementation.
        dirs: Dict of directories; "fastq" and "work" are read here and the
            whole dict is forwarded to the helpers and the output.
        config: Base configuration. It is customized per item before use and
            is never rebound, so every item starts from the same base.

    Returns:
        A list of ``(fastq1, fastq2, item, dirs, item_config)`` tuples, one
        per fastq set produced for an item whose barcode is present in the
        split output. Each ``item`` is a copy of the input item whose
        ``["rgnames"]["lane"]`` carries the barcode and split suffixes for
        that output. Empty when ``lane_items`` is empty.
    """
    if not lane_items:
        # Nothing to prepare; also avoids an IndexError on lane_items[0].
        return []
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"],
                                               dirs["work"], lane_items[0], fc_name, dirs=dirs,
                                               config=config_utils.update_w_custom(config, lane_items[0]))
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_items[0]["rgnames"]["lane"], dirs, config)
    out = []
    for item in lane_items:
        base_lane = item["rgnames"]["lane"]
        logger.debug("Preparing %s" % base_lane)
        # Derive each item's configuration from the unchanged base config.
        # Rebinding ``config`` here would feed one item's customized
        # configuration into the customization of the next item.
        item_config = config_utils.update_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] in bc_files:
            for fastq1, fastq2, lane_ext in _prep_fastq_files(item, bc_files, dirs, item_config):
                # Always build the name from the item's original lane name.
                # Appending to item["rgnames"]["lane"] in place would stack
                # the suffixes of every split onto one shared name.
                cur_lane = base_lane
                if item["barcode_id"] is not None:
                    cur_lane += "_%s" % (item["barcode_id"])
                if lane_ext is not None:
                    cur_lane += "_s{0}".format(lane_ext)
                # Each output gets its own item and rgnames dicts so that
                # several splits of one item do not alias the same lane name.
                cur_item = dict(item)
                cur_item["rgnames"] = dict(item["rgnames"])
                cur_item["rgnames"]["lane"] = cur_lane
                out.append((fastq1, fastq2, cur_item, dirs, item_config))
    return out
Ejemplo n.º 7
0
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    When the first item's customized configuration enables the algorithm
    option "filter_phix" and does not mark the input as "demultiplexed", the
    lane fastq files are first passed through ``remove_contaminants``; the
    fastq files and lane name used afterwards come from its first result.

    Args:
        lane_items: Sequence of per-sample dicts belonging to one lane. Each
            is read for "lane" (first item only), "barcode_id" and
            "description", and optionally "name".
        fc_name: Flowcell name; becomes part of the lane name.
        fc_date: Flowcell date; becomes part of the lane name.
        dirs: Dict of directories; "fastq" and "work" are read here and the
            whole dict is forwarded to the helpers and the output.
        config: Base configuration. It is customized per item before use and
            is never rebound, so every item starts from the same base.

    Returns:
        A list of ``(fastq1, fastq2, item, lane_name, lane_description, dirs,
        item_config)`` tuples, one per item whose barcode is present in the
        split output. Empty when ``lane_items`` is empty.
    """
    if not lane_items:
        # Nothing to prepare; also avoids an IndexError on lane_items[0].
        return []

    first_item = lane_items[0]
    lane_name = "%s_%s_%s" % (first_item['lane'], fc_date, fc_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"],
                                               dirs["work"],
                                               first_item,
                                               fc_name,
                                               config=config)

    # Filter phiX
    custom_config = _update_config_w_custom(config, first_item)
    if custom_config["algorithm"].get("filter_phix", False):
        # If we are starting from demultiplexed material, we will skip a lane-wise screening
        # Screening will be performed on a sample basis
        if custom_config["algorithm"].get("demultiplexed", False):
            logger.warn("Will not filter phix lane-wise on already demultiplexed files. "
                        "You will have to specify genomes_filter_out option for each sample")

        else:
            logger.info("Filtering phiX from %s" % lane_name)
            info = {
                "genomes_filter_out": "spiked_phix",
                "description": lane_name
            }
            processed = remove_contaminants(full_fastq1, full_fastq2, info,
                                            lane_name, info["description"],
                                            dirs, custom_config)
            # Continue with the filtered files and the lane name reported
            # by the first result.
            (full_fastq1, full_fastq2, _, lane_name) = processed[0][0:4]

    logger.info("Demultiplexing %s" % lane_name)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)

    out = []
    for item in lane_items:
        # Derive each item's configuration from the unchanged base config.
        # Rebinding ``config`` here would feed one item's customized
        # configuration into the customization of the next item.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] in bc_files:
            fastq1, fastq2 = bc_files[item["barcode_id"]]
            cur_lane_name = lane_name
            cur_lane_desc = item["description"]
            if item.get("name", "") and item_config["algorithm"].get(
                    "include_short_name", True):
                cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)

            if item["barcode_id"] is not None:
                cur_lane_name += "_%s" % (item["barcode_id"])

            if item_config["algorithm"].get("trim_reads", False):
                # Only existing files are handed to the trimmer; its results
                # replace fastq1 and, for paired input, fastq2.
                trim_info = brun_trim_fastq(
                    [x for x in [fastq1, fastq2] if x is not None], dirs,
                    item_config)
                fastq1 = trim_info[0]
                if fastq2 is not None:
                    fastq2 = trim_info[1]

            out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                        dirs, item_config))

    return out