Ejemplo n.º 1
0
def trim_lane(item):
    """Trim the reads of one lane item according to ``algorithm.trim_reads``.

    ``item`` is read through its ``"files"``, ``"dirs"`` and ``"config"``
    keys. The value of ``config["algorithm"]["trim_reads"]`` selects what
    happens:

    * missing or falsy: nothing is trimmed;
    * ``"read_through"``: the files are passed to ``trim_read_through``;
    * any other truthy value (including ``"low_quality"`` and ``"true"``):
      the files are passed to ``brun_trim_fastq``, exactly once.

    Returns ``item`` unchanged when trimming is skipped; otherwise
    ``[[item]]`` with ``item["files"]`` replaced by the trimmer's output.
    """
    to_trim = [x for x in item["files"] if x is not None]
    dirs = item["dirs"]
    config = item["config"]
    # this block is to maintain legacy configuration files
    trim_reads = config["algorithm"].get("trim_reads", False)
    if not trim_reads:
        logger.info("Skipping trimming of %s." % (", ".join(to_trim)))
        # NOTE(review): this path returns the bare item while the trimming
        # path returns [[item]]; kept as-is for callers -- confirm which
        # shape they expect.
        return item

    # swap the default to None if trim_reads gets deprecated

    # A single if/else: previously an independent ``if`` for "low_quality"
    # was followed by an ``if``/``else`` for "read_through", so low-quality
    # trimming ran (and was logged) twice.
    if trim_reads == "read_through":
        logger.info("Trimming low quality ends and read through adapter "
                    "sequence from %s." % (", ".join(to_trim)))
        out_files = trim_read_through(to_trim, dirs, config)
    else:
        logger.info("Trimming low quality ends from %s."
                    % (", ".join(to_trim)))
        out_files = brun_trim_fastq(to_trim, dirs, config)
    item["files"] = out_files
    return [[item]]
Ejemplo n.º 2
0
def trim_lane(item):
    """Trim the reads of one lane item according to ``algorithm.trim_reads``.

    ``item`` is read through its ``"files"``, ``"dirs"`` and ``"config"``
    keys. The value of ``config["algorithm"]["trim_reads"]`` selects what
    happens:

    * missing or falsy: nothing is trimmed;
    * ``"read_through"``: the files are passed to ``trim_read_through``;
    * any other truthy value (including ``"low_quality"`` and ``"true"``):
      the files are passed to ``brun_trim_fastq``, exactly once.

    Returns ``item`` unchanged when trimming is skipped; otherwise
    ``[[item]]`` with ``item["files"]`` replaced by the trimmer's output.
    """
    to_trim = [x for x in item["files"] if x is not None]
    dirs = item["dirs"]
    config = item["config"]
    # this block is to maintain legacy configuration files
    trim_reads = config["algorithm"].get("trim_reads", False)
    if not trim_reads:
        logger.info("Skipping trimming of %s." % (", ".join(to_trim)))
        # NOTE(review): this path returns the bare item while the trimming
        # path returns [[item]]; kept as-is for callers -- confirm which
        # shape they expect.
        return item

    # swap the default to None if trim_reads gets deprecated

    # A single if/else: previously an independent ``if`` for "low_quality"
    # was followed by an ``if``/``else`` for "read_through", so low-quality
    # trimming ran (and was logged) twice.
    if trim_reads == "read_through":
        logger.info("Trimming low quality ends and read through adapter "
                    "sequence from %s." % (", ".join(to_trim)))
        out_files = trim_read_through(to_trim, dirs, config)
    else:
        logger.info("Trimming low quality ends from %s." %
                    (", ".join(to_trim)))
        out_files = brun_trim_fastq(to_trim, dirs, config)
    item["files"] = out_files
    return [[item]]
Ejemplo n.º 3
0
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    The lane name is built from the first item's ``'lane'`` value, ``fc_date``
    and ``fc_name``. The lane's fastq files are fetched and split by barcode;
    for every item whose ``"barcode_id"`` is present in the split result, one
    entry per file set yielded by ``_prep_fastq_files`` is produced. When the
    item's config has ``algorithm.trim_reads`` set, the files are run through
    ``brun_trim_fastq`` first.

    Returns a list of tuples
    ``(fastq1, fastq2, item, lane_name, lane_description, dirs, item_config)``.
    """
    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    logger.info("Demultiplexing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], dirs["work"],
                                               lane_items[0], fc_name, config=config)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    out = []
    for item in lane_items:
        # Derive each item's config from the lane-level config rather than
        # from the previous item's result, so one item's custom settings are
        # not carried over into the next item.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        # ``in`` instead of dict.has_key, which only exists on Python 2.
        if item["barcode_id"] in bc_files:
            for fastq1, fastq2, lane_ext in _prep_fastq_files(item, bc_files, dirs, item_config):
                cur_lane_name = lane_name
                cur_lane_desc = item["description"]
                if item.get("name", "") and item_config["algorithm"].get("include_short_name", True):
                    cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
                if item["barcode_id"] is not None:
                    cur_lane_name += "_%s" % (item["barcode_id"])
                if lane_ext is not None:
                    cur_lane_name += "_s{0}".format(lane_ext)
                if item_config["algorithm"].get("trim_reads", False):
                    trim_info = brun_trim_fastq([x for x in [fastq1, fastq2] if x is not None],
                                                dirs, item_config)
                    fastq1 = trim_info[0]
                    # Single-end data has no second file to replace.
                    if fastq2 is not None:
                        fastq2 = trim_info[1]
                out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                            dirs, item_config))
    return out
Ejemplo n.º 4
0
def trim_lane(fastq1, fastq2, info, lane_name, lane_desc, dirs, config):
    """Trim a lane's fastq files with the trimmer chosen in the configuration.

    Nothing happens unless ``algorithm.trim_reads`` is set. When it is,
    ``algorithm.trimmer`` picks the method: ``"low_quality"`` (the default)
    uses ``brun_trim_fastq``, ``"adapter"`` uses ``cutadapt_trim``, and any
    other value leaves the files untouched.

    Returns a one-element list holding the tuple
    ``(fastq1, fastq2, info, lane_name, lane_desc, dirs, config)`` with the
    fastq entries replaced by the trimmed files where trimming took place.
    """
    algorithm = config["algorithm"]

    # Legacy configuration files: without trim_reads there is nothing to do.
    if not algorithm.get("trim_reads", False):
        return [(fastq1, fastq2, info, lane_name, lane_desc, dirs, config)]

    # swap the default to None if trim_reads gets deprecated
    trimmer = algorithm.get("trimmer", "low_quality")

    reads = [fq for fq in (fastq1, fastq2) if fq is not None]
    read_names = ", ".join(reads)

    if trimmer == "low_quality":
        logger.info("Trimming low quality ends from %s." % read_names)
        trimmed = brun_trim_fastq(reads, dirs, config)
    elif trimmer == "adapter":
        logger.info("Trimming low quality ends and adapter sequence "
                    "from %s." % read_names)
        trimmed = cutadapt_trim(reads, dirs, config)
    else:
        logger.info("Skipping trimming of %s." % read_names)
        trimmed = [fastq1, fastq2]

    first = trimmed[0]
    # A missing second read stays missing; otherwise take its trimmed file.
    second = trimmed[1] if fastq2 is not None else fastq2

    return [(first, second, info, lane_name, lane_desc, dirs, config)]
Ejemplo n.º 5
0
Archivo: lane.py Proyecto: aminmg/bcbb
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Steps, in order:

    1. Build the lane name from the first item's ``'lane'`` value,
       ``fc_date`` and ``fc_name`` and fetch the lane's fastq files.
    2. If the first item's custom config sets ``algorithm.filter_phix``, run
       ``remove_contaminants`` on the whole lane -- unless
       ``algorithm.demultiplexed`` is set, in which case only a warning is
       logged.
    3. Split the (possibly filtered) files by barcode.
    4. For every item whose ``"barcode_id"`` is present in the split result,
       optionally trim with ``brun_trim_fastq`` (``algorithm.trim_reads``)
       and collect an output entry.

    Returns a list of tuples
    ``(fastq1, fastq2, item, lane_name, lane_description, dirs, item_config)``.
    """

    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"], dirs["work"],
                                               lane_items[0], fc_name, config=config)

    # Filter phiX
    custom_config = _update_config_w_custom(config, lane_items[0])
    if custom_config["algorithm"].get("filter_phix", False):
        # If we are starting from demultiplexed material, we will skip a lane-wise screening
        # Screening will be performed on a sample basis
        if custom_config["algorithm"].get("demultiplexed", False):
            # logger.warning: ``warn`` is a deprecated alias in stdlib logging.
            logger.warning("Will not filter phix lane-wise on already demultiplexed files. "
                           "You will have to specify genomes_filter_out option for each sample")

        else:
            logger.info("Filtering phiX from %s" % lane_name)
            info = {"genomes_filter_out": "spiked_phix", "description": lane_name}
            processed = remove_contaminants(full_fastq1, full_fastq2, info, lane_name, info["description"], dirs, custom_config)
            # The filtered files and lane name replace the originals.
            (full_fastq1, full_fastq2, _, lane_name) = processed[0][0:4]

    logger.info("Demultiplexing %s" % lane_name)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)

    out = []
    for item in lane_items:
        # Derive each item's config from the lane-level config rather than
        # from the previous item's result, so one item's custom settings are
        # not carried over into the next item.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] in bc_files:
            fastq1, fastq2 = bc_files[item["barcode_id"]]
            cur_lane_name = lane_name
            cur_lane_desc = item["description"]
            if item.get("name", "") and item_config["algorithm"].get("include_short_name", True):
                cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)

            if item["barcode_id"] is not None:
                cur_lane_name += "_%s" % (item["barcode_id"])

            if item_config["algorithm"].get("trim_reads", False):
                trim_info = brun_trim_fastq([x for x in [fastq1, fastq2] if x is not None],
                                            dirs, item_config)
                fastq1 = trim_info[0]
                # Single-end data has no second file to replace.
                if fastq2 is not None:
                    fastq2 = trim_info[1]

            out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                        dirs, item_config))

    return out
Ejemplo n.º 6
0
def trim_lane(fastq1, fastq2, info, lane_name, lane_desc, dirs, config):
    """Trim a lane's fastq files according to ``algorithm.trim_reads``.

    The value of ``config["algorithm"]["trim_reads"]`` selects what happens:

    * missing or falsy: nothing is trimmed;
    * ``"read_through"``: the files are passed to ``trim_read_through``;
    * any other truthy value (including ``"low_quality"`` and ``"true"``):
      the files are passed to ``brun_trim_fastq``, exactly once.

    Returns a one-element list holding the tuple
    ``(fastq1, fastq2, info, lane_name, lane_desc, dirs, config)``, with the
    fastq entries replaced by the trimmed files when trimming took place.
    """
    to_trim = [x for x in [fastq1, fastq2] if x is not None]
    # this block is to maintain legacy configuration files
    trim_reads = config["algorithm"].get("trim_reads", False)
    if not trim_reads:
        logger.info("Skipping trimming of %s." % (", ".join(to_trim)))
        return [(fastq1, fastq2, info, lane_name, lane_desc, dirs, config)]

    # swap the default to None if trim_reads gets deprecated

    # A single if/else: previously an independent ``if`` for "low_quality"
    # was followed by an ``if``/``else`` for "read_through", so low-quality
    # trimming ran (and was logged) twice.
    if trim_reads == "read_through":
        logger.info("Trimming low quality ends and read through adapter "
                    "sequence from %s." % (", ".join(to_trim)))
        out_files = trim_read_through(to_trim, dirs, config)
    else:
        logger.info("Trimming low quality ends from %s." %
                    (", ".join(to_trim)))
        out_files = brun_trim_fastq(to_trim, dirs, config)

    fastq1 = out_files[0]
    # Single-end data has no second file to replace.
    if fastq2 is not None:
        fastq2 = out_files[1]

    return [(fastq1, fastq2, info, lane_name, lane_desc, dirs, config)]
Ejemplo n.º 7
0
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    The lane name is built from the first item's ``'lane'`` value, ``fc_date``
    and ``fc_name``. The lane's fastq files are fetched (using the first
    item's custom config) and split by barcode; for every item whose
    ``"barcode_id"`` is present in the split result, one entry per file set
    yielded by ``_prep_fastq_files`` is produced. When the item's config has
    ``algorithm.trim_reads`` set, the files are run through
    ``brun_trim_fastq`` first.

    Returns a list of tuples
    ``(fastq1, fastq2, item, lane_name, lane_description, dirs, item_config)``.
    """
    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    logger.info("Demultiplexing %s" % lane_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"],
                                               dirs["work"],
                                               lane_items[0],
                                               fc_name,
                                               config=_update_config_w_custom(
                                                   config, lane_items[0]))
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)
    out = []
    for item in lane_items:
        # Derive each item's config from the lane-level config rather than
        # from the previous item's result, so one item's custom settings are
        # not carried over into the next item.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        # ``in`` instead of dict.has_key, which only exists on Python 2.
        if item["barcode_id"] in bc_files:
            for fastq1, fastq2, lane_ext in _prep_fastq_files(
                    item, bc_files, dirs, item_config):
                cur_lane_name = lane_name
                cur_lane_desc = item["description"]
                if item.get("name", "") and item_config["algorithm"].get(
                        "include_short_name", True):
                    cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)
                if item["barcode_id"] is not None:
                    cur_lane_name += "_%s" % (item["barcode_id"])
                if lane_ext is not None:
                    cur_lane_name += "_s{0}".format(lane_ext)
                if item_config["algorithm"].get("trim_reads", False):
                    trim_info = brun_trim_fastq(
                        [x for x in [fastq1, fastq2] if x is not None], dirs,
                        item_config)
                    fastq1 = trim_info[0]
                    # Single-end data has no second file to replace.
                    if fastq2 is not None:
                        fastq2 = trim_info[1]
                out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                            dirs, item_config))
    return out
Ejemplo n.º 8
0
def process_lane(lane_items, fc_name, fc_date, dirs, config):
    """Prepare lanes, potentially splitting based on barcodes.

    Steps, in order:

    1. Build the lane name from the first item's ``'lane'`` value,
       ``fc_date`` and ``fc_name`` and fetch the lane's fastq files.
    2. If the first item's custom config sets ``algorithm.filter_phix``, run
       ``remove_contaminants`` on the whole lane -- unless
       ``algorithm.demultiplexed`` is set, in which case only a warning is
       logged.
    3. Split the (possibly filtered) files by barcode.
    4. For every item whose ``"barcode_id"`` is present in the split result,
       optionally trim with ``brun_trim_fastq`` (``algorithm.trim_reads``)
       and collect an output entry.

    Returns a list of tuples
    ``(fastq1, fastq2, item, lane_name, lane_description, dirs, item_config)``.
    """

    lane_name = "%s_%s_%s" % (lane_items[0]['lane'], fc_date, fc_name)
    full_fastq1, full_fastq2 = get_fastq_files(dirs["fastq"],
                                               dirs["work"],
                                               lane_items[0],
                                               fc_name,
                                               config=config)

    # Filter phiX
    custom_config = _update_config_w_custom(config, lane_items[0])
    if custom_config["algorithm"].get("filter_phix", False):
        # If we are starting from demultiplexed material, we will skip a lane-wise screening
        # Screening will be performed on a sample basis
        if custom_config["algorithm"].get("demultiplexed", False):
            # logger.warning: ``warn`` is a deprecated alias in stdlib logging.
            logger.warning("Will not filter phix lane-wise on already demultiplexed files. "
                           "You will have to specify genomes_filter_out option for each sample")

        else:
            logger.info("Filtering phiX from %s" % lane_name)
            info = {
                "genomes_filter_out": "spiked_phix",
                "description": lane_name
            }
            processed = remove_contaminants(full_fastq1, full_fastq2, info,
                                            lane_name, info["description"],
                                            dirs, custom_config)
            # The filtered files and lane name replace the originals.
            (full_fastq1, full_fastq2, _, lane_name) = processed[0][0:4]

    logger.info("Demultiplexing %s" % lane_name)
    bc_files = split_by_barcode(full_fastq1, full_fastq2, lane_items,
                                lane_name, dirs, config)

    out = []
    for item in lane_items:
        # Derive each item's config from the lane-level config rather than
        # from the previous item's result, so one item's custom settings are
        # not carried over into the next item.
        item_config = _update_config_w_custom(config, item)
        # Can specify all barcodes but might not have actual sequences
        # Would be nice to have a good way to check this is okay here.
        if item["barcode_id"] in bc_files:
            fastq1, fastq2 = bc_files[item["barcode_id"]]
            cur_lane_name = lane_name
            cur_lane_desc = item["description"]
            if item.get("name", "") and item_config["algorithm"].get(
                    "include_short_name", True):
                cur_lane_desc = "%s : %s" % (item["name"], cur_lane_desc)

            if item["barcode_id"] is not None:
                cur_lane_name += "_%s" % (item["barcode_id"])

            if item_config["algorithm"].get("trim_reads", False):
                trim_info = brun_trim_fastq(
                    [x for x in [fastq1, fastq2] if x is not None], dirs,
                    item_config)
                fastq1 = trim_info[0]
                # Single-end data has no second file to replace.
                if fastq2 is not None:
                    fastq2 = trim_info[1]

            out.append((fastq1, fastq2, item, cur_lane_name, cur_lane_desc,
                        dirs, item_config))

    return out