Exemple #1
0
def _maybe_add_junction_files(algorithm, sample, out):
    """
    Append STAR splice junction outputs for ``sample`` to ``out``.

    The junction BED, chimeric junction and STAR junction files are each
    added only when the matching ``dd`` getter returns a truthy path. The
    STAR log is added when the sample's QC summary has a truthy ``"star"``
    entry whose ``"base"`` value is truthy. ``algorithm`` is accepted but
    not used here.

    Returns the same ``out`` list, which is extended in place.
    """
    def _register(path, file_type, extension):
        # Skip outputs that are not available for this sample.
        if path:
            out.append({
                "path": path,
                "type": file_type,
                "ext": extension,
                "dir": "STAR"
            })

    _register(dd.get_junction_bed(sample), "bed", "SJ")
    _register(dd.get_chimericjunction(sample), "tsv", "chimericSJ")
    _register(dd.get_starjunction(sample), "tab", "SJ")

    star_qc = dd.get_summary_qc(sample).get("star", None)
    log_path = star_qc["base"] if star_qc else None
    if log_path:
        out.append({"path": log_path, "type": "log", "dir": "STAR"})
    return out
Exemple #2
0
def _maybe_add_junction_files(algorithm, sample, out):
    """
    Record whichever STAR junction files exist for ``sample`` in ``out``.

    Looks up, in order, the junction BED, the chimeric junction file and the
    STAR junction file; every lookup that yields a truthy path is appended to
    ``out`` as a dict with ``path``/``type``/``ext``/``dir`` keys. The
    ``algorithm`` argument is not used here.

    Returns ``out`` (the same list object, mutated in place).
    """
    # (getter, "type" value, "ext" value) -- order fixes the order in `out`.
    junction_sources = (
        (dd.get_junction_bed, "bed", "SJ"),
        (dd.get_chimericjunction, "tsv", "chimericSJ"),
        (dd.get_starjunction, "tab", "SJ"),
    )
    for lookup, file_type, extension in junction_sources:
        found = lookup(sample)
        if not found:
            continue
        out.append({
            "path": found,
            "type": file_type,
            "ext": extension,
            "dir": "STAR"
        })
    return out
Exemple #3
0
def filter_junction_variants(vrn_file, data):
    """
    Filter out variants within 10 basepairs of a splice junction.

    Variants this close to a junction are very prone to being false
    positives with RNA-seq data.

    Args:
        vrn_file: path to the variant file to filter.
        data: sample dictionary; supplies the splice junction BED (via
            ``dd.get_junction_bed``) and the config handed to
            ``vcfutils.bgzip_and_index``.

    Returns:
        ``vrn_file`` unchanged when ``file_exists`` reports no junction BED.
        Otherwise the path built from ``vrn_file`` minus its last extension
        plus ``-junctionfiltered.vcf.gz``; an output that ``file_exists``
        already reports is returned without being regenerated.
    """
    SJ_BP_MASK = 10
    vrn_dir = os.path.dirname(vrn_file)
    splicebed = dd.get_junction_bed(data)
    if not file_exists(splicebed):
        logger.info(
            "Splice junction BED file not found, skipping filtering of "
            "variants closed to splice junctions.")
        return vrn_file
    spliceslop = get_padded_bed_file(vrn_dir, splicebed, SJ_BP_MASK, data)
    out_file = os.path.splitext(vrn_file)[0] + "-junctionfiltered.vcf.gz"
    if file_exists(out_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        # Write the intersect result to the transaction path minus its final
        # extension; bgzip_and_index then compresses and indexes that file.
        # NOTE(review): presumably the compressed result lands at tx_out_file
        # (out_base + ".gz") -- confirm against vcfutils.bgzip_and_index.
        out_base = os.path.splitext(tx_out_file)[0]
        # Arguments are passed to the logger so formatting is deferred.
        logger.info(
            "Removing variants within %d bases of splice junctions listed in %s from %s. ",
            SJ_BP_MASK, spliceslop, vrn_file)
        # v=True keeps only the variants that do NOT overlap the padded
        # junction intervals; header=True carries the input header through.
        pybedtools.BedTool(vrn_file).intersect(spliceslop,
                                               wa=True,
                                               header=True,
                                               v=True).saveas(out_base)
        # The return value is not needed: the original only rebound
        # tx_out_file with it and never read it again.
        vcfutils.bgzip_and_index(out_base, dd.get_config(data))
    return out_file
Exemple #4
0
def filter_junction_variants(vrn_file, data):
    """
    Remove variants lying within 10 basepairs of a splice junction.

    Such variants are very prone to being false positives with RNA-seq data.

    Args:
        vrn_file: path of the variant file to filter.
        data: sample dictionary, used to look up the splice junction BED
            (``dd.get_junction_bed``) and the config passed on to
            ``vcfutils.bgzip_and_index``.

    Returns:
        The input ``vrn_file`` when ``file_exists`` finds no junction BED;
        otherwise ``<vrn_file without its last extension>`` followed by
        ``-junctionfiltered.vcf.gz``. If ``file_exists`` already reports that
        output, it is returned as-is without re-running the filter.
    """
    SJ_BP_MASK = 10
    vrn_dir = os.path.dirname(vrn_file)
    splicebed = dd.get_junction_bed(data)
    if not file_exists(splicebed):
        logger.info("Splice junction BED file not found, skipping filtering of "
                    "variants closed to splice junctions.")
        return vrn_file
    spliceslop = get_padded_bed_file(vrn_dir, splicebed, SJ_BP_MASK, data)
    out_file = os.path.splitext(vrn_file)[0] + "-junctionfiltered.vcf.gz"
    if file_exists(out_file):
        return out_file
    with file_transaction(data, out_file) as tx_out_file:
        # The uncompressed intersect output goes to the transaction path with
        # its final extension stripped.
        out_base = os.path.splitext(tx_out_file)[0]
        # Let the logger interpolate the arguments instead of formatting the
        # message up front.
        logger.info("Removing variants within %d bases of splice junctions listed in %s from %s. ",
                    SJ_BP_MASK, spliceslop, vrn_file)
        # v=True: report only entries of vrn_file with no overlap in spliceslop.
        kept = pybedtools.BedTool(vrn_file).intersect(spliceslop, wa=True, header=True, v=True)
        kept.saveas(out_base)
        # Return value deliberately ignored; the original stored it in
        # tx_out_file but never used it afterwards.
        # NOTE(review): assumes bgzip_and_index writes out_base + ".gz", i.e.
        # the tx_out_file path -- confirm.
        vcfutils.bgzip_and_index(out_base, dd.get_config(data))
    return out_file