def _maybe_add_junction_files(algorithm, sample, out):
    """Append the STAR splice junction outputs found on ``sample`` to ``out``.

    The junction BED, the chimeric junction table and the STAR junction table
    are each looked up through ``dd``; every one that is set is appended as a
    record filed under the "STAR" directory. If the QC summary has a "star"
    entry with a non-empty "base" value, that is appended as a log record too.

    ``algorithm`` is accepted but not used by this function.
    Returns the same ``out`` list that was passed in.
    """
    def _add_star_file(path, file_type, ext):
        # Only files that were actually produced are recorded.
        if path:
            out.append({
                "path": path,
                "type": file_type,
                "ext": ext,
                "dir": "STAR",
            })

    _add_star_file(dd.get_junction_bed(sample), "bed", "SJ")
    _add_star_file(dd.get_chimericjunction(sample), "tsv", "chimericSJ")
    _add_star_file(dd.get_starjunction(sample), "tab", "SJ")

    star_qc = dd.get_summary_qc(sample).get("star", None)
    if not star_qc:
        return out
    base_log = star_qc["base"]
    if base_log:
        # The log record carries no "ext" key, unlike the junction records.
        out.append({"path": base_log, "type": "log", "dir": "STAR"})
    return out
def _maybe_add_junction_files(algorithm, sample, out):
    """Append the STAR splice junction outputs found on ``sample`` to ``out``.

    Three optional files are looked up through ``dd``: the junction BED, the
    chimeric junction table and the STAR junction table. Each one that is set
    is appended as a record filed under the "STAR" directory.

    ``algorithm`` is accepted but not used by this function.
    Returns the same ``out`` list that was passed in.
    """
    bed_path = dd.get_junction_bed(sample)
    if bed_path:
        out.append(dict(path=bed_path, type="bed", ext="SJ", dir="STAR"))

    chimeric_path = dd.get_chimericjunction(sample)
    if chimeric_path:
        out.append(dict(path=chimeric_path, type="tsv", ext="chimericSJ", dir="STAR"))

    star_sj_path = dd.get_starjunction(sample)
    if star_sj_path:
        out.append(dict(path=star_sj_path, type="tab", ext="SJ", dir="STAR"))

    return out
def filter_junction_variants(vrn_file, data):
    """Filter out variants within 10 basepairs of a splice junction.

    Calls this close to a splice junction are very prone to being false
    positives with RNA-seq data.

    Args:
        vrn_file: path of the variant file to filter.
        data: sample dictionary; read for the junction BED and the config.

    Returns:
        ``vrn_file`` unchanged when no splice junction BED file exists;
        otherwise the path of the "-junctionfiltered.vcf.gz" output, which is
        reused as-is when it is already present.
    """
    SJ_BP_MASK = 10
    splicebed = dd.get_junction_bed(data)
    if not file_exists(splicebed):
        logger.info(
            "Splice junction BED file not found, skipping filtering of "
            "variants closed to splice junctions.")
        return vrn_file
    out_file = os.path.splitext(vrn_file)[0] + "-junctionfiltered.vcf.gz"
    # Look for a finished result before building the padded BED, so a rerun
    # with existing output does no extra work.
    if file_exists(out_file):
        return out_file
    vrn_dir = os.path.dirname(vrn_file)
    # Presumably widens each junction interval by SJ_BP_MASK bases -- confirm
    # against get_padded_bed_file.
    spliceslop = get_padded_bed_file(vrn_dir, splicebed, SJ_BP_MASK, data)
    with file_transaction(data, out_file) as tx_out_file:
        # Dropping the final extension of the transactional path gives the
        # name the intersect result is saved under before compression.
        out_base = os.path.splitext(tx_out_file)[0]
        # Lazy logging arguments: the message is only formatted if emitted.
        logger.info(
            "Removing variants within %d bases of splice junctions listed in %s from %s. ",
            SJ_BP_MASK, spliceslop, vrn_file)
        # v=True keeps only the records with no overlap in the padded BED.
        pybedtools.BedTool(vrn_file).intersect(
            spliceslop, wa=True, header=True, v=True).saveas(out_base)
        vcfutils.bgzip_and_index(out_base, dd.get_config(data))
    return out_file
def filter_junction_variants(vrn_file, data):
    """Remove variants that fall within 10 basepairs of a splice junction.

    Such calls are very prone to being false positives with RNA-seq data.

    Args:
        vrn_file: path of the variant file to filter.
        data: sample dictionary; read for the junction BED and the config.

    Returns:
        The input ``vrn_file`` when the splice junction BED file is missing,
        otherwise the path ending in "-junctionfiltered.vcf.gz". An output
        file that already exists is returned without being regenerated.
    """
    SJ_BP_MASK = 10
    splicebed = dd.get_junction_bed(data)
    if not file_exists(splicebed):
        logger.info("Splice junction BED file not found, skipping filtering of "
                    "variants closed to splice junctions.")
        return vrn_file

    out_file = os.path.splitext(vrn_file)[0] + "-junctionfiltered.vcf.gz"
    if file_exists(out_file):
        # Nothing to redo; returning here also skips building the padded BED.
        return out_file

    # Presumably pads the junction intervals by SJ_BP_MASK bases -- verify
    # against get_padded_bed_file.
    spliceslop = get_padded_bed_file(
        os.path.dirname(vrn_file), splicebed, SJ_BP_MASK, data)
    with file_transaction(data, out_file) as tx_out_file:
        # The intersect result is saved under the transactional path minus
        # its final extension, then handed to bgzip_and_index.
        out_base = os.path.splitext(tx_out_file)[0]
        # Arguments are passed separately so formatting is left to logging.
        logger.info("Removing variants within %d bases of splice junctions listed in %s from %s. ",
                    SJ_BP_MASK, spliceslop, vrn_file)
        # v=True inverts the intersection: variants overlapping the padded
        # junctions are the ones dropped.
        kept = pybedtools.BedTool(vrn_file).intersect(
            spliceslop, wa=True, header=True, v=True)
        kept.saveas(out_base)
        vcfutils.bgzip_and_index(out_base, dd.get_config(data))
    return out_file