Example #1
0
def spv_freq_filter(line, tumor_index):
    """Mark VarScan records that combine a low SPV with a high tumor frequency.

    A record is tagged with the ``SpvFreq`` filter when its INFO ``SPV`` value
    is below 0.05 and the tumor sample's ``FREQ`` value is above 0.35.

    False positives dominate these higher frequency, low SPV calls. They appear
    to be primarily non-somatic/germline variants not removed by other filters.

    line -- a single tab-delimited VCF line (header or record).
    tumor_index -- zero-based offset of the tumor sample among the sample
      columns; the sample is read from column ``9 + tumor_index``.

    Returns the line to write out: the ``#CHROM`` line gains a preceding
    ``##FILTER`` definition, other header lines pass through untouched, and
    records are returned with the FILTER column updated when flagged.
    """
    if line.startswith("#"):
        if not line.startswith("#CHROM"):
            return line
        # The filter definition is emitted immediately before the column header line.
        filter_header = ('##FILTER=<ID=SpvFreq,Description="High frequency (tumor FREQ > 0.35) '
                         'and low p-value for somatic (SPV < 0.05)">')
        return filter_header + "\n" + line

    fields = line.split("\t")
    # Pair FORMAT keys (column 8) with the tumor sample's values.
    format_keys = fields[8].split(":")
    tumor_values = fields[9 + tumor_index].split(":")
    tumor_format = dict(zip(format_keys, tumor_values))
    tumor_freq = utils.safe_to_float(tumor_format.get("FREQ"))

    # Use the first SPV entry in the INFO column, if there is one.
    spv_text = None
    for entry in fields[7].split(";"):
        if entry.startswith("SPV="):
            spv_text = entry.split("=")[-1]
            break
    somatic_pval = utils.safe_to_float(spv_text)

    # Records missing either value are never flagged.
    if somatic_pval is None or tumor_freq is None:
        return "\t".join(fields)

    if somatic_pval < 0.05 and tumor_freq > 0.35:
        filter_name = "SpvFreq"
        if fields[6] in (".", "PASS"):
            # No existing filters: replace the placeholder.
            fields[6] = filter_name
        else:
            # Keep existing filters and append ours.
            fields[6] = ";".join([fields[6], filter_name])
    return "\t".join(fields)
Example #2
0
def spv_freq_filter(line, tumor_index):
    """Apply the ``SpvFreq`` filter to one line of a VarScan VCF.

    Records with an INFO ``SPV`` below 0.05 and a tumor ``FREQ`` above 0.35
    get ``SpvFreq`` written to their FILTER column.

    False positives dominate these higher frequency, low SPV calls. They appear
    to be primarily non-somatic/germline variants not removed by other filters.

    line -- one tab-delimited VCF line, either header or record.
    tumor_index -- zero-based position of the tumor sample among the sample
      columns (sample values are taken from column ``9 + tumor_index``).

    Returns the possibly modified line. The ``#CHROM`` header line is returned
    with the ``##FILTER`` definition inserted ahead of it; other ``#`` lines are
    returned unchanged.
    """
    filter_id = "SpvFreq"

    if not line.startswith("#"):
        columns = line.split("\t")
        # FORMAT keys (column 8) mapped onto the tumor sample's values.
        tumor_ft = dict(zip(columns[8].split(":"),
                            columns[9 + tumor_index].split(":")))
        tumor_freq = utils.safe_to_float(tumor_ft.get("FREQ"))
        # First SPV=<value> entry in INFO, or None when absent.
        spv_raw = next((item.split("=")[-1] for item in columns[7].split(";")
                        if item.startswith("SPV=")), None)
        somatic_p = utils.safe_to_float(spv_raw)

        # Both values must be present before the thresholds are checked.
        is_flagged = (somatic_p is not None and tumor_freq is not None
                      and somatic_p < 0.05 and tumor_freq > 0.35)
        if is_flagged:
            current = columns[6]
            # "." and "PASS" mean no prior filters; otherwise append to them.
            columns[6] = filter_id if current in (".", "PASS") else current + ";" + filter_id
        return "\t".join(columns)

    if line.startswith("#CHROM"):
        definition = (
            '##FILTER=<ID=SpvFreq,Description="High frequency (tumor FREQ > 0.35) '
            'and low p-value for somatic (SPV < 0.05)">')
        return "\n".join([definition, line])

    return line
Example #3
0
def depth_freq_filter(line, tumor_index, aligner):
    """Filter one VarDict VCF line based on depth, frequency and quality.

    Two filters can be written to a record's FILTER column:

    ``LowAlleleDepth`` -- applied only off the sex chromosomes (as judged by
    ``chromhacks.is_sex``) and only where the depth supporting the allele is
    low (AF * DP < 6, the equivalent of < 13bp for heterozygote calls, but
    generalized). Within those records, any one of these triggers it:

    - Low mapping quality and multiple mismatches in a read (NM).
        For bwa only: (MQ < 55.0 and NM > 1.0) or (MQ < 60.0 and NM > 2.0)
    - Low depth (DP < 10)
    - Low QUAL (QUAL < 45)

    ``LowFreqQuality`` -- applied when all of these are true:

    - Allele frequency (AF) < 0.2
    - QUAL < 45
    - P-value (INFO SSF) > 0.06

    When both filters match, only ``LowFreqQuality`` is recorded because it is
    evaluated last and replaces the earlier name.

    NOTE(review): earlier documentation for this function listed the
    LowFreqQuality quality threshold as 55, while the code compares against
    45 -- confirm which value is intended.

    line -- one tab-delimited VCF line, either header or record.
    tumor_index -- zero-based position of the tumor sample among the sample
      columns (sample values are taken from column ``9 + tumor_index``).
    aligner -- aligner name; the MQ/NM check runs only when this is "bwa".

    Returns the possibly modified line. The ``#CHROM`` header line is returned
    with both ``##FILTER`` definitions inserted ahead of it; other ``#`` lines
    are returned unchanged.
    """
    if line.startswith("#"):
        if not line.startswith("#CHROM"):
            return line
        low_depth_header = ('##FILTER=<ID=LowAlleleDepth,Description="Low depth per allele frequency '
                            'along with poor depth, quality, mapping quality and read mismatches.">')
        low_freq_header = ('##FILTER=<ID=LowFreqQuality,Description="Low frequency read with '
                           'poor quality and p-value (SSF).">')
        return "\n".join([low_depth_header, low_freq_header, line])

    fields = line.split("\t")
    # FORMAT keys (column 8) mapped onto the tumor sample's values.
    tumor_format = dict(zip(fields[8].split(":"), fields[9 + tumor_index].split(":")))
    qual = utils.safe_to_float(fields[5])
    depth = utils.safe_to_float(tumor_format.get("DP"))
    allele_freq = utils.safe_to_float(tumor_format.get("AF"))
    mismatches = utils.safe_to_float(tumor_format.get("NM"))
    map_qual = utils.safe_to_float(tumor_format.get("MQ"))
    # First SSF=<value> entry in INFO, or None when absent.
    ssf_text = None
    for entry in fields[7].split(";"):
        if entry.startswith("SSF="):
            ssf_text = entry.split("=")[-1]
            break
    pval = utils.safe_to_float(ssf_text)

    filter_name = None
    if not chromhacks.is_sex(fields[0]) and depth is not None and allele_freq is not None:
        if depth * allele_freq < 6:
            # The mapping quality / mismatch thresholds are only applied to bwa alignments.
            poor_mapping = (aligner == "bwa"
                            and mismatches is not None and map_qual is not None
                            and ((map_qual < 55.0 and mismatches > 1.0)
                                 or (map_qual < 60.0 and mismatches > 2.0)))
            shallow = depth < 10
            weak_qual = qual is not None and qual < 45
            if poor_mapping or shallow or weak_qual:
                filter_name = "LowAlleleDepth"

    # Checked independently of the block above; overrides LowAlleleDepth when it matches.
    have_low_freq_inputs = allele_freq is not None and qual is not None and pval is not None
    if have_low_freq_inputs and allele_freq < 0.2 and qual < 45 and pval > 0.06:
        filter_name = "LowFreqQuality"

    if filter_name:
        if fields[6] in (".", "PASS"):
            # No existing filters: replace the placeholder.
            fields[6] = filter_name
        else:
            # Keep existing filters and append ours.
            fields[6] = ";".join([fields[6], filter_name])
    return "\t".join(fields)