Example #1
def _af_filter(data, in_file, out_file):
    """Soft-filter variants with AF below min_allele_fraction (appends "MinAF" to FILTER)
    """
    min_freq = float(utils.get_in(data["config"], ("algorithm", "min_allele_fraction"), 10)) / 100.0
    logger.debug("Filtering MuTect2 calls with allele fraction threshold of %s" % min_freq)
    ungz_out_file = "%s.vcf" % utils.splitext_plus(out_file)[0]
    if not utils.file_exists(ungz_out_file) and not utils.file_exists(ungz_out_file + ".gz"):
        with file_transaction(data, ungz_out_file) as tx_out_file:
            vcf = VCF(in_file)
            vcf.add_filter_to_header({
                'ID': 'MinAF',
                'Description': 'Allele frequency is lower than %s%% ' % (min_freq*100) + (
                    '(configured in bcbio as min_allele_fraction)'
                    if utils.get_in(data["config"], ("algorithm", "min_allele_fraction"))
                    else '(default threshold in bcbio; override with min_allele_fraction in the algorithm section)')})
            w = Writer(tx_out_file, vcf)
            # GATK 3.x can produce VCFs without sample names for empty VCFs
            try:
                tumor_index = vcf.samples.index(dd.get_sample_name(data))
            except ValueError:
                tumor_index = None
            for rec in vcf:
                if tumor_index is not None and np.all(rec.format('AF')[tumor_index] < min_freq):
                    vcfutils.cyvcf_add_filter(rec, 'MinAF')
                w.write_record(rec)
            w.close()
    return vcfutils.bgzip_and_index(ungz_out_file, data["config"])
Example #2
def _af_filter(data, in_file, out_file):
    """Soft-filter variants with AF below min_allele_fraction (appends "MinAF" to FILTER)
    """
    min_freq = float(
        utils.get_in(data["config"],
                     ("algorithm", "min_allele_fraction"), 10)) / 100.0
    logger.info(
        "Filtering MuTect2 calls with allele fraction threshold of %s" %
        min_freq)
    ungz_out_file = "%s.vcf" % utils.splitext_plus(out_file)[0]
    if not utils.file_exists(ungz_out_file) and not utils.file_exists(
            ungz_out_file + ".gz"):
        with file_transaction(data, ungz_out_file) as tx_out_file:
            vcf = VCF(in_file)
            vcf.add_filter_to_header({
                'ID': 'MinAF',
                'Description': 'Allele frequency is lower than %s%% ' % (min_freq * 100) + (
                    '(configured in bcbio as min_allele_fraction)'
                    if utils.get_in(data["config"], ("algorithm", "min_allele_fraction"))
                    else '(default threshold in bcbio; override with min_allele_fraction in the algorithm section)')})
            w = Writer(tx_out_file, vcf)
            tumor_index = vcf.samples.index(data['description'])
            for rec in vcf:
                if np.all(rec.format('AF')[tumor_index] < min_freq):
                    vcfutils.cyvcf_add_filter(rec, 'MinAF')
                w.write_record(rec)
            w.close()
    return vcfutils.bgzip_and_index(ungz_out_file, data["config"])
Example #3
def mark_missing_sites(vcffile, region, missing_threshold, soft_filter):
    vcf = VCF(vcffile)
    header_param_id = {
        'ID': 'MISSING',
        'Description': 'failed variant site missingness threshold ({} %)'.format(missing_threshold)
    }
    header_param_info = {
        'ID': 'MISSINGPCT',
        'Description': 'site missingness percentage',
        'Type': 'Float',
        'Number': '1'
    }
    vcf.add_filter_to_header(header_param_id)
    vcf.add_info_to_header(header_param_info)
    out = Writer('-', vcf)
    (total_sites, noted_sites) = (0, 0)

    for variant in vcf(region):
        total_sites += 1
        (missing_pct, missing, total) = compute_missingness(variant)
        verdict = variant_missing_criteria(missing_threshold, missing_pct)
        variant = update_variant(variant, verdict, missing_pct)
        if verdict == "pass":
            noted_sites += 1
            out.write_record(variant)
        elif verdict == "fail" and soft_filter:
            out.write_record(variant)

    out.close()
    msg = "After filtering, passed {} out of a possible {} Sites ({})"
    msg = msg.format(noted_sites, total_sites, 'pass')
    print(msg, file=sys.stderr)
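The helpers used above (compute_missingness, variant_missing_criteria, update_variant) are defined elsewhere in that module and are not shown. As a rough, hypothetical sketch only, update_variant could look like the following, assuming it simply writes the INFO and FILTER fields registered in the header above:

def update_variant(variant, verdict, missing_pct):
    # Hypothetical helper, not part of the original snippet.
    variant.INFO["MISSINGPCT"] = float(missing_pct)  # INFO line added via add_info_to_header above
    if verdict == "fail":
        variant.FILTER = ["MISSING"]  # FILTER line added via add_filter_to_header above
    return variant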
Example #4
    def setUp(self):
        # load test data
        # store each variant object into specific variables for tests
        test_directory = os.path.dirname(os.path.abspath(__file__))
        reader = VCF(os.path.join(test_directory, "test.vcf"))
        self.test_filter = refilter.Filter(0.3, 0.7, 'AB', 'VAR_DP', 5, ['MISSING'], ['DB'])
        reader.add_filter_to_header(self.test_filter.filtered_header())
        reader.add_info_to_header(self.test_filter.rescued_header())

        self.variants = [ variant for variant in reader ]
Example #5
def _af_annotate_and_filter(paired, items, in_file, out_file):
    """Populating FORMAT/AF, and dropping variants with AF<min_allele_fraction

    Strelka2 doesn't report exact AF for a variant, however it can be calculated as alt_counts/dp from existing fields:
    somatic
      snps:    GT:DP:FDP:SDP:SUBDP:AU:CU:GU:TU                 dp=DP                {ALT}U[0] = alt_counts(tier1,tier2)
      indels:  GT:DP:DP2:TAR:TIR:TOR:DP50:FDP50:SUBDP50:BCN50  dp=DP                TIR = alt_counts(tier1,tier2)
    germline
      snps:    GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL(:PS)       dp=sum(alt_counts)   AD = ref_count,alt_counts
      indels:  GT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL(:PS)             dp=sum(alt_counts)   AD = ref_count,alt_counts
    """
    data = paired.tumor_data if paired else items[0]
    min_freq = float(utils.get_in(data["config"], ("algorithm", "min_allele_fraction"), 10)) / 100.0
    logger.debug("Filtering Strelka2 calls with allele fraction threshold of %s" % min_freq)
    ungz_out_file = "%s.vcf" % utils.splitext_plus(out_file)[0]
    if not utils.file_exists(ungz_out_file) and not utils.file_exists(ungz_out_file + ".gz"):
        with file_transaction(data, ungz_out_file) as tx_out_file:
            vcf = VCF(in_file)
            vcf.add_format_to_header({
                'ID': 'AF',
                'Description': 'Allele frequency, as calculated in bcbio: AD/DP (germline), <ALT>U/DP (somatic snps), '
                               'TIR/DPI (somatic indels)',
                'Type': 'Float',
                'Number': '.'})
            vcf.add_filter_to_header({
                'ID': 'MinAF',
                'Description': 'Allele frequency is lower than %s%% ' % (min_freq*100) + (
                    '(configured in bcbio as min_allele_fraction)'
                    if utils.get_in(data["config"], ("algorithm", "min_allele_fraction"))
                    else '(default threshold in bcbio; override with min_allele_fraction in the algorithm section)')})
            w = Writer(tx_out_file, vcf)
            tumor_index = vcf.samples.index(data['description'])
            for rec in vcf:
                if paired:  # somatic?
                    if rec.is_snp:  # snps?
                        alt_counts = rec.format(rec.ALT[0] + 'U')[:,0]  # {ALT}U=tier1_depth,tier2_depth
                    else:  # indels
                        alt_counts = rec.format('TIR')[:,0]  # TIR=tier1_depth,tier2_depth
                    dp = rec.format('DP')[:,0]
                elif rec.format("AD") is not None:  # germline?
                    alt_counts = rec.format('AD')[:,1:]  # AD=REF,ALT1,ALT2,...
                    dp = np.sum(rec.format('AD')[:,0:], axis=1)
                else: # germline gVCF record
                    alt_counts, dp = (None, None)
                if dp is not None:
                    with np.errstate(divide='ignore', invalid='ignore'):  # ignore division by zero and put AF=.0
                        af = np.true_divide(alt_counts, dp)
                        af[~np.isfinite(af)] = .0  # -inf inf NaN -> .0
                    rec.set_format('AF', af)
                    if paired and np.all(af[tumor_index] < min_freq):
                        vcfutils.cyvcf_add_filter(rec, 'MinAF')
                w.write_record(rec)
            w.close()
    return vcfutils.bgzip_and_index(ungz_out_file, data["config"])
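The docstring's AF formula is easy to sanity-check by hand: a somatic SNP with a tumor tier1 {ALT}U count of 8 and DP of 100 gives AF = 8/100 = 0.08, which falls below the default 10% threshold and would receive the MinAF soft filter. A minimal sketch of that arithmetic with the same numpy handling as above (the counts are made up for illustration):

import numpy as np

alt_counts = np.array([8])    # hypothetical tier1 {ALT}U count for the tumor sample
dp = np.array([100])          # hypothetical DP for the tumor sample
with np.errstate(divide='ignore', invalid='ignore'):  # zero-depth samples become AF=0.0
    af = np.true_divide(alt_counts, dp)
    af[~np.isfinite(af)] = .0
print(af)  # [0.08] -> below the default 0.1 threshold, so MinAF would be applied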
Example #6
def _af_annotate_and_filter(paired, items, in_file, out_file):
    """Populating FORMAT/AF, and dropping variants with AF<min_allele_fraction

    Strelka2 doesn't report exact AF for a variant, however it can be calculated as alt_counts/dp from existing fields:
    somatic
      snps:    GT:DP:FDP:SDP:SUBDP:AU:CU:GU:TU                 dp=DP                {ALT}U[0] = alt_counts(tier1,tier2)
      indels:  GT:DP:DP2:TAR:TIR:TOR:DP50:FDP50:SUBDP50:BCN50  dp=DP                TIR = alt_counts(tier1,tier2)
    germline
      snps:    GT:GQ:GQX:DP:DPF:AD:ADF:ADR:SB:FT:PL(:PS)       dp=sum(alt_counts)   AD = ref_count,alt_counts
      indels:  GT:GQ:GQX:DPI:AD:ADF:ADR:FT:PL(:PS)             dp=sum(alt_counts)   AD = ref_count,alt_counts
    """
    data = paired.tumor_data if paired else items[0]
    min_freq = float(utils.get_in(data["config"], ("algorithm", "min_allele_fraction"), 10)) / 100.0
    logger.info("Filtering Strelka2 calls with allele fraction threshold of %s" % min_freq)
    ungz_out_file = "%s.vcf" % utils.splitext_plus(out_file)[0]
    if not utils.file_exists(ungz_out_file) and not utils.file_exists(ungz_out_file + ".gz"):
        with file_transaction(data, ungz_out_file) as tx_out_file:
            vcf = VCF(in_file)
            vcf.add_format_to_header({
                'ID': 'AF',
                'Description': 'Allele frequency, as calculated in bcbio: AD/DP (germline), <ALT>U/DP (somatic snps), '
                               'TIR/DPI (somatic indels)',
                'Type': 'Float',
                'Number': '.'})
            vcf.add_filter_to_header({
                'ID': 'MinAF',
                'Description': 'Allele frequency is lower than %s%% ' % (min_freq*100) + (
                    '(configured in bcbio as min_allele_fraction)'
                    if utils.get_in(data["config"], ("algorithm", "min_allele_fraction"))
                    else '(default threshold in bcbio; override with min_allele_fraction in the algorithm section)')})
            w = Writer(tx_out_file, vcf)
            tumor_index = vcf.samples.index(data['description'])
            for rec in vcf:
                if paired:  # somatic?
                    if rec.is_snp:  # snps?
                        alt_counts = rec.format(rec.ALT[0] + 'U')[:,0]  # {ALT}U=tier1_depth,tier2_depth
                    else:  # indels
                        alt_counts = rec.format('TIR')[:,0]  # TIR=tier1_depth,tier2_depth
                    dp = rec.format('DP')[:,0]
                elif rec.format("AD") is not None:  # germline?
                    alt_counts = rec.format('AD')[:,1:]  # AD=REF,ALT1,ALT2,...
                    dp = np.sum(rec.format('AD')[:,0:], axis=1)
                else: # germline gVCF record
                    alt_counts, dp = (None, None)
                if dp is not None:
                    with np.errstate(divide='ignore', invalid='ignore'):  # ignore division by zero and put AF=.0
                        af = np.true_divide(alt_counts, dp)
                        af[~np.isfinite(af)] = .0  # -inf inf NaN -> .0
                    rec.set_format('AF', af)
                    if paired and np.all(af[tumor_index] < min_freq):
                        vcfutils.cyvcf_add_filter(rec, 'MinAF')
                w.write_record(rec)
            w.close()
    return vcfutils.bgzip_and_index(ungz_out_file, data["config"])
Example #7
def main(min_allele_balance, max_allele_balance, allele_balance_tag,
         variant_sample_depth_tag, min_depth, exclude_filters, exclude_fields,
         vcf):
    reader = VCF(vcf)
    refilter = Filter(min_allele_balance, max_allele_balance,
                      allele_balance_tag, variant_sample_depth_tag, min_depth,
                      exclude_filters, exclude_fields)
    reader.add_filter_to_header(refilter.filtered_header())
    reader.add_info_to_header(refilter.rescued_header())
    writer = Writer('-', reader)

    for variant in reader:
        refilter(variant)  # Modifies variant filter status in place
        writer.write_record(variant)
Example #8
def main():
    parser = argparse.ArgumentParser(__doc__)
    parser.add_argument("--vcf", help="VCF file", type=str, required=True)
    parser.add_argument("--statsfile", help="File with chrom, start, locus stats", type=str, required=True)
    parser.add_argument("--out", help="Prefix for output files", type=str, required=True)
    parser.add_argument("--min-hwep", help="Minimum HWE p-value", type=float, default=0)
    parser.add_argument("--min-callrate", help="Minimum call rate", type=float, default=0)
    parser.add_argument("--min-het", help="Minimum heterozygosity", type=float, default=0)
    parser.add_argument("--max-hrun-offset", help="For periods 5+, discard if the ref has " \
                            "homopolymer run > period+offset", type=int, default=100000)
    parser.add_argument("--filter-segdup", help="Filter loci overlapping a segdup", action="store_true")
    args = parser.parse_args()

    # Get VCF reader
    reader = VCF(args.vcf)

    # Load locus filters
    sys.stderr.write("Getting filters...\n")
    locstats = pd.read_csv(args.statsfile, sep="\t")
    locstats["FILTER"] = locstats.apply(lambda x: GetFilters(x, args, len(reader.samples)), 1)
    locstats.to_csv(args.out + ".tab", sep="\t", index=False)

    # Get filter dictionary
    sys.stderr.write("Getting filter dictionary...\n")
    filter_dict = dict(zip(list(locstats["start"]), list(locstats["FILTER"])))

    # Set filter field
    sys.stderr.write("Setting filter field in VCFs...\n")
    adict = {
        "HWE": "HWE less than %s"%args.min_hwep,
        "Callrate": "Callrate less than %s"%args.min_callrate,
        "Het": "Het less than %s"%args.min_het,
        "Hrun": "Hrun greater than %s"%args.max_hrun_offset,
        "Segdup": "Locus in a segmental duplication",
        "MissingInfo": "No stats provided for the locus",
        }
    for f in adict:
        reader.add_filter_to_header({"ID": f, "Description": adict[f]})
    writer = Writer("/dev/stdout", reader)
    for record in reader:
        filters = filter_dict.get(record.INFO["START"], "MissingInfo")
        if filters != ".":
            record.FILTER = filters.split(";")
        else: record.FILTER = "PASS"
        writer.write_record(record)
    writer.close()
    reader.close()
Example #9
def test_add_filter_to_header():
    v = VCF(VCF_PATH)
    # NOTE that we have to add the filter to the header of the reader,
    # not the writer because the record will be associated with the reader
    v.add_filter_to_header({'ID': 'abcdefg', 'Description': 'abcdefg'})

    f = tempfile.mktemp(suffix=".vcf")
    atexit.register(os.unlink, f)
    w = Writer(f, v)
    rec = next(v)  # use the next() builtin; the Python 2-style .next() method is not available in Python 3

    rec.FILTER = ["abcdefg"]
    w.write_record(rec)
    w.close()

    v = next(VCF(f))
    assert v.FILTER == "abcdefg", v.FILTER
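The NOTE in this test is the central usage rule for cyvcf2 filters: the FILTER must be registered on the reader's header before a Writer is built from it, because written records carry the reader's header. A minimal sketch of that pattern (the paths and filter ID below are placeholders, not taken from any of the examples):

from cyvcf2 import VCF, Writer

vcf = VCF("input.vcf.gz")                    # reader; its header is shared by the records
vcf.add_filter_to_header({"ID": "MY_FILTER",
                          "Description": "example soft filter"})  # add before creating the Writer
w = Writer("output.vcf", vcf)                # writer copies the updated header
for rec in vcf:
    rec.FILTER = ["MY_FILTER"]               # valid because the header now defines MY_FILTER
    w.write_record(rec)
w.close()
vcf.close()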
Example #11
def filter_cli(input, output, trash, params_file, index_sample,
               immediate_return):
    vcf = VCF(input, gts012=True)

    idx = vcf.samples.index(index_sample)
    for filter_item in list(FilterClass):
        vcf.add_filter_to_header(filter_item.value)

    out = Writer(output, vcf)
    tr = Writer(trash, vcf)

    filter_params = FilterParams(params_file)

    filter_it = Filterer(vcf, filter_params, idx, immediate_return)

    for record, fi in filter_it:
        if fi is None or len(fi) == 0:
            out.write_record(record)
        else:
            record.FILTER = [x.name for x in fi]
            tr.write_record(record)

    out.close()
    tr.close()
Example #12
pass_threshold = 0

if args.conditions is None:
    wantFilters = False
else:
    wantFilters = True  # flag checked below; without this the later check raises NameError
    conditions = formatString(args.INFO_conditions)
    pass_threshold += 1

if args.GTconditions is None:
    wantGTFilters = False
else:
    wantGTFilters = True  # same fix for the per-sample filter flag
    pass_threshold += 1

vcf = VCF(args.inputvcf)
vcf.add_filter_to_header({
    'ID': args.filtername,
    'Description': 'SV filter ' + args.filtername
})

for v in vcf:

    #Apply variant filters
    if wantFilters == True:
        evaluateINFO(args.conditions_str, v)

        #result = vt.evaluateINFO(line,cond,args.logic)
        #if result == True:
        #	is_filtered += 1

    #Apply per-sample filters
    if wantGTFilters == True:
        cond = args.GTconditions.split(",")
Example #13
    # API and example
    # https://brentp.github.io/cyvcf2/
    # https://brentp.github.io/cyvcf2/docstrings.html#api
    invcf = VCF(
        '/dev/stdin', lazy=True, gts012=True
    )  # if gts012=True, then gt_types will be 0=HOM_REF, 1=HET, 2=HOM_ALT, 3=UNKNOWN.
    # invcf = VCF('test.vcf.gz', lazy=True)

    # adjust the header to contain the new FILTER field
    # for a FILTER line, the keys 'ID' and 'Description' are required.

    invcf.add_filter_to_header({
        'ID': 'VCFSiteMissingFilter.py',
        'Description': 'Exclude sites with missing rate higher than ' + str(MISS_THRESHOLD)
    })

    # create a new vcf Writer using the input vcf as a template.
    # Only need to write out updated VCF header.
    # Other parts output as string.
    outvcf = Writer('/dev/stdout', invcf)
    outvcf.close()

    for variant in invcf:
        missing_rate = 1 - variant.call_rate
        if missing_rate <= MISS_THRESHOLD:
            ss = str(variant).split()
            sys.stdout.write(str(variant))
Example #14
def fix_vcf(vcf, output, fai, jasmine=False):
    chromsizes = {line.split()[0]: int(line.split()[1]) for line in open(fai)}

    vcf_in = VCF(vcf)
    if not output:
        output = vcf.replace(".vcf", "_{}.vcf".format("fixed"))
    vcf_in.add_info_to_header({
        'ID': 'TRUNCATED',
        'Description': "SVLEN truncated",
        'Type': 'Flag',
        'Number': '0'
    })
    vcf_in.add_info_to_header({
        'ID': 'STRANDS2',
        'Description':
        "alt reads first +,alt reads first -,alt reads second +,alt reads second -.",
        'Type': 'Integer',
        'Number': '4'
    })
    vcf_in.add_info_to_header({
        'ID': 'Strandbias_pval',
        'Description': "P-value for fisher exact test for strand bias.",
        'Type': 'Float',
        'Number': 'A'
    })
    vcf_in.add_filter_to_header({
        'ID': 'STRANDBIAS',
        'Description': "Strand is biased if Strandbias_pval < 0.01."
    })
    if jasmine:
        vcf_in.add_info_to_header({
            'ID': 'STRANDS',
            'Description': "foo",
            'Type': 'String',
            'Number': '1'
        })
        vcf_in.add_info_to_header({
            'ID': 'AF',
            'Description': "foo",
            'Type': 'Float',
            'Number': '1'
        })
    handle, interm_output = tempfile.mkstemp(suffix=".vcf")
    vcf_out = Writer(interm_output, vcf_in)
    records_fixed = 0
    records_truncated = 0
    mito_variants = 0
    interchromosomal_bnds = 0
    for v in vcf_in:
        if v.CHROM == 'chrM':
            mito_variants += 1
            continue
        if v.start == -1:
            v.set_pos(0)
            records_fixed += 1
        try:
            if (v.INFO.get('SVTYPE')
                    == 'BND') and (v.CHROM != v.INFO.get('CHR2')):
                del v.INFO['END']
                interchromosomal_bnds += 1
        except KeyError:
            pass
        try:
            if chromsizes[v.INFO.get('CHR2')] < v.INFO.get('END'):
                v.INFO['SVLEN'] = 1
                v.INFO['END'] = v.start + 1
                v.INFO['TRUNCATED'] = True
                records_truncated += 1
        except KeyError:
            pass
        if v.INFO.get('SVLEN') == 999999999:
            v.INFO['SVLEN'] = 1
            v.INFO['TRUNCATED'] = True
        vcf_out.write_record(v)
    vcf_out.close()
    vcf_sort(interm_output, output)
    if mito_variants != 0:
        sys.stderr.write(f"Removed {mito_variants} records on chrM.\n")
    if records_fixed != 0:
        sys.stderr.write(f"Fixed {records_fixed} records.\n")
    if records_truncated != 0:
        sys.stderr.write(
            f"Truncated {records_truncated} records where END > chromosome size\n"
        )
    if interchromosomal_bnds != 0:
        sys.stderr.write(
            f"Dropped END for {interchromosomal_bnds} interchromosomal BNDs\n")
Example #15
        ShowFormat()
        sys.exit(-1)

    mingq = int(args['--mingq'])

    # API and example
    # https://brentp.github.io/cyvcf2/
    # https://brentp.github.io/cyvcf2/docstrings.html#api
    invcf = VCF('/dev/stdin', lazy=True, gts012=True)
    # invcf = VCF('test.vcf.gz', lazy=True)

    # adjust the header to contain the new FILTER field
    # for a FILTER line, the keys 'ID' and 'Description' are required.
    invcf.add_filter_to_header({
        'ID': 'VCFGQFilter.py',
        'Description': 'Mask genotype as missing if GQ value < ' + str(mingq)
    })

    # create a new vcf Writer using the input vcf as a template.
    # Only need to write out updated VCF header.
    # Other parts output as string.
    sys.stdout.write('%s' % (invcf.raw_header))
    # outvcf = Writer('/dev/stdout', invcf)
    # outvcf.close()

    # Cache data for faster process.
    DATA_COL = 9
    FMT_COL = 8
    FMT_STRING_CACHE = ''
    DP_COL = -1
Example #16
for name, filt in filters.items():
    if name in BUILTIN_FILTERS:
        if not isinstance(filt, tuple):
            filt = (filt, )
        # bind name and filt now; a plain closure would late-bind to the last loop values
        filters[name] = lambda variant, name=name, filt=filt: BUILTIN_FILTERS[name](variant, *filt)
        filters[name].__doc__ = BUILTIN_FILTERS[name].__doc__
    else:
        filters[name] = eval(filt)
        filters[name].__doc__ = filter_descs.get(name, filt)


invcf = VCF(infile)
for name, filt in filters.items():
    invcf.add_filter_to_header({
        'ID': name,
        'Description': filt.__doc__,
    })

if outfile.endswith(".gz"):
    outvcf = Writer(outfile, invcf, "wz")
else:
    outvcf = Writer(outfile, invcf)

for variant in invcf:
    for name, filt in filters.items():
        if not filt(variant):
            if not variant.FILTER:
                variant.FILTER = name
            else:
                variant.FILTER = f"{variant.FILTER};{name}"
    if variant.FILTER and not keep:
Example #17
    def add_filters_to_header(self, vcf: VCF):
        if self.min_depth > 0:
            header = {
                "ID": str(Tags.LowDepth),
                "Description": (
                    f"Depth ({Tags.Depth}) less than {self.min_depth} - i.e., {Tags.Depth}<{self.min_depth:.1f}"
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. depth: {header}")

        if self.min_fed > 0:
            header = {
                "ID": str(Tags.LowFed),
                "Description": (
                    f"High-quality depth of the called allele as a fraction of the expected (median; {self.expected_depth}) is "
                    f"less than {self.min_fed}"
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. FED: {header}")

        if self.min_mq > 0:
            header = {
                "ID": str(Tags.LowMapQual),
                "Description": (
                    f"Mapping quality ({Tags.MapQual.value}) less than {self.min_mq} - i.e., {Tags.MapQual.value}<{self.min_mq:.0f}"
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. mapping quality: {header}")

        if self.max_depth > 0:
            header = {
                "ID": str(Tags.HighDepth),
                "Description": (
                    f"Depth ({Tags.Depth}) more than {self.max_depth} - i.e., {Tags.Depth}>{self.max_depth:.1f}"
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for max. depth: {header}")

        if self.min_qual > 0:
            header = {
                "ID": str(Tags.LowQual),
                "Description": f"QUAL less than {self.min_qual}",
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. QUAL: {header}")

        if self.min_strand_bias > 0:
            header = {
                "ID": str(Tags.StrandBias),
                "Description": (
                    f"A strand on the called allele has less than  "
                    f"{self.min_strand_bias:.2%} of the high-quality depth for that "
                    f"allele. This is judged on the {Tags.StrandDepth} tag."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for strand bias: {header}")

        if self.min_frs > 0:
            header = {
                "ID": str(Tags.LowSupport),
                "Description": f"Fraction of read support on called allele is less than {self.min_frs}",
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. FRS: {header}")

        if self.min_bqb > 0:
            header = {
                "ID": str(Tags.LowBaseQualBias),
                "Description": (
                    f"Base Quality Bias ({Tags.BaseQualBias}) is less than "
                    f"{self.min_bqb}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. base quality bias: {header}")

        if self.min_mqb > 0:
            header = {
                "ID": str(Tags.LowMapQualBias),
                "Description": (
                    f"Mapping Quality Bias ({Tags.MapQualBias}) is less than "
                    f"{self.min_mqb}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. mapping quality bias: {header}")

        if self.min_rpb > 0:
            header = {
                "ID": str(Tags.LowReadPosBias),
                "Description": (
                    f"Read Position Bias ({Tags.ReadPosBias}) is less than "
                    f"{self.min_rpb}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. read position bias: {header}")

        if self.min_rpbz is not None:
            header = {
                "ID": str(Tags.LowReadPosBiasZ),
                "Description": (
                    f"Read Position Bias z-test score ({Tags.ReadPosBiasZ}) is less than "
                    f"{self.min_rpbz}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. read position bias z-test: {header}")

        if self.max_rpbz is not None:
            header = {
                "ID": str(Tags.HighReadPosBiasZ),
                "Description": (
                    f"Read Position Bias z-test score ({Tags.ReadPosBiasZ}) is more than "
                    f"{self.max_rpbz}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for max. read position bias z-test: {header}")

        if self.max_scbz is not None:
            header = {
                "ID": str(Tags.HighSoftClipBiasZ),
                "Description": (
                    f"Soft-Clip Length Bias z-test score ({Tags.SoftClipBiasZ}) is more than "
                    f"{self.max_scbz}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for max. soft-clip length bias z-test: {header}")

        if self.max_sgb != 0:
            header = {
                "ID": str(Tags.HighSegBias),
                "Description": (
                    f"Segregation-based metric ({Tags.SegregationBias}) is greater "
                    f"than {self.max_sgb}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for max. segregation bias: {header}")

        if self.min_vdb > 0:
            header = {
                "ID": str(Tags.LowVarDistBias),
                "Description": (
                    f"Variant distance bias ({Tags.VariantDistanceBias}) is less "
                    f"than {self.min_vdb}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. variant distance bias: {header}")
Example #18
        ShowFormat()
        sys.exit(-1)

    MRR_THRESHOLD = float(args['-c'])  # the threshold minor reads ratio.

    # API and example
    # https://brentp.github.io/cyvcf2/
    # https://brentp.github.io/cyvcf2/docstrings.html#api
    invcf = VCF('/dev/stdin', lazy=True, gts012=True)
    # invcf = VCF('test.vcf.gz', lazy=True)

    # adjust the header to contain the new FILTER field
    # for a FILTER line, the keys 'ID' and 'Description' are required.
    invcf.add_filter_to_header({
        'ID': 'VCFHOMOMinorReadsRatioFilter.py',
        'Description': 'Mask the genotype as missing if the MRR >= ' + args['-c']
    })

    # create a new vcf Writer using the input vcf as a template.
    # Only need to write out updated VCF header.
    # Other parts output as string.
    sys.stdout.write('%s' % (invcf.raw_header))
    # outvcf = Writer('/dev/stdout', invcf)
    # outvcf.close()

    # Cache data for faster process.
    DATA_COL = 9
    FMT_COL = 8
    FMT_STRING_CACHE = ''
    DP_COL = -1
Example #19
    def add_filters_to_header(self, vcf: VCF):
        if self.min_depth > 0:
            header = {
                "ID": str(Tags.LowDepth),
                "Description": (
                    f"Depth ({Tags.Depth}) less than {self.min_depth_frac:.1%} the "
                    f"expected depth of {self.expected_depth:.1f}. "
                    f"{Tags.Depth}<{self.min_depth:.1f}"
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. depth: {header}")

        if self.max_depth > 0:
            header = {
                "ID": str(Tags.HighDepth),
                "Description": (
                    f"Depth ({Tags.Depth}) more than {self.max_depth_frac:.1%} the "
                    f"expected depth of {self.expected_depth:.1f}. "
                    f"{Tags.Depth}>{self.max_depth:.1f}"
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for max. depth: {header}")

        if self.min_qual > 0:
            header = {
                "ID": str(Tags.LowQual),
                "Description": f"QUAL less than {self.min_qual}",
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. QUAL: {header}")

        if self.min_strand_bias > 0:
            header = {
                "ID": str(Tags.StrandBias),
                "Description": (
                    f"A strand on the called allele has less than  "
                    f"{self.min_strand_bias:.2%} of the high-quality depth for that "
                    f"allele. This is judged on the {Tags.StrandDepth} tag."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for strand bias: {header}")

        if self.min_bqb > 0:
            header = {
                "ID": str(Tags.LowBaseQualBias),
                "Description": (
                    f"Base Quality Bias ({Tags.BaseQualBias}) is less than "
                    f"{self.min_bqb}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. base quality bias: {header}")

        if self.min_mqb > 0:
            header = {
                "ID": str(Tags.LowMapQualBias),
                "Description": (
                    f"Mapping Quality Bias ({Tags.MapQualBias}) is less than "
                    f"{self.min_mqb}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. mapping quality bias: {header}")

        if self.min_rpb > 0:
            header = {
                "ID": str(Tags.LowReadPosBias),
                "Description": (
                    f"Read Position Bias ({Tags.ReadPosBias}) is less than "
                    f"{self.min_rpb}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. read position bias: {header}")

        if self.max_sgb != 0:
            header = {
                "ID": str(Tags.HighSegBias),
                "Description": (
                    f"Segregation-based metric ({Tags.SegregationBias}) is greater "
                    f"than {self.max_sgb}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for max. segregation bias: {header}")

        if self.min_vdb > 0:
            header = {
                "ID": str(Tags.LowVarDistBias),
                "Description": (
                    f"Variant distance bias ({Tags.VariantDistanceBias}) is less "
                    f"than {self.min_vdb}."
                ),
            }
            vcf.add_filter_to_header(header)
            logging.debug(f"Header for min. variant distance bias: {header}")