Example #1
def main(args=None):
    """Command-line entry point of ``sequana_vcf_filter``.

    Parses the command line, copies the requested thresholds and switches
    onto the underlying VCF reader and writes the filtered variants.

    :param args: full argument vector, program name at index 0. Defaults to
        a copy of ``sys.argv``.
    :return: the value returned by ``filter_vcf``, or ``None`` when only the
        version or the help message was requested.
    """
    if args is None:
        args = sys.argv[:]

    print("Welcome to sequana_vcf_filter")
    user_options = Options(prog="sequana_vcf_filter")

    if "--version" in args:
        import sequana
        # NOTE(review): assumes the package exposes a ``version`` attribute;
        # falls back to "unknown" otherwise -- confirm.
        print(getattr(sequana, "version", "unknown"))
        return None
    if len(args) == 1 or "--help" in args:
        user_options.parse_args(["prog", "--help"])
        # Only reached if the parser returns instead of exiting after --help;
        # nothing can be filtered without parsed options.
        return None

    # Always go through the parser: every attribute read below (level,
    # quality, filter, ...) must exist, which a bare placeholder object
    # holding only ``input_filename`` cannot guarantee.
    options = user_options.parse_args(args[1:])

    # set the level
    logger.level = options.level

    vcf = VCF(options.input_filename)

    vcf.vcf.apply_indel_filter = options.apply_indel_filter
    vcf.vcf.apply_dp4_filter = options.apply_dp4_filter
    vcf.vcf.apply_af1_filter = options.apply_af1_filter
    vcf.vcf.dp4_minimum_depth = options.minimum_depth
    vcf.vcf.dp4_minimum_depth_strand = options.minimum_depth_strand
    vcf.vcf.dp4_minimum_ratio = options.minimum_ratio
    vcf.vcf.minimum_af1 = options.minimum_af1
    vcf.vcf.filter_dict['INFO'] = {}
    vcf.vcf.filter_dict['QUAL'] = options.quality

    # Each entry of options.filter is a one-element list holding an
    # expression such as "DP>40"; a missing option is treated as no filter.
    for entry in options.filter or []:
        parsed = _split_filter_expression(entry[0])
        if parsed is None:
            # No comparison operator: ignored, as before.
            continue
        key, value = parsed
        vcf.vcf.filter_dict['INFO'][key] = value

    # NOTE(review): the original call was cut off after its first argument;
    # the single-argument form is used here -- confirm whether a second
    # argument (e.g. a file for the rejected variants) must be restored.
    res = vcf.vcf.filter_vcf(options.output_filename)

    return res


def _split_filter_expression(expression):
    """Split ``KEY<op>RULE`` at its first comparison operator.

    :param expression: text such as ``"DP>=4"`` or ``"DP>40&<=80"``.
    :return: ``(key, rule)`` where ``rule`` keeps the operator, e.g.
        ``("DP", ">40&<=80")``; ``None`` if there is no ``<`` or ``>``.
    """
    hits = [pos for pos in (expression.find("<"), expression.find(">"))
            if pos != -1]
    if not hits:
        return None
    start = min(hits)
    # Prefer the two-character operators when they are present.
    operator = expression[start:start + 2]
    if operator not in (">=", "<="):
        operator = expression[start]
    key = expression[:start].strip()
    rule = operator + expression[start + len(operator):].strip()
    return key, rule
Example #2
def test_vcf_filter():
    """Run ``filter_vcf`` with several QUAL / INFO settings and check counts."""
    vcf_path = sequana_data("test_vcf_mpileup_4dot1.vcf")
    reader = VCF(vcf_path)
    assert reader.vcf.version == "4.1"

    # Each scenario: (QUAL threshold, INFO filters, expected summary)
    scenarios = [
        # INFO left empty, quality threshold only
        (50, {}, {'N': 573, 'filtered': 308, 'unfiltered': 265}),
        # the & operator
        (0, {'DP': ">40&<=80"},
         {'N': 573, 'filtered': 10, 'unfiltered': 563}),
        # the | operator
        (0, {'DP': "<40|>=80"},
         {'N': 573, 'filtered': 562, 'unfiltered': 11}),
        # access to one item of an array (index 0)
        (0, {'DP4[0]': "<2"},
         {'N': 573, 'filtered': 173, 'unfiltered': 400}),
        # access to one item of an array (index 2)
        (0, {'DP4[2]': "<2"},
         {'N': 573, 'filtered': 199, 'unfiltered': 374}),
        # sum of two elements of an array
        (0, {'sum(DP4[2], DP4[3])': "<4"},
         {'N': 573, 'filtered': 7, 'unfiltered': 566}),
    ]

    for quality, info_filters, expected in scenarios:
        # Start every scenario from a clean filter configuration.
        reader.vcf.filter_dict["QUAL"] = quality
        reader.vcf.filter_dict["INFO"] = {}
        for name, rule in info_filters.items():
            reader.vcf.filter_dict['INFO'][name] = rule
        with TempFile() as fh:
            summary = reader.vcf.filter_vcf(fh.name)
        assert summary == expected
Example #3
def test_indel():
    """The first two records are not indels, the third one is."""
    reader = VCF(sequana_data("test_vcf_mpileup_4dot1.vcf"))
    for expected in (False, False, True):
        record = next(reader.vcf)
        assert reader.vcf.is_indel(record) is expected
Example #4
def test_af1():
    """Check ``is_valid_af1`` on two records, then the AF1 filter on the file."""
    vcf_path = sequana_data("test_vcf_mpileup_4dot1.vcf")
    reader = VCF(vcf_path)

    # First record: AF1 of 1 is valid, 0.5 is not.
    record = next(reader.vcf)
    for af1, expected in ((1, True), (0.5, False)):
        record.INFO['AF1'] = af1
        assert reader.vcf.is_valid_af1(record) is expected

    # polymorphic case
    record = next(reader.vcf)
    record.INFO['AF1'] = 1
    assert reader.vcf.is_valid_af1(record) is False
    # The matching AF1 = 0.5 check (expected True) is disabled in this test.

    # Full run on a fresh reader with the AF1 filter switched on.
    reader = VCF(vcf_path)
    reader.vcf.apply_af1_filter = True
    with TempFile() as fh:
        summary = reader.vcf.filter_vcf(fh.name)
    assert summary == {'N': 573, 'filtered': 391, 'unfiltered': 182}
Example #5
def test_vcf_filter_freebayes():
    """Smoke test: building a ``VCF`` from ``test.vcf`` must not raise."""
    vcf_path = sequana_data("test.vcf")
    reader = VCF(vcf_path)
Example #6
def test_vcf_filter_dp4():
    """Check ``is_valid_dp4`` on hand-made DP4 values, then the DP4 filter.

    The first record is validated twice: once as read from the file
    (alternate allele present) and once after its ALT is replaced by ".",
    which the second validator expects. The two nested validators were
    previously defined but never called, so none of their assertions ran.
    """
    data = sequana_data("test_vcf_mpileup_4dot1.vcf")
    v = VCF(data)
    variant = next(v.vcf)

    def validate_variant_alternate(variant):
        # variant.ALT must be different from "." for this test
        assert str(variant.ALT[0]).strip() != "."

        # depth 4 in total and 2 per strand on the last two DP4 entries
        variant.INFO['DP4'] = [0, 0, 2, 2]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75)

        # here, not enough depth on one of the two strands
        variant.INFO['DP4'] = [0, 0, 4, 1]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75) is False
        variant.INFO['DP4'] = [0, 0, 1, 4]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75) is False

        # a ratio of exactly 0.75 is accepted
        variant.INFO['DP4'] = [25, 0, 75, 75]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75) is True

        # just below 0.75 for the alt reverse
        variant.INFO['DP4'] = [25, 25, 75, 74]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75) is False
        # just below 0.75 for the alt forward
        variant.INFO['DP4'] = [25, 25, 74, 75]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75) is False

    def validate_variant_reference(variant):
        # variant.ALT must be equal to "." for this test
        assert str(variant.ALT[0]).strip() == "."

        # depth 4 in total and 2 per strand on the first two DP4 entries
        variant.INFO['DP4'] = [2, 2, 0, 0]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75)

        # here, not enough depth on one of the two strands
        variant.INFO['DP4'] = [4, 1, 0, 0]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75) is False
        variant.INFO['DP4'] = [1, 4, 0, 0]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75) is False

        # a ratio of exactly 0.75 is accepted
        variant.INFO['DP4'] = [75, 75, 25, 0]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75) is True

        # just below 0.75 on the reverse strand
        variant.INFO['DP4'] = [75, 74, 25, 25]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75) is False
        # just below 0.75 on the forward strand
        variant.INFO['DP4'] = [74, 75, 25, 25]
        assert v.vcf.is_valid_dp4(variant, 4, 2, 0.75) is False

    # variant.ALT is equal to "A"
    validate_variant_alternate(variant)

    # Replace ALT by "." so that the same record exercises the reference case
    variant.ALT[0].sequence = "."
    validate_variant_reference(variant)

    # Now, let us do the filtering with the filter_vcf method
    v = VCF(data)
    v.vcf.apply_dp4_filter = True
    with TempFile() as fh:
        res = v.vcf.filter_vcf(fh.name)
    assert res == {'N': 573, 'filtered': 414, 'unfiltered': 159}
Example #7
def main(args=None):
    """Command-line entry point of ``sequana_vcf_filter``.

    Parses the command line, loads optional filters from a configuration
    file, adds the filters given on the command line and writes the
    filtered variants.

    :param args: full argument vector, program name at index 0. Defaults to
        a copy of ``sys.argv``.
    :return: the value returned by ``filter_vcf``, or ``None`` when only the
        help message was requested.
    :raises ValueError: if a filter file is given but has no ``[filters]``
        section (this includes a file that cannot be read).
    """
    if args is None:
        args = sys.argv[:]

    print("Welcome to sequana_vcf_filter")
    user_options = Options(prog="sequana_vcf_filter")

    if len(args) == 1 or "--help" in args:
        user_options.parse_args(["prog", "--help"])
        # Only reached if the parser returns instead of exiting after --help;
        # nothing can be filtered without parsed options.
        return None

    # Always go through the parser: every attribute read below (level,
    # quality, filter, ...) must exist, which a bare placeholder object
    # holding only ``input_filename`` cannot guarantee.
    options = user_options.parse_args(args[1:])

    # set the level
    logger.level = options.level

    vcf = VCF(options.input_filename)
    vcf.vcf.filter_dict['QUAL'] = options.quality
    vcf.vcf.filter_dict['INFO'] = {}

    # Read filters from a file
    if options.filter_file:
        import configparser
        cfg = configparser.RawConfigParser()
        # Keep option names as written instead of lower-casing them.
        cfg.optionxform = str
        # The parser stays empty (and has no section) unless the file is read.
        cfg.read(options.filter_file)
        if not cfg.has_section('filters'):
            raise ValueError("filter file must contain a section "
                             "[filters] use --help for more information")
        for key, value in cfg.items('filters'):
            # Keys are upper-cased as before, so lower-case spellings in
            # existing filter files keep working.
            vcf.vcf.filter_dict["INFO"][key.upper()] = value

        if cfg.has_section('general'):
            quality = cfg.getint('general', 'quality')
            vcf.vcf.filter_dict['QUAL'] = quality

    # A non-zero quality on the command line wins over the filter file.
    if options.quality != 0:
        vcf.vcf.filter_dict['QUAL'] = options.quality

    # Each entry of options.filter is a one-element list holding an
    # expression such as "DP>40"; a missing option is treated as no filter.
    for entry in options.filter or []:
        parsed = _split_filter_expression(entry[0])
        if parsed is None:
            # No comparison operator: ignored, as before.
            continue
        key, value = parsed
        vcf.vcf.filter_dict['INFO'][key] = value

    # NOTE(review): the original call was cut off after its first argument;
    # the single-argument form is used here -- confirm whether a second
    # argument (e.g. a file for the rejected variants) must be restored.
    res = vcf.vcf.filter_vcf(options.output_filename)

    return res


def _split_filter_expression(expression):
    """Split ``KEY<op>RULE`` at its first comparison operator.

    :param expression: text such as ``"DP>=4"`` or ``"DP>40&<=80"``.
    :return: ``(key, rule)`` where ``rule`` keeps the operator, e.g.
        ``("DP", ">40&<=80")``; ``None`` if there is no ``<`` or ``>``.
    """
    hits = [pos for pos in (expression.find("<"), expression.find(">"))
            if pos != -1]
    if not hits:
        return None
    start = min(hits)
    # Prefer the two-character operators when they are present.
    operator = expression[start:start + 2]
    if operator not in (">=", "<="):
        operator = expression[start]
    key = expression[:start].strip()
    rule = operator + expression[start + len(operator):].strip()
    return key, rule
Example #8
def test_vcf_filter_freebayes():
    """Smoke test: building a ``VCF`` from ``test.vcf`` must not raise."""
    vcf_path = sequana_data("test.vcf")
    reader = VCF(vcf_path)