def test_rank_models():
    """Check dict-entry lookup of a string annotation that has string rules.

    The INFO field carries ``GeneticModels=1:AD|AD_dn,2:AR_hom``. Asking the
    plugin for dict key '1' is expected to give 'AD' (the entry with the
    higher weight in ``string_rules``), both when the variant is supplied as
    a dictionary and when it is supplied as a raw line.
    """
    info_field = "GeneticModels=1:AD|AD_dn,2:AR_hom"
    as_dict = get_variant_dict(info=info_field)
    as_line = get_variant_line(info=info_field)

    models_plugin = Plugin(
        name='genetic_models',
        field='INFO',
        data_type='string',
        info_key="GeneticModels",
        separators=[',', ':', '|'],
        dict_entry=True,
        string_rules={'AD_dn': 1, 'AD': 2},
    )

    from_dict = models_plugin.get_value(variant_dict=as_dict, dict_key='1')
    from_line = models_plugin.get_value(variant_line=as_line, dict_key='1')

    assert from_dict == 'AD'
    assert from_line == 'AD'
def test_pos():
    """Check that a POS plugin returns the position string unchanged.

    The same position is placed in a variant line and a variant dictionary;
    both input forms must give back the original string.
    """
    pos_plugin = Plugin(name='Pos', field='POS')
    expected = '1000'

    as_line = get_variant_line(pos=expected)
    as_dict = get_variant_dict(pos=expected)

    # Line form is checked first, then dict form.
    for source in ({'variant_line': as_line}, {'variant_dict': as_dict}):
        assert pos_plugin.get_value(**source) == expected
def test_chrom():
    """Check that a CHROM plugin returns the raw chromosome string.

    The chromosome is read from both a variant line and a variant dictionary
    and must equal the value the fixtures were built with.
    """
    chrom_plugin = Plugin(name='Chrom', field='CHROM')
    expected = '10'

    as_line = get_variant_line(chrom=expected)
    as_dict = get_variant_dict(chrom=expected)

    # Line form is checked first, then dict form.
    for source in ({'variant_line': as_line}, {'variant_dict': as_dict}):
        assert chrom_plugin.get_value(**source) == expected
def test_id_flag():
    """Check that an ID plugin returns the raw database id.

    Despite the test name, the plugin is created without
    ``data_type='flag'``; the expected value is the id string itself, for
    both the line and the dictionary form of the variant.
    """
    id_plugin = Plugin(name='ID', field='ID')
    expected = 'rs001'

    as_line = get_variant_line(db_id=expected)
    as_dict = get_variant_dict(db_id=expected)

    # Line form is checked first, then dict form.
    for source in ({'variant_line': as_line}, {'variant_dict': as_dict}):
        assert id_plugin.get_value(**source) == expected
def test_multiple_id_no_rule():
    """Check a flag plugin on an ID field that holds two ids.

    The ID column is set to ``rs001;rs002`` and the plugin is created with
    ``data_type='flag'`` and no record rule. The value is expected to compare
    equal to True for both the line and the dictionary form of the variant.
    """
    flag_plugin = Plugin(name='ID', field='ID', data_type='flag')

    first_id = 'rs001'
    second_id = 'rs002'
    joined_ids = ";".join((first_id, second_id))

    as_line = get_variant_line(db_id=joined_ids)
    as_dict = get_variant_dict(db_id=joined_ids)

    # Equality (not identity) with True is what the original test checks.
    assert flag_plugin.get_value(variant_line=as_line) == True
    assert flag_plugin.get_value(variant_dict=as_dict) == True
def test_flag():
    """Check that a flag plugin on the INFO key ``DB`` evaluates to True.

    The fixtures are built with their default arguments; presumably the
    default INFO field contains the ``DB`` flag (verify against the fixture
    helpers). Both the line and the dictionary form are checked.
    """
    db_plugin = Plugin(
        name='DB',
        field='INFO',
        info_key="DB",
        data_type='flag',
    )

    default_line = get_variant_line()
    default_dict = get_variant_dict()

    # Equality (not identity) with True is what the original test checks.
    assert db_plugin.get_value(variant_line=default_line) == True
    assert db_plugin.get_value(variant_dict=default_dict) == True
def test_rank_score_with_key():
    """Check dict-entry lookup of an integer annotation.

    The INFO field carries ``RankScore=1:12,2:11``. Asking the plugin for
    dict key '1' is expected to give the integer 12, both for the dictionary
    and for the line form of the variant.
    """
    info_field = "RankScore=1:12,2:11"
    as_dict = get_variant_dict(info=info_field)
    as_line = get_variant_line(info=info_field)

    score_plugin = Plugin(
        name='rank_score',
        data_type='integer',
        field='INFO',
        info_key="RankScore",
        separators=[',', ':'],
        dict_entry=True,
    )

    from_dict = score_plugin.get_value(variant_dict=as_dict, dict_key='1')
    from_line = score_plugin.get_value(variant_line=as_line, dict_key='1')

    assert from_dict == 12
    assert from_line == 12
def test_1000G_float():
    """Check that the ``1000GAF`` INFO annotation is returned as a float.

    The fixtures are built with their default arguments; presumably the
    default INFO field contains ``1000GAF=0.744609`` (verify against the
    fixture helpers). Both input forms must give exactly that float.
    """
    frequency_plugin = Plugin(
        name='thousand_g',
        field='INFO',
        info_key="1000GAF",
        separators=[','],
        data_type='float',
    )
    expected = 0.744609

    default_line = get_variant_line()
    default_dict = get_variant_dict()

    assert frequency_plugin.get_value(variant_line=default_line) == expected
    assert frequency_plugin.get_value(variant_dict=default_dict) == expected
def test_1000G_record_rule():
    """Check the 'min' record rule on a multi-valued float annotation.

    ``1000GAF`` is given the two values 0.744609 and 0.02; with
    ``record_rule='min'`` the plugin is expected to return 0.02 for both the
    line and the dictionary form of the variant.
    """
    frequency_plugin = Plugin(
        name='thousand_g',
        field='INFO',
        info_key="1000GAF",
        separators=[','],
        data_type='float',
        record_rule='min',
    )
    info_field = "1000GAF=0.744609,0.02;AC=2;AF=1.00;AN=2"

    as_line = get_variant_line(info=info_field)
    as_dict = get_variant_dict(info=info_field)

    smallest = 0.02
    assert frequency_plugin.get_value(variant_line=as_line) == smallest
    assert frequency_plugin.get_value(variant_dict=as_dict) == smallest
def test_filter_min_rule():
    """Check the 'min' record rule on a FILTER field with string rules.

    FILTER is set to ``PASS;NOT_PASS`` and the string rules weight PASS as 2
    and NOT_PASS as 1. With ``record_rule='min'`` the plugin is expected to
    return 'NOT_PASS' for both the line and the dictionary form.
    """
    filter_plugin = Plugin(
        name='Filter',
        field='FILTER',
        data_type='string',
        string_rules={'PASS': 2, 'NOT_PASS': 1},
        record_rule='min',
    )
    filter_field = "PASS;NOT_PASS"

    as_line = get_variant_line(filt=filter_field)
    as_dict = get_variant_dict(filt=filter_field)

    lowest_ranked = 'NOT_PASS'
    assert filter_plugin.get_value(variant_line=as_line) == lowest_ranked
    assert filter_plugin.get_value(variant_dict=as_dict) == lowest_ranked
def filter(variant_file, annotation, threshold, discard, greater, silent, outfile):
    """
    Filter vcf variants.

    Filter variants based on their annotation

    Args:
        variant_file: Source of the VCF; handed to ``get_file_handle`` to
            obtain an iterable of lines.
        annotation (str): INFO key to filter on. It must be declared in the
            header's INFO definitions, otherwise the program exits with
            status 1.
        threshold: Value the annotation is compared against.
        discard (bool): If true, variants that lack the annotation are
            dropped; otherwise they are kept.
        greater (bool): If true, keep variants whose annotation is greater
            than ``threshold``; otherwise keep those whose annotation is
            lower.
        silent: Forwarded to ``print_headers`` and ``print_variant``.
        outfile: Forwarded to ``print_headers`` and ``print_variant``.
    """
    logger.info("Running genmod filter version {0}".format(__version__))
    # Take an explicit iterator so the header loop and the variant loop
    # consume one shared stream.
    variant_lines = iter(get_file_handle(variant_file))

    logger.info("Initializing a Header Parser")
    head = HeaderParser()

    first_variant = None
    for line in variant_lines:
        line = line.rstrip()
        if not line.startswith('#'):
            first_variant = line
            break
        if line.startswith('##'):
            head.parse_meta_data(line)
        else:
            head.parse_header_line(line)

    # Only push back a line that really is a variant. Unconditionally
    # re-injecting ``line`` would feed the last header line to the filter
    # when the file holds no variants, and would fail on an empty file.
    if first_variant is not None:
        variant_lines = itertools.chain([first_variant], variant_lines)

    if annotation not in head.info_dict:
        logger.warning(
            "Annotation {0} not specified in header".format(annotation))
        logger.info("Please check VCF file")
        logger.info("Exiting...")
        sys.exit(1)

    logger.info(
        "Building a plugin from extract_vcf for {0}".format(annotation))
    # The plugin is configured with record_rule='min' over comma separated
    # float entries.
    annotation_plugin = Plugin(
        name=annotation,
        field='INFO',
        info_key=annotation,
        separators=[','],
        record_rule='min',
        data_type='float',
    )
    logger.debug("Plugin=(field={0},info_key={1},separators={2},record_rule={3}"\
    ",data_type={4})".format('INFO', annotation, "','", 'min', 'float'))

    print_headers(head=head, outfile=outfile, silent=silent)

    nr_of_variants = 0
    nr_of_passed_variants = 0

    for variant in variant_lines:
        nr_of_variants += 1
        value = annotation_plugin.get_value(variant_line=variant)
        logger.debug("Found value {0}".format(value))

        # Compare against None rather than truthiness so that a legitimate
        # annotation value of 0 is filtered on instead of being treated as
        # missing.
        # NOTE(review): assumes Plugin.get_value returns None when the
        # annotation is absent - confirm against extract_vcf.
        if value is None:
            keep_variant = not discard
        elif greater:
            keep_variant = value > threshold
        else:
            keep_variant = value < threshold

        if keep_variant:
            logger.debug("Keeping variant")
            nr_of_passed_variants += 1
            print_variant(variant_line=variant, outfile=outfile,
                          mode='vcf', silent=silent)
        else:
            logger.debug("Discarding variant")

    logger.info("Number of variants in file {0}".format(nr_of_variants))
    logger.info(
        "Number of variants passing filter {0}".format(nr_of_passed_variants))
    logger.info(
        "Number of variants filtered {0}".format(
            nr_of_variants - nr_of_passed_variants))
def filter(variant_file, annotation, threshold, discard, greater, silent, outfile):
    """
    Filter vcf variants.

    Filter variants based on their annotation

    Args:
        variant_file: Iterable of VCF lines (header lines first, then
            variant lines).
        annotation (str): INFO key to filter on. It must be declared in the
            header's INFO definitions, otherwise the program exits with
            status 1.
        threshold: Value the annotation is compared against.
        discard (bool): If true, variants that lack the annotation are
            dropped; otherwise they are kept.
        greater (bool): If true, keep variants whose annotation is greater
            than ``threshold``; otherwise keep those whose annotation is
            lower.
        silent: Forwarded to ``print_headers`` and ``print_variant``.
        outfile: Forwarded to ``print_headers`` and ``print_variant``.
    """
    logger.info("Running genmod filter version {0}".format(__version__))

    logger.info("Initializing a Header Parser")
    head = HeaderParser()

    # Take an explicit iterator so the header loop and the variant loop
    # consume one shared stream.
    variant_lines = iter(variant_file)

    first_variant = None
    for line in variant_lines:
        line = line.rstrip()
        if not line.startswith('#'):
            first_variant = line
            break
        if line.startswith('##'):
            head.parse_meta_data(line)
        else:
            head.parse_header_line(line)

    # Only push back a line that really is a variant. Unconditionally
    # re-injecting ``line`` would feed the last header line to the filter
    # when the input holds no variants, and would fail on an empty input.
    if first_variant is not None:
        variant_lines = itertools.chain([first_variant], variant_lines)

    if annotation not in head.info_dict:
        logger.warning("Annotation {0} not specified in header".format(annotation))
        logger.info("Please check VCF file")
        logger.info("Exiting...")
        sys.exit(1)

    logger.info("Building a plugin from extract_vcf for {0}".format(annotation))
    # The plugin is configured with record_rule='min' over comma separated
    # float entries.
    annotation_plugin = Plugin(
        name=annotation,
        field='INFO',
        info_key=annotation,
        separators=[','],
        record_rule='min',
        data_type='float',
    )
    logger.debug("Plugin=(field={0},info_key={1},separators={2},record_rule={3}"\
    ",data_type={4})".format('INFO', annotation, "','", 'min', 'float'))

    print_headers(head=head, outfile=outfile, silent=silent)

    nr_of_variants = 0
    nr_of_passed_variants = 0

    for variant in variant_lines:
        nr_of_variants += 1
        value = annotation_plugin.get_value(variant_line=variant)
        logger.debug("Found value {0}".format(value))

        # Compare against None rather than truthiness so that a legitimate
        # annotation value of 0 is filtered on instead of being treated as
        # missing.
        # NOTE(review): assumes Plugin.get_value returns None when the
        # annotation is absent - confirm against extract_vcf.
        if value is None:
            keep_variant = not discard
        elif greater:
            keep_variant = value > threshold
        else:
            keep_variant = value < threshold

        if keep_variant:
            logger.debug("Keeping variant")
            nr_of_passed_variants += 1
            print_variant(
                variant_line=variant,
                outfile=outfile,
                mode='vcf',
                silent=silent,
            )
        else:
            logger.debug("Discarding variant")

    logger.info("Number of variants in file {0}".format(nr_of_variants))
    logger.info("Number of variants passing filter {0}".format(nr_of_passed_variants))
    logger.info("Number of variants filtered {0}".format(
        nr_of_variants - nr_of_passed_variants))