def check_format_ad_dp_tags(vcf, pcgr_directory, config_options, logger):
    """
    Check that the INFO tags configured for depth / allelic fraction / call
    confidence are declared with the expected Type in the VCF header.

    Parameters:
        vcf: object whose ``header_iter()`` yields header records; each
            record's ``info()`` must return a mapping with (at least) the
            keys 'ID', 'HeaderType' and 'Type' for INFO entries
            (presumably a cyvcf2 ``VCF`` - confirm against caller).
        pcgr_directory: not used by this function; kept so existing callers
            keep working.
        config_options: nested mapping; the tag names are read from
            ``config_options['allelic_support']`` (keys 'tumor_dp_tag',
            'tumor_af_tag', 'normal_dp_tag', 'normal_af_tag',
            'call_conf_tag'). An empty string means "not configured".
        logger: logging.Logger-like object used for info/warning output.

    Returns:
        0 when no type mismatch was detected. On the first mismatch the
        result of ``pcgr_error_message(err_msg, logger)`` is returned
        (helper defined elsewhere; its behaviour is not visible here).
    """
    allelic_support = config_options['allelic_support']
    tumor_dp_tag = allelic_support['tumor_dp_tag']
    tumor_af_tag = allelic_support['tumor_af_tag']
    normal_dp_tag = allelic_support['normal_dp_tag']
    normal_af_tag = allelic_support['normal_af_tag']
    call_conf_tag = allelic_support['call_conf_tag']

    # Same call order as before, in case the helper logs or aborts.
    annoutils.detect_reserved_info_tag(tumor_dp_tag, 'tumor_dp_tag', logger)
    annoutils.detect_reserved_info_tag(normal_dp_tag, 'normal_dp_tag', logger)
    annoutils.detect_reserved_info_tag(tumor_af_tag, 'tumor_af_tag', logger)
    annoutils.detect_reserved_info_tag(normal_af_tag, 'normal_af_tag', logger)
    annoutils.detect_reserved_info_tag(call_conf_tag, 'call_conf_tag', logger)

    # (option name, configured tag, required header Type, description).
    # The tuple order is the order in which a header record is checked, so
    # the first reported mismatch is the same one as before.
    tag_specs = (
        ('tumor_dp_tag', tumor_dp_tag, 'Integer',
         'tumor variant sequencing depth'),
        ('tumor_af_tag', tumor_af_tag, 'Float',
         'tumor variant allelic fraction'),
        ('normal_dp_tag', normal_dp_tag, 'Integer',
         'normal/control variant sequencing depth'),
        ('normal_af_tag', normal_af_tag, 'Float',
         'normal/control allelic fraction'),
        ('call_conf_tag', call_conf_tag, 'String',
         'variant call confidence'),
    )
    configured = {option: tag for option, tag, _, _ in tag_specs}
    found = {option: False for option in configured}

    for e in vcf.header_iter():
        header_element = e.info()
        if 'ID' not in header_element or 'HeaderType' not in header_element:
            continue
        if header_element['HeaderType'] != 'INFO':
            continue
        # No break: two options may be configured with the same tag name.
        for option, tag, expected_type, description in tag_specs:
            if header_element['ID'] != tag:
                continue
            declared_type = header_element['Type']
            if declared_type != expected_type:
                err_msg = (
                    f'INFO tag for {description} ({option} {tag}) is not '
                    f'correctly specified in input VCF '
                    f'(Type={declared_type}), should be '
                    f'Type={expected_type}'
                )
                return pcgr_error_message(err_msg, logger)
            logger.info(
                f'Found INFO tag for {description} ({option} {tag}) '
                f'in input VCF'
            )
            found[option] = True

    # Configured (non-empty) tags that never showed up in the header.
    for option in ('call_conf_tag', 'tumor_dp_tag', 'tumor_af_tag',
                   'normal_dp_tag', 'normal_af_tag'):
        if configured[option] != '' and not found[option]:
            logger.warning(
                f'Could not find the specified {option} '
                f'({configured[option]}) in INFO column of input VCF'
            )

    # Depth and allelic fraction are only useful as a pair.
    for dp_option, af_option in (('tumor_dp_tag', 'tumor_af_tag'),
                                 ('normal_dp_tag', 'normal_af_tag')):
        if found[dp_option] != found[af_option]:
            missing = af_option if found[dp_option] else dp_option
            logger.warning(
                f"BOTH '{dp_option}' AND '{af_option}' need to be specified "
                f"for use in tumor report ('{missing}' is missing)"
            )

    return 0
def check_format_ad_dp_tags(vcf, pcgr_directory, config_options, logger):
    """
    Check that the INFO tags configured for depth / allelic fraction / call
    confidence are declared with the expected Type in the VCF header.

    Parameters:
        vcf: object whose ``header_iter()`` yields header records; each
            record's ``info()`` must return a mapping with (at least) the
            keys 'ID', 'HeaderType' and 'Type' for INFO entries
            (presumably a cyvcf2 ``VCF`` - confirm against caller).
        pcgr_directory: not used by this function; kept so existing callers
            keep working.
        config_options: nested mapping. Tag names are read from
            ``config_options['allelic_support']`` (keys 'tumor_dp_tag',
            'tumor_af_tag', 'control_dp_tag', 'control_af_tag',
            'call_conf_tag'; an empty string means "not configured").
            ``config_options['tumor_only']`` supplies
            'exclude_likely_hom_germline', 'exclude_likely_het_germline'
            and 'vcf_tumor_only'.
        logger: logging.Logger-like object used for info/warning output.

    Returns:
        0 when no type mismatch was detected. On the first mismatch the
        result of ``pcgr_error_message(err_msg, logger)`` is returned
        (helper defined elsewhere; its behaviour is not visible here).
    """
    allelic_support = config_options['allelic_support']
    tumor_dp_tag = allelic_support['tumor_dp_tag']
    tumor_af_tag = allelic_support['tumor_af_tag']
    control_dp_tag = allelic_support['control_dp_tag']
    control_af_tag = allelic_support['control_af_tag']
    call_conf_tag = allelic_support['call_conf_tag']

    # Same call order as before, in case the helper logs or aborts.
    annoutils.detect_reserved_info_tag(tumor_dp_tag, 'tumor_dp_tag', logger)
    annoutils.detect_reserved_info_tag(
        control_dp_tag, 'control_dp_tag', logger)
    annoutils.detect_reserved_info_tag(tumor_af_tag, 'tumor_af_tag', logger)
    annoutils.detect_reserved_info_tag(
        control_af_tag, 'control_af_tag', logger)
    annoutils.detect_reserved_info_tag(call_conf_tag, 'call_conf_tag', logger)

    # (option name, configured tag, required header Type, description).
    # The tuple order is the order in which a header record is checked, so
    # the first reported mismatch is the same one as before.
    tag_specs = (
        ('tumor_dp_tag', tumor_dp_tag, 'Integer',
         'tumor variant sequencing depth'),
        ('tumor_af_tag', tumor_af_tag, 'Float',
         'tumor variant allelic fraction'),
        ('control_dp_tag', control_dp_tag, 'Integer',
         'normal/control variant sequencing depth'),
        ('control_af_tag', control_af_tag, 'Float',
         'normal/control allelic fraction'),
        ('call_conf_tag', call_conf_tag, 'String',
         'variant call confidence'),
    )
    configured = {option: tag for option, tag, _, _ in tag_specs}
    found = {option: False for option in configured}

    for e in vcf.header_iter():
        header_element = e.info()
        if 'ID' not in header_element or 'HeaderType' not in header_element:
            continue
        if header_element['HeaderType'] != 'INFO':
            continue
        # No break: two options may be configured with the same tag name.
        for option, tag, expected_type, description in tag_specs:
            if header_element['ID'] != tag:
                continue
            declared_type = header_element['Type']
            if declared_type != expected_type:
                err_msg = (
                    f'INFO tag for {description} ({option} {tag}) is not '
                    f'correctly specified in input VCF '
                    f'(Type={declared_type}), should be '
                    f'Type={expected_type}'
                )
                return pcgr_error_message(err_msg, logger)
            logger.info(
                f'Found INFO tag for {description} ({option} {tag}) '
                f'in input VCF'
            )
            found[option] = True

    # Configured (non-empty) tags that never showed up in the header.
    for option in ('call_conf_tag', 'tumor_dp_tag', 'tumor_af_tag',
                   'control_dp_tag', 'control_af_tag'):
        if configured[option] != '' and not found[option]:
            logger.warning(
                f'Could not find the specified {option} '
                f'({configured[option]}) in INFO column of input VCF'
            )

    # Tumor-only germline filters rely on the tumor allelic fraction; warn
    # that they cannot be applied when that tag is absent. Lookup order of
    # the config keys is kept as in the original conditions.
    tumor_only_options = config_options['tumor_only']
    for filter_option, zygosity in (
            ('exclude_likely_hom_germline', 'homozygous'),
            ('exclude_likely_het_germline', 'heterozygous')):
        if (tumor_only_options[filter_option] is True
                and tumor_only_options['vcf_tumor_only'] is True
                and not found['tumor_af_tag']):
            logger.warning(
                f'Could not find the specified tumor_af_tag '
                f'({tumor_af_tag}) in INFO column of input VCF - filtering '
                f'of {zygosity} germline variants in tumor-only mode will '
                f'be ignored'
            )

    # Depth and allelic fraction are only useful as a pair.
    for dp_option, af_option in (('tumor_dp_tag', 'tumor_af_tag'),
                                 ('control_dp_tag', 'control_af_tag')):
        if found[dp_option] != found[af_option]:
            missing = af_option if found[dp_option] else dp_option
            logger.warning(
                f"BOTH '{dp_option}' AND '{af_option}' need to be specified "
                f"for use in tumor report ('{missing}' is missing)"
            )

    # NOTE (from original): if AF-based tumor-only filtering is turned on,
    # an error should be returned when the tumor AF tag is not defined -
    # this is not implemented; only warnings are emitted above.
    return 0
def check_format_ad_dp_tags(vcf,
                            tumor_dp_tag,
                            tumor_af_tag,
                            control_dp_tag,
                            control_af_tag,
                            call_conf_tag,
                            exclude_hom_germline,
                            exclude_het_germline,
                            tumor_only,
                            logger):
    """
    Function that checks whether the INFO tags specified for depth/allelic
    fraction are correctly formatted in the VCF header (i.e. Type)

    Parameters:
        vcf: object whose ``header_iter()`` yields header records; each
            record's ``info()`` must return a mapping with (at least) the
            keys 'ID', 'HeaderType' and 'Type' for INFO entries
            (presumably a cyvcf2 ``VCF`` - confirm against caller).
        tumor_dp_tag, control_dp_tag: INFO tag names expected to be declared
            with Type=Integer.
        tumor_af_tag, control_af_tag: INFO tag names expected to be declared
            with Type=Float.
        call_conf_tag: INFO tag name expected to be declared with
            Type=String.
        exclude_hom_germline, exclude_het_germline: only the literal ``True``
            enables the corresponding tumor-only warning.
        tumor_only: compared against 1 to decide whether tumor-only
            warnings apply.
        logger: logging.Logger-like object used for info/warning output.

    The value '_NA_' for a tag name means "not configured" and suppresses the
    "could not find" warning for that tag.

    Returns:
        0 when no type mismatch was detected. On the first mismatch the
        result of ``error_message(err_msg, logger)`` is returned (helper
        defined elsewhere; its behaviour is not visible here).
    """
    # Same call order as before, in case the helper logs or aborts.
    annoutils.detect_reserved_info_tag(tumor_dp_tag, 'tumor_dp_tag', logger)
    annoutils.detect_reserved_info_tag(
        control_dp_tag, 'control_dp_tag', logger)
    annoutils.detect_reserved_info_tag(tumor_af_tag, 'tumor_af_tag', logger)
    annoutils.detect_reserved_info_tag(
        control_af_tag, 'control_af_tag', logger)
    annoutils.detect_reserved_info_tag(call_conf_tag, 'call_conf_tag', logger)

    # (option name, configured tag, required header Type, description).
    # The tuple order is the order in which a header record is checked, so
    # the first reported mismatch is the same one as before.
    tag_specs = (
        ('tumor_dp_tag', tumor_dp_tag, 'Integer',
         'tumor variant sequencing depth'),
        ('tumor_af_tag', tumor_af_tag, 'Float',
         'tumor variant allelic fraction'),
        ('control_dp_tag', control_dp_tag, 'Integer',
         'normal/control variant sequencing depth'),
        ('control_af_tag', control_af_tag, 'Float',
         'normal/control allelic fraction'),
        ('call_conf_tag', call_conf_tag, 'String',
         'variant call confidence'),
    )
    configured = {option: tag for option, tag, _, _ in tag_specs}
    found = {option: False for option in configured}

    for e in vcf.header_iter():
        header_element = e.info()
        if 'ID' not in header_element or 'HeaderType' not in header_element:
            continue
        if header_element['HeaderType'] != 'INFO':
            continue
        # No break: two options may be configured with the same tag name.
        for option, tag, expected_type, description in tag_specs:
            if header_element['ID'] != tag:
                continue
            declared_type = header_element['Type']
            if declared_type != expected_type:
                err_msg = (
                    f'INFO tag for {description} ({option} {tag}) is not '
                    f'correctly specified in input VCF '
                    f'(Type={declared_type}), should be '
                    f'Type={expected_type}'
                )
                return error_message(err_msg, logger)
            logger.info(
                f'Found INFO tag for {description} ({option} {tag}) '
                f'in input VCF'
            )
            found[option] = True

    # Configured tags (anything but the '_NA_' placeholder) that never
    # showed up in the header.
    for option in ('call_conf_tag', 'tumor_dp_tag', 'tumor_af_tag',
                   'control_dp_tag', 'control_af_tag'):
        if configured[option] != '_NA_' and not found[option]:
            logger.warning(
                f"Could not find the specified {option} "
                f"('{configured[option]}') in INFO column of input VCF"
            )

    # Tumor-only germline filters rely on the tumor allelic fraction; warn
    # that they cannot be applied when that tag is absent.
    for filter_enabled, zygosity in ((exclude_hom_germline, 'homozygous'),
                                     (exclude_het_germline, 'heterozygous')):
        if (filter_enabled is True and tumor_only == 1
                and not found['tumor_af_tag']):
            logger.warning(
                f"Could not find the specified tumor_af_tag "
                f"('{tumor_af_tag}') in INFO column of input VCF - "
                f"filtering of {zygosity} germline variants in tumor-only "
                f"mode will be ignored"
            )

    # Depth and allelic fraction are only useful as a pair.
    for dp_option, af_option in (('tumor_dp_tag', 'tumor_af_tag'),
                                 ('control_dp_tag', 'control_af_tag')):
        if found[dp_option] != found[af_option]:
            missing = af_option if found[dp_option] else dp_option
            logger.warning(
                f"BOTH '{dp_option}' AND '{af_option}' need to be specified "
                f"for use in tumor report ('{missing}' is missing)"
            )

    # NOTE (from original): if AF-based tumor-only filtering is turned on,
    # an error should be returned when the tumor AF tag is not defined -
    # this is not implemented; only warnings are emitted above.
    return 0