Esempio n. 1
0
def parse_jsons(jsonfile, stats, count_sv=False, count_all=False):
    '''
    parse json, extract T, TP, FP stats for various variant types

    adapted from Roger Liu's code.
    :param jsonfile: path to a vcfcompare JSON report
    :param stats: dict of {variant_type: {metric: count}}; mutated in place
    :param count_sv: if True, accumulate only the SV counts (svSumCount)
    :param count_all: if True, accumulate all counts (sum_count); takes precedence over count_sv
    :return: None (stats is updated in place)
    '''
    # list() is required under Python 3, where dict.keys() returns a
    # view that does not support indexing (var_types[0] below).
    var_types = list(stats.keys())
    metrics = list(stats[var_types[0]].keys())
    with utils.versatile_open(jsonfile, 'r') as fh:
        data = json.load(fh)
        per_type = data['num_true_correct']['data']
        for vt in var_types:
            if vt not in per_type:
                continue
            for mt in metrics:
                try:
                    if count_all:
                        stats[vt][mt] += per_type[vt]['sum_count'][mt]
                    elif count_sv:
                        stats[vt][mt] += per_type[vt]['svSumCount'][mt]
                    else:
                        # non-SV counts = all counts minus SV counts
                        stats[vt][mt] += per_type[vt]['sum_count'][mt]
                        stats[vt][mt] -= per_type[vt]['svSumCount'][mt]
                except KeyError as err:
                    # missing field: report and leave the count unchanged
                    print("error in {}. No {} field".format(jsonfile, err))
Esempio n. 2
0
def parse_jsons(jsonfile, stats, count_sv=False, count_all=False):
    '''
    parse json, extract T, TP, FP stats for various variant types

    adapted from Roger Liu's code.
    :param jsonfile: path to a vcfcompare JSON report
    :param stats: dict of {variant_type: {metric: count}}; mutated in place
    :param count_sv: if True, accumulate only the SV counts (svSumCount)
    :param count_all: if True, accumulate all counts (sum_count); takes precedence over count_sv
    :return: None (stats is updated in place)
    '''
    # list() is required under Python 3, where dict.keys() returns a
    # view that does not support indexing (var_types[0] below).
    var_types = list(stats.keys())
    metrics = list(stats[var_types[0]].keys())
    with utils.versatile_open(jsonfile, 'r') as fh:
        data = json.load(fh)
        per_type = data['num_true_correct']['data']
        for vt in var_types:
            if vt not in per_type:
                continue
            for mt in metrics:
                try:
                    if count_all:
                        stats[vt][mt] += per_type[vt]['sum_count'][mt]
                    elif count_sv:
                        stats[vt][mt] += per_type[vt]['svSumCount'][mt]
                    else:
                        # non-SV counts = all counts minus SV counts
                        stats[vt][mt] += per_type[vt]['sum_count'][mt]
                        stats[vt][mt] -= per_type[vt]['svSumCount'][mt]
                except KeyError as err:
                    # missing field: report and leave the count unchanged
                    print("error in {}. No {} field".format(jsonfile, err))
Esempio n. 3
0
def convertCN(filenames, operation):
    """
    convert '2/1'-like copy number to a single number(e.g. 2)
    0 will be considered same as 1
    by default the max number will be kept
    the change is in place

    :param filenames: iterable of VCF file paths to rewrite in place
    :param operation: "two2one" (collapse 'a/b' CN to the max) or
                      "one2two" (expand a single CN to per-allele form)
    :raises ValueError: if operation is not one of the two allowed values
    """
    logger = logging.getLogger(convertCN.__name__)
    logger.info("convertCN started")
    if operation != "two2one" and operation != "one2two":
        raise ValueError("Only two2one or one2two allowed")
    two2one = operation == "two2one"
    # CN and GT sub-fields may use '/' (unphased) or '|' (phased)
    delimiter = re.compile('[/|]')
    for name in filenames:
        logger.info("processing {}".format(name))
        with versatile_open(name, 'r') as file_fd:
            # BUG FIX: 'r+w' is not a valid file mode and raises
            # ValueError under Python 3; 'w+' (write then read) is intended.
            output = tempfile.NamedTemporaryFile(mode='w+', delete=False)
            for l in file_fd:
                l = l.rstrip()
                fields = l.split("\t")
                if l.startswith("#") or 'CN' not in fields[8]:
                    # header lines and records without CN in FORMAT pass
                    # through, with the CN FORMAT type header adjusted
                    if l.startswith('##FORMAT=<ID=CN'):
                        if two2one:
                            l = l.replace("Type=String", "Type=Integer")
                        else:
                            l = l.replace("Type=Integer", "Type=String")
                    output.write(l + "\n")
                else:
                    info = fields[8].split(':')
                    cnIndex = info.index('CN')
                    gtIndex = info.index('GT')
                    #change CN field in all samples
                    for sampleIndex in range(9, len(fields)):
                        sampleInfo = fields[sampleIndex].split(':')
                        if two2one:
                            cn = delimiter.split(sampleInfo[cnIndex])
                            #here cn is list of strings
                            sampleInfo[cnIndex] = str(max(map(int, cn)))
                        elif len(delimiter.split(sampleInfo[cnIndex])) == 1:
                            #only split when there is only one number
                            gt = delimiter.split(sampleInfo[gtIndex])
                            cn = sampleInfo[cnIndex]
                            for i in range(len(gt)):
                                # reference allele (0) keeps copy number 1
                                gt[i] = '1' if gt[i] == '0' else cn
                            # preserve the original phasing separator
                            if sampleInfo[gtIndex].find('/') >= 0:
                                sampleInfo[cnIndex] = '/'.join(gt)
                            else:
                                sampleInfo[cnIndex] = '|'.join(gt)
                        fields[sampleIndex] = ":".join(sampleInfo)
                    output.write("\t".join(fields) + "\n")
            output.close()
            # overwrite the input with the converted file, then clean up
            shutil.copyfile(output.name, name)
            os.remove(output.name)
    logger.info("convertCN done")
    return
Esempio n. 4
0
def convertCN(filenames, operation):
    """
    convert '2/1'-like copy number to a single number(e.g. 2)
    0 will be considered same as 1
    by default the max number will be kept
    the change is in place

    :param filenames: iterable of VCF file paths to rewrite in place
    :param operation: "two2one" (collapse 'a/b' CN to the max) or
                      "one2two" (expand a single CN to per-allele form)
    :raises ValueError: if operation is not one of the two allowed values
    """
    logger = logging.getLogger(convertCN.__name__)
    logger.info("convertCN started")
    if operation != "two2one" and operation != "one2two":
        raise ValueError("Only two2one or one2two allowed")
    two2one = operation == "two2one"
    # CN and GT sub-fields may use '/' (unphased) or '|' (phased)
    delimiter = re.compile('[/|]')
    for name in filenames:
        logger.info("processing {}".format(name))
        with versatile_open(name, 'r') as file_fd:
            # BUG FIX: 'r+w' is not a valid file mode and raises
            # ValueError under Python 3; 'w+' (write then read) is intended.
            output = tempfile.NamedTemporaryFile(mode='w+', delete=False)
            for l in file_fd:
                l = l.rstrip()
                fields = l.split("\t")
                if l.startswith("#") or 'CN' not in fields[8]:
                    # header lines and records without CN in FORMAT pass
                    # through, with the CN FORMAT type header adjusted
                    if l.startswith('##FORMAT=<ID=CN'):
                        if two2one:
                            l = l.replace("Type=String", "Type=Integer")
                        else:
                            l = l.replace("Type=Integer", "Type=String")
                    output.write(l + "\n")
                else:
                    info = fields[8].split(':')
                    cnIndex = info.index('CN')
                    gtIndex = info.index('GT')
                    #change CN field in all samples
                    for sampleIndex in range(9, len(fields)):
                        sampleInfo = fields[sampleIndex].split(':')
                        if two2one:
                            cn = delimiter.split(sampleInfo[cnIndex])
                            #here cn is list of strings
                            sampleInfo[cnIndex] = str(max(map(int, cn)))
                        elif len(delimiter.split(sampleInfo[cnIndex])) == 1:
                            #only split when there is only one number
                            gt = delimiter.split(sampleInfo[gtIndex])
                            cn = sampleInfo[cnIndex]
                            for i in range(len(gt)):
                                # reference allele (0) keeps copy number 1
                                gt[i] = '1' if gt[i] == '0' else cn
                            # preserve the original phasing separator
                            if sampleInfo[gtIndex].find('/') >= 0:
                                sampleInfo[cnIndex] = '/'.join(gt)
                            else:
                                sampleInfo[cnIndex] = '|'.join(gt)
                        fields[sampleIndex] = ":".join(sampleInfo)
                    output.write("\t".join(fields) + "\n")
            output.close()
            # overwrite the input with the converted file, then clean up
            shutil.copyfile(output.name, name)
            os.remove(output.name)
    logger.info("convertCN done")
    return
Esempio n. 5
0
    def run(self):
        '''
        Run RTG vcfeval comparing self.vcfs[0] against self.true_vcf and
        record the resulting TP/TP-predict/FN/FP VCF paths on the instance.

        :return:
        '''
        #command example
        #rtg-tools-3.8.4-bdba5ea_install/rtg vcfeval --baseline truth.vcf.gz \
        #--calls compare1.vcf.gz -o vcfeval_split_snp -t ref.sdf --output-mode=annotate --sample xx --squash-ploidy --regions ?? \
        command = ['java', utils.JAVA_XMX, '-jar', utils.RTGJAR, 'vcfeval']
        command.extend(['-o', self.prefix])
        command.extend(['--baseline', self.true_vcf])
        command.extend(['-t', self.reference])
        if not self.exclude_filtered:
            command.append('--all-records')
        if not self.match_geno:
            command.append('--squash-ploidy')
        if self.sample:
            command.extend(['--sample', self.sample])
        if self.regions:
            command.extend(['--bed-regions', self.regions])
        if self.opts:
            command.append(self.opts)
        if len(self.vcfs) != 1:
            raise ValueError('vcfeval only takes 1 prediction VCF and 1 truth VCF: {0}'.format(self.vcfs))
        command.extend(['--calls', self.vcfs[0]])

        tp = os.path.join(self.prefix, 'tp-baseline.vcf.gz')
        tp_predict = os.path.join(self.prefix, 'tp.vcf.gz')
        fn = os.path.join(self.prefix, 'fn.vcf.gz')
        fp = os.path.join(self.prefix, 'fp.vcf.gz')

        # When both truth and prediction contain zero variants, skip
        # vcfeval and copy the (empty) inputs into the output slots.
        if utils.count_variants(self.true_vcf) == 0 and utils.count_variants(self.vcfs[0]) == 0:
            utils.makedirs([self.prefix])
            shutil.copyfile(self.true_vcf, tp)
            shutil.copyfile(self.true_vcf, fn)
            shutil.copyfile(self.vcfs[0], tp_predict)
            shutil.copyfile(self.vcfs[0], fp)
        elif self.log_to_file:
            with utils.versatile_open(self.log_to_file, 'a') as logout:
                utils.run_shell_command(command, sys.stderr, logout)
        else:
            utils.run_shell_command(command, sys.stderr, sys.stderr)
        # All four output files must exist, whichever path produced them.
        for expected in (tp, tp_predict, fn, fp):
            if not os.path.exists(expected):
                raise Exception('{0} was not generated by vcfeval. Please check and rerun.'.format(expected))
        self.tp, self.tp_predict, self.fn, self.fp = tp, tp_predict, fn, fp
Esempio n. 6
0
    def run(self):
        '''
        Build and run the VarSim vcfcompare command, then record the
        TP/FN/FP output VCF paths on the instance.

        :return: None; sets self.tp, self.fn and self.fp
        '''
        cmd = [
            self.java,
            utils.JAVA_XMX,
            '-jar',
            utils.VARSIMJAR,
            'vcfcompare',
            '-prefix',
            self.prefix,
            '-true_vcf',
            self.true_vcf,
            '-reference',
            self.reference,
        ]
        if self.exclude_filtered:
            cmd.append('-exclude_filtered')
        if self.match_geno:
            cmd.append('-match_geno')
        if self.sample:
            cmd.append('-sample')
            cmd.append(self.sample)
        if self.regions:
            cmd.append('-bed')
            cmd.append(self.regions)
        if self.disallow_partial_fp:
            cmd.append('-disallow_partial_fp')
        # NOTE(review): str(self.sv_length) is truthy for almost any value
        # (even str(None) == 'None'), so this condition nearly always holds —
        # confirm whether `if self.sv_length is not None:` was intended.
        if str(self.sv_length):
            # NOTE(review): flag and value are appended as a single list
            # element ('-sv_length N'); verify run_shell_command joins or
            # re-tokenizes the command so this is not passed as one argument.
            cmd.append('-sv_length {}'.format(self.sv_length))
        if self.opts:
            cmd.append(self.opts)
        cmd.extend(self.vcfs)

        # stream tool output either to the log file or to stderr
        if self.log_to_file:
            with utils.versatile_open(self.log_to_file, 'a') as logout:
                utils.run_shell_command(cmd, sys.stdout, logout)
        else:
            utils.run_shell_command(cmd, sys.stdout, sys.stderr)
        tp = self.prefix + '_TP.vcf'
        fn = self.prefix + '_FN.vcf'
        fp = self.prefix + '_FP.vcf'
        # vcfcompare must emit all three files; a missing one means the
        # run failed and we refuse to continue silently.
        for i in (tp, fn, fp):
            if not os.path.exists(i):
                raise Exception(
                    '{0} was not generated by VarSim vcfcompare. Please check and rerun.'
                    .format(i))
        self.tp, self.fn, self.fp = tp, fn, fp
Esempio n. 7
0
def generate_sdf(reference, log):
    '''
    take reference and generate SDF
    :param reference:
    :return:
    '''
    sdf = reference + '.sdf'
    # An already-present SDF directory is treated as up to date and reused.
    if os.path.exists(sdf):
        LOGGER.info('{0} exists, doing nothing'.format(sdf))
        LOGGER.info('to rerun SDF generation, please remove or rename {0}'.format(sdf))
        return sdf
    format_cmd = ['java', utils.JAVA_XMX, '-jar', utils.RTGJAR, 'format']
    format_cmd += ['-o', sdf, reference]
    if not log:
        utils.run_shell_command(format_cmd, sys.stdout, sys.stderr)
    else:
        # append both stdout and stderr of rtg format to the log file
        with utils.versatile_open(log, 'a') as logout:
            utils.run_shell_command(format_cmd, logout, logout)
    return sdf
Esempio n. 8
0
def generate_sdf(reference, log):
    '''
    take reference and generate SDF
    :param reference:
    :return:
    '''
    sdf = reference + '.sdf'
    if not os.path.exists(sdf):
        rtg_format = ['java', utils.JAVA_XMX, '-jar', utils.RTGJAR, 'format',
                      '-o', sdf, reference]
        if log:
            # append both stdout and stderr of rtg format to the log file
            with utils.versatile_open(log, 'a') as logout:
                utils.run_shell_command(rtg_format, logout, logout)
        else:
            utils.run_shell_command(rtg_format, sys.stdout, sys.stderr)
        return sdf
    # An already-present SDF directory is treated as up to date and reused.
    LOGGER.info('{0} exists, doing nothing'.format(sdf))
    LOGGER.info(
        'to rerun SDF generation, please remove or rename {0}'.format(sdf))
    return sdf
Esempio n. 9
0
    def run(self):
        '''
        Invoke VarSim vcfcompare on self.vcfs against self.true_vcf and
        record the resulting TP/FN/FP VCF paths on the instance.

        :return:
        '''
        cmd = ['java', utils.JAVA_XMX, '-jar', utils.VARSIMJAR, 'vcfcompare']
        cmd += ['-prefix', self.prefix]
        cmd += ['-true_vcf', self.true_vcf]
        cmd += ['-reference', self.reference]
        if self.exclude_filtered:
            cmd.append('-exclude_filtered')
        if self.match_geno:
            cmd.append('-match_geno')
        if self.sample:
            cmd += ['-sample', self.sample]
        if self.regions:
            cmd += ['-bed', self.regions]
        if self.disallow_partial_fp:
            cmd.append('-disallow_partial_fp')
        if self.opts:
            cmd.append(self.opts)
        cmd += self.vcfs

        # stream tool output either to the log file or to stderr
        if self.log_to_file:
            with utils.versatile_open(self.log_to_file, 'a') as logout:
                utils.run_shell_command(cmd, sys.stdout, logout)
        else:
            utils.run_shell_command(cmd, sys.stdout, sys.stderr)
        outputs = {
            'tp': self.prefix + '_TP.vcf',
            'fn': self.prefix + '_FN.vcf',
            'fp': self.prefix + '_FP.vcf',
        }
        # vcfcompare must emit all three files; fail loudly otherwise
        for path in outputs.values():
            if not os.path.exists(path):
                raise Exception('{0} was not generated by VarSim vcfcompare. Please check and rerun.'.format(path))
        self.tp = outputs['tp']
        self.fn = outputs['fn']
        self.fp = outputs['fp']
Esempio n. 10
0
    def run(self):
        '''
        Run RTG vcfeval comparing self.vcfs[0] against self.true_vcf and
        record the resulting TP/TP-predict/FN/FP VCF paths on the instance.

        :return: None; sets self.tp, self.tp_predict, self.fn, self.fp
        '''
        #command example
        #rtg-tools-3.8.4-bdba5ea_install/rtg vcfeval --baseline truth.vcf.gz \
        #--calls compare1.vcf.gz -o vcfeval_split_snp -t ref.sdf --output-mode=annotate --sample xx --squash-ploidy --regions ?? \
        cmd = [
            'java',
            utils.JAVA_XMX,
            '-jar',
            utils.RTGJAR,
            'vcfeval',
            '-o',
            self.prefix,
            '--baseline',
            self.true_vcf,
            '-t',
            self.reference,
        ]
        # vcfeval flags are inverted relative to the instance attributes:
        # include filtered records unless exclusion was requested, and
        # ignore genotype (ploidy) unless genotype matching was requested.
        if not self.exclude_filtered:
            cmd.append('--all-records')
        if not self.match_geno:
            cmd.append('--squash-ploidy')
        if self.sample:
            cmd.append('--sample')
            cmd.append(self.sample)
        if self.regions:
            cmd.append('--bed-regions')
            cmd.append(self.regions)
        if self.opts:
            cmd.append(self.opts)
        if len(self.vcfs) != 1:
            raise ValueError(
                'vcfeval only takes 1 prediction VCF and 1 truth VCF: {0}'.
                format(self.vcfs))
        cmd.append('--calls')
        cmd.append(self.vcfs[0])

        # vcfeval writes fixed file names under the output prefix directory
        tp = os.path.join(self.prefix, 'tp-baseline.vcf.gz')
        tp_predict = os.path.join(self.prefix, 'tp.vcf.gz')
        fn = os.path.join(self.prefix, 'fn.vcf.gz')
        fp = os.path.join(self.prefix, 'fp.vcf.gz')

        # When both truth and prediction contain zero variants, skip
        # vcfeval and copy the (empty) inputs into the output slots.
        if utils.count_variants(self.true_vcf) == 0 and utils.count_variants(
                self.vcfs[0]) == 0:
            #both truth and prediction are empty, do nothing
            utils.makedirs([self.prefix])
            shutil.copyfile(self.true_vcf, tp)
            shutil.copyfile(self.true_vcf, fn)
            shutil.copyfile(self.vcfs[0], tp_predict)
            shutil.copyfile(self.vcfs[0], fp)
        else:
            if self.log_to_file:
                with utils.versatile_open(self.log_to_file, 'a') as logout:
                    utils.run_shell_command(cmd, sys.stderr, logout)
            else:
                utils.run_shell_command(cmd, sys.stderr, sys.stderr)
        # All four output files must exist, whichever path produced them.
        for i in (tp, tp_predict, fn, fp):
            if not os.path.exists(i):
                raise Exception(
                    '{0} was not generated by vcfeval. Please check and rerun.'
                    .format(i))
        self.tp, self.tp_predict, self.fn, self.fp = tp, tp_predict, fn, fp
Esempio n. 11
0
def match_false(augmented_file,
                files_to_pair_with,
                out_dir,
                sample,
                log_to_file,
                vcfeval_options,
                sdf,
                java="java"):
    """Try to pair up each false call in a file (augmented_file) with a variant in the other files provided in a list (files_to_pair_with) to create an annotated version of the first file.
    By default the first variant in the list is provided to get an AF, the 2nd to determine the simulated variant (for false positives) and the 3rd to determine if a false positive is
    a pure false positive (not simulated) or not (wrong genotype)

    :param augmented_file: VCF of false calls to annotate
    :param files_to_pair_with: list of up to 3 VCF paths; entries may be falsy to skip that role
    :param out_dir: directory for temporary and output files
    :param sample: sample name passed to vcfeval
    :param log_to_file: optional log file path for vcfeval output
    :param vcfeval_options: extra options string for vcfeval
    :param sdf: RTG SDF reference directory
    :param java: java executable to use
    """

    files_to_pair_with_clean = []
    for item in files_to_pair_with:
        files_to_pair_with_clean.append(utils.make_clean_vcf(item, out_dir))

    content = []
    annotated_content = []

    with utils.versatile_open(augmented_file, "rt") as augmented_file_handle:
        for line in augmented_file_handle.readlines():
            line_strip = line.strip()
            line_split = line_strip.split()

            if line_strip[0] == "#":
                annotated_content.append(line_strip)
                content.append(line_strip)

            else:
                # keep only the header plus the current record in `content`
                if content[-1][0] != "#":
                    del content[-1]
                content.append(line_strip)

                single_var_file = utils.write_vcf(
                    content, os.path.join(out_dir, "single.vcf"))
                single_var_file = utils.sort_and_compress(single_var_file)

                # BUG FIX: `info` was unbound when files_to_pair_with_clean
                # is empty; initialize it so line_split[6] is always valid.
                info = "N/A;"

                for i, item in enumerate(files_to_pair_with_clean):

                    equivalent_variant = None

                    if item:
                        vcfeval_prefix = os.path.join(
                            out_dir, 'vcfeval_compare_results_annotate')

                        vcfeval_comparator = RTGVCFComparator(
                            prefix=vcfeval_prefix,
                            true_vcf=item,
                            reference=sdf,
                            regions=None,
                            sample=sample,
                            vcfs=[single_var_file],
                            exclude_filtered=False,
                            match_geno=False,
                            log_to_file=log_to_file,
                            opts=vcfeval_options,
                            java=java)

                        equivalent_variant = utils.get_equivalent_variant(
                            line_split, vcfeval_comparator.get_tp())

                        #clean up
                        if os.path.exists(vcfeval_prefix):
                            # Logger.warn is deprecated; use warning()
                            LOGGER.warning('{0} exists, removing ...'.format(
                                vcfeval_prefix))
                            shutil.rmtree(vcfeval_prefix)

                    if i == 0:
                        # allele frequency computed from AO/RO of the
                        # paired variant's sample column
                        if equivalent_variant:
                            try:
                                AO = int(equivalent_variant[-1].split(':')
                                         [4].split(',')[0])
                                RO = int(equivalent_variant[-1].split(':')
                                         [2].split(',')[0])
                            except (IndexError, ValueError):
                                # sample column lacks parsable AO/RO fields
                                info = "N/A;"
                            else:
                                info = str(float(AO) / (AO + RO)) + ';'
                        else:
                            info = "N/A;"

                    elif i == 1:
                        # record the matched simulated variant signature
                        if equivalent_variant:
                            info += '_'.join([equivalent_variant[0],
                                              equivalent_variant[1],
                                              equivalent_variant[3],
                                              equivalent_variant[4],
                                              equivalent_variant[-1]]) + ";"
                        else:
                            info += "N/A;"

                    elif i == 2:
                        info += "pure;" if not equivalent_variant else "not;"

                line_split[6] = info
                annotated_content.append('\t'.join(line_split))

                #clean up
                if os.path.isfile(single_var_file):
                    os.remove(single_var_file)
                    os.remove(single_var_file + ".tbi")

    # BUG FIX: this previously referenced the global `args.out_dir`;
    # the `out_dir` parameter is the intended destination.
    annotated_file = utils.write_vcf(
        annotated_content,
        os.path.join(
            out_dir, "{}_annotated.vcf".format(
                os.path.splitext(
                    os.path.splitext(
                        os.path.basename(augmented_file))[0])[0])))
    annotated_file = utils.sort_and_compress(annotated_file)

    #clean up
    for item in files_to_pair_with_clean:
        if item and os.path.isfile(item):
            os.remove(item)
            os.remove(item + ".tbi")
Esempio n. 12
0
    def run(self):
        '''
        Run RTG vcfeval comparing self.vcfs[0] against self.true_vcf and
        record the resulting TP/TP-predict/FN/FP VCF paths on the instance.

        :return: None; sets self.tp, self.tp_predict, self.fn, self.fp
        '''
        #command example
        #rtg-tools-3.8.4-bdba5ea_install/rtg vcfeval --baseline truth.vcf.gz \
        #--calls compare1.vcf.gz -o vcfeval_split_snp -t ref.sdf --output-mode=annotate --sample xx --squash-ploidy --regions ?? \
        cmd = [
            self.java,
            utils.JAVA_XMX,
            '-jar',
            utils.RTGJAR,
            'vcfeval',
            '-o',
            self.prefix,
            '--baseline',
            self.true_vcf,
            '-t',
            self.reference,
        ]
        # vcfeval flags are inverted relative to the instance attributes:
        # include filtered records unless exclusion was requested, and
        # ignore genotype (ploidy) unless genotype matching was requested.
        if not self.exclude_filtered:
            cmd.append('--all-records')
        if not self.match_geno:
            cmd.append('--squash-ploidy')
        if self.sample:
            cmd.append('--sample')
            cmd.append(self.sample)
        if self.regions:
            cmd.append('--bed-regions')
            cmd.append(self.regions)
        if self.opts:
            cmd.append(self.opts)
        if len(self.vcfs) != 1:
            raise ValueError(
                'vcfeval only takes 1 prediction VCF and 1 truth VCF: {0}'.
                format(self.vcfs))
        cmd.append('--calls')
        cmd.append(self.vcfs[0])

        # vcfeval writes fixed file names under the output prefix directory
        tp = os.path.join(self.prefix, 'tp-baseline.vcf.gz')
        tp_predict = os.path.join(self.prefix, 'tp.vcf.gz')
        fn = os.path.join(self.prefix, 'fn.vcf.gz')
        fp = os.path.join(self.prefix, 'fp.vcf.gz')

        #vcfeval refuses to run if true_vcf contains 0 variants
        if utils.count_variants(self.true_vcf) == 0:
            utils.makedirs([self.prefix])
            #because there is 0 ground truth variants, TP and FN will be empty
            shutil.copyfile(self.true_vcf, tp)
            shutil.copyfile(self.true_vcf, fn)
            if utils.count_variants(self.vcfs[0]) == 0:
                #if calls are empty, then TP_PREDICT and FP will for sure be empty
                shutil.copyfile(self.vcfs[0], tp_predict)
                shutil.copyfile(self.vcfs[0], fp)
            else:
                #if calls are not empty, then all calls will be FP due to 0 ground truth, TP_PREDICT will be empty
                # TP_PREDICT is synthesized as a header-only VCF by copying
                # the calls file's header lines (records start after '#').
                # NOTE: `input` shadows the builtin of the same name.
                with utils.versatile_open(tp_predict,
                                          "w") as output, utils.versatile_open(
                                              self.vcfs[0], "r") as input:
                    for i in input:
                        if i.startswith('#'):
                            output.write(i)
                        else:
                            break
        else:
            if self.log_to_file:
                with utils.versatile_open(self.log_to_file, 'a') as logout:
                    utils.run_shell_command(cmd, sys.stderr, logout)
            else:
                utils.run_shell_command(cmd, sys.stderr, sys.stderr)
        # All four output files must exist, whichever path produced them.
        for i in (tp, tp_predict, fn, fp):
            if not os.path.exists(i):
                raise Exception(
                    '{0} was not generated by vcfeval. Please check and rerun.'
                    .format(i))
        self.tp, self.tp_predict, self.fn, self.fp = tp, tp_predict, fn, fp
Esempio n. 13
0
def match_false(augmented_file,
                files_to_pair_with,
                out_dir,
                sample,
                log_to_file,
                vcfeval_options,
                sdf,
                java="java"):
    """Try to pair up each false call in a file (augmented_file) with a variant in the other files provided in a list (files_to_pair_with) to create an annotated version of the first file.
    By default the first variant in the list is provided to get an AF, the 2nd to determine the simulated variant (for false positives) and the 3rd to determine if a false positive is
    a pure false positive (not simulated) or not (wrong genotype)

    :param augmented_file: VCF of false calls to annotate
    :param files_to_pair_with: list of up to 3 VCF paths; entries may be falsy to skip that role
    :param out_dir: directory for temporary and output files
    :param sample: sample name passed to vcfeval
    :param log_to_file: optional log file path for vcfeval output
    :param vcfeval_options: extra options string for vcfeval
    :param sdf: RTG SDF reference directory
    :param java: java executable to use
    """
    files_to_pair_with_clean = []
    for item in files_to_pair_with:
        files_to_pair_with_clean.append(utils.make_clean_vcf(item, out_dir))

    content = []
    annotated_content = []

    with utils.versatile_open(augmented_file, "rt") as augmented_file_handle:
        for line in augmented_file_handle.readlines():
            line_strip = line.strip()
            line_split = line_strip.split()

            if line_strip[0] == "#":
                annotated_content.append(line_strip)
                content.append(line_strip)

            else:
                # keep only the header plus the current record in `content`
                if content[-1][0] != "#":
                    del content[-1]
                content.append(line_strip)

                single_var_file = utils.write_vcf(
                    content, os.path.join(out_dir, "single.vcf"))
                single_var_file = utils.sort_and_compress(single_var_file)

                single_var_chr = line_split[0]
                info = ''
                # BUG FIX: filtered_true_vcf was unbound in the cleanup loop
                # below whenever no comparison file was provided (all items
                # falsy); initialize it per record.
                filtered_true_vcf = None

                for i, item in enumerate(files_to_pair_with_clean):

                    nonmatching_gt_variant = None

                    if item:
                        vcfeval_prefix = os.path.join(
                            out_dir, 'vcfeval_compare_results_annotate')

                        #Restrict the comparison to just the chromosome of the single variant by creating a filtered comparison file
                        filtered_true_vcf = utils.write_filtered_vcf(
                            item, single_var_chr,
                            os.path.join(out_dir, "filtered.vcf"))
                        filtered_true_vcf = utils.sort_and_compress(
                            filtered_true_vcf)

                        vcfeval_comparator = RTGVCFComparator(
                            prefix=vcfeval_prefix,
                            true_vcf=filtered_true_vcf,
                            reference=sdf,
                            regions=None,
                            sample=sample,
                            vcfs=[single_var_file],
                            exclude_filtered=False,
                            match_geno=False,
                            log_to_file=log_to_file,
                            opts=vcfeval_options,
                            java=java)

                        nonmatching_gt_variant = utils.get_closest_variant(
                            line_split, vcfeval_comparator.get_tp())

                        #if not nonmatching_gt_variant, check for matching alt and ref at the same position. Example of when this could be applicable is a 0/0 call when vcfeval will not pair up variants at the same locus with the same alt and ref even with match_geno=False
                        if not nonmatching_gt_variant:
                            nonmatching_gt_variant = utils.get_matching_alt_ref(
                                line_split, filtered_true_vcf)

                        #clean up
                        if os.path.exists(vcfeval_prefix):
                            # Logger.warn is deprecated; use warning()
                            LOGGER.warning('{0} exists, removing ...'.format(
                                vcfeval_prefix))
                            shutil.rmtree(vcfeval_prefix)

                    if i == 0:
                        AO_RO_DP_AD = {
                            "AO": None,
                            "RO": None,
                            "DP": None,
                            "AD": None
                        }
                        if nonmatching_gt_variant:
                            for entry in AO_RO_DP_AD:
                                AO_RO_DP_AD[entry] = utils.get_info(
                                    nonmatching_gt_variant, entry)

                        # gatk4 format: AD = ref_count,alt1_count,...
                        if AO_RO_DP_AD["AD"]:
                            AD_split = AO_RO_DP_AD["AD"].split(',')
                            AO = list(map(int, AD_split[1:]))
                            RO = int(AD_split[0])
                            # BUG FIX (latent): the inner loops previously
                            # shadowed the outer `i`/`item` loop variables;
                            # renamed to avoid confusion and future breakage.
                            for idx, alt_count in enumerate(AO):
                                comma = ',' if idx < len(AO) - 1 else ''
                                if alt_count + RO == 0:
                                    info += "0.0" + comma
                                else:
                                    info += str(float(alt_count) /
                                                (alt_count + RO)) + comma
                        #freebayes: separate AO and RO fields
                        elif AO_RO_DP_AD["AO"] and AO_RO_DP_AD["RO"]:
                            ao_values = AO_RO_DP_AD["AO"].split(',')
                            for idx, ao_value in enumerate(ao_values):
                                comma = ',' if idx < len(ao_values) - 1 else ''
                                denominator = int(ao_value) + int(
                                    AO_RO_DP_AD["RO"])
                                if denominator == 0:
                                    info += "0.0" + comma
                                else:
                                    info += str(
                                        float(ao_value) / denominator) + comma
                        else:
                            info += "N/A"

                        info += ';'
                        info += "N/A" if not AO_RO_DP_AD["DP"] else str(
                            AO_RO_DP_AD["DP"])
                        info += ';'
                    elif i == 1:
                        # record the matched simulated variant signature
                        if nonmatching_gt_variant:
                            info += '_'.join([nonmatching_gt_variant[0],
                                              nonmatching_gt_variant[1],
                                              nonmatching_gt_variant[3],
                                              nonmatching_gt_variant[4],
                                              nonmatching_gt_variant[-1]]) + ";"
                        else:
                            info += "N/A;"

                    elif i == 2:
                        info += "pure;" if not nonmatching_gt_variant else "not;"

                line_split[6] = info
                annotated_content.append('\t'.join(line_split))

                #clean up
                for fil in (single_var_file, filtered_true_vcf):
                    if fil and os.path.isfile(fil):
                        os.remove(fil)
                        os.remove(fil + ".tbi")

    # BUG FIX: this previously referenced the global `args.out_dir`;
    # the `out_dir` parameter is the intended destination.
    annotated_file = utils.write_vcf(
        annotated_content,
        os.path.join(
            out_dir, "{}_annotated.vcf".format(
                os.path.splitext(
                    os.path.splitext(
                        os.path.basename(augmented_file))[0])[0])))
    annotated_file = utils.sort_and_compress(annotated_file)

    #clean up
    for item in files_to_pair_with_clean:
        if item and os.path.isfile(item):
            os.remove(item)
            os.remove(item + ".tbi")