Example #1
def index_vcf(cnf, sample_name, filt_vcf_fpath, caller_name=None):
    if cnf is None:
        global glob_cnf
        cnf = glob_cnf

    info()
    info(sample_name + ((', ' + caller_name) if caller_name else '') +
         ': indexing')

    # for fpath in [pass_vcf_fpath, filt_vcf_fpath]:
    #     if not cnf.reuse_intermediate and not verify_file(fpath, silent=True):
    #         err(fpath + ' does not exist - cannot IGV index')
    #     else:
    #         if cnf.reuse_intermediate and verify_file(fpath + '.idx', silent=True):
    #             info('Reusing existing ' + fpath + '.idx')
    #         else:
    #             igvtools_index(cnf, fpath)

    if not cnf.reuse_intermediate and not verify_file(filt_vcf_fpath,
                                                      silent=True):
        err(filt_vcf_fpath + ' does not exist - cannot gzip and tabix')
    else:
        if cnf.reuse_intermediate and verify_file(filt_vcf_fpath + '.gz', silent=True) \
                and verify_file(filt_vcf_fpath + '.gz.tbi', silent=True):
            info(filt_vcf_fpath + '.gz and .gz.tbi exist; reusing')
        else:
            bgzip_and_tabix(cnf, filt_vcf_fpath)
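
Every example on this page funnels into bgzip_and_tabix, which is not shown here. As a rough sketch only (not the project's actual helper), compressing with bgzip and indexing with tabix comes down to something like the following, assuming both tools are on PATH:

import subprocess

def bgzip_and_tabix_sketch(fpath, tabix_parameters='-p vcf'):
    # bgzip -f compresses fpath in place, producing fpath.gz
    subprocess.check_call(['bgzip', '-f', fpath])
    gz_fpath = fpath + '.gz'
    # tabix -f (re)builds the index next to it, producing fpath.gz.tbi
    subprocess.check_call(['tabix', '-f'] + tabix_parameters.split() + [gz_fpath])
    return gz_fpath
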
Example #2
def write_coverage(cnf,
                   output_dir,
                   chrom,
                   depths_by_pos,
                   cov_thresholds,
                   sample_index=None):
    coverage_data_fpath = join(output_dir, chrom + '.txt')
    if not cnf.reuse_intermediate or (
            not verify_file(coverage_data_fpath, silent=True)
            and not verify_file(coverage_data_fpath + '.gz', silent=True)):
        chrom_num = chrom.replace('chr', '')
        with file_transaction(cnf.work_dir, coverage_data_fpath) as tx:
            with open(tx, 'w') as f:
                fs = ['#chrom', 'pos', 'mean', 'median'] + [str(t) for t in cov_thresholds]
                f.write('\t'.join(fs) + '\n')
                sorted_positions = sorted(depths_by_pos.keys())
                for pos in sorted_positions:
                    depths = (depths_by_pos[pos] if sample_index is None
                              else [depths_by_pos[pos][sample_index]])
                    mean_coverage = mean(depths)
                    median_coverage = median(depths)
                    pcnt_samples_ge_threshold = [
                        mean([1 if d >= t else 0 for d in depths])
                        for t in cov_thresholds
                    ]
                    res_line = (chrom_num + '\t' + str(pos) + '\t' +
                                str(mean_coverage) + '\t' + str(median_coverage))
                    for pcnt_samples in pcnt_samples_ge_threshold:
                        res_line += '\t' + str(pcnt_samples)
                    f.write(res_line + '\n')
    bgzip_and_tabix(cnf, coverage_data_fpath, tabix_parameters='-p bed')
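
The threshold columns written above are fractions of samples covered at or above each cutoff. The same computation in isolation (mean here is a stand-in for the helper the real module imports; the depths are made up):

def mean(xs):
    return float(sum(xs)) / len(xs)

depths = [8, 52, 120]
for t in [10, 50, 100]:
    print(t, mean([1 if d >= t else 0 for d in depths]))
# fractions of the 3 samples: 2/3, 2/3, 1/3
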
Example #3
def finialize_annotate_file(cnf, vcf_fpath, sample, callername=None):
    # vcf_fpath = leave_first_sample(cnf, vcf_fpath)

    # if not cnf.no_check:
    #     vcf_fpath = _filter_malformed_fields(cnf, vcf_fpath)

    if not cnf.no_check and callername and 'vardict' not in callername:
        info()
        info('Adding SAMPLE=' + sample.name + ' annotation...')
        vcf_fpath = add_annotation(cnf,
                                   vcf_fpath,
                                   'SAMPLE',
                                   sample.name,
                                   number='1',
                                   type_='String',
                                   description='Sample name')

    final_vcf_fpath = join(
        cnf.output_dir,
        sample.name + (('-' + callername) if callername else '') + '.anno.vcf')
    if cnf.output_file:
        final_vcf_fpath = cnf.output_file
    if not vcf_fpath.endswith('.gz') and final_vcf_fpath.endswith('.gz'):
        final_vcf_fpath = splitext(final_vcf_fpath)[0]
    if vcf_fpath.endswith('.gz') and not final_vcf_fpath.endswith('.gz'):
        final_vcf_fpath = final_vcf_fpath + '.gz'

    info('Moving final VCF ' + vcf_fpath + ' to ' + final_vcf_fpath)
    if isfile(final_vcf_fpath):
        os.remove(final_vcf_fpath)
    shutil.copy(vcf_fpath, final_vcf_fpath)

    if cnf.qc:
        report = qc.make_report(cnf, final_vcf_fpath, sample)
        qc_dirpath = join(cnf.output_dir, 'qc')
        safe_mkdir(qc_dirpath)
        report = qc.save_report(cnf, report, sample, callername, qc_dirpath,
                                source.varqc_name)
        info('Saved QC to ' + qc_dirpath + ' (' + report.html_fpath + ')')
        info('-' * 70)
        info()

    if final_vcf_fpath.endswith('.gz'):
        if not is_gz(final_vcf_fpath):
            err(final_vcf_fpath + ' is in incorrect gzip format')
            anno_vcf_fpath_ungz = splitext(final_vcf_fpath)[0]
            os.rename(final_vcf_fpath, anno_vcf_fpath_ungz)
            # Re-compress properly: otherwise we would fall through and return
            # a .gz path that no longer exists after the rename above.
            final_vcf_fpath = bgzip_and_tabix(cnf, anno_vcf_fpath_ungz)
        else:
            info(final_vcf_fpath + ' is a good gzipped file.')
        return [final_vcf_fpath]
    else:
        info('Compressing and indexing with bgzip+tabix ' + final_vcf_fpath)
        final_vcf_fpath = bgzip_and_tabix(cnf, final_vcf_fpath)
        info('Saved VCF again to ' + final_vcf_fpath)

    return [final_vcf_fpath]
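
The .gz extension bookkeeping in the middle of this function is easy to misread; here it is in isolation (match_gz_extension is a name invented for this sketch):

from os.path import splitext

def match_gz_extension(src_fpath, dst_fpath):
    # the destination name must follow the compression state of the source
    if not src_fpath.endswith('.gz') and dst_fpath.endswith('.gz'):
        dst_fpath = splitext(dst_fpath)[0]
    if src_fpath.endswith('.gz') and not dst_fpath.endswith('.gz'):
        dst_fpath += '.gz'
    return dst_fpath

print(match_gz_extension('s.anno.vcf', 's.anno.vcf.gz'))  # -> s.anno.vcf
print(match_gz_extension('s.anno.vcf.gz', 's.anno.vcf'))  # -> s.anno.vcf.gz

Example #4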
def evaluate_capture(cnf, bcbio_structures):
    samples = [s for bs in bcbio_structures for s in bs.samples]
    min_samples = math.ceil(cnf.min_ratio * len(samples))

    info('Filtering regions by depth')
    regions = check_regions_depth(cnf, bcbio_structures, min_samples)
    if not regions:
        err('No regions were filtered.')
        return None
    if cnf.bed or cnf.tricky_regions:
        regions = intersect_regions(cnf, bcbio_structures, regions,
                                    min_samples)

    regions_fname = 'filtered_regions.txt'
    regions_fpath = join(
        cnf.output_dir,
        add_suffix(regions_fname, str(cnf.min_depth)) if cnf.min_depth else regions_fname)
    with open(regions_fpath, 'w') as out:
        out.write('## Minimal percent of region with low coverage: ' +
                  str((1 - cnf.min_percent) * 100) + '%\n')
        out.write(
            '## Minimal percent of samples that share the same feature: ' +
            str(cnf.min_ratio * 100) + '%\n')
        if not cnf.min_depth:
            out.write(
                '## Coverage threshold Nx is 10x for cell line and 100x for plasma\n'
            )
        else:
            out.write('## Coverage threshold Nx is ' + str(cnf.min_depth) +
                      'x\n')
        out.write('\t'.join([
            '#Chr', 'Start', 'End', 'Size', 'Gene', 'Depth<Nx',
            'SamplesSharingSameFeature', 'Annotation'
        ]) + '\n')
        for region in sorted(regions, key=lambda x: (x[0], int(x[1]))):
            out.write('\t'.join([str(val) for val in region]) + '\n')

    info()
    info(str(len(regions)) + ' regions were saved into ' + regions_fpath)
    bgzip_and_tabix(cnf, regions_fpath, tabix_parameters='-p bed')
    return regions_fpath
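
The regions are written sorted by chromosome name and then by numeric start, so that start '30' precedes '200'. A toy check of the same sort key:

regions = [('chr1', '200', 'GENE2'), ('chr1', '30', 'GENE1')]
for region in sorted(regions, key=lambda x: (x[0], int(x[1]))):
    print('\t'.join(str(val) for val in region))
# chr1    30      GENE1
# chr1    200     GENE2
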
Example #5
def _clip_vcf_by_bed(cnf, vcf_fpath, bed_fpath):
    info('Clipping VCF ' + vcf_fpath + ' using BED ' + bed_fpath)

    bedtools = get_system_path(cnf, 'bedtools')

    clipped_vcf_fpath = intermediate_fname(cnf, vcf_fpath, 'clip')
    cmdline = '{bedtools} intersect -header -a {vcf_fpath} -b {bed_fpath}'.format(**locals())
    res = call(cnf, cmdline, output_fpath=clipped_vcf_fpath)

    clipped_gz_vcf_fpath = bgzip_and_tabix(cnf, clipped_vcf_fpath)

    return clipped_gz_vcf_fpath
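
Rendered with toy paths, the command above is a plain bedtools intersect call that keeps the VCF header (nothing is executed here; get_system_path and call belong to the surrounding codebase):

bedtools, vcf_fpath, bed_fpath = 'bedtools', 'sample.vcf', 'capture.bed'
print('{bedtools} intersect -header -a {vcf_fpath} -b {bed_fpath}'.format(**locals()))
# -> bedtools intersect -header -a sample.vcf -b capture.bed
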
Example #6
def fix_vcf_sample_name(cnf, sample_name, vcf_fpath, output_fpath=None):
    output_fpath = output_fpath or intermediate_fname(cnf, vcf_fpath, 'sample')
    def fix_sample_name(l, i):
        if l.startswith('#CHROM'):
            fs = l.split('\t')
            fs[9] = sample_name
            l = '\t'.join(fs)
        elif not l.startswith('#'):
            fs = l.split('\t')
            kvs = fs[7].split(';')
            # separate index name so the line-index argument `i` is not shadowed
            for j, kv in enumerate(kvs):
                if kv.startswith('SAMPLE='):
                    kvs[j] = 'SAMPLE=' + sample_name
            l = '\t'.join(fs[:7]) + '\t' + ';'.join(kvs) + '\t' + '\t'.join(fs[8:])
            # l = re.sub("(?<=SAMPLE=)[^;](?=;)", sample_name, l)
        return l
    fixed_vcf = iterate_file(cnf, vcf_fpath, fix_sample_name, output_fpath=output_fpath)
    return bgzip_and_tabix(cnf, fixed_vcf)
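
What fix_sample_name does to the two kinds of lines it touches, on toy input (the sample names are made up):

sample_name = 'NEW_NAME'

header = '#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\tOLD_NAME'
fs = header.split('\t')
fs[9] = sample_name
print('\t'.join(fs))  # column 10 now reads NEW_NAME

record = 'chr1\t100\t.\tA\tG\t50\tPASS\tDP=10;SAMPLE=OLD_NAME\tGT\t0/1'
fs = record.split('\t')
kvs = fs[7].split(';')
for j, kv in enumerate(kvs):
    if kv.startswith('SAMPLE='):
        kvs[j] = 'SAMPLE=' + sample_name
print('\t'.join(fs[:7]) + '\t' + ';'.join(kvs) + '\t' + '\t'.join(fs[8:]))
# INFO now reads DP=10;SAMPLE=NEW_NAME
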
Example #7
def combine_vcfs(cnf,
                 vcf_fpath_by_sname,
                 combined_vcf_fpath,
                 additional_parameters=''):
    gatk = get_java_tool_cmdline(cnf, 'gatk')
    if not gatk:
        info('GATK is not found, skipping merging VCFs')
        return None

    cmdl = '{gatk} -T CombineVariants -R {cnf.genome.seq} {additional_parameters}'.format(
        **locals())
    for s_name, vcf_fpath in vcf_fpath_by_sname.items():
        if vcf_fpath:
            cmdl += ' --variant:' + s_name + ' ' + vcf_fpath
    if ' --variant:' not in cmdl:
        err('No VCFs to combine')
        return None

    if cnf.reuse_intermediate and isfile(combined_vcf_fpath + '.gz') \
            and verify_vcf(combined_vcf_fpath + '.gz'):
        info(combined_vcf_fpath + '.gz exists, reusing')
        return combined_vcf_fpath + '.gz'

    cmdl += ' -o ' + combined_vcf_fpath
    res = call(cnf,
               cmdl,
               output_fpath=combined_vcf_fpath,
               stdout_to_outputfile=False,
               exit_on_error=False)
    if res:
        info('Joined VCFs, saved into ' + combined_vcf_fpath)
        if isfile(combined_vcf_fpath + '.tx.idx'):
            try:
                os.remove(combined_vcf_fpath + '.tx.idx')
            except OSError:
                err(traceback.format_exc())
                info()
        return bgzip_and_tabix(cnf, combined_vcf_fpath)
    else:
        warn('Could not join VCFs')
        return None
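
The CombineVariants command grows one --variant:<name> argument per sample. With toy values (the gatk path and reference are placeholders for get_java_tool_cmdline and cnf.genome.seq):

vcf_fpath_by_sname = {'s1': 's1.vcf.gz', 's2': 's2.vcf.gz'}
cmdl = 'gatk -T CombineVariants -R ref.fa'
for s_name, vcf_fpath in vcf_fpath_by_sname.items():
    if vcf_fpath:
        cmdl += ' --variant:' + s_name + ' ' + vcf_fpath
print(cmdl)
# -> gatk -T CombineVariants -R ref.fa --variant:s1 s1.vcf.gz --variant:s2 s2.vcf.gz
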
Example #8
def _read_vcf_records_per_bed_region_and_clip_vcf(cnf, vcf_fpath, bed_fpath,
                                                  region_type, sample):
    info()
    info('Intersecting VCF ' + vcf_fpath + ' using BED ' + bed_fpath)

    vcf_columns_num = count_bed_cols(vcf_fpath)
    bed_columns_num = count_bed_cols(bed_fpath)

    vcf_bed_intersect = join(
        cnf.work_dir,
        splitext(basename(vcf_fpath))[0] + '_' + region_type +
        '_vcf_bed.intersect')
    bedtools = get_system_path(cnf, 'bedtools')
    if not cnf.reuse_intermediate or \
            not verify_file(vcf_bed_intersect, silent=True, is_critical=False):
        cmdline = '{bedtools} intersect -header -a {vcf_fpath} -b {bed_fpath} -wo'.format(
            **locals())
        res = call(cnf,
                   cmdline,
                   output_fpath=vcf_bed_intersect,
                   max_number_of_tries=1,
                   exit_on_error=False)
        if not res:
            return None, None, None, None

    regions_in_order = []
    regions_set = set()
    vars_by_region = defaultdict(dict)
    var_by_site = dict()

    clipped_vcf_fpath = intermediate_fname(cnf,
                                           splitext(basename(vcf_fpath))[0],
                                           '_' + region_type + '_clip')

    with open(vcf_bed_intersect) as f, open(clipped_vcf_fpath,
                                            'w') as clip_vcf:
        for l in f:
            l = l.strip()
            if not l or l.startswith('#'):
                clip_vcf.write(l + '\n')
                continue
            fs = l.split('\t')
            chrom, pos, id_, ref, alt, qual, filt, info_fields = fs[:8]
            chrom_b, start_b, end_b, symbol, strand, feature, biotype = None, None, None, None, None, None, None
            if bed_columns_num >= 8:
                chrom_b, start_b, end_b, symbol, _, strand, feature, biotype, _ = \
                    fs[-(bed_columns_num + 1):][:9]
            elif bed_columns_num >= 4:
                chrom_b, start_b, end_b, symbol, _ = \
                    fs[-(bed_columns_num + 1):][:5]
            assert chrom == chrom_b, l
            r = chrom, id_, start_b, end_b, symbol, strand, feature, biotype
            if r not in regions_set:
                regions_set.add(r)
                regions_in_order.append(r)

            cls = None
            if '=Hotspot' in info_fields: cls = 'Hotspot'
            if '=Deleterious' in info_fields: cls = 'Deleterious'
            if cls:
                var = Variant(chrom, pos, ref, alt, cls)
                vars_by_region[r][(chrom, pos, ref, alt)] = var
                var_by_site[(chrom, pos, ref, alt)] = var
                clip_vcf.write('\t'.join(
                    [chrom, pos, id_, ref, alt, qual, filt, info_fields]) +
                               '\n')

    clipped_gz_vcf_fpath = bgzip_and_tabix(cnf,
                                           clipped_vcf_fpath,
                                           max_number_of_tries=1,
                                           exit_on_error=False)

    return clipped_gz_vcf_fpath, regions_in_order, vars_by_region, var_by_site
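
The column arithmetic above relies on bedtools intersect -wo appending the BED columns plus an overlap-length column after each VCF record. On a toy line with a 4-column BED:

bed_columns_num = 4
l = 'chr1\t100\t.\tA\tG\t50\tPASS\tCLS=Hotspot\tchr1\t90\t110\tGENE1\t11'
fs = l.split('\t')
chrom_b, start_b, end_b, symbol, _ = fs[-(bed_columns_num + 1):][:5]
print(chrom_b, start_b, end_b, symbol)  # -> chr1 90 110 GENE1

Example #9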
def convert_vardict_txts_to_bcbio_vcfs(cnf,
                                       bs,
                                       sample,
                                       output_dir=None,
                                       pass_only=False):
    info('')
    info('Preparing data for ' + sample.name)
    anno_filt_vcf_fpath = sample.find_filt_vcf_by_callername(cnf.caller_name)
    if not anno_filt_vcf_fpath:
        return None, None

    if not output_dir:
        output_dir = cnf.output_dir or os.path.dirname(anno_filt_vcf_fpath)
    output_vcf_fpath = join(
        output_dir, sample.name + '-' + cnf.caller_name + filt_vcf_ending)
    pass_output_vcf_fpath = add_suffix(output_vcf_fpath, 'pass')
    if cnf.reuse_intermediate and verify_vcf(output_vcf_fpath + '.gz') \
            and verify_vcf(pass_output_vcf_fpath + '.gz'):
        info(output_vcf_fpath + '.gz and ' + pass_output_vcf_fpath +
             '.gz exist, reusing')
        return output_vcf_fpath + '.gz', pass_output_vcf_fpath + '.gz'

    info('Parsing PASS and REJECT mutations...')
    pass_mut_dict, reject_mut_dict, filter_values = get_mutation_dicts(
        cnf, bs, sample, pass_only=pass_only)
    sorted_mut_dict = combine_mutations(pass_mut_dict, reject_mut_dict)

    info('')
    info('Writing VCFs')
    vcf_reader = vcf.Reader(open_gzipsafe(anno_filt_vcf_fpath, 'r'))
    vcf_reader = add_keys_to_header(vcf_reader, filter_values)
    with file_transaction(cnf.work_dir, output_vcf_fpath) as filt_tx, \
        file_transaction(cnf.work_dir, pass_output_vcf_fpath) as pass_tx:
        vcf_writer = None
        if not pass_only:
            vcf_writer = vcf.Writer(open(filt_tx, 'w'), template=vcf_reader)
        vcf_pass_writer = vcf.Writer(open(pass_tx, 'w'), template=vcf_reader)
        for key, mut in sorted_mut_dict.items():
            record = get_record_from_vcf(vcf_reader, mut)
            if record:
                if key in pass_mut_dict:
                    record.FILTER = ['PASS']
                    if mut.reason:
                        record.INFO['Reason'] = mut.reason.replace(' ', '_')
                elif pass_only:
                    continue
                elif key in reject_mut_dict:
                    if not mut.reason:
                        continue
                    reject_reason_ids = [
                        filter_descriptions_dict[reason]
                        if reason in filter_descriptions_dict else reason
                        for reason in mut.reason.split(' and ')
                    ]
                    record.FILTER = [';'.join(reject_reason_ids)]
                if mut.signif:
                    record.INFO['Signif'] = mut.signif
                if mut.status:
                    record.INFO['Status'] = mut.status
                if vcf_writer:
                    vcf_writer.write_record(record)
                if key in pass_mut_dict:
                    vcf_pass_writer.write_record(record)
            else:
                warn('No record was found in ' + anno_filt_vcf_fpath +
                     ' for mutation ' + str(mut))

    output_gzipped_vcf_fpath = None
    if vcf_writer:
        vcf_writer.close()
        output_gzipped_vcf_fpath = bgzip_and_tabix(cnf, output_vcf_fpath)
        info('VCF file for vardict.txt is saved to ' +
             output_gzipped_vcf_fpath)
    vcf_pass_writer.close()
    output_gzipped_pass_vcf_fpath = bgzip_and_tabix(cnf, pass_output_vcf_fpath)
    info('VCF file for vardict.PASS.txt is saved to ' +
         output_gzipped_pass_vcf_fpath)
    return output_gzipped_vcf_fpath, output_gzipped_pass_vcf_fpath
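
The reject branch above maps human-readable reasons to FILTER IDs where a description exists and keeps the raw reason otherwise. In isolation (filter_descriptions_dict here is a toy stand-in):

filter_descriptions_dict = {'low allele frequency': 'LowAF'}
reason = 'low allele frequency and low depth'
reject_reason_ids = [
    filter_descriptions_dict[r] if r in filter_descriptions_dict else r
    for r in reason.split(' and ')
]
print(';'.join(reject_reason_ids))  # -> LowAF;low depth
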
Example #10
def intersect_vcf(cnf, input_fpath, db_fpath, key):
    vcf_fpath = input_fpath

    db_fpath = verify_file(db_fpath)
    if not db_fpath:
        return None

    info('Intersecting with ' + db_fpath + ', writing key ' + str(key))

    info('Preparing db...')

    def _add_info_flag(l, i):
        if l.startswith('#'):
            return l
        fs = l.split('\t')
        info_col, ft_keys, ft_vals = fs[-3], fs[-2], fs[-1]
        ft_dict = dict(zip(ft_keys.split(':'), ft_vals.split(':')))
        for ann in ['DP', 'MQ']:
            val = ft_dict.get(ann, None)
            if val:
                # ft_keys[key.replace('.', '_') + '_' + ann] = val
                # del ft_keys[ann]
                info_col += ';' + key.replace('.', '_') + '_' + ann + '=' + val
        # ft_items = ft_dict.items()
        # ft_keys = [k for k, v in ft_items]
        # ft_vals = [v for k, v in ft_items]
        # return '\t'.join(fs[:-2]) + '\t' + ':'.join(ft_keys) + '\t' + ':'.join(ft_vals)
        return '\t'.join(fs[:-3]) + '\t' + info_col + '\t' + '\t'.join(fs[-2:])
        # rec.FORMAT[key.replace('.', '_') + '_DP'] = rec.genotype(key.split('.')[0])['DP']
        # rec.INFO[key.replace('.', '_') + '_MQ'] = rec.genotype(key.split('.')[0])['MQ']
        # return rec

    # db_fpath = iterate_vcf(cnf, db_fpath, _add_info_flag, suffix='INFO_FLAGS')
    db_fpath = iterate_file(cnf, db_fpath, _add_info_flag, suffix='INFO_FLAGS')

    info('Adding header meta info...')

    def _add_header(l, i):
        if l.startswith('#CHROM'):
            ext_l = ''
            for ann in ['DP', 'MQ']:
                ext_l += ('##INFO=<ID=' + key.replace('.', '_') + '_' + ann +
                          ',Number=1,Type=Integer,Description="description">\n')
            return ext_l + l
        return l

    db_fpath = iterate_file(cnf, db_fpath, _add_header, suffix='INFO_HEADER')

    # out_fpath = add_suffix(db_fpath, 'HEADERS')
    # if cnf.reuse_intermediate and verify_file(out_fpath, silent=True):
    #     info(out_fpath + ' exists, reusing')
    # else:
    #     reader = vcf_parser.Reader(open(db_fpath))
    #     for k in 'DP', 'MQ':
    #         k = k + '_' + key.replace('.', '_')
    #         reader.infos[k] = _Info(id=k, num=1, type='Integer', desc=k + ' ' + key)
    #
    #     with file_transaction(cnf.work_dir, out_fpath) as tx:
    #         recs = []
    #         cnt = 0
    #         with open(tx, 'w') as f:
    #             writer = vcf_parser.Writer(f, reader)
    #             while True:
    #                 cnt += 1
    #                 rec = next(reader, None)
    #                 if rec is None:
    #                     break
    #                 recs.append(rec)
    #                 if cnt % 1000000 == 0:
    #                     info('Written ' + str(cnt) + ' lines')
    #                     writer.write_records(recs)
    #                     recs = []
    #             writer.write_records(recs)
    #     db_fpath = out_fpath
    db_fpath = bgzip_and_tabix(cnf, db_fpath)

    info('Annotating using this db...')
    vcf_conf = {
        'path': db_fpath,
        'annotations': [key.replace('.', '_') + '_DP',
                        key.replace('.', '_') + '_MQ'],
    }
    vcf_fpath = _snpsift_annotate(cnf, vcf_conf, key, vcf_fpath)

    info('Moving INFO to FORMAT...')

    def _move_info_to_format(l, i):
        if l.startswith('#'):
            return l
        fs = l.split('\t')
        info_dict = dict([
            kv.split('=') if '=' in kv else (kv, True)
            for kv in fs[7].split(';')
        ])
        ft_keys = fs[8].split(':')
        all_ft_vals = [ft_vals.split(':') for ft_vals in fs[9:]]
        ft_dicts = [
            OrderedDict(zip(ft_keys, ft_vals)) for ft_vals in all_ft_vals
        ]
        for ann in ['DP', 'MQ']:
            k = key.replace('.', '_') + '_' + ann
            for ft_dict in ft_dicts:
                ft_dict[k] = info_dict.get(k, '.')
        all_ft_vals = []
        for ft_dict in ft_dicts:
            ft_items = ft_dict.items()
            ft_keys = [k for k, v in ft_items]
            all_ft_vals.append([v for k, v in ft_items])
        l = '\t'.join(fs[:8]) + '\t' + ':'.join(ft_keys)
        for ft_vals in all_ft_vals:
            l += '\t' + ':'.join(ft_vals)
        return l
        # rec.FORMAT[key.replace('.', '_') + '_DP'] = rec.genotype(key.split('.')[0])['DP']
        # rec.INFO[key.replace('.', '_') + '_MQ'] = rec.genotype(key.split('.')[0])['MQ']
        # return rec

    # db_fpath = iterate_vcf(cnf, db_fpath, _add_info_flag, suffix='INFO_FLAGS')
    vcf_fpath = iterate_file(cnf,
                             vcf_fpath,
                             _move_info_to_format,
                             suffix='FORMAT_FLAGS')

    info('Adding FORMAT header meta info...')

    def _add_format_header(l, i):
        if l.startswith('#CHROM'):
            ext_l = ''
            ext_l += ('##FORMAT=<ID=' + key.replace('.', '_') +
                      '_DP,Number=1,Type=Integer,Description="Number of high-quality bases">\n')
            ext_l += ('##FORMAT=<ID=' + key.replace('.', '_') +
                      '_MQ,Number=1,Type=Integer,Description="Average mapping quality">\n')
            return ext_l + l
        return l

    vcf_fpath = iterate_file(cnf,
                             vcf_fpath,
                             _add_format_header,
                             suffix='FORMAT_HEADER')

    info()
    if vcf_fpath:
        info('Renaming ' + vcf_fpath + ' -> ' + input_fpath)
        os.rename(vcf_fpath, input_fpath)
    else:
        warn('Intersection with ' + key + ' didn\'t work')
    return input_fpath
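
_move_info_to_format is the trickiest line surgery in this example; here is the same transformation applied to one toy record (key 'db.1' and the annotation values are invented):

from collections import OrderedDict

key = 'db.1'
l = 'chr1\t100\t.\tA\tG\t50\tPASS\tdb_1_DP=12;db_1_MQ=60\tGT:DP\t0/1:12'
fs = l.split('\t')
info_dict = dict(kv.split('=') if '=' in kv else (kv, True)
                 for kv in fs[7].split(';'))
ft_keys = fs[8].split(':')
ft_dicts = [OrderedDict(zip(ft_keys, vals.split(':'))) for vals in fs[9:]]
for ann in ['DP', 'MQ']:
    k = key.replace('.', '_') + '_' + ann
    for ft_dict in ft_dicts:
        ft_dict[k] = info_dict.get(k, '.')  # copy from INFO, '.' if absent
ft_keys = list(ft_dicts[0].keys())
out = '\t'.join(fs[:8]) + '\t' + ':'.join(ft_keys)
for ft_dict in ft_dicts:
    out += '\t' + ':'.join(ft_dict.values())
print(out)  # FORMAT becomes GT:DP:db_1_DP:db_1_MQ, sample column 0/1:12:12:60
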
Example #11
def run_annotators(cnf, vcf_fpath, bam_fpath):
    original_vcf = cnf.vcf

    db_section_by_name = OrderedDict(
        (dbname, cnf.annotation[dbname])
        for dbname in ['dbsnp', 'clinvar', 'cosmic', 'oncomine']
        if dbname in cnf.annotation
        and not cnf.annotation[dbname].get('skip-annotation'))

    # if not cnf.no_check:
    #     to_delete_id_ref = []
    #     if 'dbsnp' in db_section_by_name.keys():
    #         info('Removing IDs from dbsnp as rs*')
    #         to_delete_id_ref.append('rs')
    #     if 'cosmic' in db_section_by_name.keys():
    #         info('Removing IDs from dbsnp as COS*')
    #         to_delete_id_ref.append('COS')
    #
    #     def delete_ids(rec):  # deleting existing dbsnp and cosmic ID annotations
    #         if rec.ID:
    #             if isinstance(rec.ID, basestring):
    #                 if any(rec.ID.startswith(pref) for pref in to_delete_id_ref):
    #                     rec.ID = None
    #             else:
    #                 rec.ID = [id_ for id_ in rec.ID if not any(id_.startswith(pref) for pref in to_delete_id_ref)]
    #
    #         if not rec.FILTER:
    #             rec.FILTER = 'PASS'
    #
    #         return rec
    #
    #     info('Removing previous rs* and COS* IDs')
    #     vcf_fpath = iterate_vcf(cnf, vcf_fpath, delete_ids, suffix='delID')

    bcftools = get_system_path(cnf, 'bcftools')

    if not vcf_fpath.endswith('.gz') or not file_exists(vcf_fpath + '.tbi'):
        vcf_fpath = bgzip_and_tabix(cnf, vcf_fpath)

    cmdl = '{bcftools} annotate --remove ID {vcf_fpath}'
    res = call(cnf,
               cmdl.format(**locals()),
               output_fpath=add_suffix(rm_gz_ext(vcf_fpath), 'rmid'))
    if res:
        vcf_fpath = res
        vcf_fpath = bgzip_and_tabix(cnf, vcf_fpath)

    for dbname, dbconf in db_section_by_name.items() + cnf.annotation.get(
            'custom_vcfs', dict()).items():
        step_greetings('Annotating using ' + dbname)
        annotations = ','.join('INFO/' + a for a in dbconf.get('annotations'))
        if dbname in ('cosmic', 'dbsnp'):
            annotations += ',=ID'
        db_fpath = get_db_path(cnf, dbconf, dbname)
        if db_fpath:
            cmdl = '{bcftools} annotate -a ' + db_fpath + ' -c ' + annotations + ' {vcf_fpath}'
            res = call(cnf,
                       cmdl.format(**locals()),
                       output_fpath=add_suffix(rm_gz_ext(vcf_fpath), dbname))
            if res:
                vcf_fpath = res
                vcf_fpath = bgzip_and_tabix(cnf, vcf_fpath)

    verify_vcf(vcf_fpath, is_critical=True)

    if 'dbnsfp' in cnf.annotation:
        res = _snpsift_db_nsfp(cnf, vcf_fpath)
        if res:
            vcf_fpath = res

    if 'snpeff' in cnf.annotation:
        res, summary_fpath, genes_fpath = _snpeff(cnf, vcf_fpath)
        if res:
            vcf_fpath = res
            verify_vcf(vcf_fpath, is_critical=True)
            final_summary_fpath = join(cnf.output_dir, basename(summary_fpath))
            final_genes_fpath = join(cnf.output_dir, basename(genes_fpath))
            if isfile(final_summary_fpath): os.remove(final_summary_fpath)
            if isfile(final_genes_fpath): os.remove(final_genes_fpath)
            if file_exists(summary_fpath):
                shutil.move(summary_fpath, final_summary_fpath)
            if file_exists(genes_fpath):
                shutil.move(genes_fpath, final_genes_fpath)

    if cnf.annotation.get('tracks'):
        track_fpaths = []
        for track_name in cnf.annotation['tracks']:
            if isfile(track_name) and verify_file(track_name):
                track_fpaths.append(track_name)
            else:
                if 'tracks' in cnf['genome'] and cnf['genome']['tracks'] \
                        and track_name in cnf['genome']['tracks']:
                    track_fpath = cnf['genome']['tracks'][track_name]
                    if verify_file(track_fpath):
                        track_fpaths.append(track_fpath)
        for track_fpath in track_fpaths:
            res = _tracks(cnf, track_fpath, vcf_fpath)
            if res:
                vcf_fpath = res
            if res:
                vcf_fpath = res

    step_greetings('Intersection with database VCFs...')
    if 'intersect_with' in cnf.annotation:
        for key, db_fpath in cnf.annotation['intersect_with'].items():
            res = intersect_vcf(cnf,
                                input_fpath=vcf_fpath,
                                db_fpath=db_fpath,
                                key=key)
            if res:
                vcf_fpath = res

    if 'mongo' in cnf.annotation:
        res = _mongo(cnf, vcf_fpath)
        if res:
            vcf_fpath = res

    return vcf_fpath
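
The per-database bcftools annotate call above, rendered with toy values ('CAF' and 'COMMON' stand in for whatever the annotation config lists; the trailing ',=ID' is carried over verbatim from the code above):

bcftools, vcf_fpath, db_fpath = 'bcftools', 'sample.vcf.gz', 'dbsnp.vcf.gz'
annotations = ','.join('INFO/' + a for a in ['CAF', 'COMMON']) + ',=ID'
cmdl = '{bcftools} annotate -a ' + db_fpath + ' -c ' + annotations + ' {vcf_fpath}'
print(cmdl.format(**locals()))
# -> bcftools annotate -a dbsnp.vcf.gz -c INFO/CAF,INFO/COMMON,=ID sample.vcf.gz
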