Example #1
def submit_job(cnf, cmdline, job_name, wait_for_steps=None, threads=1,
               output_fpath=None, stdout_to_outputfile=True, run_on_chara=False, **kwargs):

    prefix = str(cnf.project_name) + '_'
    if job_name: prefix += job_name + '_'
    prefix += datetime.now().strftime("%Y_%m_%d_%H_%M_%S") + '_'
    f, done_marker_fpath = make_tmpfile(cnf, prefix=prefix, suffix='.done')
    f, error_marker_fpath = make_tmpfile(cnf, prefix=prefix, suffix='.error')
    if isfile(done_marker_fpath): os.remove(done_marker_fpath)
    if isfile(error_marker_fpath): os.remove(error_marker_fpath)
    job_id = basename(splitext(done_marker_fpath)[0])

    tx_output_fpath = None
    if output_fpath:
        if cnf.reuse_intermediate and verify_file(output_fpath, silent=True):
            info(output_fpath + ' exists, reusing')
            j = JobRunning(None, None, None, None, None, output_fpath=output_fpath, **kwargs)
            j.is_done = True
            return j
        if stdout_to_outputfile:
            tx_output_fpath = output_fpath + '.tx'
            if isfile(tx_output_fpath):
                os.remove(tx_output_fpath)
            cmdline += ' > ' + tx_output_fpath
        else:
            if isfile(output_fpath):
                os.remove(output_fpath)

    qsub = get_system_path(cnf, 'qsub', is_critical=True)
    bash = get_system_path(cnf, 'bash', is_critical=True)

    if cnf.log_dir:
        err_fpath = log_fpath = join(cnf.log_dir, job_id + '.log')
    else:
        fd, fpath = make_tmpfile(cnf, suffix=job_id + '.log', text=True)
        err_fpath = log_fpath = fpath

    queue = cnf.queue
    runner_script = adjust_system_path(cnf.qsub_runner)
    verify_file(runner_script, is_critical=True, description='qsub_runner')
    hold_jid_line = '-hold_jid ' + ','.join(wait_for_steps or ['_'])
    mem = threads * 15
    priority = 0
    if cnf.qsub_priority:
        priority = cnf.qsub_priority
    extra_qsub_opts = ''
    if run_on_chara and is_us():
        extra_qsub_opts += '-l h="chara|rask"'
    cmdline = cmdline.replace('"', '\\"').replace('\\\\"', '\\"')
    qsub_cmdline = (
        '{qsub} -pe smp {threads} {extra_qsub_opts} -S {bash} -q {queue} -p {priority} '
        '-j n -o {log_fpath} -e {err_fpath} {hold_jid_line} '
        '-N {job_id} {runner_script} {done_marker_fpath} {error_marker_fpath} "{cmdline}"'.format(**locals()))
    info('Submitting job ' + job_id)
    info(qsub_cmdline)
    job = JobRunning(job_id, log_fpath, qsub_cmdline, done_marker_fpath, error_marker_fpath,
                     output_fpath=output_fpath, tx_output_fpath=tx_output_fpath,
                     stdout_to_outputfile=stdout_to_outputfile, **kwargs)
    call(cnf, qsub_cmdline, silent=True)
    return job
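
A minimal usage sketch for submit_job. Everything below is illustrative: cnf is assumed to be an already-built Config object (see the proc_args examples further down), the command lines and paths are made up, and the .job_id attribute on the returned JobRunning object is an assumption based on its first constructor argument.

# Hypothetical usage sketch; cnf, the command lines and the paths are assumptions.
sort_job = submit_job(cnf, 'sort -k1,1 -k2,2n target.bed', job_name='sort_bed',
                      threads=1,
                      output_fpath=join(cnf.work_dir, 'target.sorted.bed'),
                      stdout_to_outputfile=True)
# A downstream step can hold on the first one via -hold_jid; job_id is assumed to be
# stored by JobRunning from the first constructor argument above.
count_job = submit_job(cnf, 'wc -l ' + join(cnf.work_dir, 'target.sorted.bed'),
                       job_name='count_lines', wait_for_steps=[sort_job.job_id])
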
def check_genome_resources(cnf):
    if cnf.genome is None:
        critical('Please specify the genome build (one of those available in ' +
                 cnf.sys_cnf +
                 ') using the --genome option (e.g., --genome hg38).')

    if not cnf.genomes:
        critical('"genomes" section is not specified in system config ' +
                 cnf.sys_cnf)

    info('Genome: ' + str(cnf.genome.name))

    for key in cnf.genome.keys():
        if key != 'name' and isinstance(cnf.genome[key], basestring):
            cnf.genome[key] = adjust_system_path(cnf.genome[key])

            if not verify_obj_by_path(cnf.genome[key], key, silent=True):
                if not cnf.genome[key].endswith('.gz') and verify_file(
                        cnf.genome[key] + '.gz', silent=True):
                    gz_fpath = cnf.genome[key] + '.gz'
                    if verify_file(gz_fpath, silent=True):
                        cnf.genome[key] = gz_fpath

    if not cnf.genome.features or not cnf.genome.bed_annotation_features or not cnf.genome.cds:
        warn(
            'Warning: features, bed_annotation_features and cds must be specified '
            'in the system config (' + cnf.sys_cnf + ').')

    if not cnf.transcripts_fpath:
        cnf.transcripts_fpath = cnf.transcripts_fpath or get_canonical_transcripts(
            cnf.genome.name, ensembl=True)
Example #3
def proc_args(argv):
    info(' '.join(sys.argv))
    info()

    description = 'This script generates target QC reports for each BAM provided as an input. ' \
                  'Usage: ' + basename(__file__) + ' sample2bam.tsv --bed target.bed --controls sample1:sample2 -o results_dir'
    parser = OptionParser(description=description, usage=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('-o', dest='output_dir', metavar='DIR', default=join(os.getcwd(), 'seq2c'))
    parser.add_option('--bed', dest='bed', help='BED file to run Seq2C analysis')
    parser.add_option('-c', '--controls', dest='controls', help='Optional control sample names for Seq2C. For multiple controls, separate them using :')
    parser.add_option('--seq2c-opts', dest='seq2c_opts', help='Options for the final lr2gene.pl script.')
    parser.add_option('--no-prep-bed', dest='prep_bed', help=SUPPRESS_HELP, action='store_false', default=True)

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if len(args) == 0:
        parser.print_usage()
        sys.exit(1)
    if len(args) == 1 and not args[0].endswith('.bam'):
        sample_names, bam_fpaths = read_samples(verify_file(args[0], is_critical=True, description='Input sample2bam.tsv'))
        bam_by_sample = OrderedDict()
        for s, b in zip(sample_names, bam_fpaths):
            bam_by_sample[s] = b
    else:
        bam_by_sample = find_bams(args)

    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)
    check_genome_resources(cnf)

    cnf.output_dir = adjust_path(cnf.output_dir)
    verify_dir(dirname(cnf.output_dir), is_critical=True)
    safe_mkdir(cnf.output_dir)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Seq2C'
    set_up_dirs(cnf)

    samples = [
        source.TargQC_Sample(name=s_name, dirpath=join(cnf.output_dir, s_name), bam=bam_fpath)
            for s_name, bam_fpath in bam_by_sample.items()]
    info('Samples: ')
    for s in samples:
        info('  ' + s.name)
    samples.sort(key=lambda _s: _s.key_to_sort())

    target_bed = verify_bed(cnf.bed, is_critical=True) if cnf.bed else None

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner: critical('Error: qsub-runner is not provided in sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples, target_bed, cnf.output_dir
def get_system_path(cnf,
                    interpreter_or_name,
                    name=None,
                    extra_warning='',
                    suppress_warn=False,
                    is_critical=False):
    """ "name" can be:
        - key in system_into.yaml
        - relative path in the project (e.g. external/...)
        - anything in system path
    """
    interpreter = interpreter_or_name
    if name is None:
        name = interpreter_or_name
        interpreter = None

    if interpreter:
        if interpreter == 'java':
            return get_java_tool_cmdline(cnf,
                                         name,
                                         extra_warning,
                                         suppress_warn,
                                         is_critical=is_critical)

        return get_script_cmdline(cnf,
                                  interpreter,
                                  name,
                                  extra_warning=extra_warning,
                                  suppress_warn=suppress_warn,
                                  is_critical=is_critical)

    # IN SYSTEM CONFIG?
    if cnf and (cnf.resources is not None and name.lower() in cnf.resources
                and 'path' in cnf.resources[name.lower()]):

        tool_path = cnf.resources[name.lower()]['path']
        tool_path = adjust_system_path(tool_path)
        return verify_obj_by_path(tool_path, name, is_critical=is_critical)

    # IN PROJECT ROOT DIR? IN EXTERNAL?
    for dirpath in [code_base_path]:
        tool_path = join(dirpath, name)
        if exists(tool_path):
            return verify_obj_by_path(tool_path, name, is_critical=is_critical)

    # IN PATH?
    tool_path = which(name)
    if tool_path and exists(tool_path):
        return verify_obj_by_path(tool_path, name, is_critical=is_critical)

    msg = (name + ' was not found. You may either specify its path in the system '
           'config, or add it to your PATH environment variable. ' +
           extra_warning)
    if not suppress_warn:
        err(msg)
    if is_critical:
        critical(msg)
    return None
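
A usage sketch of the three lookup forms described in the docstring above. The tool names and the script path are hypothetical; only the call shapes follow the signature.

# 1. Plain tool name: resolved via cnf.resources, the project directory, or PATH.
samtools = get_system_path(cnf, 'samtools', is_critical=True)
# 2. Interpreter + script: delegated to get_script_cmdline, returning a runnable command prefix.
seq2cov_cmd = get_system_path(cnf, 'perl', 'external/seq2cov.pl')
# 3. Java tools are special-cased and routed to get_java_tool_cmdline.
snpsift_cmd = get_system_path(cnf, 'java', 'snpsift')
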
def check_system_resources(cnf, required=list(), optional=list()):
    to_exit = False

    for program in required:
        if not which(program):
            if cnf.resources is None:
                critical('No "resources" section in system config.')

            data = cnf.resources.get(program)
            if data is None:
                err(program +
                    ' is required. Specify path in system config or in your environment.'
                    )
                to_exit = True
            else:
                if 'module' in data:
                    os.system('module load ' + data['module'])
                    # if 'path' not in data:
                    #     data['path'] = program
                elif 'path' in data:
                    data['path'] = adjust_system_path(data['path'])
                    if not isdir(data['path']) and not file_exists(
                            data['path']):
                        err(data['path'] + ' does not exist.')
                        to_exit = True

    for program in optional:
        resources = cnf.get('resources')
        if not resources:
            break

        data = resources.get(program)
        if data is None:
            continue
        else:
            data['path'] = adjust_system_path(data['path'])
            if not isdir(data['path']) and not file_exists(data['path']):
                err(data['path'] + ' does not exist.')
                to_exit = True

    if to_exit:
        exit()
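
A usage sketch (hypothetical tool names) of how required and optional programs might be declared before a run. Required tools must be on PATH or have a path/module entry in the "resources" section of the system config; optional tools are only checked when listed there.

# Hypothetical call; exits via critical()/exit() if a required tool cannot be resolved.
check_system_resources(cnf,
                       required=['samtools', 'bedtools'],
                       optional=['qualimap'])
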
Example #6
def process_all(cnf, bcbio_structure):
    samples = bcbio_structure.samples
    key_gene_by_name, use_custom_panel = get_key_or_target_bed_genes(
        cnf.bed, verify_file(adjust_system_path(cnf.key_genes), 'key genes'))
    key_or_target_genes = 'target' if use_custom_panel else 'key'
    mutations = {}
    for sample in samples:
        mutations[sample.name] = parse_mutations(cnf,
                                                 sample,
                                                 key_gene_by_name,
                                                 cnf.mutations_fpath,
                                                 key_or_target_genes,
                                                 for_flagged_report=True)
    _generate_summary_flagged_regions_report(cnf, bcbio_structure, samples,
                                             mutations, key_or_target_genes)
Example #7
def main():
    cnf = get_args()

    vardict_res_fpath = cnf.mutations_fpath
    seq2c_tsv_fpath = cnf.seq2c_tsv_fpath
    sv_fpath = cnf.sv_fpath
    output_dir = cnf.output_dir
    sample_name = cnf.sample

    cytoband = None
    if 'hg38' in cnf.genome.name:
        cytoband = cnf.genome.circos_cytoband
    elif cnf.genome.name == 'hg19':
        cytoband = 'hg19'
    if not cytoband:
        critical('Circos plot does not support the ' + cnf.genome.name + ' genome')

    svs_bed_fpath = join(output_dir, sample_name + '_svs.bed')
    parse_svs(cnf, sv_fpath, svs_bed_fpath)
    if not exists(svs_bed_fpath):
        return None

    modified_seq2c_fpath = join(output_dir, sample_name + '_seq2c.tsv')

    key_genes_chrom, _ = get_key_or_target_bed_genes(cnf.bed_fpath, verify_file(adjust_system_path(cnf.key_genes), 'key genes'))
    modify_seq2c(cnf, key_genes_chrom, seq2c_tsv_fpath, modified_seq2c_fpath)

    out_r_script = join(output_dir, sample_name + '.R')

    with open(out_r_script, 'w') as out_r_script_handle:
        out_r_script_handle.write(_circos_R_script.format(vardict_all=vardict_res_fpath,
                                  seq2c=modified_seq2c_fpath, outdir=output_dir,
                                  cytoband=cytoband, mysample=sample_name,
                                  svsbed=svs_bed_fpath))

    r_script = get_system_path(cnf, 'rscript')
    cmdline = '{r_script} {out_r_script}'.format(**locals())
    res = call(cnf, cmdline)
Example #8
def generate_flagged_regions_report(cnf, output_dir, sample, ave_depth,
                                    gene_by_key):
    depth_threshs = cnf.coverage_reports.depth_thresholds
    report = PerRegionSampleReport(
        sample=sample,
        metric_storage=get_detailed_metric_storage(depth_threshs))
    report.add_record('Sample', sample.name)
    safe_mkdir(sample.flagged_regions_dirpath)
    ''' 1. Detect the depth threshold (average sample coverage * DEPTH_THRESH_FROM_AVE_COV)
        2. Select regions covered at less than MIN_DEPTH_PERCENT_AT_THRESH at that threshold
        3. Sort by % at threshold
        4. Select the parts of those regions where % = 0, save them to BED
        5. Find HotSpots in those regions
        6. Intersect HotSpots with tracks

        For each gene that has regions with parts where % = 0:
            sort them by the part where % = 0
    '''
    #vcf_dbs = ['oncomine', 'dbsnp', 'cosmic']
    vcf_dbs = ['oncomine']

    from source._deprecated_clinical_reporting.clinical_parser import get_key_or_target_bed_genes
    key_genes, _ = get_key_or_target_bed_genes(
        cnf.bed, verify_file(adjust_system_path(cnf.key_genes), 'key genes'))
    depth_cutoff = get_depth_cutoff(ave_depth, depth_threshs)
    genes_sorted = sorted(gene_by_key.values())
    min_cov, max_cov = min_and_max_based_on_outliers(genes_sorted)

    for coverage_type in ['low', 'high']:
        info('Selecting and saving ' + coverage_type + ' covered genes')
        selected_genes = []

        if coverage_type == 'low':
            selected_genes = [
                g for g in genes_sorted if g.gene_name in key_genes and (any(
                    e.rates_within_threshs[depth_cutoff] <
                    MIN_DEPTH_PERCENT_AT_THRESH for e in g.get_exons()) or any(
                        a.rates_within_threshs[depth_cutoff] <
                        MIN_DEPTH_PERCENT_AT_THRESH
                        for a in g.get_amplicons()))
            ]
        else:
            if max_cov:
                selected_genes = [
                    g for g in genes_sorted
                    if g.gene_name in key_genes and (any(
                        e.avg_depth > max_cov for e in g.get_exons()) or any(
                            a.avg_depth > max_cov for a in g.get_amplicons()))
                ]
        for region_type in ['exons', 'target']:
            selected_regions = []
            for gene in selected_genes:
                if coverage_type == 'low':
                    cur_regions = [
                        a for a in (gene.get_amplicons() if region_type ==
                                    'target' else gene.get_exons())
                        if a.rates_within_threshs[depth_cutoff] <
                        MIN_DEPTH_PERCENT_AT_THRESH
                        and 'Multi' not in a.feature
                    ]
                else:
                    cur_regions = [
                        a for a in (gene.get_amplicons() if region_type ==
                                    'target' else gene.get_exons())
                        if a.avg_depth > max_cov and 'Multi' not in a.feature
                    ]
                selected_regions.extend(cur_regions)

            if selected_regions:
                selected_regions_bed_fpath = join(
                    sample.flagged_regions_dirpath,
                    coverage_type + '_cov_' + region_type + '.bed')
                save_regions_to_bed(cnf, selected_regions,
                                    selected_regions_bed_fpath)

                # Report cov for Hotspots
                for db in vcf_dbs:
                    res = _report_normalize_coverage_for_variant_sites(
                        cnf, sample, ave_depth, db, selected_regions_bed_fpath,
                        selected_regions, depth_cutoff, region_type,
                        coverage_type)
                    if not res:
                        return None

            report = make_flat_region_report(sample, selected_regions,
                                             depth_threshs)
            flagged_txt_fpath = add_suffix(
                add_suffix(sample.flagged_txt, region_type), coverage_type)
            flagged_tsv_fpath = add_suffix(
                add_suffix(sample.flagged_tsv, region_type), coverage_type)
            report.save_txt(flagged_txt_fpath)
            report.save_tsv(flagged_tsv_fpath)

            info('')
            info(coverage_type + ' covered ' + region_type + ' (total ' +
                 str(len(selected_regions)) + ') for sample ' + sample.name +
                 ' saved into:')
            info('  ' + flagged_txt_fpath + ', ' + flagged_tsv_fpath)

    return report
Example #9
def main():
    info(' '.join(sys.argv))
    info()

    description = 'This script generates target QC reports for each BAM provided as an input.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser, threads=1)
    parser.add_option('--work-dir', dest='work_dir', metavar='DIR')
    parser.add_option('--log-dir', dest='log_dir')
    parser.add_option('--only-summary',
                      dest='only_summary',
                      action='store_true')
    parser.add_option('-o',
                      dest='output_dir',
                      metavar='DIR',
                      default=join(os.getcwd(), 'targetqc'))
    parser.add_option('--reannotate',
                      dest='reannotate',
                      action='store_true',
                      default=False,
                      help='re-annotate BED file with gene names')
    parser.add_option('--dedup',
                      dest='dedup',
                      action='store_true',
                      default=False,
                      help='count duplicates in coverage metrics')
    parser.add_option('--bed',
                      dest='bed',
                      help='BED file to run targetSeq and Seq2C analysis on.')
    parser.add_option(
        '--exons',
        '--exome',
        '--features',
        dest='features',
        help='Annotated CDS/Exon/Gene/Transcripts BED file to make targetSeq '
             'exon/amplicon regions reports.')

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if len(args) == 0:
        critical('No BAM files provided as input.')
    bam_fpaths = list(set([abspath(a) for a in args]))

    bad_bam_fpaths = []
    for fpath in bam_fpaths:
        if not verify_bam(fpath):
            bad_bam_fpaths.append(fpath)
    if bad_bam_fpaths:
        critical('The following BAM files cannot be found, are empty, or are not valid BAMs: ' +
                 ', '.join(bad_bam_fpaths))

    run_cnf = determine_run_cnf(opts, is_wgs=not opts.__dict__.get('bed'))
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'TargQC'
    set_up_dirs(cnf)
    # cnf.name = 'TargQC_' + cnf.project_name

    check_genome_resources(cnf)

    verify_bed(cnf.bed, is_critical=True)
    bed_fpath = adjust_path(cnf.bed)
    info('Using amplicons/capture panel ' + bed_fpath)

    features_bed_fpath = adjust_path(
        cnf.features) if cnf.features else adjust_path(cnf.genome.features)
    info('Features: ' + features_bed_fpath)

    genes_fpath = None
    if cnf.genes:
        genes_fpath = adjust_path(cnf.genes)
        info('Custom genes list: ' + genes_fpath)

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided in sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    info('*' * 70)
    info()

    targqc_html_fpath = run_targqc(cnf, cnf.output_dir, bam_fpaths, bed_fpath,
                                   features_bed_fpath, genes_fpath)
    if targqc_html_fpath:
        send_email(
            cnf, 'TargQC report for ' + cnf.project_name + ':\n  ' +
            targqc_html_fpath)
Example #10
def proc_args(argv):
    info(' '.join(sys.argv))
    info()

    description = 'This script generates target QC reports for each BAM provided as an input.'
    parser = OptionParser(description=description)
    add_cnf_t_reuse_prjname_donemarker_workdir_genome_debug(parser)
    parser.add_option('--log-dir', dest='log_dir')
    parser.add_option('--is-wgs',
                      dest='is_wgs',
                      action='store_true',
                      default=False,
                      help='whole genome sequencing')
    parser.add_option('--is-deep-seq',
                      dest='is_deep_seq',
                      action='store_true',
                      default=False,
                      help='deep targeted sequencing')
    parser.add_option('--only-summary',
                      dest='only_summary',
                      action='store_true')
    parser.add_option('-o',
                      dest='output_dir',
                      metavar='DIR',
                      default=join(os.getcwd(), 'targetqc'))
    parser.add_option('-c', '--caller', dest='caller')
    parser.add_option('--qc', dest='qc', action='store_true', default=False)
    parser.add_option('--no-qc',
                      dest='qc',
                      action='store_false',
                      default=False)
    parser.add_option('--qc-caption', dest='qc_caption', help=SUPPRESS_HELP)
    parser.add_option('--no-tsv',
                      dest='tsv',
                      action='store_false',
                      default=True,
                      help=SUPPRESS_HELP)

    (opts, args) = parser.parse_args()
    logger.is_debug = opts.debug

    if len(args) == 0:
        critical('No VCF files provided as input.')

    run_cnf = determine_run_cnf(opts,
                                is_targetseq=opts.is_deep_seq,
                                is_wgs=opts.is_wgs)
    cnf = Config(opts.__dict__, determine_sys_cnf(opts), run_cnf)

    vcf_fpath_by_sample = read_samples(args, cnf.caller)
    info()

    if not cnf.project_name:
        cnf.project_name = basename(cnf.output_dir)
    info('Project name: ' + cnf.project_name)

    cnf.proc_name = 'Variants'
    set_up_dirs(cnf)
    # cnf.name = 'TargQC_' + cnf.project_name
    info(' '.join(sys.argv))

    samples = [
        source.VarSample(s_name, join(cnf.output_dir, s_name), vcf=vcf_fpath)
        for s_name, vcf_fpath in vcf_fpath_by_sample.items()
    ]
    samples.sort(key=lambda _s: _s.key_to_sort())

    check_genome_resources(cnf)

    if not cnf.only_summary:
        cnf.qsub_runner = adjust_system_path(cnf.qsub_runner)
        if not cnf.qsub_runner:
            critical('Error: qsub-runner is not provided in sys-config.')
        verify_file(cnf.qsub_runner, is_critical=True)

    return cnf, samples
Example #11
def _snpeff(cnf, input_fpath):
    if 'snpeff' not in cnf.annotation or 'snpeff' not in cnf.genome:
        return None, None, None

    step_greetings('SnpEff')

    output_fpath = intermediate_fname(cnf, input_fpath, 'snpEff')
    stats_fpath = join(
        cnf.work_dir, cnf.sample + (('-' + cnf.caller) if cnf.caller else '') +
        '.snpEff_summary.csv')

    if output_fpath.endswith('.gz'):
        output_fpath = output_fpath[:-3]
    if cnf.reuse_intermediate and verify_vcf(output_fpath):
        info('VCF ' + output_fpath + ' exists, reusing...')
        return output_fpath, stats_fpath, splitext(
            stats_fpath)[0] + '.genes.txt'

    snpeff = get_java_tool_cmdline(cnf, 'snpeff')

    ref_name = cnf.genome.snpeff.reference or cnf.genome.name
    if ref_name.startswith('hg19') or ref_name.startswith('GRCh37'):
        ref_name = 'GRCh37.75'
    if ref_name.startswith('hg38'): ref_name = 'GRCh38.82'

    opts = ''
    if cnf.annotation.snpeff.cancer: opts += ' -cancer'

    assert cnf.transcripts_fpath, 'Transcript for annotation must be specified!'
    verify_file(cnf.transcripts_fpath,
                'Transcripts for snpEff -onlyTr',
                is_critical=True)
    opts += ' -onlyTr ' + cnf.transcripts_fpath + ' '

    db_path = adjust_system_path(cnf.genome.snpeff.data)
    if db_path:
        opts += ' -dataDir ' + db_path
    elif cnf.resources.snpeff.config:
        conf = get_system_path(cnf, cnf.resources.snpeff.config)
        if conf:
            opts += ' -c ' + conf + ' '
        else:
            err('Cannot find snpEff config file ' +
                str(cnf.resources.snpeff.config))

    if cnf.annotation.snpeff.extra_options:
        # Append the extra options rather than the empty string the original code added.
        opts += ' ' + cnf.annotation.snpeff.extra_options

    if not cnf.no_check:
        info('Removing previous snpEff annotations...')
        res = remove_prev_eff_annotation(cnf, input_fpath)
        if not res:
            err('Could not remove previous snpEff annotations')
            return None, None, None
        input_fpath = res

    snpeff_type = get_snpeff_type(snpeff)
    if snpeff_type == "old":
        opts += ' -stats ' + stats_fpath + ' -csvStats'
    else:
        opts += ' -csvStats ' + stats_fpath

    cmdline = '{snpeff} eff {opts} -noLog -i vcf -o vcf {ref_name} {input_fpath}'.format(
        **locals())

    res = None  # ensure res is defined even if every retry raises OSError
    for i in range(1, 20):
        try:
            res = call_subprocess(cnf,
                                  cmdline,
                                  input_fpath,
                                  output_fpath,
                                  exit_on_error=False,
                                  stdout_to_outputfile=True,
                                  overwrite=True)
        except OSError:
            import traceback, time
            err(traceback.format_exc())
            warn()
            info('Waiting 1 minute')
            time.sleep(60)
            info('Rerunning ' + str(i))
        else:
            break

    output_fpath = verify_vcf(output_fpath, is_critical=True)

    snpeff_summary_html_fpath = 'snpEff_summary.html'
    if isfile(snpeff_summary_html_fpath):
        info('SnpEff created ' + snpeff_summary_html_fpath +
             ' in the cwd, removing it...')
        try:
            os.remove(snpeff_summary_html_fpath)
        except OSError:
            pass

    if res:
        return output_fpath, stats_fpath, splitext(
            stats_fpath)[0] + '.genes.txt'
    else:
        return None, None, None