Exemple #1
0
def read_biomart(genome_name):
    """Load the biomart table for `genome_name`, keyed by 'Transcript ID'.

    For genomes whose name does not start with 'hg38', the hg38 biomart table
    is read as well: transcripts missing from the primary table are added
    as-is, and transcripts already present get their
    'Transcript Support Level (TSL)' value overwritten with the hg38 one.

    Returns an empty dict (after a warning) when the biomart file for
    `genome_name` does not pass `verify_file`.
    """
    tx_col = 'Transcript ID'
    tsl_col = 'Transcript Support Level (TSL)'

    primary_fpath = ebl.biomart_fpath(genome_name)
    if not verify_file(primary_fpath):
        warn('Warning: biomart file for genome ' + genome_name +
             ' not found, skip using the TSL values')
        return dict()

    with open(primary_fpath) as handle:
        records = {row[tx_col]: row
                   for row in csv.DictReader(handle, delimiter='\t')}

    if genome_name.startswith('hg38'):
        # The hg38 table already has TSL, so there is nothing to borrow.
        return records

    hg38_fpath = ebl.biomart_fpath('hg38')
    if not verify_file(hg38_fpath):
        critical('Biomart for hg38 file not found, and needed for TSL values')
    with open(hg38_fpath) as handle:
        for row in csv.DictReader(handle, delimiter='\t'):
            tx_id = row[tx_col]
            if tx_id in records:
                records[tx_id][tsl_col] = row[tsl_col]
            else:
                records[tx_id] = row
    return records
Exemple #2
0
def determine_sex(work_dir, bam_fpath, avg_depth, genome, target_bed=None):
    """Guess the sample sex by comparing chrY key-region depth to the sample depth.

    :param work_dir: directory for intermediate files (pybedtools temp dir,
        'male.bed')
    :param bam_fpath: path to the BAM file; `critical` is called when it is empty
    :param avg_depth: sample average depth, converted with `float()`
    :param genome: genome name; matched by substring against the keys of
        `chry_key_regions_by_genome`
    :param target_bed: optional capture target; when given, only the part of
        the key regions overlapping the target is used
    :return: 'M', 'F', or None when the sex cannot be determined (no key
        regions for the genome, no overlap with the target, or the average
        depth is below AVG_DEPTH_THRESHOLD_TO_DETERMINE_SEX)
    """
    debug()
    debug('Determining sex')
    pybedtools.set_tempdir(safe_mkdir(join(work_dir, 'pybedtools_tmp')))

    male_bed = None
    for k in chry_key_regions_by_genome:
        if k in genome:
            male_bed = BedTool(chry_key_regions_by_genome.get(k))
            break
    if not male_bed:
        warn('Warning: no male key regions for ' + genome + ', cannot identify sex')
        return None

    male_area_size = get_total_bed_size(male_bed)
    debug('Male region total size: ' + str(male_area_size))

    if target_bed:
        # Restrict the key regions to the captured part and persist them to a file.
        target_male_bed = join(work_dir, 'male.bed')
        with file_transaction(work_dir, target_male_bed) as tx:
            BedTool(target_bed).intersect(male_bed).merge().saveas(tx)
        target_male_area_size = get_total_bed_size(target_male_bed)
        if target_male_area_size == 0:
            debug('The male non-PAR region does not overlap with the capture target - cannot determine sex.')
            return None
        male_bed = target_male_bed
    else:
        debug('WGS, determining sex based on chrY key regions coverage.')

    info('Detecting sex by comparing the Y chromosome key regions coverage and average coverage depth.')
    if not bam_fpath:
        critical('BAM file is required.')
    index_bam(bam_fpath)

    chry_mean_coverage = _calc_mean_coverage(work_dir, male_bed, bam_fpath, 1)
    debug('Y key regions average depth: ' + str(chry_mean_coverage))
    avg_depth = float(avg_depth)
    debug('Sample average depth: ' + str(avg_depth))
    if avg_depth < AVG_DEPTH_THRESHOLD_TO_DETERMINE_SEX:
        debug('Sample average depth is too low (less than ' + str(AVG_DEPTH_THRESHOLD_TO_DETERMINE_SEX) +
             ') - cannot determine sex')
        return None

    # The messages below used the invalid escape "\s", which printed a literal
    # backslash; they now contain a plain apostrophe.
    if chry_mean_coverage == 0:
        debug("Y depth is 0 - it's female")
        sex = 'F'
    else:
        factor = avg_depth / chry_mean_coverage
        debug('Sample depth / Y depth = ' + str(factor))
        if factor > FEMALE_Y_COVERAGE_FACTOR:  # if mean target coverage much higher than chrY coverage
            debug('Sample depth is more than ' + str(FEMALE_Y_COVERAGE_FACTOR) + " times higher than Y depth - it's female")
            sex = 'F'
        else:
            debug('Sample depth is not more than ' + str(FEMALE_Y_COVERAGE_FACTOR) + " times higher than Y depth - it's male")
            sex = 'M'
    debug('Sex is ' + sex)
    debug()
    return sex
Exemple #3
0
def tmpdir():
    """Yield a fresh temporary directory and remove it afterwards.

    Generator with a single yield; presumably wrapped with
    `contextlib.contextmanager` (the decorator is not visible here).
    Removal is best-effort: an OSError from `shutil.rmtree` is only reported
    through `warn`.
    """
    scratch_dirpath = make_tmpdir()
    try:
        yield scratch_dirpath
    finally:
        # Runs whether or not the consumer raised.
        try:
            shutil.rmtree(scratch_dirpath)
        except OSError:
            warn('Warning: cannot clean up temporary dir ' + scratch_dirpath)
Exemple #4
0
def tmpdir():
    """Provide a temporary directory for the duration of one yield.

    The directory comes from `make_tmpdir()` and is deleted in the `finally`
    clause; if deletion fails with OSError, a warning is emitted instead of
    raising. Presumably used through `contextlib.contextmanager` (decorator
    not visible here).
    """
    tmp_dirpath = make_tmpdir()
    try:
        yield tmp_dirpath
    finally:
        # Best-effort cleanup: never let a failed removal mask the real error.
        try:
            shutil.rmtree(tmp_dirpath)
        except OSError:
            warn('Warning: cannot clean up temporary dir ' + tmp_dirpath)
Exemple #5
0
def workdir(cnf):
    """Yield the working directory of `cnf`, creating a temporary one if unset.

    When `cnf.work_dir` is already set, it is verified (critically) and
    yielded as-is; it is never removed. Otherwise a temporary directory is
    created with `make_tmpdir()`, stored in `cnf.work_dir`, yielded, and
    removed afterwards. Removal failures (OSError) only produce a warning.

    Generator with a single yield; presumably wrapped with
    `contextlib.contextmanager` (the decorator is not visible here).
    """
    if cnf.work_dir:
        verify_dir(cnf.work_dir, is_critical=True)
        yield cnf.work_dir
    else:
        cnf.work_dir = make_tmpdir()
        try:
            yield cnf.work_dir
        finally:
            # Clean up even when the consumer raised, so that the temporary
            # directory does not leak (same pattern as `tmpdir`).
            try:
                shutil.rmtree(cnf.work_dir)
            except OSError:
                warn('Warning: cannot clean up temporary dir ' + cnf.work_dir)
Exemple #6
0
def workdir(cnf):
    """Yield `cnf.work_dir`, falling back to a self-cleaning temporary directory.

    A pre-set `cnf.work_dir` is verified with `verify_dir(..., is_critical=True)`
    and yielded without any cleanup. If it is unset, `make_tmpdir()` provides
    a directory that is stored in `cnf.work_dir`, yielded, and then deleted;
    an OSError during deletion is reported through `warn` only.

    Generator with a single yield; presumably wrapped with
    `contextlib.contextmanager` (the decorator is not visible here).
    """
    if cnf.work_dir:
        verify_dir(cnf.work_dir, is_critical=True)
        yield cnf.work_dir
    else:
        cnf.work_dir = make_tmpdir()
        try:
            yield cnf.work_dir
        finally:
            # `finally` guarantees removal when the consumer raises; without
            # it the temporary directory would be left behind on errors.
            try:
                shutil.rmtree(cnf.work_dir)
            except OSError:
                warn('Warning: cannot clean up temporary dir ' + cnf.work_dir)
Exemple #7
0
def run_multisample_qualimap(output_dir, work_dir, samples, targqc_full_report):
    """ 1. Generates Qualimap2 plots and put into plots_dirpath
        2. Adds records to targqc_full_report.plots

        Existing plots in <output_dir>/plots are reused when `can_reuse`
        accepts every non-hidden file there. Otherwise, if at least one sample
        has a `qualimap_html_fpath`, Qualimap `multi-bamqc` is run in
        <work_dir>/qualimap_multi_bamqc and its image directory is moved to
        <output_dir>/plots.

        Returns None in all cases. `targqc_full_report.plots` is left
        untouched when Qualimap is not found, fails, or no plots directory
        exists.
    """
    plots_dirpath = join(output_dir, 'plots')
    individual_report_fpaths = [s.qualimap_html_fpath for s in samples]
    if isdir(plots_dirpath) and all(
            can_reuse(join(plots_dirpath, f), individual_report_fpaths)
            for f in listdir(plots_dirpath) if not f.startswith('.')):
        debug('Qualimap multisample plots exist - ' + plots_dirpath + ', reusing...')
    elif any(s.qualimap_html_fpath for s in samples):
        # Qualimap2 run for multi-sample plots
        qualimap = find_executable()  # and get_qualimap_type(find_executable()) == 'full':
        if qualimap is None:
            warn('Warning: Qualimap for multi-sample analysis was not found. TargQC will not contain plots.')
            return None

        qualimap_output_dir = join(work_dir, 'qualimap_multi_bamqc')

        _correct_qualimap_genome_results(samples)
        _correct_qualimap_insert_size_histogram(samples)

        safe_mkdir(qualimap_output_dir)
        rows = [[sample.name, sample.qualimap_html_fpath]
                for sample in samples if sample.qualimap_html_fpath]

        data_fpath = write_tsv_rows(([], rows), join(qualimap_output_dir, 'qualimap_results_by_sample.tsv'))
        qualimap_plots_dirpath = join(qualimap_output_dir, 'images_multisampleBamQcReport')
        cmdline = qualimap + ' multi-bamqc --data {data_fpath} -outdir {qualimap_output_dir}'.format(
            data_fpath=data_fpath, qualimap_output_dir=qualimap_output_dir)
        run(cmdline, env_vars=dict(DISPLAY=None),
            checks=[lambda _1, _2: verify_dir(qualimap_output_dir)], reuse=cfg.reuse_intermediate)

        if not verify_dir(qualimap_plots_dirpath):
            warn('Warning: Qualimap for multi-sample analysis failed to finish. TargQC will not contain plots.')
            return None
        if exists(plots_dirpath):
            shutil.rmtree(plots_dirpath)
        shutil.move(qualimap_plots_dirpath, plots_dirpath)

    # Reached without a plots directory only when no sample has a Qualimap
    # report; listing the missing directory would raise.
    if not isdir(plots_dirpath):
        warn('Warning: no Qualimap reports to build multi-sample plots from. TargQC will not contain plots.')
        return None

    targqc_full_report.plots = []
    for plot_fname in listdir(plots_dirpath):
        plot_fpath = join(plots_dirpath, plot_fname)
        if verify_file(plot_fpath) and plot_fpath.endswith('.png'):
            targqc_full_report.plots.append(relpath(plot_fpath, output_dir))
def read_biomart(genome_name):
    """Read biomart records for `genome_name` into a dict keyed by 'Transcript ID'.

    If the genome name does not start with 'hg38', the hg38 biomart file is
    merged in: unknown transcripts are added, known ones only get their
    'Transcript Support Level (TSL)' field replaced by the hg38 value.

    Returns an empty dict, after a warning, when the genome's own biomart
    file fails `verify_file`.
    """
    fpath = ebl.biomart_fpath(genome_name)
    if not verify_file(fpath):
        warn('Warning: biomart file for genome ' + genome_name + ' not found, skip using the TSL values')
        return dict()

    by_tx_id = dict()
    with open(fpath) as inp:
        reader = csv.DictReader(inp, delimiter='\t')
        by_tx_id.update((rec['Transcript ID'], rec) for rec in reader)

    needs_hg38_tsl = not genome_name.startswith('hg38')
    if needs_hg38_tsl:
        # hg38 version has TSL, checking if we can populate some TSL from it
        fpath = ebl.biomart_fpath('hg38')
        if not verify_file(fpath):
            critical('Biomart for hg38 file not found, and needed for TSL values')
        with open(fpath) as inp:
            for rec in csv.DictReader(inp, delimiter='\t'):
                key = rec['Transcript ID']
                if key in by_tx_id:
                    by_tx_id[key]['Transcript Support Level (TSL)'] = \
                        rec['Transcript Support Level (TSL)']
                else:
                    by_tx_id[key] = rec
    return by_tx_id
Exemple #9
0
def determine_sex(work_dir, bam_fpath, avg_depth, genome, target_bed=None):
    """Guess the sample sex by comparing chrY key-region depth to the sample depth.

    :param work_dir: directory for intermediate files (pybedtools temp dir,
        'male.bed')
    :param bam_fpath: path to the BAM file; `critical` is called when it is empty
    :param avg_depth: sample average depth, converted with `float()`
    :param genome: genome name; matched by substring against the keys of
        `chry_key_regions_by_genome`
    :param target_bed: optional capture target; when given, only the part of
        the key regions overlapping the target is used
    :return: 'M', 'F', or None when the sex cannot be determined (no key
        regions for the genome, no overlap with the target, or the average
        depth is below AVG_DEPTH_THRESHOLD_TO_DETERMINE_SEX)
    """
    debug()
    debug('Determining sex')
    pybedtools.set_tempdir(safe_mkdir(join(work_dir, 'pybedtools_tmp')))

    male_bed = None
    for k in chry_key_regions_by_genome:
        if k in genome:
            male_bed = BedTool(chry_key_regions_by_genome.get(k))
            break
    if not male_bed:
        warn('Warning: no male key regions for ' + genome +
             ', cannot identify sex')
        return None

    male_area_size = get_total_bed_size(male_bed)
    debug('Male region total size: ' + str(male_area_size))

    if target_bed:
        # Restrict the key regions to the captured part and persist them to a file.
        target_male_bed = join(work_dir, 'male.bed')
        with file_transaction(work_dir, target_male_bed) as tx:
            BedTool(target_bed).intersect(male_bed).merge().saveas(tx)
        target_male_area_size = get_total_bed_size(target_male_bed)
        if target_male_area_size == 0:
            debug(
                'The male non-PAR region does not overlap with the capture target - cannot determine sex.'
            )
            return None
        male_bed = target_male_bed
    else:
        debug('WGS, determining sex based on chrY key regions coverage.')

    info(
        'Detecting sex by comparing the Y chromosome key regions coverage and average coverage depth.'
    )
    if not bam_fpath:
        critical('BAM file is required.')
    index_bam(bam_fpath)

    chry_mean_coverage = _calc_mean_coverage(work_dir, male_bed, bam_fpath, 1)
    debug('Y key regions average depth: ' + str(chry_mean_coverage))
    avg_depth = float(avg_depth)
    debug('Sample average depth: ' + str(avg_depth))
    if avg_depth < AVG_DEPTH_THRESHOLD_TO_DETERMINE_SEX:
        debug('Sample average depth is too low (less than ' +
              str(AVG_DEPTH_THRESHOLD_TO_DETERMINE_SEX) +
              ') - cannot determine sex')
        return None

    # The messages below used the invalid escape "\s", which printed a literal
    # backslash; they now contain a plain apostrophe.
    if chry_mean_coverage == 0:
        debug("Y depth is 0 - it's female")
        sex = 'F'
    else:
        factor = avg_depth / chry_mean_coverage
        debug('Sample depth / Y depth = ' + str(factor))
        if factor > FEMALE_Y_COVERAGE_FACTOR:  # if mean target coverage much higher than chrY coverage
            debug('Sample depth is more than ' +
                  str(FEMALE_Y_COVERAGE_FACTOR) +
                  " times higher than Y depth - it's female")
            sex = 'F'
        else:
            debug('Sample depth is not more than ' +
                  str(FEMALE_Y_COVERAGE_FACTOR) +
                  " times higher than Y depth - it's male")
            sex = 'M'
    debug('Sex is ' + sex)
    debug()
    return sex
Exemple #10
0
def main():
    """Build a sorted, bgzipped and tabix-indexed `ensembl.bed` for a genome.

    Command line: `<script> GENOME [db.gtf] [--debug]`. The GTF defaults to
    `ebl.ensembl_gtf_fpath(GENOME)`; a `.gz` suffix is tried when the plain
    path is not a file. Every non-gene GTF feature on a known chromosome is
    written as one BED line, annotated with biomart data (biotype, TSL, HGNC
    symbol) when the transcript is found there. Features whose biomart gene
    name differs from the GTF gene name are skipped.

    The result is written to <script dir>/<GENOME>/ensembl.bed.
    """
    # Built by concatenation: inside one triple-quoted literal the
    # "' + __file__ + '" fragment was printed verbatim instead of the script name.
    description = '\nUsage:\n    ' + __file__ + ' hg19 [db.gtf]\n'

    options = [
        (['--debug'], dict(dest='debug', action='store_true', default=False)),
    ]
    parser = OptionParser(description=description)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    opts, args = parser.parse_args()
    if len(args) == 0:
        parser.exit(1, 'Please provide genome name as the first argument')
    logger.is_debug = opts.debug

    genome_name = args[0]

    if len(args) > 1:
        gtf_fpath = args[1]
    else:
        gtf_fpath = ebl.ensembl_gtf_fpath(genome_name)
    if not isfile(gtf_fpath):
        if not gtf_fpath.endswith('.gz'):
            gtf_fpath += '.gz'
    gtf_fpath = verify_file(gtf_fpath)
    debug('Reading the GTF database')
    db = gtf.get_gtf_db(gtf_fpath)

    debug('Reading biomart data')
    features_by_ens_id = read_biomart(genome_name)

    # A set: membership is tested once per GTF feature in the loop below.
    chroms = set(c for c, l in ref.get_chrom_lengths(genome_name))

    output_fpath = join(dirname(__file__), genome_name, 'ensembl.bed')
    unsorted_output_fpath = add_suffix(output_fpath, 'unsorted')
    debug('Processing features, writing to ' + unsorted_output_fpath)

    def _get(_rec, _key):
        # Single-valued GTF attribute, or None when the attribute is absent.
        val = _rec.attributes.get(_key)
        if val is None:
            return None
        assert len(val) == 1, (_key, str(val))
        return val[0]

    num_tx_not_in_biomart = 0
    num_tx_diff_gene_in_biomart = 0
    with open(unsorted_output_fpath, 'w') as out:
        # NOTE(review): the header uses cols[:-4] while each row below is
        # sized with cols[:-3] - confirm this mismatch is intended.
        out.write('\t'.join(ebl.BedCols.names[i]
                            for i in ebl.BedCols.cols[:-4]) + '\n')

        for rec in db.all_features(order_by=('seqid', 'start', 'end')):
            if rec.featuretype == 'gene': continue
            if rec.chrom not in chroms: continue
            if rec.end - rec.start < 0: continue

            tx_id = _get(rec, 'transcript_id')
            gname = _get(rec, 'gene_name')
            tx_biotype = _get(rec, 'transcript_biotype')
            if not tx_biotype: tx_biotype = _get(rec, 'gene_biotype')
            tsl = _get(rec, 'transcript_support_level')
            hugo_gene = None

            biomart_rec = features_by_ens_id.get(tx_id)
            if not biomart_rec:
                if rec.featuretype == 'transcript':
                    num_tx_not_in_biomart += 1
            else:
                bm_gname = biomart_rec['Associated Gene Name']
                bm_tx_biotype = biomart_rec['Transcript type']
                bm_tsl = biomart_rec.get('Transcript Support Level (TSL)')
                hugo_gene = biomart_rec['HGNC symbol']
                if bm_gname != gname:
                    # Counted once per transcript, but every feature of that
                    # transcript is skipped.
                    if rec.featuretype == 'transcript':
                        num_tx_diff_gene_in_biomart += 1
                    continue
                # Biomart values take precedence over the GTF ones.
                tx_biotype = bm_tx_biotype
                tsl = bm_tsl.split()[0].replace('tsl', '') if bm_tsl else None

            fs = [None] * len(ebl.BedCols.cols[:-3])
            if not rec.chrom.startswith('chr'):
                rec.chrom = 'chr' + rec.chrom.replace('MT', 'M')
            fs[:6] = [
                rec.chrom,
                str(rec.start - 1),  # start is written as rec.start - 1
                str(rec.end), gname,
                rec.attributes.get('exon_number', ['.'])[0], rec.strand
            ]
            fs[ebl.BedCols.FEATURE] = rec.featuretype or '.'
            fs[ebl.BedCols.BIOTYPE] = tx_biotype or '.'
            fs[ebl.BedCols.ENSEMBL_ID] = tx_id or '.'
            # fs[ebl.BedCols.REFSEQ_ID] = refseq_id or '.'
            # fs[ebl.BedCols.IS_CANONICAL] = 'canonical' if refseq_id in canonical_transcripts_ids else ''
            fs[ebl.BedCols.TSL] = tsl or '.'
            fs[ebl.BedCols.HUGO] = hugo_gene or '.'
            # fs[ebl.BedCols.names[ensembl.BedCols.GC]] = gc
            out.write('\t'.join(fs) + '\n')

    if num_tx_not_in_biomart:
        warn(str(num_tx_not_in_biomart) + ' transcripts not found in biomart')
    if num_tx_diff_gene_in_biomart:
        warn(
            str(num_tx_diff_gene_in_biomart) +
            ' transcripts have a different gene name in biomart')

    debug('Sorting results')
    sort_bed(unsorted_output_fpath,
             output_fpath,
             fai_fpath=ref.get_fai(genome_name),
             genome=genome_name)
    os.remove(unsorted_output_fpath)
    bgzip_and_tabix(output_fpath)
Exemple #11
0
def _log(msg, silent, is_critical):
    """Report `msg`: via `critical` if `is_critical`, then via `warn` unless `silent`.

    The two flags are checked independently, so `warn` is also reached after
    `critical` whenever `critical` returns.
    """
    if is_critical:
        critical(msg)
    if silent:
        return
    warn(msg)
def main():
    """Build a sorted, bgzipped and tabix-indexed `ensembl.bed` for a genome.

    Command line: `<script> GENOME [db.gtf] [--debug]`. The GTF defaults to
    `ebl.ensembl_gtf_fpath(GENOME)`; a `.gz` suffix is tried when the plain
    path is not a file. Every non-gene GTF feature on a known chromosome is
    written as one BED line, annotated with biomart data (biotype, TSL, HGNC
    symbol) when the transcript is found there. Features whose biomart gene
    name differs from the GTF gene name are skipped.

    The result is written to <script dir>/<GENOME>/ensembl.bed.
    """
    # Built by concatenation: inside one triple-quoted literal the
    # "' + __file__ + '" fragment was printed verbatim instead of the script name.
    description = '\nUsage:\n    ' + __file__ + ' hg19 [db.gtf]\n'

    options = [
        (['--debug'], dict(dest='debug', action='store_true', default=False)),
    ]
    parser = OptionParser(description=description)
    for args, kwargs in options:
        parser.add_option(*args, **kwargs)
    opts, args = parser.parse_args()
    if len(args) == 0:
        parser.exit(1, 'Please provide genome name as the first argument')
    logger.is_debug = opts.debug

    genome_name = args[0]

    if len(args) > 1:
        gtf_fpath = args[1]
    else:
        gtf_fpath = ebl.ensembl_gtf_fpath(genome_name)
    if not isfile(gtf_fpath):
        if not gtf_fpath.endswith('.gz'):
            gtf_fpath += '.gz'
    gtf_fpath = verify_file(gtf_fpath)
    debug('Reading the GTF database')
    db = gtf.get_gtf_db(gtf_fpath)

    debug('Reading biomart data')
    features_by_ens_id = read_biomart(genome_name)

    # A set: membership is tested once per GTF feature in the loop below.
    chroms = set(c for c, l in ref.get_chrom_lengths(genome_name))

    output_fpath = join(dirname(__file__), genome_name, 'ensembl.bed')
    unsorted_output_fpath = add_suffix(output_fpath, 'unsorted')
    debug('Processing features, writing to ' + unsorted_output_fpath)

    def _get(_rec, _key):
        # Single-valued GTF attribute, or None when the attribute is absent.
        val = _rec.attributes.get(_key)
        if val is None:
            return None
        assert len(val) == 1, (_key, str(val))
        return val[0]

    num_tx_not_in_biomart = 0
    num_tx_diff_gene_in_biomart = 0
    with open(unsorted_output_fpath, 'w') as out:
        # NOTE(review): the header uses cols[:-4] while each row below is
        # sized with cols[:-3] - confirm this mismatch is intended.
        out.write('\t'.join(ebl.BedCols.names[i] for i in ebl.BedCols.cols[:-4]) + '\n')

        for rec in db.all_features(order_by=('seqid', 'start', 'end')):
            if rec.featuretype == 'gene': continue
            if rec.chrom not in chroms: continue
            if rec.end - rec.start < 0: continue

            tx_id = _get(rec, 'transcript_id')
            gname = _get(rec, 'gene_name')
            tx_biotype = _get(rec, 'transcript_biotype')
            if not tx_biotype: tx_biotype = _get(rec, 'gene_biotype')
            tsl = _get(rec, 'transcript_support_level')
            hugo_gene = None

            biomart_rec = features_by_ens_id.get(tx_id)
            if not biomart_rec:
                if rec.featuretype == 'transcript':
                    num_tx_not_in_biomart += 1
            else:
                bm_gname = biomart_rec['Associated Gene Name']
                bm_tx_biotype = biomart_rec['Transcript type']
                bm_tsl = biomart_rec.get('Transcript Support Level (TSL)')
                hugo_gene = biomart_rec['HGNC symbol']
                if bm_gname != gname:
                    # Counted once per transcript, but every feature of that
                    # transcript is skipped.
                    if rec.featuretype == 'transcript':
                        num_tx_diff_gene_in_biomart += 1
                    continue
                # Biomart values take precedence over the GTF ones.
                tx_biotype = bm_tx_biotype
                tsl = bm_tsl.split()[0].replace('tsl', '') if bm_tsl else None

            fs = [None] * len(ebl.BedCols.cols[:-3])
            if not rec.chrom.startswith('chr'):
                rec.chrom = 'chr' + rec.chrom.replace('MT', 'M')
            fs[:6] = [rec.chrom,
                      str(rec.start - 1),  # start is written as rec.start - 1
                      str(rec.end),
                      gname,
                      rec.attributes.get('exon_number', ['.'])[0],
                      rec.strand]
            fs[ebl.BedCols.FEATURE] = rec.featuretype or '.'
            fs[ebl.BedCols.BIOTYPE] = tx_biotype or '.'
            fs[ebl.BedCols.ENSEMBL_ID] = tx_id or '.'
            # fs[ebl.BedCols.REFSEQ_ID] = refseq_id or '.'
            # fs[ebl.BedCols.IS_CANONICAL] = 'canonical' if refseq_id in canonical_transcripts_ids else ''
            fs[ebl.BedCols.TSL] = tsl or '.'
            fs[ebl.BedCols.HUGO] = hugo_gene or '.'
            # fs[ebl.BedCols.names[ensembl.BedCols.GC]] = gc
            out.write('\t'.join(fs) + '\n')

    if num_tx_not_in_biomart:
        warn(str(num_tx_not_in_biomart) + ' transcripts not found in biomart')
    if num_tx_diff_gene_in_biomart:
        warn(str(num_tx_diff_gene_in_biomart) + ' transcripts have a different gene name in biomart')

    debug('Sorting results')
    sort_bed(unsorted_output_fpath, output_fpath, fai_fpath=ref.get_fai(genome_name), genome=genome_name)
    os.remove(unsorted_output_fpath)
    bgzip_and_tabix(output_fpath)
Exemple #13
0
def run_multisample_qualimap(output_dir, work_dir, samples,
                             targqc_full_report):
    """ 1. Generates Qualimap2 plots and put into plots_dirpath
        2. Adds records to targqc_full_report.plots

        Existing plots in <output_dir>/plots are reused when `can_reuse`
        accepts every non-hidden file there. Otherwise, if at least one sample
        has a `qualimap_html_fpath`, Qualimap `multi-bamqc` is run in
        <work_dir>/qualimap_multi_bamqc and its image directory is moved to
        <output_dir>/plots.

        Returns None in all cases. `targqc_full_report.plots` is left
        untouched when Qualimap is not found, fails, or no plots directory
        exists.
    """
    plots_dirpath = join(output_dir, 'plots')
    individual_report_fpaths = [s.qualimap_html_fpath for s in samples]
    if isdir(plots_dirpath) and all(
            can_reuse(join(plots_dirpath, f), individual_report_fpaths)
            for f in listdir(plots_dirpath) if not f.startswith('.')):
        debug('Qualimap multisample plots exist - ' + plots_dirpath +
              ', reusing...')
    elif any(s.qualimap_html_fpath for s in samples):
        # Qualimap2 run for multi-sample plots
        qualimap = find_executable()  # and get_qualimap_type(find_executable()) == 'full':
        if qualimap is None:
            warn(
                'Warning: Qualimap for multi-sample analysis was not found. TargQC will not contain plots.'
            )
            return None

        qualimap_output_dir = join(work_dir, 'qualimap_multi_bamqc')

        _correct_qualimap_genome_results(samples)
        _correct_qualimap_insert_size_histogram(samples)

        safe_mkdir(qualimap_output_dir)
        rows = [[sample.name, sample.qualimap_html_fpath]
                for sample in samples if sample.qualimap_html_fpath]

        data_fpath = write_tsv_rows(
            ([], rows),
            join(qualimap_output_dir,
                 'qualimap_results_by_sample.tsv'))
        qualimap_plots_dirpath = join(qualimap_output_dir,
                                      'images_multisampleBamQcReport')
        cmdline = (
            qualimap +
            ' multi-bamqc --data {data_fpath} -outdir {qualimap_output_dir}'.format(
                data_fpath=data_fpath,
                qualimap_output_dir=qualimap_output_dir))
        run(cmdline,
            env_vars=dict(DISPLAY=None),
            checks=[lambda _1, _2: verify_dir(qualimap_output_dir)],
            reuse=cfg.reuse_intermediate)

        if not verify_dir(qualimap_plots_dirpath):
            warn(
                'Warning: Qualimap for multi-sample analysis failed to finish. TargQC will not contain plots.'
            )
            return None
        if exists(plots_dirpath):
            shutil.rmtree(plots_dirpath)
        shutil.move(qualimap_plots_dirpath, plots_dirpath)

    # Reached without a plots directory only when no sample has a Qualimap
    # report; listing the missing directory would raise.
    if not isdir(plots_dirpath):
        warn(
            'Warning: no Qualimap reports to build multi-sample plots from. TargQC will not contain plots.'
        )
        return None

    targqc_full_report.plots = []
    for plot_fname in listdir(plots_dirpath):
        plot_fpath = join(plots_dirpath, plot_fname)
        if verify_file(plot_fpath) and plot_fpath.endswith('.png'):
            targqc_full_report.plots.append(relpath(plot_fpath, output_dir))
Exemple #14
0
def _log(msg, silent, is_critical):
    """Route `msg` to `critical` and/or `warn` depending on the flags.

    `critical(msg)` is called when `is_critical` is truthy; `warn(msg)` is
    called afterwards whenever `silent` is falsy (assuming `critical` returns).
    """
    if is_critical:
        critical(msg)
    should_warn = not silent
    if should_warn:
        warn(msg)
Exemple #15
0
def determine_sex(work_dir, bam_fpath, ave_depth, genome, target_bed=None):
    """Guess the sample sex by comparing chrY key-region depth to the sample depth.

    :param work_dir: directory passed to `sambamba_depth` for its output
    :param bam_fpath: path to the BAM file; `critical` is called when it is empty
    :param ave_depth: sample average depth, converted with `float()`
    :param genome: genome name; matched by substring against the keys of
        `chry_key_regions_by_genome`
    :param target_bed: optional capture target; when given, the key regions
        are intersected with it and must keep at least
        MALE_TARGET_REGIONS_FACTOR of the original `count()`
    :return: 'M', 'F', or None when the sex cannot be determined (no key
        regions for the genome, too little overlap with the target, or the
        average depth is below AVE_DEPTH_THRESHOLD_TO_DETERMINE_SEX)
    """
    info()
    info('Determining sex')

    male_bed = None
    for k in chry_key_regions_by_genome:
        if k in genome:
            male_bed = BedTool(chry_key_regions_by_genome.get(k))
            break
    if not male_bed:
        warn('Warning: no male key regions for ' + genome + ', cannot identify sex')
        return None

    # NOTE(review): `count()` presumably returns the number of intervals, not
    # the number of bases, although it is logged as a "total size" - confirm.
    male_area_size = male_bed.count()
    info('Male region total size: ' + str(male_area_size))

    if target_bed:
        male_bed = BedTool(target_bed).intersect(male_bed).merge()
        target_male_area_size = male_bed.count()
        if target_male_area_size < male_area_size * MALE_TARGET_REGIONS_FACTOR:
            info('Target male region total size is ' + str(target_male_area_size) + ', which is less than the ' +
                 'checked male regions size * ' + str(MALE_TARGET_REGIONS_FACTOR) +
                 ' (' + str(male_area_size * MALE_TARGET_REGIONS_FACTOR) + ') - cannot determine sex')
            return None
        else:
            info('Target male region total size is ' + str(target_male_area_size) + ', which is higher than the ' +
                 'checked male regions size * ' + str(MALE_TARGET_REGIONS_FACTOR) +
                 ' (' + str(male_area_size * MALE_TARGET_REGIONS_FACTOR) + '). ' +
                 'Determining sex based on coverage in those regions.')
    else:
        info('WGS, determining sex based on chrY key regions coverage.')

    info('Detecting sex by comparing the Y chromosome key regions coverage and average coverage depth.')
    if not bam_fpath:
        critical('BAM file is required.')
    index_bam(bam_fpath)

    chry_cov_output_fpath = sambamba_depth(work_dir, male_bed, bam_fpath, [])
    chry_mean_coverage = get_mean_cov(chry_cov_output_fpath)
    info('Y key regions average depth: ' + str(chry_mean_coverage))
    ave_depth = float(ave_depth)
    info('Sample average depth: ' + str(ave_depth))
    if ave_depth < AVE_DEPTH_THRESHOLD_TO_DETERMINE_SEX:
        info('Sample average depth is too low (less than ' + str(AVE_DEPTH_THRESHOLD_TO_DETERMINE_SEX) +
             ') - cannot determine sex')
        return None

    # The messages below used the invalid escape "\s", which printed a literal
    # backslash; they now contain a plain apostrophe.
    if chry_mean_coverage == 0:
        info("Y depth is 0 - it's female")
        sex = 'F'
    else:
        factor = ave_depth / chry_mean_coverage
        info('Sample depth / Y depth = ' + str(factor))
        if factor > FEMALE_Y_COVERAGE_FACTOR:  # if mean target coverage much higher than chrY coverage
            info('Sample depth is more than ' + str(FEMALE_Y_COVERAGE_FACTOR) + " times higher than Y depth - it's female")
            sex = 'F'
        else:
            info('Sample depth is not more than ' + str(FEMALE_Y_COVERAGE_FACTOR) + " times higher than Y depth - it's male")
            sex = 'M'
    info('Sex is ' + sex)
    info()
    return sex