コード例 #1
0
ファイル: damage.py プロジェクト: somakchowdhury/pydamage
def test_damage(ref, bam, mode, wlen, show_al, min_al, min_cov, process, verbose):
    """Prepare data and run Vuong's test to test for damage

    Args:
        ref (str): name of reference in alignment file
        bam (str): bam file
        mode (str): opening mode of alignment file
        wlen (int): window length
        show_al (bool): Show alignment representations
        min_al (int): Minimum  number of aligned reads
        min_cov (float): Minimum coverage
        process (int): Number of process for parallelization
        verbose (bool): Run in verbose mode
    Returns:
        The value returned by ``check_model_fit`` for the Vuong test results
        when the models were fitted; ``None`` when the reference was skipped
        (neither threshold met, or no C-to-T data); ``False`` when a
        ``ValueError`` was raised while processing the reference.
    """
    # Bound up front so the error report below can always be printed, even
    # when the ValueError is raised before these statistics are computed.
    cov = None
    nb_reads_aligned = None
    reflen = None

    al_handle = pysam.AlignmentFile(bam, mode=mode, threads=process)
    try:
        cov = avg_coverage(al_handle.count_coverage(contig=ref))
        nb_reads_aligned = al_handle.count(contig=ref)
        reflen = al_handle.get_reference_length(ref)

        # The reference is analysed as soon as either threshold is met.
        if nb_reads_aligned >= min_al or cov >= min_cov:
            al = al_to_damage(reference=ref, al_handle=al_handle)
            ct_data, ga_data, cc_data, c_data, g_data, all_bases = al.get_damage(
                wlen=wlen, show_al=show_al)
            if ct_data:
                model_A = models.geom_mod()
                model_B = models.unif_mod()
                test_res = fit_models(ref=ref,
                                      model_A=model_A,
                                      model_B=model_B,
                                      ct_data=ct_data,
                                      cc_data=cc_data,
                                      ga_data=ga_data,
                                      all_bases=all_bases,
                                      wlen=wlen,
                                      verbose=verbose)
                test_res['reference'] = ref
                test_res['nb_reads_aligned'] = nb_reads_aligned
                test_res['coverage'] = cov

                return check_model_fit(test_res, wlen, verbose)
        elif verbose:
            print(
                f"Did not attempt to fit a model to {ref} because of too few reads aligned")
            print(
                f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov} - reflen: {reflen}\n")
    except ValueError as e:
        if verbose:
            print(
                f"Model fitting for {ref} failed because of too few reads aligned")
            print(f"Model fitting error: {e}")
            print(
                f"nb_reads_aligned: {nb_reads_aligned} - coverage: {cov} - reflen: {reflen}\n")
        return False
    finally:
        # Release the alignment file handle on every exit path.
        al_handle.close()

    # Reference skipped: nothing was fitted.
    return None
コード例 #2
0
def test_geom_pmf(generate_data):
    """Check ``geom_mod.pmf`` element-wise against reference values."""
    g = models.geom_mod()
    expected = np.array([
        0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3, 0.3,
        0.15471624, 0.15471624, 0.15471624, 0.15471624,
        0.15471624, 0.08207436, 0.08207436, 0.04575342,
        0.02759295, 0.01851272, 0.0139726, 0.01170254, 0.01056751,
        0.01
    ])
    observed = g.pmf(x=generate_data[0], geom_p=0.5, geom_pmin=0.01,
                     geom_pmax=0.5)
    # Comparing `observed.all() == expected.all()` only compares two booleans
    # and passes for any non-zero arrays; compare the values themselves.
    np.testing.assert_allclose(observed, expected, rtol=1e-5)
コード例 #3
0
def test_geom_log_pmf(generate_data):
    """Check ``geom_mod.log_pmf`` element-wise against reference values."""
    g = models.geom_mod()
    expected = np.array([
        -1.2039728, -1.2039728, -1.2039728, -1.2039728,
        -1.2039728, -1.2039728, -1.2039728, -1.2039728,
        -1.2039728, -1.2039728, -1.86616253, -1.86616253,
        -1.86616253, -1.86616253, -1.86616253, -2.50012956,
        -2.50012956, -3.08448863, -3.59019479, -3.98929721,
        -4.27065681, -4.44794902, -4.54997064, -4.60517019
    ])
    observed = g.log_pmf(x=generate_data[0],
                         geom_p=0.5,
                         geom_pmin=0.01,
                         geom_pmax=0.5,
                         wlen=24)
    # Comparing `observed.all() == expected.all()` only compares two booleans
    # and passes for any non-zero arrays; compare the values themselves.
    np.testing.assert_allclose(observed, expected, rtol=1e-5)
コード例 #4
0
def test_geom_optim(generate_data):
    """Fit the geometric model and compare the fitted parameters, rounded to
    three decimals, with the reference values."""
    expected = {
        'geom_p': 0.6039535547658853,
        'geom_pmin': 0.03637474931290336,
        'geom_pmax': 0.4211432052501663
    }

    geom_model = models.geom_mod()
    fitted, _ = optim(function=geom_model.pmf,
                      parameters=geom_model.kwds,
                      xdata=generate_data[1],
                      ydata=generate_data[2],
                      bounds=geom_model.bounds,
                      loss='linear')

    # Every fitted parameter must match its reference to 3 decimal places.
    for name in fitted:
        assert round(fitted[name], 3) == round(expected[name], 3)
コード例 #5
0
        0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1
    ]).all()


def test_unif_log_pmf(generate_data):
    """Check ``unif_mod.log_pmf`` element-wise against reference values."""
    u = models.unif_mod()
    # 24 identical reference values (log of 0.1).
    expected = np.full(24, -2.30258509)
    observed = u.log_pmf(x=generate_data[0], unif_pmin=0.1)
    # Comparing `observed.all() == expected.all()` only compares two booleans
    # and passes for any non-zero arrays; compare the values themselves.
    np.testing.assert_allclose(observed, expected, rtol=1e-5)


def test_unif_optim(generate_data):
    """Fit the uniform model and check the single fitted parameter."""
    u = models.unif_mod()
    o, _ = optim(function=u.pmf,
                 parameters=u.kwds,
                 xdata=generate_data[1],
                 ydata=generate_data[2],
                 bounds=u.bounds,
                 loss='linear')
    # Exactly one parameter is expected back.
    assert set(o) == {'unif_pmin'}
    # An optimizer result is only reproducible up to floating-point noise, so
    # compare with a tolerance instead of exact float equality.
    assert np.isclose(o['unif_pmin'], 0.1000000000000005)


# Manual entry point: fit the geometric model to the generated data when this
# file is run as a script.
if __name__ == "__main__":
    # NOTE(review): the tests in this file receive `generate_data` as a pytest
    # fixture argument; if it is decorated with @pytest.fixture, calling it
    # directly like this is rejected by recent pytest versions - confirm.
    data, xdata, ydata = generate_data()
    g = models.geom_mod()
    # NOTE(review): the tests unpack optim()'s result as a pair (`o, e = ...`),
    # so `o` here presumably holds that whole pair - confirm.
    o = optim(function=g.pmf, parameters=g.kwds, xdata=xdata, ydata=ydata)
コード例 #6
0
def damageplot(damage_dict, wlen, outdir):
    """Draw pydamage plots

    Plots the observed C-to-T and G-to-A frequencies together with the fitted
    null (uniform) and damage (geometric) models and their 2-sigma bands, adds
    an inset QQ-plot of the damage model residuals, and saves the figure as
    ``<outdir>/<reference>.png``.

    Args:
        damage_dict(dict): pydamage result dictionary. The keys read here are
            'qlen', 'CtoT-<i>' and 'GtoA-<i>' for each i below 'qlen',
            'unif_pmin', 'unif_pmin_stdev', 'geom_p', 'geom_pmin',
            'geom_pmin_stdev', 'geom_pmax', 'geom_pmax_stdev', 'reference',
            'pvalue', 'coverage', 'residuals' and 'RMSE'.
        wlen(int): window length
        outdir(str): Pydamage result directory
    """
    x = np.array(range(wlen))
    qlen = np.array(range(damage_dict['qlen']))
    c2t = np.array([damage_dict[f"CtoT-{i}"] for i in qlen])
    g2a = np.array([damage_dict[f"GtoA-{i}"] for i in qlen])
    unif_pmin = damage_dict['unif_pmin']
    unif_pmin_stdev = damage_dict['unif_pmin_stdev']
    geom_p = damage_dict['geom_p']
    geom_pmin = damage_dict['geom_pmin']
    geom_pmin_stdev = damage_dict['geom_pmin_stdev']
    geom_pmax = damage_dict['geom_pmax']
    geom_pmax_stdev = damage_dict['geom_pmax_stdev']
    contig = damage_dict['reference']
    pvalue = damage_dict['pvalue']
    coverage = damage_dict['coverage']
    residuals = damage_dict['residuals']
    rmse = damage_dict['RMSE']

    if pvalue < 0.001:
        rpval = "<0.001"
    else:
        rpval = f"={round(pvalue,3)}"

    # Null model curve with a +/- 2 sigma band; the parameter is clamped to
    # the model bounds and the curves to the [0, 1] range.
    unif = unif_mod()
    unif_pmin_low = max(unif.bounds[0][0], unif_pmin - 2 * unif_pmin_stdev)
    unif_pmin_high = min(unif.bounds[1][0], unif_pmin + 2 * unif_pmin_stdev)
    y_unif = unif.pmf(x, unif_pmin)
    y_unif_low = np.maximum(np.zeros(y_unif.shape[0]),
                            unif.pmf(x, unif_pmin_low))
    y_unif_high = np.minimum(np.ones(y_unif.shape[0]),
                             unif.pmf(x, unif_pmin_high))

    # Damage model curve with a +/- 2 sigma band, clamped the same way.
    geom = geom_mod()
    geom_pmin_low = max(geom.bounds[0][1], geom_pmin - 2 * geom_pmin_stdev)
    geom_pmin_high = min(geom.bounds[1][1], geom_pmin + 2 * geom_pmin_stdev)
    geom_pmax_low = max(geom.bounds[0][2], geom_pmax - 2 * geom_pmax_stdev)
    geom_pmax_high = min(geom.bounds[1][2], geom_pmax + 2 * geom_pmax_stdev)

    y_geom = geom.pmf(x, geom_p, geom_pmin, geom_pmax)
    y_geom_low = np.maximum(np.zeros(y_geom.shape[0]),
                            geom.pmf(x, geom_p, geom_pmin_low, geom_pmax_low))
    y_geom_high = np.minimum(
        np.ones(y_geom.shape[0]),
        geom.pmf(x, geom_p, geom_pmin_high, geom_pmax_high))

    # Tick styling is applied on `ax` below; a pyplot-level call before the
    # figure exists would only touch (and create) an unrelated figure.
    fig, ax = plt.subplots()
    try:
        ax.plot(qlen, c2t, color='#bd0d45', alpha=0.1,
                label='C to T transitions')

        ax.plot(qlen, g2a, color='#236cf5', alpha=0.1,
                label='G to A transitions')

        ax.plot(x,
                y_unif,
                linewidth=2.5,
                color='DarkOliveGreen',
                alpha=0.8,
                label='Null model')

        ax.fill_between(x,
                        y_unif_low,
                        y_unif_high,
                        color='DarkOliveGreen',
                        alpha=0.1,
                        label='Null Model CI (2 sigma)')

        ax.plot(x,
                y_geom,
                linewidth=2.5,
                color='#D7880F',
                alpha=0.8,
                label='Damage model')

        ax.fill_between(x,
                        y_geom_low,
                        y_geom_high,
                        color='#D7880F',
                        alpha=0.1,
                        label='Damage Model CI (2 sigma)')

        ax.set_xlabel("Base from 5'", fontsize=10)
        ax.set_ylabel("Substitution frequency", fontsize=10)
        ax.set_xticks(qlen)
        ax.set_xticklabels(qlen, rotation=45, fontsize=6)
        ax.set_title(f"coverage: {round(coverage,2)} | pvalue{rpval}",
                     fontsize=8)
        ax.legend(fontsize=8)

        # Inset QQ-plot of the damage model residuals.
        left, bottom, width, height = [0.65, 0.3, 0.2, 0.2]
        ax2 = fig.add_axes([left, bottom, width, height])
        probplot(residuals, plot=ax2, sparams=(0, np.std(c2t)))
        # probplot draws theoretical quantiles on the x axis and the ordered
        # sample values on the y axis, so label the axes accordingly.
        ax2.set_xlabel("Theoretical quantile", fontsize=6)
        ax2.set_ylabel("Observed value", fontsize=6)
        ax2.set_title(
            f"QQplot of Damage model residuals, \nRMSE={round(rmse, 3)}",
            fontsize=8)
        ax2.set_xticklabels([round(i, 3) for i in ax2.get_xticks()],
                            fontsize=6)
        ax2.set_yticklabels([round(i, 3) for i in ax2.get_yticks()],
                            fontsize=6)

        fig.suptitle(contig, fontsize=12, y=0.95)

        fig.savefig(f"{outdir}/{contig}.png", dpi=200)
    finally:
        # Free the figure so repeated calls (one per reference) do not
        # accumulate open figures in memory.
        plt.close(fig)