Ejemplo n.º 1
0
def _pval_to_mathtext(pval):
    """Render ``pval`` with one significant digit for embedding in mathtext.

    When the ``%.1g`` rendering uses exponent notation (contains ``'e'``),
    the exponent is rewritten as ``*10^{...}`` and every ``'-0'`` is
    collapsed to ``'-'`` (so ``1e-05`` becomes ``1*10^{-5}``).
    """
    rendered = '%.1g' % pval
    if 'e' not in rendered:
        return rendered
    # Close the brace first, then open it where the 'e' was.
    return (rendered + '}').replace('e', '*10^{').replace('-0', '-')


def create_ols_legend_plot(matplot_dict, motif_freq,
                           sample_scores, control_scores,
                           flank_length, legend_title):
    """Add a frameless, tick-less text panel with correlation/t-test results.

    Parameters
    ----------
    matplot_dict : dict
        Must provide ``'figure'`` and ``'gridspec'``; the panel is created
        with ``plt.Subplot(figure, gridspec)`` and added to the figure.
    motif_freq
        First argument handed to ``get_pearson_corr``.
    sample_scores
        Scores passed through ``remove_flanking_scores`` (for both the
        correlation and the t-test) and ``get_flanking_scores`` (t-test).
    control_scores
        Accepted for signature compatibility; not used by this function.
    flank_length
        Forwarded to ``remove_flanking_scores`` / ``get_flanking_scores``.
    legend_title : str
        Inserted as the subscript of the ``\\Delta`` term in the label.

    Notes
    -----
    Element ``[0]`` of the ``get_pearson_corr`` result is printed as the
    ``r^2_{pearson}`` value and element ``[1]`` as its p-value. The t-test
    result is read through its ``'one_sided_pval'`` and ``'delta'`` keys.
    """
    figure = matplot_dict['figure']
    grid_spec = matplot_dict['gridspec']
    legend_axes = plt.Subplot(figure, grid_spec, autoscale_on=True)

    pearson = get_pearson_corr(
        motif_freq,
        remove_flanking_scores(sample_scores, flank_length))
    pearson_p = pearson[1]
    pearson_coeff = pearson[0]

    ttest = perform_t_test(
        remove_flanking_scores(sample_scores, flank_length),
        get_flanking_scores(sample_scores, flank_length))
    delta_p = ttest['one_sided_pval']
    delta_value = ttest['delta']

    pearson_p_text = _pval_to_mathtext(pearson_p)
    delta_p_text = _pval_to_mathtext(delta_p)

    label_template = (r'$r^2_{pearson}=%.2f(p=%s)$'
                      '\n'
                      r'$\Delta_{%s}=%.2f(p=%s)$')
    label = label_template % (pearson_coeff, pearson_p_text,
                              legend_title, delta_value, delta_p_text)

    # Longer labels are shifted further left; the offset scales with the
    # character count of the raw (unrendered) label string.
    text_x = 1-LEGEND_XMULTIPLIER*len(label)/100.0

    legend_axes.set_frame_on(False)
    legend_axes.set_xticks([])
    legend_axes.set_yticks([])
    legend_axes.text(text_x, TXT_YPOS, label, fontsize=LEGEND_FONTSIZE)
    figure.add_subplot(legend_axes)
Ejemplo n.º 2
0
def create_scatter_plot(matplot_dict, motif_freq,
                        sample_scores, control_scores,
                        flank_length, num_occurrences, y_label):
    """Scatter sample and control scores against motif base frequency.

    Both score series have their flanks stripped with
    ``remove_flanking_scores`` and are plotted against ``motif_freq``
    together with their regression lines. The finished axes is added to
    the figure.

    Parameters
    ----------
    matplot_dict : dict
        Must provide ``'figure'`` and ``'gridspec'``; the axes is created
        with ``plt.Subplot(figure, gridspec)``.
    motif_freq
        X values for every series; also used to place the three x ticks.
    sample_scores, control_scores
        Score series; flanks are removed before plotting.
    flank_length
        Forwarded to ``remove_flanking_scores``.
    num_occurrences
        Divisor used to convert tick positions into the displayed labels.
    y_label : str
        Text placed before "Score" in the y-axis label.
    """
    f = matplot_dict['figure']
    gs = matplot_dict['gridspec']
    scatter_plot = plt.Subplot(f, gs, autoscale_on=True)
    control_scores = remove_flanking_scores(control_scores, flank_length)
    sample_scores = remove_flanking_scores(sample_scores, flank_length)

    # Degree-1 polynomial fit of the sample scores, drawn in addition to the
    # regression line reported by perform_OLS.
    fit_fn = np.poly1d(np.polyfit(motif_freq, sample_scores, 1))

    control_ols = perform_OLS(control_scores, motif_freq)
    sample_ols = perform_OLS(sample_scores, motif_freq)

    sample_regression_line = sample_ols['regression_line']
    control_regression_line = control_ols['regression_line']

    # A scalar marker size applies to every point, so no per-point list is
    # needed.
    scatter_plot.scatter(motif_freq, sample_scores, color='g',
                         s=POINTSIZE,
                         marker='^', label=r'$\mathrm{Sample}$')
    scatter_plot.plot(motif_freq, sample_regression_line, 'g',
                      motif_freq, fit_fn(motif_freq),
                      color='g', linewidth=LINEWIDTH)
    scatter_plot.scatter(motif_freq, control_scores,
                         color=GREYNESS, s=POINTSIZE,
                         marker='o', label=r'$\mathrm{Control}$')
    scatter_plot.plot(motif_freq, control_regression_line,
                      color=GREYNESS, linewidth=LINEWIDTH)

    leg = scatter_plot.legend(fontsize=14)
    leg.draw_frame(True)
    leg.get_frame().set_linewidth(2.0)

    # Three ticks spanning 1.02*min .. 1.02*max of the x data; the labels
    # divide the 1.02 factor back out and normalise by num_occurrences.
    tick_positions = np.linspace(1.02*min(motif_freq), 1.02*max(motif_freq),
                                 num=3, endpoint=True)
    scatter_plot.set_xticks(tick_positions)
    tick_labels = ["$%.2f$" % (x/(1.02*num_occurrences))
                   for x in tick_positions]
    scatter_plot.set_xticklabels(tick_labels)

    yloc = plt.MaxNLocator(MAX_YTICKS)
    scatter_plot.yaxis.set_major_locator(yloc)
    scatter_plot.set_xlabel(r'$\mathrm{Most}\ \mathrm{frequent} \ \mathrm{base}\ \mathrm{frequency}$',
                            fontsize=FONTSIZE, fontweight='bold')
    scatter_plot.get_xaxis().tick_bottom()
    scatter_plot.get_yaxis().tick_left()
    # Raw string: '\m' and '\ ' are not valid Python escapes, so the
    # backslashes must be kept literally for mathtext.
    scatter_plot.set_ylabel(r'$\mathrm{%s}\ \mathrm{Score}$' % (y_label),
                            fontsize=FONTSIZE, fontweight='bold')
    scatter_plot.tick_params(axis='y', which='major', pad=TICKPAD)
    scatter_plot.tick_params(axis='x', which='major', pad=TICKPAD)
    scatter_plot.get_yaxis().set_tick_params(direction='out')
    scatter_plot.get_xaxis().set_tick_params(direction='out')
    scatter_plot.tick_params('both', length=TICKLENGTH, width=2, which='major')

    f.add_subplot(scatter_plot)
Ejemplo n.º 3
0
                'motif_enrichment': motif_enrichment,
                'motif_logo': motif_logo_encoded,
                'motif_logorc': motif_logorc_encoded,
                'moca_plot': moca_plot_encoded,
                'moca_plotrc': moca_plotrc_encoded,
                'no_fimo_hit_sample': True
            })
            continue

        gerp_mean_sample = np.loadtxt(
            os.path.join(fimo_sample, 'gerp.mean.txt')).tolist()
        phylop_mean_sample = np.loadtxt(
            os.path.join(fimo_sample, 'phylop.mean.txt')).tolist()

        delta_phylop_ttest = perform_t_test(
            remove_flanking_scores(phylop_mean_sample, flank_length),
            get_flanking_scores(phylop_mean_sample, flank_length))
        p_delta_phylop = delta_phylop_ttest['one_sided_pval']
        delta_phylop = delta_phylop_ttest['delta']

        delta_gerp_ttest = perform_t_test(
            remove_flanking_scores(gerp_mean_sample, flank_length),
            get_flanking_scores(gerp_mean_sample, flank_length))
        p_delta_gerp = delta_gerp_ttest['one_sided_pval']
        delta_gerp = delta_gerp_ttest['delta']

        phylop_sample_ols = perform_OLS(
            remove_flanking_scores(phylop_mean_sample, flank_length),
            motif_freq)
        gerp_sample_ols = perform_OLS(
            remove_flanking_scores(gerp_mean_sample, flank_length), motif_freq)
Ejemplo n.º 4
0
        motif_evalue = get_motif_evalue(record)

        if os.stat(os.path.join(fimo_sample, 'gerp.mean.txt')).st_size == 0:
            db.encode_tf_stats.insert_one({ 'encode_id': d,
                                            'motif_number': i+1,
                                            'center_enrichment': center_enrichment,
                                            'center_enrichment_pval': center_enrichment_pval,
                                            'motif_evalue': motif_evalue,
                                            'motif_enrichment': motif_enrichment,
                                            'no_fimo_hit_sample': True})
            continue

        gerp_mean_sample = np.loadtxt(os.path.join(fimo_sample, 'gerp.mean.txt')).tolist()
        phylop_mean_sample = np.loadtxt(os.path.join(fimo_sample, 'phylop.mean.txt')).tolist()

        delta_phylop_ttest = perform_t_test(remove_flanking_scores(phylop_mean_sample, flank_length),
                                            get_flanking_scores(phylop_mean_sample, flank_length))
        p_delta_phylop = delta_phylop_ttest['one_sided_pval']
        delta_phylop = delta_phylop_ttest['delta']

        delta_gerp_ttest = perform_t_test(remove_flanking_scores(gerp_mean_sample, flank_length),
                                          get_flanking_scores(gerp_mean_sample, flank_length))
        p_delta_gerp = delta_gerp_ttest['one_sided_pval']
        delta_gerp = delta_gerp_ttest['delta']

        phylop_sample_ols = perform_OLS(remove_flanking_scores(phylop_mean_sample, flank_length), motif_freq)
        gerp_sample_ols = perform_OLS(remove_flanking_scores(gerp_mean_sample, flank_length), motif_freq)
        phylop_sample_fit = phylop_sample_ols['regression_fit']
        gerp_sample_fit = gerp_sample_ols['regression_fit']
        corr_phylop_sample = get_pearson_corr(motif_freq, remove_flanking_scores(phylop_mean_sample, flank_length))
        corr_gerp_sample = get_pearson_corr(motif_freq, remove_flanking_scores(gerp_mean_sample, flank_length))