예제 #1
0
import setup
import config as cfg
from load_data import GeneData
from shapes.sigmoid import Sigmoid
from fitter import Fitter
from all_fits import get_all_fits, iterate_fits
from scalers import LogScaler

# Script-wide settings: verbosity level and the scaler applied to ages.
cfg.verbosity = 1
age_scaler = LogScaler()
pathway = 'serotonin'

# Load the 'both' dataset, restrict it to the chosen pathway, rescale ages.
data = GeneData.load('both')
data = data.restrict_pathway(pathway)
data = data.scale_ages(age_scaler)

# Build a fitter around a Sigmoid constructed with priors=None and obtain
# the fits for the data through get_all_fits.
fitter = Fitter(Sigmoid(priors=None))
fits = get_all_fits(data, fitter)

# (g, r) keys of the fits yielded by iterate_fits (called with
# R2_threshold=0.5) whose first theta component exceeds 100 in magnitude.
extreme = [
    (g, r)
    for dsname, g, r, fit in iterate_fits(
        fits, R2_threshold=0.5, return_keys=True)
    if abs(fit.theta[0]) > 100
]
예제 #2
0
    ax.set_ylabel('expression level', fontsize=fontsize)
    ticks = ax.get_yticks()
    ticks = np.array([ticks[0], ticks[-1]])
    ax.set_yticks(ticks)
    ax.set_yticklabels(['{:g}'.format(t) for t in ticks], fontsize=fontsize)

    return fig


# Script-wide settings: verbosity level and the scaler applied to ages.
cfg.verbosity = 1
age_scaler = LogScaler()

# Load the 'both' dataset and rescale its ages with the scaler above.
data = GeneData.load('both').scale_ages(age_scaler)

# Shapes to fit to each example series, in the order they are processed.
shapes = [
    Sigmoid('sigmoid_wide'),
    Poly(1, 'poly1'),
    Poly(3, 'poly3'),
    Spline()
]
# Example series to process, as (g, r, yrange) triples.
# presumably g is a gene name, r a region name and yrange the y-axis limits
# used for plotting -- confirm against the plotting helper.
GRs = [
    ('ADRB1', 'A1C', (5, 8)),
    ('GLRA2', 'STC', (5, 12)),
    ('TUBA1A', 'V1C', (10, 14)),
]

for g, r, yrange in GRs:
    print 'Doing {}@{}...'.format(g, r)
    # Collects one entry per shape for the current (g, r) pair.
    thetas = []
    for shape in shapes:
        # NOTE(review): this lookup does not depend on `shape`; it could be
        # hoisted out of the inner loop.
        series = data.get_one_series(g, r)
예제 #3
0
    ax.bar(bin_edges[:-1], probs, width=width, color='b')

    ax.set_xlabel('z score', fontsize=fontsize)
    ax.set_ylabel('probability', fontsize=fontsize)
    ax.tick_params(axis='both', labelsize=fontsize)
    return fig


# Script-wide settings: verbosity level, age scaler and pathway name.
cfg.verbosity = 1
age_scaler = LogScaler()
pathway = '17full'
# Two copies of the same pathway-restricted, age-scaled dataset; the second
# one additionally has .shuffle() applied.
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
data_shuffled = GeneData.load('both').restrict_pathway(pathway).scale_ages(
    age_scaler).shuffle()

# The same fitter is used for the original and the shuffled data.
shape = Sigmoid('sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
# presumably allow_new_computation=False limits get_all_fits to previously
# computed results -- confirm in all_fits.
fits = get_all_fits(data, fitter, allow_new_computation=False)
fits_shuffled = get_all_fits(data_shuffled,
                             fitter,
                             allow_new_computation=False)
# Pair up the LOO_score of each fit with that of its shuffled counterpart,
# then split the pairs into two parallel arrays.
R2_pairs = [(fit.LOO_score, fit2.LOO_score)
            for fit, fit2 in iterate_fits(fits, fits_shuffled)]
R2 = np.array([r for r, r_shuffled in R2_pairs])
R2_shuffled = np.array([r_shuffled for r, r_shuffled in R2_pairs])

# Figure name is built from the data's pathway and the shape's cache name.
name = '{}-{}'.format(data.pathway, shape.cache_name())
fig = plot_score_distribution(R2, R2_shuffled)
save_figure(fig,
            'RP/R2-distribution-{}.png'.format(name),
            under_results=True,
예제 #4
0
    # mark birth time with a vertical line
    ymin, ymax = ax.get_ylim()
    birth_age = age_scaler.scale(0)
    ax.plot([birth_age, birth_age], [ymin, ymax], '--', color='0.85')

    return fig


# Script-wide settings: verbosity level and the scaler applied to ages.
cfg.verbosity = 1
age_scaler = LogScaler()

# all_data is the full age-scaled dataset; data is the same dataset
# restricted to the '17full' pathway before scaling.
all_data = GeneData.load('both').scale_ages(age_scaler)
pathway = '17full'
data = GeneData.load('both').restrict_pathway(pathway)
data = data.scale_ages(age_scaler)

shape = Sigmoid(priors='sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
fits = get_all_fits(data, fitter)

R2_threshold = 0.5
for b_unique in (False, True):
    # Output directory and filename suffix depend only on b_unique.
    if b_unique:
        str_dir = 'unique'
        str_unique = ' (unique)'
    else:
        str_dir = 'overlapping'
        str_unique = ''

    dct_pathways = load_17_pathways_breakdown(b_unique)
    # Extra entry mapped to None, plotted alongside the individual pathways.
    dct_pathways['17 pathways'] = None

    # One figure per entry, saved under RP/<str_dir>/.
    for name, genes in dct_pathways.iteritems():
        single_pathway = {name: genes}
        fig = plot_onset_times(all_data, data, fitter, fits, single_pathway,
                               R2_threshold, b_unique)
        filename = 'RP/{}/change-distributions-{}{}.png'.format(
            str_dir, name, str_unique)
        save_figure(fig, filename, under_results=True)
예제 #5
0
        zoom_max = max(max(zoom_data), zoom_max)
        ax.plot(xpos, h, linewidth=3, label=shape.cache_name())
    ax.set_xlim(*zoom)
    ax.set_ylim(0, zoom_max * 1.1)
    ax.legend(loc='best', fontsize=fontsize, frameon=False)
    ax.set_xlabel('test $R^2$ score', fontsize=fontsize)
    ax.set_ylabel("probability density", fontsize=fontsize)
    ax.tick_params(axis='both', labelsize=fontsize)
    return fig


# Script-wide settings: verbosity level and the scaler applied to ages.
cfg.verbosity = 1
age_scaler = LogScaler()
# Load the 'both' dataset and rescale its ages with the scaler above.
data = GeneData.load('both').scale_ages(age_scaler)

# Two shapes to compare, each wrapped in a Fitter with sigma_prior='normal'.
sigmoid = Sigmoid(priors='sigmoid_wide')
sigslope = Sigslope(priors='sigslope80')
shapes = [sigmoid, sigslope]
fitters = [Fitter(shape, sigma_prior='normal') for shape in shapes]

#####################################################
# Example fits
#####################################################
# Example series to process, as (g, r, yrange) triples.
GRs = [
    ('ABHD4', 'STC', (5, 8)),
]
for g, r, yrange in GRs:
    for fitter in fitters:
        # NOTE(review): the message and the series lookup do not depend on
        # `fitter`, so both are repeated once per fitter for each (g, r).
        print 'Doing {}@{}...'.format(g, r)
        series = data.get_one_series(g, r)
        # Only the first of the four values returned by fit() is kept.
        theta, _, _, _ = fitter.fit(series.ages, series.single_expression)
예제 #6
0
            rv = stats.norm(loc, sigma)
            x = np.linspace(xmin, xmax, 100)
            prob = rv.pdf(x)
            plt.plot(x, prob, 'k', linewidth=3)
            ttl_fit = r'Normal fit: $loc$={:.3f}, $\sigma$={:.3f}'.format(
                loc, sigma)
            ttl = '\n'.join([ttl, ttl_fit])
        plt.title(ttl)
    return vals


# Script-wide settings: verbosity level, age scaler and pathway name.
cfg.verbosity = 1
age_scaler = LogScaler()
pathway = 'serotonin'
# Load the 'both' dataset, restrict it to the pathway, rescale ages.
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
# Sigmoid and Fitter are both constructed with their default arguments here.
shape = Sigmoid()
fitter = Fitter(shape)
# presumably allow_new_computation=False limits get_all_fits to previously
# computed results -- confirm in all_fits.
fits = get_all_fits(data, fitter, allow_new_computation=False)


def translate(g, r, fit):
    series = data.get_one_series(g, r)
    theta, sigma = fitter.translate_parameters_to_priors_scale(
        series.ages, series.single_expression, fit.theta, fit.sigma)
    a, h, mu, w = theta
    if h < 0:
        theta = (a + h, -h, mu, -w
                 )  # this is an equivalent sigmoid, with h now positive
    return Bunch(
        theta=theta,
        sigma=sigma,
예제 #7
0
def get_fits():
    """Return the fits for the '17pathways' subset of the 'both' dataset.

    The data is restricted to the pathway, its ages are scaled with the
    module-level ``age_scaler``, and the result is passed to ``get_all_fits``
    together with a ``Fitter`` built from ``Sigmoid(priors='sigmoid_wide')``
    and ``sigma_prior='normal'``.
    """
    pathway_data = GeneData.load('both').restrict_pathway('17pathways')
    scaled_data = pathway_data.scale_ages(age_scaler)
    sigmoid_fitter = Fitter(Sigmoid(priors='sigmoid_wide'),
                            sigma_prior='normal')
    return get_all_fits(scaled_data, sigmoid_fitter)
예제 #8
0
#1) GPR50@DFC, delta-R2=0.573
#2) UCHL1@V1C, delta-R2=0.571
#3) GRIN2B@HIP, delta-R2=0.548
#4) SSTR1@A1C, delta-R2=0.533
#5) GRM7@DFC, delta-R2=0.531
#6) GRM7@STC, delta-R2=0.525
#7) TOMM40L@VFC, delta-R2=0.514
#8) CREB5@DFC, delta-R2=0.496
#9) GRM7@S1C, delta-R2=0.474

GRs = [
    ('HTR5A','S1C', (6, 10)),  # example of extreme parameter values without priors
    ('GABRA2','HIP', (4, 12)),  # delta-R2=0.669 (not actual trend in data)
]
fitters = [
    Fitter(Sigmoid()), 
    Fitter(Sigmoid('sigmoid_wide'),sigma_prior='normal')
]
labels = ['no priors', 'semi-informative']

for g,r,yrange in GRs:
    print 'Doing {}@{}...'.format(g,r)
    series = data.get_one_series(g,r)
    thetas = []
    for fitter in fitters:
        theta,_,_,_ = fitter.fit(series.ages, series.single_expression)
        thetas.append(theta)
    fig = plot_one_series(series,fitters,thetas,labels,yrange)
    save_figure(fig,'RP/fit-examples-{}-{}.png'.format(g,r), under_results=True)            
    for theta,label in zip(thetas,labels):
        print '{}: {}'.format(label,theta)
예제 #9
0
    probs = counts / float(sum(counts))
    width = bin_edges[1] - bin_edges[0]
    ax.bar(bin_edges[:-1], probs, width=width, color='b')

    ax.set_xlabel('z score', fontsize=fontsize)
    ax.set_ylabel('probability', fontsize=fontsize)   
    ax.tick_params(axis='both', labelsize=fontsize)
    return fig

# Script-wide settings: verbosity level, age scaler and pathway name.
cfg.verbosity = 1
age_scaler = LogScaler()
pathway = '17full'

# Two copies of the same pathway-restricted, age-scaled dataset; the second
# one additionally has .shuffle() applied.
data = GeneData.load('both').restrict_pathway(pathway).scale_ages(age_scaler)
data_shuffled = GeneData.load('both').restrict_pathway(pathway)
data_shuffled = data_shuffled.scale_ages(age_scaler).shuffle()

# The same fitter is used for the original and the shuffled data.
shape = Sigmoid('sigmoid_wide')
fitter = Fitter(shape, sigma_prior='normal')
fits = get_all_fits(data, fitter, allow_new_computation=False)
fits_shuffled = get_all_fits(
    data_shuffled, fitter, allow_new_computation=False)

# Pair up the LOO_score of each fit with that of its shuffled counterpart,
# then split the pairs into two parallel arrays.
R2_pairs = [
    (fit.LOO_score, fit2.LOO_score)
    for fit, fit2 in iterate_fits(fits, fits_shuffled)
]
R2 = np.array([r for r, r_shuffled in R2_pairs])
R2_shuffled = np.array([r_shuffled for r, r_shuffled in R2_pairs])

# Plot both score distributions and save the figure, named after the data's
# pathway and the shape's cache name.
name = '{}-{}'.format(data.pathway, shape.cache_name())
fig = plot_score_distribution(R2, R2_shuffled)
save_figure(
    fig,
    'RP/R2-distribution-{}.png'.format(name),
    under_results=True,
    b_close=True,
)

# Standardize the real scores against the mean and standard deviation of the
# shuffled scores, then plot the resulting z scores.
mu_shuffled = R2_shuffled.mean()
std_shuffled = R2_shuffled.std()
z_scores = (R2 - mu_shuffled) / std_shuffled
fig = plot_z_scores(z_scores)