예제 #1
0
def _add_scores(dataset, dataset_fits):
    """Attach goodness-of-fit scores to every per-gene fit in ``dataset_fits``.

    For each ``(gene, region)`` key whose gene is not ``None`` this sets:

    * ``fit.fit_score`` -- ``cfg.score`` of the series against
      ``fit.fit_predictions``; ``None`` when there are no predictions or
      when scoring raises.
    * ``fit.LOO_score`` -- ``loo_score`` of the series against
      ``fit.LOO_predictions``; ``None`` when scoring raises.
    * ``level.LOO_score`` for every entry of ``fit.with_correlations``, when
      that attribute exists and is not ``None``.

    Keys of the form ``(None, region)`` are region-level fits and are skipped.

    Returns:
        The same ``dataset_fits`` mapping, with its fit objects updated in place.
    """
    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only.
    for (g, r), fit in dataset_fits.items():
        if g is None:
            continue  # it's a region fit

        series = dataset.get_one_series(g, r)

        # Scoring is best effort: a failure leaves the score as None instead
        # of aborting. Catch Exception (not a bare except) so that
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            if fit.fit_predictions is None:
                fit.fit_score = None
            else:
                fit.fit_score = cfg.score(series.single_expression, fit.fit_predictions)
        except Exception:
            fit.fit_score = None
        try:
            fit.LOO_score = loo_score(series.single_expression, fit.LOO_predictions)
        except Exception:
            fit.LOO_score = None

        # add score for correlation LOO fits
        correlation_levels = getattr(fit, 'with_correlations', None)
        if correlation_levels is not None:
            for level in correlation_levels:
                y_real = series.single_expression
                # match the predictions to the indices of the single series
                # after NaN are removed from it
                y_pred = level.LOO_predictions[series.original_inds]
                level.LOO_score = loo_score(y_real, y_pred)

    return dataset_fits
예제 #2
0
def analyze_one_region(data, fitter, fits, region):
    """Collect LOO scores per gene for one region, with and without correlations.

    Args:
        data: object providing ``gene_names``, ``get_several_series`` and
            ``get_dataset_for_region``.
        fitter: not used by this function; kept so the signature is unchanged.
        fits: mapping from the value returned by
            ``data.get_dataset_for_region(region)`` to that dataset's fits,
            which are keyed by ``(gene, region)`` plus a ``(None, region)``
            entry for the region-level fits.
        region: name of the region to analyze.

    Returns:
        ``(R2_tuples, correlations)``. ``R2_tuples`` maps ``(gene, region)``
        to a tuple holding the basic LOO score followed by one score per
        entry of ``fit.with_correlations``; genes with any score below -1
        are left out. ``correlations`` is taken from the first region-level
        fit.
    """
    # The parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print('Analyzing region {}...'.format(region))
    series = data.get_several_series(data.gene_names, region)
    ds_fits = fits[data.get_dataset_for_region(region)]
    y = series.expression

    R2_tuples = {}
    for i, g in enumerate(series.gene_names):
        fit = ds_fits[(g, region)]
        y_real = y[:, i]
        scores = [loo_score(y_real, fit.LOO_predictions)]
        for level in fit.with_correlations:
            y_multi_gene = level.LOO_predictions[series.original_inds]
            scores.append(loo_score(y_real, y_multi_gene))
        # skip genes where any of the fits scored below -1
        if (np.array(scores) < -1).any():
            continue
        R2_tuples[(g, region)] = tuple(scores)

    region_fits = ds_fits[(None, region)]
    # get correlations after one optimization iteration
    correlations = region_fits[0].correlations
    return R2_tuples, correlations
예제 #3
0
def _add_scores(dataset, dataset_fits):
    """Attach goodness-of-fit scores to every per-gene fit in ``dataset_fits``.

    For each ``(gene, region)`` key whose gene is not ``None`` this sets:

    * ``fit.fit_score`` -- ``cfg.score`` of the series against
      ``fit.fit_predictions``; ``None`` when there are no predictions or
      when scoring raises.
    * ``fit.LOO_score`` -- ``loo_score`` of the series against
      ``fit.LOO_predictions``; ``None`` when scoring raises.
    * ``level.LOO_score`` for every entry of ``fit.with_correlations``, when
      that attribute exists and is not ``None``.

    Keys of the form ``(None, region)`` are region-level fits and are skipped.

    Returns:
        The same ``dataset_fits`` mapping, with its fit objects updated in place.
    """
    # items() exists on both Python 2 and Python 3 dicts; iteritems() is
    # Python 2 only.
    for (g, r), fit in dataset_fits.items():
        if g is None:
            continue  # it's a region fit

        series = dataset.get_one_series(g, r)

        # Scoring is best effort: a failure leaves the score as None instead
        # of aborting. Catch Exception (not a bare except) so that
        # KeyboardInterrupt and SystemExit still propagate.
        try:
            if fit.fit_predictions is None:
                fit.fit_score = None
            else:
                fit.fit_score = cfg.score(series.single_expression,
                                          fit.fit_predictions)
        except Exception:
            fit.fit_score = None
        try:
            fit.LOO_score = loo_score(series.single_expression,
                                      fit.LOO_predictions)
        except Exception:
            fit.LOO_score = None

        # add score for correlation LOO fits
        correlation_levels = getattr(fit, 'with_correlations', None)
        if correlation_levels is not None:
            for level in correlation_levels:
                y_real = series.single_expression
                # match the predictions to the indices of the single series
                # after NaN are removed from it
                y_pred = level.LOO_predictions[series.original_inds]
                level.LOO_score = loo_score(y_real, y_pred)

    return dataset_fits
예제 #4
0
파일: plots.py 프로젝트: ronniemaor/timefit
def plot_one_series(series, shape=None, theta=None, LOO_predictions=None, change_distribution=None, minimal_annotations=False, ax=None, show_legend=True):
    """Plot a single expression series and, optionally, its fitted curve.

    Args:
        series: object providing ``ages``, ``single_expression``,
            ``gene_name``, ``region_name`` and ``age_scaler``.
        shape: fitted shape; the fit is drawn only when both ``shape`` and
            ``theta`` are given.
        theta: parameters passed to ``shape.f`` / ``shape.high_res_preds``.
        LOO_predictions: optional leave-one-out prediction per data point.
            Drawn (with a residual line to the data point) only when the fit
            itself is drawn; ``None``/NaN entries are skipped.
        change_distribution: optional object with ``centers`` and ``weights``
            drawn as bars scaled to 90% of the current y-range. The caller's
            ``weights`` are not modified.
        minimal_annotations: use the smaller font and markers and skip the
            legend, y tick label sizing and title.
        ax: axes to draw into. When omitted a new figure is created; when
            given, axis labels and the title are not set.
        show_legend: whether to draw the legend for the fit.

    Returns:
        The figure that owns the axes drawn into.
    """
    x = series.ages
    y = series.single_expression
    b_subplot = ax is not None
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
    fontsize = cfg.minimal_annotation_fontsize if minimal_annotations else cfg.fontsize

    # plot the data points
    markersize = 8 if not minimal_annotations else 4
    ax.plot(x, y, 'ks', markersize=markersize)
    if not b_subplot:
        ax.set_ylabel('expression level', fontsize=fontsize)
        ax.set_xlabel('age', fontsize=fontsize)
    ttl = '{}@{}'.format(series.gene_name, series.region_name)
    add_age_ticks(ax, series.age_scaler, fontsize)

    # plot change distribution if provided
    if change_distribution:
        ymin, ymax = ax.get_ylim()
        centers = change_distribution.centers
        width = centers[1] - centers[0]
        weights = change_distribution.weights
        # Build a rescaled copy. An in-place ``*=`` would overwrite the
        # caller's weights every time the distribution is plotted.
        weights = weights * (0.9 * (ymax - ymin) / weights.max())
        ax.bar(centers, weights, width=width, bottom=ymin, color='g', alpha=0.5)

    if shape is not None and theta is not None:
        # add fit parameters to title
        ttl = '{}, {} fit'.format(ttl, shape)
        more_ttl = shape.format_params(theta, series.age_scaler, latex=True)
        if more_ttl:
            ttl = '\n'.join([ttl, more_ttl])

        # draw the overall fit
        score = cfg.score(y, shape.f(theta, x))
        x_smooth, y_smooth = shape.high_res_preds(theta, x)
        label = 'fit ({}={:.3g})'.format(cfg.score_type, score)
        ax.plot(x_smooth, y_smooth, 'b-', linewidth=3, label=label)

        # draw LOO predictions and residuals
        if LOO_predictions is not None:
            score = loo_score(y, LOO_predictions)
            # A missing score cannot be formatted with '{:.3g}', so no legend
            # entry is produced in that case.
            label = None
            if score is not None:
                label = 'LOO ({}={:.3g})'.format(cfg.score_type, score)
            for xi, yi, y_loo in zip(x, y, LOO_predictions):
                if y_loo is None or np.isnan(y_loo):
                    continue
                ax.plot([xi, xi], [yi, y_loo], '-', color='0.5', label=label)
                # Only the first segment actually drawn carries the label, so
                # the legend entry survives a skipped first point.
                label = None
                ax.plot(xi, y_loo, 'x', color='0.5', markeredgewidth=2)
        if show_legend and not minimal_annotations:
            ax.legend(fontsize=fontsize, frameon=False)

    if not minimal_annotations:
        ax.tick_params(axis='y', labelsize=fontsize)
        if not b_subplot:
            ax.set_title(ttl, fontsize=fontsize)
    return ax.figure
예제 #5
0
def plot_one_exon(series,
                  shape=None,
                  theta=None,
                  LOO_predictions=None,
                  ax=None,
                  y_range=None):
    """Plot the expression series of one exon and, optionally, its fit.

    Args:
        series: object providing ``ages``, ``single_expression``,
            ``gene_name`` and ``age_scaler``. ``gene_name`` must contain
            ``cfg.exon_separator``; the part after its first occurrence is
            used as the title.
        shape: fitted shape; the fit is drawn only when both ``shape`` and
            ``theta`` are given.
        theta: parameters passed to ``shape.f`` / ``shape.high_res_preds``.
        LOO_predictions: optional leave-one-out prediction per data point.
            Drawn only when the fit itself is drawn; ``None``/NaN entries
            are skipped.
        ax: axes to draw into. When omitted a new figure is created.
        y_range: optional y limits, given in unscaled units.

    Returns:
        The figure that owns the axes drawn into.
    """
    x = series.ages
    y = series.single_expression
    if ax is None:
        # ``ax`` defaults to None, so create axes instead of failing on the
        # first ax.plot call below.
        ax = plt.figure().add_subplot(111)

    fontsize = cfg.minimal_annotation_fontsize
    markersize = 8
    y_scaler = scalers.build_scaler(cfg.plots_scaling, None)
    scaled = y_scaler is not None

    y_scaled = y_scaler.scale(y) if scaled else y
    if scaled and y_range is not None:
        y_range = y_scaler.scale(y_range)

    if y_range is not None:
        # Set the limits on the target axes. plt.ylim would act on pyplot's
        # current axes, which need not be ``ax``.
        ax.set_ylim(y_range)
    ax.plot(x, y_scaled, 'ks', markersize=markersize)
    ax.set_xlabel('age', fontsize=fontsize)
    add_age_ticks(ax, series.age_scaler, fontsize)
    exon = series.gene_name[series.gene_name.index(cfg.exon_separator) + 1:]
    ax.set_title(exon.replace(cfg.exon_separator, '-'), fontsize=14)

    if shape is not None and theta is not None:
        # the score is computed on the unscaled values; only the drawn curve
        # is scaled
        score = cfg.score(y, shape.f(theta, x))
        x_smooth, y_smooth = shape.high_res_preds(theta, x)
        if scaled:
            y_smooth = y_scaler.scale(y_smooth)
        label = 'fit ({}={:.3g})'.format(cfg.score_type, score)
        ax.plot(x_smooth, y_smooth, 'b-', linewidth=3, label=label)

        # draw LOO predictions and residuals
        if LOO_predictions is not None:
            score = loo_score(y, LOO_predictions)
            if scaled:
                LOO_predictions = y_scaler.scale(LOO_predictions)
            label = None
            if score is not None:
                label = 'LOO ({}={:.3g})'.format(cfg.score_type, score)
            for xi, yi, y_loo in zip(x, y_scaled, LOO_predictions):
                if y_loo is None or np.isnan(y_loo):
                    continue
                ax.plot([xi, xi], [yi, y_loo], '-', color='0.5', label=label)
                # Only the first segment actually drawn carries the label, so
                # the legend entry survives a skipped first point.
                label = None
                ax.plot(xi, y_loo, 'x', color='0.5', markeredgewidth=2)

        ax.legend(fontsize=fontsize, frameon=False)
    return ax.figure
예제 #6
0
def analyze_one_region(data, fitter, fits, region):
    """Collect LOO scores per gene for one region, with and without correlations.

    Args:
        data: object providing ``gene_names``, ``get_several_series`` and
            ``get_dataset_for_region``.
        fitter: not used by this function; kept so the signature is unchanged.
        fits: mapping from the value returned by
            ``data.get_dataset_for_region(region)`` to that dataset's fits,
            which are keyed by ``(gene, region)`` plus a ``(None, region)``
            entry for the region-level fits.
        region: name of the region to analyze.

    Returns:
        ``(R2_tuples, correlations)``. ``R2_tuples`` maps ``(gene, region)``
        to a tuple holding the basic LOO score followed by one score per
        entry of ``fit.with_correlations``; genes with any score below -1
        are left out. ``correlations`` is taken from the first region-level
        fit.
    """
    # The parenthesised single-argument form prints the same text on
    # Python 2 and Python 3.
    print('Analyzing region {}...'.format(region))
    series = data.get_several_series(data.gene_names, region)
    ds_fits = fits[data.get_dataset_for_region(region)]
    y = series.expression

    R2_tuples = {}
    for i, g in enumerate(series.gene_names):
        fit = ds_fits[(g, region)]
        y_real = y[:, i]
        scores = [loo_score(y_real, fit.LOO_predictions)]
        for level in fit.with_correlations:
            y_multi_gene = level.LOO_predictions[series.original_inds]
            scores.append(loo_score(y_real, y_multi_gene))
        # skip genes where any of the fits scored below -1
        if (np.array(scores) < -1).any():
            continue
        R2_tuples[(g, region)] = tuple(scores)

    region_fits = ds_fits[(None, region)]
    # get correlations after one optimization iteration
    correlations = region_fits[0].correlations
    return R2_tuples, correlations
예제 #7
0
    # NOTE(review): this is the interior of a function whose header (and the
    # definitions of series, x, y, fitter and fits) is not visible here.

    # Fit each gene's series on its own, asking the fitter for leave-one-out
    # (LOO) predictions as well.
    for i, g in enumerate(series.gene_names):
        print 'Fitting series {}...'.format(i + 1)
        theta, sigma, LOO_predictions, _ = fitter.fit(x, y[:, i], loo=True)
        # keep only what the comparison below needs
        fit = Bunch(
            theta=theta,
            LOO_predictions=LOO_predictions,
        )
        fits.append(fit)

    # Fit all series together; the last element of the returned levels is the
    # one reported, plotted and compared below.
    print 'Fitting with correlations...'
    levels = fitter.fit_multi(x, y, loo=True, n_iterations=2)
    res = levels[-1]
    print 'Theta:'
    for ti in res.theta:
        print '  {}'.format(ti)
    print 'Sigma:'
    print res.sigma
    plot_series(series, fitter.shape, res.theta, res.LOO_predictions)

    # Score the LOO predictions of both approaches gene by gene and collect
    # (basic, multi-gene) score pairs.
    R2_pairs = []
    for i, g in enumerate(series.gene_names):
        y_real = y[:, i]
        y_basic = fits[i].LOO_predictions
        y_multi_gene = res.LOO_predictions[:,
                                           i]  # no NaNs in the generated data, so no need to handle the original_inds mess
        basic_R2 = loo_score(y_real, y_basic)
        multi_gene_R2 = loo_score(y_real, y_multi_gene)
        R2_pairs.append((basic_R2, multi_gene_R2))
    plot_comparison_scatter(R2_pairs, series.region_name)
    print 'R2_pairs = {}'.format(R2_pairs)
예제 #8
0
def plot_one_series(series,
                    shape=None,
                    theta=None,
                    LOO_predictions=None,
                    change_distribution=None,
                    minimal_annotations=False,
                    ax=None,
                    show_legend=True):
    """Plot a single expression series and, optionally, its fitted curve.

    Args:
        series: object providing ``ages``, ``single_expression``,
            ``gene_name``, ``region_name`` and ``age_scaler``.
        shape: fitted shape; the fit is drawn only when both ``shape`` and
            ``theta`` are given.
        theta: parameters passed to ``shape.f`` / ``shape.high_res_preds``.
        LOO_predictions: optional leave-one-out prediction per data point.
            Drawn (with a residual line to the data point) only when the fit
            itself is drawn; ``None``/NaN entries are skipped.
        change_distribution: optional object with ``centers`` and ``weights``
            drawn as bars scaled to 90% of the current y-range. The caller's
            ``weights`` are not modified.
        minimal_annotations: use the smaller font and markers and skip the
            legend, y tick label sizing and title.
        ax: axes to draw into. When omitted a new figure is created; when
            given, axis labels and the title are not set.
        show_legend: whether to draw the legend for the fit.

    Returns:
        The figure that owns the axes drawn into.
    """
    x = series.ages
    y = series.single_expression
    b_subplot = ax is not None
    if ax is None:
        fig = plt.figure()
        ax = fig.add_subplot(111)
    fontsize = cfg.minimal_annotation_fontsize if minimal_annotations else cfg.fontsize

    # plot the data points
    markersize = 8 if not minimal_annotations else 4
    ax.plot(x, y, 'ks', markersize=markersize)
    if not b_subplot:
        ax.set_ylabel('expression level', fontsize=fontsize)
        ax.set_xlabel('age', fontsize=fontsize)
    ttl = '{}@{}'.format(series.gene_name, series.region_name)
    add_age_ticks(ax, series.age_scaler, fontsize)

    # plot change distribution if provided
    if change_distribution:
        ymin, ymax = ax.get_ylim()
        centers = change_distribution.centers
        width = centers[1] - centers[0]
        weights = change_distribution.weights
        # Build a rescaled copy. An in-place ``*=`` would overwrite the
        # caller's weights every time the distribution is plotted.
        weights = weights * (0.9 * (ymax - ymin) / weights.max())
        ax.bar(centers,
               weights,
               width=width,
               bottom=ymin,
               color='g',
               alpha=0.5)

    if shape is not None and theta is not None:
        # add fit parameters to title
        ttl = '{}, {} fit'.format(ttl, shape)
        more_ttl = shape.format_params(theta, series.age_scaler, latex=True)
        if more_ttl:
            ttl = '\n'.join([ttl, more_ttl])

        # draw the overall fit
        score = cfg.score(y, shape.f(theta, x))
        x_smooth, y_smooth = shape.high_res_preds(theta, x)
        label = 'fit ({}={:.3g})'.format(cfg.score_type, score)
        ax.plot(x_smooth, y_smooth, 'b-', linewidth=3, label=label)

        # draw LOO predictions and residuals
        if LOO_predictions is not None:
            score = loo_score(y, LOO_predictions)
            # A missing score cannot be formatted with '{:.3g}', so no legend
            # entry is produced in that case.
            label = None
            if score is not None:
                label = 'LOO ({}={:.3g})'.format(cfg.score_type, score)
            for xi, yi, y_loo in zip(x, y, LOO_predictions):
                if y_loo is None or np.isnan(y_loo):
                    continue
                ax.plot([xi, xi], [yi, y_loo], '-', color='0.5', label=label)
                # Only the first segment actually drawn carries the label, so
                # the legend entry survives a skipped first point.
                label = None
                ax.plot(xi, y_loo, 'x', color='0.5', markeredgewidth=2)
        if show_legend and not minimal_annotations:
            ax.legend(fontsize=fontsize, frameon=False)

    if not minimal_annotations:
        ax.tick_params(axis='y', labelsize=fontsize)
        if not b_subplot:
            ax.set_title(ttl, fontsize=fontsize)
    return ax.figure
예제 #9
0
    # NOTE(review): this is the interior of a function whose header (and the
    # definitions of series, x, y and fitter) is not visible here.

    # Fit each gene's series on its own, asking the fitter for leave-one-out
    # (LOO) predictions as well.
    fits = []
    for i,g in enumerate(series.gene_names):
        print 'Fitting series {}...'.format(i+1)
        theta, sigma, LOO_predictions,_ = fitter.fit(x,y[:,i],loo=True)
        # keep only what the comparison below needs
        fit = Bunch(
            theta = theta,
            LOO_predictions = LOO_predictions,
        )
        fits.append(fit)

    # Fit all series together; the last element of the returned levels is the
    # one reported, plotted and compared below.
    print 'Fitting with correlations...'
    levels = fitter.fit_multi(x, y, loo=True, n_iterations=2)
    res = levels[-1]
    print 'Theta:'
    for ti in res.theta:
        print '  {}'.format(ti)
    print 'Sigma:'
    print res.sigma
    plot_series(series, fitter.shape, res.theta, res.LOO_predictions)

    # Score the LOO predictions of both approaches gene by gene and collect
    # (basic, multi-gene) score pairs.
    R2_pairs = []
    for i,g in enumerate(series.gene_names):
        y_real = y[:,i]
        y_basic = fits[i].LOO_predictions
        y_multi_gene = res.LOO_predictions[:,i]  # no NaNs in the generated data, so no need to handle the original_inds mess
        basic_R2 = loo_score(y_real,y_basic)
        multi_gene_R2 = loo_score(y_real,y_multi_gene)
        R2_pairs.append( (basic_R2, multi_gene_R2) )
    plot_comparison_scatter(R2_pairs,series.region_name)
    print 'R2_pairs = {}'.format(R2_pairs)
예제 #10
0
def plot_one_series(series,
                    shape,
                    theta,
                    yrange=None,
                    b_annotate=False,
                    train_mask=None,
                    test_preds=None,
                    show_title=False):
    """Draw one expression series on a new figure, optionally annotated.

    With ``b_annotate`` false, the data points selected by ``train_mask`` and
    a vertical line at the scaled age 0 are drawn. When ``theta`` is given
    the fitted curve is added, and every point excluded by ``train_mask`` is
    shown with its prediction ``shape.f(theta, x)``. When ``test_preds`` is
    given, each prediction is drawn against its data point together with the
    ``loo_score`` as text. With ``b_annotate`` true, ``theta`` is unpacked
    into four parameters that are labelled onset, baseline, slope and height.

    Returns the created figure.
    """
    ages = series.ages
    expression = series.single_expression
    x_lo = max(min(ages), -2)
    x_hi = max(ages)

    if train_mask is None:
        # by default every point with a non-NaN age is a training point
        train_mask = ~np.isnan(ages)

    fig = plt.figure()
    ax = fig.add_axes([0.08, 0.15, 0.85, 0.8])
    draw_data = not b_annotate

    # data points first, so that the automatic y limits reflect them
    if draw_data:
        ax.plot(ages[train_mask], expression[train_mask], 'ks', markersize=8)
    y_lo, y_hi = ax.get_ylim() if yrange is None else yrange

    if draw_data:
        # vertical line marking birth
        birth = series.age_scaler.scale(0)
        ax.plot([birth, birth], [y_lo, y_hi], '--', color='0.85')

    if theta is not None:
        # fitted curve
        curve_x, curve_y = shape.high_res_preds(theta, ages)
        ax.plot(curve_x, curve_y, 'b-', linewidth=3)

        # held-out points and their prediction error
        held_out = ~train_mask
        for age, value in zip(ages[held_out], expression[held_out]):
            predicted = shape.f(theta, age)
            ax.plot(age, value, 'rs', markersize=8)
            ax.plot([age, age], [value, predicted], '-', color='0.5')
            ax.plot(age, predicted, 'rx', markeredgewidth=2)

    if test_preds is not None:
        for age, value, predicted in zip(ages, expression, test_preds):
            ax.plot([age, age], [value, predicted], '-', color='0.5')
            ax.plot(age, predicted, 'x', color='0.5', markeredgewidth=2)
        r2_text = "$R^2 = {:.2g}$".format(loo_score(expression, test_preds))
        ax.text(0.02, 0.8, r2_text,
                fontsize=equation_fontsize, transform=ax.transAxes)

    if b_annotate:
        # label the four parameters on the curve
        a, h, mu, w = theta
        dashed = dict(linewidth=2)
        arrow_style = dict(length_includes_head=True,
                           width=0.005,
                           facecolor='green')

        # onset
        y_onset = shape.f(theta, mu)
        ax.plot([mu, mu], [y_lo, y_onset], 'g--', **dashed)
        ax.text(mu + 0.05, y_onset - 0.5, 'onset',
                fontsize=fontsize, horizontalalignment='left')

        # baseline
        ax.plot([x_lo, x_hi], [a, a], 'g--', **dashed)
        ax.text(mu + 1.5, a + 0.05, 'baseline',
                fontsize=fontsize, verticalalignment='bottom')

        # slope
        dx = 0.5
        dy = dx * h / (4 * w)  # that's df/dx at x=mu
        ax.plot([mu - dx, mu + dx],
                [y_onset - dy + 0.05, y_onset + dy + 0.05],
                'g--', **dashed)
        ax.text(mu - 0.5, y_onset + 1, 'slope',
                fontsize=fontsize, horizontalalignment='right')
        ax.arrow(mu - 0.45, y_onset + 0.95, 0.65, -0.65, **arrow_style)

        # height: one arrow up and one arrow down from the onset level
        xpos = mu + 4 * w
        ax.text(xpos + 0.05, y_onset, 'height',
                fontsize=fontsize, verticalalignment='center')
        for arrow_dy in (h * 0.45, -h * 0.45):
            ax.arrow(xpos, y_onset, 0, arrow_dy, **arrow_style)

    ax.set_xlim(x_lo, x_hi)
    ax.set_ylim(y_lo, y_hi)

    # title
    if show_title:
        ax.set_title('{}@{}, {} fit'.format(series.gene_name,
                                            series.region_name, shape),
                     fontsize=fontsize)

    # development stages serve as the x tick labels
    ax.set_xlabel('age', fontsize=fontsize)
    scaled_stages = [s.scaled(series.age_scaler) for s in dev_stages]
    ax.set_xticks([s.central_age for s in scaled_stages])
    ax.set_xticklabels([s.short_name for s in scaled_stages],
                       fontsize=xtick_fontsize,
                       fontstretch='condensed',
                       rotation=90)

    # keep only the first and last y tick
    ax.set_ylabel('expression level', fontsize=fontsize)
    all_ticks = ax.get_yticks()
    end_ticks = np.array([all_ticks[0], all_ticks[-1]])
    ax.set_yticks(end_ticks)
    ax.set_yticklabels(['{:g}'.format(tick) for tick in end_ticks],
                       fontsize=fontsize)

    return fig
예제 #11
0
def plot_one_series(series, shape, theta, yrange=None, b_annotate=False, train_mask=None, test_preds=None, show_title=False):
    """Draw one expression series on a new figure, optionally annotated.

    With ``b_annotate`` false, the data points selected by ``train_mask`` and
    a vertical line at the scaled age 0 are drawn. When ``theta`` is given
    the fitted curve is added, and every point excluded by ``train_mask`` is
    shown with its prediction ``shape.f(theta, x)``. When ``test_preds`` is
    given, each prediction is drawn against its data point together with the
    ``loo_score`` as text. With ``b_annotate`` true, ``theta`` is unpacked
    into four parameters that are labelled onset, baseline, slope and height.

    Returns the created figure.
    """
    ages = series.ages
    expression = series.single_expression
    x_lo = max(min(ages), -2)
    x_hi = max(ages)

    if train_mask is None:
        # by default every point with a non-NaN age is a training point
        train_mask = ~np.isnan(ages)

    fig = plt.figure()
    ax = fig.add_axes([0.08, 0.15, 0.85, 0.8])
    draw_data = not b_annotate

    # data points first, so that the automatic y limits reflect them
    if draw_data:
        ax.plot(ages[train_mask], expression[train_mask], 'ks', markersize=8)
    y_lo, y_hi = ax.get_ylim() if yrange is None else yrange

    if draw_data:
        # vertical line marking birth
        birth = series.age_scaler.scale(0)
        ax.plot([birth, birth], [y_lo, y_hi], '--', color='0.85')

    if theta is not None:
        # fitted curve
        curve_x, curve_y = shape.high_res_preds(theta, ages)
        ax.plot(curve_x, curve_y, 'b-', linewidth=3)

        # held-out points and their prediction error
        held_out = ~train_mask
        for age, value in zip(ages[held_out], expression[held_out]):
            predicted = shape.f(theta, age)
            ax.plot(age, value, 'rs', markersize=8)
            ax.plot([age, age], [value, predicted], '-', color='0.5')
            ax.plot(age, predicted, 'rx', markeredgewidth=2)

    if test_preds is not None:
        for age, value, predicted in zip(ages, expression, test_preds):
            ax.plot([age, age], [value, predicted], '-', color='0.5')
            ax.plot(age, predicted, 'x', color='0.5', markeredgewidth=2)
        r2_text = "$R^2 = {:.2g}$".format(loo_score(expression, test_preds))
        ax.text(0.02, 0.8, r2_text, fontsize=equation_fontsize, transform=ax.transAxes)

    if b_annotate:
        # label the four parameters on the curve
        a, h, mu, w = theta
        dashed = dict(linewidth=2)
        arrow_style = dict(length_includes_head=True, width=0.005, facecolor='green')

        # onset
        y_onset = shape.f(theta, mu)
        ax.plot([mu, mu], [y_lo, y_onset], 'g--', **dashed)
        ax.text(mu + 0.05, y_onset - 0.5, 'onset', fontsize=fontsize, horizontalalignment='left')

        # baseline
        ax.plot([x_lo, x_hi], [a, a], 'g--', **dashed)
        ax.text(mu + 1.5, a + 0.05, 'baseline', fontsize=fontsize, verticalalignment='bottom')

        # slope
        dx = 0.5
        dy = dx * h / (4 * w)  # that's df/dx at x=mu
        ax.plot([mu - dx, mu + dx], [y_onset - dy + 0.05, y_onset + dy + 0.05], 'g--', **dashed)
        ax.text(mu - 0.5, y_onset + 1, 'slope', fontsize=fontsize, horizontalalignment='right')
        ax.arrow(mu - 0.45, y_onset + 0.95, 0.65, -0.65, **arrow_style)

        # height: one arrow up and one arrow down from the onset level
        xpos = mu + 4 * w
        ax.text(xpos + 0.05, y_onset, 'height', fontsize=fontsize, verticalalignment='center')
        for arrow_dy in (h * 0.45, -h * 0.45):
            ax.arrow(xpos, y_onset, 0, arrow_dy, **arrow_style)

    ax.set_xlim(x_lo, x_hi)
    ax.set_ylim(y_lo, y_hi)

    # title
    if show_title:
        ax.set_title('{}@{}, {} fit'.format(series.gene_name, series.region_name, shape), fontsize=fontsize)

    # development stages serve as the x tick labels
    ax.set_xlabel('age', fontsize=fontsize)
    scaled_stages = [s.scaled(series.age_scaler) for s in dev_stages]
    ax.set_xticks([s.central_age for s in scaled_stages])
    ax.set_xticklabels([s.short_name for s in scaled_stages], fontsize=xtick_fontsize, fontstretch='condensed', rotation=90)

    # keep only the first and last y tick
    ax.set_ylabel('expression level', fontsize=fontsize)
    all_ticks = ax.get_yticks()
    end_ticks = np.array([all_ticks[0], all_ticks[-1]])
    ax.set_yticks(end_ticks)
    ax.set_yticklabels(['{:g}'.format(tick) for tick in end_ticks], fontsize=fontsize)

    return fig