Exemple #1
0
def plot_data(xs, ys, color, ax):
    """Scatter ``ys`` against ``xs`` on ``ax`` and overlay a kernel-regression curve.

    The points are drawn with alpha 0.01 in ``color``. For the curve, "gold"
    is remapped to "orange" and "green" to "limegreen"; any other colour is
    used unchanged. The x axis is limited to 0..24 with a tick every 4 units.
    """
    ax.scatter(xs, ys, alpha=0.01, c=color)

    # Colour used for the fitted curve only (the scatter keeps ``color``).
    if color == "gold":
        line_color = "orange"
    elif color == "green":
        line_color = "limegreen"
    else:
        line_color = color

    # Degree-6 local-polynomial fit, evaluated on 512 points spanning [0, 24].
    sample_points = np.r_[0:24:512j]
    kernel = npr_methods.LocalPolynomialKernel(q=6)
    estimator = smooth.NonParamRegression(xs, ys, method=kernel)
    estimator.fit()
    fitted_values = estimator(sample_points)
    ax.plot(sample_points, fitted_values, line_color, linewidth=2)

    ax.set_xticks([0, 4, 8, 12, 16, 20, 24])
    ax.set_xlim(0, 24)
def regression_gas_kernel(b, s, df, theme):
    """Plot ``df[theme]`` against temperature with a local-linear kernel curve.

    The figure is saved to
    ``<cwd>/plot_FY_weather/eui_gas_kernel/<b>_<s>.png`` and then closed.

    Args:
        b: value formatted into the title ("Building ...") and the file name.
        s: value formatted into the title ("Station ...") and the file name.
        df: table providing a 'temperature' column and a ``theme`` column.
        theme: column to plot on the y axis; also the key looked up in
            ``ylabel_dict`` and ``title_dict``.

    Returns:
        The ``NonParamRegression`` object whose curve was drawn.
    """
    temperature = df['temperature']
    x = np.array(temperature)
    y = np.array(df[theme])

    # Evaluation grid: unit steps from the minimum temperature up to, but
    # not including, the maximum.
    xd = np.r_[temperature.min():temperature.max():1]

    # Degree-1 local polynomial; the estimator is evaluated directly, without
    # an explicit fit() call in this function.
    kernel = npr_methods.LocalPolynomialKernel(q=1)
    k1 = smooth.NonParamRegression(x, y, method=kernel)

    plt.plot(x, y, "o")
    plt.plot(xd, k1(xd))
    plt.xlabel('Temperature', fontsize=12)
    plt.ylabel(ylabel_dict[theme], fontsize=12)
    heading = 'Kernel Regression Fit {0} - Temperature Plot\n Building {1}, Station {2}'.format(
        title_dict[theme], b, s)
    plt.title(heading, fontsize=15)

    target = os.getcwd() + '/plot_FY_weather/eui_gas_kernel/{0}_{1}.png'.format(b, s)
    P.savefig(target, dpi=150)
    plt.close()
    return k1
def make_plot(data):
    """Draw one scatter panel per taxon on a 2x3 grid and save it as figure.png.

    Each panel shows the taxon's points coloured by position code, a
    ``SpatialAverage`` non-parametric regression curve evaluated on
    [0.5, 5], and is titled with the last ';'-separated field of the taxon
    name. The Pearson correlation of each taxon's x and y values is printed.

    Args:
        data: mapping from taxon name to an indexable whose items 0, 1 and 2
            are the x values, the y values and the position codes. Codes
            "T", "M" and "B" map to red, magenta and cyan; any other code
            contributes no colour entry. The grid holds six panels, so a
            seventh taxon raises IndexError.
    """
    position_colors = {"T": "red", "M": "magenta", "B": "cyan"}

    fig, axs = plt.subplots(2, 3, figsize=(8, 6), sharey=True)
    for i, taxon in enumerate(data):
        # Fill the grid row by row: panels 0-2 on top, 3-5 below.
        row, col = divmod(i, 3)
        ax = axs[row][col]

        xs = data[taxon][0]
        ys = data[taxon][1]
        positions = data[taxon][2]
        colors = [position_colors[position]
                  for position in positions
                  if position in position_colors]
        ax.scatter(xs, ys, alpha=0.6, c=colors, edgecolor='k')

        # line of best fit
        grid = np.r_[0.5:5:512j]
        k0 = smooth.NonParamRegression(xs,
                                       ys,
                                       method=npr_methods.SpatialAverage())
        k0.fit()
        ax.plot(grid, k0(grid), "k", linewidth=2)

        test = stats.pearsonr(xs, ys)
        # Formatted explicitly so the output is identical whether this runs
        # under the Python 2 print statement or the Python 3 print function.
        print("%s %s" % (taxon, test))

        ax.set_title(taxon.split(";")[-1], fontsize=12)
        ax.grid(ls="--")
        ax.set_xticks([0, 1, 2, 3, 4, 5])

    # Invisible full-figure axes that only carry the shared axis labels.
    fig.add_subplot(111, frameon=False)
    plt.tick_params(labelcolor='none',
                    top=False,
                    bottom=False,
                    left=False,
                    right=False)
    plt.xlabel("Distance to surface (cm)")
    plt.ylabel("Standardized abundance of taxon")

    plt.tight_layout()
    plt.savefig("figure.png", dpi=300)
Exemple #4
0
def aseErr_expression(genes_fpkm, called_ase_file, true_ase_file):
    """Scatter the absolute ASE error against gene expression and show the plot.

    For every gene in the called-ASE table, the called and true ASE values
    are converted to log2 odds (``log2(p / (1 - p))``); the absolute
    difference is plotted against ``log2(FPKM)``. The Spearman correlation
    is written into the x-axis label and a ``SpatialAverage`` non-parametric
    regression curve is overlaid on [-1, 12).

    Args:
        genes_fpkm: passed to ``loadFPKM``; the result is indexed by gene id.
        called_ase_file: passed to ``loadASE_list``; its keys drive the loop.
        true_ase_file: passed to ``loadASE_list``; must contain every called
            gene id (a missing id raises KeyError).
    """
    called_ase_dict = loadASE_list(called_ase_file)
    true_ase_dict = loadASE_list(true_ase_file)

    gene_expression = loadFPKM(genes_fpkm)

    # Parallel lists: ase_errors[i] and expression_values[i] describe the
    # same gene (the gene ids themselves are not kept).
    ase_errors = []
    expression_values = []

    # called_ase genes are a subset of true_ase genes
    for ensid in called_ase_dict:
        true_ase = true_ase_dict[ensid]
        called_ase = called_ase_dict[ensid]

        fpkm = gene_expression[ensid]

        # log2 odds of each ASE value
        true_ase = math.log((true_ase / (1 - true_ase)), 2)
        called_ase = math.log((called_ase / (1 - called_ase)), 2)

        expression_values.append(math.log(fpkm, 2))

        ase_err = math.fabs(true_ase - called_ase)
        ase_errors.append(ase_err)

    plt.scatter(expression_values, ase_errors, alpha=.3, marker='o')
    r, pval = stats.spearmanr(expression_values, ase_errors)
    plt.xlabel('log2(FPKM)\nr=%f' % r)
    plt.ylabel('log2 ASE_errors')
    plt.ylim([0, 2.5])
    plt.xlim([-1, 12])

    k0 = smooth.NonParamRegression(expression_values, ase_errors, method=npr_methods.SpatialAverage())
    k0.fit()

    xs = np.arange(-1, 12, .01)

    plt.plot(xs, k0(xs), linewidth=2)

    plt.show()
Exemple #5
0
def non_parametric_regression(xs, ys, method):
    """Build a ``smooth.NonParamRegression`` of ``ys`` on ``xs``, fit it and return it.

    ``method`` is forwarded unchanged as the estimator's ``method`` argument.
    """
    estimator = smooth.NonParamRegression(xs, ys, method=method)
    estimator.fit()
    return estimator
Exemple #6
0
def fit(xs, ys):
    """Return a fitted degree-2 local-polynomial regression of ``ys`` on ``xs``."""
    quadratic_kernel = npr_methods.LocalPolynomialKernel(q=2)
    regression = smooth.NonParamRegression(xs, ys, method=quadratic_kernel)
    regression.fit()
    return regression
Exemple #7
0
    t, L = np.genfromtxt(bolo).T
    clean = np.where(L < 1e43)[0]
    t, L = t[clean], L[clean]
    pl.plot(t,
            np.log10(L),
            marker='o',
            linestyle='None',
            label=r"$\mathrm{%s}$" % sn_name,
            color='k')

    try:

        where_peak = np.where(np.logical_and(t > 10., t < 45.))[0]
        xs, ys = t[where_peak], (L[where_peak]) * 1e-42
        #k0 = smooth.NonParamRegression(xs, ys, method=npr_methods.SpatialAverage())
        k0 = smooth.NonParamRegression(
            xs, ys, method=npr_methods.LocalPolynomialKernel(q=4))
        k0_full = smooth.NonParamRegression(
            t, (L) * 1e-42, method=npr_methods.LocalPolynomialKernel(q=7))
        k0.fit()
        k0_full.fit()
        grid = np.arange(np.min(xs), np.max(xs), 0.1)
        grid_full = np.arange(np.min(t), np.max(t), 0.1)
        y_grid = np.log10(k0(grid)) + 42
        y_grid = y_grid[~np.isnan(y_grid)]
        y_grid_full = np.log10(k0_full(grid_full)) + 42
        y_grid_full = y_grid_full[~np.isnan(y_grid_full)]
        where_peak = np.argmax(y_grid)
        tp, Lp = grid[where_peak], y_grid[where_peak]
        M_p = M_ni(tp, 10**Lp)
        Ni_Khatami = Ni_K(10**Lp, tp, beta=4 / 3.)
        ni_t = np.arange(5, np.max(t) + 5, 0.01)
Exemple #8
0
def fit_data(input_json, add_plot_data=False, verbose=False, stderr=False):
    """
    Fit the data contained in the JSON file.

    For every sample and plex under the top-level 'samples' key, a
    local-polynomial kernel regression of 'testset_amplicon_cov' on
    'testset_amplicon_prop' is fitted and evaluated at each value of
    'fullset_amplicon_prop'.

    Args:
        input_json: path of the JSON file to read. Its top level supplies
            'smooth_fraction' (the bandwidth is this fraction of the
            test-set property range), 'polynomial_degree' and 'samples'.
        add_plot_data: when true, also evaluate the fit at 200 evenly spaced
            points across the test-set range and store them under 'plot'.
        verbose: when true, print progress messages.
        stderr: when true (together with verbose), print to sys.stderr
            instead of stdout.

    Returns:
        Nested mapping ``result[sample_id][plex_number]``. On success the
        entry holds 'mean_predicted', 'singularmatrix' (False), 'values'
        (filled only when the mean prediction is positive) and 'plot'
        (``{'x': [...], 'y': [...]}``, both plain lists). When the fit hits
        a singular matrix the entry is
        ``{'mean_predicted': 0, 'singularmatrix': True}``.

    Raises:
        numpy.linalg.LinAlgError: for linear-algebra failures other than a
            singular matrix.
    """
    print_kwargs = {}
    if verbose and stderr:
        print_kwargs['file'] = sys.stderr

    with open(input_json, 'r') as fh:
        data = json.load(fh)

    smooth_fraction = data.get('smooth_fraction')
    polynomial_degree = data.get('polynomial_degree')

    output_json = defaultdict(lambda: defaultdict(dict))
    for sample_id, plex_data in six.iteritems(data.get('samples', {})):
        if verbose:
            print("Processing sample {}".format(sample_id), **print_kwargs)

        # The per-plex record gets its own name so it no longer shadows the
        # top-level ``data`` loaded from the file.
        for plex_number, plex in six.iteritems(plex_data):
            if verbose:
                print("\tProcessing plex {}".format(plex_number), **print_kwargs)

            fullset_amplicon_prop = plex.get('fullset_amplicon_prop')
            testset_amplicon_prop = plex.get('testset_amplicon_prop')
            testset_amplicon_prop_min = min(testset_amplicon_prop)
            testset_amplicon_prop_max = max(testset_amplicon_prop)
            if verbose:
                print("\t\tloading full amplicon property set: {}".format(fullset_amplicon_prop), **print_kwargs)

            try:
                regress = smooth.NonParamRegression(
                    testset_amplicon_prop,
                    plex.get('testset_amplicon_cov'),
                    bandwidth=(testset_amplicon_prop_max - testset_amplicon_prop_min) * smooth_fraction,
                    method=npr_methods.LocalPolynomialKernel(q=polynomial_degree)
                )
                regress.fit()
                regress_cached = CachedFunction(regress)

                # average predicted value
                mean_predicted = float(numpy.mean([regress_cached.eval(prop_val) for prop_val in fullset_amplicon_prop]))
                if verbose:
                    print("\t\tNonParamRegression mean predicted value: {}".format(mean_predicted), **print_kwargs)

                entry = {'mean_predicted': mean_predicted,
                         'singularmatrix': False,
                         'values': [],
                         'plot': {'x': [], 'y': []}}
                output_json[sample_id][plex_number] = entry

                # Individual predictions are only reported when the mean
                # prediction is positive.
                if mean_predicted > 0:
                    for amplicon_prop in fullset_amplicon_prop:
                        regress_value = float(regress_cached.eval(amplicon_prop))
                        entry['values'].append(regress_value)
                    if verbose:
                        print("\t\tNonParamRegression individual values: {}"
                              .format(entry['values']), **print_kwargs)

                if add_plot_data:
                    testset_amplicon_prop_range = numpy.linspace(testset_amplicon_prop_min, testset_amplicon_prop_max, 200)
                    # Stored as a plain list (like 'y') so the result can be
                    # serialised with the json module.
                    entry['plot']['x'] = testset_amplicon_prop_range.tolist()
                    for plot_prop in testset_amplicon_prop_range:
                        entry['plot']['y'].append(float(regress_cached.eval(plot_prop)))

            except numpy.linalg.LinAlgError as err:
                # str(err) works on Python 2 and 3; exceptions have no
                # ``.message`` attribute on Python 3.
                if 'singular matrix' in str(err).lower():
                    output_json[sample_id][plex_number] = {'mean_predicted': 0, 'singularmatrix': True}
                else:
                    raise
    return output_json
def plot_energy_temp_byyear(df_energy, df_temp, df_hdd, df_cdd, theme, b, s,
                            ld, kind, remove0):
    """Plot energy use per fiscal year against a weather variable and save a PNG.

    Side effects visible in this function: 'temp', 'hdd' (negated) and 'cdd'
    columns are added to ``df_energy`` in place (``df`` is an alias, not a
    copy); a CSV is written under
    ``<cwd>/csv_FY/energy_temperature_select/``; diagnostics are printed;
    the current figure is saved under ``<cwd>/plot_FY_weather/`` and closed.

    Args:
        df_energy: table of energy data. Columns read here include
            'Fiscal Year' and, depending on the branch, 'eui', 'eui_elec',
            'eui_gas', 'month', plus the ``theme`` and ``kind`` columns.
        df_temp, df_hdd, df_cdd: tables from which column ``s`` is copied via
            ``tolist()`` -- presumably row-aligned with ``df_energy``;
            verify against the caller.
        theme: column plotted on the y axis when ``kind != 'all'``; also the
            key into ``title_dict`` / ``ylabel_dict``.
        b: value formatted into output file names and titles ("Building").
        s: column key into the weather tables; also formatted into file
            names and titles ("Station").
        ld: 'line' selects per-year line plots; any other value selects
            seaborn ``lmplot`` scatter plots.
        kind: 'all', or the name of the column to use as x ('cdd' and 'hdd'
            get dedicated palettes in the scatter branch).
        remove0: when truthy, rows with small degree-day values are dropped
            (only in the ``kind == 'all'`` branches; thresholds differ).

    NOTE(review): this body uses Python 2 print statements, the bare
    ``reduce`` builtin and ``DataFrame.sort`` -- it appears to target
    Python 2 with an old pandas release; confirm before porting.
    """
    sns.set_palette(sns.color_palette('Set2', 9))
    sns.mpl.rc("figure", figsize=(10, 5))
    # Alias, not a copy: the column assignments below modify the caller's
    # df_energy.
    df = df_energy
    df['temp'] = df_temp[s].tolist()
    df['hdd'] = df_hdd[s].tolist()
    # hdd is stored negated from here on.
    df['hdd'] = df['hdd'] * (-1.0)
    df['cdd'] = df_cdd[s].tolist()
    df.to_csv(os.getcwd() +
              '/csv_FY/energy_temperature_select/{0}_{1}_{2}.csv'.format(
                  b, s, title_dict[theme]),
              index=False)
    # df3 stacks two copies of the table: one with dd = hdd, one with
    # dd = cdd. It is only read in the kind == 'all' scatter branch.
    df1 = df.copy()
    df1['dd'] = df1['hdd']
    df2 = df.copy()
    df2['dd'] = df2['cdd']
    df3 = pd.concat([df1, df2], ignore_index=True)
    if kind != 'all':
        # Single-variable mode: `theme` against the `kind` column, dropping
        # rows where that column is exactly 0.
        print df[kind].head()
        df = df[df[kind] != 0.0]
        if ld == 'line':
            # One line per fiscal year, points ordered by (kind, theme).
            gr = df.groupby('Fiscal Year')
            lines = []
            for name, group in gr:
                print(name, kind)
                # NOTE(review): DataFrame.sort is the old pandas spelling
                # (sort_values in current pandas) -- confirm pandas version.
                group.sort([kind, theme], inplace=True)
                group = group[[kind, theme]]
                line, = plt.plot(group[kind], group[theme])
                lines.append(line)
                #print 'Building: {0}, year: {1}, {2} {3} [kbtu/sq.ft.]'.format(b, int(name), round(group[theme].sum(), 2), title_dict[theme])
        else:
            if kind == 'cdd':
                sns.set_palette(
                    sns.color_palette(
                        ['paleturquoise', 'turquoise', 'darkturquoise']))
            elif kind == 'hdd':
                sns.set_palette(sns.color_palette('Oranges'))
            sns.lmplot(x=kind,
                       y=theme,
                       hue='Fiscal Year',
                       data=df,
                       fit_reg=True)
            # Overlay a degree-1 local-polynomial kernel regression over all
            # years, evaluated in unit steps from min to (excluding) max.
            x = np.array(df[kind])
            y = np.array(df[theme])
            t_min = df[kind].min()
            t_max = df[kind].max()
            xd = np.r_[t_min:t_max:1]
            # NOTE(review): k1 is evaluated without an explicit k1.fit() --
            # presumably NonParamRegression fits on first use; confirm.
            k1 = smooth.NonParamRegression(
                x, y, method=npr_methods.LocalPolynomialKernel(q=1))
            plt.plot(xd, k1(xd), '-', color=sns.color_palette('Set2')[5])
            plt.xlabel(xlabel_dict[kind], fontsize=12)
            plt.ylabel(ylabel_dict[theme], fontsize=12)
    else:
        # kind == 'all': electricity, gas and combined curves together.
        if ld == 'line':
            gr = df.groupby('Fiscal Year')
            lines = []
            for name, group in gr:
                print(name, kind)
                group_elec = group.sort(['cdd', 'eui_elec'])
                group_gas = group.sort(['hdd', 'eui_gas'])
                # shift temperature so that 65.0 becomes 0
                # (original note: offset temperature to 0F)
                group['temp'] = group['temp'] - 65.0
                # temp_dd takes hdd below the shifted zero, cdd otherwise.
                group['temp_dd'] = group.apply(lambda r: r['hdd']
                                               if r['temp'] < 0 else r['cdd'],
                                               axis=1)
                group_temp = group.sort(['temp_dd', 'eui'])
                if remove0:
                    group_elec = group_elec[group_elec['cdd'] >= 10]
                    group_gas = group_gas[group_gas['hdd'] <= -10]
                # NOTE(review): this overwrites the group_temp computed a
                # few lines above (sorted by temp_dd), so the combined line
                # is drawn in temp order -- confirm which one is intended.
                group_temp = group.sort(['temp', 'eui'])
                line_elec, = plt.plot(group_elec['cdd'],
                                      group_elec['eui_elec'])
                line_gas, = plt.plot(group_gas['hdd'], group_gas['eui_gas'])
                line_temp, = plt.plot(group_temp['temp_dd'], group_temp['eui'])
                # Append order (elec, gas, temp) must match the legend
                # labels built near the end of the function.
                lines.append(line_elec)
                lines.append(line_gas)
                lines.append(line_temp)
            plt.ylabel(ylabel_dict[theme], fontsize=12)
            plt.title('{3}-{0} plot: Building {1}, Station {2}'.format(
                title_dict[theme], b, s, kind_dict[kind]))
        else:
            # NOTE(review): base_gas is not assigned anywhere in this
            # function; unless it exists as a module-level global this line
            # raises NameError. Possibly `base_gas = df3.copy()` was
            # intended, mirroring base_elec below -- confirm.
            gr_gas = base_gas.groupby('Fiscal Year')
            base_gas_dict = {}
            for name, group in gr_gas:
                # The triple-quoted string below is disabled code kept as a
                # bare string statement; it has no effect at runtime.
                '''
                group = group[group['hdd'].abs() < 30]
                base_gas_dict[name] = 0
                if len(group) != 0:
                    base_gas_dict[name] = group['eui_gas'].mean()
                y = group['eui_gas'].tolist()
                x = group['dd'].tolist()
                p = np.polyfit(x, y, 2)
                g = sns.regplot('dd', 'eui_gas', data=group, fit_reg=True, order=2)
                base_gas_dict[name] = max(\
                        max(0, np.polyval(p, p[1]/(-2 * p[0]))),
                        min(max(0, np.polyval(p, min(x))),
                        max(0, np.polyval(p, max(x)))))
                xn = np.linspace(min(x), max(x), 50)
                plt.plot(xn, np.polyval(p, xn))
                plt.plot(x, y)
                plt.plot(x, [base_gas_dict[name]] * len(x))
                plt.show()
                #base_gas_dict[name] = sum(sorted(group['eui_gas'].tolist())[:2])/2
                '''
                # Gas baseline per fiscal year: mean eui_gas over the rows
                # whose month is 12, 1 or 2 (x == 12 or x < 3).
                tempdf = group.copy()
                tempdf['base_month'] = tempdf['month'].map(
                    lambda x: True if x == 12 or x < 3 else False)
                tempdf = tempdf[tempdf['base_month'] == True]
                print tempdf
                base_gas_dict[name] = tempdf['eui_gas'].mean()

            # NOTE(review): base_elec is assigned but never read; the loop
            # below iterates gr_gas again rather than a grouping of
            # base_elec -- confirm intended.
            base_elec = df3.copy()
            base_elec_dict = {}
            for name, group in gr_gas:
                # Disabled code kept as a bare string statement; no effect.
                '''
                group = group[group['cdd'].abs() < 30]
                base_elec_dict[name] = 0
                if len(group) != 0:
                    base_elec_dict[name] = group['eui_elec'].mean()
                y = group['eui_elec'].tolist()
                x = group['dd'].tolist()
                p = np.polyfit(x, y, 2)
                base_elec_dict[name] = max(\
                        max(0, np.polyval(p, p[1]/(-2 * p[0]))),
                        min(max(0, np.polyval(p, min(x))),
                        max(0, np.polyval(p, max(x)))))
                #base_elec_dict[name] = sum(sorted(group['eui_elec'].tolist())[:2])/2
                '''
                # Electricity baseline per fiscal year: minimum eui_elec.
                base_elec_dict[name] = group['eui_elec'].min()

            print base_elec_dict
            print base_gas_dict
            if remove0:
                df3 = df3[df3['dd'].abs() >= 30]
            # Build one long table with a 'kind' label per series so lmplot
            # can colour them via hue.
            df_elec = df3.copy()
            df_elec['kind'] = 'Electricity'
            df_elec['eui_plot'] = df_elec['eui_elec']
            df_gas = df3.copy()
            df_gas['kind'] = 'Natural Gas'
            # Gas is offset by that fiscal year's electricity baseline.
            df_gas['eui_plot'] = df_gas.apply(
                lambda r: r['eui_gas'] + base_elec_dict[r['Fiscal Year']],
                axis=1)
            df_total = df3.copy()
            df_total['kind'] = 'Total'
            # df_gas and df_total are both copies of df3, so df_gas['eui']
            # carries the same values as df_total['eui'].
            df_total['eui_plot'] = df_gas['eui']
            df_base_elec = df3.copy()
            df_base_elec['kind'] = 'Base-Electricity'
            df_base_elec['eui_plot'] = df_base_elec['Fiscal Year'].map(
                lambda x: base_elec_dict[x])
            df_base_gas = df3.copy()
            df_base_gas['kind'] = 'Base-Gas'
            # Gas baseline is likewise offset by the electricity baseline.
            df_base_gas['eui_plot'] = df_base_gas['Fiscal Year'].apply(
                lambda r: base_gas_dict[r] + base_elec_dict[r])
            df_all = pd.concat(
                [df_elec, df_gas, df_total, df_base_gas, df_base_elec],
                ignore_index=True)
            #df_all = pd.concat([df_elec, df_gas, df_total], ignore_index=True)
            g = sns.lmplot(x='dd',
                           y='eui_plot',
                           data=df_all,
                           col='Fiscal Year',
                           hue='kind',
                           fit_reg=True,
                           truncate=True,
                           lowess=True)
            #plt.xlabel(xlabel_dict[kind], fontsize=12)
            # NOTE(review): the y label is looked up with `kind`, whereas
            # the other branches use ylabel_dict[theme] -- confirm intended.
            g = g.set_axis_labels(xlabel_dict[kind], ylabel_dict[kind])
    # Fixed y range applied to whichever plot was produced above.
    plt.ylim((0, 12))
    # Output location depends on ld ('line' vs. scatter) and on kind.
    if ld == 'line':
        if kind != 'all':
            P.savefig(os.getcwd() +
                      '/plot_FY_weather/eui_{3}/{2}_byyear/{0}_{1}.png'.format(
                          b, s, theme, kind),
                      dpi=150)
        else:
            # Legend labels are hard-coded for three fiscal years with
            # three lines each, in the order the lines were appended
            # (electricity, gas, then the combined line labelled 'Total').
            # NOTE(review): bare reduce is a Python 2 builtin
            # (functools.reduce on Python 3).
            years = ['2013', '2014', '2015']
            line_labels = ['Electricity', 'Gas', 'Total']
            labels = reduce(lambda x, y: x + y,
                            [['{0}-{1}'.format(x, y) for y in line_labels]
                             for x in years])
            plt.legend(lines, labels)
            P.savefig(os.getcwd() +
                      '/plot_FY_weather/eui_{2}/{2}_byyear/{0}_{1}.png'.format(
                          b, s, kind),
                      dpi=150)

    else:
        if kind != 'all':
            P.savefig(
                os.getcwd() +
                '/plot_FY_weather/eui_{3}/{2}_byyear_dot/{0}_{1}.png'.format(
                    b, s, theme, kind),
                dpi=75)
        else:
            P.savefig(
                os.getcwd() +
                '/plot_FY_weather/eui_{2}/{2}_byyear_dot/{0}_{1}.png'.format(
                    b, s, kind),
                dpi=75)
    plt.close()
Exemple #10
0
def plot_energy_temp_byyear(df_energy, df_temp, df_hdd, df_cdd, theme, b, s,
                            ld, kind, remove0):
    """Plot energy use per fiscal year against a weather variable and save a PNG.

    Side effects visible in this function: 'temp', 'hdd' (negated) and 'cdd'
    columns are added to ``df_energy`` in place (``df`` is an alias, not a
    copy); a CSV is written under
    ``<cwd>/csv_FY/energy_temperature_select/``; diagnostics are printed;
    the current figure is saved under ``<cwd>/plot_FY_weather/`` and closed.

    Args:
        df_energy: table of energy data. Columns read here include
            'Fiscal Year' and, depending on the branch, 'eui', 'eui_elec',
            'eui_gas', plus the ``theme`` and ``kind`` columns.
        df_temp, df_hdd, df_cdd: tables from which column ``s`` is copied via
            ``tolist()`` -- presumably row-aligned with ``df_energy``;
            verify against the caller.
        theme: column plotted on the y axis when ``kind != 'all'``; also the
            key into ``title_dict`` / ``ylabel_dict``.
        b: value formatted into output file names and titles ("Building").
        s: column key into the weather tables; also formatted into file
            names and titles ("Station").
        ld: 'line' selects per-year line plots; any other value selects
            seaborn ``lmplot`` scatter plots.
        kind: 'all', or the name of the column to use as x.
        remove0: when truthy, rows with cdd < 10 are dropped from the
            electricity series and rows with hdd > -10 from the gas series
            (only in the ``kind == 'all'`` branches).

    NOTE(review): this body uses Python 2 print statements, the bare
    ``reduce`` builtin and ``DataFrame.sort`` -- it appears to target
    Python 2 with an old pandas release; confirm before porting.
    """
    sns.set_palette(sns.color_palette('Set2', 9))
    sns.mpl.rc("figure", figsize=(10, 5))
    # Alias, not a copy: the column assignments below modify the caller's
    # df_energy.
    df = df_energy
    df['temp'] = df_temp[s].tolist()
    df['hdd'] = df_hdd[s].tolist()
    # hdd is stored negated from here on.
    df['hdd'] = df['hdd'] * (-1.0)
    df['cdd'] = df_cdd[s].tolist()
    df.to_csv(os.getcwd() +
              '/csv_FY/energy_temperature_select/{0}_{1}_{2}.csv'.format(
                  b, s, title_dict[theme]),
              index=False)
    if kind != 'all':
        # Single-variable mode: `theme` against the `kind` column, dropping
        # rows where that column is exactly 0.
        print df[kind].head()
        df = df[df[kind] != 0.0]
        if ld == 'line':
            # One line per fiscal year, points ordered by (kind, theme).
            gr = df.groupby('Fiscal Year')
            lines = []
            for name, group in gr:
                print(name, kind)
                # NOTE(review): DataFrame.sort is the old pandas spelling
                # (sort_values in current pandas) -- confirm pandas version.
                group.sort([kind, theme], inplace=True)
                group = group[[kind, theme]]
                line, = plt.plot(group[kind], group[theme])
                lines.append(line)
                #print 'Building: {0}, year: {1}, {2} {3} [kbtu/sq.ft.]'.format(b, int(name), round(group[theme].sum(), 2), title_dict[theme])
        else:
            sns.lmplot(x=kind,
                       y=theme,
                       hue='Fiscal Year',
                       data=df,
                       fit_reg=True)
            # Overlay a degree-1 local-polynomial kernel regression over all
            # years, evaluated in unit steps from min to (excluding) max.
            x = np.array(df[kind])
            y = np.array(df[theme])
            t_min = df[kind].min()
            t_max = df[kind].max()
            xd = np.r_[t_min:t_max:1]
            # NOTE(review): k1 is evaluated without an explicit k1.fit() --
            # presumably NonParamRegression fits on first use; confirm.
            k1 = smooth.NonParamRegression(
                x, y, method=npr_methods.LocalPolynomialKernel(q=1))
            plt.plot(xd, k1(xd), '-', color=sns.color_palette('Set2')[5])
            plt.xlabel(xlabel_dict[kind], fontsize=12)
    else:
        # kind == 'all': electricity, gas and combined curves together.
        if ld == 'line':
            gr = df.groupby('Fiscal Year')
            lines = []
            for name, group in gr:
                print(name, kind)
                group_elec = group.sort(['cdd', 'eui_elec'])
                group_gas = group.sort(['hdd', 'eui_gas'])
                # shift temperature so that 65.0 becomes 0
                # (original note: offset temperature to 0F)
                group['temp'] = group['temp'] - 65.0
                # temp_dd takes hdd below the shifted zero, cdd otherwise.
                group['temp_dd'] = group.apply(lambda r: r['hdd']
                                               if r['temp'] < 0 else r['cdd'],
                                               axis=1)
                group_temp = group.sort(['temp_dd', 'eui'])
                if remove0:
                    group_elec = group_elec[group_elec['cdd'] >= 10]
                    group_gas = group_gas[group_gas['hdd'] <= -10]
                # NOTE(review): this overwrites the group_temp computed a
                # few lines above (sorted by temp_dd), so the combined line
                # is drawn in temp order -- confirm which one is intended.
                group_temp = group.sort(['temp', 'eui'])
                line_elec, = plt.plot(group_elec['cdd'],
                                      group_elec['eui_elec'])
                line_gas, = plt.plot(group_gas['hdd'], group_gas['eui_gas'])
                line_temp, = plt.plot(group_temp['temp_dd'], group_temp['eui'])
                # Append order (elec, gas, temp) must match the legend
                # labels built near the end of the function.
                lines.append(line_elec)
                lines.append(line_gas)
                lines.append(line_temp)
            plt.ylabel(ylabel_dict[theme], fontsize=12)
            plt.title('{3}-{0} plot: Building {1}, Station {2}'.format(
                title_dict[theme], b, s, kind_dict[kind]))
        else:
            # Shift temperature so that 65.0 becomes 0, then pick hdd below
            # that point and cdd otherwise.
            df['temp'] = df['temp'] - 65.0
            df['temp_dd'] = df.apply(lambda r: r['hdd']
                                     if r['temp'] < 0 else r['cdd'],
                                     axis=1)
            # df_elec, df_gas and df_temp each start as an alias of df (or a
            # filtered selection when remove0 is set). Each one writes the
            # 'dd', 'eui_plot' and 'kind' columns and is then reduced to a
            # four-column selection before the next series is built.
            # NOTE(review): when remove0 is set the assignments target a
            # filtered selection, which may trigger pandas'
            # SettingWithCopyWarning -- confirm this is acceptable.
            df_elec = df
            if remove0:
                df_elec = df_elec[df_elec['cdd'] >= 10]
            df_elec['dd'] = df_elec['cdd']
            df_elec['eui_plot'] = df_elec['eui_elec']
            df_elec['kind'] = 'Electricity'
            df_elec = df_elec[['dd', 'eui_plot', 'kind', 'Fiscal Year']]
            df_gas = df
            if remove0:
                df_gas = df_gas[df_gas['hdd'] <= -10]
            df_gas['dd'] = df_gas['hdd']
            df_gas['eui_plot'] = df_gas['eui_gas']
            df_gas['kind'] = 'Gas'
            df_gas = df_gas[['dd', 'eui_plot', 'kind', 'Fiscal Year']]
            # NOTE(review): this rebinds the df_temp parameter; the weather
            # table passed in is no longer reachable after this line.
            df_temp = df
            df_temp['dd'] = df_temp['temp_dd']
            df_temp['eui_plot'] = df_temp['eui']
            df_temp['kind'] = 'Combined'
            df_temp = df_temp[['dd', 'eui_plot', 'kind', 'Fiscal Year']]
            df_all = pd.concat([df_elec, df_gas, df_temp], ignore_index=True)
            g = sns.lmplot(x='dd',
                           y='eui_plot',
                           data=df_all,
                           col='Fiscal Year',
                           hue='kind',
                           fit_reg=True,
                           truncate=True)
            #plt.xlabel(xlabel_dict[kind], fontsize=12)
            # NOTE(review): the y label is looked up with `kind`, whereas
            # the line branch uses ylabel_dict[theme] -- confirm intended.
            g = g.set_axis_labels(xlabel_dict[kind], ylabel_dict[kind])
    # Fixed y range applied to whichever plot was produced above.
    plt.ylim((0, 10))
    # Output location depends on ld ('line' vs. scatter) and on kind.
    if ld == 'line':
        if kind != 'all':
            P.savefig(os.getcwd() +
                      '/plot_FY_weather/eui_{3}/{2}_byyear/{0}_{1}.png'.format(
                          b, s, theme, kind),
                      dpi=150)
        else:
            # Legend labels are hard-coded for three fiscal years with
            # three lines each, in the order the lines were appended
            # (electricity, gas, then the combined line labelled 'Total').
            # NOTE(review): bare reduce is a Python 2 builtin
            # (functools.reduce on Python 3).
            years = ['2013', '2014', '2015']
            line_labels = ['Electricity', 'Gas', 'Total']
            labels = reduce(lambda x, y: x + y,
                            [['{0}-{1}'.format(x, y) for y in line_labels]
                             for x in years])
            plt.legend(lines, labels)
            P.savefig(os.getcwd() +
                      '/plot_FY_weather/eui_{2}/{2}_byyear/{0}_{1}.png'.format(
                          b, s, kind),
                      dpi=150)

    else:
        if kind != 'all':
            P.savefig(
                os.getcwd() +
                '/plot_FY_weather/eui_{3}/{2}_byyear_dot_noreg/{0}_{1}.png'.
                format(b, s, theme, kind),
                dpi=75)
        else:
            P.savefig(
                os.getcwd() +
                '/plot_FY_weather/eui_{2}/{2}_byyear_dot/{0}_{1}.png'.format(
                    b, s, kind),
                dpi=75)
    plt.close()