Python regplot 예제들, seaborn.regplot Python 예제들

예제 #1

0

파일 보기

파일: parmesan.py 프로젝트: Sandy4321/meatball_stats

def make_plot(X_train, y_train, X, y, test_data, model, model_name, features, response):
    """Fit ``model`` and save three diagnostic figures for it.

    The first figure is a 2x2 grid built around the fifth column of ``X``
    (``X.columns[4]``): a regression plot, a box plot, a residual plot of the
    squared prediction error and, for the 'linear' and 'logistic' model
    names, an interaction plot of the fourth and fifth columns. The second
    figure is a correlation plot of ``test_data``; the third is a coefficient
    plot for the formula ``diagnosis ~ <features joined with ' + '>``.

    Args:
        X_train, y_train: Data passed to ``model.fit``.
        X, y: Predictors and response that are plotted; ``X`` needs at
            least five columns.
        test_data: Data handed to the regression, correlation and
            coefficient plots.
        model: Estimator exposing ``fit`` and ``predict``.
        model_name (str): Prefix of the saved file names; 'linear' and
            'logistic' additionally select the interaction plot.
        features: Names joined into the right-hand side of the formula.
        response: Unused.

    Returns:
        None. Three figures are written with ``plt.savefig``.
    """
    feature = X.columns
    f, ((ax1, ax2), (ax3, ax4)) = plt.subplots(2, 2, sharey=False)
    sns.regplot(X[feature[4]], y, test_data, ax=ax1)
    sns.boxplot(X[feature[4]], y, color="Blues_r", ax=ax2)
    model.fit(X_train, y_train)
    # The residual panel shows the squared prediction error, not raw residuals.
    sns.residplot(X[feature[4]], (model.predict(X) - y) ** 2, color="indianred", lowess=True, ax=ax3)
    # Compare by value: an identity test against a string literal only works
    # when the interpreter happens to intern both strings.
    if model_name == 'linear':
        sns.interactplot(X[feature[3]], X[feature[4]], y, ax=ax4, filled=True, scatter_kws={"color": "dimgray"}, contour_kws={"alpha": .5})
    elif model_name == 'logistic':
        pal = sns.blend_palette(["#4169E1", "#DFAAEF", "#E16941"], as_cmap=True)
        levels = np.linspace(0, 1, 11)
        # Target ax4 explicitly instead of relying on the current axes.
        sns.interactplot(X[feature[3]], X[feature[4]], y, ax=ax4, levels=levels, cmap=pal, logistic=True)
    ax1.set_title('Regression')
    ax2.set_title(feature[4]+' Value')
    ax3.set_title(feature[4]+' Residuals')
    ax4.set_title('Two-value Interaction')
    f.tight_layout()
    plt.savefig(model_name+'_'+feature[4], bbox_inches='tight')

    # Multi-variable correlation significance level
    f, ax = plt.subplots(figsize=(10, 10))
    cmap = sns.blend_palette(["#00008B", "#6A5ACD", "#F0F8FF",
                              "#FFE6F8", "#C71585", "#8B0000"], as_cmap=True)
    sns.corrplot(test_data, annot=False, diag_names=False, cmap=cmap)
    ax.grid(False)
    ax.set_title('Multi-variable correlation significance level')
    plt.savefig(model_name+'_multi-variable_correlation', bbox_inches='tight')

    # Complete coefficient plot. The original author believed this is only
    # meaningful for linear regression.
    sns.coefplot("diagnosis ~ "+' + '.join(features), test_data, intercept=True)
    plt.xticks(rotation='vertical')
    plt.savefig(model_name+'_coefficient_effects', bbox_inches='tight')

예제 #2

0

파일 보기

파일: fpkmTableManager.py 프로젝트: JEstabrook/FPKM-Manager

def volcano_plot(tissue_array, type=None, out=None):
    """Generates a tissue specific volcano plot based on the log2 fold change (x) and -log10 p-values (y) of a t-test.

    Rows containing any zero are discarded. Points with an absolute log2 fold
    change above 1 and a -log10 p-value above 2 are redrawn in red and counted
    as up- or down-regulated in the figure title.

    Args:
        tissue_array (array): Tissue specific 2-D array; the last two columns
            are compared against the first columns.
        type (str): Tissue data set was derived from; used for labeling plot
        out (str): Suffix of the output file, which is saved as
            '<type>-<out>' in PDF format. Required; it only has a default
            because it follows the optional ``type`` parameter.

    Returns:
        Nothing. Generates a Volcano plot for a tissue specific array

    Raises:
        TypeError: If ``out`` is not supplied.
    """
    if out is None:
        raise TypeError("volcano_plot() requires the 'out' argument")
    plt.clf()
    sns.set(font_scale=1.4)
    from scipy.stats import ttest_ind
    filt = tissue_array[np.all(tissue_array != 0, axis=1)]
    # NOTE(review): the fold change averages the first three columns while the
    # t-test below uses only the first two - confirm which is intended.
    x = log2(filt[:, -2:].mean(axis=1)) - log2(filt[:, :3].mean(axis=1))
    # Element 1 of the t-test result is the p-value.
    y = -log10(ttest_ind(filt[:, -2:], filt[:, :2], axis=1)[1])
    xy = column_stack((x, y))
    xy = xy[~isinf(xy).any(axis=1)]
    sns.regplot(xy[:, 0], xy[:, 1], fit_reg=False, color='k',
                scatter_kws={'alpha': 0.9, 's': 2.0, 'rasterized': False, 'zorder': 1}).set_ylim(0, )
    # Differentially expressed: |log2 fold change| > 1 and -log10(p) > 2.
    de = xy[abs(xy[:, 0]) > 1, :]
    de = de[de[:, 1] > 2, :]
    up = sum(de[:, 0] > 0)
    down = sum(de[:, 0] < 0)
    sns.regplot(de[:, 0], de[:, 1], fit_reg=False, color='r',
                scatter_kws={'alpha': 0.9, 's': 2.5, 'rasterized': False, 'zorder': 1})
    plt.axhline(y=2.0, linewidth=.8, color='red', linestyle='dashed')
    plt.axvline(x=1.0, linewidth=.8, color='red', linestyle='dashed')
    plt.axvline(x=-1.0, linewidth=.8, color='red', linestyle='dashed')
    plt.xlabel(r'$\log_2$(KO/WT)')
    plt.ylabel(r'-$\log_{10}$ p-value')
    plt.suptitle('%s: Downregulated genes: %s    Upregulated genes: %s ' % (type, down, up))
    # '%' binds tighter than '+', so the old "'%s-'+out % (type)" formatted
    # ``out`` instead of the prefix; build the whole name in one format call.
    plt.savefig('%s-%s' % (type, out), format='pdf')

예제 #3

0

파일 보기

파일: aede.py 프로젝트: lincolnfrias/nupis

def moran_plot(IM):
    """Show a scatter plot of the normalized variable against its spatial lag.

    ``IM.y`` and its spatial lag under the weights ``IM.w`` are both passed
    through ``normalize`` and plotted with a red regression line, gray zero
    axes and symmetric axis limits.
    """
    import pandas as pd
    import seaborn as sns
    import matplotlib.pyplot as plt
    import numpy as np
    import pysal as ps

    y_norm = normalize(IM.y)
    y_lag = ps.lag_spatial(IM.w, IM.y)
    y_lag_norm = normalize(y_lag)
    dados = pd.DataFrame({
        'y': IM.y,
        'y_norm': y_norm,
        'y_lag': y_lag,
        'y_lag_norm': y_lag_norm,
    })

    f, ax = plt.subplots(1, figsize=(7, 5))
    sns.regplot(x='y_norm', y='y_lag_norm', data=dados, ci=None,
                color='black', line_kws={'color': 'red'})
    for draw_zero_line in (plt.axvline, plt.axhline):
        draw_zero_line(0, c='gray', alpha=0.7)

    # Same limits on both axes: the largest absolute value plus a small border.
    extremes = np.array([y_norm.min(), y_norm.max(),
                         y_lag_norm.min(), y_lag_norm.max()])
    half_width = np.abs(extremes).max() + 0.02
    ax.set_xlim(-half_width, half_width)
    ax.set_ylim(-half_width, half_width)

    plt.show()

예제 #4

0

파일 보기

파일: misc.py 프로젝트: davenquinn/Attitude

def aligned_residuals(pca):
    """
    Plots error components along with bootstrap
    resampled error surface. Provides another
    statistical method to estimate the variance
    of a dataset.

    Two stacked panels sharing the x axis are drawn from ``pca.rotated()``:
    column 2 against column 0, then column 2 against column 1. Returns the
    figure.
    """
    A = pca.rotated()
    fig, axes = P.subplots(2,1,
            sharex=True, frameon=False)
    fig.subplots_adjust(hspace=0, wspace=0.1)

    titles = (
        "Long cross-section (axis 3 vs. axis 1)",
        "Short cross-section (axis 3 vs. axis 2)")

    for title,ax,(a,b) in zip(titles,axes,
            [(0,2),(1,2)]):

        seaborn.regplot(A[:,a], A[:,b], ax=ax)
        ax.text(0,1,title,
            verticalalignment='top',
            transform=ax.transAxes)
        ax.autoscale(tight=True)
        # .values() works on Python 2 and 3; .itervalues() is Python 2 only.
        for spine in ax.spines.values():
            spine.set_visible(False)
    # Only the last (bottom) panel is labelled; the x axis is shared.
    ax.set_xlabel("Meters")
    return fig

예제 #5

0

파일 보기

파일: data_analysis.py 프로젝트: ekolik/-Python-Analysis_of_wine_quality

def explore(wine_set):
    """Print and plot the density / residual-sugar association per quality tier.

    Wines are split into `low` (quality <= 5), `medium` (quality 6 or 7) and
    `high` (quality > 7). The Pearson correlation of each tier is printed
    first, then one regression plot per tier is shown.
    """
    quality = wine_set['quality']
    tiers = (
        ('low',
         'association between wine`s density and residual sugar for wines \nof `low` quality',
         wine_set[quality <= 5]),
        ('medium',
         '\nof `medium` quality',
         wine_set[(quality == 6) | (quality == 7)]),
        ('high',
         '\nof `high` quality',
         wine_set[quality > 7]),
    )

    for _, heading, subset in tiers:
        print(heading)
        print(scipy.stats.pearsonr(subset['density'], subset["residual_sugar"]))

    for label, _, subset in tiers:
        seaborn.regplot(x="density", y="residual_sugar", fit_reg=True, data=subset)
        plt.xlabel("Density of wine")
        plt.ylabel("Residual sugar in wine, gram")
        plt.title("Association between wine's density and residual sugar for wines of `{}` quality".format(label))
        plt.show()

예제 #6

0

파일 보기

파일: weathernorm.py 프로젝트: yujiex/GSA

def plot_building_temp():
    """Join one building's gas EUI with station temperature and save two scatter plots.

    Reads every CSV matching the hard-coded building id below the current
    working directory, merges it with the mean-temperature table on year and
    month, writes the merged table to ``test_temp.csv`` and saves one scatter
    plot faceted by year and one restricted to 2013-2014.
    """
    sns.set_context("paper", font_scale=1.5)
    building = "AZ0000FF"
    station = "KTUS"
    energy_col = "eui_gas"
    test_dir = os.getcwd() + "/csv_FY/testWeather"

    def save_scatter(frame, file_name):
        # Pad the axes around the data, write the figure and release it.
        plt.xlim((frame[station].min() - 10, frame[station].max() + 10))
        plt.ylim((0, frame[energy_col].max() + 0.1))
        P.savefig(test_dir + "/plot/" + file_name, dpi=150)
        plt.close()

    tables = [pd.read_csv(path)
              for path in glob.glob(test_dir + "/{0}*.csv".format(building))]
    energy = pd.concat([table[[energy_col, "month", "year"]] for table in tables])

    weather = pd.read_csv(os.getcwd() + "/csv_FY/weather/weatherData_meanTemp.csv")
    # The unnamed first column holds text timestamps; year and month are
    # sliced out of it as floats.
    weather["year"] = weather["Unnamed: 0"].map(lambda stamp: float(stamp[:4]))
    weather["month"] = weather["Unnamed: 0"].map(lambda stamp: float(stamp[5:7]))
    weather.set_index(pd.DatetimeIndex(weather["Unnamed: 0"]), inplace=True)
    weather = weather[[station, "month", "year"]]

    merged = pd.merge(energy, weather, on=["year", "month"], how="inner")
    merged.to_csv(test_dir + "/test_temp.csv", index=False)

    sns.lmplot(x=station, y=energy_col, data=merged, col="year", fit_reg=False)
    save_scatter(merged, "scatter_temp_byyear.png")

    recent = merged[(2012 < merged["year"]) & (merged["year"] < 2015)]
    sns.regplot(x=station, y=energy_col, data=recent, fit_reg=False)
    save_scatter(recent, "scatter_temp_1314.png")

예제 #7

0

파일 보기

파일: weather_normalize0126.py 프로젝트: yujiex/GSA

def plot_energy_temp(df_energy, df_temp, theme, b, s):
    """Save a scatter plot of one energy column against one temperature column.

    Args:
        df_energy: Table indexed by ``theme`` to obtain the energy values.
        df_temp: Table indexed by ``s`` to obtain the temperature values.
        theme: Energy column name; also the output sub-directory.
        b, s: Used in the title and in the file name ``<b>_<s>.png``.

    Returns:
        None. The figure is written below ``<cwd>/plot_FY_weather/<theme>/``.
    """
    df = pd.DataFrame({'energy': df_energy[theme], 'temp': df_temp[s]})
    sns.regplot(x='temp', y='energy', data=df, fit_reg=False)
    # The title has to be set before saving, otherwise it is missing from
    # the written file.
    plt.title('Temperature-{0} plot: {1}, {2}'.format(theme, b, s))
    P.savefig(os.getcwd() + '/plot_FY_weather/{2}/{0}_{1}.png'.format(b, s, theme), dpi = 150)
    plt.close()

예제 #8

0

파일 보기

파일: weathernorm_pm.py 프로젝트: yujiex/GSA

def plot_energy_temp(df_energy, df_temp, theme, b, s):
    """Save a scatter plot of one energy column against one temperature column.

    Args:
        df_energy: Table indexed by ``theme`` to obtain the energy values.
        df_temp: Table indexed by ``s`` to obtain the temperature values.
        theme: Energy column name; also the output sub-directory.
        b, s: Used in the title and in the file name ``<b>_<s>.png``.

    Returns:
        None. The figure is written below ``<cwd>/plot_FY_weather/<theme>/``.
    """
    df = pd.DataFrame({"energy": df_energy[theme], "temp": df_temp[s]})
    sns.regplot(x="temp", y="energy", data=df, fit_reg=False)
    # The title has to be set before saving, otherwise it is missing from
    # the written file.
    plt.title("Temperature-{0} plot: {1}, {2}".format(theme, b, s))
    P.savefig(os.getcwd() + "/plot_FY_weather/{2}/{0}_{1}.png".format(b, s, theme), dpi=150)
    plt.close()

예제 #9

0

파일 보기

파일: statsmodels_playground.py 프로젝트: InstituteforDiseaseModeling/EMOD

def seaborn_plot_rolling(df_name=df_default_name):
    """Load the named data frame and show ``mean_mod_50`` regressed on ``age``."""
    frame = load_df(df_name)
    import seaborn as sns
    from matplotlib import pyplot as plt

    sns.regplot(data=frame, x="age", y="mean_mod_50")
    plt.show()

예제 #10

0

파일 보기

파일: statsmodels_playground.py 프로젝트: InstituteforDiseaseModeling/EMOD

def seaborn_plot(df_name=df_default_name):
    """Load the named data frame and show ``mod_acquire`` regressed on ``age``."""
    frame = load_df(df_name)
    import seaborn as sns
    from matplotlib import pyplot as plt

    sns.regplot(data=frame, x="age", y="mod_acquire")
    plt.show()

예제 #11

0

파일 보기

파일: spacing_analysis.py 프로젝트: logyuan/NBA-player-movement

def plot_offense_vs_defense_spacing(spacing_data):
    """
    Save a plot of offensive vs. defensive spacing for games

    Away offense is plotted against home defense with a fitted regression
    line, and home offense against away defense as points only. Both series
    use the first color of the current palette.

    Args:
        spacing_data (pd.DataFrame): Dataframe with columns of spacing data
            ['home_offense_areas', 'home_defense_areas',
             'away_offense_areas', 'away_defense_areas']

    Returns None
        The figure is written to temp/OffenseVsDefense.png and then closed.
    """
    pairings = (
        ('away_offense_areas', 'home_defense_areas', True),
        ('home_offense_areas', 'away_defense_areas', False),
    )
    for offense, defense, with_fit in pairings:
        sns.regplot(x=getattr(spacing_data, offense),
                    y=getattr(spacing_data, defense),
                    fit_reg=with_fit,
                    color=sns.color_palette()[0],
                    ci=None)

    label_size = 16
    plt.xlabel('Average Offensive Spacing (sq ft)', fontsize=label_size)
    plt.ylabel('Average Defensive Spacing (sq ft)', fontsize=label_size)
    plt.title('Offensive spacing robustly induces defensive spacing',
              fontsize=label_size)
    plt.savefig('temp/OffenseVsDefense.png')
    plt.close()
    return None

예제 #12

0

파일 보기

파일: arima.py 프로젝트: billdthompson/NorthWind

def report_model_results(input_data, fit_model, name, filename):
	order = r_forecast.arimaorder(fit_model)
	print name + " ({},{},{})".format(*order)
	print
	try:
		intercept, intercept_se = r_stats.coef(fit_model).rx2("intercept")[0], numpy.sqrt(r_stats.vcov(fit_model).rx2("intercept", "intercept")[0])
		print "Intercept:", intercept, "+-", intercept_se
	except:
		print "No intercept"
	print

	residuals = numpy.array(r_stats.residuals(fit_model)[sum(order):])
	residuals_mask = ~numpy.isnan(residuals)
	fit_model_err = numpy.nansum(residuals**2)
	baseline_err = numpy.sum((numpy.nanmean(input_data) - input_data[sum(order):][residuals_mask])**2)

	print name + " model squared error:  ", fit_model_err, "({} NA residuals)".format(numpy.count_nonzero(numpy.isnan(residuals)))
	print "No model squared error:" + " " * len(name), baseline_err
	print "-> R^2:", 1 - fit_model_err / baseline_err
	print
	print

	plt.figure()
	plt.axis('equal')
	seaborn.regplot(input_data[sum(order):], input_data[sum(order):] - residuals)
	plt.savefig(filename)
	plt.close()

예제 #13

0

파일 보기

파일: ABuMetricsFutures.py 프로젝트: 3774257/abu

    def plot_returns_cmp(self, only_show_returns=False, only_info=False):
        """Log the capital-aware metrics, then optionally plot returns and capital.

        Metrics that take the available capital into account: return
        measures, the return trend, a visualization of capital changes and
        other metric information. No benchmark is involved.

        :param only_show_returns: stop after the cumulative-returns plot
        :param only_info: stop after logging, without plotting anything
        """
        log = self.log_func

        log('买入后卖出的交易数量:{}'.format(self.order_has_ret.shape[0]))
        log('胜率:{:.4f}%'.format(self.win_rate * 100))

        log('平均获利期望:{:.4f}%'.format(self.gains_mean * 100))
        log('平均亏损期望:{:.4f}%'.format(self.losses_mean * 100))

        log('盈亏比:{:.4f}'.format(self.win_loss_profit_rate))

        log('策略收益: {:.4f}%'.format(self.algorithm_period_returns * 100))
        log('策略年化收益: {:.4f}%'.format(self.algorithm_annualized_returns * 100))

        log('策略买入成交比例:{:.4f}%'.format(self.buy_deal_rate * 100))
        log('策略资金利用率比例:{:.4f}%'.format(self.cash_utilization * 100))
        log('策略共执行{}个交易日'.format(self.num_trading_days))

        if not only_info:
            cum_returns = self.algorithm_cum_returns
            cum_returns.plot()
            plt.legend(['algorithm returns'], loc='best')
            plt.show()

            if not only_show_returns:
                # Linear trend of the cumulative returns over the trading-day index.
                sns.regplot(x=np.arange(0, len(self.algorithm_cum_returns)),
                            y=self.algorithm_cum_returns.values)
                plt.show()
                sns.distplot(self.capital.capital_pd['capital_blance'],
                             kde_kws={"lw": 3, "label": "capital blance kde"})
                plt.show()

예제 #14

0

파일 보기

파일: parA_inheritance.py 프로젝트: mountainpenguin/spot_analysis

def versus(data, x, y, xlabel="ratio", ylabels=("growth1", "growth2"), outfn="versus"):
    """Save a three-panel figure comparing two responses against one predictor.

    ``reconfigure_data(data, x, y)`` supplies the plotted frame; its columns
    "x", "y1" and "y2" are regressed in the first two panels and passed to
    ``swarm`` for the third.

    Args:
        data, x, y: Forwarded unchanged to ``reconfigure_data``.
        xlabel: Unused.
        ylabels: Two labels forwarded to ``swarm``.
        outfn: Base name of the PDF written below ``ParA_inheritance/``.
    """
    plt.figure(figsize=(12, 6))

    # NOTE(review): presumably folds the data so that x is always >= 1 and the
    # two responses are swapped accordingly - confirm in reconfigure_data.
    re_data = reconfigure_data(data, x, y)

    for position, response in ((131, "y1"), (132, "y2")):
        plt.subplot(position)
        sns.regplot(x="x", y=response, data=re_data)
        shared.add_stats(re_data, "x", response)
        sns.despine()

    ax = plt.subplot(133)
    # Raw string: "\s", "\p" and "\h" are invalid escape sequences in a
    # normal literal. The resulting text is unchanged.
    swarm(ax, re_data, ylabels[0], ylabels[1], r"growth rate (\si{\per\hour})")

    plt.tight_layout()
    plt.savefig("ParA_inheritance/{0}.pdf".format(outfn))

예제 #15

0

파일 보기

파일: weathernorm_backup.py 프로젝트: yujiex/GSA

def plot_building_temp():
    """Join one building's gas EUI with station temperature and save two scatter plots.

    Reads every CSV matching the hard-coded building id below the current
    working directory, merges it with the mean-temperature table on year and
    month, writes the merged table to ``test_temp.csv`` and saves one scatter
    plot faceted by year and one restricted to 2013-2014.
    """
    sns.set_context("paper", font_scale=1.5)
    building = 'AZ0000FF'
    station = 'KTUS'
    energy_col = 'eui_gas'
    test_dir = os.getcwd() + '/csv_FY/testWeather'

    def save_scatter(frame, file_name):
        # Pad the axes around the data, write the figure and release it.
        plt.xlim((frame[station].min() - 10, frame[station].max() + 10))
        plt.ylim((0, frame[energy_col].max() + 0.1))
        P.savefig(test_dir + '/plot/' + file_name, dpi=150)
        plt.close()

    tables = [pd.read_csv(path)
              for path in glob.glob(test_dir + '/{0}*.csv'.format(building))]
    energy = pd.concat([table[[energy_col, 'month', 'year']] for table in tables])

    weather = pd.read_csv(os.getcwd() + '/csv_FY/weather/weatherData_meanTemp.csv')
    # The unnamed first column holds text timestamps; year and month are
    # sliced out of it as floats.
    weather['year'] = weather['Unnamed: 0'].map(lambda stamp: float(stamp[:4]))
    weather['month'] = weather['Unnamed: 0'].map(lambda stamp: float(stamp[5:7]))
    weather.set_index(pd.DatetimeIndex(weather['Unnamed: 0']), inplace=True)
    weather = weather[[station, 'month', 'year']]

    merged = pd.merge(energy, weather, on=['year', 'month'], how='inner')
    merged.to_csv(test_dir + '/test_temp.csv', index=False)

    sns.lmplot(x=station, y=energy_col, data=merged, col='year', fit_reg=False)
    save_scatter(merged, 'scatter_temp_byyear.png')

    recent = merged[(2012 < merged['year']) & (merged['year'] < 2015)]
    sns.regplot(x=station, y=energy_col, data=recent, fit_reg=False)
    save_scatter(recent, 'scatter_temp_1314.png')

예제 #16

0

파일 보기

파일: data.py 프로젝트: mlsamsom/PyFrictionTools

    def show_friction_line(self, sns_context="talk"):
        """
        Shows results from dF/dN friction test

        Regresses F on N separately for rows with direction 0 (panel "(+)")
        and direction 1 (panel "(-)"), binning N into ``max(load_index) + 1``
        bins.

        :param sns_context: seaborn plotting context passed to ``sns.set_context``
        :return: None
        """
        self.mean_friction_frame()

        frame = self.mean_fric_frame
        by_direction = [frame[frame['direction'] == code] for code in (0, 1)]
        n_bins = frame['load_index'].max() + 1

        sns.set_context(sns_context)

        f, panels = plt.subplots(nrows=1, ncols=2)
        for panel, subset in zip(panels, by_direction):
            sns.regplot(x='N', y='F', data=subset, x_bins=n_bins, ax=panel)
        for panel, sign in zip(panels, ("(+)", "(-)")):
            panel.set_title(sign)

        plt.tight_layout()

        plt.show()

예제 #17

0

파일 보기

파일: visuals.py 프로젝트: malcolmjmr/trading

def view_correlations(corr_df, dftouse, y=False, filter=None, col_num=2):
    """Show one regression panel per row label of ``corr_df``.

    Args:
        corr_df: Frame whose index labels name the columns of ``dftouse`` to
            plot on the x axis.
        dftouse: Frame holding the plotted data.
        y: Name of the dependent-variable column. When left at ``False`` (or
            ``None``) the first column of ``dftouse`` is used.
        filter: If given, only labels containing it are plotted; the panels
            of skipped labels stay empty.
        col_num: Number of panel columns in the grid.
    """
    if y is False or y is None:
        # Keep the column *name*: it is used below as dftouse[y]. Storing
        # the column itself made that lookup fail.
        y = dftouse.columns.values[0]

    # Ceiling division so every variable gets a panel, whatever the count.
    # Integer arithmetic, because plt.subplots rejects a float row count.
    row_num = max(1, -(-len(corr_df) // col_num))

    print("{} {}".format(row_num, col_num))

    # squeeze=False keeps `axes` two-dimensional even for a 1xN or Nx1 grid,
    # so .ravel() is always available.
    f, axes = plt.subplots(row_num, col_num, figsize=(30,30), squeeze=False)

    # generate plots for each independent variable
    for ax, (x, corr) in zip(axes.ravel(), corr_df.iterrows()):
        if filter is None or filter in x:
            sns.regplot(x=dftouse[x], y=dftouse[y], ax=ax)

    plt.show()

예제 #18

0

파일 보기

파일: Graphs.py 프로젝트: alanhdu/Dex

    def createMatrixInteract(self, event):
        """Ask for a response and predictors, then show a matrix of plots.

        Diagonal panels regress the response on one predictor; panels below
        the diagonal show the interaction plot of two predictors; panels
        above the diagonal are left empty. Nothing happens unless the dialog
        is confirmed with OK.

        :param event: the triggering wx event (unused)
        """
        dlg = RegressDialog(self.parent, "Matrix Interaction Plot")
        logistic_box = wx.CheckBox(dlg, label="Logistic Fit?")
        dlg.Add(logistic_box)
        if dlg.ShowModal() == wx.ID_OK:
            y, xs = dlg.GetValue()
            logistic = logistic_box.GetValue()
            data = self.parent.data[list(xs) + [y]]
            df = data[list(xs)]

            # squeeze=False keeps `axes` two-dimensional, so axes[j, i] also
            # works when only one predictor was chosen.
            fig, axes = plt.subplots(nrows=len(xs), ncols=len(xs),
                                     squeeze=False)
            for i, l1 in enumerate(df):
                for j, l2 in enumerate(df):
                    ax = axes[j, i]
                    ax.grid(False)
                    plt.subplot(ax)
                    if i == j:
                        sns.regplot(data[l1], data[y], ax=ax)
                        # would like to do logistic plot, but takes too long
                    elif i < j:
                        sns.interactplot(l1, l2, y, data, ax=ax, logistic=logistic,
                                cmap=settings["cmap"])

                    # Tick labels are only kept on the first column/row of
                    # panels and on the bottom row.
                    if i != 0 and j != 0:
                        ax.yaxis.set_visible(False)
                    if j != len(xs) - 1:
                        ax.xaxis.set_visible(False)

            plt.show()

예제 #19

0

파일 보기

파일: pptalk.py 프로젝트: fastforwardlabs/fastforwardlabs.github.io

def fig_compareposteriors(posterior_a, posterior_b):
    """Save a scatter plot of paired posterior samples for layouts A and B.

    Pairs where A exceeds B are drawn in ``cola`` and pairs where B exceeds A
    in ``colb``; the share of pairs in which B is larger is written into the
    figure, which is saved to img/compareposteriors.png.
    """
    samples_a = np.array(posterior_a)
    samples_b = np.array(posterior_b)
    b_wins = samples_b > samples_a
    a_wins = samples_a > samples_b
    pbbetter = b_wins.mean()

    fig, ax = plt.subplots(figsize=(6.5, 6.5))
    for mask, colour in ((a_wins, cola), (b_wins, colb)):
        ax = sns.regplot(x=samples_a[mask], y=samples_b[mask],
                         fit_reg=False, color=colour, marker='.')
    ax.plot(0.04, 0.05, color='#222222', marker='X')

    lim1, lim2 = 0, 0.12
    ax.set_xlim(lim1, lim2)
    ax.set_ylim(lim1, lim2)
    # Diagonal: points above it are pairs where B is larger.
    ax.plot([lim1, lim2], [lim1, lim2], color=colb)

    annotations = (
        (0.07, 0.10, 'B better', colb),
        (0.07, 0.095, '{:.1f}%'.format(100*pbbetter), colb),
        (0.09, 0.07, 'A better', cola),
        (0.09, 0.065, '{:.1f}%'.format(100-100*pbbetter), cola),
    )
    for x_pos, y_pos, text, colour in annotations:
        ax.text(x_pos, y_pos, text, color=colour)

    ax.set_xlabel('Conversion fraction layout A')
    ax.set_ylabel('Conversion fraction layout B')
    fig.savefig('img/compareposteriors.png', bbox_inches='tight')

예제 #20

0

파일 보기

파일: example.py 프로젝트: konstantinstadler/sci_python_template

def fig_linplot(data, x, y, aly_title, fig_save=True):
    """ Plot the regression of one column on another

    Both axes start at zero. The figure is shown and then closed.

    Parameters
    ----------
    data : pd.DataFrame
    x,y : str
        X and Y axis for the plot, valid column names of data
    aly_title : str
        Prefix of the plot title
    fig_save : bool, optional
        False if the figure should not be saved

    """
    folders = file_folder_specs()

    plot_title = aly_title + 'for {} vs {}'.format(x, y)
    sns.regplot(x=x, y=y, data=data)
    plt.title(plot_title)
    for set_lower_limit in (plt.xlim, plt.ylim):
        set_lower_limit(0)
    if fig_save:
        _save_fig(plot_title, folders['fig'])
    plt.show()
    plt.close()

예제 #21

0

파일 보기

파일: DataAnalysisTool0.4.py 프로젝트: williamzhuang/gcms-data-analysis

def timePlotLine(data):
    """Draw one regression-over-time figure per gene.

    Asks on stdin whether to normalize the y axis and what to label it, then
    pivots ``data`` (columns 'Gene', 'Time', 'Sample', 'Values') and plots
    value against time for every gene in its own numbered figure.
    """
    normalize = input("Would you like to normalize the y-axis? (y/n): ")
    # A dict keeps each gene once, in order of first appearance.
    gene_seen = {row['Gene']: 1 for _, row in data.iterrows()}

    data = data.pivot_table('Values', ['Sample'], ['Gene', 'Time'])
    ylabel = input("What should the y-axis label be?: ")

    for figure_number, gene in enumerate(gene_seen.keys(), 1):
        plt.figure(figure_number)
        per_time = data[gene].T.dropna(axis=1, how='any')
        if normalize == 'y':
            # Scale by the largest value in this gene's table.
            per_time = per_time / np.amax(per_time.values)

        per_time['Time'] = per_time.index
        long_form = pd.melt(per_time, id_vars='Time')[['Time', 'value']]
        sns.regplot(x='Time', y='value', data=long_form, scatter=True)
        plt.title(gene)
        plt.ylabel(ylabel)
        plt.xlabel('Time(min)')
    plt.show()

예제 #22

0

파일 보기

파일: edaHelper.py 프로젝트: dingocuster/edaHelper

    def plot_against_y(self, function=None, y_margin=0.1, lim=10, context="talk"):
        """Plot the target ``self.y`` against each variable of interest.

        Continuous variables get a regression plot, categorical ones a violin
        plot and the remaining (time) variables a line plot, all in one row
        of panels sharing the y axis.

        :param function: unused
        :param y_margin: fraction of the target's range added above and below
        :param lim: maximum number of variables (panels) to draw
        :param context: seaborn plotting context
        :return: the created figure
        """
        cat, cont, time = cat_cont_time(self.df[self.vars_of_interest])
        # Honour ``lim`` (default 10) instead of a hard-coded slice.
        cols = (cat + cont + time)[:lim]
        sns.set_context(context)
        # squeeze=False keeps ``axs`` an array even for a single panel, so
        # ``axs.flat`` is always available.
        fig, axs = plt.subplots(nrows=1, ncols=len(cols), sharey=True,
                                squeeze=False)
        for ax, col in zip(axs.flat, cols):
            if col in cont:
                sns.regplot(x=col, y=self.y, data=self.df, ax=ax)
            elif col in cat:
                sns.violinplot(x=col, y=self.y, data=self.df, ax=ax)
            else:
                # Time series: select the two columns by indexing (the frame
                # is not callable) and draw them on this panel.
                self.df[[self.y, col]].plot(ax=ax)
        y_min, y_max = self.df[self.y].min(), self.df[self.y].max()
        y_range = y_max - y_min
        plt.ylim(y_min - y_margin * y_range, y_max + y_margin * y_range)
        return fig

예제 #23

0

파일 보기

파일: crbl.py 프로젝트: luizirber/shmlast

    def plot_crbl_fit(model_df, rbh_df, hits_df, model_plot_fn, show=False,
                     figsize=(10,10)):
        """Plot the CRBL model fit over a sample of the query hits.

        Up to 10000 randomly sampled rows of ``hits_df`` are drawn with a
        first-order regression of 'E_s' on 's_aln_len'; the 'fit' values of
        ``model_df`` are overlaid as jittered points. ``rbh_df`` is unused.
        """
        plt.style.use('seaborn-ticks')

        with FigureManager(model_plot_fn, show=show,
                           figsize=figsize) as (fig, ax):

            scatter_kws = dict(s=10, alpha=0.7,
                               c=sns.xkcd_rgb['ruby'], marker='o')
            line_kws = dict(c=sns.xkcd_rgb['red wine'],
                            label='Query Hits Regression')
            n_sampled = min(len(hits_df), 10000)
            sns.regplot(x='s_aln_len', y='E_s', data=hits_df.sample(n_sampled),
                        order=1, label='Query Hits',
                        scatter_kws=scatter_kws, line_kws=line_kws,
                        color=scatter_kws['c'], ax=ax)

            # Restyle the same keyword dict for the model points.
            scatter_kws.update(c=sns.xkcd_rgb['twilight blue'], marker='s')
            sns.regplot(x='center', y='fit', data=model_df,
                        fit_reg=False, x_jitter=True, y_jitter=True, ax=ax,
                        label='CRBL Fit',
                        scatter_kws=scatter_kws, line_kws=line_kws)

            legend = ax.legend(fontsize='medium', scatterpoints=3, frameon=True)
            legend.get_frame().set_linewidth(1.0)

            centers = model_df['center']
            ax.set_xlim(centers.min(), centers.max())
            ax.set_ylim(0, max(model_df['fit'].max(), hits_df['E'].max()) + 50)
            ax.set_title('CRBL Fit')

예제 #24

0

파일 보기

파일: ScatterPlot.py 프로젝트: srisachin/PlotCreator-2

	def plotExp(self,exp,myData):
		"""Save a regression plot for one experiment and advance the plot counter.

		``exp[1]`` names the 'xaxis' and 'yaxis' columns of ``myData``. The
		file is written to plots/static/<settings.count>.png.
		"""
		plt.figure()
		axes_spec = exp[1]
		sns.regplot(x=axes_spec['xaxis'], y=axes_spec['yaxis'], data=myData)
		plt.savefig("plots/static/%d.png" % settings.count)
		plt.clf()
		plt.close()
		settings.count += 1

예제 #25

0

파일 보기

파일: simulate.py 프로젝트: sammosummo/monet

def sim_regression(show=True):
    """Simulate a data set with one regressor (age) on both d' and c.

    For 100 subjects with random ages, true d' and c values are generated
    from a linear function of age plus normal noise, passed to ``simulate``
    and ``est_sdt``, and the resulting table is written to
    '02.OneContinuousAndOneDichotomousPredictor.csv'. True values and
    estimates are drawn in two side-by-side panels.

    When ``show`` is True the table is printed and the figure saved as
    'fig1.png' (nothing is returned); otherwise the data frame is returned
    and the figure is not saved.

    """
    # Per-parameter intercept, slope on age, and noise standard deviation.
    intercepts = {'d': 1, 'c': 0}
    betas = {'d': -0.005, 'c': 0.001}
    errors = {'d': 0.05, 'c': 0.005}
    # True regression line of parameter p at age(s) y.
    reg = lambda p, y: intercepts[p] + betas[p] * y
    nsubjects = 100
    # Age grid used only for drawing the true regression line.
    y = np.linspace(10, 90, 1000)
    ages = np.random.randint(18, 80, size=nsubjects)
    data = {
        'age': ages,
        'N': [50] * nsubjects,
        'S': [50] * nsubjects
    }
    plt.figure(figsize=(15, 7.5))

    # First pass: draw the noisy true values and the true line per parameter.
    for i, p in enumerate(['d', 'c'], 1):

        plt.subplot(1, 2, i)
        true_p = reg(p, ages) + norm.rvs(0, errors[p], nsubjects)
        sb.regplot(ages, true_p, fit_reg=False, label='True values (regression + some error)')
        plt.plot(y, reg(p, y), linewidth=2, label='True regression line')

        plt.ylabel(p)

        data['true_%s' % p] = true_p

    # NOTE(review): presumably simulate() yields one (F, H, M, R) count tuple
    # per subject and est_sdt() one (d, c) estimate pair - confirm against
    # their definitions.
    simulations = np.asarray(
        simulate(data['true_d'], data['true_c'], data['N'], data['S'])
    )
    data['F'], data['H'], data['M'], data['R'] = zip(*simulations)
    data['mle_d'], data['mle_c'] = zip(*est_sdt(*zip(*simulations)))
    df = pd.DataFrame(data)
    df['subj'] = ['subj_%i' % i for i in df.index]
    df.to_csv('02.OneContinuousAndOneDichotomousPredictor.csv')

    # Second pass: overlay the estimates (with a fitted line) on the same panels.
    ylabels = {'d': "$d^\prime$", 'c': "$c$"}
    for i, p in enumerate(['d', 'c'], 1):

        plt.subplot(1, 2, i)
        sb.regplot(
            'age', 'mle_%s' % p, df,
            label='MLEs based on simulated data (%i trials per subject)' %
                  (data['N'][0] + data['S'][0])
        )
        plt.ylabel(ylabels[p])
        plt.xlabel('Age (years)')
        plt.xlim(10, 90)

    if show is True:
        # Python 2 print statement.
        print df
        plt.legend()
        plt.tight_layout(pad=0)
        plt.savefig('fig1.png')

    else:
        return df

예제 #26

0

파일 보기

파일: week4_visualizations.py 프로젝트: wer61537/Wesleyan-DataMangementVisualization

def scatter_plot(df,dep_var, indep_var,units):
    """Save a scatter plot of ``dep_var`` against ``indep_var``.

    Args:
        df: Data frame containing both columns.
        dep_var: Column drawn on the y axis.
        indep_var: Column drawn on the x axis.
        units: Unit text appended to the y-axis label.

    The figure is written to ``wd`` as
    'Scatterplot_<dep_var>_vs_<indep_var>.png' and then closed.
    """
    seaborn.regplot(x=indep_var, y=dep_var, data=df, fit_reg=False)
    #would be great to figure out how to remove '_cat'
    plt.xlabel(indep_var)
    plt.ylabel(dep_var + ", " + units)
    plt.title("Scatterplot of " + dep_var + " versus " + indep_var)
    plt.savefig(wd + "Scatterplot_" + dep_var + "_vs_"+ indep_var + '.png')
    # Call close: a bare ``plt.close`` is only a reference, so the figure
    # stayed open and later plots were drawn on top of it.
    plt.close()

예제 #27

0

파일 보기

파일: crossvalidation.py 프로젝트: karlderkaefer/scikit

def plot_prediction_error(name, clf, X, y):
    """Cross-validate ``clf`` and save a plot of predicted against actual values.

    Predictions come from a shuffled 5-fold split; the mean squared error is
    printed and at most the first 1000 points are plotted. The figure is
    saved as 'plot_validation_<name>.png'.
    """
    plt.figure()
    folds = KFold(X.shape[0], 5, shuffle=True)
    y_hat = cross_val_predict(clf, X, y, cv=folds)
    mse = mean_squared_error(y, y_hat)
    print("%.3f = mean squared error" % mse)
    shown = slice(None, 1000)
    sns.regplot(x=y[shown], y=y_hat[shown])
    sns.axlabel("actual", "predicted")
    plt.savefig("plot_validation_" + name + ".png")

예제 #28

0

파일 보기

파일: Assignment2.py 프로젝트: fsee/DataScienceClass3

def make_scatter_plot(explain, response):
    """Show a regression plot of two ``df_combined`` columns and print their Pearson correlation."""
    x_name = str(explain)
    y_name = str(response)

    seaborn.regplot(data=df_combined, x=explain, y=response, fit_reg=True)
    plt.xlabel(x_name)
    plt.ylabel(y_name)
    plt.title('Association between {} and {}'.format(x_name, y_name))
    plt.show()

    correlation = scipy.stats.pearsonr(df_combined[explain], df_combined[response])
    print (correlation)

예제 #29

0

파일 보기

파일: correlation_plot.py 프로젝트: COMBINE-lab/QuantAnalysis

def makeCorrPlot(truthCol, predCol, df, outBase, method, measure, logged=True):
    """Save a scatter plot of predicted against true values with Spearman's r.

    Args:
        truthCol: Column of ``df`` holding the true values.
        predCol: Column of ``df`` holding the predicted values.
        df: Data frame containing both columns.
        outBase: Prefix of the output file '<outBase>_<measure>_corr.pdf'.
        method: Method name used in the y-axis label.
        measure: 'tpm' or 'num_reads'; any other value is labelled
            'NO MEASURE NAME' but still produces a file.
        logged: Plot log10 of both columns (axis limits are taken from the
            strictly positive values, padded by 0.5) instead of raw values.
    """
    plt.cla()
    plt.clf()

    measureNames = {"tpm": "TPM", "num_reads": "# fragments"}
    measureName = measureNames.get(measure, "NO MEASURE NAME")
    # Derive the file name from `measure` itself so it is defined for every
    # value; previously an unknown measure ended in a NameError at savefig.
    outFile = "{}_{}_corr.pdf".format(outBase, measure)

    rv, _ = stats.spearmanr(df[truthCol], df[predCol])
    corrText = "Spearman r = {0:.2f}".format(rv)

    ax = plt.axes()
    if logged:
        # Limits come from positive entries only: log10(0) is -inf.
        positiveTruth = df.loc[df[truthCol] > 0, truthCol]
        positivePred = df.loc[df[predCol] > 0, predCol]
        minVal = min(np.log10(positiveTruth.min()),
                     np.log10(positivePred.min()))
        maxVal = max(np.log10(positiveTruth.max()),
                     np.log10(positivePred.max()))

        sns.regplot(x=np.log10(df[truthCol]), y=np.log10(df[predCol]), data=df,
                    fit_reg=False, dropna=True, color=[0.7, 0.7, 0.7, 0.2],
                    ax=ax)

        ax.set_xlabel("log(True {})".format(measureName))
        ax.set_ylabel("log({} {})".format(method, measureName))

        ax.set_xlim(minVal-0.5, maxVal+0.5)
        ax.set_ylim(minVal-0.5, maxVal+0.5)
    else:
        minVal = min(df[truthCol].min(), df[predCol].min())
        maxVal = max(df[truthCol].max(), df[predCol].max())

        sns.regplot(x=truthCol, y=predCol, data=df, fit_reg=False,
                    color=[0.7, 0.7, 0.7, 0.2], ax=ax)

        ax.set_xlabel("True {}".format(measureName))
        ax.set_ylabel("{} {}".format(method, measureName))

        ax.set_xlim(minVal, maxVal)
        ax.set_ylim(minVal, maxVal)

    plt.figtext(0.15, 0.85, corrText)

    # Get rid of axis spines
    sns.despine()

    plt.savefig(outFile)

예제 #30

0

파일 보기

파일: time_of_day.py 프로젝트: IanEisenberg/Self_Regulation_Ontology

def plot_time_effects(measure_DVs, melted_DVs, title=None):
    """Show how each measure varies with the time of day.

    Args:
        measure_DVs: Wide frame with an 'hour' column; every column except
            the last two is drawn as a LOWESS curve against 'hour'.
        melted_DVs: Long frame with 'split_time', 'value' and 'variable'
            columns, drawn as grouped box plots.
        title: Optional figure title.
    """
    f, (ax1,ax2) = plt.subplots(1, 2, figsize=(16,8))
    for name in measure_DVs.columns[:-2]:
        sns.regplot(x='hour', y=name, data=measure_DVs, lowess=True, label=name,
                    ax=ax1, scatter_kws={'s': 100, 'alpha': .4})
    ax1.legend()
    # Use the parameter; the body previously read an undefined name `melted`.
    sns.boxplot(x='split_time', y='value', hue='variable', data=melted_DVs, ax=ax2)
    if title:
        plt.suptitle(title, fontsize=18)
    plt.show()

예제 #31

0

파일 보기

# Keep rows dated 2020-03-01 or later and renumber them from zero; the
# 'index' column created by reset_index() is dropped again right away.
df = df[df['date'] >= '2020-03-01'].reset_index()
df.drop('index', axis=1, inplace=True)

print(df.head())

# Daily new cases as bars.
plt.figure(figsize=(20, 10))
plt.bar(df.date, df['new_cases'], color='salmon')
ax = plt.gca()
# Show only every seventh x tick label.
for i, label in enumerate(ax.get_xaxis().get_ticklabels()):
    if i % 7 != 0:
        label.set_visible(False)

# Overlay a 16th-order polynomial fit of the weekly rolling average against
# the row number.
sns.regplot(x=df.index,
            y='rolling_avg_week',
            data=df,
            order=16,
            ci=None,
            color='red')

plt.title('California COVID-19 New Cases By Day')
plt.xlabel('Date')
plt.ylabel('New Cases')
plt.show()

# Straight line through the first and last of the final seven rolling
# averages: m is the change per day, b the most recent value.
last_week = df.tail(7)['rolling_avg_week']
m = (last_week.iloc[6] - last_week.iloc[0]) / 6
b = last_week.iloc[6]

# Days until that line reaches 500.
# NOTE(review): divides by zero when the first and last values are equal.
five_hundred = ceil((500 - b) / m)
# NOTE(review): this statement is cut off in the visible source; the rest of
# the message and the closing parenthesis are missing.
print('Based on current weekly trends, it would take ' + str(five_hundred) +
      ' days until the average number of new daily cases in '

예제 #32

0

파일 보기

파일: untitled0.py 프로젝트: Barak29/Chapter-14

# Random 6x4 frame whose column index is named 'Genus'. The Index is passed
# directly: wrapping it in a list builds a one-level MultiIndex, which turns
# the legend labels into tuples.
df=pd.DataFrame(np.random.rand(6,4),
                index=['one','two','three','four','five','six'],
                columns=pd.Index(['A','B','C','D'],name='Genus'))
df.plot.bar()
df.plot.barh(stacked=True, alpha=0.5)
df

tips =pd.read_csv('C:/Users/barak/OneDrive/Documents/python/pydata-book-2nd-edition/examples/tips.csv')
######Seaborn
import seaborn as sns
# Tip as a fraction of the bill before the tip.
tips['tip_pct']= tips['tip']/(tips['total_bill']-tips['tip'])
tips
sns.barplot(x='tip_pct',y='day',data=tips,orient='h')
sns.barplot(x='tip_pct',y='day',data=tips,orient='h', hue='time')
sns.set(style='whitegrid')

tips['tip_pct'].plot.density()


macrodata =pd.read_csv('C:/Users/barak/OneDrive/Documents/python/pydata-book-2nd-edition/examples/macrodata.csv')
######Seaborn
data=macrodata[['cpi','m1','tbilrate','unemp']]
# Period-to-period change of the logged series.
trans_data=np.log(data).diff().dropna()
trans_data[-5:]
# Keyword arguments work with every seaborn release; positional x/y do not.
sns.regplot(x='m1',y='unemp',data=trans_data)
plt.title('Change in log %s Versus log %s' % ('m1','unemp'))

sns.pairplot(trans_data,diag_kind='kde',plot_kws={'alpha':0.2})

########
# adding things

예제 #33

0

파일 보기

파일: FrequencyCharts.py 프로젝트: Microshak/IoTDataScience

# Bar charts (count plots) show the distribution of each categorical variable.
# NOTE(review): the 3x3 grid holds at most nine variables -- confirm
# len(categoricalVariables) <= 9.
fig = plt.figure(figsize=(15, 8))
for i, c in enumerate(categoricalVariables):
    ax = plt.subplot(3, 3, i + 1)
    sns.countplot(x=train[c])
fig.tight_layout()
plt.show()

# Summary statistics of the continuous variables (only displayed when run
# interactively; the result is not stored).
train[continuousVariables].describe()

# Distribution of each continuous variable, missing values dropped.
# NOTE(review): the 2x2 grid holds at most four variables.
fig = plt.figure(figsize=(15, 5))
for i, c in enumerate(continuousVariables):
    ax = plt.subplot(2, 2, i + 1)
    sns.distplot(train[c].dropna())
fig.tight_layout()
plt.show()

# "count" column broken down by each categorical variable.
fig = plt.figure(figsize=(15, 10))
for i, c in enumerate(categoricalVariables):
    ax = plt.subplot(3, 3, i + 1)
    sns.boxplot(x=train[c], y=train["count"])
fig.tight_layout()

# Continuous variables: scatter plus linear fit against the "count" column.

fig = plt.figure(figsize=(15, 10))
for i, c in enumerate(continuousVariables):
    ax = plt.subplot(2, 2, i + 1)
    sns.regplot(x=train[c], y=train["count"])
fig.tight_layout()

예제 #34

0

파일 보기

파일: utildata.py 프로젝트: mysterious-ben/Titanic

def regplot(data: pd.DataFrame, featX: str, featY: str) -> None:
    """Overlay a logistic fit and a lowess fit of ``featY`` against ``featX``.

    Both fits are drawn on one new axes, and the figure is then shown.

    Args:
        data: frame holding both columns.
        featX: name of the column plotted on the x axis.
        featY: name of the column plotted on the y axis.
    """
    fig, ax = plt.subplots()
    # x/y are passed by keyword: from seaborn 0.12 on, `data` is the only
    # positional parameter, so the positional form no longer works.
    sns.regplot(x=data[featX], y=data[featY], logistic=True, ax=ax)
    sns.regplot(x=data[featX], y=data[featY], lowess=True, ax=ax)
    fig.show()

예제 #35

0

파일 보기

파일: _visualizer.py 프로젝트: antgonza/q2-diversity

def mantel(output_dir: str,
           dm1: skbio.DistanceMatrix,
           dm2: skbio.DistanceMatrix,
           method: str = 'spearman',
           permutations: int = 999,
           intersect_ids: bool = False,
           label1: str = 'Distance Matrix 1',
           label2: str = 'Distance Matrix 2') -> None:
    """Run a Mantel test on two distance matrices and render the report.

    Saves ``mantel-scatter.svg`` (pairwise distances of ``dm1`` against those
    of ``dm2``) into ``output_dir`` and renders the ``mantel_assets`` index
    template there with the results table, the sample size and the set of
    mismatched IDs.

    Raises:
        ValueError: if the two ID sets differ and ``intersect_ids`` is false.
    """
    alt_hypothesis = 'two-sided'
    statistic_symbols = {'spearman': 'rho', 'pearson': 'r'}

    # skbio's mantel could validate or intersect the IDs on its own, but the
    # reconciliation is done here so that the offending IDs can be named in
    # the error / shown in the visualization, and because the filtered
    # matrices are reused further down for the scatter plot.
    ids1, ids2 = set(dm1.ids), set(dm2.ids)
    mismatched_ids = ids1.symmetric_difference(ids2)

    if mismatched_ids:
        if not intersect_ids:
            raise ValueError(
                'The following ID(s) are not contained in both distance matrices. '
                'This sometimes occurs when mismatched files are passed. If this '
                'is not the case, you can use `intersect_ids` to discard these '
                'mismatches and apply the Mantel test to only those IDs that are '
                'found in both distance matrices.\n\n%s' %
                ', '.join(sorted(mismatched_ids)))
        # Every shared ID exists in both matrices, so strict filtering is safe.
        shared_ids = ids1.intersection(ids2)
        dm1 = dm1.filter(shared_ids, strict=True)
        dm2 = dm2.filter(shared_ids, strict=True)

    # The ID sets are identical by now, hence strict mode.
    r, p, sample_size = skbio.stats.distance.mantel(
        dm1,
        dm2,
        method=method,
        permutations=permutations,
        alternative=alt_hypothesis,
        strict=True)

    method_title = method.title()
    row_values = [
        method_title, sample_size, permutations, alt_hypothesis, r, p
    ]
    row_labels = [
        'Method', 'Sample size', 'Permutations', 'Alternative hypothesis',
        '%s %s' % (method_title, statistic_symbols[method]), 'p-value'
    ]
    result = pd.Series(row_values, index=row_labels,
                       name='Mantel test results')
    table_html = q2templates.df_to_html(result.to_frame())

    # Either matrix's IDs can drive the pairing because both sets match.
    pairs = [(dm1[a, b], dm2[a, b])
             for a, b in itertools.combinations(dm1.ids, 2)]

    plt.figure()
    x = 'Pairwise Distance (%s)' % label1
    y = 'Pairwise Distance (%s)' % label2
    scatter_data = pd.DataFrame(pairs, columns=[x, y])
    sns.regplot(x=x, y=y, data=scatter_data, fit_reg=False)
    plt.savefig(os.path.join(output_dir, 'mantel-scatter.svg'))

    index = os.path.join(TEMPLATES, 'mantel_assets', 'index.html')
    context = {
        'table': table_html,
        'sample_size': sample_size,
        'mismatched_ids': mismatched_ids
    }
    q2templates.render(index, output_dir, context=context)

예제 #36

0

파일 보기

from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
import matplotlib.pyplot as plt
import statsmodels.formula.api as smf
import statsmodels.api as sm
from scipy.stats import linregress
import seaborn as sns

allData = pd.read_csv(r'C:\Users\nuran\Desktop\Senior_Project\All_data.csv',
                      skiprows=0,
                      delimiter=',')

x = allData['Independence_00']  # predictor
y = allData['Response_Rate_00']  # response

plt.figure(figsize=(10, 10))

# x/y by keyword: newer seaborn releases reject positional data arguments.
sns.regplot(x=x, y=y)
# Overlay the raw points as small yellow markers. The series are passed
# directly: wrapping them in lists (as before) made matplotlib create one
# separate line artist per data point. markersize is numeric, not a string.
plt.plot(x, y, 'o', label='Indepedence', color='yellow', markersize=3)

plt.title('Linear Regression Analysis: Response Rate & Independence for 2000')
plt.xlabel('Independence', color='#1C2833')
plt.ylabel('Response Rate')
plt.show()

#
# clean code

#
# clean code

예제 #37

0

파일 보기

파일: 01__rna_seq_analysis.py 프로젝트: kmattioli/2019__lncRNA_CRISPRi

# Number of rows with q-value below 0.05 (only displayed when run as a
# notebook cell; the result is not stored).
len(diff_hESC_endo_ncRNA[diff_hESC_endo_ncRNA["qval_hESC_endo"] < 0.05])

# In[103]:

# Number of distinct gene names among those rows.
len(diff_hESC_endo_ncRNA[diff_hESC_endo_ncRNA["qval_hESC_endo"] < 0.05]
    ["gene_name"].unique())

# In[104]:

# Scatter of the log2 fold-change column against the log10 q-value column,
# points only (no regression line).
fig = plt.figure(figsize=(1.5, 1.75))
g = sns.regplot(x="endo_hESC_log2fc",
                y="qval_log10_hESC_endo",
                data=diff_hESC_endo_ncRNA,
                fit_reg=False,
                color="firebrick",
                scatter_kws={
                    "s": 8,
                    "edgecolors": "white",
                    "linewidths": 0.5
                })

plt.xlabel("log2(endoderm/hESC)")
plt.ylabel("negative log10 q value")
plt.ylim((-0.1, 4))
plt.xlim((-8.5, 8.5))
# Dashed horizontal line at the q = 0.05 threshold.
plt.axhline(y=-np.log10(0.05), linestyle="dashed", color="black", linewidth=1)
#plt.title("volcano plot for ncRNAs in endoderm vs. hESCs\n(n=%s)" % (len(diff_hESC_endo_ncRNA)))
# dpi="figure" keeps the figure's own dpi when saving.
plt.savefig("Fig2E_1.pdf", bbox_inches="tight", dpi="figure")

# In[105]:

예제 #38

0

파일 보기

print(lm.summary())


# In[18]:


# OLS fit of global sales on rest-of-world sales; print the fit summary.
lm = smf.ols(
    formula="Global_Sales ~ Other_Sales",
    data=df,
).fit()
print(lm.summary())


# In[22]:


# North American sales against global sales, with a linear fit.
sns.regplot(x="NA_Sales", y="Global_Sales", data=df)


# In[23]:


# European sales against global sales, with a linear fit.
sns.regplot(x="EU_Sales", y="Global_Sales", data=df)


# In[24]:


# Japanese sales against global sales, with a linear fit.
sns.regplot(x="JP_Sales", y="Global_Sales", data=df)

예제 #39

0

파일 보기

import seaborn as sns
from matplotlib import pyplot as plt
import pandas as pd
sns.set_context("poster")
sns.axes_style()
sns.despine()
from scipy import stats

df = pd.read_csv('data.csv')

# Figure-level scatter plus linear fit of x_position against frame.
sns.lmplot(x='frame', y='x_position', data=df, fit_reg=True)

# Coefficients of the linear fit (only slope and intercept are used below).
slope, intercept, r_value, p_value, std_err = stats.linregress(
    df['frame'], df['x_position'])

# Draw the same regression again, labelling the fitted line with its
# equation via line_kws so that it shows up in the legend.
ax = sns.regplot(
    x="frame",
    y="x_position",
    data=df,
    line_kws={'label': "y={0:.1f}x+{1:.1f}".format(slope, intercept)})

# Show the legend carrying the line equation.
ax.legend()

sns.despine()
plt.show()

# Saved through the Axes' own figure object rather than pyplot's current
# figure, after plt.show().
ax.get_figure().savefig("output.png")

예제 #40

0

파일 보기

파일: Kaggle_CalCOFI_cleanCorrVisMap.py 프로젝트: arqumbintaj/Kaggle-Python-Code

    count = count + 1

plt.show()


#  Plot high correlation attributes - PLOTS #3  -  GOOD
fig = plt.figure(figsize=(14,60))
col = 3
# Row count as before (derived from df_corr), but never fewer rows than the
# plotted frame needs: the floored division could leave the grid one row
# short, and add_subplot then raises once `count` exceeds row * col.
row = max(int(len(df_corr.Attributes) / col),
          -(-len(df_plot.Attributes) // col))  # second term is ceil(n / col)
count = 1

for i, j in zip(df_plot.Attributes, df_plot.Correlation):
    fig.add_subplot(row, col, count)
    plt.title('Salinity vs {} (corr = {:.4f})\nnormalized distribution'.format(i,j))
    #plt.xlim(-4,4)
    # Second-order polynomial fit of "Salnty" against attribute i.
    sns.regplot(x=df_sample[i], y="Salnty", data=df_sample, order=2,
                scatter_kws={'alpha':0.25}, color='green')
    count = count + 1

plt.show()
#===============================================


#===============================================
#  PLOT ALL COLUMNS IN CORR  - not good
#===============================================
#  Plot high correlation attributes - PLOTS #1  - CRAP
fig = plt.figure(figsize=(12,60))
plotNum  = 1     # initialize plot number

#for i in df_high.columns.drop(['Salnty','R_SALINITY']):
for i, j in zip(df_high.Attributes,df_high.Correlation):

예제 #41

0

파일 보기

import seaborn as sns
from pydataset import data
import pandas as pd
import matplotlib.pyplot as plt
from env import host, password, user

# --- iris ---
iris = data('iris')
# Distribution of petal length.
sns.distplot(iris['Petal.Length'])

# Petal width against petal length with a linear fit.
sns.regplot(x='Petal.Length', y='Petal.Width', data=iris)  # Yes

# Sepal width against sepal length, coloured by species.
sns.relplot(x='Sepal.Length', y='Sepal.Width', data=iris, hue='Species'
            )  # Probably? Many edge cases between versicolor and virginica

# All pairwise feature combinations.
sns.pairplot(
    iris
)  # It looks like setosa is easy to identify regardless of feature; I think petal width/length look like the best pair of features by which to distinguish versicolor and virginica, but it's still an imperfect metric.

# --- anscombe ---
anscombe = sns.load_dataset('anscombe')

# Summary statistics per dataset (only displayed when run interactively).
anscombe.groupby('dataset').describe()

# One scatter panel per dataset.
sns.relplot(x='x', y='y', data=anscombe, col='dataset')

# --- InsectSprays ---
insectsprays = data('InsectSprays')

# Count distribution per spray.
sns.boxplot(data=insectsprays, y='count', x='spray')

# --- swiss ---
swiss = data('swiss')

# Boolean flag: rows whose 'Catholic' value is above 80.
swiss['is_catholic'] = swiss['Catholic'] > 80

예제 #42

0

파일 보기

# Reads six integers from stdin -- hours, minutes, seconds of the first time,
# then of the second -- and prints (second - first) in seconds. Each value is
# weighted by 3600/60/1; the first triple counts negatively.
print(
    sum(60 ** ((5 - i) % 3) * int(input()) * (-1 if i < 3 else 1)
        for i in range(6)))

import seaborn as sns
# Scatter of 'output' against 'input' with a linear fit, green '+' markers.
ax = sns.regplot(
    x='input',
    y='output',
    data=df,
    color='green',
    marker='+',
)

예제 #43

0

파일 보기

파일: plots_old.py 프로젝트: tylov-climate/KSS

def pairgrid_plots(stats,
                   dims,
                   statop,
                   season=None,
                   scenario=None,
                   period=None):
    """Draw a grid of tas-vs-pr regression plots, one per experiment/season.

    Rows of the grid are the selected experiments, columns the selected
    seasons. The figure is shown with ``plt.show()``.

    Args:
        stats: nested container indexed as
            ``stats[season_idx][exp_idx][op_idx][var_idx]``.
        dims: dict with ``'exps'`` and ``'seasons'`` label sequences and an
            ``'inv'`` entry of lookup tables; as used here ``inv[0]`` maps a
            season label, ``inv[2]`` a statistic name and ``inv[3]`` a
            variable name (``'tas'``, ``'pr'``) to its index in ``stats``.
        statop: statistic to plot (a key of ``dims['inv'][2]``).
        season: if given, only this season is plotted; otherwise all of them.
        scenario: if given, keep only experiments for which
            ``scenario in label`` holds.
        period: if given, keep only experiments for which
            ``period in label`` holds.
    """
    season_ren = {
        'full': 'annual',
        's1': 'spring',
        's2': 'summer',
        's3': 'autumn',
        's4': 'winter',
        'FULL': 'annual',
        'MAM': 'spring',
        'JJA': 'summer',
        'SON': 'autumn',
        'DJF': 'winter'
    }

    d = dims['inv']
    op = d[2][statop]

    # Indices of the experiments that pass both optional filters.
    exps = [
        i for i, exp in enumerate(dims['exps'])
        if (period is None or period in exp)
        and (scenario is None or scenario in exp)
    ]
    if season is None:
        seasons = list(range(len(dims['seasons'])))
    else:
        seasons = [d[0][season]]

    fig = plt.figure(figsize=(18, 9))
    n_rows, n_cols = len(exps), len(seasons)
    print('exps', [dims['exps'][i] for i in exps], [n_rows, n_cols, 0])

    n = 0
    for e in exps:
        for s in seasons:
            n += 1
            ax = fig.add_subplot(n_rows, n_cols, n)
            tas = stats[s][e][op][d[3]['tas']]
            # NOTE(review): 24*60*60 presumably converts a per-second rate
            # into a per-day amount -- confirm the units of 'pr'.
            pr = stats[s][e][op][d[3]['pr']] * (24 * 60 * 60)
            exp_label = dims['exps'][e]
            season_label = season_ren[dims['seasons'][s]]
            tas_name = '%s %s %s' % ('tas', season_label, exp_label)
            pr_name = '%s %s %s' % ('pr', season_label, exp_label)
            print(tas_name)
            df = pd.DataFrame({tas_name: tas, pr_name: pr})
            sns.regplot(data=df, x=tas_name, y=pr_name, fit_reg=True, ax=ax)

    plt.show()

예제 #44

0

파일 보기

                filewriter.add_summary(summ_buf, i)
            else:
                train_loss, _ = sess.run([loss, train_op], feed_dict=feed_dict)

            if min_loss is None or train_loss < min_loss:
                min_loss = train_loss
        table.add_row([path, min_loss, w.eval(), b.eval()])

# As we can see, 'ds2.csv' has the lowest loss, and thus contains the most linear data, 'ds3.csv' coming a distant second. The values of w and b are shown in the table.

# In[6]:

print(table)

# As we can see, the plots show that `ds2.csv` is the most linear.

# In[8]:

import seaborn as sb
import matplotlib.pyplot as plt
# One regression plot per dataset, with both axes scaled by their maximum.
for path in paths:
    d = pd.read_csv(path, names=['x', 'y'])
    m1 = np.max(d['x'])
    x = np.array(d['x'])
    y = np.array(d['y'])
    m2 = np.max(d['y'])
    # x/y by keyword: newer seaborn releases reject positional data args.
    sb.regplot(x=(x / m1), y=(y / m2))
    plt.show()

# In[ ]:

예제 #45

0

파일 보기

파일: seaborn_regplot.py 프로젝트: gdabba-WORK/pandas_study

import matplotlib.pyplot as plt
import seaborn as sns

titanic = sns.load_dataset('titanic')

# Set the style theme (5 options: darkgrid, whitegrid, dark, white, ticks)
sns.set_style('darkgrid')

# Create the figure object (two subplots on one figure)
fig = plt.figure(figsize=(15, 5))
ax1 = fig.add_subplot(1, 2, 1)
ax2 = fig.add_subplot(1, 2, 2)

# Draw the plot - linear regression line shown (fit_reg=True is the default)
sns.regplot(
    x='age',  # x-axis variable
    y='fare',  # y-axis variable
    data=titanic,  # data
    ax=ax1)  # axes object - 1st plot

# Draw the plot - linear regression line hidden (fit_reg=False)
sns.regplot(
    x='age',
    y='fare',
    data=titanic,
    ax=ax2,  # axes object - 2nd plot
    fit_reg=False)  # do not draw the regression line

plt.show()

예제 #46

0

파일 보기

def _plot_codon_lowess(df, codons, cmap, ax):
    """Draw one lowess-smoothed GC-vs-value curve per codon on *ax*."""
    for c in sorted(codons):
        seaborn.regplot(x=df["GC"].astype(float).values,
                        y=df[c].astype(float).values,
                        label=c,
                        lowess=True,
                        scatter_kws={
                            "s": 5,
                            "alpha": 0.1
                        },
                        ax=ax,
                        # Without a mapping, seaborn picks the next colour
                        # of the axes' colour cycle.
                        color=cmap[c] if cmap is not None else None)

    ax.set_ylim([-0.05, 1.05])
    ax.set_ylabel("Probability")
    ax.set_xlabel("GC")
    leg = ax.legend()
    # The scatter points are nearly transparent; make the legend markers
    # opaque. `legendHandles` was renamed `legend_handles` in newer
    # matplotlib, so support both spellings.
    handles = getattr(leg, "legend_handles", None)
    if handles is None:
        handles = leg.legendHandles
    for lh in handles:
        lh.set_alpha(1)


def plot_candidate_codons(env, df, codons, cmap=None):
    # type: (...) -> None
    """Show lowess curves of codon columns against GC in three figures.

    1. All rows on a single axes.
    2. One panel each for ``df["Type"]`` equal to "Bacteria" / "Archaea".
    3. One panel each for ``df["GENOME_TYPE"]`` equal to "A", "B", "C", "D".

    Each figure is shown with ``plt.show()`` before the next one is built.

    Args:
        env: not used by this function; kept for interface compatibility.
        df: frame with a ``"GC"`` column, one column per codon, and the
            ``"Type"`` and ``"GENOME_TYPE"`` columns.
        codons: names of the codon columns to plot.
        cmap: optional mapping from codon to colour. The original default of
            ``None`` crashed on ``cmap[c]``; it now falls back to seaborn's
            automatic colours.
    """
    # All genomes together.
    fig, ax = plt.subplots()
    _plot_codon_lowess(df, codons, cmap, ax)
    plt.show()

    # bacteria vs archaea
    fig, axes = plt.subplots(1, 2, sharex="all", sharey="all")
    for t, ax in zip(["Bacteria", "Archaea"], axes.ravel()):
        _plot_codon_lowess(df[df["Type"] == t], codons, cmap, ax)
        ax.set_title(t)
    plt.show()

    # group
    fig, axes = plt.subplots(2, 2, sharex="all", sharey="all")
    for t, ax in zip(list("ABCD"), axes.ravel()):
        _plot_codon_lowess(df[df["GENOME_TYPE"] == t], codons, cmap, ax)
        ax.set_title(t)
    plt.show()

예제 #47

0

파일 보기

파일: Data Analysis.py 프로젝트: yeachan153/Methodology-Consulting

# Report how many RMes students fall below the 120 EC cut-off, then drop them.
print(
    str(RMes.shape[0] - RMes[RMes['EC taken'] >= 120].shape[0]) +
    ' RMes students removed with <120ECs')
RMes_filtered = RMes[RMes['EC taken'] >= 120]

# 3a) Checking normality
stats.kstest(RMes_filtered['thesis_grades'], 'norm')
stats.kstest(RMes_filtered['EC taken'], 'norm')
# 3b) Spearmans
stats.spearmanr(RMes_filtered['thesis_grades'],
                RMes_filtered['EC taken'])  # rs = 0.0572, p = 0.520

fig, ax = plt.subplots(figsize=(45, 24))
# x/y by keyword (newer seaborn releases reject positional data arguments)
# and drawn explicitly on the axes created above.
sns.regplot(x=RMes_filtered['EC taken'],
            y=RMes_filtered['thesis_grades'],
            color='black',
            y_jitter=0.05,
            ax=ax)
plt.xlabel('ECs taken', fontsize=35)
plt.ylabel('Thesis Grade', fontsize=35)
plt.title('ECs against thesis grade (RMes)', fontsize=35)
# NOTE(review): set_style is called after the axes already exist, so it only
# affects figures created later -- confirm that is intended.
sns.set_style('darkgrid')
plt.xticks(fontsize=25)
plt.yticks(fontsize=25)
ax.set_ylim(5, 10.1)
ax.set_xlim(119, 156)
# The statistics in this annotation are hard-coded, not computed above.
ax.text(145, 5.2, 'rs = 0.0572, p = 0.520, n = 129', fontsize=50)
plt.show()

# 4a) MSc all spec
stats.kstest(MSc_filtered['thesis_grades'], 'norm')
stats.kstest(MSc_filtered['EC taken'], 'norm')

예제 #48

0

파일 보기

파일: plotting_author_versus_distribution.py 프로젝트: eliseking/ScienceAccessibility


# Single-row frame: reading level mean_a + bwo paired with the first height.
data2 = pd.DataFrame({
'Standard Reading Level': [mean_a+bwo],
    'CDF': [heights[0]]
    })


# Reading-level values paired with their CDF values.
data1 = pd.DataFrame({
'Standard Reading Level': x_sub_set,
    'CDF': std_plot_ind
    })

# Font settings for a legend (not used within this excerpt).
legend_properties = {'weight':'bold','size':8}

# Benchmark points only (no regression line), as green circles.
ax = sns.regplot(data=benchmarks, x="benchmarks", y="CDF", fit_reg=False, marker="o", color="green")


#bbox_props = dict(boxstyle="rarrow", fc=(0.8,0.9,0.9), ec="b", lw=2)

#t = ax.text(0, 0, "Direction", ha="center", va="center", rotation=90,
#            size=15,
#            bbox=bbox_props)
#import pdb; pdb.set_trace()
#bmark_heights.reverse()
# Debug output: print every benchmark height.
for i in bmark_heights:
    print(i)
#import pdb
#pdb.set_trace()
# Counter initialised for the loop that follows.
cnt=0
for i,j,k in zip(bmark_stats_items[0:-1],bmark_heights[0:-1],categories):

예제 #49

0

파일 보기

파일: modeltrain.py 프로젝트: mohit36/Resume-classification-and-scoring

# Share of each non-null Status value as a pie chart.
df[df['Status'].notnull()]['Status'].value_counts().plot(kind = 'pie', autopct='%1.1f%%')
#plt.title('status Partitions')
#plt.show()

# Mean accuracy per Status, then its distribution per Status as boxplots.
print(df[['Status', 'accuracy']][df.Status.notnull()].groupby('Status').mean())
df[df['Status'] != 'None'].boxplot(column = ['accuracy'], by = ['Status'])
plt.title('')
plt.show()
ax = sns.boxplot(x="Status", y="accuracy", hue="Status",data=df, linewidth=2.5)
plt.show()
X = np.array(d1)
# Encode Status as integers; the encoded column is also kept as y.
y=df['Status'] = df['Status'].map({'completed': 1, 'parsed with error': 2,'drop case': 0})


# x/y by keyword: newer seaborn releases reject positional data arguments.
sns.regplot(x=X, y=y, data=df, fit_reg=False)
plt.show()

import numpy as np
import matplotlib.pyplot as plt

# Create data
import seaborn as sns
import matplotlib.pyplot as plt
# Frequency of each distinct accuracy value.
carrier_count = df['accuracy'].value_counts()
sns.set(style="darkgrid")
sns.barplot(x=carrier_count.index, y=carrier_count.values, alpha=0.9)

plt.title('Frequency Distribution of Carriers')
plt.ylabel('Number of Occurrences', fontsize=12)
plt.xlabel('accuracy', fontsize=12)

예제 #50

0

파일 보기

파일: HDI_STEM.py 프로젝트: jlricon/open_nintil

(sns.factorplot(x="SES",hue="Gender",y="Proportion who chooses\neach career group",
                data=spanish_data.rename(columns={"value":"Proportion who chooses\neach career group"}),
                col="Field",
                legend_out=False,
                order=["Low","High"],col_wrap=4)
    )
    # %%
plt.close()
target="loggdppc"
# Ratio of the female engineering share to the female STEM share. The result
# of assign() must be bound: the original discarded it, so the "Engistem"
# column used below was never created by this line.
merged_all = merged_all.assign(Engistem=lambda x: x.hy_f_engi/x.hy_f_STEM)
#merged_all.query("Nrrent<20").plot.scatter("loggdppc","Engistem")
# Country means: marker size from sqrt(pop), colour from Nrrent.
(merged_all.groupby("Country").mean().pipe(lambda x: x.plot
 .scatter(target,"Engistem",s=x["pop"].pipe(lambda i: np.sqrt(i)),
          c=x["Nrrent"],cmap="viridis")
 ))
# Lowess trend over countries with pop > 3000, drawn on the same axes.
sns.regplot(x=target,y="Engistem",data=merged_all.groupby("Country").mean().query("pop>3000"),lowess=True,scatter=False,ax=plt.gca())
plt.ylabel("% female Engineering / % female STEM")
# NOTE(review): the label says SIGI but the plotted x variable is `target`
# ("loggdppc") -- confirm which is intended.
plt.xlabel("SIGI (0=less legal discrimination)")
# %%
pa="Reds"
# k has to exist before the palette is sized from it (the original used k
# one line before defining it).
k=merged_all.query("loggdppc<9.5")
pal=sns.palettes.color_palette(pa,len(k))
plt.close()
#sns.set_palette(pa,len(k))
sns.set_palette( "deep")
plt.figure()
jp.joyplot(k,column="hy_f_STEM",by="TIME")
# One vertical mean marker per TIME group, each in its own palette colour
# (the original never advanced the colour index, so all shared one colour).
for i,(name,group) in enumerate(k.groupby("TIME")):
    #sns.kdeplot(group.hy_f_STEM,ax=plt.gca(),label=name)
    plt.axvline(group.hy_f_STEM.mean(),c=pal[i],ymax=0.5)

예제 #51

0

파일 보기

plt.ylabel('Number of Super Bowls')
plt.show()

# Display the closest game(s) (1-point difference) and the biggest blowouts
# (difference of 35 points or more)
print(super_bowls[super_bowls['difference_pts'] == 1])
print(super_bowls[super_bowls['difference_pts'] >= 35])

## Do blowouts translate to lost viewers?
# Join game and TV data, filtering out SB I because it was split over two networks
games_tv = pd.merge(tv[tv['super_bowl'] > 1], super_bowls, on='super_bowl')

# Import seaborn
import seaborn as sns

# Create a scatter plot with a linear regression model fit
# NOTE(review): no plt.figure()/plt.show() separates this plot from the
# subplots below, so outside a notebook they land on the same figure --
# confirm whether that is intended.
sns.regplot(x="difference_pts", y="share_household", data=games_tv)

## Viewership and the ad industry over time
# Create a figure with 3x1 subplot and activate the top subplot
plt.subplot(3, 1, 1)
plt.plot(games_tv.super_bowl, games_tv.avg_us_viewers, color='#648FFF')
plt.title('Average Number of US Viewers')

# Activate the middle subplot
plt.subplot(3, 1, 2)
plt.plot(games_tv.super_bowl, games_tv.rating_household, color='#DC267F')
plt.title('Household Rating')

# Activate the bottom subplot (ad cost per Super Bowl)
plt.subplot(3, 1, 3)
plt.plot(games_tv.super_bowl, games_tv.ad_cost, color='#FFB000')

예제 #52

0

파일 보기

파일: 03LinearRegression-MultiAlgos-BostonHousing.py 프로젝트: Manoj123-github/DSAI

    sns.distplot(colValues, bins=7, kde=False, color='b')
    plt.title(colName)
    plt.ylabel(colName)
    plt.xlabel('Bins')
    plt.show()

# scatterplots
# one regression scatter plot of every other column against depVars
print('\n*** Scatterplot ***')
colNames = df.columns.tolist()
colNames.remove(depVars)
# Show which columns will be plotted (the original printed `colName`, a
# stale variable left over from the previous loop).
print(colNames)
for colName in colNames:
    colValues = df[colName].values
    plt.figure()
    sns.regplot(data=df, x=depVars, y=colName, color= 'b', scatter_kws={"s": 5})
    plt.title(depVars + ' v/s ' + colName)
    plt.show()

# class count plot
# change as required
colNames = ["CHAS","RAD"]
print("\n*** Distribution Plot ***")
for colName in colNames:
    plt.figure()
    # x by keyword: newer seaborn releases treat the first positional
    # argument as `data`, not as the x variable.
    sns.countplot(x=df[colName],label="Count")
    plt.title(colName)
    plt.show()


################################

예제 #53

0

파일 보기

def _save_distplot(values, axlabel, file_stem, color=None, hist_kws=None,
                   tight=True):
    """Plot the distribution of *values* and save it as a timestamped PNG.

    The file is written to ``figures/<file_stem>_<HHMMSS-YYYYmmdd>.png`` and
    the figure is closed afterwards. ``tight`` toggles ``bbox_inches='tight'``.
    """
    fig, ax = plt.subplots(figsize=(12, 12))
    sns.distplot(values, color=color, hist_kws=hist_kws, axlabel=axlabel,
                 ax=ax)
    timestr = time.strftime("%H%M%S-%Y%m%d")
    figpath = os.path.join("figures", file_stem + '_' + timestr + '.png')
    plt.savefig(figpath, bbox_inches='tight' if tight else None, dpi=300)
    plt.close()


def plot_mev_miv(df_radiomics):
    """
    Save distribution plots of the ablation metrics and a regression of the
    EAV:PAV ratio against the ablation-volume irregularity (MEV - MIV).

    Written to the ``figures`` directory (created if missing), each with a
    timestamp in its name: histograms of energy, MEV-MIV, MEV, MIV, EAV and
    PAV, followed by the R(EAV:PAV) vs MEV-MIV regression plot.

    Rows with MEV >= 150 or MIV > MEV are dropped after the energy histogram
    and before everything else.

    :param df_radiomics: DataFrame with the columns
        'Predicted_Ablation_Volume', 'Ablation Volume [ml]', 'Energy [kj]',
        'Device_name', 'Inner Ellipsoid Volume', 'Outer Ellipsoid Volume'
    :return: None
    """
    font = {'family': 'DejaVu Sans', 'size': 18}
    matplotlib.rc('font', **font)
    # savefig does not create missing directories.
    os.makedirs("figures", exist_ok=True)

    df = pd.DataFrame()
    df['PAV'] = df_radiomics['Predicted_Ablation_Volume']
    df['EAV'] = df_radiomics['Ablation Volume [ml]']
    df['Energy (kJ)'] = df_radiomics['Energy [kj]']
    df['MWA Systems'] = df_radiomics['Device_name']
    df['MIV'] = df_radiomics['Inner Ellipsoid Volume']
    df['MEV'] = df_radiomics['Outer Ellipsoid Volume']
    df['MEV-MIV'] = df['MEV'] - df['MIV']
    df['R(EAV:PAV)'] = df['EAV'] / df['PAV']

    edged_mid = {"ec": 'black', "align": "mid"}
    edged = {"ec": 'black'}
    reddish = sns.xkcd_rgb["reddish"]

    # Energy histogram uses all rows (drawn before the outlier filtering).
    _save_distplot(df['Energy (kJ)'], 'Energy', 'Energy_distribution',
                   hist_kws=edged_mid)

    # drop outer volumes larger than 150 because they are probably erroneous
    df = df[df['MEV'] < 150]
    # drop the rows where MIV > MEV
    # since the minimum inscribed ellipsoid (MIV) should always be smaller than the maximum enclosing ellipsoid (MEV)
    df = df[df['MEV-MIV'] >= 0]
    min_val = int(min(df['MEV-MIV']))
    max_val = int(max(df['MEV-MIV']))
    print('Min Val Mev-Miv:', min_val)
    print('Max Val Mev-Miv:', max_val)
    print('nr of samples for mev-miv:', len(df))

    # %% histograms of the filtered data
    _save_distplot(
        df['MEV-MIV'],
        'Distribution of Ablation Volume Irregularity (MEV-MIV) (mL)',
        'MEV-MIV_distribution',
        color=reddish,
        hist_kws=edged_mid)
    _save_distplot(df['MEV'], 'MEV', 'MEV_distribution', color=reddish,
                   hist_kws=edged)
    # The remaining three were saved without bbox_inches='tight' originally;
    # that is preserved.
    _save_distplot(df['MIV'], 'MIV', 'MIV_distribution', color=reddish,
                   hist_kws=edged, tight=False)
    _save_distplot(df['EAV'], 'EAV', 'EAV_distribution', color=reddish,
                   hist_kws=edged, tight=False)
    _save_distplot(df['PAV'], 'PAV', 'PAV_distribution', color=reddish,
                   hist_kws=edged, tight=False)

    # %%   R (EAV:PAV) on y-axis and MEV-MIV on the x-axis
    fig1, ax5 = plt.subplots(figsize=(12, 12))
    # The third value returned by linregress is the correlation coefficient r
    # (not r squared), which is what the legend label reports.
    _, _, r_value, p_value, _ = stats.linregress(
        df['R(EAV:PAV)'], df['MEV-MIV'])
    print('p-val mev miv energy:', p_value)
    print()
    sns.regplot(y="R(EAV:PAV)",
                x="MEV-MIV",
                data=df,
                scatter_kws={
                    "s": 100,
                    "alpha": 0.5
                },
                color=reddish,
                line_kws={'label': r'$r = {0:.2f}$'.format(r_value)},
                ax=ax5)
    plt.xlabel('MEV-MIV (mL)')
    plt.legend()

    timestr = time.strftime("%H%M%S-%Y%m%d")
    # Explicit extension, consistent with the other figures.
    figpath = os.path.join(
        "figures", 'Ratio_EAV-PAV_MEV-MIV_difference_' + timestr + '.png')
    plt.savefig(figpath, dpi=300, bbox_inches='tight')
    plt.close()

예제 #54

0

파일 보기

파일: term_project-2.py 프로젝트: RakeshBattineedi/User-Review-Classification-and-Restaurant-Business-Data-Analysis-on-Yelp-Dataset

# Unpack the remaining positional fields of each collected city row.
weighted_ratio = [item[3] for item in data_city]
categories =[item[4] for item in data_city]
category_count=[item[5] for item in data_city]
# Rebind data_city: from the list of rows to a dict of columns, then to a DataFrame.
data_city = {"city" : city, "review_count": review_count, "stars":stars, "weighted_ratio":weighted_ratio,"categories":categories,"category_count":category_count}
data_city=pd.DataFrame(data_city)
# Per city: total review count and mean stars, sorted by review count (descending).
city_business_reviews = data_city[['city', 'review_count', 'stars']].groupby(['city']).agg({'review_count':'sum','stars': 'mean'}).sort_values(by='review_count', ascending=False)
city_business_reviews['review_count'][0:20].plot(kind='bar', stacked=False, figsize=[10,10],colormap='winter')
plt.title('Top 20 cities by reviews')

# Per city: summed weighted ratio, sorted descending.
city_weighted_ratio =data_city[['city', 'weighted_ratio']].groupby(['city']).agg({'weighted_ratio': 'sum'}).sort_values(by='weighted_ratio', ascending=False)

city_weighted_ratio['weighted_ratio'][0:20].plot(kind='bar', stacked=False, figsize=[10,10],colormap='summer')
plt.title('Top 20 cities by Weighted Rating')

import seaborn as sns
# Scatter of category count against stars (no regression line).
sns.regplot(x=data_city["stars"], y=data_city["category_count"], fit_reg=False)

# Load the categories CSV with Spark and cast the two numeric columns to double.
categories_data=spark.read.csv("categories.csv", header=True)
categories =categories_data.withColumn("stars", categories_data["stars"].cast(DoubleType()))
categories =categories.withColumn("review_count", categories_data["review_count"].cast(DoubleType()))

# Collect the selected columns to the driver as a list of rows.
category=categories.select("city","categories","state","stars","review_count").collect()

# Unpack the rows into per-column lists.
# NOTE: `categories` is rebound here from the Spark DataFrame to a plain list.
city =  [item[0] for item in category]
categories = [item[1] for item in category]
state =  [item[2] for item in category]
stars = [item[3] for item in category]
review_count=[item[4] for item in category]
category = {"city" : city, "categories": categories, "state":state, "stars":stars,"review_count":review_count}

df=pd.DataFrame(category)

예제 #55

0

파일 보기

# Top-left: distribution of pledged values (log column) per project state.
ax1 = plt.subplot(221)
ax1 = sns.violinplot(x="state", y="pledge_log", data=df_kick, palette="hls")
# Rotate via tick_params: re-assigning get_xticklabels() would freeze the label
# texts that exist before the figure is drawn.
ax1.tick_params(axis="x", labelrotation=45)
ax1.set_title("Understanding the Pledged values by state", fontsize=15)
ax1.set_xlabel("State Description", fontsize=12)
ax1.set_ylabel("Pledged Values(log)", fontsize=12)

# Top-right: distribution of goal values (log column) per project state.
ax2 = plt.subplot(222)
ax2 = sns.violinplot(x="state", y="goal_log", data=df_kick)
ax2.tick_params(axis="x", labelrotation=45)
ax2.set_title("Understanding the Goal values by state", fontsize=15)
ax2.set_xlabel("State Description", fontsize=12)
ax2.set_ylabel("Goal Values(log)", fontsize=12)

# Bottom row (full width): goal vs. pledged scatter with a regression fit.
ax0 = plt.subplot(212)
ax0 = sns.regplot(x="goal_log", y="pledge_log", data=df_kick, x_jitter=False)
ax0.set_title("Better view of Goal x Pledged values", fontsize=15)
ax0.set_xlabel("Goal Values(log)")
ax0.set_ylabel("Pledged Values(log)")
# Numeric axis: only change the rotation and let the tick locator/formatter keep
# producing the label texts.
ax0.tick_params(axis="x", labelrotation=90)
plt.show()

# <h2>Analysing further the categories: </h2>
# - Successful category's frequency
# - Failed category's frequency
# - General Goal Distribution by Category

# In[10]:

main_cats = df_kick["main_category"].value_counts()
main_cats_failed = df_kick[df_kick["state"] ==

예제 #56

0

파일 보기

X2_test = sc_X.transform(X_test)

# Optional statsmodels OLS summary (disabled):
# X_sm = sm.add_constant(X)
# model = sm.OLS(y,X_sm)
# print(model.fit().summary())

# Fit an ordinary least-squares model and report its error metrics on the test split.
lm1 = LinearRegression()
lm1.fit(X_train, y_train)
lm1_pred = lm1.predict(X_test)
print('Linear Regression Performance:')
print('MAE:', metrics.mean_absolute_error(y_test, lm1_pred))
print('RMSE:', np.sqrt(metrics.mean_squared_error(y_test, lm1_pred)))
print('R2_Score: ', metrics.r2_score(y_test, lm1_pred))

# Actual vs. predicted scatter with a fitted trend line.
fig = plt.figure(figsize=(8, 5))
# x/y must be passed by keyword: recent seaborn accepts only `data` positionally.
sns.regplot(x=y_test, y=lm1_pred, color='g')
plt.xlabel('COA')
plt.ylabel('Predictions')
plt.title('LinearRegression Prediction Performance ')
plt.grid()
plt.show()
# print('Estimated coefficients for the linear regression: ',lm1.coef_)
# print('Independent term: ', lm1.intercept_)

# import pickle
# filename = 'LinearRegression.sav'
# pickle.dump(lm1, open(filename, 'wb'))
# # load the model from disk
# loaded_model = pickle.load(open(filename, 'rb'))
# result = loaded_model.score(X_test, Y_test)
# print(result)

예제 #57

0

파일 보기

파일: plot_split_timeseries.py 프로젝트: epfl-ada/ada-2020-project-milestone-p3-p3_0x414441

def split_timeseries_figures(in_frames,
                             names,
                             split_at=PRISM_DATE,
                             same_plot=True,
                             **kwargs):
    """
    Plot time series with separate regression fits before and after a split date.

    Takes the input data and creates a plot similar to figure 4a in the paper.

    :param in_frames: A pandas data frame or an iterable of data frames, each with the dates
        (PeriodIndex) as index and a column named after ``keyword`` holding the values
    :param names: Name or list of names for the incoming data frames, one per frame (needed for the legend)
    :param split_at: The date at which the data shall be split
    :param same_plot: If set false, a subplot will be created for each dataframe in in_frames.
        If true, they will be plotted on the same axis. Forced to true when only one frame is given.
    :param kwargs: Optional settings: ``title`` (figure title), ``figsize``, ``keyword`` (column to
        plot, default ``'views'``), ``sharey``, ``legend`` (only honoured when same_plot is true),
        ``colors`` (mapping name -> colour, not modified) and ``cmap`` (colormap name used when
        ``colors`` is not given)
    :return: Nothing; the figure is built through pyplot
    """

    # Default values that can be changed
    title = kwargs.get('title', '')
    figsize = kwargs.get('figsize', [18, 6])
    keyword = kwargs.get('keyword', 'views')
    sharey = kwargs.get('sharey', True)
    show_legend = kwargs.get('legend', True) and same_plot

    # Make sure we can iterate over the input arguments, even if only a single frame/name is given
    if isinstance(in_frames, pd.DataFrame):
        in_frames = [in_frames]
    if isinstance(names, str):
        names = [names]
    assert len(names) == len(
        in_frames
    ), "{} dataframes but {} names specified. This should be equal".format(
        len(in_frames), len(names))
    nr_subplots = len(in_frames)
    if nr_subplots == 1:
        same_plot = True  # 'separate plots' is meaningless for a single frame

    dfs_to_plot = []
    for in_frame in in_frames:
        # Prepare the dataframe for seaborn (https://stackoverflow.com/questions/52112979/having-xticks-to-display-months-in-a-seaborn-regplot-with-pandas)
        # Seaborn has issues handling datetimes, so for the computation they are transformed to numbers
        # and turned back into dates for the tick labels later on.
        dataframe = in_frame.copy()
        dataframe.index = dataframe.index.to_timestamp()
        dataframe['date_ordinal'] = mdates.date2num(dataframe.index)
        dfs_to_plot.append(dataframe)

    # Colour definition for plotting and the legend
    split_label = 'Prism Disclosure, 6/6/2013'
    if 'colors' in kwargs:
        # Work on a copy so the caller's mapping does not silently gain the split-line entry
        colors = dict(kwargs['colors'])
    else:
        # pyplot's get_cmap is used because cm.get_cmap is no longer available in recent Matplotlib
        cmap = plt.get_cmap(kwargs.get('cmap', 'Set1'))
        cmap_colors = getattr(cmap, 'colors', None)
        if cmap_colors is None:
            # Colormaps without a discrete colour list: sample one colour per name instead
            steps = max(len(names) - 1, 1)
            cmap_colors = [cmap(k / steps) for k in range(len(names))]
        colors = {
            name: cmap_colors[i % len(cmap_colors)]
            for i, name in enumerate(names)
        }
    colors[split_label] = 'red'

    # The x-axis is shared between all series (same axis, or sharex=True), so the limits must
    # cover every frame; setting them per frame would leave only the last frame's range.
    if dfs_to_plot:
        x_min = min(df['date_ordinal'].min() for df in dfs_to_plot) - 15  # 15 days offset
        x_max = max(df['date_ordinal'].max() for df in dfs_to_plot) + 15

    # Starting to build the actual plot
    if same_plot:
        fig, ax = plt.subplots(figsize=figsize)
        show_every_nth_month = 1  # There is enough space for every month to be displayed
    else:
        n_cols = math.ceil(math.sqrt(nr_subplots))
        n_rows = math.ceil(nr_subplots / n_cols)
        # squeeze=False always yields a 2-D array of axes, whatever the grid shape
        fig, axs = plt.subplots(ncols=n_cols,
                                nrows=n_rows,
                                sharex=True,
                                sharey=sharey,
                                figsize=figsize,
                                squeeze=False)
        show_every_nth_month = n_cols  # It gets tight if we display every month

    for i, (name, dataframe) in enumerate(zip(names, dfs_to_plot)):
        if not same_plot:
            row, col = divmod(i, n_cols)
            ax = axs[row, col]

        series_color = colors[name]
        before = dataframe.loc[dataframe.index < split_at]
        after = dataframe.loc[dataframe.index >= split_at]
        # One independent regression per side of the split date
        for segment in (before, after):
            sns.regplot(x='date_ordinal',
                        y=keyword,
                        data=segment,
                        ax=ax,
                        color=series_color,
                        scatter_kws={
                            'color': series_color,
                            's': 30
                        },
                        line_kws={'color': series_color})
        # Per default, we do not really want a label on every subplot
        ax.set(xlabel='', ylabel='')
        if not show_legend:
            ax.set_title(name)  # Some minimum information should be there

        # Tune the visuals
        ax.set_xlim(x_min, x_max)
        # Vertical marker at the split date spanning the full axes height
        ax.vlines(mdates.date2num(split_at),
                  0,
                  1,
                  color=colors[split_label],
                  transform=ax.get_xaxis_transform(),
                  label=split_at)
        if (not same_plot) and (col == 0) and (not sharey):
            # Assumes there are no negative values
            ax.set_ylim(dataframe[keyword].min() * 0.9,
                        dataframe[keyword].max() * 1.1)
            ax.set_ylabel(keyword)
        if (not same_plot) and (row + 1 == n_rows):
            # The dates were transformed to numbers for seaborn; format the bottom-row
            # tick labels as human-readable dates again.
            loc = mdates.MonthLocator(interval=show_every_nth_month)
            ax.xaxis.set_major_locator(loc)
            ax.xaxis.set_major_formatter(mdates.AutoDateFormatter(loc))

    # Formatting and Optics
    fig.patch.set_facecolor('lightgrey')
    fig.suptitle(title)

    # Axis settings
    if same_plot:
        ax.set_xlabel('Month / Year')
        ax.set_ylabel(keyword)
        # Same date formatting as above, for the single shared axis
        loc = mdates.MonthLocator(interval=show_every_nth_month)
        ax.xaxis.set_major_locator(loc)
        ax.xaxis.set_major_formatter(mdates.AutoDateFormatter(loc))

    fig.autofmt_xdate(rotation=60)

    if show_legend:
        # Add a custom legend: one patch per colour entry (series names plus the split marker)
        legend_patches = [
            mpatches.Patch(color=c, label=entry) for entry, c in colors.items()
        ]
        #  legend_patches.append(mpatches.Patch(color='lightgrey', label='95% Confidence Interval'))
        font = FontProperties()
        font.set_size('large')
        plt.legend(handles=legend_patches,
                   title='Legend',
                   bbox_to_anchor=(0, 0),
                   loc='lower left',
                   prop=font,
                   ncol=math.ceil(math.sqrt(nr_subplots)),
                   fancybox=True,
                   shadow=True)

예제 #58

0

파일 보기

파일: 190911_monteCarlo.py 프로젝트: tsgouvea/ValueTimeInv

    mySession.loc[mySession.stim == stim, 'istim'] = istim
    mySession.loc[mySession.perc == stim, 'iperc'] = istim
# Cast the index columns to int by replacing the whole column. Assigning through
# `.loc[:, col]` writes into the existing column in place on recent pandas, which
# keeps the old dtype and silently drops the cast.
mySession['istim'] = mySession['istim'].astype(int)
mySession['iperc'] = mySession['iperc'].astype(int)

# One row per trial, one column per distinct stimulus index; only the first row is initialised.
rho = pd.DataFrame(index=np.arange(nTrials),
                   columns=np.sort(mySession.istim.drop_duplicates()))
rho.loc[0, :] = 1

# One row per entry along the first axis of `tbf`, same columns as rho; initialised to 1.
W = pd.DataFrame(index=np.arange(tbf.shape[0]), columns=rho.columns)
W.loc[:, :] = 1

#%%
# Logistic fit of the left-choice indicator against the stimulus (no confidence band).
sns.regplot(x='stim',
            y='isChoiceLeft',
            data=mySession,
            logistic=True,
            ci=None,
            y_jitter=0.01)
plt.show()

#%% INITIAL CONDITIONS

# mySession.loc[0,'waitingTime'] = np.random.choice(np.arange(tbf.shape[1]), 1, p=pnorm((tbf.T @ W).values[:,mySession.iperc[0]])).item()
#
# mySession.loc[0,'feedbackTime'] = truncExp(1.5, .5, 8)

#%%
# hf[ipair], ha[ipair] = plt.subplots(1, 3, figsize=(10, 3))
hf, ha = plt.subplots(1, 3)

# %%

예제 #59

0

파일 보기

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

sns.set_theme()

# Yearly temperature and rainfall tables; the rainfall column is copied onto the
# temperature frame (pandas aligns the two on their row index).
tem = pd.read_csv("CSV_files/first/tempYearly.csv")
rai = pd.read_csv("CSV_files/first/rainYearly.csv")
tem["Rainfall"] = rai["Rainfall"]

# Keep only rows with Rainfall in [0, 10) and Temperature in [0, 50).
rainfall_ok = (0.0 <= tem["Rainfall"]) & (tem["Rainfall"] < 10.0)
temperature_ok = (0.0 <= tem["Temperature"]) & (tem["Temperature"] < 50.0)

sns.regplot(x="Rainfall", y="Temperature", data=tem[rainfall_ok & temperature_ok])

plt.show()

예제 #60

0

파일 보기

파일: tmb_combine_all.py 프로젝트: sishirsubedi/commonTools

                     df_join['TMB-Foundation-Value'])[0], 3)) + "/" +
          str(
              round(
                  st.pearsonr(df_join['TMB-Total-Variants'],
                              df_join['TMB-Foundation-Value'])[0], 3)) + "/" +
          str(round(r2_score(y, y_slr), 3)))
plt.savefig("Fig_4_TMB_HMH_Foundation_Variants_Score_ScatterPlot_main.png")
plt.close()

### caris vs Foundation
# Restrict to the Caris rows once and reuse the subset below.
caris_rows = df_join[df_join['Source'] == "Caris"]
caris_variants = caris_rows['TMB-Total-Variants'].values
x1 = np.reshape(caris_variants, (-1, 1))  # column vector for the regression helper
y1 = caris_rows['TMB-Foundation-Value'].values
y1_slr = fitData(x1, y1, "simple-lr")
sns.regplot(x=x1, y=y1, ci=None)

# Title carries Spearman / Pearson / R^2, each rounded to three decimals.
caris_spearman = round(st.spearmanr(x1, y1)[0], 3)
caris_pearson = round(st.pearsonr(caris_variants, y1)[0], 3)
caris_r2 = round(r2_score(y1, y1_slr), 3)
plt.title("TMB Score Comparison-Caris, S/P/R^2= " +
          "/".join([str(caris_spearman), str(caris_pearson), str(caris_r2)]))
plt.savefig("TMB_HMH_Variants_Foundation_Score_ScatterPlot_Caris.png")
plt.close()

# Same preparation for the Foundation rows: filter once, then build the inputs.
foundation_rows = df_join[df_join['Source'] == "Foundation"]
x2 = np.reshape(foundation_rows['TMB-Total-Variants'].values, (-1, 1))
y2 = foundation_rows['TMB-Foundation-Value'].values
y2_slr = fitData(x2, y2, "simple-lr")