def firmsOneRandSeedPlot(df, rSeed, varsToDraw, firmGroup, argsPlot, cols):
    """Plot the variables in ``varsToDraw`` for a single random seed.

    The rows of ``df`` whose ``randomSeed`` equals ``rSeed[0]`` are melted to
    long format (``variable`` / ``value`` columns) and passed to
    ``sns.relplot`` together with the keyword arguments in ``argsPlot``.

    Parameters:
        df: DataFrame holding ``randomSeed``, ``tick``, the ``firmGroup``
            column and every column named in ``varsToDraw``.
        rSeed: sequence of seeds; only the first element is used.
        varsToDraw: list of column names to plot.
        firmGroup: name of the grouping column kept as an id variable.
        argsPlot: dict of extra ``sns.relplot`` keyword arguments. It is
            left untouched; ``data`` and ``col_wrap`` are added to a copy.
        cols: upper bound for ``col_wrap``; capped at ``len(varsToDraw)``.
    """
    seedRows = df[df.randomSeed == rSeed[0]]
    tmpDF = seedRows.loc[:, ['tick', firmGroup] + varsToDraw]

    tmpDF = tmpDF.melt(id_vars=['tick', firmGroup], value_vars=varsToDraw)

    cols = min(cols, len(varsToDraw))

    # Work on a copy so 'data' and 'col_wrap' do not leak into the caller's
    # dict and from there into later, unrelated plot calls.
    plotArgs = dict(argsPlot)
    plotArgs.update({'data': tmpDF, 'col_wrap': cols})

    sns.relplot(**plotArgs)

    return
def firmsMultiPlot(df, rSeeds, varsToDraw, firmGroup, argsPlot, cols, rows):
    """Plot a grid with one column per random seed and one row per variable.

    Parameters:
        df: DataFrame holding ``randomSeed``, ``tick``, the ``firmGroup``
            column and every column named in ``varsToDraw``.
        rSeeds: sequence of seeds; only the first ``cols`` are used.
        varsToDraw: list of column names; only the first ``rows`` are used.
        firmGroup: name of the grouping column kept as an id variable.
        argsPlot: dict of extra ``sns.relplot`` keyword arguments. It is
            left untouched; ``col``, ``row`` and ``data`` are set on a copy.
        cols: maximum number of seeds (grid columns).
        rows: maximum number of variables (grid rows).
    """
    # Truncate random seeds and variables to the requested grid size
    rSeeds = rSeeds[:cols]
    varsToDraw = varsToDraw[:rows]

    idCols = ['randomSeed', 'tick', firmGroup]
    tmpDF = df.loc[df.randomSeed.isin(rSeeds), idCols + varsToDraw]
    tmpDF = tmpDF.melt(id_vars=idCols, value_vars=varsToDraw)

    # Work on a copy so the caller's dict is not modified.
    plotArgs = dict(argsPlot)
    # seaborn rejects 'col_wrap' together with a 'row' facet, so drop any
    # value left over from a wrapped plot that shared the same dict.
    plotArgs.pop('col_wrap', None)
    plotArgs.update({'col': 'randomSeed', 'row': 'variable', 'data': tmpDF})
    sns.relplot(**plotArgs)

    return
def firmsOneVarPlot(df, rSeeds, varsToDraw, firmGroup, argsPlot, cols, rows):
    """Plot the first variable of ``varsToDraw`` for every seed, page by page.

    The seeds are split into pages of ``cols * rows`` seeds. Each page
    produces one ``sns.relplot`` figure with one facet per seed, wrapped
    after ``cols`` facets.

    Parameters:
        df: DataFrame holding ``randomSeed``, ``tick``, the ``firmGroup``
            column and every column named in ``varsToDraw``.
        rSeeds: sequence of seeds to plot.
        varsToDraw: list of column names; only ``varsToDraw[0]`` is drawn.
        firmGroup: name of the grouping column kept in the plotted data.
        argsPlot: dict of extra ``sns.relplot`` keyword arguments. It is
            left untouched; ``y``, ``col``, ``data`` and ``col_wrap`` are
            set on a copy.
        cols: number of facets per row.
        rows: number of facet rows per page.
    """
    keepCols = ['randomSeed', 'tick', firmGroup] + varsToDraw

    n = cols * rows
    pagsOfSeeds = [rSeeds[i:i + n] for i in range(0, len(rSeeds), n)]

    for pagOfSeeds in pagsOfSeeds:
        tmpDF = df.loc[df.randomSeed.isin(pagOfSeeds), keepCols]
        # Fresh copy per page so the caller's dict is never modified.
        plotArgs = dict(argsPlot)
        plotArgs.update({'y': varsToDraw[0],
                         'col': "randomSeed",
                         'data': tmpDF,
                         'col_wrap': cols
                         })

        sns.relplot(**plotArgs)

    return
            zip(
                factor_names + ['runksum', 'comparison', 'pvalue'],
                list(factors_values) +
                [[p_value_mock], [pvalue_type], [pvalue]]))
        #print(pvalue_dict)
        p_values_df = p_values_df.append(pd.DataFrame(pvalue_dict),
                                         ignore_index=True)

#sns.set(rc={'figure.figsize':(2,2)})
#sns.set(font_scale=2)
# One line plot of 'runksum' over 'threshold_factor' per metric type (columns)
# and comparison (rows), coloured by 'fb_type'. x/y/hue are passed by keyword
# because seaborn >= 0.12 no longer accepts them positionally.
g = sns.relplot(x='threshold_factor',
                y='runksum',
                hue='fb_type',
                col='metric_type',
                data=p_values_df,
                kind='line',
                row='comparison',
                col_order=['magnitude', 'n_spindles', 'duration', 'amplitude'],
                row_order=pvalue_types,
                height=2.5,
                palette=['#3CB4E8', '#438BA8', '#002A3B', '#FE4A49'])
#g.axes[0][0].semilogy()
#g.fig.set_size_inches(10,10)
[[ax.axhline(p, color='k', linestyle='--') for ax in g.axes.flatten()]
 for p in [-1.65, 1.65]]
for color, axes in zip(['#3CB4E8', '#438BA8', '#002A3B', '#FE4A49'], g.axes):
    [ax.axhspan(-1.65, 1.65, color=color, alpha=0.2) for ax in axes.flatten()]
#plt.tight_layout()

[ax.set_title('') for ax in g.axes.flatten()]
[
Beispiel #5
0
from sklearn.model_selection import GridSearchCV

data = pd.read_csv('1.csv', header=None)
data.columns = ['score', 'date']
features = ['score', 'date']
X = data[features]
x = data['date']
y = data['score']

plt.title("B00SKQFT4I")
plt.xlabel("date")
plt.ylabel("score")
plt.style.use('fivethirtyeight')
plt.plot(x, y)

plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(20))
plt.show()

sns.relplot(x="star", y="score", data=data)

km = KMeans(6)
km.fit(X)
data['cluster_k6'] = km.predict(X)
sns.relplot(x="star", y="score", hue="cluster_k6", palette="Set1", data=data)

param_test1 = {'n_clusters': np.arange(2, 11, 1)}
gsearch1 = GridSearchCV(estimator=KMeans(), param_grid=param_test1, cv=5)
gsearch1.fit(X)
score_list = -pd.DataFrame(gsearch1.cv_results_)['mean_test_score']
sns.lineplot(x=range(2, 11), y=score_list)
Beispiel #6
0
#weekmask = multiyear_df.index % 7 == 0  # Doesn't exactly work with groups.
# Do based on day of week?
multiyear_df['date'] = pd.to_datetime(multiyear_df['date'])

# Keep one observation per week: rows whose date falls on dayofweek == 0.
weekmask = multiyear_df.date.dt.dayofweek == 0
# Explicit copy so the column added below is written to an independent frame
# rather than to a slice of multiyear_df (avoids SettingWithCopyWarning).
multiyear_week_df = multiyear_df[weekmask].copy()
#
#
#train_test_mask = multiyear_week_df['snowyear'] % 2 == 0
#stats.describe(train_test_mask) # A frequency table would be better, but, eh
#multiyear_train = multiyear_week_df[train_test_mask]
#multiyear_test = multiyear_week_df[~train_test_mask]

sns.relplot(
    x='date',
    y='value',
    hue='stationtriplet',
    kind='line',
    data=multiyear_week_df)  # Egads, this is damn slow on full dataset.
sns.relplot(
    x='date',
    y='value',
    hue='stationtriplet_codes',
    kind='line',
    data=multiyear_week_df)  # Egads, this is damn slow on full dataset.

# Convert "date" to time elapsed since the earliest date.
# Series.min() skips missing dates, which the builtin min() does not handle.
datemin = multiyear_week_df['date'].min()
# NOTE: despite the column name, the stored value is days / 365, i.e. years.
multiyear_week_df['dayssincemindate'] = (
    (multiyear_week_df['date'] - datemin).dt.days) / 365.

# For coregionalization, form 2-column input/output with data in col1 and "dimension" (label) in col2
sns.kdeplot(dfMaster1Aug.EVI)
sns.kdeplot(dfMaster1Sep.EVI)



dfMasterApril.loc[(dfMasterApril.Latitude<36) & (dfMasterApril.Latitude>35)].NDVI
sns.kdeplot(dfMasterMarch.NDVI.loc[(dfMasterMarch.Latitude<32) & (dfMasterMarch.Latitude>30)])
sns.kdeplot(dfMasterApril.loc[(dfMasterApril.Latitude<36) & (dfMasterApril.Latitude>35)].NDVI)

                                  
#dflowlat = dfMaster.loc[(dfMaster.Latitude<37.5)]

#dfMaster is current, large and low ndvi dataset
#dfMaster1 is old, small and high ndvi value dataset

sns.relplot(x='NDVI', y='EVI', hue='year', data=dfMasterApril, legend='full')
sns.relplot(x='NDVI', y='EVI', hue='year', data=dfMaster1April, legend='full')

dfMaster.index.unique()
dfMaster1.index.unique()

dfMaster['cropland_mode'].value_counts()
dfMaster1['CDL'].value_counts()

dfMasterApril['cropland_mode'].value_counts()
dfMaster1April['CDL'].value_counts()

dfMaster.year.value_counts()
dfMaster1.year.value_counts()

# Rows of the April data whose cropland_mode equals 24. ``.loc`` is an
# indexer and must be subscripted with [] rather than called with ().
dfMasterApril.loc[dfMasterApril.cropland_mode == 24]
Beispiel #8
0
def plot(df,
         output_path,
         name,
         selectors,
         grid=None,
         hue=None,
         style=None,
         folder=None):
    """Draw ``value`` over ``episode`` for a selection of ``df`` and save a PNG.

    Parameters:
        df: DataFrame with at least ``episode`` and ``value`` columns plus
            the columns named by ``grid``, ``hue`` and ``style``.
        output_path: base directory for the image.
        name: file name without extension; also used in progress messages.
        selectors: mapping handed to ``selector(df, selectors)`` to choose
            rows. Entries whose value is not a list are shown in the title.
        grid: optional list of column names to facet on. The column with
            the most distinct values becomes ``col`` and the next one
            ``row``. The passed list is not modified.
        hue: optional column for line colour. A numeric column is converted
            to strings prefixed with ``#``.
        style: optional column for line style.
        folder: optional sub-directory of ``output_path``; created through
            ``ensure_directory`` when given.

    Raises:
        ValueError: if two rows share the same combination of the grid,
            hue, style and ``episode`` columns.
    """
    # Copy so that neither the caller's list nor a shared default is sorted
    # in place further down.
    grid = [] if grid is None else list(grid)

    w = selector(df, selectors)

    title = ' | '.join(f"{k}:{v}" for k, v in selectors.items()
                       if not isinstance(v, list))

    print(f'start plotting {name}')

    dfs = df[w].copy()

    # presumably makes seaborn treat a numeric hue as categorical - TODO confirm
    if (hue is not None) and pd.api.types.is_numeric_dtype(dfs[hue].dtype):
        dfs[hue] = "#" + dfs[hue].astype(str)

    # Series.items() replaces iteritems(), which pandas 2.0 removed.
    for col, iscat in (dfs.dtypes == 'category').items():
        if iscat:
            dfs[col] = dfs[col].cat.remove_unused_categories()

    # Every (grid..., hue, style, episode) combination must be unique
    groupby = [c for c in (grid + [hue, style, 'episode']) if c is not None]

    dfs = dfs.dropna(subset=groupby)

    df_dup = dfs.duplicated(subset=groupby)
    if df_dup.any():
        print(dfs[df_dup].sort_values(groupby)[groupby])
        raise ValueError('There are doublicates.')

    # Ascending by number of distinct values; reversed below so that the
    # column with the most values is mapped to 'col'.
    grid = sorted(grid, key=lambda l: dfs[l].nunique())

    facets = {n: g for g, n in zip(grid[::-1], ['col', 'row'])}

    facet_order = {
        f'{k}_order': sorted([n for n in dfs[v].unique() if not pd.isnull(n)])
        for k, v in facets.items()
    }

    g = sns.relplot(data=dfs,
                    x='episode',
                    y='value',
                    **facets,
                    **facet_order,
                    hue=hue,
                    style=style,
                    kind="line",
                    ci=None)
    plt.subplots_adjust(top=0.9)
    g.fig.suptitle(title)
    g.fig.patch.set_facecolor('white')
    if folder:
        ensure_directory(os.path.join(output_path, folder))
        filename = os.path.join(output_path, folder, f"{name}.png")
    else:
        filename = os.path.join(output_path, f"{name}.png")
    plt.savefig(filename)
    print(f'Saved {filename}')
    plt.close()
Beispiel #9
0
# Collect one DataFrame per (approach, representation, query type) result
# file, concatenate them and plot p5 against the number of examples.
df_array = []

# Unpack directly instead of binding the pair to a variable named ``tuple``,
# which would shadow the builtin for the rest of the module.
for approach, representation in zip(approaches, representations):
    print((approach, representation))
    # Label passed on to convert_result_string for this pair
    if representation != "":
        method = approach + "_" + representation
    else:
        method = approach
    for query_type in query_types:
        if approach == "unsupervised_lm" and (representation == "" or query_type == "triggers"):
            continue
        result_path = os.path.join(data_directory, trg_lang, "results", "data", src_lang, approach, representation, query_type, "output.res")
        print(result_path)
        # The context manager closes the file even if parsing fails.
        with open(result_path) as result_file:
            result_strings = result_file.readlines()
        data = [
            convert_result_string(result_string, method, query_type)
            for result_string in result_strings
        ]
        df = pd.DataFrame(data, columns = ["#examples", "p5", "p10", "p20", "mAP", "method", "query_type"])
        df_array.append(df)

all_df = pd.concat(df_array)
print(all_df)

sns_plot = sns.relplot(x="#examples", y="p5", col="query_type", row="method", height=3, kind="line", estimator=None, data=all_df)
sns_plot.savefig(os.path.join(data_directory, trg_lang, "results", "figures", "result.pdf"))
Beispiel #10
0
#sns.barplot(data=df_last, x='연도',y='평당분양가격', ci=None)
# Bar chart of '평당분양가격' by '연도', one facet per '지역명', four facets per row.
sns.catplot(data=df_last, x='연도', y='평당분양가격', kind='bar', col='지역명', col_wrap=4, ci=None)



# Draw '평당분양가격' by '연도' with lineplot, one hue per '지역명'
plt.figure(figsize=(10,5))
sns.lineplot(data=df_last, x='연도', y='평당분양가격',hue='지역명')
# Place the legend outside the axes, to the right of the plot
plt.legend(bbox_to_anchor=(1.02,1), loc=2, borderaxespad=0)



# relplot
# Instead of showing every group on one graph (as hue does), a separate graph is created for each group.
sns.relplot(data=df_last, x='연도', y='평당분양가격', kind='line', col='지역명', col_wrap=4, ci=None)

#### Draw a histogram of numeric data
# A graph that shows the distribution of numeric data.
# Only a single numeric variable should be passed in.
# bins: split the variable into n bins.
b = df_last['평당분양가격'].hist(bins=10)

# Draw a histogram with distplot
# distplot raises an error if there are missing values.
price = df_last.loc[df_last['평당분양가격'].notnull(), '평당분양가격'] # the column to use must also be specified
price

sns.distplot(price)

#kde, rug
Beispiel #11
0
 def graficoPersonasMuertas(self):
     """Draw a line plot of ``self.RegistroMuertos`` against its position index.

     The values go into a two-column frame (``horas`` = 0..n-1,
     ``personas_muertas`` = the recorded values) that ``sns.relplot`` plots.
     """
     registro = self.RegistroMuertos
     total = len(registro)
     # Reshape to a column, transpose and take the single row: a flat 1-D array
     columna = np.reshape(registro, (total, 1)).T[0]
     tabla = pd.DataFrame({'horas': range(0, total),
                           'personas_muertas': columna})
     sns.relplot(x="horas", y="personas_muertas", kind="line", data=tabla)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.collections import LineCollection
from matplotlib.colors import ListedColormap, BoundaryNorm

data = pd.read_csv("Tampen.txt",delimiter='\\t', engine = 'python')
data.to_csv('Tampen1.csv')


df = data[['YEAR', 'TOTALHS','TOTALTP']]

x = df['TOTALHS']

y = df['TOTALTP']

z = df['YEAR']


sns.set(style="darkgrid", rc={'figure.figsize':(12,8)})
sns.relplot(x="YEAR", y="TOTALHS",  kind="line", data=df);

Beispiel #13
0
from sklearn.model_selection import train_test_split as split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn import metrics
'''load the data_set'''

df_c = pd.read_csv('ctrucks_clean.csv').set_index('supplier').fillna(0.00)
df_c.columns
df_c['supplier'] = df_c.index

df_c.drop('Unnamed: 0', axis=1, inplace=True)

# relplot only supports kind="scatter" and kind="line"; the original "dist"
# is rejected. "line" is used because dashes/markers are line-plot options.
sns.relplot(x="supplier",
            y="nbags",
            col="year",
            dashes=False,
            markers=True,
            kind="line",
            data=df_c)

# Count of rows per supplier, one facet per year stacked vertically.
# x is passed by keyword: seaborn >= 0.12 no longer accepts it positionally.
g = sns.catplot(x="supplier",
                col="year",
                col_wrap=1,
                data=df_c,
                kind="count",
                height=3,
                aspect=6)
g.set_xticklabels(rotation=90)
plt.savefig('suppyear')

plt.figure(figsize=(10, 10))
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
df = pd.read_csv("air_df_mean.csv")
print(df.head(30))
sns.relplot(x="hour",y="NO_2_mean",data=df,kind="line",
            style="location",hue="location",
            markers=True,dashes=False)
plt.show()
Beispiel #15
0
    final_data.loc[:, 'hour'] = np.tile(range(0, 8760), 2)

    final_data.loc[:, 'solar_tech'] = solar_tech

    return final_data


# Make all the data
package_temps = pd.concat(
    [analyze_temp(st) for st in ['swh', 'ptc_notes', 'ptc_tes']],
    axis=0,
    ignore_index=True)

# Plot the results
palette = dict(
    zip(['min_mean_temp', 'max_mean_temp'], sns.color_palette("rocket_r", 6)))

# Plot the lines on two facets
sns.relplot(x='hour',
            y="Temp_C",
            hue="min_or_max",
            col="solar_tech",
            palette=palette,
            height=5,
            aspect=.75,
            facet_kws=dict(sharex=True, sharey=True),
            kind="line",
            legend="full",
            data=package_temps)
Beispiel #16
0
plot1.set_xticklabels(plot1.get_xticklabels(), rotation=45, ha='right', rotation_mode='anchor', size=14)
plot1.set_yticklabels(plot1.get_yticks(), size=14)
ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
plot1.set_xlabel('')
plot1.set_ylabel('Conversion (%)', size=16)
plt.savefig('conversion_time1.png', format='png', dpi=500, bbox_inches='tight')


dfl[0]['a'] = 'a'

count=0
for i in dfl:
    print(dfl[count])
    fig = plt.figure(figsize=(11, 7))
    ax = fig.add_subplot(1, 1, 1)
    plot = sns.relplot(x='Date', y='λ (%) CpG', ci=None, col='a', hue='Date', col_wrap=(1), kind='scatter', linewidth=0.2, x_jitter=100000, height=3, aspect=1.5, s=30, legend=False, data=dfl[count])


    for ax in plot.axes.flat:
        # ax.set(xlim=(70, 100))
        # ax.xaxis.set_major_locator(ticker.MultipleLocator(10))
        # ax.xaxis.set_major_formatter(ticker.ScalarFormatter())
        
        ax.set(ylim=(None, 101))
        ax.yaxis.set_major_locator(ticker.MultipleLocator(5))
        ax.yaxis.set_major_formatter(ticker.ScalarFormatter())
        
    #     # ax.grid(b=True, which='major')
        
    #     ax.plot([0, 100], [0, 100], linestyle='--', linewidth=1, zorder=0.9, color='grey')
        
    rowperneuron = int(len(runresult['v']) / N)  # this needs to an integer
    runresult['t_stim'] = tile(np.linspace(0, runtime, rowperneuron),
                               N)  # need to add n neurons here
    capresult.append(runresult)

# %%
#tidy and plot
capresult = pd.concat(capresult)

# %%
sns.relplot(
    x='t_stim',
    y='v',
    kind='line',
    #hue = 'sweep',
    row='sweep',
    height=2,
    aspect=8,
    #row = 'neuron',
    hue='neuron',
    data=capresult)

# %% [markdown]
# I played with the stimulation voltage, it seems like spikes mostly go to full 50 mV amplitude with the given conductances even for small injections.
#
# That is encouraging, the output is tidy, now we can try to include the per-neuron parameters, to summarise more.
#
# We can do this with a spikemonitor object.

# %%
start_scope()
                     cond_names,
                     norm_list,
                     model=model,
                     adjust_time=False)

# =============================================================================
# same plot but now facet for readouts
# =============================================================================
figname = "pscan_models"
g = sns.relplot(x="xnorm",
                y="ylog",
                kind="line",
                data=df_new,
                hue="readout",
                col="pname",
                height=4,
                facet_kws={
                    "margin_titles": True,
                    "sharex": True,
                    "sharey": True
                },
                legend="full")

ylim = (None, None)
g.set(ylim=ylim, ylabel="log2FC")
for ax in g.axes.flat:
    ax.set_xscale("log")
    ax.xaxis.set_major_locator(ticker.LogLocator(base=10.0, numticks=100))

[plt.setp(ax.texts, text="") for ax in g.axes.flat]
g.set_titles(row_template='{row_name}', col_template='{col_name}')
# Stack train and test so that feature engineering is applied to both at once
alldata = pd.concat([train, test], axis=0).reset_index(drop=True)

# Split "Open Date" into year / month / day columns
alldata["Open Date"] = pd.to_datetime(alldata["Open Date"])
alldata["Year"] = alldata["Open Date"].apply(lambda x: x.year)
alldata["Month"] = alldata["Open Date"].apply(lambda x: x.month)
alldata["Day"] = alldata["Open Date"].apply(lambda x: x.day)
# "kijun" holds a fixed reference date; BusinessPeriod is the number of days
# between "Open Date" and that reference date
alldata["kijun"] = "2015-04-27"
alldata["kijun"] = pd.to_datetime(alldata["kijun"])
alldata["BusinessPeriod"] = (alldata["kijun"] -
                             alldata["Open Date"]).apply(lambda x: x.days)

# The raw date and the helper column are no longer needed
alldata = alldata.drop('Open Date', axis=1)
alldata = alldata.drop('kijun', axis=1)

#%%
sb.relplot(x="Open Date", y="revenue", col="City Group", data=train)

#%%

# Collect the feature names into lists by dtype
cat_cols = alldata.dtypes[alldata.dtypes == 'object'].index.tolist()
num_cols = alldata.dtypes[alldata.dtypes != 'object'].index.tolist()

other_cols = ['Id', 'WhatIsData']
# Remove the unneeded entries from the lists
cat_cols.remove('WhatIsData')  # drop the flag that distinguishes training data from test data
num_cols.remove('Id')  # drop Id

# Convert the categorical variables to dummy variables
cat = pd.get_dummies(alldata[cat_cols])
Beispiel #20
0
cond_names2 = ["beta"+str(val) for val in arr]

# =============================================================================
# run experiment
# =============================================================================
exp = multi_exp(time, cond_list, cond_names, cond_names2)

norm = matplotlib.colors.Normalize(
    vmin=np.min(arr),
    vmax=np.max(arr))

# choose a colormap
cm = matplotlib.cm.Blues

# create a ScalarMappable and initialize a data structure
sm = matplotlib.cm.ScalarMappable(cmap=cm, norm=norm)
sm.set_array([])

g = sns.relplot(x = "time", y = "value", kind = "line", data = exp, hue = "cond2", 
                col = "cond", palette = "Blues", height = 5,legend = False)

   
g.set(ylim = (0, 30), xlim = (0,time[-1]))
ax = g.axes[0][0]
ax.set_ylabel("cell dens. norm.")
g.set_titles("{col_name}")
cbar = g.fig.colorbar(sm, ax = g.axes, ticks = [1,5,10])
cbar.set_label(r"$\beta$")

g.savefig("../figures/prolif_tc_beta.pdf")
Beispiel #21
0
# What is pylint? Didn't I already have that?
# I messed with that 'base':conda when I was working with the flask stuff over the weekend


# Use the iris database to answer the following questions:

iris = sns.load_dataset('iris')


# What does the distribution of petal lengths look like?
sns.boxplot(data=iris, y = 'petal_length')
sns.distplot(iris.petal_length)

# Is there a correlation between petal length and petal width?
sns.relplot(x='petal_length', y='petal_width', data=iris)

# Would it be reasonable to predict species based on sepal width and sepal length?
# relplot facets with ``col``; ``columns`` is not one of its arguments.
sns.relplot(x='sepal_length', y='sepal_width', col='species', data=iris)

# Which features would be best used to predict species?
sns.relplot(x='petal_length', y='petal_width', col='species' ,
            hue='species', data=iris)
#The data is much more tightly clustered together when plotting the petal measurements.


# 1
# Using the lesson as an example, use seaborn's load_dataset function to load the anscombe 
# data set. Use pandas to group the data by the dataset column, and calculate summary 
# statistics for each dataset. What do you notice?
anscombe = sns.load_dataset('anscombe')
## Seaborn
A more modern plotting library built on matplotlib


import seaborn as sns

tips = sns.load_dataset("tips")
dots = sns.load_dataset("dots")
diamonds = sns.load_dataset("diamonds")

sns.relplot(
    data = dots,
    x = "time",
    y = "firing_rate",
    col = "align",
    hue="coherence", 
    size="choice",
    kind = "line",
    palette=sns.color_palette("rocket_r")
)

### Scatter

sns.set_theme(style="whitegrid", context='paper')
sns.scatterplot(data = diamonds,
                x="carat", y="price",
                hue="clarity", size="depth"
               )

### Boxplot
Beispiel #23
0
print("Avg Of Kms Driven",avg_kms_driven)                               #"KMs Driven"
# Replace null values with the average km driven. The result is assigned back
# to the column: an inplace replace on the selection UsedCarData["KMs Driven"]
# acts on a temporary object and does not update the frame under pandas
# copy-on-write.
UsedCarData["KMs Driven"] = UsedCarData["KMs Driven"].replace(np.nan, avg_kms_driven)
# Delete the rows that have null data in any of these columns
UsedCarData.dropna(subset=["Brand", "Condition", "Fuel", "Model", "Registered City", "Transaction Type"], axis=0, inplace=True)
UsedCarData.reset_index(drop=True, inplace=True)  # Reset index
# NOTE(review): empty strings become NaN only after the dropna above, so such
# rows are kept - confirm this order is intended.
UsedCarData.replace("", np.nan, inplace=True)
print(UsedCarData)

#***************PHASE-II*************#
#***Grpah Brand Vs Price****
print(UsedCarData['Fuel'].value_counts())
print(UsedCarData['Registered City'].value_counts())
#****Graph Condition Vs Price***
fig=plt.figure()
sns.barplot(x='Condition',y='Price',hue="Condition",data=UsedCarData.loc[:,('Condition','Price')])
#****Graph Fuel Vs Price***
sns.relplot(x='Fuel',y='Price',kind="line",data=UsedCarData.loc[:,('Fuel','Price')])
#****Graph Year Vs Price***
sns.relplot(x='Year',y='Price',kind="line",data=UsedCarData.loc[:,('Year','Price')])
#****Graph Condition Vs Year***
var=UsedCarData.groupby('Condition').Year.sum()
fig=plt.figure()
ax1 = fig.add_subplot(1,1,1)
ax1.set_xlabel('Condition')
ax1.set_ylabel('Year')
ax1.set_title("Condition Vs Year")
var.plot(kind='bar')
fig=plt.figure()
#****Graph Brand Vs Price***
g=sns.barplot(x='Brand',y='Price',data=UsedCarData.loc[:,('Brand','Price')])
for item in g.get_xticklabels(): item.set_rotation(90)
plt.show()
Beispiel #24
0
steps = range(500, 10000, 500)
for n_obs in steps:
    texts, labels = DataReader(0.5, dataset=DATASET).take(n_obs)

    s_train = set(texts)
    print(f"train/test overlaps: {len(s_train & s_test)}"
          )  # todo: ensure 0 overlaps

    dataset = [simple_preprocess(t) for t in texts]

    svm_model = tfidf_svm(dataset, labels)

    predicted = svm_model.predict(test_dataset)
    acc = accuracy_score(test_labels, predicted)

    print(f"accuracy with {n_obs} training examples. svm: {acc}")
    accuracy_per_step_svm.append(acc)

df_plot = pd.DataFrame(data={
    'svm': accuracy_per_step_svm,
    'training_examples': list(steps)
})

sns.set()
sns_plot = sns.relplot(data=pd.melt(df_plot, ['training_examples']),
                       x='training_examples',
                       y='value',
                       hue='variable',
                       kind="line")
sns_plot.savefig("baseline_per_examples_polluted.png")
Beispiel #25
0
## All of this code chunk needs to be run at one time, otherwise you get weird errors. This
## is true for many plotting commands which are composed of multiple commands.

# %% [markdown]
# The `FacetGrid` tells `seaborn` that we're going to layer graphs, with layers based on `hue` and the hues being determined by values of `kind`. Notice that we can add a few more details like the aspect ratio of the plot and so on. The documentation for [FacetGrid](https://seaborn.pydata.org/generated/seaborn.FacetGrid.html), which we will also use for facets below, may be helpful in finding all the options you can control.

# %% [markdown]
# We can also show more than one kind of layer on a single graph

# %%
fmri = sns.load_dataset('fmri')

# %%
plt.style.use('seaborn-notebook')
sns.relplot(x='timepoint', y='signal', data=fmri)

# %%
sns.relplot(x='timepoint', y='signal', data=fmri, kind='line')

# %%
sns.relplot(x='timepoint', y='signal', data=fmri, kind='line', hue='event')

# %%
sns.relplot(x='timepoint',
            y='signal',
            data=fmri,
            hue='region',
            style='event',
            kind='line')
Beispiel #26
0
            acc = 0
            total = np.sum(acc_data.values[:, 3])
            for row in acc_data.values:
                acc += row[1] * row[3] / total
            for j, stat in enumerate(
                [acc, model, num_feats, train, test, attribute]):
                master_df.values[i, j] = stat
            if (i == 0):
                title_string = ((
                    "{} predictor trained on {}, tested on {}".format(
                        attribute, train, test)))

    master_df['feats'] = pd.to_numeric(master_df['feats'])
    master_df['acc'] = pd.to_numeric(master_df['acc'])
    print(master_df)
    print(master_df.dtypes)
    #fmri = sns.load_dataset("fmri")
    #print(fmri)
    #idk = sns.relplot(x="timepoint", y="signal", hue="region", style="event", kind="line", data=fmri)
    idk = sns.relplot(x="feats",
                      y="acc",
                      hue="model",
                      kind="line",
                      data=master_df)
    #idk = sns.relplot(x="feats", y="acc", kind="line", data=master_df)
    plt.title(title_string)
    plt.ylim(0, 1)
    plt.tight_layout()
    plt.savefig('figures/' + (title_string.replace(" ", "")) + '.png')
    #plt.show()
### 6-3. Adding the col option draws a separate graph for each value

# In addition, `col_wrap` sets how many columns are shown on one row.

sns.lmplot(x='total_bill', y='tip', hue='smoker', col='day', col_wrap=2, height=6, data=tips)
plt.show()

## 7. relplot

# Shows the relationship between two columns but, unlike `lmplot`, does not draw a separate linear fit.

# [relplot documentation](https://seaborn.pydata.org/generated/seaborn.relplot.html?highlight=relplot#seaborn.relplot)

### 7-1. Basic relplot

sns.relplot(x="total_bill", y="tip", hue="day", data=tips)
plt.show()

### 7-2. Split the graph with the col option

sns.relplot(x="total_bill", y="tip", hue="day", col="time", data=tips)
plt.show()

### 7-3. Choose the data columns to show on rows and columns

sns.relplot(x="total_bill", y="tip", hue="day", row="sex", col="time", data=tips)
plt.show()

### 7-4. Apply a color palette

sns.relplot(x="total_bill", y="tip", hue="day", row="sex", col="time", palette='CMRmap_r', data=tips)
#st.area_chart(data_tx)
#st.area_chart(data_daily)
#'COVID-19 daily data for Texas'

#Sidebar
st.sidebar.title("Data-Set Selector")
st.sidebar.markdown("Select the Charts/Plots accordingly:")
if st.sidebar.checkbox('Show Me The Data - "CSV Data-Set"'):
    '### Data for from an hisstorical basis'
    st.dataframe(data_tx, width=3000, height=700)
if st.sidebar.checkbox('Positive to Recovered Cases Texas'):
    '### Positive to Recovered Cases Texas'
    'As we look at our scatter plot we are able to see that as we see the amount of cases rise, we are also see a rise in recovered cases as well'

    'Looking at the "hue" change we are able to see the breaking point for this data around the middle of 2020-06'
    ax = sns.relplot(x='datetime', y='positive', hue='recovered', data=data_tx)
    ax = ax.set_xticklabels(rotation=30)
    ax = ax.set_ylabels('Positive Cases in hundred thousands')
    st.pyplot(ax)
if st.sidebar.checkbox("'Deaths' To 'Hospitalized Currently' Cases In Texas"):
    "### 'Deaths' To 'Hospitalized Currently' Cases In Texas"
    'When we take a look at the "deaths" we are spike in "deaths" almost 1 month after the initial spike of "positive" cases.'
    ax1 = sns.relplot(x='datetime',
                      y='death',
                      hue='hospitalizedCurrently',
                      data=data_tx)
    ax1 = ax1.set_xticklabels(rotation=30)
    ax1 = ax1.set_ylabels('Current Death Counts')
    st.pyplot(ax1)

#st.vega_lite_chart(data_daily)
Beispiel #29
0

"""
plota um gráfico de dispersão
    @params hue define cor de plotagem variando pela coluna "finalizado"
"""
sns.scatterplot(x="horas_esperadas", y="preco", hue="finalizado", data=df_dados)



"""
plota um gráfico de relação separado os gráficos pela coluna "finalizado"
    @params hue define cor de plotagem variando pela coluna "finalizado"
    @params col define a separação dos gráficos pela coluna "finalizado"
"""
sns.relplot(x="horas_esperadas", y="preco", hue="finalizado", col="finalizado", data=df_dados)


SEED = 20
# se setarmos aqui podemos tirar o random_state das chamadas quando estas usam o np.random
np.random.seed(SEED)

"""
Separação dos dados e treino do modelo.
"""
x = df_dados[["horas_esperadas", "preco"]]
y = df_dados["finalizado"]

# método para separar dados
from sklearn.model_selection import train_test_split
treino_x, teste_x, treino_y, teste_y = train_test_split(x, y, test_size = 0.20, # 20% da massa para teste
Beispiel #30
0
        0.952, 0.960, 0.964, 0.965, 0.968, 0.969, 0.970, 0.970, 0.971, 0.972,
        0.972, 0.972, 0.973, 0.973, 0.973, 0.973, 0.973, 0.974, 0.974, 0.974
    ],
    'hF': [
        0.948, 0.958, 0.962, 0.964, 0.967, 0.967, 0.968, 0.969, 0.970, 0.970,
        0.970, 0.971, 0.971, 0.972, 0.972, 0.972, 0.972, 0.973, 0.973, 0.973
    ],
}

#df = pd.DataFrame(data)
df = pd.DataFrame(dataes)

ax = sns.relplot(
    data=df,
    kind="line",
    x="size%",
    y="Acc",
    facet_kws=dict(sharex=False),
)
ax.set(xlabel='% dataset', ylabel='Precisión')
plt.savefig('learning_curve_es.svg')
#col="align", hue="choice", size="coherence", style="choice",
'''

Learning curve
\begin{table}[htbp]
    \centering
    \begin{tabular}{cccccc}
        \toprule
        \multicolumn{2}{c}{Resources}&\\
        \cline{1-2} 
'''In this exercise, we'll explore Seaborn's mpg dataset, which contains one row per car model and includes information such as the year the car was made, the number of miles per gallon ("M.P.G.") it achieves, the power of its engine (measured in "horsepower"), and its country of origin.

What is the relationship between the power of a car's engine ("horsepower") and its fuel efficiency ("mpg")? And how does this relationship vary by the number of cylinders ("cylinders") the car has? Let's find out.

Let's continue to use relplot() instead of scatterplot() since it offers more flexibility.'''

# Import Matplotlib and Seaborn
import matplotlib.pyplot as plt
import seaborn as sns

# Create scatter plot of horsepower vs. mpg
sns.relplot(x="horsepower",
            y="mpg",
            data=mpg,
            kind="scatter",
            size="cylinders",
            hue="cylinders")

# Show plot
plt.show()
    return df


if __name__ == "__main__":
    df = makeUrl()

    df.dropna(inplace=True)
    df["newTests"] = df["newTests"].astype(int)
    # Percentage of tests that were positive. The column-wise division
    # yields inf/NaN for rows with zero tests instead of raising
    # ZeroDivisionError part-way through the frame, and makes the former
    # placeholder column unnecessary.
    df["positive Test Percentage"] = (
        df["newCases"] / df["newTests"]).astype(float) * 100

    df['date'] = pd.to_datetime(df['date'], dayfirst=False, yearfirst=False)
    df.sort_values(by=["date"], inplace=True, ascending=True)

    print(df)

style.use('ggplot')

g = sns.relplot(
    x="date",
    y="positive Test Percentage",
    kind="line",
    data=df,
)
g.fig.autofmt_xdate()
plt.show()
Year_imdb.rename(columns={"IMDb Mean":"IMDb_Mean"}, inplace=True)
Year_imdb.rename(columns={"Number of Shows":"Number_series"}, inplace=True)

fig, ax = plt.subplots(figsize=(16, 9))
ax.plot(Year_imdb.sort_index()["IMDb_Mean"],color="red")
plt.xlabel("Years",color="black",fontsize=20)
plt.ylabel("IMDb Ratings",color="black",fontsize=20)
plt.yticks(fontsize=20)
plt.xticks(fontsize=20)
plt.show()


#Plot05
fig, ax = plt.subplots()
fig.set_size_inches(11.7, 8.27)
sns.relplot(x="IMDb_Mean", y="Number_series", data=Year_imdb,)
plt.title("Distribution of IMDb Ratings Respect to Number of Series")
plt.xlabel("IMDb Ratings", fontsize=15)
plt.ylabel("Number of Series", fontsize=15)
plt.yticks(fontsize=15)
plt.xticks(fontsize=15)
plt.show()

#plot06
vistvdata["Year"] = vistvdata["Year"].astype("int32") 
vistvdata['Year_Cut'] = pd.qcut(vistvdata['Year'], q=4)
vistvdata["Year_Cut"].cat.categories

sns.set_style("whitegrid")
fig, ax = plt.subplots()
fig.set_size_inches(11.7, 8.27)
"""
Line plots on multiple facets
=============================

_thumb: .45, .42

"""
import matplotlib.pyplot as plt
import seaborn as sns
sns.set(style="ticks")

dots = sns.load_dataset("dots")
help(sns.load_dataset)

# print() call form: the Python 2 print statement is a SyntaxError on Python 3
print(dots.head(10))

# Define a palette to ensure that colors will be
# shared across the facets
palette = dict(zip(dots.coherence.unique(),
                   sns.color_palette("rocket_r", 6)))

# Plot the lines on two facets
sns.relplot(x="time", y="firing_rate",
            hue="coherence", size="choice", col="align",
            size_order=["T1", "T2"], palette=palette,
            height=5, aspect=.75, facet_kws=dict(sharex=False),
            kind="line", legend="full", data=dots)
plt.show()
"""
Scatterplot with varying point sizes and hues
==============================================

_thumb: .45, .5

"""
import seaborn as sns
import matplotlib.pyplot as plt
sns.set(style="white")

# Load the example mpg dataset, mpg is pandas.DataFrame
mpg = sns.load_dataset("mpg")
# print() call form: the Python 2 print statement is a SyntaxError on Python 3
print(mpg.columns)
print(mpg.head(10))

# Plot miles per gallon against horsepower with other semantics
sns.relplot(x="horsepower", y="mpg", hue="origin", size="weight",
            sizes=(40, 400), alpha=.5, palette="muted",
            height=6, data=mpg)

#plt.show()