Ejemplo n.º 1
0
 def plot(self):
     """Render the clusters of the current level as a scatterplot matrix.

     The transposed cluster arrays are stacked side by side and the first
     three rows of the stacked array are kept as the plotted dimensions.
     Every sample is labelled with the number of the cluster it came from
     and the resulting figure is handed to ``self.makeplot``.

     Note that ``self.shortname`` is prefixed with
     ``self.DS.shortclustname`` on every call.

     Returns:
         Whatever ``self.makeplot`` returns for the figure.
     """
     title = self.titlestring % (self.DS.name, self.DS.clustname,
                                 self.DS.levels)
     self.shortname = self.DS.shortclustname + self.shortname
     clusters = self.DS.clusters[self.DS.levels]
     colors = get_spaced_colors(len(clusters))
     # Transpose each cluster before stacking; keep only the first 3 rows.
     samples = np.hstack([c.T for c in clusters])[:3, :]
     # One label per row of each cluster: the cluster's position in the list.
     labels = np.hstack([c.shape[0] * [i] for i, c in enumerate(clusters)])
     dim_names = ["Dim %d" % i for i in range(samples.shape[0])]
     df = pd.DataFrame(samples.T, columns=dim_names)
     df["label"] = ["Cluster %d" % i for i in labels]
     fig = ff.create_scatterplotmatrix(
         df, diag='box', index="label", colormap=colors)
     # In "div" mode the figure carries no title.
     fig["layout"]["title"] = title if self.plot_mode != "div" else None
     return self.makeplot(fig, "agg/" + self.shortname)
Ejemplo n.º 2
0
def scatter_matrix(df):
    """Write a box-diagonal scatterplot matrix of *df* via ``py.plot``.

    Rows containing missing values are dropped before plotting. The figure
    is passed to ``py.plot`` with the filename ``/tmp/scatter_matrix.html``.
    """
    complete_rows = df.dropna()
    matrix_options = dict(diag='box',
                          height=1000,
                          width=1900,
                          colormap='Portland')
    figure = ff.create_scatterplotmatrix(complete_rows, **matrix_options)
    py.plot(figure, filename='/tmp/scatter_matrix.html')
def scatterplot_matrix(x, y):
    """Plot a scatterplot matrix of *x* and *y* joined column-wise.

    The inputs are concatenated along the columns with an inner join, the
    points are grouped by the ``Group`` column and histograms fill the
    diagonal. The figure is shown through ``py.iplot``.
    """
    merged = pd.concat([x, y], axis=1, join='inner')
    figure = ff.create_scatterplotmatrix(
        merged, diag='histogram', index='Group', height=800, width=800)
    py.iplot(figure, filename='Histograms along Diagonal Subplots')
Ejemplo n.º 4
0
def visualize(df):
    """Produce three exploratory plots for *df*.

    In order: histogram subplots through ``df.iplot``, a longitude/latitude
    scatter through ``df.iplot``, and a scatterplot matrix of four selected
    columns written through ``py.plot``.
    """
    df.iplot(
        kind='histogram',
        bins=50,
        subplots=True,
        filename='/tmp/histogram-subplots.html',
        asPlot=True,
    )

    location_options = dict(kind="scatter",
                            x="longitude",
                            y="latitude",
                            filename='/tmp/loc.html',
                            asPlot=True)
    df.iplot(**location_options)

    matrix_columns = [
        "housing_median_age",
        "total_rooms",
        "median_income",
        "median_house_value",
    ]
    figure = ff.create_scatterplotmatrix(
        df[matrix_columns], diag='histogram', width=1000, height=1000)
    py.plot(figure, filename='/tmp/scatterplotmatrix.html')
Ejemplo n.º 5
0
def plotScatter(df, height, width):
    """Return a ``div`` holding a scatterplot matrix of one-period changes.

    Args:
        df: Frame whose ``pct_change(1)`` is plotted.
        height: Figure height passed to the figure factory.
        width: Figure width passed to the figure factory.

    Returns:
        The value of ``plot(..., output_type='div')`` for the figure.
    """
    changes = df.pct_change(1)
    fig = ff.create_scatterplotmatrix(
        changes, diag='histogram', height=height, width=width)
    # Set fixedrange=True on every layout entry named xaxis* or yaxis*.
    axis_prefixes = ('xaxis', 'yaxis')
    for key in fig['layout']:
        if not key.startswith(axis_prefixes):
            continue
        fig['layout'][key].update(fixedrange=True)
    plot_config = dict(displayModeBar=True, showLink=False)
    return plot(fig, output_type='div', config=plot_config)
Ejemplo n.º 6
0
    def plot_overall_results(self, save_fig=False):
        """Create a pairplot with train and test data.

        Parameters
        ----------
        save_fig : bool, optional
            If True, write the figure as ``pairplot.html`` and
            ``pairplot.png`` into ``models/<self.name>/img`` next to this
            file; the folder is created when it does not exist yet.
            The default is False.

        Returns
        -------
        fig : Plotly graph_objects.Figure()
            The figure object with the plot.
        """
        df = pd.concat([self.train, self.test], axis=0)
        # concat keeps the order: every train row first, then every test row.
        df["label"] = ["train"] * len(self.train) + ["test"] * len(self.test)
        axes_default = dict(
            gridcolor="lightgray",
            showline=True,
            linewidth=1.5,
            linecolor="black",
            mirror=True,
            exponentformat="power",
            zerolinecolor="lightgray",
        )

        fig = create_scatterplotmatrix(
            df,
            diag="box",
            index="label",
            title="",
        )
        fig.update_layout(
            width=1500,
            height=1500,
            plot_bgcolor="white",
            hovermode=False,
        )
        fig.update_xaxes(**axes_default)
        fig.update_yaxes(**axes_default)

        if save_fig:
            img_dir = Path(__file__).parent / f"models/{self.name}/img"
            # Writing would fail on a missing folder, so create it first.
            img_dir.mkdir(parents=True, exist_ok=True)
            fig.write_html(str(img_dir / "pairplot.html"))
            fig.write_image(str(img_dir / "pairplot.png"))

        return fig
Ejemplo n.º 7
0
def plot_scatter_matrix(file,
                        save_path='../../figs/',
                        save_name='scatter_matrix'):
    """Plot a per-year scatterplot matrix from a three-column CSV file.

    The CSV is read without a header row as ``year, brought, num``. ``num``
    is divided by 10000, the table is pivoted so every ``brought`` value
    becomes its own column, and missing year/brought combinations are
    filled with 0 before plotting.

    Args:
        file: Path of the CSV file to read.
        save_path: Folder the HTML output is written to.
        save_name: File name of the output without the ``.html`` suffix.

    Returns:
        None. When *file* does not exist a message is printed and nothing
        is plotted.
    """
    if not os.path.isfile(file):
        print('file not exist')
        return None
    scatter_matrix = pd.read_csv(file, names=['year', 'brought', 'num'])
    scatter_matrix.num = scatter_matrix.num / 10000.0
    scatter_matrix = scatter_matrix.pivot(index='year',
                                          columns='brought',
                                          values='num').reset_index()
    # np.str was only an alias of the builtin and is gone from current
    # NumPy releases; the builtin works on every version.
    scatter_matrix.year = scatter_matrix.year.astype(str)
    # fillna returns a new frame; the result used to be discarded, so the
    # NaN cells were never actually replaced.
    scatter_matrix = scatter_matrix.fillna(0)

    fig = ff.create_scatterplotmatrix(
        scatter_matrix,
        diag='box',
        index='year',
        height=800,
        width=800,
        title='Distribution of every brought each year')
    save_path = os.path.join(save_path, save_name)
    plot(fig, filename=save_path + '.html', auto_open=False)
Ejemplo n.º 8
0
                                   8.43 * rho_d)
# The Clay**2 * P**2 coefficient used to read `1.95**10**(-2)`, a power
# tower that evaluates to about 1.007. Every other term is written as
# `a * 10**(-k)`, so the coefficient is restored to 1.95 * 10**(-2).
PTF_b = 2.78 * 10**(-6) * np.exp(
    19.52 * P - 8.97 - 2.82 * 10**(-2) * Clay + 1.81 * 10**(-4) * Sand**2 -
    9.41 * 10**(-3) * Clay**2 - 8.40 * P**2 + 7.77 * 10**(-2) * Sand * P -
    2.98 * 10**(-3) * Sand**2 * P**2 - 1.95 * 10**(-2) * Clay**2 * P**2 +
    1.73 * 10**(-5) * Sand**2 * Clay + 2.73 * 10**(-2) * Clay**2 * P +
    1.43 * 10**(-3) * Sand**2 * P - 3.5 * 10**(-6) * Clay**2 * Sand)

import plotly.figure_factory as ff
import pandas as pd
import plotly.plotly as py

# Compare the three conductivity estimates in one scatterplot matrix.
dataframe = pd.DataFrame({'Davids': Ksat_d, 'A': PTF_a, 'B': PTF_b})
fig = ff.create_scatterplotmatrix(dataframe,
                                  diag='box',
                                  colormap_type='cat',
                                  height=800,
                                  width=800)
py.iplot(fig, filename='Colormap as a Dictionary')

# PTF_a is plotted on x and PTF_b on y; the axis labels used to be swapped.
plt.scatter(PTF_a, PTF_b)
plt.xlabel('PTF_a')
plt.ylabel('PTF_b')
plt.title('comparison between PTF K')
plt.show()

# Scaled PTF_a on x against the 'Davids' values on y.
plt.scatter(PTF_a * 10**(3), Ksat_d)
plt.ylabel('Davids')
plt.xlabel('PTF_a')
plt.title('A')
plt.show()
Ejemplo n.º 9
0
import pandas as pd
import numpy as np

# Options for pandas
pd.options.display.max_columns = 30

df = pd.read_csv('data/blue_jays.csv', index_col=0)

print(df)

from plotly.offline import iplot
import plotly.figure_factory as ff

# Scatterplot matrix of three measurements, grouped by 'KnownSex'.
figure = ff.create_scatterplotmatrix(df[['KnownSex', 'Head', 'Mass', 'Skull']],
                                     index='KnownSex',
                                     height=800,
                                     width=800)
iplot(figure)

# Correlate only numeric/boolean columns. 'KnownSex' is presumably a text
# column (it is used as the grouping index above), and pandas 2.x raises
# when corr() meets such a column instead of silently skipping it.
corrs = df.select_dtypes(include=['number', 'bool']).corr()

figure = ff.create_annotated_heatmap(z=corrs.round(2).values,
                                     x=list(corrs.columns),
                                     y=list(corrs.index),
                                     showscale=True)
#iplot(figure)

figure = ff.create_scatterplotmatrix(df[['KnownSex', 'Head', 'Mass', 'Skull']],
                                     diag='histogram',
                                     index='KnownSex',
                                     height=800,
Ejemplo n.º 10
0
        html.Tbody([
            html.Tr([
                html.Td(dataframe.iloc[i][col]) for col in dataframe.columns
            ]) for i in range(min(len(dataframe), max_rows))
        ])
    ])

def generate_table3(dataframe):
    """Build a Dash ``DataTable`` showing *dataframe* ten rows per page.

    Args:
        dataframe: Frame whose records and column names fill the table.

    Returns:
        The ``dash_table.DataTable`` component.
    """
    return dash_table.DataTable(
        data=dataframe.to_dict('records'),
        # Take the columns from the argument; the body used to read the
        # unrelated module-level ``df`` here.
        columns=[{'id': c, 'name': c} for c in dataframe.columns],
        page_size=10,
    )

# Scatterplot matrices of df2: histogram diagonal, then box diagonal.
fig1 = ff.create_scatterplotmatrix(
    df2,
    diag='histogram',
    colormap='Viridis',
    colormap_type='cat',
    height=800,
    width=1400,
)
fig = ff.create_scatterplotmatrix(
    df2,
    diag='box',
    index='index',
    colormap='Portland',
    colormap_type='cat',
    height=700,
    width=700,
)
# world_rank against each score column, coloured by world_rank.
fig2, fig3, fig4 = (
    px.scatter(df2, x="world_rank", y=score, color="world_rank")
    for score in ("research", "teaching", "citations")
)
fig5 = px.scatter(df2, x="research", y="teaching", color="world_rank")

text = '''
On va analyser les données des 50 premières Universités et essayer de voir s'il y a 
des correlations entre certains critères :
- Dans un premier temps on va analyser les données de manières classique.
- Dans un deuxième temps on utiler la méthode de l'analyse des composantes principals.
''' 
text1 = '''
On remaque une correlation entre :
Ejemplo n.º 11
0
                                                 ax=axes[i - 3, 1],
                                                 rot=0)

    elif i < 9:
        ax = customers[item].value_counts().plot(kind='bar',
                                                 ax=axes[i - 6, 2],
                                                 rot=0)
    ax.set_title(item)

# #### Matrix comparing some variables which have high correlation with Churn

# In[21]:

charge_columns = ['tenure', 'MonthlyCharges', 'TotalCharges']
fig = ff.create_scatterplotmatrix(customers.loc[:1000, charge_columns],
                                  diag='histogram',
                                  height=1000,
                                  width=1000)
iplot(fig, filename='Histograms along Diagonal Subplots')

# ### Churn Analysis
# Now I will analyse the Churn variable

# In[22]:

colors = ['black', 'blue']
# Share of customers per Churn value, expressed in percent.
churn_share = customers['Churn'].value_counts() * 100.0 / len(customers)
ax = churn_share.plot(kind='bar',
                      stacked=True,
                      rot=0,
                      color=colors,
                      figsize=(8, 6))

ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.set_ylabel('% Customers', size=15)
Ejemplo n.º 12
0
#   y=y,
#   z=z,
#   mode='markers',
#   marker=dict(
#     size=12,
#     line=dict(
#       color='rgba(217, 217, 217, 0.14)',
#       width=0.5
#     ),
#     opacity=0.8
#   )
# )

# data = [trace]

# layout = go.Layout(

# )

# fig = go.Figure(data=data, layout=layout)

# py.plot(fig, '3dscatter.html')

# Fixed colour per species value for the categorical colormap.
species_colors = {
    'Irissetosa': '#00F5FF',
    'Irisversicolor': '#32CD32',
    'Irisvirginica': '#DAA520',
}
fig = ff.create_scatterplotmatrix(train,
                                  diag='box',
                                  index='Species',
                                  colormap=species_colors,
                                  colormap_type='cat',
                                  height=800,
                                  width=800)
py.plot(fig, filename='Scatter')
Ejemplo n.º 13
0
# go.Scatter / go.Layout / go.Figure are the classes; the lower-case names
# used before are not callable. 'markers' is the point-only drawing mode.
trace = go.Scatter(
    x=x,
    y=y,
    mode='markers',
)

layout = go.Layout(
    title="iris dataset",
    hovermode='closest',
    xaxis=dict(
        title='sepal length(cm)',
        ticklen=5,
        zeroline=False,
        gridwidth=2,
    ),
    yaxis=dict(title='sepal width (cm)', ticklen=5, gridwidth=2),
    showlegend=False,
)
data = [trace]

fig = go.Figure(data=data, layout=layout)
plot(fig)

# Pass the feature names directly: wrapping them in another list would
# hand pandas a nested list instead of one plain name per column.
df = pd.DataFrame(iris_data.data, columns=iris_data.feature_names)
# Map every numeric target to its class name.
df['class'] = [iris_data.target_names[i] for i in iris_data.target]

import plotly.figure_factory as ff

# Scatterplot matrix grouped by class with histograms on the diagonal.
fig = ff.create_scatterplotmatrix(df, index='class', diag='histogram', size=10,
                                  height=800, width=800)

plot(fig)
Ejemplo n.º 14
0
    df_AF['Adolescent fertility'][df_AF.Year == '2017'],
    df_AF['Adolescent fertility'][df_AF.Year == '2018']
]
group_labels = ['2014', '2015', '2016', '2017', '2018']
fig_AF = ff.create_distplot(hist_data, group_labels, bin_size=5)
fig_AF.update_layout(title='Distribution of Adolescent fertility 2014-2018')

# Adolescent fertility vs GDP
dataframe = df_AF_for_GDP
# Select the 2018 rows and the two columns in a single .loc call and take
# an explicit copy, so adding the "index" column below writes to an
# independent frame rather than to a slice of the source data.
data = dataframe.loc[dataframe.Year == '2018',
                     ["GDP", "Adolescent fertility"]].copy()
data["index"] = np.arange(1, len(data) + 1)
fig_AF_vs_GDP = ff.create_scatterplotmatrix(data,
                                            diag='box',
                                            index='index',
                                            colormap='RdBu',
                                            colormap_type='cat',
                                            height=400,
                                            width=510)
fig_AF_vs_GDP.update_layout(title='Adolescent fertility vs GDP')

# Adolescent fertility vs Secondary school enrollment
fig_AF_vs_SES = px.scatter(df_AF_for_SES,
                           x="Secondary school enrollment",
                           y="Adolescent fertility",
                           size="Adolescent fertility",
                           hover_name="Country",
                           color="Country Code",
                           animation_frame="Year",
                           animation_group="Country",
                           size_max=35,
Ejemplo n.º 15
0
def visualisation(df):
    """Draw the chart type selected in the Streamlit sidebar for *df*.

    Three select boxes choose the x column, the y column and a categorical
    column; their options come from the module-level ``col_name``. A
    sidebar select box chooses the chart type and starts on
    'Scatter Plot'. 'Distribution Plot' has no branch of its own and is
    served by the final ``else``.

    Args:
        df: Frame to plot; indexed by the selected column names.

    Returns:
        None. All output goes through ``st`` calls.
    """
    x_axis = st.selectbox(label='Select Column X-Axis',
                          options=col_name,
                          key='x_axis')
    y_axis = st.selectbox(label='Select Column Y-Axis',
                          options=col_name,
                          key='y_axis')
    color = st.selectbox(label='Select Categorical Column',
                         options=col_name,
                         key='color')
    chart_type = st.sidebar.selectbox(label='Select Chart Type',
                                      options=[
                                          'Pair Plot', 'Scatter Plot',
                                          'Line Chart', 'Pie Chart',
                                          'Strip Plot', 'Violin Plot',
                                          'Histogram', 'Distribution Plot',
                                          'Sunburst Chart', 'Box Plot'
                                      ],
                                      index=1)
    if chart_type == 'Pair Plot':
        fig1 = ff.create_scatterplotmatrix(df,
                                           diag='histogram',
                                           height=800,
                                           width=800)
        st.plotly_chart(fig1)
        fig2 = ff.create_scatterplotmatrix(df,
                                           index=color,
                                           diag='box',
                                           height=800,
                                           width=800,
                                           colormap_type='cat')
        st.plotly_chart(fig2)

    elif chart_type == 'Scatter Plot':
        st.header(f'{x_axis} vs {y_axis}')
        fig = px.scatter(df, x=x_axis, y=y_axis)
        st.plotly_chart(fig)
        st.header(
            f'{x_axis} vs {y_axis} and legends are {color} columns value')
        fig1 = px.scatter(df, x=x_axis, y=y_axis, color=color, hover_data=df)
        st.plotly_chart(fig1)

    elif chart_type == 'Line Chart':
        st.header(f'{x_axis} vs {y_axis}')
        fig = px.line(df, x=x_axis, y=y_axis, hover_data=df)
        st.plotly_chart(fig)
        st.header(
            f'{x_axis} vs {y_axis} and legends are {color} columns value')
        fig1 = go.Figure()
        # A separate loop name keeps the selected ``color`` column intact.
        for group_value, groupdf in df.groupby(color):
            fig1.add_trace(
                go.Scatter(x=groupdf[x_axis],
                           y=groupdf[y_axis],
                           name=group_value,
                           mode='markers'))
        # Draw once, after every group has been added; drawing inside the
        # loop emitted one partial chart per group.
        st.plotly_chart(fig1)

    elif chart_type == 'Pie Chart':
        st.header(f'Pie Chart of {color} column')
        fig = px.pie(df, names=df[color])
        st.plotly_chart(fig)
        st.header(f'Donut Chart of {color} column')
        fig1 = px.pie(df, names=df[color], hole=0.3)
        st.plotly_chart(fig1)

    elif chart_type == 'Strip Plot':
        st.header(f'{x_axis} vs {y_axis}')
        fig = px.strip(df, x=df[x_axis], y=df[y_axis])
        st.plotly_chart(fig)
        st.header(
            f'{x_axis} vs {y_axis} and legends are {color} columns value')
        fig1 = px.strip(df, x=df[x_axis], y=df[y_axis], color=df[color])
        st.plotly_chart(fig1)

    elif chart_type == 'Violin Plot':
        st.header(f'{x_axis} vs {y_axis}')
        fig = px.violin(df, x=df[x_axis], y=df[y_axis], points='all')
        st.plotly_chart(fig)
        st.header(
            f'{x_axis} vs {y_axis} and legends are {color} columns value')
        fig1 = px.violin(df,
                         x=df[x_axis],
                         y=df[y_axis],
                         color=df[color],
                         points='all')
        st.plotly_chart(fig1)

    elif chart_type == 'Histogram':
        fig = px.histogram(df, x=df[x_axis])
        st.plotly_chart(fig)
        # One checkbox per aggregation; each shows a plain and a coloured
        # histogram. The coloured variant used to pass the x column as y,
        # unlike the plain one right above it.
        aggregations = (
            ('Y-Axis is Sum', 'sum'),
            ('Y-Axis is Count', 'count'),
            ('Y-Axis is Average', 'avg'),
        )
        for checkbox_label, histfunc in aggregations:
            if not st.checkbox(label=checkbox_label):
                continue
            plain = px.histogram(df,
                                 x=df[x_axis],
                                 y=df[y_axis],
                                 histfunc=histfunc)
            st.plotly_chart(plain)
            coloured = px.histogram(df,
                                    x=df[x_axis],
                                    y=df[y_axis],
                                    color=df[color],
                                    histfunc=histfunc)
            st.plotly_chart(coloured)

    elif chart_type == 'Sunburst Chart':
        path = st.multiselect(label='Select The Path', options=df.columns)
        fig = px.sunburst(df, path=path, color=color, values=x_axis)
        st.plotly_chart(fig)

    elif chart_type == 'Box Plot':
        # 'all' (without the stray backtick) is the value plotly accepts.
        fig = px.box(df, x=df[x_axis], y=df[y_axis], points='all')
        st.plotly_chart(fig)
        fig1 = px.box(df,
                      x=df[x_axis],
                      y=df[y_axis],
                      points='all',
                      color=df[color])
        st.plotly_chart(fig1)

    else:
        fig1 = px.histogram(df, x=df[x_axis], marginal="box")
        st.plotly_chart(fig1)
        fig2 = px.histogram(df, x=df[x_axis], y=df[y_axis], marginal="box")
        st.plotly_chart(fig2)
        fig3 = px.histogram(df,
                            x=df[x_axis],
                            y=df[y_axis],
                            color=df[color],
                            marginal="box")
        st.plotly_chart(fig3)
# Hollow blue markers: personal R preference against personal Python.
scatter_plot = ax.scatter(
    survey_df['My_R'],
    survey_df['My_Python'],
    facecolors='none',
    edgecolors='blue',
)
plt.show()

# This is where we create the offline scatter plot matrix
preference_columns = [
    '%s_%s' % (prefix, tool)
    for prefix in ('My', 'Prof', 'Ind')
    for tool in ('Java', 'Js', 'Python', 'R', 'SAS')
]
pref_df = gather(software_df.copy(), 'software', 'pref', preference_columns)
# NOTE(review): splitting e.g. 'My_Java' puts 'My' into 'software' and
# 'Java' into 'use' - confirm the two column names are not swapped.
name_parts = pref_df['software'].str.split('_', expand=True)
pref_df[['software', 'use']] = name_parts
fig = ff.create_scatterplotmatrix(
    pref_df.iloc[:, 1:],
    diag='histogram',
    index='software',
    height=800,
    width=800,
)
plotly.offline.plot(fig, filename='scatter_matrix.html')

# Boxplot of software preferences
# One box-plot panel per 'use' value, 'software' on x and 'pref' on y.
# NOTE(review): seaborn renamed factorplot to catplot in 0.9 - confirm the
# installed seaborn still provides this name.
sns.factorplot(x="software", y="pref", col="use", data=pref_df, kind="box")

# Group the columns from position 2 onward by software and use; only the
# grouped object is stored here, no aggregate is computed in this span.
pref_stats = pref_df.iloc[:, 2:].groupby(['software', 'use'], as_index=False)

# examine intercorrelations among software preference variables
# with correlation matrix/heat map
corr_chart(df_corr=software_df)

# descriptive statistics for software preference variables
print('\nDescriptive statistics for survey data ---------------')
from sklearn.preprocessing import StandardScaler
#%% get data
# Load the training frame and the arousal table through utils.load_object.
data_df = utils.load_object('pd_for_train.pkl')
arousals = utils.load_object('arousal.pkl')
# Flatten the 'arousal' column into a plain Python list.
arousals_list = arousals['arousal'].tolist()

# presumably attaches one arousal value per sample as the 'label' column
# used by the plots below - confirm against pu.match_label_with_sample
data_df = pu.match_label_with_sample(data_df, arousals_list)

#%%
# scatter plot matrix
#fig = data_df[['mean','max','median','min','skew']].reset_index(drop=True).scatter_matrix(asFigure=True)
#plotly.offline.plot(fig)

# Three features plus the label column that groups the points.
matrix_columns = ['delta_pq', 'delta_qr', 'slope_qr', 'label']
fig = ff.create_scatterplotmatrix(data_df[matrix_columns],
                                  diag='histogram',
                                  index='label',
                                  height=1000,
                                  width=1000)

plotly.offline.plot(fig)

#%% box plot of mean grouped by arousal
# One column of slope_qr values per label, then a box plot per column.
slope_by_label = data_df.reset_index(drop=True).pivot(columns='label',
                                                      values='slope_qr')
fig = slope_by_label.iplot(
    kind='box',
    title='slope_qr compare two group of arousal',
    yTitle='mean',
    xTitle='label',
    asFigure=True,
)

plotly.offline.plot(fig)
Ejemplo n.º 18
0
    writer = csv.writer(output)
    for val in rows:
        writer.writerow(val)



PTF_a = 1.1574 * 10**(-5) * np.exp(
    20.62 - 0.96 * np.log(Clay) - 0.66 * np.log(Sand) - 0.46 * np.log(OM) -
    8.43 * rho_d)
# The Clay**2 * P**2 coefficient used to read `1.95**10**(-2)`, a power
# tower that evaluates to about 1.007. Every other term is written as
# `a * 10**(-k)`, so the coefficient is restored to 1.95 * 10**(-2).
PTF_b = 2.78 * 10**(-6) * np.exp(
    19.52 * P - 8.97 - 2.82 * 10**(-2) * Clay + 1.81 * 10**(-4) * Sand**2 -
    9.41 * 10**(-3) * Clay**2 - 8.40 * P**2 + 7.77 * 10**(-2) * Sand * P -
    2.98 * 10**(-3) * Sand**2 * P**2 - 1.95 * 10**(-2) * Clay**2 * P**2 +
    1.73 * 10**(-5) * Sand**2 * Clay + 2.73 * 10**(-2) * Clay**2 * P +
    1.43 * 10**(-3) * Sand**2 * P - 3.5 * 10**(-6) * Clay**2 * Sand)

import plotly.figure_factory as ff
import pandas as pd
import plotly.plotly as py

# Compare the three conductivity estimates in one scatterplot matrix.
dataframe = pd.DataFrame({'Davids': Ksat_d, 'A': PTF_a, 'B': PTF_b})
fig = ff.create_scatterplotmatrix(dataframe, diag='box',
                                  colormap_type='cat',
                                  height=800, width=800)
py.iplot(fig, filename = 'Colormap as a Dictionary')

# PTF_a is plotted on x and PTF_b on y; the axis labels used to be swapped.
plt.scatter(PTF_a, PTF_b)
plt.xlabel('PTF_a')
plt.ylabel('PTF_b')
plt.title('comparison between PTF K')
plt.show()

# Scaled PTF_a on x against the 'Davids' values on y.
plt.scatter(PTF_a*10**(3), Ksat_d)
plt.ylabel('Davids')
plt.xlabel('PTF_a')
plt.title('A')
plt.show()
Ejemplo n.º 19
0
# Annotated correlation heat map of the whole frame.
plt.figure(figsize=(10, 10))
sns.heatmap(df.corr(), vmin=-1, annot=True)

# In[6]:

matrix_features = [
    "age",
    "anaemia",
    "creatinine_phosphokinase",
    "diabetes",
    "ejection_fraction",
    "high_blood_pressure",
    "platelets",
    "serum_creatinine",
    "serum_sodium",
    "sex",
    "smoking",
    "time",
]
k = df.loc[:, matrix_features]
# 1-based running number, used as the grouping column of the matrix.
k["index"] = np.arange(1, len(k) + 1)
matrix_options = dict(diag='box',
                      index='index',
                      colormap='Portland',
                      colormap_type='cat',
                      height=2400,
                      width=1800)
scat_plot = ff.create_scatterplotmatrix(k, **matrix_options)
iplot(scat_plot)

# In[7]:

fig = px.histogram(df, x="platelets", color="DEATH_EVENT")
fig.show()

# In[8]:

trace1 = go.Bar(x=df.DEATH_EVENT,
                y=df.diabetes,
                name="Diabetes",
Ejemplo n.º 20
0
fig = dict(data=data, layout=layout)
# Title and tick settings applied to the x axis of the layout.
xaxis_settings = dict(
    title='Job Satisfaction',
    tickmode='linear',
    tickfont=dict(size=10),
)
fig['layout']['xaxis'].update(xaxis_settings)
py.iplot(fig)

# **Let's combine age, job satisfaction and salary!**

# In[ ]:


# Keep only the rows with a salary below one million.
salary = salary[salary.Salary<1000000]
# Copy the three columns so that adding "index" below writes to an
# independent frame rather than to a slice of ``salary``.
dat = salary[['Age','JobSatisfaction','Salary']].copy()
dat["index"] = np.arange(len(dat))
fig = ff.create_scatterplotmatrix(dat, diag='box', index='index',
                                  colormap_type='cat',colormap='Jet',
                                  height=800, width=800)
py.iplot(fig)

# **It seems that there is no gender inequality in terms of salary in data science world.**

# In[ ]:


# Split the rows by gender, then average the salary per age.
is_male = salary['GenderSelect'] == 'Male'
is_female = salary['GenderSelect'] == 'Female'
male_salary = salary[is_male]
female_salary = salary[is_female]
male = male_salary.groupby('Age').Salary.mean().to_frame()
female = female_salary.groupby('Age').Salary.mean().to_frame()

trace0 = go.Scatter(
    x = male.index,
                                          showticklabels = False, 
                                          showgrid = True),
                            # control x and y axes of the right marginal histogram
                            yaxis3 = dict(ticks = "", showticklabels = False),
                            xaxis3 = dict(nticks = 5, showgrid = False),
                            width = 850,
                            height = 450)


# Plotting the correlation
plotly.offline.plot(plotly_fig, filename = 'Correlation between Goog and MSFT')


## Repeat the comparison for all four tech stocks at once, as a
## scatterplot matrix with histograms on the diagonal.
fig4 = ff.create_scatterplotmatrix(
    tech_rets,
    diag='histogram',
    size=5,
    height=740,
    width=880,
)

# Give every trace the same opacity and a fresh marker definition.
for trace in fig4['data']:
    trace['opacity'] = 0.7
    trace['marker'] = {
        'color': "seagreen",
        'line': {'color': 'white', 'width': 0.7},
    }

plotly.offline.plot(fig4, filename = 'Correlation between 4 Tech Giants')

## A quick glance shows all four stocks are highly correlated with each 
# other. This which means you should not put all of them in one portfolio. 
# Rather, you should pick the least correlated ones and combine them with 
# stocks from other industries that have low correlation with the chosen 
# stocks. This is a wise path towards a more diverse portfolio, and
# reducing losses.
Ejemplo n.º 22
0
    'text': "Stock Prices",
    'x': 0.5,
    'xanchor': 'center'
})

# Copy the three columns so that adding "index" below writes to an
# independent frame rather than to a slice of the original data.
nflx = nflx[['Open', 'Close', 'Volume']].copy()
nflx["index"] = np.arange(len(nflx))

# Scatterplot matrix grouped by the running row number, centred title.
fig8 = go.Figure(
    ff.create_scatterplotmatrix(nflx,
                                diag='box',
                                index='index',
                                size=3,
                                height=600,
                                width=1150,
                                colormap='RdBu',
                                title={
                                    'text':
                                    "Netflix Stock Price (Scatterplot Matrix)",
                                    'x': 0.5,
                                    'xanchor': 'center'
                                }))

dp.Report(
    dp.Group(dp.Plot(fig0),
             dp.Plot(fig1),
             dp.Plot(fig2),
             dp.Plot(fig3),
             dp.Plot(fig4),
             dp.Plot(fig5),
             dp.Plot(fig6),
Ejemplo n.º 23
0
 def create_scatterplotmatrix(*args, **kwargs):
     """Forward to ``plotly.figure_factory.create_scatterplotmatrix``.

     Calls ``FigureFactory._deprecated`` with this function's name first
     (presumably to emit a deprecation notice - confirm in that helper),
     then passes all positional and keyword arguments through unchanged
     and returns the result.
     """
     FigureFactory._deprecated('create_scatterplotmatrix')
     # Imported at call time rather than at module load.
     from plotly.figure_factory import create_scatterplotmatrix
     return create_scatterplotmatrix(*args, **kwargs)
Ejemplo n.º 24
0
# Encode the sex value as a number: male -> 1, female -> 0.
def trans(x):
    """Return 1 when *x* equals ``'male'``, otherwise 0."""
    if x == 'male':
        return 1
    return 0
# Replace the 'Sex' values with the numeric codes produced by trans().
data['Sex'] = data['Sex'].apply(trans)

# Show, per column, whether any value is missing.
print(data.isnull().any())
# NOTE(review): this fills missing values in *every* column with the
# median of 'Age', not only in 'Age' itself - confirm that is intended.
data.fillna(data.Age.median(), inplace=True)

# 可视化要安装plotly
import plotly
import plotly.figure_factory as ff

# Cast the grouping column to strings before it is used as the index.
data['Survived'] = data['Survived'].astype('str')
survival_colors = ['#32CD32', '#00F5FF']
fig = ff.create_scatterplotmatrix(
    data,
    diag='histogram',
    index='Survived',
    colormap=survival_colors,
    height=800,
    width=800,
)
plotly.offline.plot(fig, filename='p2.html')

from sklearn.tree import DecisionTreeClassifier as DTC, export_graphviz
# Columns 1-3 are the features, column 0 is the target.
X = data.iloc[:, 1:4]
y = data.iloc[:, 0]
dtc = DTC(criterion='entropy')  # split on information entropy
dtc.fit(X, y)
# Scored on the same rows the tree was fitted on.
print('准确率:', dtc.score(X, y))

# export_graphviz writes into the open handle; its return value is not
# needed and must not be bound to ``f``, which would shadow the file
# object inside its own ``with`` block.
with open('data/tree.dot', 'w') as f:
    export_graphviz(dtc, feature_names=X.columns, out_file=f)

import pydot
(graph,) = pydot.graph_from_dot_file('data/tree.dot')
graph.write_png('data/tree.png')
Ejemplo n.º 25
0
#%% get data
import utils
import preprocessing.pre_utils as pu
# Load the training frame and the valence table through utils.load_object.
data_df = utils.load_object('faps_for_train.pkl')
valence = utils.load_object('valence.pkl')
#arousals = utils.load_object('arousal.pkl')
# Flatten the 'valence' column into a plain Python list.
valence_list = valence['valence'].tolist()
#arousal_list = arousals['arousal'].tolist()

# presumably attaches one valence value per sample as the 'label' column
# used by the plot below - confirm against pu.match_label_with_sample
data_df = pu.match_label_with_sample(data_df, valence_list)
#data_df = pu.match_label_with_sample(data_df,arousal_list,col_name='arousal')

#%% plot scatter matrix
# Four feature columns plus the label column that groups the points.
matrix_columns = [0, 1, 12, 13, 'label']
fig = ff.create_scatterplotmatrix(
    data_df[matrix_columns],
    diag='histogram',
    index='label',
    height=1000,
    width=1000,
)

plotly.offline.plot(fig)

#%%
##iaps_class = iaps(r"C:\Users\DSPLab\Research\affective-monitor-model\preprocessing\IAPSinfoFile_Final.txt")
# NOTE(review): machine-specific absolute Windows path - confirm it exists
# on the machine running this cell.
iaps_class = iaps(r"E:\Research\affective-monitor-model\preprocessing")
# Look up the sample indices for picture id 2141.
sample_list_from_pic_id = iaps_class.get_sample_idx(2141)
# Fetch the entries for the 'happy' feeling.
feel_df = iaps_class.get_feeling('happy')

#%%
#path = "C:\\Users\\DSPLab\\Research\\ExperimentData"
# NOTE(review): machine-specific absolute Windows path as well.
path = "E:\\Research\\ExperimentData"
n = 1
Ejemplo n.º 26
0
    def feature_interactions(rows, radio, url, dummy):
        """Build the feature-interaction graph for the dataset named in ``url``.

        Args:
            rows: Records from which the metadata frame is built; the code
                reads its "Target", "Attribute" and "DataType" columns.
            radio: Which view to render: "top", "numeric" or "nominal".
            url: String containing ``data/<digits>``; the digits are the
                dataset id used to locate the cached pickles.
            dummy: The cached frames are only read when this equals "done".

        Returns:
            ``[]`` when ``dummy`` is not "done"; the pair
            ``("No target found", "No target found")`` when no row has
            ``Target == "true"``; otherwise an ``html.Div`` wrapping the
            graph (or a short message paragraph).
            NOTE(review): these three shapes differ - confirm against the
            callback's declared outputs.

        Raises:
            AttributeError: if ``url`` does not contain ``data/<digits>``.
        """
        # Raw string: '\d' in a normal literal is an invalid escape sequence.
        data_id = int(re.search(r'data/(\d+)', url).group(1))
        if dummy != "done":
            return []
        df = pd.read_pickle('cache/df' + str(data_id) + '.pkl')
        fi = pd.read_pickle('cache/fi' + str(data_id) + '.pkl')

        meta_data = pd.DataFrame(rows)
        try:
            is_target = meta_data["Target"] == "true"
            target_attribute = meta_data[is_target]["Attribute"].values[0]
            target_type = meta_data[is_target]["DataType"].values[0]
        except IndexError:
            return "No target found", "No target found"

        if target_type == "nominal" or target_type == "string":
            y = pd.Categorical(df[target_attribute]).codes
        else:
            y = df[target_attribute]

        # Feature interaction plots
        df = clean_dataset(df)
        numerical_features = list(
            meta_data["Attribute"][meta_data["DataType"] == "numeric"])
        nominal_features = list(
            meta_data["Attribute"][meta_data["DataType"] == "nominal"])
        # First five entries of fi['index'] of each kind
        # (presumably ordered by importance - TODO confirm).
        top_numericals = (
            fi['index'][fi['index'].isin(numerical_features)][:5])
        top_nominals = (fi['index'][fi['index'].isin(nominal_features)][:5])
        df['target'] = df[target_attribute]

        if target_type == "numeric":
            cmap_type = 'seq'
            df['target'] = y
            # Bin the numeric target into 1000 intervals and sort the rows
            # by the left edge parsed from each interval label.
            df['target'] = pd.cut(df['target'], 1000).astype(str)
            cat = df['target'].str.extract(r'\((.*),',
                                           expand=False).astype(float)
            df['bin'] = pd.Series(cat)
            df.sort_values(by='bin', inplace=True)
            df.drop('bin', axis=1, inplace=True)
        else:
            cmap_type = 'cat'
            try:
                df['target'] = df['target'].astype(int)
            except ValueError:
                print("target not converted to int")
            df.sort_values(by='target', inplace=True)
            df['target'] = df['target'].astype(str)

        def _scatter_matrix_graph(frame, title, height, width):
            # Scatterplot matrix of `frame`, grouped by its 'target' column.
            matrix = ff.create_scatterplotmatrix(
                frame,
                title=title,
                diag='box',
                index='target',
                colormap_type=cmap_type,
                height=height,
                width=width)
            return dcc.Graph(figure=matrix)

        def _parcats_graph(frame, colors, width):
            # Parallel-categories plot with one dimension per column of `frame`.
            dimensions = [{
                'label': column,
                'values': list(frame[column].values)
            } for column in frame.columns]
            trace = go.Parcats(dimensions=dimensions,
                               line={
                                   'color': colors,
                                   'colorscale': 'Portland'
                               },
                               hoveron='color',
                               hoverinfo='count+probability',
                               arrangement='freeform')
            layout = go.Layout(autosize=False, width=width, height=800)
            return dcc.Graph(figure=go.Figure(data=[trace], layout=layout))

        # Each subset is copied before 'target' is added, so the assignment
        # never writes into a slice of `df`.
        if radio == "top":
            top_features = df[fi['index'][0:5].values].copy()
            top_features['target'] = df['target']
            if len(top_numericals):
                graph = _scatter_matrix_graph(top_features,
                                              'Top feature interactions',
                                              height=800,
                                              width=900)
            else:
                # NOTE(review): `y` was computed before `df` was cleaned and
                # sorted - confirm it still lines up with these rows.
                graph = _parcats_graph(top_features, y, width=1200)
        elif radio == "numeric":
            if len(top_numericals):
                df_num = df[top_numericals].copy()
                df_num['target'] = df['target']
                graph = _scatter_matrix_graph(
                    df_num,
                    'Top numeric feature interactions',
                    height=1000,
                    width=1000)
            else:
                graph = html.P("No numericals found")
        elif radio == "nominal":
            if len(top_nominals):
                df_nom = df[top_nominals].copy()
                df_nom['target'] = df['target']
                graph = _parcats_graph(
                    df_nom,
                    pd.Categorical(df_nom['target']).codes,
                    width=1000)
            else:
                graph = html.P("No nominals found")
        else:
            # Previously `graph` stayed unbound here and the return below
            # raised UnboundLocalError.
            graph = html.P("Unknown feature selection")

        return html.Div(graph)
Ejemplo n.º 27
0
checkpokemonperformance("VenusaurMega Venusaur")

# In[ ]:

checkpokemonperformance("Regigigas")

# # 9. Scatterplot matrix
# Scatterplot matrix contains all the pairwise scatter plots of the variables on a single page in a matrix format. That is, if there are k variables, the scatterplot matrix will have k rows and k columns and the ith row and jth column of this matrix is a plot of Xi versus Xj.

# **9.1 Scatterplot matrix of attributes with boxplots**

# In[ ]:

# Columns 5..11 of df (selected by position), grouped by the 'Generation'
# column, with box plots on the diagonal.
fig = ff.create_scatterplotmatrix(df.iloc[:, 5:12],
                                  index='Generation',
                                  diag='box',
                                  size=2,
                                  height=800,
                                  width=800)
iplot(fig, filename='Scatterplotmatrix.png', image='png')

# # 10. Violin plots
#
# A violin plot is a method of plotting numeric data. It is similar to box plot with a rotated kernel density plot on each side.
#
# A violin plot is more informative than a plain box plot. In fact while a box plot only shows summary statistics such as mean/median and interquartile ranges, the violin plot shows the full distribution of the data. The difference is particularly useful when the data distribution is multimodal (more than one peak). In this case a violin plot clearly shows the presence of different peaks, their position and relative amplitude. This information could not be represented with a simple box plot which only reports summary statistics. The inner part of a violin plot usually shows the mean (or median) and the interquartile range. In other cases, when the number of samples is not too high, the inner part can show all sample points (with a dot or a line for each sample).
#
# Source: [Wikipedia](https://en.wikipedia.org/wiki/Violin_plot)
#
# **10.1 Violinplot of all stats**

# In[ ]:
Ejemplo n.º 28
0
    plot_bgcolor='#1F2024',
    paper_bgcolor='#1F2024',
    font_color='#DCDCDC',
)

# Modal 2: correlation
# Columns of `dfs` shown in the scatterplot matrix.
colonnes = ['world_rank', 'total_score', 'research', 'teaching']
data_pca = dfs[colonnes]
# Histograms on the diagonal; fixed 1000x500 figure.
lay1_fig2 = ff.create_scatterplotmatrix(
    data_pca,
    diag='histogram',
    colormap='Viridis',
    colormap_type='cat',
    height=500,
    width=1000,
)

modal = html.Div([
    dbc.Button(style={
        'margin-bottom': '5vh',
        'margin-left': '2vw',
        'backgroundColor': '#181B1E'
    },
               children=["Matrice des corrélations"],
               id="open-xl"),
    dbc.Modal(
        style={'backgroundColor': '#181B1E'},
        children=[
            dbc.ModalHeader("Matrice des Corrélation"),
fig1_1 = go.Figure(data=trace_heat)
fig1_1.update_layout(title="Correlation Matrix")

## ------ fig1_2 : Correlation Matrix ------ # first figure on page 1

# Take an explicit copy: the "index" column added below must not be written
# into a slice of `df` (that triggers pandas' SettingWithCopyWarning).
data = df[["research", "teaching", "citations"]].copy()
# 1-based running number, used as the grouping/colour index of the matrix.
data["index"] = np.arange(1, len(data) + 1)

# scatter matrix
fig1_2 = ff.create_scatterplotmatrix(
    data,
    diag='box',
    index='index',
    colormap='Portland',
    colormap_type='cat',
    title='Relations between research, teaching and citations criteria',
    text=df[['university_name', 'world_rank']],
    hovertemplate="<b> University :</b> %{text[0]}<br>" +
    "<b> World rank :</b> %{text[1]}<br>" + "<extra></extra>",
)  #labels={"index": "world <br> rank"}, height=700, width=700)

##############
### Page 2 ###
##############
## ------ fig2_1 : Ebouli des valeurs propres ------ # première figure sur la page 2
# voir plus bas

## ------ fig2_2 : Cercle des corrélations ------ # 2ème figure sur la page 2
# voir plus bas
Ejemplo n.º 30
0
 def create_scatterplotmatrix(*args, **kwargs):
     """Forward to ``plotly.figure_factory.create_scatterplotmatrix``.

     Calls ``FigureFactory._deprecated`` with this function's name first,
     then passes all positional and keyword arguments through unchanged
     and returns the result.
     """
     FigureFactory._deprecated('create_scatterplotmatrix')
     # Imported at call time, as in the original.
     from plotly import figure_factory
     return figure_factory.create_scatterplotmatrix(*args, **kwargs)
Ejemplo n.º 31
0
# <font color='black'>
# * import figure factory as ff
# * create_scatterplotmatrix = creates scatter plot
#     * data2015 = prepared data. It includes research, international and total scores with index from 1 to 401
#     * colormap = color map of scatter plot
#     * colormap_type = color type of scatter plot
#     * height and width = size of the figure in pixels

# In[ ]:

# import figure factory
import plotly.figure_factory as ff
# prepare data
dataframe = timesData[timesData.year == 2015]
# .copy() yields an independent frame, so adding the "index" column below
# does not assign into a slice of timesData (SettingWithCopyWarning).
data2015 = dataframe.loc[:, ["research", "international", "total_score"]].copy()
# 1-based running number, used as the grouping/colour index of the matrix.
data2015["index"] = np.arange(1, len(data2015) + 1)
# scatter matrix
fig = ff.create_scatterplotmatrix(data2015,
                                  diag='box',
                                  index='index',
                                  colormap='Portland',
                                  colormap_type='cat',
                                  height=700,
                                  width=700)
iplot(fig)

# # Conclusion
# * If you like it, thank you for your upvotes.
# * If you have any questions, I will be happy to hear them.
# ## To be continued
Ejemplo n.º 32
0
def plot_scatterplotmatrix(df, file_name):
    """Plot a scatterplot matrix of ``df`` and hand it to ``plot``.

    The matrix uses histograms on the diagonal, a 2000x1500 layout and
    font size 7; ``file_name`` is passed to ``plot`` as ``filename``.
    """
    matrix = ff.create_scatterplotmatrix(df, diag='histogram')
    # Size and font are applied in a single layout update.
    matrix.layout.update(width=2000, height=1500, font={'size': 7})
    plot(matrix, filename=file_name)