コード例 #1
0
ファイル: plotters.py プロジェクト: karunkannan/pymeda
 def plot(self):
     """Build a scatterplot matrix of the clustered samples and pass it to ``makeplot``.

     Every cluster in ``self.DS.clusters[self.DS.levels]`` is transposed and
     stacked horizontally; only the first three rows of the stacked array
     (named "Dim 0".."Dim 2") are plotted. Each sample is labelled
     "Cluster <i>" after the position of its cluster in the list.

     Side effect: ``self.shortname`` is prefixed with
     ``self.DS.shortclustname`` on every call.
     NOTE(review): calling ``plot`` twice prefixes the name twice -- confirm
     this is intended.

     Returns
     -------
     Whatever ``self.makeplot(fig, "agg/" + self.shortname)`` returns.
     """
     title = self.titlestring % (self.DS.name, self.DS.clustname,
                                 self.DS.levels)
     self.shortname = self.DS.shortclustname + self.shortname

     # presumably each cluster is an (n_samples, n_dims) array -- TODO confirm
     clusters = self.DS.clusters[self.DS.levels]
     colors = get_spaced_colors(len(clusters))

     # Keep at most the first three dimensions so the matrix stays readable.
     samples = np.hstack([c.T for c in clusters])[:3, :]
     labels = np.hstack([c.shape[0] * [i] for i, c in enumerate(clusters)])

     df = pd.DataFrame(
         samples.T, columns=["Dim %d" % i for i in range(samples.shape[0])])
     df["label"] = ["Cluster %d" % i for i in labels]

     fig = ff.create_scatterplotmatrix(
         df, diag='box', index="label", colormap=colors)
     # In "div" mode the title is cleared instead of being set.
     fig["layout"]["title"] = title if self.plot_mode != "div" else None
     return self.makeplot(fig, "agg/" + self.shortname)
コード例 #2
0
def scatter_matrix(df):
    """Plot a scatterplot matrix of ``df`` with box plots on the diagonal.

    Rows containing missing values are dropped first. The figure is handed
    to ``py.plot`` with the filename ``/tmp/scatter_matrix.html``.
    """
    complete_rows = df.dropna()
    matrix_options = dict(diag='box',
                          height=1000,
                          width=1900,
                          colormap='Portland')
    figure = ff.create_scatterplotmatrix(complete_rows, **matrix_options)
    py.plot(figure, filename='/tmp/scatter_matrix.html')
コード例 #3
0
def scatterplot_matrix(x, y):
    """Plot a histogram-diagonal scatterplot matrix of ``x`` joined with ``y``.

    ``x`` and ``y`` are concatenated column-wise with an inner join on their
    index; the combined frame must contain a ``'Group'`` column, which is
    used as the grouping index of the matrix.
    """
    combined = pd.concat([x, y], axis=1, join='inner')
    matrix_options = dict(diag='histogram',
                          index='Group',
                          height=800,
                          width=800)
    figure = ff.create_scatterplotmatrix(combined, **matrix_options)
    py.iplot(figure, filename='Histograms along Diagonal Subplots')
コード例 #4
0
def visualize(df):
    """Produce three plots of ``df``: histograms, a location scatter and a pair matrix.

    1. ``df.iplot`` histogram subplots (50 bins) -> /tmp/histogram-subplots.html
    2. ``df.iplot`` scatter of ``longitude`` vs ``latitude`` -> /tmp/loc.html
    3. scatterplot matrix of four housing columns -> /tmp/scatterplotmatrix.html
    """
    histogram_options = dict(kind='histogram',
                             bins=50,
                             subplots=True,
                             filename='/tmp/histogram-subplots.html',
                             asPlot=True)
    df.iplot(**histogram_options)

    location_options = dict(kind="scatter",
                            x="longitude",
                            y="latitude",
                            filename='/tmp/loc.html',
                            asPlot=True)
    df.iplot(**location_options)

    pair_columns = [
        "housing_median_age",
        "total_rooms",
        "median_income",
        "median_house_value",
    ]
    pair_figure = ff.create_scatterplotmatrix(df[pair_columns],
                                              diag='histogram',
                                              width=1000,
                                              height=1000)
    py.plot(pair_figure, filename='/tmp/scatterplotmatrix.html')
コード例 #5
0
ファイル: stockdb.py プロジェクト: oshea00/fintools
def plotScatter(df, height, width):
    """Return a scatterplot matrix of one-period percentage changes as a div.

    Parameters
    ----------
    df : object exposing ``pct_change``
        Source data; ``df.pct_change(1)`` is what gets plotted.
    height, width :
        Forwarded unchanged to ``ff.create_scatterplotmatrix``.

    Returns
    -------
    The value returned by ``plot(..., output_type='div', ...)``.
    """
    changes = df.pct_change(1)
    figure = ff.create_scatterplotmatrix(changes,
                                         diag='histogram',
                                         height=height,
                                         width=width)

    # Set fixedrange=True on every x/y axis entry of the layout.
    layout = figure['layout']
    for key in layout:
        if key.startswith(('xaxis', 'yaxis')):
            layout[key].update(fixedrange=True)

    display_config = dict(displayModeBar=True, showLink=False)
    return plot(figure, output_type='div', config=display_config)
コード例 #6
0
    def plot_overall_results(self, save_fig=False):
        """Create a pairplot with train and test data.

        ``self.train`` and ``self.test`` are concatenated row-wise and a
        ``"label"`` column ("train" / "test") is added so the scatterplot
        matrix can group the two sets.

        Parameters
        ----------
        save_fig : bool, optional
            If True, write the figure to ``models/<self.name>/img/pairplot.html``
            and ``pairplot.png`` relative to this module's directory. If
            False, nothing is written. The default is False.

        Returns
        -------
        fig : Plotly graph_objects.Figure()
            The figure object with the plot.
        """
        df = pd.concat([self.train, self.test], axis=0)
        # Concatenation keeps order: every train row first, then the test rows.
        df["label"] = ["train"] * len(self.train) + ["test"] * len(self.test)

        # Shared styling applied to every x and y axis of the matrix.
        axes_default = dict(
            gridcolor="lightgray",
            showline=True,
            linewidth=1.5,
            linecolor="black",
            mirror=True,
            exponentformat="power",
            zerolinecolor="lightgray",
        )

        fig = create_scatterplotmatrix(
            df,
            diag="box",
            index="label",
            title="",
        )
        fig.update_layout(
            width=1500,
            height=1500,
            plot_bgcolor="white",
            hovermode=False,
        )
        fig.update_xaxes(**axes_default)
        fig.update_yaxes(**axes_default)

        if save_fig:
            path = Path(__file__).parent
            fig.write_html(str(path / f"models/{self.name}/img/pairplot.html"))
            fig.write_image(str(path / f"models/{self.name}/img/pairplot.png"))

        return fig
コード例 #7
0
def plot_scatter_matrix(file,
                        save_path='../../figs/',
                        save_name='scatter_matrix'):
    """Plot a per-year scatterplot matrix from a headerless three-column CSV.

    The CSV columns are read as ``year``, ``brought`` and ``num``. ``num`` is
    divided by 10000, the table is pivoted so each distinct ``brought`` value
    becomes a column, and missing cells are filled with 0 before plotting.

    Parameters
    ----------
    file : str
        Path of the CSV file to read.
    save_path : str, optional
        Directory part of the output path.
    save_name : str, optional
        Output file name without the ``.html`` extension.

    Returns
    -------
    None
        Also returned (after printing a message) when ``file`` does not exist.
    """
    if not os.path.isfile(file):
        print('file not exist')
        return None

    scatter_matrix = pd.read_csv(file, names=['year', 'brought', 'num'])
    scatter_matrix['num'] = scatter_matrix['num'] / 10000.0
    scatter_matrix = scatter_matrix.pivot(index='year',
                                          columns='brought',
                                          values='num').reset_index()
    # The builtin ``str`` replaces the ``np.str`` alias, which newer NumPy
    # releases no longer provide.
    scatter_matrix['year'] = scatter_matrix['year'].astype(str)
    # fillna returns a new frame; the result must be kept or it is a no-op.
    scatter_matrix = scatter_matrix.fillna(0)

    fig = ff.create_scatterplotmatrix(
        scatter_matrix,
        diag='box',
        index='year',
        height=800,
        width=800,
        title='Distribution of every brought each year')
    save_path = os.path.join(save_path, save_name)
    plot(fig, filename=save_path + '.html', auto_open=False)
コード例 #8
0
ファイル: soil_properties_EXU.py プロジェクト: dgketchum/etrm
                                   8.43 * rho_d)
# Pedotransfer estimate "B": an exponential regression on P, Clay and Sand.
# The Clay**2 * P**2 coefficient is 1.95e-2; it was previously written as
# ``1.95**10**(-2)`` (a power, roughly 1.007), unlike every other term, which
# uses ``c * 10**(k)``.
# NOTE(review): presumably P is porosity and Clay/Sand are percentages --
# confirm against the source of the regression.
PTF_b = 2.78 * 10**(-6) * np.exp(
    19.52 * P - 8.97 - 2.82 * 10**(-2) * Clay + 1.81 * 10**(-4) * Sand**2 -
    9.41 * 10**(-3) * Clay**2 - 8.40 * P**2 + 7.77 * 10**(-2) * Sand * P -
    2.98 * 10**(-3) * Sand**2 * P**2 - 1.95 * 10**(-2) * Clay**2 * P**2 +
    1.73 * 10**(-5) * Sand**2 * Clay + 2.73 * 10**(-2) * Clay**2 * P +
    1.43 * 10**(-3) * Sand**2 * P - 3.5 * 10**(-6) * Clay**2 * Sand)

import plotly.figure_factory as ff
import pandas as pd
import plotly.plotly as py

# Scatterplot matrix comparing the three conductivity estimates.
dataframe = pd.DataFrame({'Davids': Ksat_d, 'A': PTF_a, 'B': PTF_b})
fig = ff.create_scatterplotmatrix(dataframe,
                                  diag='box',
                                  colormap_type='cat',
                                  height=800,
                                  width=800)
py.iplot(fig, filename='Colormap as a Dictionary')

# PTF_a is plotted on the x axis and PTF_b on the y axis, so the labels
# must follow that order (they were previously swapped).
plt.scatter(PTF_a, PTF_b)
plt.xlabel('PTF_a')
plt.ylabel('PTF_b')
plt.title('comparison between PTF K')
plt.show()

# PTF_a is scaled by 10**3 before being compared with Ksat_d.
plt.scatter(PTF_a * 10**(3), Ksat_d)
plt.ylabel('Davids')
plt.xlabel('PTF_a')
plt.title('A')
plt.show()
コード例 #9
0
import pandas as pd
import numpy as np

# Options for pandas: show up to 30 columns when a frame is printed.
pd.options.display.max_columns = 30

# Load the data set, using the first CSV column as the index.
df = pd.read_csv('data/blue_jays.csv', index_col=0)

# The result of head() is discarded here; only print() below produces output
# when this runs as a plain script.
df.head()
print(df)

from plotly.offline import iplot
import plotly.figure_factory as ff

# Scatterplot matrix of three measurements, grouped by the KnownSex column.
figure = ff.create_scatterplotmatrix(df[['KnownSex', 'Head', 'Mass', 'Skull']],
                                     index='KnownSex',
                                     height=800,
                                     width=800)
iplot(figure)

# Pairwise correlations between columns.
# NOTE(review): if df holds non-numeric columns (KnownSex presumably is one),
# recent pandas versions may require numeric_only=True here -- confirm.
corrs = df.corr()

# Annotated heatmap of the correlations rounded to two decimals; the figure
# is built but its display call below is commented out.
figure = ff.create_annotated_heatmap(z=corrs.round(2).values,
                                     x=list(corrs.columns),
                                     y=list(corrs.index),
                                     showscale=True)
#iplot(figure)

figure = ff.create_scatterplotmatrix(df[['KnownSex', 'Head', 'Mass', 'Skull']],
                                     diag='histogram',
                                     index='KnownSex',
                                     height=800,
コード例 #10
0
ファイル: layouts.py プロジェクト: ines-data/3-PCA_AL
        html.Tbody([
            html.Tr([
                html.Td(dataframe.iloc[i][col]) for col in dataframe.columns
            ]) for i in range(min(len(dataframe), max_rows))
        ])
    ])

def generate_table3(dataframe):
    """Return a paginated ``dash_table.DataTable`` showing ``dataframe``.

    Column definitions are taken from ``dataframe`` itself (previously they
    were read from an unrelated module-level ``df``), so the headers always
    match the records being displayed. Ten rows are shown per page.
    """
    return dash_table.DataTable(
        data=dataframe.to_dict('records'),
        columns=[{'id': c, 'name': c} for c in dataframe.columns],
        page_size=10,
    )

# Module-level figures built once from df2.
# fig1: scatterplot matrix with histograms on the diagonal.
fig1 = ff.create_scatterplotmatrix(df2,        diag='histogram',
                                  colormap='Viridis',
                                  colormap_type='cat',
                                  height=800, width=1400)
# fig: scatterplot matrix with box plots on the diagonal, grouped by the
# 'index' column of df2.
fig = ff.create_scatterplotmatrix(df2, diag='box',index = 'index',colormap='Portland',colormap_type='cat',height=700, width=700)
# fig2-fig4: world_rank against research / teaching / citations.
fig2 = px.scatter(df2, x="world_rank", y="research", color="world_rank")
fig3 = px.scatter(df2, x="world_rank", y="teaching", color="world_rank")
fig4 = px.scatter(df2, x="world_rank", y="citations", color="world_rank")
# fig5: research against teaching, still coloured by world_rank.
fig5 = px.scatter(df2, x="research", y="teaching", color="world_rank")

text = '''
On va analyser les données des 50 premières Universités et essayer de voir s'il y a 
des correlations entre certains critères :
- Dans un premier temps on va analyser les données de manières classique.
- Dans un deuxième temps on utiler la méthode de l'analyse des composantes principals.
''' 
text1 = '''
On remaque une correlation entre :
コード例 #11
0
                                                 ax=axes[i - 3, 1],
                                                 rot=0)

    elif i < 9:
        ax = customers[item].value_counts().plot(kind='bar',
                                                 ax=axes[i - 6, 2],
                                                 rot=0)
    ax.set_title(item)

# #### Matrix comparing some variables which have high correlation with Churn

# In[21]:

# Scatterplot matrix of three numeric columns. ``.loc[:1000, ...]`` slices by
# index label and includes the row labelled 1000.
fig = ff.create_scatterplotmatrix(
    customers.loc[:1000, ['tenure', 'MonthlyCharges', 'TotalCharges']],
    diag='histogram',
    height=1000,
    width=1000)
iplot(fig, filename='Histograms along Diagonal Subplots')

# ### Churn Analysis
# Now I will analyse the Churn variable

# In[22]:

# Bar chart of the share (in percent) of customers per Churn value.
colors = ['black', 'blue']
ax = (customers['Churn'].value_counts() * 100.0 / len(customers)).plot(
    kind='bar', stacked=True, rot=0, color=colors, figsize=(8, 6))

# Format the y ticks as percentages.
ax.yaxis.set_major_formatter(mtick.PercentFormatter())
ax.set_ylabel('% Customers', size=15)
コード例 #12
0
#   y=y,
#   z=z,
#   mode='markers',
#   marker=dict(
#     size=12,
#     line=dict(
#       color='rgba(217, 217, 217, 0.14)',
#       width=0.5
#     ),
#     opacity=0.8
#   )
# )

# data = [trace]

# layout = go.Layout(

# )

# fig = go.Figure(data=data, layout=layout)

# py.plot(fig, '3dscatter.html')

# Scatterplot matrix of ``train`` grouped by the 'Species' column, with an
# explicit colour per category name.
# NOTE(review): the colormap keys must equal the values found in 'Species';
# confirm the data really uses 'Irissetosa' etc. rather than e.g.
# 'Iris-setosa'.
fig = ff.create_scatterplotmatrix(train, diag='box', index='Species',
  colormap= dict(
    Irissetosa = '#00F5FF',
    Irisversicolor = '#32CD32',
    Irisvirginica = '#DAA520'
  ),
 colormap_type='cat', height=800, width=800)
py.plot(fig, filename='Scatter')
コード例 #13
0
# Single scatter trace of the two pre-selected feature arrays ``x`` and ``y``.
# Plotly's classes are capitalised (go.Scatter / go.Layout / go.Figure) and
# the marker mode is spelled 'markers'.
trace = go.Scatter(
    x=x,
    y=y,
    mode='markers',
)

layout = go.Layout(
    title="iris dataset",
    hovermode='closest',
    xaxis=dict(
        title='sepal length(cm)',
        ticklen=5,
        zeroline=False,
        gridwidth=2,
    ),
    yaxis=dict(title='sepal width (cm)', ticklen=5, gridwidth=2),
    showlegend=False,
)
data = [trace]

fig = go.Figure(data=data, layout=layout)
plot(fig)

# One column per feature name (a flat list, not a list wrapped in a list),
# plus a 'class' column holding the target name of every sample.
df = pd.DataFrame(iris_data.data, columns=iris_data.feature_names)
df['class'] = [iris_data.target_names[i] for i in iris_data.target]

import plotly.figure_factory as ff

# Scatterplot matrix grouped by class, with histograms on the diagonal.
fig = ff.create_scatterplotmatrix(df, index='class', diag='histogram', size=10,
                                  height=800, width=800)

plot(fig)
コード例 #14
0
    df_AF['Adolescent fertility'][df_AF.Year == '2017'],
    df_AF['Adolescent fertility'][df_AF.Year == '2018']
]
group_labels = ['2014', '2015', '2016', '2017', '2018']
fig_AF = ff.create_distplot(hist_data, group_labels, bin_size=5)
fig_AF.update_layout(title='Distribution of Adolescent fertility 2014-2018')

# Adolescent fertility vs GDP
dataframe = df_AF_for_GDP
# Select the 2018 rows and the two plotted columns in a single .loc call and
# take an explicit copy, so adding the "index" column below writes to an
# independent frame instead of a slice of df_AF_for_GDP.
data = dataframe.loc[dataframe.Year == '2018',
                     ["GDP", "Adolescent fertility"]].copy()
# A running 1..n identifier is used as the grouping index, so every row gets
# its own category in the matrix.
data["index"] = np.arange(1, len(data) + 1)
fig_AF_vs_GDP = ff.create_scatterplotmatrix(data,
                                            diag='box',
                                            index='index',
                                            colormap='RdBu',
                                            colormap_type='cat',
                                            height=400,
                                            width=510)
fig_AF_vs_GDP.update_layout(title='Adolescent fertility vs GDP')

# Adolescent fertility vs Secondary school enrollment
fig_AF_vs_SES = px.scatter(df_AF_for_SES,
                           x="Secondary school enrollment",
                           y="Adolescent fertility",
                           size="Adolescent fertility",
                           hover_name="Country",
                           color="Country Code",
                           animation_frame="Year",
                           animation_group="Country",
                           size_max=35,
コード例 #15
0
ファイル: app.py プロジェクト: satish-rawat/EDA-APP
def visualisation(df):
    """Render the chart-selection widgets and the Plotly charts they select.

    Three select boxes choose an X column, a Y column and a colour/category
    column from the module-level ``col_name`` options; a sidebar select box
    chooses the chart type (``index=1``, i.e. 'Scatter Plot', by default).
    The matching branch builds one or more figures and passes each to
    ``st.plotly_chart``. Any chart type without its own branch
    ('Distribution Plot') falls through to the final ``else``.

    Parameters
    ----------
    df :
        Data handed to the Plotly helpers and indexed with the selected
        column names (presumably a pandas DataFrame -- it is also used with
        ``groupby`` and ``.columns``).

    Returns
    -------
    None
    """
    x_axis = st.selectbox(label='Select Column X-Axis',
                          options=col_name,
                          key='x_axis')
    y_axis = st.selectbox(label='Select Column Y-Axis',
                          options=col_name,
                          key='y_axis')
    color = st.selectbox(label='Select Categorical Column',
                         options=col_name,
                         key='color')
    chart_type = st.sidebar.selectbox(label='Select Chart Type',
                                      options=[
                                          'Pair Plot', 'Scatter Plot',
                                          'Line Chart', 'Pie Chart',
                                          'Strip Plot', 'Violin Plot',
                                          'Histogram', 'Distribution Plot',
                                          'Sunburst Chart', 'Box Plot'
                                      ],
                                      index=1)
    if chart_type == 'Pair Plot':
        fig1 = ff.create_scatterplotmatrix(df,
                                           diag='histogram',
                                           height=800,
                                           width=800)
        st.plotly_chart(fig1)
        fig2 = ff.create_scatterplotmatrix(df,
                                           index=color,
                                           diag='box',
                                           height=800,
                                           width=800,
                                           colormap_type='cat')
        st.plotly_chart(fig2)

    elif chart_type == 'Scatter Plot':
        st.header(f'{x_axis} vs {y_axis}')
        fig = px.scatter(df, x=x_axis, y=y_axis)
        st.plotly_chart(fig)
        st.header(
            f'{x_axis} vs {y_axis} and legends are {color} columns value')
        fig1 = px.scatter(df, x=x_axis, y=y_axis, color=color, hover_data=df)
        st.plotly_chart(fig1)

    elif chart_type == 'Line Chart':
        st.header(f'{x_axis} vs {y_axis}')
        fig = px.line(df, x=x_axis, y=y_axis, hover_data=df)
        st.plotly_chart(fig)
        st.header(
            f'{x_axis} vs {y_axis} and legends are {color} columns value')
        fig1 = go.Figure()
        # The loop variable is not called ``color`` so the widget selection
        # is not overwritten while iterating over the groups.
        for group_name, groupdf in df.groupby(color):
            fig1.add_trace(
                go.Scatter(x=groupdf[x_axis],
                           y=groupdf[y_axis],
                           name=group_name,
                           mode='markers'))
        # Draw once, after every group's trace has been added (previously the
        # partially built figure was rendered again on every iteration).
        st.plotly_chart(fig1)

    elif chart_type == 'Pie Chart':
        st.header(f'Pie Chart of {color} column')
        fig = px.pie(df, names=df[color])
        st.plotly_chart(fig)
        st.header(f'Donut Chart of {color} column')
        fig1 = px.pie(df, names=df[color], hole=0.3)
        st.plotly_chart(fig1)

    elif chart_type == 'Strip Plot':
        st.header(f'{x_axis} vs {y_axis}')
        fig = px.strip(df, x=df[x_axis], y=df[y_axis])
        st.plotly_chart(fig)
        st.header(
            f'{x_axis} vs {y_axis} and legends are {color} columns value')
        fig1 = px.strip(df, x=df[x_axis], y=df[y_axis], color=df[color])
        st.plotly_chart(fig1)

    elif chart_type == 'Violin Plot':
        st.header(f'{x_axis} vs {y_axis}')
        fig = px.violin(df, x=df[x_axis], y=df[y_axis], points='all')
        st.plotly_chart(fig)
        st.header(
            f'{x_axis} vs {y_axis} and legends are {color} columns value')
        fig1 = px.violin(df,
                         x=df[x_axis],
                         y=df[y_axis],
                         color=df[color],
                         points='all')
        st.plotly_chart(fig1)

    elif chart_type == 'Histogram':
        fig = px.histogram(df, x=df[x_axis])
        st.plotly_chart(fig)
        # NOTE(review): in each pair below the coloured variant passes the X
        # column as ``y`` as well; the Y column may have been intended --
        # confirm before changing.
        if st.checkbox(label='Y-Axis is Sum'):
            fig1 = px.histogram(df, x=df[x_axis], y=df[y_axis], histfunc='sum')
            st.plotly_chart(fig1)
            fig2 = px.histogram(df,
                                x=df[x_axis],
                                y=df[x_axis],
                                color=df[color],
                                histfunc='sum')
            st.plotly_chart(fig2)
        if st.checkbox(label='Y-Axis is Count'):
            fig3 = px.histogram(df,
                                x=df[x_axis],
                                y=df[y_axis],
                                histfunc='count')
            st.plotly_chart(fig3)
            fig4 = px.histogram(df,
                                x=df[x_axis],
                                y=df[x_axis],
                                color=df[color],
                                histfunc='count')
            st.plotly_chart(fig4)
        if st.checkbox(label='Y-Axis is Average'):
            fig5 = px.histogram(df, x=df[x_axis], y=df[y_axis], histfunc='avg')
            st.plotly_chart(fig5)
            fig6 = px.histogram(df,
                                x=df[x_axis],
                                y=df[x_axis],
                                color=df[color],
                                histfunc='avg')
            st.plotly_chart(fig6)

    elif chart_type == 'Sunburst Chart':
        path = st.multiselect(label='Select The Path', options=df.columns)
        fig = px.sunburst(df, path=path, color=color, values=x_axis)
        st.plotly_chart(fig)

    elif chart_type == 'Box Plot':
        # 'all' was previously written with a stray backtick ('all`'), which
        # is not a valid ``points`` value.
        fig = px.box(df, x=df[x_axis], y=df[y_axis], points='all')
        st.plotly_chart(fig)
        fig1 = px.box(df,
                      x=df[x_axis],
                      y=df[y_axis],
                      points='all',
                      color=df[color])
        st.plotly_chart(fig1)

    else:
        # Remaining chart type ('Distribution Plot'): histograms with a box
        # plot drawn in the margin.
        fig1 = px.histogram(df, x=df[x_axis], marginal="box")
        st.plotly_chart(fig1)
        fig2 = px.histogram(df, x=df[x_axis], y=df[y_axis], marginal="box")
        st.plotly_chart(fig2)
        fig3 = px.histogram(df,
                            x=df[x_axis],
                            y=df[y_axis],
                            color=df[color],
                            marginal="box")
        st.plotly_chart(fig3)
# Scatter of personal R preference against personal Python preference, drawn
# as hollow blue markers on the existing axes ``ax``.
scatter_plot = ax.scatter(survey_df['My_R'],
                          survey_df['My_Python'],
                          facecolors='none',
                          edgecolors='blue')
plt.show()

# This is where we create the offline scatter plot matrix
# ``gather`` is called on a copy of software_df with the key column name
# 'software', the value column name 'pref' and the list of columns to gather
# (presumably a wide-to-long reshape -- the helper is defined elsewhere).
pref_df = gather(software_df.copy(), 'software', 'pref', [
    'My_Java', 'My_Js', 'My_Python', 'My_R', 'My_SAS', 'Prof_Java', 'Prof_Js',
    'Prof_Python', 'Prof_R', 'Prof_SAS', 'Ind_Java', 'Ind_Js', 'Ind_Python',
    'Ind_R', 'Ind_SAS'
])
# Split names such as 'My_Java' at the underscore.
# NOTE(review): the first piece ('My'/'Prof'/'Ind') lands in 'software' and
# the second ('Java', 'R', ...) in 'use'; the column names look swapped --
# confirm.
pref_df[['software', 'use']] = pref_df['software'].str.split('_', expand=True)
fig = ff.create_scatterplotmatrix(pref_df.iloc[:, 1:],
                                  diag='histogram',
                                  index='software',
                                  height=800,
                                  width=800)
plotly.offline.plot(fig, filename='scatter_matrix.html')

# Boxplot of software preferences
# NOTE(review): newer seaborn releases renamed factorplot to catplot --
# confirm the installed version still provides factorplot.
sns.factorplot(x="software", y="pref", col="use", data=pref_df, kind="box")

# Grouped view by (software, use); no aggregation is applied on this line.
pref_stats = pref_df.iloc[:, 2:].groupby(['software', 'use'], as_index=False)

# examine intercorrelations among software preference variables
# with correlation matrix/heat map
corr_chart(df_corr=software_df)

# descriptive statistics for software preference variables
print('\nDescriptive statistics for survey data ---------------')
コード例 #17
0
from sklearn.preprocessing import StandardScaler
#%% get data
# Load the pickled training frame and the arousal labels, then attach the
# labels to the samples through the project helper.
data_df = utils.load_object('pd_for_train.pkl')
arousals = utils.load_object('arousal.pkl')
arousals_list = arousals['arousal'].tolist()

data_df = pu.match_label_with_sample(data_df, arousals_list)

#%%
# scatter plot matrix
#fig = data_df[['mean','max','median','min','skew']].reset_index(drop=True).scatter_matrix(asFigure=True)
#plotly.offline.plot(fig)

# Scatterplot matrix of three features, grouped by the 'label' column.
fig = ff.create_scatterplotmatrix(
    data_df[['delta_pq', 'delta_qr', 'slope_qr', 'label']],
    diag='histogram',
    index='label',
    height=1000,
    width=1000)

plotly.offline.plot(fig)

#%% box plot of slope_qr grouped by label
# The pivot puts one column per label value, so each box is one group.
# NOTE(review): the y-axis title says 'mean' although the plotted values are
# slope_qr -- confirm the intended axis title.
fig = data_df.reset_index(drop=True).pivot(
    columns='label',
    values='slope_qr').iplot(kind='box',
                             title='slope_qr compare two group of arousal',
                             yTitle='mean',
                             xTitle='label',
                             asFigure=True)

plotly.offline.plot(fig)
コード例 #18
0
    writer = csv.writer(output)
    for val in rows:
        writer.writerow(val)



# Pedotransfer estimate "A": an exponential regression on the logs of Clay,
# Sand and OM, and on rho_d.
PTF_a = 1.1574*10**(-5)*np.exp(20.62-0.96*np.log(Clay)-0.66*np.log(Sand)-0.46*np.log(OM)-8.43*rho_d)
# Pedotransfer estimate "B": an exponential regression on P, Clay and Sand.
# The Clay**2*P**2 coefficient is 1.95e-2; it was previously written as
# ``1.95**10**(-2)`` (a power, roughly 1.007), unlike every other term.
# NOTE(review): presumably P is porosity and Clay/Sand are percentages --
# confirm against the source of the regression.
PTF_b = 2.78*10**(-6)*np.exp(19.52*P-8.97-2.82*10**(-2)*Clay+1.81*10**(-4)*Sand**2-9.41*10**(-3)*Clay**2-8.40*P**2+7.77*10**(-2)*Sand*P-2.98*10**(-3)*Sand**2*P**2-1.95*10**(-2)*Clay**2*P**2+1.73*10**(-5)*Sand**2*Clay+2.73*10**(-2)*Clay**2*P+1.43*10**(-3)*Sand**2*P-3.5*10**(-6)*Clay**2*Sand)

import plotly.figure_factory as ff
import pandas as pd
import plotly.plotly as py

# Scatterplot matrix comparing the three conductivity estimates.
dataframe = pd.DataFrame({'Davids': Ksat_d, 'A': PTF_a, 'B': PTF_b})
fig = ff.create_scatterplotmatrix(dataframe, diag='box',
                                  colormap_type='cat',
                                  height=800, width=800)
py.iplot(fig, filename = 'Colormap as a Dictionary')

# PTF_a is plotted on the x axis and PTF_b on the y axis, so the labels
# must follow that order (they were previously swapped).
plt.scatter(PTF_a, PTF_b)
plt.xlabel('PTF_a')
plt.ylabel('PTF_b')
plt.title('comparison between PTF K')
plt.show()

# PTF_a is scaled by 10**3 before being compared with Ksat_d.
plt.scatter(PTF_a*10**(3), Ksat_d)
plt.ylabel('Davids')
plt.xlabel('PTF_a')
plt.title('A')
plt.show()
コード例 #19
0
# Annotated correlation heatmap of every column, colour scale starting at -1.
plt.figure(figsize=(10, 10))
sns.heatmap(df.corr(), vmin=-1, annot=True)

# In[6]:

# Feature columns used for the pair matrix (DEATH_EVENT is not included).
k = df.loc[:, [
    "age", "anaemia", "creatinine_phosphokinase", "diabetes",
    "ejection_fraction", "high_blood_pressure", "platelets",
    "serum_creatinine", "serum_sodium", "sex", "smoking", "time"
]]
# A running 1..n identifier is used as the grouping index, so every row is
# its own category in the matrix.
k["index"] = np.arange(1, len(k) + 1)
scat_plot = ff.create_scatterplotmatrix(k,
                                        diag='box',
                                        index='index',
                                        colormap='Portland',
                                        colormap_type='cat',
                                        height=2400,
                                        width=1800)
iplot(scat_plot)

# In[7]:

# Platelet histogram, coloured by the DEATH_EVENT column.
fig = px.histogram(df, x="platelets", color="DEATH_EVENT")
fig.show()

# In[8]:

trace1 = go.Bar(x=df.DEATH_EVENT,
                y=df.diabetes,
                name="Diabetes",
コード例 #20
0
fig = dict(data=data, layout=layout)
fig['layout']['xaxis'].update(dict(title = 'Job Satisfaction', 
                                   tickmode='linear',
                                   tickfont = dict(size = 10)))
py.iplot(fig)

# **Let's combine age, job satisfaction and salary!**

# In[ ]:


# Drop salary outliers at or above 1,000,000.
salary = salary[salary.Salary<1000000]
# Take an explicit copy of the three plotted columns so adding the "index"
# column below does not write to a slice of the filtered ``salary`` frame.
dat = salary[['Age','JobSatisfaction','Salary']].copy()
# A running 0..n-1 identifier is used as the grouping index of the matrix.
dat["index"] = np.arange(len(dat))
fig = ff.create_scatterplotmatrix(dat, diag='box', index='index',
                                  colormap_type='cat',colormap='Jet',
                                  height=800, width=800)
py.iplot(fig)

# **It seems that there is no gender inequality in terms of salary in data science world.**

# In[ ]:


male_salary = salary[salary['GenderSelect'] == 'Male']
female_salary = salary[salary['GenderSelect'] == 'Female']
male = male_salary.groupby('Age').Salary.mean().to_frame()
female = female_salary.groupby('Age').Salary.mean().to_frame()

trace0 = go.Scatter(
    x = male.index,
                                          showticklabels = False, 
                                          showgrid = True),
                            # control x and y axes of the right marginal histogram
                            yaxis3 = dict(ticks = "", showticklabels = False),
                            xaxis3 = dict(nticks = 5, showgrid = False),
                            width = 850,
                            height = 450)


# Plotting the correlation
plotly.offline.plot(plotly_fig, filename = 'Correlation between Goog and MSFT')


## Let's repeat this correlation for all 4 tech stocks
# What was the correlation between the 4 tech stocks in ``tech_rets``?
fig4 = ff.create_scatterplotmatrix(tech_rets, diag='histogram', size = 5,height=740, width=880)

# Restyle every trace. Assigning a new ``marker`` dict replaces the marker
# settings produced by the factory call above, including ``size = 5``.
for trace in fig4['data']:
    trace['opacity'] = 0.7
    trace['marker'] = dict(color = "seagreen", line = dict(color = 'white', 
                                                       width = 0.7))

plotly.offline.plot(fig4, filename = 'Correlation between 4 Tech Giants')

## A quick glance shows that all four stocks are highly correlated with each
# other, which means you should not put all of them in one portfolio.
# Rather, you should pick the least correlated ones and combine them with
# stocks from other industries that have low correlation with the chosen
# stocks. This is a wise path towards a more diverse portfolio and
# reduced losses.
コード例 #22
0
    'text': "Stock Prices",
    'x': 0.5,
    'xanchor': 'center'
})

# Keep only the plotted columns. The explicit copy makes the "index" column
# below a write to an independent frame rather than to a column subset of
# the original data.
nflx = nflx[['Open', 'Close', 'Volume']].copy()
# A running 0..n-1 identifier is used as the grouping index of the matrix.
nflx["index"] = np.arange(len(nflx))

# Scatterplot matrix with box plots on the diagonal and a centred title.
fig8 = go.Figure(
    ff.create_scatterplotmatrix(nflx,
                                diag='box',
                                index='index',
                                size=3,
                                height=600,
                                width=1150,
                                colormap='RdBu',
                                title={
                                    'text':
                                    "Netflix Stock Price (Scatterplot Matrix)",
                                    'x': 0.5,
                                    'xanchor': 'center'
                                }))

dp.Report(
    dp.Group(dp.Plot(fig0),
             dp.Plot(fig1),
             dp.Plot(fig2),
             dp.Plot(fig3),
             dp.Plot(fig4),
             dp.Plot(fig5),
             dp.Plot(fig6),
コード例 #23
0
ファイル: tools.py プロジェクト: Meet11/StockAnalysis
 def create_scatterplotmatrix(*args, **kwargs):
     """Forward the call to ``plotly.figure_factory.create_scatterplotmatrix``.

     ``FigureFactory._deprecated`` is invoked first with this function's name
     (presumably to emit a deprecation notice -- the helper is defined
     elsewhere). All positional and keyword arguments are passed through
     unchanged and the factory's result is returned.
     """
     FigureFactory._deprecated('create_scatterplotmatrix')
     # Imported lazily, at call time, exactly as before.
     import plotly.figure_factory as _figure_factory
     return _figure_factory.create_scatterplotmatrix(*args, **kwargs)
コード例 #24
0
# Convert the sex value to a number: male -> 1, female -> 0
def trans(x):
    """Return 1 when ``x`` equals the string 'male', otherwise 0."""
    if x == 'male':
        return 1
    return 0
# Encode the 'Sex' column numerically with trans().
data['Sex'] = data['Sex'].apply(trans)

# Report which columns contain missing values, then fill every missing cell
# (in all columns, not only Age) with the median of the Age column.
print(data.isnull().any())
data.fillna(data.Age.median(), inplace=True)

# Visualisation requires plotly to be installed
import plotly
import plotly.figure_factory as ff

# The grouping column is converted to strings before being used as the index.
data['Survived'] = data['Survived'].astype('str')
fig = ff.create_scatterplotmatrix(data, diag='histogram',index='Survived',colormap=[ '#32CD32', '#00F5FF'],
                                  height=800, width=800)
plotly.offline.plot(fig, filename='p2.html')

from sklearn.tree import DecisionTreeClassifier as DTC, export_graphviz
# Features are columns 1..3 by position; the target is column 0.
X = data.iloc[:, 1:4]
y = data.iloc[:, 0]
dtc = DTC(criterion='entropy')  # split criterion based on information entropy
dtc.fit(X, y)
# The score is computed on the same data the tree was fitted on.
print('准确率:', dtc.score(X, y))

# Export the fitted tree in Graphviz dot format. ``f`` is rebound to the
# return value of export_graphviz inside the with-block.
with open('data/tree.dot', 'w') as f:
    f = export_graphviz(dtc, feature_names=X.columns, out_file=f)

import pydot
# Render the dot file to a PNG image.
(graph,) = pydot.graph_from_dot_file('data/tree.dot')
graph.write_png('data/tree.png')
コード例 #25
0
#%% get data
import utils
import preprocessing.pre_utils as pu
# Load the pickled training frame and the valence labels.
data_df = utils.load_object('faps_for_train.pkl')
valence = utils.load_object('valence.pkl')
#arousals = utils.load_object('arousal.pkl')
valence_list = valence['valence'].tolist()
#arousal_list = arousals['arousal'].tolist()

# Attach the valence labels to the samples through the project helper.
data_df = pu.match_label_with_sample(data_df, valence_list)
#data_df = pu.match_label_with_sample(data_df,arousal_list,col_name='arousal')

#%% plot scatter matrix
# Columns 0, 1, 12 and 13 are selected by their integer column labels and
# grouped by the 'label' column.
fig = ff.create_scatterplotmatrix(data_df[[0, 1, 12, 13, 'label']],
                                  diag='histogram',
                                  index='label',
                                  height=1000,
                                  width=1000)

plotly.offline.plot(fig)

#%%
# NOTE(review): machine-specific absolute Windows paths are hard-coded in
# this cell and the next one.
##iaps_class = iaps(r"C:\Users\DSPLab\Research\affective-monitor-model\preprocessing\IAPSinfoFile_Final.txt")
iaps_class = iaps(r"E:\Research\affective-monitor-model\preprocessing")
sample_list_from_pic_id = iaps_class.get_sample_idx(2141)
feel_df = iaps_class.get_feeling('happy')

#%%
#path = "C:\\Users\\DSPLab\\Research\\ExperimentData"
path = "E:\\Research\\ExperimentData"
n = 1
コード例 #26
0
ファイル: data_callbacks.py プロジェクト: prabhant/openml.org
    def feature_interactions(rows, radio, url, dummy):
        """Render the feature-interaction view for the dataset named in ``url``.

        Args:
            rows: Records of the feature metadata table. The "Attribute",
                "DataType" and "Target" fields are read from them.
            radio: View selector: "top", "numeric" or "nominal".
            url: Page URL containing ``data/<id>``; the numeric id selects
                the cached ``cache/df<id>.pkl`` and ``cache/fi<id>.pkl``.
            dummy: Only the value "done" triggers rendering; any other value
                makes the function return ``[]``.

        Returns:
            ``html.Div`` wrapping either a graph or a "not found" paragraph;
            ``[]`` when ``dummy`` is not "done" or ``radio`` is not one of
            the three known views; the tuple
            ``("No target found", "No target found")`` when no metadata row
            is marked as the target.
        """
        # Raw string: '\d' inside a plain literal is an invalid escape.
        data_id = int(re.search(r'data/(\d+)', url).group(1))
        if dummy != "done":
            return []
        df = pd.read_pickle('cache/df' + str(data_id) + '.pkl')
        fi = pd.read_pickle('cache/fi' + str(data_id) + '.pkl')

        meta_data = pd.DataFrame(rows)
        try:
            target_row = meta_data[meta_data["Target"] == "true"]
            target_attribute = target_row["Attribute"].values[0]
            target_type = target_row["DataType"].values[0]
        except IndexError:
            return "No target found", "No target found"

        if target_type in ("nominal", "string"):
            y = pd.Categorical(df[target_attribute]).codes
        else:
            y = df[target_attribute]

        # Feature interaction plots
        df = clean_dataset(df)
        numerical_features = list(
            meta_data["Attribute"][meta_data["DataType"] == "numeric"])
        nominal_features = list(
            meta_data["Attribute"][meta_data["DataType"] == "nominal"])
        # fi['index'] holds feature names (presumably ordered by importance —
        # confirm where the cache is written); keep the first five per kind.
        top_numericals = (
            fi['index'][fi['index'].isin(numerical_features)][:5])
        top_nominals = (fi['index'][fi['index'].isin(nominal_features)][:5])
        df['target'] = df[target_attribute]

        if target_type == "numeric":
            cmap_type = 'seq'
            # Bucket the numeric target into 1000 intervals and order the
            # rows by each interval's lower bound, parsed back out of the
            # interval's string form "(low, high]".
            df['target'] = y
            df['target'] = pd.cut(df['target'], 1000).astype(str)
            cat = df['target'].str.extract(r'\((.*),',
                                           expand=False).astype(float)
            df['bin'] = pd.Series(cat)
            df.sort_values(by='bin', inplace=True)
            df.drop('bin', axis=1, inplace=True)
        else:
            cmap_type = 'cat'
            try:
                df['target'] = df['target'].astype(int)
            except ValueError:
                print("target not converted to int")
            df.sort_values(by='target', inplace=True)
            df['target'] = df['target'].astype(str)

        if radio == "top":
            # .assign builds a new frame, so the target column is not written
            # onto a slice of df (avoids chained-assignment warnings).
            top_features = df[fi['index'][0:5].values].assign(
                target=df['target'])

            if len(top_numericals):
                matrix = ff.create_scatterplotmatrix(
                    top_features,
                    title='Top feature interactions',
                    diag='box',
                    index='target',
                    colormap_type=cmap_type,
                    height=800,
                    width=900)
                graph = dcc.Graph(figure=matrix)
            else:
                # NOTE(review): `y` was computed before clean_dataset() and
                # the sort above, so its order may not match the rows of
                # top_features — confirm the line colours are aligned.
                parcats = [
                    go.Parcats(dimensions=[{
                        'label': column,
                        'values': list(top_features[column].values)
                    } for column in top_features.columns],
                               line={
                                   'color': y,
                                   'colorscale': 'Portland'
                               },
                               hoveron='color',
                               hoverinfo='count+probability',
                               arrangement='freeform')
                ]
                layout = go.Layout(autosize=False, width=1200, height=800)

                fig = go.Figure(data=parcats, layout=layout)
                graph = dcc.Graph(figure=fig)
        elif radio == "numeric":
            if len(top_numericals):
                df_num = df[top_numericals].assign(target=df['target'])
                matrix = ff.create_scatterplotmatrix(
                    df_num,
                    title='Top numeric feature interactions',
                    diag='box',
                    index='target',
                    colormap_type=cmap_type,
                    height=1000,
                    width=1000)
                graph = dcc.Graph(figure=matrix)
            else:
                graph = html.P("No numericals found")
        elif radio == "nominal":
            if len(top_nominals):
                df_nom = df[top_nominals].assign(target=df['target'])

                parcats = [
                    go.Parcats(dimensions=[{
                        'label':
                        column,
                        'values':
                        list(df_nom[column].values)
                    } for column in df_nom.columns],
                               line={
                                   'color':
                                   pd.Categorical(df_nom['target']).codes,
                                   'colorscale': 'Portland'
                               },
                               hoveron='color',
                               hoverinfo='count+probability',
                               arrangement='freeform')
                ]
                layout = go.Layout(autosize=False, width=1000, height=800)
                fig = go.Figure(data=parcats, layout=layout)
                graph = dcc.Graph(figure=fig)
            else:
                graph = html.P("No nominals found")
        else:
            # Unknown selector: previously `graph` was left unbound and the
            # return below raised UnboundLocalError. Return the same empty
            # result used when the cache is not ready.
            return []

        return html.Div(graph)
コード例 #27
0
checkpokemonperformance("VenusaurMega Venusaur")

# In[ ]:

checkpokemonperformance("Regigigas")

# # 9. Scatterplot matrix
# A scatterplot matrix shows all pairwise scatter plots of the variables on a single page in matrix form. That is, if there are k variables, the matrix has k rows and k columns, and the cell in the ith row and jth column is a plot of Xi versus Xj.

# **9.1 Scatterplot matrix of attributes with boxplots**

# In[ ]:

# Use the columns at positions 5..11 of df, grouped by the 'Generation'
# column, with box plots on the diagonal.
fig = ff.create_scatterplotmatrix(df.iloc[:, 5:12],
                                  index='Generation',
                                  diag='box',
                                  size=2,
                                  height=800,
                                  width=800)
# NOTE(review): `iplot` is presumably plotly.offline.iplot, where `image`
# requests an image download of the figure — confirm against the import.
iplot(fig, filename='Scatterplotmatrix.png', image='png')

# # 10. Violin plots
#
# A violin plot is a method of plotting numeric data. It is similar to box plot with a rotated kernel density plot on each side.
#
# A violin plot is more informative than a plain box plot. In fact while a box plot only shows summary statistics such as mean/median and interquartile ranges, the violin plot shows the full distribution of the data. The difference is particularly useful when the data distribution is multimodal (more than one peak). In this case a violin plot clearly shows the presence of different peaks, their position and relative amplitude. This information could not be represented with a simple box plot which only reports summary statistics. The inner part of a violin plot usually shows the mean (or median) and the interquartile range. In other cases, when the number of samples is not too high, the inner part can show all sample points (with a dot or a line for each sample).
#
# Source: [Wikipedia](https://en.wikipedia.org/wiki/Violin_plot)
#
# **10.1 Violinplot of all stats**

# In[ ]:
コード例 #28
0
    plot_bgcolor='#1F2024',
    paper_bgcolor='#1F2024',
    font_color='#DCDCDC',
)

# Modal 2: correlation
# Columns that are compared pairwise in the scatterplot matrix below.
colonnes = [
    'world_rank',
    'total_score',
    'research',
    'teaching',
]
data_pca = dfs[colonnes]
# Scatterplot matrix of the selected columns with histograms on the diagonal.
# NOTE(review): no `index` column is passed here; confirm that `colormap` and
# `colormap_type` have any effect without one.
lay1_fig2 = ff.create_scatterplotmatrix(data_pca,
                                        diag='histogram',
                                        colormap='Viridis',
                                        colormap_type='cat',
                                        height=500,
                                        width=1000)

modal = html.Div([
    dbc.Button(style={
        'margin-bottom': '5vh',
        'margin-left': '2vw',
        'backgroundColor': '#181B1E'
    },
               children=["Matrice des corrélations"],
               id="open-xl"),
    dbc.Modal(
        style={'backgroundColor': '#181B1E'},
        children=[
            dbc.ModalHeader("Matrice des Corrélation"),
コード例 #29
0
fig1_1 = go.Figure(data=trace_heat)
fig1_1.update_layout(title="Correlation Matrix")

## ------ fig1_2 : scatterplot matrix ------ # first figure on page 1

# Take an explicit copy so that the 'index' column added below is written to
# an independent frame rather than onto a slice of df (avoids pandas'
# chained-assignment warning).
data = df[["research", "teaching", "citations"]].copy()
# 1-based row counter used as the grouping column of the matrix.
data["index"] = np.arange(1, len(data) + 1)

# scatter matrix
# NOTE(review): the title below contains the typo 'teatching'; it is left
# untouched here because it is user-facing text.
# NOTE(review): `text` and `hovertemplate` are extra keyword arguments;
# confirm that create_scatterplotmatrix forwards them to the traces.
fig1_2 = ff.create_scatterplotmatrix(
    data,
    diag='box',
    index='index',
    colormap='Portland',
    colormap_type='cat',
    title='Relations between research, teatching and citations criteria',
    text=df[['university_name', 'world_rank']],
    hovertemplate="<b> University :</b> %{text[0]}<br>" +
    "<b> World rank :</b> %{text[1]}<br>" + "<extra></extra>",
)

##############
### Page 2 ###
##############
## ------ fig2_1 : Ebouli des valeurs propres ------ # première figure sur la page 2
# voir plus bas

## ------ fig2_2 : Cercle des corrélations ------ # 2ème figure sur la page 2
# voir plus bas
コード例 #30
0
ファイル: tools.py プロジェクト: plotly/plotly.py
 def create_scatterplotmatrix(*args, **kwargs):
     """Forward to ``plotly.figure_factory.create_scatterplotmatrix``.

     Calls ``FigureFactory._deprecated`` with this function's name first,
     then passes all positional and keyword arguments through unchanged and
     returns whatever the figure-factory implementation returns.
     """
     FigureFactory._deprecated('create_scatterplotmatrix')
     # Imported at call time, after the deprecation hook has run.
     import plotly.figure_factory as figure_factory
     return figure_factory.create_scatterplotmatrix(*args, **kwargs)
コード例 #31
0
ファイル: script62.py プロジェクト: darkblue-b/kaggleScape
# <font color='black'>
# * import figure factory as ff
# * create_scatterplotmatrix = creates scatter plot
#     * data2015 = prepared data. It includes research, international and total scores with index from 1 to 401
#     * colormap = color map of scatter plot
#     * colormap_type = color type of scatter plot
#     * height and width = size of the figure

# In[ ]:

# import figure factory
import plotly.figure_factory as ff
# prepare data
dataframe = timesData[timesData.year == 2015]
# Copy the selected columns so that the 'index' column added below is written
# to an independent frame rather than onto a slice of timesData (avoids
# pandas' chained-assignment warning).
data2015 = dataframe.loc[:, ["research", "international", "total_score"]].copy()
# 1-based row counter used as the grouping column of the matrix.
data2015["index"] = np.arange(1, len(data2015) + 1)
# scatter matrix: box plots on the diagonal, points coloured by 'index'
fig = ff.create_scatterplotmatrix(data2015,
                                  diag='box',
                                  index='index',
                                  colormap='Portland',
                                  colormap_type='cat',
                                  height=700,
                                  width=700)
iplot(fig)

# # Conclusion
# * If you like it, thank you for your upvotes.
# * If you have any questions, I will be happy to hear them.
# ## To be continued
コード例 #32
0
ファイル: plotly_workflow.py プロジェクト: zhang-cugb/unifloc
def plot_scatterplotmatrix(df, file_name):
    """Draw a scatterplot matrix of ``df`` and hand it to ``plot``.

    The matrix uses histograms on the diagonal; the layout is set to
    2000x1500 with font size 7 before the figure is passed to ``plot``
    with ``file_name`` as its ``filename`` argument.
    """
    matrix_fig = ff.create_scatterplotmatrix(df, diag='histogram')
    # Same three layout overrides as before, applied in a single update.
    layout_overrides = {'width': 2000, 'height': 1500, 'font': {'size': 7}}
    matrix_fig.layout.update(**layout_overrides)

    plot(matrix_fig, filename=file_name)