예제 #1
0
def test_splom_case():
    """Check that ``px.scatter_matrix`` accepts a DataFrame, a dict and an array.

    For each kind of input the dimensions of the first trace are compared
    against the data that was passed in.
    """
    # DataFrame input: one splom dimension per column.
    iris_df = px.data.iris()
    splom = px.scatter_matrix(iris_df)
    assert len(splom.data[0].dimensions) == len(iris_df.columns)

    # Dict-of-lists input: the first dimension holds the values stored under "a".
    columns = {"a": [1, 2, 3], "b": [4, 5, 6], "c": [7, 8, 9]}
    splom = px.scatter_matrix(columns)
    first_dimension = splom.data[0].dimensions[0]
    assert np.all(first_dimension.values == np.array(columns["a"]))

    # 2-D array input: the first dimension is the first column of the array.
    grid = np.arange(9).reshape((3, 3))
    splom = px.scatter_matrix(grid)
    first_dimension = splom.data[0].dimensions[0]
    assert np.all(first_dimension.values == grid[:, 0])
예제 #2
0
def display_scatterplot():
    """Return a scatter matrix of the five most inter-correlated columns.

    Ten columns are taken from ``df`` (resolved from the enclosing scope),
    their pairwise Pearson correlations are computed, and every column is
    scored by the sum of its absolute correlations.  The five columns with
    the highest score are plotted.

    Returns:
        The figure produced by ``px.scatter_matrix`` (height 800) with
        human-readable axis labels.
    """
    candidates = df[[
        "age", "pop", "pov", "college", "urate", "county_income", "h_income",
        "share_white", "share_black", "share_hispanic"
    ]]
    scores = candidates.corr(method='pearson').abs().sum(axis=0)
    # ``Series.iteritems`` was removed in pandas 2.0.  The index of the sorted
    # scores already lists the column names in ranking order, so no loop is
    # needed to collect them.
    top_names = scores.sort_values(ascending=False).index[:5].tolist()
    return px.scatter_matrix(candidates[top_names],
                             height=800,
                             labels={
                                 "pov": "Poverty Rate",
                                 "h_income": "House Hold Income",
                                 "college": "College Rate",
                                 "urate": "Unemployment Rate",
                                 "share_black": "Black Population",
                                 "share_white": "White Population",
                                 "share_hispanic": "Hispanic Population",
                                 "age": "Age",
                                 "pop": "Population",
                                 "county_income": "County Income"
                             })
예제 #3
0
def pcagrid(df, n_components=4, norm='none'):
    """Show a scatter matrix of the principal components of ``df``.

    Every column except the last one is used as a PCA feature; the
    ``"quality"`` column colours the points.

    Args:
        df: Input frame.  Must contain a ``"quality"`` column.
        n_components: Number of principal components to compute.
        norm: ``'z_score'`` standardises each feature, ``'min_max'`` rescales
            each feature to its own min/max range, any other value leaves the
            features unchanged.

    Returns:
        Whatever ``fig.show()`` returns.
    """
    features = df.iloc[:, :-1]
    if norm == 'z_score':
        features = (features - features.mean()) / features.std()
    elif norm == 'min_max':
        features = (features - features.min()) / (features.max() -
                                                  features.min())

    pca = sklearn.decomposition.PCA(n_components=n_components)
    # Fit on the scaled features.  Previously the normalised frame was built
    # but the raw frame was fitted, so ``norm`` had no effect on the result.
    components = pca.fit_transform(features)
    labels = {
        str(i): f"PC {i+1} ({var:.1f}%)"
        for i, var in enumerate(pca.explained_variance_ratio_ * 100)
    }
    labels['color'] = 'quality'
    fig = px.scatter_matrix(
        components,
        labels=labels,
        dimensions=range(len(pca.explained_variance_ratio_)),
        color=df["quality"],
        title=
        f'Total Explained Variance: {pca.explained_variance_ratio_.sum()*100:.2f}%'
    )
    fig.update_traces(diagonal_visible=False)
    return fig.show()
예제 #4
0
def plot_scattermatrix(dimensions, title, df):
    """Show a lower-triangle scatter matrix coloured by the default flag.

    Points are coloured and given a marker symbol according to the
    ``'default.payment.next.month'`` column, using the qualitative ``Set2``
    palette.

    Args:
        dimensions: Columns of ``df`` to plot against each other.
        title: Figure title.
        df: Data to plot; must contain ``'default.payment.next.month'``.

    Returns:
        None; the figure is displayed with ``fig.show()``.
    """
    target = 'default.payment.next.month'
    palette = px.colors.qualitative.Set2

    fig = px.scatter_matrix(
        df,
        dimensions=dimensions,
        color=target,
        color_discrete_sequence=palette,
        color_continuous_scale=palette,
        symbol=target,
    )

    fig.update_layout(title=title,
                      coloraxis_showscale=False,
                      width=1700,
                      height=1700,
                      legend=dict(
                          yanchor="top",
                          font=dict(size=16),
                          xanchor="right",
                      ))
    # Only the lower half is drawn: the upper half mirrors it and the diagonal
    # would plot each column against itself.
    fig.update_traces(
        diagonal_visible=False,
        showupperhalf=False,
        marker=dict(
            colorscale=palette,
            line_color='white',
            line_width=0,
            size=2),
    )
    fig.show()
예제 #5
0
파일: diabetes.py 프로젝트: mkurlit/AI-ML
def plot_input(df: pd.DataFrame, corrs=False):
    """Show either a scatter matrix or a correlation heatmap of ``df``.

    Args:
        df: Frame holding the input data.  The scatter matrix uses every
            column except ``'class'`` as a dimension and ``'class'`` for
            colour.
        corrs: When truthy, plot the Pearson correlation heatmap instead of
            the scatter matrix (default ``False``).

    Returns:
        None; the chosen figure is displayed with ``show()``.
    """
    if corrs:
        # Check the correlation of the data.
        corr = df.corr('pearson')
        # Label the axes from the correlation matrix itself so they stay
        # aligned with ``z`` even when ``corr`` has fewer columns than ``df``.
        heat = go.Heatmap(z=np.array(corr),
                          x=corr.columns,
                          y=corr.columns,
                          xgap=5,
                          ygap=5,
                          colorscale='ylorrd',
                          reversescale=True)
        fg = go.Figure(data=heat,
                       layout=go.Layout(
                           width=800,
                           height=800,
                           xaxis_showgrid=False,
                           yaxis_showgrid=False,
                       ))
        fg.show()
        return

    # Visualise the data distribution.
    fg = px.scatter_matrix(
        df,
        dimensions=[c for c in df.columns if c != 'class'],
        color='class',
    )
    fg.update_traces(diagonal_visible=False, showupperhalf=False)
    fg.show()
예제 #6
0
    def scatter_matrix(self, df='default'):
        """Show a scatter matrix for one of the named data subsets.

        ``df`` is the subset name: ``'default'``, ``'attacks'``, ``'normal'``,
        ``'dos'``, ``'probe'``, ``'u2r'`` or ``'r2l'``.  ``'attacks'`` is the
        default frame restricted to rows whose ``'Attack_Type(42)'`` is in
        ``self.attack_types``.  Any other value prints a hint and returns
        without plotting.
        """
        # (option name, attribute holding the frame to copy)
        sources = (
            ('default', 'df_default'),
            ('attacks', 'df_default'),
            ('normal', 'df_normal'),
            ('dos', 'df_dos'),
            ('probe', 'df_probe'),
            ('u2r', 'df_u2r'),
            ('r2l', 'df_r2l'),
        )
        attribute = None
        for option, candidate in sources:
            if df == option:
                attribute = candidate
                break

        if attribute is None:
            print(
                'Invalid df option. Choose a df from one of the following: default, attacks, normal, dos, probe, u2r, r2l'
            )
            return

        frame = getattr(self, attribute).copy()
        if df == 'attacks':
            frame = frame[frame['Attack_Type(42)'].isin(self.attack_types)]

        attack_column = 'Attack_Type(42)'
        frame[attack_column] = frame[attack_column].apply(
            self.__replace_values)

        # Axis labels are the column names with underscores turned into spaces.
        readable = {}
        for col in frame.columns:
            readable[col] = col.replace('_', ' ')

        fig = px.scatter_matrix(frame, color="Attack_Type(42)", labels=readable)
        fig.update_traces(diagonal_visible=False)
        fig.show()
예제 #7
0
def num_analysis(n_clicks, input, *args):
    """Return the Dash component for the selected analysis.

    Args:
        n_clicks: Not referenced in the body.
        input: Analysis key.  ``'AAA'`` yields a table of ``use_df.describe()``,
            ``'BBB'`` a scatter matrix of ``num_cols`` coloured by
            ``target_column``, ``'CCC'`` the number of ``cat_cols``.
        *args: Not referenced in the body.

    Returns:
        A prompt heading when ``input`` is ``None`` or empty, the component
        for the chosen analysis, or ``None`` for any other key.
    """
    if input is None or input == '':
        return html.H5('分析方法を選択し、実行を押してください')

    if input == 'AAA':
        describe = use_df.describe().reset_index()
        # Convert once and reuse both parts instead of running the helper
        # twice on the same frame.
        processed = df_processor(describe)
        return dash_table.DataTable(
            column_selectable='multi',
            fixed_rows={'headers': True, 'data': 0},
            data=processed[0],
            columns=processed[1],
            style_table={
                'overflowX': 'scroll',
                'overflowY': 'scroll',
                'maxHeight': '350px',
                # Was misspelled 'maxWidht', which is not a CSS property and
                # therefore never limited the table width.
                'maxWidth': '800px'
            },
            style_header={
                'fontWeight': 'bold',
                'textAlign': 'center'
            }
        )
    if input == 'BBB':
        fig = px.scatter_matrix(
            use_df,
            dimensions=num_cols,
            color=target_column
        )
        return dcc.Graph(figure=fig)
    if input == 'CCC':
        return html.H5(len(cat_cols))
    return None
예제 #8
0
def app():
    """Streamlit page: upload a CSV and explore it interactively.

    Shows a warning and stops when no file has been uploaded.  Otherwise it
    renders a scatter matrix with user-chosen dimensions, colour column and
    opacity, followed by a box/histogram/violin plot of one chosen column.
    """
    # Input 1: choose the data set.
    uploaded = st.file_uploader("Data file")
    if uploaded is None:
        st.warning("No data loaded")
        return

    # Read the uploaded file, then rewind it to position 0.
    frame = pd.read_csv(uploaded)
    uploaded.seek(0)

    # Input 2: choose the scatter matrix parameters.
    chosen_dimensions = st.multiselect("Scatter matrix dimensions",
                                       list(frame.columns),
                                       default=list(frame.columns))
    colour_column = st.selectbox("Color", frame.columns)
    alpha = st.slider("Opacity", 0.0, 1.0, 0.5)

    # Scatter matrix plot.
    matrix_fig = px.scatter_matrix(frame,
                                   dimensions=chosen_dimensions,
                                   color=colour_column,
                                   opacity=alpha)
    st.write(matrix_fig)

    # Choose the column whose distribution is displayed.
    focus_column = st.selectbox("Interesting column", frame.columns)
    # Choose the plotting function used to draw the distribution.
    plot_fn = st.selectbox("Plot type", [px.box, px.histogram, px.violin])

    st.write(plot_fn(frame, x=focus_column, color=colour_column))
예제 #9
0
파일: app.py 프로젝트: Caranell/University
def part_1(data):
    """Show several views of the strongest attribute pairs in ``data``.

    Displays a heatmap of ``data``, then flattens it into
    (first_attribute, second_attribute, correlation) rows sorted by descending
    value.  Rows pairing an attribute with itself are dropped and every second
    row of the first ten is kept.  The remaining rows are shown as a line
    chart, a histogram and a scatter matrix.  Finally
    ``print_word_cloud(skills)`` is called.

    NOTE(review): presumably ``data`` is a symmetric correlation matrix, which
    would explain skipping every second row — confirm with the caller.
    """
    heatmap = go.Heatmap(z=data.values, x=data.keys(), y=data.keys())
    go.Figure(data=heatmap).show()

    pairs = data.unstack().sort_values(ascending=False).to_frame()
    pairs = pairs.reset_index()
    pairs.columns = ['first_attribute', 'second_attribute', 'correlation']
    print(pairs)

    is_distinct_pair = pairs.first_attribute != pairs.second_attribute
    pairs = pairs.loc[is_distinct_pair][0:10:2]
    print(pairs)

    pairs = pairs.sort_values(by='first_attribute')

    line_fig = exp.line(pairs,
                        x='first_attribute',
                        y='correlation',
                        text='second_attribute')
    line_fig.show()

    hist_fig = exp.histogram(pairs,
                             x='first_attribute',
                             y='correlation',
                             histfunc='max')
    hist_fig.show()

    matrix_fig = exp.scatter_matrix(pairs, color='second_attribute')
    matrix_fig.show()

    print_word_cloud(skills)
예제 #10
0
def generate_splom(df):
    """Build a scatter matrix over the four sepal/petal columns of ``df``."""
    measurement_columns = [
        "sepal_width", "sepal_length", "petal_width", "petal_length"
    ]
    return px.scatter_matrix(df, dimensions=measurement_columns)
예제 #11
0
 def _generate_scatter(self):
     """Build the scatter-matrix section of the page.

     The numeric frame obtained from ``self.pp`` for ``self.settings['data']``
     is plotted as a 700px-high scatter matrix with x tick labels at 90
     degrees and every layout annotation rotated to -90 degrees.

     Returns:
         An ``html.Div`` holding a centred heading and, below it, the graph
         (78% width) next to the explanatory markdown (18% width).
     """
     numeric_df = self.pp.get_numeric_df(self.settings['data']).copy()
     fig = px.scatter_matrix(numeric_df, height=700)
     fig.update_xaxes(tickangle=90)
     for note in fig['layout']['annotations']:
         note['textangle'] = -90

     heading = html.Div(html.H1(children='Матрица рассеяния'),
                        style={'text-align': 'center'})
     graph_panel = html.Div(dcc.Graph(id='scatter_matrix', figure=fig),
                            style={
                                'width': '78%',
                                'display': 'inline-block',
                                'border-color': 'rgb(220, 220, 220)',
                                'border-style': 'solid',
                                'padding': '5px'
                            })
     text_panel = html.Div(dcc.Markdown(children=markdown_text_scatter),
                           style={
                               'width': '18%',
                               'float': 'right',
                               'display': 'inline-block'
                           })
     body = html.Div([graph_panel, text_panel])
     return html.Div([heading, body], style={'margin': '100px'})
예제 #12
0
def eda():
    """Streamlit page for exploratory analysis of ``data``.

    ``data`` is resolved from the enclosing scope.  The page shows the head
    and tail of the frame, its row or column count, a histogram of one
    selectable feature and a scatter matrix of selectable numeric features.
    """
    st.title('Exploratory data analysis')

    # Preview of the data set.
    st.subheader("Preview DataFrame")
    st.write("Head", data.head())
    st.write("Tail", data.tail())

    # Size of the data set along the chosen axis.
    axis_choice = st.radio('What Dimension Do You Want to Show',
                           ('Rows', 'Columns'))
    if axis_choice == 'Rows':
        st.write("Showing Length of Rows", len(data))
    elif axis_choice == 'Columns':
        st.write("Showing Length of Columns", data.shape[1])

    # Distribution of a single feature.
    st.subheader("Plot distribution of feature")
    feature_options = [
        'year', 'mileage', 'volume', 'price', 'body', 'transmission', 'wheel',
        'drive', 'fuel'
    ]
    chosen_feature = st.selectbox('Which feature do you want to explore?',
                                  feature_options)
    st.plotly_chart(px.histogram(data, x=chosen_feature))

    # Scatter matrix over the chosen numeric features.
    st.subheader("Scatter plot")
    numeric_options = ['year', 'mileage', 'volume', 'price']
    chosen_dimensions = st.multiselect('Which feature do you want to explore?',
                                       numeric_options,
                                       default=['price', 'year'])
    st.plotly_chart(px.scatter_matrix(data, dimensions=chosen_dimensions))
예제 #13
0
파일: toyo.py 프로젝트: satomun/chomoku
def update_graph1(selected_values):
    """Build the line, scatter-matrix and correlation figures for a selection.

    Args:
        selected_values: Column names of ``toyo_cropnavi`` to plot.

    Returns:
        A ``(line figure, scatter matrix, correlation image)`` tuple.

    Raises:
        PreventUpdate: When nothing is selected.
    """
    # Truthiness also covers ``None`` (an unset selection), which previously
    # crashed in ``len()`` instead of skipping the update.
    if not selected_values:
        raise PreventUpdate

    fig1 = px.line(toyo_cropnavi, x=toyo_cropnavi.index, y=selected_values)
    selected_df = toyo_cropnavi[selected_values]
    fig2 = px.scatter_matrix(selected_df)
    fig3 = px.imshow(selected_df.corr(), title="データ間の相関")
    return fig1, fig2, fig3
예제 #14
0
파일: plot.py 프로젝트: rubzk/spotiCluster
    def scatter_matrix(self, form):
        """Return a 1400x800 scatter matrix of ``self.df``.

        ``form['dimensions']`` selects the plotted columns and
        ``form['color']`` the column used for colouring.
        """
        frame = self.df
        chosen_dimensions = form['dimensions']
        colour_column = form['color']
        return px.scatter_matrix(frame,
                                 dimensions=chosen_dimensions,
                                 color=colour_column,
                                 width=1400,
                                 height=800)
예제 #15
0
 def test_scatter_matrix_plot(self, data: pd.DataFrame):
     """Show a scatter matrix of the ``"Open"`` series of every stock code.

     The codes come from ``params.get("STOCK_CODES")``; each column is named
     ``"<Code> Open"`` with the code capitalised.
     """
     stock_codes = params.get("STOCK_CODES")
     open_series = [data[code]["Open"] for code in stock_codes]
     crypto_comp = pd.concat(open_series, axis=1)
     crypto_comp.columns = [f"{code.capitalize()} Open" for code in stock_codes]
     px.scatter_matrix(crypto_comp).show()
예제 #16
0
def g4(batch_id, x, y, z, color, colorset):
    """Render a three-dimension scatter matrix of a batch as an HTML ``div``.

    The batch identified by ``batch_id`` is loaded as a pandas frame; columns
    ``x``, ``y`` and ``z`` become the matrix dimensions, ``color`` the colour
    column and ``get_colorset(colorset)`` the continuous colour scale.
    """
    frame = models.BatchInput(batch_id).as_pandas_dataframe()
    scale = get_colorset(colorset)
    fig = px.scatter_matrix(frame,
                            dimensions=[x, y, z],
                            color=color,
                            color_continuous_scale=scale)
    return opy.plot(fig, auto_open=False, output_type='div')
예제 #17
0
    def scatterMatrix(self, df):
        """Create and show a scatter matrix of selected columns of ``df``.

        NOTE(review): ``columns`` is neither a parameter nor a local here; it
        is resolved from an outer scope — confirm where it is defined.
        """
        # Keep the selected columns and drop rows with missing values.
        cleaned = df[columns].dropna()

        # Build the scatter matrix and display it.
        px.scatter_matrix(cleaned).show()
예제 #18
0
def plot_corr_mat(df, sentiment_col):
    """Return a scatter matrix of sentiment, volume, cases and deaths.

    The column named ``sentiment_col`` is renamed to ``'sentiment'`` on a
    copy of ``df``; points are coloured by ``'country'``.
    """
    renamed = df.rename(columns={sentiment_col: 'sentiment'})
    plotted_columns = ['sentiment', 'volume', 'cases', 'deaths']
    fig = px.scatter_matrix(renamed,
                            dimensions=plotted_columns,
                            color='country')
    tight_margin = dict(b=5, t=20, l=5, r=5)
    fig.update_layout(autosize=True, height=500, margin=tight_margin)
    return fig
예제 #19
0
def display_graph(x1value, x2value, x3value, x4vaue):
    """Return a scatter matrix of four columns of ``df``.

    ``df`` is resolved from the enclosing scope.

    Args:
        x1value: First column to plot.
        x2value: Second column to plot.
        x3value: Third column to plot.
        x4vaue: Fourth column to plot.  The misspelled name is kept so that
            existing keyword callers keep working.

    Returns:
        The figure, with the diagonal hidden.
    """
    # Build the matrix.  The body used to read ``x4value``, a name that is
    # not a parameter, so the fourth argument was never used.
    selected = [x1value, x2value, x3value, x4vaue]
    figure = px.scatter_matrix(df,
                               dimensions=selected,
                               title="Matrice de Corrélation : ")
    figure.update_traces(diagonal_visible=False)

    # Return the matrix.
    return figure
예제 #20
0
def _make_task_scatter_chart_and_corr(df, color, year, weekdays, categories,
                                      hour_interval):
    """Build the task scatter matrix and the matching correlation heatmap.

    Args:
        df: Task frame with at least the columns ``year``, ``category``,
            ``weekday``, ``start_hour``, ``attention_score`` and
            ``happy_score``.
        color: Column used to colour the scatter matrix.
        year: Values of ``year`` to keep.
        weekdays: Values of ``weekday`` to keep.
        categories: Values of ``category`` to keep.
        hour_interval: ``(start, end)``; rows with
            ``start <= start_hour < end`` are kept.

    Returns:
        ``(scatter_matrix_fig, corr_fig)``, or ``({}, {})`` when no rows
        remain after filtering and dropping rows with missing values.
    """
    keep = (df["year"].isin(year)
            & df["category"].isin(categories)
            & df["weekday"].isin(weekdays)
            & (df["start_hour"] >= hour_interval[0])
            & (df["start_hour"] < hour_interval[1]))
    # Work on an explicit copy: the columns added below were previously
    # assigned onto the result of chained filters, which pandas flags as a
    # possible write to a view (SettingWithCopyWarning).
    df = df.loc[keep].copy()

    jittering_value = 0.35

    def _jitter(score):
        # Spread identical integer scores so overlapping points stay visible.
        return score + random.uniform(-jittering_value, jittering_value)

    df["attention_j"] = df["attention_score"].apply(_jitter)
    df["happy_j"] = df["happy_score"].apply(_jitter)
    df.dropna(inplace=True)

    df["size"] = 1  # NOTE: fixed value

    if df.empty:
        return {}, {}

    scatter_matrix_fig = px.scatter_matrix(
        df,
        dimensions=["happy_j", "attention_j", "working_hours", "start_hour"],
        labels=labels,
        color=color,
        category_orders={
            "category": data_handler.TASK_CATEGORIES,
            "year": ["2017", "2018", "2019", "2020"],
            "weekday": list(calendar.day_name),
        },
        size="size",
        size_max=5,
        opacity=0.4,
        hover_data=[
            "start_time", "end_time", "description", "attention_score",
            "happy_score", "working_hours_text"
        ],
        height=700,
    )

    # The correlation uses the raw scores, not the jittered copies.
    corr_df = df[[
        "attention_score", "happy_score", "working_hours", "start_hour"
    ]].corr()
    corr_fig = px.imshow(
        corr_df.to_numpy(),
        x=list(corr_df.columns),
        y=list(corr_df.columns),
        labels=labels,
        color_continuous_scale=px.colors.sequential.Viridis,
        range_color=[-1, 1],
    )
    return scatter_matrix_fig, corr_fig
예제 #21
0
    def plot_scatter_matrix(n_clicks, data, columns, selected, ndxs):
        """Build the scatter-matrix figure and its config.

        Args:
            n_clicks: Click count; ``None`` means the control was never
                clicked.
            data: Records used to build the frame; must provide the
                ``"DateAcquired"``, ``"Use Downstream"`` and ``"Flagged"``
                columns.
            columns: Columns used as scatter-matrix dimensions.
            selected: Row labels to highlight; ``None`` highlights nothing.
            ndxs: Row labels, in display order, to reindex the frame to;
                ``None`` keeps the frame as built.

        Returns:
            ``(fig, config)``.

        Raises:
            PreventUpdate: When ``n_clicks`` is ``None``.
        """
        if n_clicks is None:
            raise PreventUpdate
        df = pd.DataFrame(data)
        df["DateAcquired"] = pd.to_datetime(df["DateAcquired"])

        if ndxs is not None:
            df = df.reindex(ndxs)

        fig = px.scatter_matrix(df, dimensions=columns)

        fig.update_layout(
            autosize=True,
            height=1200,
            showlegend=False,
            margin=dict(l=50, r=10, b=200, t=50, pad=0),
            hovermode="closest",
        )

        config = T.gen_figure_config(filename="PQC-scatter-matrix")

        # A plain list, so the per-point overrides below are positional.
        # Assigning ``series[i]`` is label-based: after ``reindex`` it could
        # recolour the wrong point or append a new entry to the Series.
        marker_color = df["Use Downstream"].replace({
            True:
            C.colors["use_downstream"],
            False:
            C.colors["dont_use_downstream"]
        }).tolist()

        marker_line_color = df["Flagged"].replace({
            True:
            C.colors["flagged"],
            False:
            C.colors["not_flagged"]
        })

        marker_symbol = [0] * len(df)

        # Walk the frame's own index so a missing ``ndxs`` no longer crashes;
        # after ``reindex`` the index equals ``ndxs``.
        highlighted = selected if selected is not None else ()
        for position, ndx in enumerate(df.index):
            if ndx in highlighted:
                marker_color[position] = C.colors["selected"]
                marker_symbol[position] = 1

        fig.update_traces(
            marker_symbol=marker_symbol,
            marker_line_color=marker_line_color,
            marker_line_width=2,
            opacity=0.8,
        )

        fig.update_traces(marker_color=marker_color)
        fig.update_traces(marker_size=20)

        return fig, config
예제 #22
0
def num_analysis(n_clicks, input, *args):
    """Return the scatter-matrix graph for ``'AAA'``, otherwise a prompt.

    ``n_clicks`` and ``*args`` are not referenced in the body.
    """
    if input == 'AAA':
        pair_plot = px.scatter_matrix(use_df,
                                      dimensions=num_cols,
                                      color=target_column)
        return dcc.Graph(figure=pair_plot)

    # ``None``, the empty string and every other key share the same prompt.
    return html.H5('分析方法を選択し、実行を押してください')
예제 #23
0
def scatter_matrix(df, dimensions=None, color='MUNICIPIO'):
    """Return a scatter matrix of ``df`` with the diagonal hidden.

    Args:
        df: Data to plot.
        dimensions: Column names to plot; ``None`` means
            ``['SEXO', 'EDAD']``.
        color: Column used to colour the points.

    Returns:
        The figure, titled ``"Scatter matrix"``, whose axis labels are the
        column names with underscores replaced by spaces.
    """
    # A ``None`` sentinel replaces the former mutable list default, which
    # was one list object shared by every call.
    if dimensions is None:
        dimensions = ['SEXO', 'EDAD']

    # Axis labels: column names without underscores.
    readable = {col: col.replace('_', ' ') for col in df[dimensions].columns}

    fig = px.scatter_matrix(df,
                            dimensions=dimensions,
                            color=color,
                            color_continuous_scale=px.colors.diverging.Temps,
                            title="Scatter matrix",
                            labels=readable)
    fig.update_traces(diagonal_visible=False)

    return fig
예제 #24
0
def num_analysis(n_clicks, input, *args):
    """Return the Dash components for the selected numeric analysis.

    Args:
        n_clicks: Not referenced in the body.
        input: Analysis key.  ``'AAA'`` yields the summary-statistics table,
            ``'BBB'`` the pair plot, ``'CCC'`` the correlation heatmap, all
            computed from ``processed_df`` and ``num_cols``.
        *args: Not referenced in the body.

    Returns:
        A prompt heading when ``input`` is ``None`` or empty, a notice when
        ``use_df`` is ``None``, a two-item list (component, line break) for
        a known key, or ``None`` for any other key.
    """
    if input is None or input == '':
        return html.H5('分析方法を選択し、実行を押してください')
    if use_df is None:
        return html.H5('目的変数が選択されていません。')

    # Render the summary-statistics table.
    if input == 'AAA':
        describe = processed_df[num_cols].describe().round(4).reset_index()
        # Convert once and reuse both parts instead of running the helper
        # twice on the same frame.
        processed = df_processor(describe)
        return [dash_table.DataTable(
                    column_selectable='multi',
                    fixed_rows={'headers': True, 'data': 0},
                    data=processed[0],
                    columns=processed[1],
                    style_table={
                        'overflowX': 'scroll',
                        'overflowY': 'scroll',
                        'maxHeight': '350px',
                        # Was misspelled 'maxWidht', which is not a CSS
                        # property and never limited the table width.
                        'maxWidth': '800px'
                    },
                    style_header={
                        'fontWeight': 'bold',
                        'textAlign': 'center'
                    }
                ),
                html.Br()]

    # Render the pair plot.
    if input == 'BBB':
        fig = px.scatter_matrix(
            processed_df,
            dimensions=num_cols,
            color=target_column
        )
        fig.update_layout(
            dragmode='select',
            width=1000,
            height=600,
            hovermode='closest',
        )
        return [dcc.Graph(figure=fig),
                html.Br()]

    # Render the correlation-coefficient heatmap.
    if input == 'CCC':
        corr = processed_df[num_cols].corr().round(4)
        fig = ff.create_annotated_heatmap(
            z=corr.values,
            x=list(corr.columns),
            y=list(corr.index),
            colorscale='Oranges',
            hoverinfo='none'
        )
        return [dcc.Graph(figure=fig),
                html.Br()]

    return None
예제 #25
0
def plot_pair_plots(data,
                    outfile,
                    folder_path,
                    dimensions_,
                    labels_=None,
                    color_=None):
    """Write a scatter matrix of ``data`` to ``<folder_path>/<outfile>.html``.

    ``dimensions_``, ``labels_`` and ``color_`` are forwarded to
    ``px.scatter_matrix``.  The file is written without being opened.
    """
    pair_plot = px.scatter_matrix(data,
                                  dimensions=dimensions_,
                                  labels=labels_,
                                  color=color_)
    target_path = os.path.join(folder_path, f"{outfile}.html")
    pair_plot.write_html(target_path, auto_open=False)
예제 #26
0
파일: toyo.py 프로젝트: satomun/chomoku
def update_graph2(selected_values, radio_value):
    """Build line, scatter-matrix and correlation figures for chosen places.

    Args:
        selected_values: Place names to plot.
        radio_value: ``"10m"`` plots ``fire_rain`` directly; ``"daily"``
            plots the frames returned by ``make_daily_data(fire_rain)``.

    Returns:
        A ``(line figure, scatter matrix, correlation image)`` tuple.

    Raises:
        PreventUpdate: When nothing is selected or ``radio_value`` is
            neither ``"10m"`` nor ``"daily"``.
    """
    # Truthiness also covers ``None`` (an unset selection), which previously
    # crashed in ``len()`` instead of skipping the update.
    if not selected_values:
        raise PreventUpdate

    if radio_value == "10m":
        selected_df = fire_rain[fire_rain["place"].isin(selected_values)]
        # One column per place, indexed by timestamp.
        pivot_df = selected_df.pivot_table(values="value",
                                           columns=["place"],
                                           index="dt")
        pivot_corr = pivot_df.corr()
        fig3 = px.line(selected_df, x="dt", y="value", color="place")
        fig4 = px.scatter_matrix(pivot_df)
        fig5 = px.imshow(pivot_corr)
        return fig3, fig4, fig5

    if radio_value == "daily":
        data1, data2 = make_daily_data(fire_rain)
        selected_df = data2[data2["place"].isin(selected_values)]
        matrix_data = data1[selected_values]
        fig6 = px.line(selected_df, x="dt", y="value", color="place")
        fig7 = px.scatter_matrix(matrix_data)
        fig8 = px.imshow(matrix_data.corr(), title="データ間の相関")
        return fig6, fig7, fig8

    raise PreventUpdate
예제 #27
0
def component_matrix(num_components=2, color_map="Item"):
    """Show a scatter matrix of the first ``num_components`` components.

    ``pca``, ``components`` and ``df_categorical`` are resolved from the
    enclosing scope.  Each axis is labelled with its explained-variance
    percentage and points are coloured by ``df_categorical[color_map]``.
    """
    variance_percent = pca.explained_variance_ratio_ * 100
    labels = {}
    for index, share in enumerate(variance_percent):
        labels[str(index)] = f"PC {index+1} ({share:.1f}%)"

    colour_values = df_categorical[color_map]
    fig = px.scatter_matrix(components,
                            labels=labels,
                            dimensions=range(num_components),
                            color=colour_values)
    fig.update_traces(diagonal_visible=False)
    fig.show()
예제 #28
0
 def test_returns_scatter_matrix_plot(self, stock_data_returns: pd.DataFrame):
     """Show a scatter matrix of the ``"returns"`` series of every stock code.

     The codes come from ``params.get("STOCK_CODES")``; rows are selected by
     ``stock_name`` and each column is named ``"<Code> returns"`` with the
     code capitalised.
     """
     stock_codes = params.get("STOCK_CODES")
     per_stock = []
     for stock in stock_codes:
         rows = stock_data_returns.query(f'stock_name=="{stock}"')
         per_stock.append(rows["returns"])
     returns_comp = pd.concat(per_stock, axis=1)
     returns_comp.columns = [
         f"{stock.capitalize()} returns" for stock in stock_codes
     ]
     px.scatter_matrix(returns_comp).show()
예제 #29
0
    def get_html(cfg, mode=None):
        """Return the satisfaction scatter matrix as an HTML string.

        The frame comes from ``ScatterMatrix.compute()``.  ``cfg`` is unpacked
        as keyword arguments of ``fig.to_html``; ``mode`` is not referenced
        in the body.
        """
        background = 'rgb(243, 243, 243)'
        frame = ScatterMatrix.compute()
        fig = px.scatter_matrix(
            frame,
            dimensions=["Verhalten", "Kompetenz", "Information", "Vertrauen"],
            color="Rolle-Kontext",
            size="Anzahl")
        fig.update_traces(diagonal_visible=False)
        fig.update_layout(title="Zufriedenheit unter den Teilnehmern",
                          paper_bgcolor=background,
                          plot_bgcolor=background)

        return fig.to_html(**cfg)
예제 #30
0
def Principal_Comp_Reg(df, features, Y, Standardize=False, n_components=2):
    """Run PCA on ``features`` and plot the components and the reconstruction.

    Two scatter matrices are shown, both coloured by ``df[Y]``: the principal
    components together with the target column, and the features rebuilt
    with ``pca.inverse_transform`` together with the target column.

    Args:
        df: Source frame.
        features: Column names used as PCA input.
        Y: Name of the target column.
        Standardize: When truthy, features are scaled with
            ``StandardScaler`` before PCA.
        n_components: Number of principal components.

    Returns:
        ``(principaldf, explained_variance_ratio, reconsdf)``: the components
        plus target as a frame with columns ``P0..P{n-1}`` and ``Y``, the
        explained-variance ratios, and the reconstructed features.
    """
    pca = PCA(n_components=n_components)
    # Separate the features from the target.
    x = df.loc[:, features].values
    y = df.loc[:, [Y]].values
    if Standardize:
        x = StandardScaler().fit_transform(x)
    x = pca.fit_transform(x)
    principalComponents = np.hstack((x, y))
    principaldf = pd.DataFrame(principalComponents,
                               columns=[f"P{i}"
                                        for i in range(n_components)] + [Y])

    labels = {
        str(i): f"PC {i+1} ({var:.1f}%)"
        for i, var in enumerate(pca.explained_variance_ratio_ * 100)
    }
    # The last column of the stacked array is the target.
    labels[str(x.shape[1])] = str(Y)

    # Plot every stacked column.  The former hard-coded ``range(3)`` was only
    # right for ``n_components == 2``.
    fig = px.scatter_matrix(principalComponents,
                            labels=labels,
                            dimensions=range(principalComponents.shape[1]),
                            color=df[Y],
                            title="PCA of X+y")
    fig.update_traces(diagonal_visible=False)
    fig.show()

    recons = pca.inverse_transform(x)
    reconsdf = pd.DataFrame(recons, columns=features)

    # Plot the reconstruction itself.  This figure used to redraw the
    # principal components under the "Inverse PCA" title.
    recons_with_target = np.hstack((recons, y))
    recons_labels = {
        str(i): str(name)
        for i, name in enumerate(list(features) + [Y])
    }
    fig = px.scatter_matrix(recons_with_target,
                            labels=recons_labels,
                            dimensions=range(recons_with_target.shape[1]),
                            color=df[Y],
                            title="Inverse PCA of X+y")
    fig.update_traces(diagonal_visible=False)
    fig.show()

    return principaldf, pca.explained_variance_ratio_, reconsdf