Esempio n. 1
0
def generate_wspd_vs_vspd(df, df_dropna):
    """Generate the vessel speed vs. wind speed density plot.

    Args:
        df: Full data frame. It is passed to ``_check_wind_outages`` and its
            length is used to compute the percentage of missing data.
        df_dropna: Data frame providing the "VSPD kn" and "WSPD mph" columns
            that are plotted. Presumably ``df`` with incomplete rows
            dropped -- confirm against the caller.

    Returns:
        A Plotly figure. When ``_check_wind_outages`` is falsy this is a
        filled density contour whose title carries the VSPD/WSPD correlation
        and which has a red marker line at 10 kn. Otherwise it is an empty
        plot annotated with the percentage of rows missing from
        ``df_dropna`` relative to ``df``.
    """
    plot_title = "<b>Vessel and Wind Speed Density Plot</b>"
    # ``layout.titlefont`` is a deprecated alias that newer Plotly releases
    # reject, so the title font is set through ``title.font`` instead.
    title_font = dict(size=14)

    if not _check_wind_outages(df, df_dropna):
        fig = px.density_contour(df_dropna, x="VSPD kn", y="WSPD mph")
        fig.update_traces(contours_coloring="fill", colorscale="blues")
        # ``iloc[0, 1]`` is purely positional; the former ``iloc[0][1]``
        # relied on integer-as-position lookup on a label-indexed Series,
        # which pandas has deprecated.
        correlation = (df_dropna.loc[:, ["VSPD kn", "WSPD mph"]]
                       .corr().iloc[0, 1])
        fig.update_layout(xaxis_title_text="VSPD kn",
                          title=dict(text=plot_title + "<br>" +
                                     "VSPD-WSPD Correlation: " +
                                     str(round(correlation, 2)),
                                     font=title_font),
                          hoverlabel=dict(bgcolor="white", font_size=13),
                          width=875, height=600, plot_bgcolor="#F1F1F1",
                          font=dict(size=12),
                          margin=dict(t=100))
        # Vertical line spanning the full plot height at x = 10 kn.
        fig.add_shape(type="line", x0=10, y0=0, x1=10, y1=1, xref="x",
                      yref="paper", line=dict(color="red", dash="solid",
                      width=1.5))
        fig.add_annotation(text="Speed Limit", showarrow=False, textangle=90,
                           font=dict(color="red"), xref="x", x=10.15,
                           yref="paper", y=1, hovertext="10 kn")
    else:
        # Outage: draw an empty plot and report how much data is missing.
        fig = px.density_contour(pd.DataFrame({"WSPD mph": [], "VSPD kn": []}),
                                 x="VSPD kn", y="WSPD mph")
        missing_pct = round(100 - len(df_dropna) / len(df) * 100, 2)
        fig.add_annotation(text="Major Wind Outage<br>" +
                           str(missing_pct) +
                           "% of Data Missing", showarrow=False, textangle=0,
                           font=dict(color="black", size=20), xref="paper",
                           x=0.5, yref="paper", y=0.5)
        fig.update_layout(title=dict(text=plot_title, font=title_font),
                          width=875, height=600, plot_bgcolor="#F1F1F1",
                          font=dict(size=12))
    return fig
Esempio n. 2
0
def build_graphBB1(dff, x_axis, y_axis, mode, trendline, marginal_sel, color, facet):
    """Build the ``dcc.Graph`` component for the selected plot mode.

    Args:
        dff: Data frame handed to Plotly Express.
        x_axis: Column used for the x axis.
        y_axis: Column used for the y axis.
        mode: One of "Scatter", "Heat", "Density" or "Density Fill". Any
            other value yields an empty figure.
        trendline: "Ordinary Least Squares Regression", "Locally Weighted
            Smoothing", or "None"/``None`` for no trendline. Other values
            are passed through to Plotly Express unchanged.
        marginal_sel: Marginal plot type applied to both axes, or "None".
        color: Column used for colouring, or "None".
        facet: Column used for facet columns (scatter mode only), or "None".

    Returns:
        A ``dcc.Graph`` with id "scatterB" wrapping the figure.
    """
    # The string "None" stands for "no selection" in these arguments.
    if marginal_sel == "None":
        marginal_sel = None
    if color == "None":
        color = None
    if facet == "None":
        facet = None

    # Translate the UI labels to the codes Plotly Express expects. "None"
    # is mapped as well; previously it reached px as an invalid trendline.
    trendline_codes = {
        "Ordinary Least Squares Regression": "ols",
        "Locally Weighted Smoothing": "lowess",
        "None": None,
    }
    trendline = trendline_codes.get(trendline, trendline)

    if mode == "Scatter":
        # Honour the requested trendline instead of always forcing "ols".
        fig = px.scatter(dff, x=x_axis, y=y_axis, color=color,
                         facet_col=facet, facet_col_wrap=3,
                         marginal_y=marginal_sel, marginal_x=marginal_sel,
                         trendline=trendline)
    elif mode == "Heat":
        fig = px.density_heatmap(dff, x=x_axis, y=y_axis,
                                 marginal_x=marginal_sel,
                                 marginal_y=marginal_sel)
    elif mode == "Density":
        fig = px.density_contour(dff, x=x_axis, y=y_axis, color=color,
                                 marginal_x=marginal_sel,
                                 marginal_y=marginal_sel, trendline=trendline)
    elif mode == "Density Fill":
        fig = px.density_contour(dff, x=x_axis, y=y_axis)
        fig.update_traces(contours_coloring="fill", contours_showlabels=True)
    else:
        # Unknown mode: fall back to an empty figure.
        fig = go.Figure()

    return dcc.Graph(id="scatterB", figure=fig)
Esempio n. 3
0
    def plot_data_density(self):
        """Return a density contour figure of the reduced data.

        When ``self.d == 1`` the first feature is plotted against a zero
        vector of length ``self.n`` and is coloured by the 'Classification'
        column only if ``self.classification`` is truthy. For any other
        ``self.d`` the first two features are plotted against each other and
        coloured by ``self.column_name``.
        """
        labeled = bool(self.classification)

        if self.d == 1:
            # One-dimensional data: put every point on the y = 0 line.
            y_values = np.zeros(self.n)
            color_by = 'Classification' if labeled else None
        else:
            y_values = self.features[1]
            # NOTE(review): colour is self.column_name whether or not the
            # data is classified -- confirm this is intended.
            color_by = self.column_name

        return px.density_contour(self.pd_data_frame,
                                  x=self.features[0],
                                  y=y_values,
                                  color=color_by,
                                  title='Density Contour')
Esempio n. 4
0
def test_render_mode():
    """Check the trace types produced for each ``render_mode`` setting."""
    gapminder = px.data.gapminder()
    gapminder_2007 = gapminder.query("year == 2007")

    # (data, extra keyword arguments, type expected for the points trace
    # and for the trendline trace)
    scatter_cases = [
        (gapminder_2007, {}, "scatter"),
        (gapminder_2007, {"render_mode": "webgl"}, "scattergl"),
        (gapminder, {}, "scattergl"),
        (gapminder, {"render_mode": "svg"}, "scatter"),
    ]
    for frame, extra, expected_type in scatter_cases:
        fig = px.scatter(frame, x="gdpPercap", y="lifeExp", trendline="ols",
                         **extra)
        assert fig.data[0].type == expected_type
        assert fig.data[1].type == expected_type

    # Density contours keep a plain scatter trace for the trendline.
    contour_fig = px.density_contour(gapminder, x="gdpPercap", y="lifeExp",
                                     trendline="ols")
    assert contour_fig.data[0].type == "histogram2dcontour"
    assert contour_fig.data[1].type == "scatter"
Esempio n. 5
0
def kdemap(num):
    """Return a filled, labelled density contour of ``num`` vs. "Attrition_Flag".

    The data comes from the module-level ``df``.
    """
    contour_options = {
        "x": num,
        "y": "Attrition_Flag",
        "title": f"Contour Map for {num}",
    }
    contour_fig = px.density_contour(df, **contour_options)
    # update_traces returns the figure it was called on.
    return contour_fig.update_traces(contours_showlabels=True,
                                     contours_coloring="fill")
Esempio n. 6
0
 def plot_da_stats(self):
     """Show an area/population density plot and print low-population counts.

     Builds a data frame from the values of ``self.dauid_map``, shows a
     filled density contour of "area" vs. "pop" with histogram marginals
     (both axes limited to 0-2000), then prints the number of entries and,
     for the 100 and 200 population limits, how many entries are at or
     below the limit and their summed population.
     """
     import plotly.express as px

     da_frame = pd.DataFrame(list(self.dauid_map.values()))
     contour_fig = px.density_contour(
         da_frame,
         x="area",
         y="pop",
         range_x=[0, 2000],
         range_y=[0, 2000],
         marginal_x="histogram",
         marginal_y="histogram",
     )
     contour_fig.data[0].update(contours_coloring="fill",
                                contours_showlabels=True)
     contour_fig.show()

     # Collect both groups before printing anything.
     small_das = {
         limit: [da for da in self.dauid_map.values() if da["pop"] <= limit]
         for limit in (100, 200)
     }

     print(f"DA count: {len(self.dauid_map)}")
     for limit, das in small_das.items():
         pop_sum = sum([int(da['pop']) for da in das])
         print(
             f"\tDAs below {limit} pop: {len(das)} (population sum = {pop_sum})"
         )
Esempio n. 7
0
def make_metric_plot(line='K40', pareto='Product', marginal='histogram'):
    """Return a Rate vs. Yield density contour for one production line.

    Rows of the module-level ``oee`` frame whose 'Line' equals ``line`` are
    sorted by 'Thickness Material A' (converted to numeric afterwards) and
    plotted, coloured by the ``pareto`` column. ``marginal`` selects the
    marginal plot for both axes; the string 'none' disables marginals.
    """
    thickness = 'Thickness Material A'
    selected = oee.loc[oee['Line'] == line].sort_values(thickness)
    selected[thickness] = pd.to_numeric(selected[thickness])

    # Passing None is the same as leaving the marginal arguments out.
    marginal_kind = None if marginal == 'none' else marginal
    fig = px.density_contour(selected, x='Rate', y='Yield', color=pareto,
                             marginal_x=marginal_kind,
                             marginal_y=marginal_kind)
    fig.update_layout(
        plot_bgcolor="#F9F9F9",
        paper_bgcolor="#F9F9F9",
        height=750,
        title=f"{line}, Pareto by {pareto}",
    )
    return fig
Esempio n. 8
0
def g7(batch_id, x, y, colorset):
    """Return an HTML ``div`` with a density contour of a batch's data.

    The batch is loaded through ``models.BatchInput`` and converted to a
    pandas data frame. ``colorset`` is accepted but not used.
    """
    batch_frame = models.BatchInput(batch_id).as_pandas_dataframe()
    contour_fig = px.density_contour(batch_frame, x=x, y=y)
    return opy.plot(contour_fig, output_type='div', auto_open=False)
Esempio n. 9
0
def update_graph_3(n_clicks, dropdown_value, range_slider_value, check_list_value, radio_items_value):
    """Print the received control values and return an iris density contour.

    The inputs are only echoed to stdout; the figure does not depend on them.
    """
    received = (n_clicks, dropdown_value, range_slider_value,
                check_list_value, radio_items_value)
    for value in received:
        print(value)

    return px.density_contour(px.data.iris(),
                              x='sepal_width',
                              y='sepal_length')
Esempio n. 10
0
def correlation_densityplots(x_axis, y_axis):
    """Build three density contour figures of ``x_axis`` vs. ``y_axis``.

    All figures are drawn from the module-level ``df_ds`` with a rug
    marginal on x, a histogram marginal on y, the module-level
    ``graph_template`` and a height of 300. They differ in the colour
    column, the title, and whether "Name" is used as hover name.

    Args:
        x_axis: Column plotted on the x axis.
        y_axis: Column plotted on the y axis.

    Returns:
        Tuple ``(density_legendary, density_type, density_generation)``.
    """
    # Options common to all three figures.
    shared = dict(
        x=x_axis,
        y=y_axis,
        marginal_x="rug",
        marginal_y="histogram",
        template=graph_template,
        height=300,
    )

    # The "Legendary" figure has no hover name, as in the original.
    density_legendary = px.density_contour(
        df_ds,
        color="Legendary",
        title="Density contour, by legendary",
        **shared,
    )

    density_type = px.density_contour(
        df_ds,
        color="Type 1",
        hover_name="Name",
        title="Density contour, by Types",
        **shared,
    )

    density_generation = px.density_contour(
        df_ds,
        color="Generation",
        hover_name="Name",
        # Fixed the user-visible typo "Genrations".
        title="Density contour, by Generations",
        **shared,
    )

    return density_legendary, density_type, density_generation
 def gdp_suicide_contour(self, df):
     """Return an animated density contour of GDP vs. number of suicides.

     ``df`` is sorted by 'year' first; the figure is coloured by year,
     animated per year with 'country' as animation group, and has a
     histogram marginal on the y axis.
     """
     by_year = df.sort_values(by=['year'])
     contour_options = dict(
         x=' gdp_for_year ($) ',
         y='suicides_no',
         color='year',
         marginal_y="histogram",
         animation_frame='year',
         animation_group='country',
     )
     return px.density_contour(by_year, **contour_options)
Esempio n. 12
0
def plotly_demo(ctx: Context) -> Records[PlotlyJson]:
    """Return a one-element list holding the Plotly JSON of an iris contour.

    The figure plots sepal width against sepal length, coloured by species,
    with a rug marginal on x and a histogram marginal on y. ``ctx`` is not
    used.
    """
    contour_options = {
        "x": "sepal_width",
        "y": "sepal_length",
        "color": "species",
        "marginal_x": "rug",
        "marginal_y": "histogram",
    }
    iris_fig = px.density_contour(px.data.iris(), **contour_options)
    return [iris_fig.to_plotly_json()]
def update_graph_2(n_clicks, dropdown_value):
    """Print the received values and return the experience/salary contour.

    The figure is built from the module-level ``df_survey`` and coloured by
    "Gender"; it does not depend on the arguments.
    """
    for received in (n_clicks, dropdown_value):
        print(received)

    survey_fig = px.density_contour(
        df_survey,
        x='Years of experience',
        y='Salary_in_EUR',
        color="Gender",
        title='XP Years by Salary & Gender',
    )
    survey_fig.update_layout(height=600)
    return survey_fig
Esempio n. 14
0
def pair_contour(ftr0, ftr1):
    """Return a 500x400 density contour of ``ftr0`` vs. ``ftr1``.

    Data comes from the module-level ``df_combine``, coloured by its "color"
    column using ``OPPOSITE_COLOR_0`` and ``MAIN_COLOR_0``, with histogram
    marginals on both axes.
    """
    palette = [OPPOSITE_COLOR_0, MAIN_COLOR_0]
    contour_fig = px.density_contour(df_combine,
                                     x=ftr0,
                                     y=ftr1,
                                     color="color",
                                     color_discrete_sequence=palette,
                                     marginal_x="histogram",
                                     marginal_y="histogram")

    # Legend sits above the plot on the left; no right margin.
    contour_fig.update_layout(
        width=500,
        height=400,
        legend={'xanchor': 'left', 'x': 0, 'y': 1.2},
        margin={'l': 50, 'r': 0},
    )
    return contour_fig
Esempio n. 15
0
# Example script: each section builds one Plotly Express figure and writes
# it as an HTML file into `dir_name`.
# NOTE(review): `os` and `dir_name` are not defined in this snippet --
# presumably they are set up earlier in the file; confirm.
import plotly.express as px

# Area chart of "pop" over "year", coloured by continent with one line
# group per country.
gapminder = px.data.gapminder()
fig = px.area(gapminder,
              x="year",
              y="pop",
              color="continent",
              line_group="country")
fig.write_html(os.path.join(dir_name, "area.html"))

# #### Visualize Distributions

import plotly.express as px

# Plain density contour of sepal width vs. sepal length.
iris = px.data.iris()
fig = px.density_contour(iris, x="sepal_width", y="sepal_length")
fig.write_html(os.path.join(dir_name, "density_contour.html"))

import plotly.express as px

# Same contour, coloured by species, with a rug marginal on x and a
# histogram marginal on y.
iris = px.data.iris()
fig = px.density_contour(
    iris,
    x="sepal_width",
    y="sepal_length",
    color="species",
    marginal_x="rug",
    marginal_y="histogram",
)
fig.write_html(os.path.join(dir_name, "density_contour_marginal.html"))
Esempio n. 16
0
def contour(**kwargs):
    """Return a filled, labelled density contour built from ``kwargs``.

    All keyword arguments are forwarded to ``px.density_contour``.
    """
    contour_fig = px.density_contour(**kwargs)
    # update_traces returns the figure it was called on.
    return contour_fig.update_traces(contours_showlabels=True,
                                     contours_coloring="fill")
Esempio n. 17
0
    def __init__(self, df, info):
        """Create the chart window and display the requested Plotly figure.

        The figure is written to ``temp.html`` next to this module and
        loaded into an embedded ``QWebEngineView``.

        Args:
            df: Data passed as first argument to the Plotly Express function.
            info: Nested mapping of chart options. ``info['chart_type']``
                selects the plot function ('scatter', 'scatter_3d', 'line',
                'bar', 'density_contour', 'density_heatmap' or 'histogram').
                Which other keys are read depends on the chart type, e.g.
                ``info['title']``, ``info['x']['column']``,
                ``info['template']``. Width, height and opacity are not
                forwarded.

        Raises:
            ValueError: If ``info['chart_type']`` is not a supported type.
                Previously this surfaced later as a missing ``self.fig``.
        """
        super().__init__()
        self.df = df

        self.resize(721, 600)
        self.setWindowTitle('Chart')
        self.setWindowIcon(QIcon('resource/icon.ico'))
        self.setWindowIconText('viuplot')

        self.view = QWebEngineView(self)

        chart_type = info['chart_type']
        plotters = {
            'scatter': px.scatter,
            'scatter_3d': px.scatter_3d,
            'line': px.line,
            'bar': px.bar,
            'density_contour': px.density_contour,
            'density_heatmap': px.density_heatmap,
            'histogram': px.histogram,
        }
        if chart_type not in plotters:
            raise ValueError(
                "Unsupported chart_type: {!r}".format(chart_type))

        self.fig = plotters[chart_type](
            self.df, **self._plot_kwargs(chart_type, info))

        self.file_path = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "temp.html"))
        po.plot(self.fig, filename=self.file_path, auto_open=False)
        url = QUrl.fromLocalFile(self.file_path)
        self.view.resize(721, 800)
        self.view.load(url)

        self.create_menu()

        self.show()

    @staticmethod
    def _plot_kwargs(chart_type, info):
        """Collect the keyword arguments for one supported chart type.

        Only the ``info`` keys relevant to ``chart_type`` are read, so an
        ``info`` mapping needs no entries for options its chart ignores.
        """
        x, y = info['x'], info['y']

        # Options accepted by every supported chart type.
        kwargs = dict(
            title=info['title'],
            x=x['column'],
            log_x=x['log'],
            range_x=x['range'],
            y=y['column'],
            log_y=y['log'],
            range_y=y['range'],
            animation_frame=info['animation_frame'],
            animation_group=info['animation_group'],
            hover_name=info['hover_name'],
            template=info['template'],
        )

        # Facets: everything except the 3-D scatter.
        if chart_type != 'scatter_3d':
            kwargs.update(facet_col=x['facet'], facet_row=y['facet'])

        # Error bars and text labels: point, line and bar charts.
        if chart_type in ('scatter', 'scatter_3d', 'line', 'bar'):
            kwargs.update(
                error_x=x['error'],
                error_x_minus=x['error_minus'],
                error_y=y['error'],
                error_y_minus=y['error_minus'],
                text=info['text'],
            )

        # Colour: everything except the density heatmap.
        if chart_type != 'density_heatmap':
            kwargs['color'] = info['color']

        if chart_type in ('scatter', 'scatter_3d'):
            kwargs.update(symbol=info['symbol'], size=info['size'])

        if chart_type == 'scatter':
            kwargs['trendline'] = info['trendline']

        # Per-axis marginal plots.
        if chart_type in ('scatter', 'density_contour', 'density_heatmap'):
            kwargs.update(marginal_x=x['marginal'],
                          marginal_y=y['marginal'])

        if chart_type == 'scatter_3d':
            z = info['z']
            kwargs.update(
                z=z['column'],
                log_z=z['log'],
                error_z=z['error'],
                error_z_minus=z['error_minus'],
                range_z=z['range'],
            )

        if chart_type in ('density_contour', 'density_heatmap'):
            kwargs.update(
                z=info['z']['column'],
                histfunc=info['histfunc'],
                histnorm=info['histnorm'],
                nbinsx=info['nbinsx'],
                nbinsy=info['nbinsy'],
            )

        if chart_type == 'histogram':
            # The histogram takes a single marginal, read from the x options.
            kwargs.update(
                marginal=x['marginal'],
                barmode=info['barmode'],
                barnorm=info['barnorm'],
                histfunc=info['histfunc'],
                histnorm=info['histnorm'],
                nbins=info['nbins'],
                cumulative=info['cumulative'],
            )

        return kwargs
Esempio n. 18
0
def main():
    st.title("Machine Learning Web Application")
    menu = [
        "Sentiment", "EDA", "DataViz", "Story", "Classification", "Timeseries"
    ]
    choice = st.sidebar.selectbox("Select Menu", menu)
    if choice == "EDA":
        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())
        else:
            st.write("No Dataset To Show")
        st.subheader("Exploratory Data Analysis")
        if data is not None:
            if st.checkbox("Show Shape"):
                st.write(df.shape)
            if st.checkbox("Show Summary"):
                st.write(df.describe())
            if st.checkbox("Correlation Matrix"):
                st.write(sns.heatmap(df.corr(), annot=True))
                st.pyplot()

    elif choice == "DataViz":
        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())
        else:
            st.write("No Dataset To Show")
        st.subheader("Data Visualization")
        if data is not None:
            all_columns = df.columns.to_list()
            if st.checkbox("Pie Chart"):
                columns_to_plot = st.selectbox("Select 1 Column to Visualize",
                                               all_columns)
                pie_plot = df[columns_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pie_plot)
                st.pyplot()
            plot_type = st.selectbox("Select Type of Plot",
                                     ["bar", "line", "area", "hist", "box"])
            selected_columns = st.multiselect("Select Columns To Visualize",
                                              all_columns)
            if st.button("Generate Plot"):
                st.success("Generating Custom Plot of {} for {}".format(
                    plot_type, selected_columns))
                if plot_type == "bar":
                    cust_data = df[selected_columns]
                    st.bar_chart(cust_data)
                elif plot_type == "line":
                    cust_data = df[selected_columns]
                    st.line_chart(cust_data)
                elif plot_type == "area":
                    cust_data = df[selected_columns]
                    st.area_chart(cust_data)
                elif plot_type:
                    cust_plot = df[selected_columns].plot(kind=plot_type)
                    st.write(cust_plot)
                    st.pyplot()
    elif choice == "Story":
        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())
            st.subheader("Storytelling with Data")
            if data.name == "gapminder.csv":
                fig = px.scatter(
                    df,
                    x="gdpPercap",
                    y="lifeExp",
                    animation_frame="year",
                    animation_group="country",
                    # fig = px.scatter(px.data.gapminder(), x="gdpPercap", y="lifeExp", animation_frame="year", animation_group="country",
                    size="pop",
                    color="country",
                    hover_name="country",
                    log_x=True,
                    size_max=100,
                    range_x=[100, 100000],
                    range_y=[25, 90])
                fig.update_layout(height=650)
                st.write(fig)
            elif data.name == "stocks.csv":
                fig = go.Figure()
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['AAPL'], name="Apple"))
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['AMZN'], name="Amazon"))
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['FB'], name="Facebook"))
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['GOOG'], name="Google"))
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['NFLX'], name="Netflix"))
                fig.add_trace(
                    go.Scatter(x=df['date'], y=df['MSFT'], name="Microsoft"))
                fig.layout.update(
                    title_text='Time Series data with Rangeslider',
                    xaxis_rangeslider_visible=True)
                st.write(fig)
            elif data.name == "iris.csv":
                y1 = df['sepal_length']
                x1 = df['sepal_width']
                y2 = df['petal_length']
                x2 = df['petal_width']
                color = df['species']
                fig1 = px.scatter(df,
                                  x=x1,
                                  y=y1,
                                  color=color,
                                  marginal_y="violin",
                                  marginal_x="box",
                                  trendline="ols",
                                  template="simple_white")
                fig2 = px.density_contour(df,
                                          x=x2,
                                          y=y2,
                                          color=color,
                                          marginal_y='histogram')
                st.write(fig1, fig2)
        else:
            st.write("No Dataset To Show")

    elif choice == "Classification":
        st.subheader("Classification Prediction")
        # if data is None:
        #     pass
        # elif data.name == "iris.csv":
        #     st.subheader("Iris flower Prediction from Machine Learning Model")
        iris = Image.open('iris.png')
        st.image(iris)

        model = open("model.pkl", "rb")
        knn_clf = joblib.load(model)
        #Loading images
        setosa = Image.open('setosa.png')
        versicolor = Image.open('versicolor.png')
        virginica = Image.open('virginica.png')

        st.sidebar.title("Features")
        #Intializing
        sl = st.sidebar.slider(label="Sepal Length (cm)",
                               value=5.2,
                               min_value=0.0,
                               max_value=8.0,
                               step=0.1)
        sw = st.sidebar.slider(label="Sepal Width (cm)",
                               value=3.2,
                               min_value=0.0,
                               max_value=8.0,
                               step=0.1)
        pl = st.sidebar.slider(label="Petal Length (cm)",
                               value=4.2,
                               min_value=0.0,
                               max_value=8.0,
                               step=0.1)
        pw = st.sidebar.slider(label="Petal Width (cm)",
                               value=1.2,
                               min_value=0.0,
                               max_value=8.0,
                               step=0.1)

        if st.button("Click Here to Classify"):
            dfvalues = pd.DataFrame(list(zip([sl], [sw], [pl], [pw])),
                                    columns=[
                                        'sepal_length', 'sepal_width',
                                        'petal_length', 'petal_width'
                                    ])
            input_variables = np.array(dfvalues[[
                'sepal_length', 'sepal_width', 'petal_length', 'petal_width'
            ]])
            prediction = knn_clf.predict(input_variables)
            if prediction == 1:
                st.image(setosa)
            elif prediction == 2:
                st.image(versicolor)
            elif prediction == 3:
                st.image(virginica)

    # st.title("Emotion Classifier App")
    # menu = ["Home"]
    # choice = st.sidebar.selectbox("Menu",menu)
    # create_page_visited_table()
    # create_emotionclf_table()
    elif choice == "Sentiment":
        # add_page_visited_details("Home",datetime.now())
        # data = " "
        st.subheader("Sentiment-Emotion Prediction")

        with st.form(key='emotion_clf_form'):
            search_text = st.text_area("Type Here")
            submit_text = st.form_submit_button(label='Submit')

        if submit_text:
            hasilSearch = api.search(q=str(search_text), count=2)
            texts = []
            for tweet in hasilSearch:
                texts.append(tweet.text)
            # raw_text2 = texts[1]
            raw_text = texts[0]
            # translated = translator.translate(raw_text)
            # translated = raw_text
            prediction = predict_emotions(raw_text)
            probability = get_prediction_proba(raw_text)
            sentiment = predict_sentiment(raw_text)
            proba_sentiment = get_sentiment_proba(raw_text)
            col1, col2 = st.beta_columns(2)
            with col1:
                st.success("Search Result")
                st.write(raw_text)
                # st.write(raw_text2)
                # st.write(translated.text)
                st.success("Prediction")
                emoji_icon = emotions_emoji_dict[prediction]
                st.write("{}:{}".format(prediction, emoji_icon))
                st.write("Confidence:{}".format(np.max(probability)))
                st.write("{}:{}".format(sentiment, emoji_sentiment[sentiment]))
                st.write("Confidence:{}".format(np.max(proba_sentiment)))
            with col2:
                st.success("Prediction Probability")
                # st.write(probability)
                # st.write(proba_sentiment)
                proba_df = pd.DataFrame(probability, columns=pipe_lr.classes_)
                proba_sent_df = pd.DataFrame(proba_sentiment,
                                             columns=pipe_ctm.classes_)
                # st.write(proba_df.T)
                # st.write(proba_sent_df.T)
                # proba_df_clean = proba_df.T.reset_index()
                # proba_df_clean.columns = ["emotions","probability"]
                proba_df_sent_clean = proba_sent_df.T.reset_index()
                proba_df_sent_clean.columns = ["sentiments", "probability"]

                # fig = alt.Chart(proba_df_clean).mark_bar().encode(x='emotions',y='probability',color='emotions')
                # st.altair_chart(fig,use_container_width=True)
                fig = alt.Chart(proba_df_sent_clean).mark_bar().encode(
                    x='sentiments', y='probability', color='sentiments')
                st.altair_chart(fig, use_container_width=True)

    elif choice == "Timeseries":
        import plotly.io as pio
        pio.templates.default = "seaborn"
        # Timeseries model
        from statsmodels.tsa.holtwinters import ExponentialSmoothing

        def create_model(col, seasonal):
            """Fit Holt-Winters models on ``data[col]`` and return the best fit.

            Every combination of additive/multiplicative trend,
            additive/multiplicative seasonality and damped/undamped trend is
            fitted; the fit with the lowest AIC is returned.

            Args:
                col: Name of the column of the enclosing ``data`` frame to
                    model (coerced to ``str``).
                seasonal: Number of periods in one seasonal cycle.

            Returns:
                The fitted results object of the lowest-AIC combination.

            Raises:
                ValueError: If none of the combinations could be fitted; the
                    last error reported by statsmodels is re-raised.
            """
            col = str(col)
            best_fit = None
            last_error = None

            # Try every option combination and keep the best fit directly,
            # instead of keying a dict by the (float) AIC and refitting later.
            for trend in ('add', 'mul'):
                for season in ('add', 'mul'):
                    for damped in (True, False):
                        try:
                            fit = ExponentialSmoothing(
                                data[col],
                                trend=trend,
                                seasonal=season,
                                seasonal_periods=seasonal,
                                damped_trend=damped).fit()
                        except ValueError as err:
                            # statsmodels rejects some combinations outright
                            # (e.g. multiplicative components on data that is
                            # not strictly positive); skip those and keep
                            # searching the remaining ones.
                            last_error = err
                            continue
                        # "<=" keeps the later candidate on ties, matching the
                        # overwrite order of the previous dict-based lookup.
                        if best_fit is None or fit.aic <= best_fit.aic:
                            best_fit = fit

            if best_fit is None:
                # Nothing could be fitted: surface the underlying problem.
                raise last_error
            return best_fit

        st.subheader("Time Series Prediction")
        data = st.file_uploader("Upload Dataset", type=["xlsx"])
        kolom = []
        if data is None:
            st.write('Please upload timeseries file (xlsx)')
        else:
            df = pd.read_excel(data)
            data = df.dropna()
            #             st.write(data.head())
            data['yyyy-mm'] = pd.to_datetime(data['Date']).dt.strftime('%Y-%m')
            st.write(data.head())
            #             st.write(data.head())
            #             st.write(data.head())
            kolom = data.columns.tolist()
            pilih = st.selectbox('Pilih Kolom', kolom)
            xaxis = data.iloc[:, 0].astype('str')
            fig1 = px.line(x=xaxis, y=data[pilih])
            st.plotly_chart(fig1)
            seasonal = st.number_input('Seasonal_periods',
                                       value=12,
                                       max_value=len(data),
                                       min_value=1,
                                       step=1)
            pred_period = st.number_input('Prediction_periods',
                                          value=6,
                                          max_value=len(data),
                                          min_value=1,
                                          step=1)
            # submit_data = st.form_submit_button(label='Create_model')
            if st.button('Create_model and Run_Prediction'):
                st.success("Create Model Success")
                tsmodel = create_model(pilih, seasonal)
                prediksi = list(tsmodel.forecast(pred_period))
                yaxis = data[pilih].tolist()
                # st.write(prediksi)
                for p in prediksi:
                    yaxis.append(p)
                last = df.index[-1]
                dfnew = df.drop(df.index[len(yaxis):last + 1])
                dfnew['prediction'] = yaxis
                dfnew.iloc[:, 0] = dfnew.iloc[:, 0].astype('str')
                # dfnew = dfnew.dropna()
                fig2 = go.Figure()
                fig2.add_trace(
                    go.Scatter(
                        x=dfnew.iloc[:, 0],
                        y=dfnew['prediction'],
                        # line = dict(color='firebrick', width=4, dash='dot'),
                        mode='lines+markers',
                        name='prediction'))
                fig2.add_trace(
                    go.Scatter(
                        x=dfnew.iloc[:, 0],
                        y=dfnew[pilih],
                        # line = dict(color='firebrick', width=4, dash='dot'),
                        mode='lines+markers',
                        name='actual'))
                st.plotly_chart(fig2)
Esempio n. 19
0
# Scatter plot of sepal width (x) against sepal length (y), coloured by
# species, with the "e" column used as the error bar on both axes.
px.scatter(iris, x="sepal_width", y="sepal_length", color="species",
           error_x="e", error_y="e")


# In[23]:


# Density contours of the same two measurements, one set per species.
px.density_contour(iris, x="sepal_width", y="sepal_length", color="species")


# In[24]:


# Same contours with marginal plots: a rug along the x axis and a histogram
# along the y axis.
px.density_contour(iris, x="sepal_width", y="sepal_length", color="species",
                   marginal_x="rug", marginal_y="histogram")
import plotly.express as px

# Column-oriented data: each key is a column name, each list its values.
df = {
    'Информатика': [3.3, 4.4, 3.5],
    'Физика': [3.2, 4.5, 5],
    'Математика': [3.4, 3.8, 4.2],
}

# Density contour of the first two columns, coloured by the third.
fig = px.density_contour(
    df,
    x="Информатика",
    y="Физика",
    color="Математика",
    template='presentation',
)

fig.show()
Esempio n. 21
0
    0 if money <= 0 else money for money in data['MONEY_LOST']
]
# NOTE(review): the column is log2-transformed here and then drawn on a
# logarithmic x axis below -- confirm the double log scaling is intended.
data['ARR_DELAY_ONLY'] = data['ARR_DELAY_ONLY'].apply(log2)

# Delay against money lost, one colour per carrier.
fig_info = pltx.scatter(data,
                        x="ARR_DELAY_ONLY",
                        y="MONEY_LOST",
                        color="OP_UNIQUE_CARRIER",
                        log_x=True)
fig_info.update_traces(marker=dict(size=3))
fig_info.show()

# Show density (first 60000 rows only; display is currently disabled)
fig_density = pltx.density_contour(data.head(60000),
                                   x="DISTANCE",
                                   y="MONEY_LOST",
                                   color="OP_UNIQUE_CARRIER",
                                   marginal_x="rug",
                                   marginal_y="histogram")
# >> fig_density.show()

# Show density heatmap for cities (first 30000 rows only; display disabled)
fig_city = pltx.density_heatmap(data.head(30000),
                                x="ORIGIN",
                                y="DEST",
                                marginal_y="histogram")
# >> fig_city.show()

# Explore the skewness.
# numeric_only=True restricts the computation to numeric columns; without it
# pandas >= 2.0 raises on non-numeric columns (the carrier / origin / dest
# columns used above are presumably strings), while older versions dropped
# them implicitly -- so the result is unchanged there.
skew = data.skew(numeric_only=True)
print('Skewness:', skew)
Esempio n. 22
0
def generate_density(df):
    """Return a density-contour figure of ``sepal_width`` vs ``sepal_length``.

    ``df`` is handed to ``px.density_contour`` unchanged and must provide
    both columns.
    """
    return px.density_contour(df, x="sepal_width", y="sepal_length")
Esempio n. 23
0
# Hide the diagonal of the previously built figure, then render it.
fig.update_traces(diagonal_visible=False)
plot(fig)

# The three variables that separate the target most strongly.
group = [
    "RepublicanFraction Ohio",
    "RepublicanFraction Wyoming",
    "RepublicanFraction Idaho",
]

# 3-D scatter of the three variables, coloured by the target column.
fig = px.scatter_3d(
    df, x=group[0], y=group[1], z=group[2], color=" DemocratWon", opacity=0.7
)
plot(fig)

# Density contours of the first two variables per target class, with a
# histogram on the x margin and a box plot on the y margin.
fig = px.density_contour(
    df,
    x=group[0],
    y=group[1],
    color=" DemocratWon",
    marginal_x="histogram",
    marginal_y="box",
)
plot(fig)

# Strip plot of the first variable, split by target class.
fig = px.strip(df, color=" DemocratWon", y=group[0])
plot(fig)
Esempio n. 24
0
def nhanes_multivariate_analysis(df):
    """Render the bivariate / stratified NHANES walkthrough in Streamlit.

    Alternates explanatory markdown with Plotly charts: arm length against
    leg length (scatter and density contour), systolic against diastolic
    blood pressure, two repeated systolic readings, and finally arm/leg
    length stratified by gender and by gender and ethnicity.

    Side effect: a ``RIAGENDRx`` column is added to ``df`` in which the
    ``RIAGENDR`` codes 1 and 2 are replaced by "Male" and "Female".
    """
    st.write(""" 
  ### Quantitative bivariate data

  Bivariate data arise when every "unit of analysis" (e.g. a person in the NHANES dataset) is assessed with respect to two traits (the NHANES subjects were assessed for many more than two traits, but we can consider two traits at a time here).
  
  Below we make a scatterplot of arm length against leg length.  This means that arm length ([BMXARML](https://wwwn.cdc.gov/Nchs/Nhanes/2015-2016/BMX_I.htm#BMXARML)) is plotted on the vertical axis and leg length ([BMXLEG](https://wwwn.cdc.gov/Nchs/Nhanes/2015-2016/BMX_I.htm#BMXLEG)) is plotted on the horizontal axis).  We see a positive dependence between the two measures -- people with longer arms tend to have longer legs, and vice-versa.  However it is far from a perfect relationship.
  """)

    # Readable gender labels, reused below for colouring and faceting.
    gender_labels = {1: "Male", 2: "Female"}
    df["RIAGENDRx"] = df.RIAGENDR.replace(gender_labels)

    arm_leg_scatter = px.scatter(
        df,
        x="BMXLEG",
        y="BMXARML",
        color="RIAGENDRx",
        opacity=0.5,
        title="Correlation arm length against leg length",
    )
    st.plotly_chart(arm_leg_scatter)

    st.write(""" 
  This plot also shows the Pearson correlation coefficient between the arm length and leg length, which is 0.62. The Pearson correlation coefficient ranges from -1 to 1, with values approaching 1 indicating a more perfect positive dependence.  In many settings, a correlation of 0.62 would be considered a moderately strong positive dependence. 
  """)

    # Filled, labelled contours with the (hard-coded) coefficient as a note.
    arm_leg_contour = px.density_contour(
        df,
        x="BMXLEG",
        y="BMXARML",
        title="Contour correlation between arm length and leg length",
    )
    arm_leg_contour.add_annotation(
        x=50,
        y=45,
        text="p=0.62",
        font=dict(color="white", size=12),
        showarrow=False,
    )
    arm_leg_contour.update_traces(contours_coloring="fill",
                                  contours_showlabels=True)
    st.plotly_chart(arm_leg_contour)

    st.write(""" 
  As another example with slightly different behavior, we see that systolic and diastolic blood pressure (essentially the maximum and minimum blood pressure between two consecutive heart beats) are more weakly correlated than arm and leg length, with a correlation coefficient of 0.32. This weaker correlation indicates that some people have unusually high systolic blood pressure but have average diastolic blood pressure, and vice versa.
  """)
    pressure_contour = px.density_contour(
        df,
        x="BPXSY1",
        y="BPXDI1",
        marginal_x="rug",
        marginal_y="rug",
        title="BPXSY1 and BPXDI1 correlation",
    )
    pressure_contour.add_annotation(
        x=200,
        y=100,
        text="p=0.32",
        font=dict(size=15, color="black"),
        showarrow=False,
    )
    st.plotly_chart(pressure_contour)

    st.write("""
  Next we look at two repeated measures of systolic blood pressure, taken a few minutes apart on the same person. These values are very highly correlated, with a correlation coefficient of around 0.96.
  """)
    # 2-D histogram contour of the first against the second systolic reading.
    label_font = dict(family='Raleway', color='white')
    hover_font = dict(family='Raleway', color='black')
    repeat_trace = go.Histogram2dContour(
        x=df["BPXSY1"].to_numpy(),
        y=df["BPXSY2"].to_numpy(),
        colorscale='Jet',
        contours=dict(showlabels=True, labelfont=label_font),
        hoverlabel=dict(bgcolor='white',
                        bordercolor='black',
                        font=hover_font),
    )
    repeat_fig = go.Figure(repeat_trace)
    repeat_fig.update_layout(title_text="BPXSY1 and BPXSY2 correlation")
    repeat_fig.add_annotation(
        x=200,
        y=200,
        text="p=0.96",
        font=dict(size=15, color="white"),
        showarrow=False,
    )
    st.plotly_chart(repeat_fig)

    st.write(""" 
  ### Heterogeneity and stratification

  Most human characteristics are complex -- they vary by gender, age, ethnicity, and other factors.  This type of variation is often referred to as "heterogeneity".  When such heterogeneity is present, it is usually productive to explore the data more deeply by stratifying on relevant factors, as we did in the univariate analyses.  

  Below, we continue to probe the relationship between leg length and arm length, stratifying first by gender, then by gender and ethnicity. The gender-stratified plot indicates that men tend to have somewhat longer arms and legs than women -- this is reflected in the fact that the cloud of points on the left is shifted slightly up and to the right relative to the cloud of points on the right.  In addition, the correlation between arm length and leg length appears to be somewhat weaker in women than in men.
  """)

    # One scatter panel per gender, stacked vertically.
    by_gender = px.scatter(df, x="BMXLEG", y="BMXARML", facet_row="RIAGENDRx")
    st.plotly_chart(by_gender, use_container_width=True)

    st.write(""" 
  Next we look to stratifying the data by both gender and ethnicity.  This results in 2 x 5 = 10 total strata, since there are 2 gender strata and 5 ethnicity strata. These scatterplots reveal differences in the means as well a diffrences in the degree of association (correlation) between different pairs of variables.  We see that although some ethnic groups tend to have longer/shorter arms and legs than others, the relationship between arm length and leg length within genders is roughly similar across the ethnic groups.  

  One notable observation is that ethnic group 5, which consists of people who report being multi-racial or are of any race not treated as a separate group (due to small sample size), the correlation between arm length and leg length is stronger, especially for men.  This is not surprising, as greater heterogeneity can allow correlations to emerge that are indiscernible in more homogeneous data.   
  """)

    # Contour grid: ethnicity across the columns, gender down the rows.
    strata_fig = px.density_contour(
        df,
        x="BMXLEG",
        y="BMXARML",
        facet_col="RIDRETH1",
        facet_row="RIAGENDRx",
    )
    strata_fig.update_traces(contours_coloring="fill",
                             contours_showlabels=True)
    st.plotly_chart(strata_fig)
Esempio n. 25
0
                        dimensions=targets + features,
                        color=colors[2],
                        opacity=0.7)
# Hide the diagonal of the previously built figure, then render it.
fig.update_traces(diagonal_visible=False)
plot(fig)

# The three variables that separate the target most strongly.
group = ["price", "ram", "hd"]

# 3-D scatter of the three variables, coloured by category.
fig = px.scatter_3d(
    df, x=group[0], y=group[1], z=group[2], color=colors[2], opacity=0.7
)
plot(fig)

# Density contours of the first two variables per category, with a
# histogram on the x margin and a box plot on the y margin.
fig = px.density_contour(
    df,
    x=group[0],
    y=group[1],
    color=colors[2],
    marginal_x="histogram",
    marginal_y="box",
)
plot(fig)

# Strip plot of a single variable, split by category.
fig = px.strip(df, color=colors[2], y=group[1])
plot(fig)
Esempio n. 26
0
def _select_features(df, num_columns, ignored_columns):
    """Return the ``num_columns`` of ``df`` minus any listed in ``ignored_columns``."""
    X = df.loc[:, num_columns]
    if ignored_columns:
        X = X.drop(
            list(set(ignored_columns).intersection(set(X.columns.to_list()))), axis=1
        )
    return X


def _encode_target(df, target):
    """Return ``df[target]`` with object columns as category codes and floats as ints."""
    if target in df.select_dtypes(include=["object"]).columns.to_list():
        return df[target].astype("category").cat.codes
    # The builtin ``float`` is what the removed ``np.float`` alias pointed to.
    if target in df.select_dtypes(include=[float]).columns.to_list():
        return df[target].astype("int")
    return df[target]


def _add_loadings_2d(fig, loadings, labels):
    """Overlay 2-D loadings on ``fig``: origin-to-point lines plus text labels."""
    xc, yc = [], []
    for row in loadings:
        # The trailing ``None`` breaks the line, so each loading is drawn as
        # its own segment starting at the origin.
        xc.extend([0, row[0], None])
        yc.extend([0, row[1], None])
    fig.add_trace(
        go.Scatter(
            x=xc,
            y=yc,
            mode="lines",
            name="Loadings",
            showlegend=False,
            line=dict(color="black"),
            opacity=0.3,
        )
    )
    fig.add_trace(
        go.Scatter(
            x=loadings[:, 0],
            y=loadings[:, 1],
            mode="text",
            text=labels,
            opacity=0.7,
            name="Loadings",
        ),
    )


def build_plot(is_anim, plot_type, df, progress=None, **kwargs) -> dict:
    """Build the Plotly figure (and any fitted model) for ``plot_type``.

    Args:
        is_anim: When true, ``kwargs["time_column"]`` becomes the animation
            frame and fixed axis ranges are set so frames stay comparable.
        plot_type: One of the ``amp_consts.PLOT_*`` constants.
        df: Source data frame. Helper columns (``time_step``, the projected
            component columns) may be added to it.
        progress: Optional callable invoked as ``progress(step, total)``
            while the scatter matrix is assembled.
        **kwargs: Plot parameters. Most are forwarded to the Plotly Express
            function; ``time_column``, ``fill_contours``, ``ignore_columns``,
            ``show_loadings``, ``target``, ``solver``, ``init`` and
            ``n_components`` are consumed here.

    Returns:
        A dict with the non-``None`` entries among ``figure``,
        ``model_data``, ``column_names`` and ``class_names``.
    """
    params = dict(**kwargs)
    for k, v in params.items():
        if v == amp_consts.NONE_SELECTED:
            params[k] = filter_none(params[k])
    num_columns = df.select_dtypes(include=[np.number]).columns.to_list()

    if is_anim:
        time_column = params.pop("time_column", "")
        if (
            time_column
            in df.select_dtypes(
                include=[np.datetime64, "datetime", "datetime64", "datetime64[ns, UTC]"]
            ).columns.to_list()
        ):
            # Datetime frames are animated through a formatted string column.
            df["time_step"] = df[time_column].dt.strftime("%Y/%m/%d %H:%M:%S")
            afc = "time_step"
        else:
            afc = time_column
        params["animation_frame"] = afc
        df = df.sort_values([afc])
        if plot_type not in [
            amp_consts.PLOT_PCA_3D,
            amp_consts.PLOT_PCA_2D,
            amp_consts.PLOT_PCA_SCATTER,
            amp_consts.PLOT_LDA_2D,
            amp_consts.PLOT_QDA_2D,
            amp_consts.PLOT_NCA,
        ]:
            # Pin numeric axes to the full data range for every frame.
            x = params.get("x")
            params["range_x"] = (
                None if x not in num_columns else [df[x].min(), df[x].max()]
            )
            y = params.get("y")
            params["range_y"] = (
                None if y not in num_columns else [df[y].min(), df[y].max()]
            )
            if plot_type in [amp_consts.PLOT_SCATTER_3D, amp_consts.PLOT_PCA_3D]:
                z = params.get("z")
                params["range_z"] = (
                    None if z not in num_columns else [df[z].min(), df[z].max()]
                )

    params["data_frame"] = df

    fig = None
    model_data = None
    column_names = None
    class_names = None

    if plot_type == amp_consts.PLOT_SCATTER:
        fig = px.scatter(**params)
    elif plot_type == amp_consts.PLOT_SCATTER_3D:
        fig = px.scatter_3d(**params)
    elif plot_type == amp_consts.PLOT_LINE:
        fig = px.line(**params)
    elif plot_type == amp_consts.PLOT_BAR:
        fig = px.bar(**params)
    elif plot_type == amp_consts.PLOT_HISTOGRAM:
        if "orientation" in params and params.get("orientation") == "h":
            # Horizontal histograms bin along y instead of x.
            params["x"], params["y"] = None, params["x"]
        fig = px.histogram(**params)
    elif plot_type == amp_consts.PLOT_BOX:
        fig = px.box(**params)
    elif plot_type == amp_consts.PLOT_VIOLIN:
        fig = px.violin(**params)
    elif plot_type == amp_consts.PLOT_DENSITY_HEATMAP:
        fig = px.density_heatmap(**params)
    elif plot_type == amp_consts.PLOT_DENSITY_CONTOUR:
        # Not a Plotly Express argument, so it must be removed before the call.
        fc = params.pop("fill_contours", None) is True
        fig = px.density_contour(**params)
        if fc:
            fig.update_traces(contours_coloring="fill", contours_showlabels=True)
    elif plot_type == amp_consts.PLOT_PARALLEL_CATEGORIES:
        fig = px.parallel_categories(**params)
    elif plot_type == amp_consts.PLOT_PARALLEL_COORDINATES:
        fig = px.parallel_coordinates(**params)
    elif plot_type == amp_consts.PLOT_SCATTER_MATRIX:
        fig = make_subplots(
            rows=len(num_columns),
            cols=len(num_columns),
            shared_xaxes=True,
            row_titles=num_columns,
        )
        color_column = params.get("color")
        use_colors = color_column is not None
        template_colors = None
        if use_colors:
            template_colors = pio.templates[params.get("template")].layout["colorway"]
            if template_colors is None:
                template_colors = pio.templates[pio.templates.default].layout["colorway"]
            color_count = len(df[color_column].unique())
            if len(template_colors) < color_count:
                # Not enough template colours: repeat them to cover all classes.
                template_colors = np.repeat(
                    template_colors, (color_count // len(template_colors)) + 1
                )
            template_colors = template_colors[:color_count]
        legend_added = False
        step = 0
        total = len(num_columns) ** 2
        matrix_diag = params["matrix_diag"]
        matrix_up = params["matrix_up"]
        matrix_down = params["matrix_down"]
        for i, col_name in enumerate(num_columns):
            for j, row_name in enumerate(num_columns):
                # ``progress`` is optional; only report when one was supplied.
                if progress is not None:
                    progress(step, total)
                step += 1

                # Decide what goes into cell (row j, column i).
                if i == j:
                    if matrix_diag == "Nothing":
                        continue
                    mtx_plot_kind = (
                        "Histogram" if matrix_diag == "Histogram" else "Scatter"
                    )
                else:
                    cell_mode = matrix_up if i > j else matrix_down
                    if cell_mode == "Nothing":
                        continue
                    if cell_mode in ("Scatter", "2D histogram"):
                        mtx_plot_kind = cell_mode
                    else:
                        # Unknown setting: leave the cell empty.
                        mtx_plot_kind = "Error"

                if not use_colors or mtx_plot_kind == "2D histogram":
                    if mtx_plot_kind == "Histogram":
                        add_histogram(fig=fig, x=df[col_name], index=i + 1)
                    elif mtx_plot_kind == "Scatter":
                        add_scatter(
                            fig=fig,
                            x=df[col_name],
                            y=df[row_name],
                            row=j + 1,
                            col=i + 1,
                        )
                    elif mtx_plot_kind == "2D histogram":
                        add_2d_hist(
                            fig=fig,
                            x=df[col_name],
                            y=df[row_name],
                            row=j + 1,
                            col=i + 1,
                        )
                else:
                    # One trace per class; only the first cell feeds the legend.
                    for color_parse, cat in zip(
                        template_colors, df[color_column].unique()
                    ):
                        df_cat = df[df[color_column] == cat]
                        if mtx_plot_kind == "Histogram":
                            add_histogram(
                                fig=fig,
                                x=df_cat[col_name],
                                index=i + 1,
                                name=cat,
                                marker=color_parse,
                                legend=not legend_added,
                            )
                        elif mtx_plot_kind == "Scatter":
                            add_scatter(
                                fig=fig,
                                x=df_cat[col_name],
                                y=df_cat[row_name],
                                row=j + 1,
                                col=i + 1,
                                name=cat,
                                marker=color_parse,
                                legend=not legend_added,
                            )
                    legend_added = True
                fig.update_xaxes(
                    title_text=col_name,
                    row=j + 1,
                    col=i + 1,
                )
                # Title the y axes of the first column only. The previous test
                # compared the column *name* with 0 and therefore never fired
                # for string-named columns.
                if i == 0:
                    fig.update_yaxes(
                        title_text=row_name,
                        row=j + 1,
                        col=i + 1,
                    )
        fig.update_layout(barmode="stack")
    elif plot_type in [
        amp_consts.PLOT_PCA_2D,
        amp_consts.PLOT_PCA_3D,
        amp_consts.PLOT_PCA_SCATTER,
    ]:
        X = _select_features(df, num_columns, params.pop("ignore_columns", []))
        column_names = X.columns.to_list()
        X = StandardScaler().fit(X).transform(X)
        model_data = PCA()
        x_new = model_data.fit_transform(X)
        pc1_lbl = f"PC1 ({model_data.explained_variance_ratio_[0] * 100:.2f}%)"
        pc2_lbl = f"PC2 ({model_data.explained_variance_ratio_[1] * 100:.2f}%)"
        x = x_new[:, 0]
        y = x_new[:, 1]
        # Scale each component by its own range.
        df[pc1_lbl] = x * (1.0 / (x.max() - x.min()))
        df[pc2_lbl] = y * (1.0 / (y.max() - y.min()))
        params["x"] = pc1_lbl
        params["y"] = pc2_lbl
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        # Optional flag; a missing key simply means "no loadings".
        sl = params.pop("show_loadings", None) is True
        if plot_type == amp_consts.PLOT_PCA_3D:
            z = x_new[:, 2]
            pc3_lbl = f"PC3 ({model_data.explained_variance_ratio_[2] * 100:.2f}%)"
            df[pc3_lbl] = z * (1.0 / (z.max() - z.min()))
            params["z"] = pc3_lbl
            if is_anim:
                params["range_z"] = [-1, 1]
            fig = px.scatter_3d(**params)
            if sl:
                loadings = np.transpose(model_data.components_[0:3, :])
                loadings = loadings * (1 / np.amax(loadings))
                xc, yc, zc = [], [], []
                for row in loadings:
                    xc.extend([0, row[0], None])
                    yc.extend([0, row[1], None])
                    zc.extend([0, row[2], None])
                fig.add_trace(
                    go.Scatter3d(
                        x=xc,
                        y=yc,
                        z=zc,
                        mode="lines",
                        name="Loadings",
                        showlegend=False,
                        line=dict(color="black"),
                        opacity=0.3,
                    )
                )
                fig.add_trace(
                    go.Scatter3d(
                        x=loadings[:, 0],
                        y=loadings[:, 1],
                        z=loadings[:, 2],
                        mode="text",
                        # Label with the columns that were actually fitted
                        # (ignored columns removed), so labels match loadings.
                        text=column_names,
                        opacity=0.7,
                        name="Loadings",
                    ),
                )
        elif plot_type == amp_consts.PLOT_PCA_2D:
            # This branch used to test PLOT_PCA_3D again and was unreachable,
            # so 2-D PCA plots never produced a figure.
            fig = px.scatter(**params)
            if sl:
                loadings = np.transpose(model_data.components_[0:2, :])
                loadings = loadings * (1 / np.amax(loadings))
                _add_loadings_2d(fig, loadings, column_names)
        elif plot_type == amp_consts.PLOT_PCA_SCATTER:
            params_ = {
                "data_frame": x_new,
                "labels": {str(i): f"PC {i+1}" for i in range(x_new.shape[1] - 1)},
            }
            if params["color"] is not None:
                params_["color"] = df[params["color"]]
            if params["dimensions"] is not None:
                params_["dimensions"] = range(
                    min(
                        params["dimensions"],
                        x_new.shape[1] - 1,
                    )
                )
            if is_anim:
                params_["range_x"] = [-1, 1]
                params_["range_y"] = [-1, 1]
            fig = px.scatter_matrix(**params_)
            fig.update_traces(diagonal_visible=False)
    elif plot_type in [amp_consts.PLOT_LDA_2D, amp_consts.PLOT_QDA_2D]:
        X = _select_features(df, num_columns, params.pop("ignore_columns", []))
        column_names = X.columns.to_list()
        t = _encode_target(df, params["target"])
        class_names = df[params["target"]].unique()
        X = StandardScaler().fit(X).transform(X)
        if plot_type == amp_consts.PLOT_LDA_2D:
            model_data = LinearDiscriminantAnalysis(solver=params.pop("solver", "svd"))
        elif plot_type == amp_consts.PLOT_QDA_2D:
            # NOTE(review): the code below relies on ``transform``,
            # ``explained_variance_ratio_`` and ``coef_``; confirm that
            # QuadraticDiscriminantAnalysis actually provides them.
            model_data = QuadraticDiscriminantAnalysis(store_covariance=True)
        x_new = model_data.fit(X, y=t).transform(X)
        label_root = "LD" if plot_type == amp_consts.PLOT_LDA_2D else "QD"
        pc1_lbl = f"{label_root}1 ({model_data.explained_variance_ratio_[0] * 100:.2f}%)"
        pc2_lbl = f"{label_root}2 ({model_data.explained_variance_ratio_[1] * 100:.2f}%)"
        x = x_new[:, 0]
        y = x_new[:, 1]
        df[pc1_lbl] = x / np.abs(x).max()
        df[pc2_lbl] = y / np.abs(y).max()
        params["x"] = pc1_lbl
        params["y"] = pc2_lbl
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        params.pop("target")
        sl = params.pop("show_loadings", None) is True
        fig = px.scatter(**params)
        if sl:
            # Copy first: the transpose is a view, and normalising it in place
            # would silently overwrite the fitted model's ``coef_``.
            loadings = np.transpose(model_data.coef_[0:2, :]).copy()
            loadings[:, 0] = loadings[:, 0] / np.abs(loadings[:, 0]).max()
            loadings[:, 1] = loadings[:, 1] / np.abs(loadings[:, 1]).max()
            _add_loadings_2d(fig, loadings, column_names)
    elif plot_type in [amp_consts.PLOT_NCA]:
        X = _select_features(df, num_columns, params.pop("ignore_columns", []))
        column_names = X.columns.to_list()
        t = _encode_target(df, params["target"])
        class_names = df[params["target"]].unique()
        X = StandardScaler().fit(X).transform(X)
        model_data = NeighborhoodComponentsAnalysis(
            init=params.pop("init", "auto"),
            n_components=min(len(column_names), params.pop("n_components", 2)),
        )
        x_new = model_data.fit(X, y=t).transform(X)
        df["x_nca"] = x_new[:, 0]
        df["y_nca"] = x_new[:, 1]
        params["x"] = "x_nca"
        params["y"] = "y_nca"
        if is_anim:
            params["range_x"] = [-1, 1]
            params["range_y"] = [-1, 1]
        params.pop("target")
        fig = px.scatter(**params)
    elif plot_type == amp_consts.PLOT_CORR_MATRIX:
        fig = px.imshow(
            df[num_columns].corr(method=params.get("corr_method")).values,
            x=num_columns,
            y=num_columns,
        )

    if fig is not None:
        # Layout tweaks only make sense when a figure was actually built;
        # unknown plot types leave ``fig`` as None.
        if plot_type in amp_consts.PLOT_IS_3D:
            fig.update_layout(scene={"aspectmode": "cube"})
        fig.update_layout(
            height=params["height"],
            template=params["template"],
            legend={"traceorder": "normal"},
        )
        if ("size" not in params) or (
            (params["size"] is None) or (params["size"] == amp_consts.NONE_SELECTED)
        ):
            # No size column selected: apply a uniform marker style.
            fig.update_traces(
                marker=dict(
                    size=8,
                    line=dict(width=2),  # color="DarkSlateGrey"),
                    opacity=0.7,
                ),
                selector=dict(mode="markers"),
            )

    result = {
        "figure": fig,
        "model_data": model_data,
        "column_names": column_names,
        "class_names": class_names,
    }
    return {k: v for k, v in result.items() if v is not None}
Esempio n. 27
0
def article_vs_headline_plot(df_in):
    """Build the "Article vs Headline Score" figure and return it as JSON.

    For every news desk left after filtering, three traces sharing one
    legend group are added: a density contour of the scores, a marker
    scatter of the individual rows, and a dotted line joining the
    predictions of the desk's pickled model at x = -1 and x = 1.

    Args:
        df_in: DataFrame providing the columns ``headline``,
            ``headline_score``, ``article_score``, ``news_desk`` and
            ``section_name``. The frame itself is left untouched.

    Returns:
        The figure serialised to a JSON string with ``PlotlyJSONEncoder``.

    Raises:
        KeyError: if a remaining news desk has no entry in the colour tables.
        FileNotFoundError: if ``pickle_model_<desk>.pkl`` is missing from the
            models directory.
    """
    # Drop rows with a zero score, the "Business Day" section and the
    # "Media" / "National" desks.
    keep_rows = (
        (df_in["headline_score"] != 0)
        & (df_in["article_score"] != 0)
        & (df_in["section_name"] != "Business Day")
        & (df_in["news_desk"] != "Media")
        & (df_in["news_desk"] != "National")
    )
    # Explicit copy: the colour column added below must land on an
    # independent frame, not on a slice of the caller's data.
    df = df_in.loc[
        keep_rows,
        ["headline", "headline_score", "article_score", "news_desk"],
    ].copy()

    # Line colors (70% opacity)
    lines_colors_dict = {
        "Society": "rgba(30, 144, 255, 0.7)",  # "dodgerblue",
        "Business": "rgba(255, 215, 0, 0.7)",  # "gold",
        "Politics": "rgba(178, 34, 34, 0.7)",  # "firebrick",
        "Science": "rgba(34, 139, 34, 0.7)",  # "forestgreen",
        "Climate": "rgba(255, 140, 0, 0.7)",  # "darkorange"
        "Arts&Leisure": "rgba(138, 43, 226, 0.7)",  # "blueviolet",
    }

    # Marker fill colors with 20% opacity
    markers_colors_dict = {
        "Society": "rgba(30, 144, 255, 0.2)",  # "dodgerblue",
        "Business": "rgba(255, 215, 0, 0.2)",  # "gold",
        "Politics": "rgba(178, 34, 34, 0.2)",  # "firebrick",
        "Science": "rgba(34, 139, 34, 0.2)",  # "forestgreen",
        "Climate": "rgba(255, 140, 0, 0.2)",  # "darkorange"
        "Arts&Leisure": "rgba(138, 43, 226, 0.2)",  # "blueviolet",
    }

    df["markers_colors"] = df["news_desk"].map(markers_colors_dict)

    # Loop invariants: where the per-desk models live and the two x values
    # between which each model line is drawn.
    models_dir = os.path.join("news_app", "static", "resources",
                              "saved_models")
    x_trace = [-1, 1]

    fig = go.Figure()
    for desk in df["news_desk"].unique():
        df_current = df.loc[df["news_desk"] == desk]
        line_color = lines_colors_dict[desk]

        # 1) Density contour; only its outline and legend entry are wanted,
        #    so hover is disabled.
        fig.add_trace(
            px.density_contour(
                df_current,
                x="headline_score",
                y="article_score",
            )["data"][0])
        # The trace just added is always the last one, so no index
        # arithmetic is needed to find it again.
        fig.data[-1].update(
            name=desk,
            line={"color": line_color, "width": 1},
            legendgroup=desk,
            showlegend=True,
            hovertemplate="",
            hoverinfo="skip",
        )

        # 2) One marker per row of the desk.
        fig.add_trace(
            go.Scatter(
                x=df_current["headline_score"],
                y=df_current["article_score"],
                mode="markers",
                marker={
                    "color": df_current["markers_colors"],
                    "line": {
                        "color": "rgba(105, 105, 105, .5)",  # dimgrey
                        "width": 0.3,
                    },
                },
                text=desk,
                hovertemplate="Headline: %{x}<br>Article: %{y}<extra></extra>",
                legendgroup=desk,
                showlegend=False,
            ))

        # 3) Dotted line through the model's predictions at both x values.
        # NOTE: pickle.load can execute arbitrary code while loading; these
        # files must only ever come from a trusted source.
        model_path = os.path.join(models_dir, f"pickle_model_{desk}.pkl")
        with open(model_path, "rb") as model_file:
            model = pickle.load(model_file)
        y_trace = [model.predict([[x]])[0] for x in x_trace]

        fig.add_trace(
            go.Scatter(
                x=x_trace,
                y=y_trace,
                mode="lines",
                line={
                    "color": line_color,
                    "width": 2,
                    "dash": "dot",
                },
                legendgroup=desk,
                name=desk,
                showlegend=False,
                # One label per plotted point (the line has exactly
                # len(x_trace) points, not one per data row).
                text=[desk] * len(x_trace),
                hovertemplate="%{text}<extra></extra>",
            ))

    fig.layout.update(title="Article vs Headline Score",
                      title_x=0.5,
                      xaxis={"title": {
                          "text": "Headline Scores"
                      }},
                      yaxis={"title": {
                          "text": "Article Scores"
                      }},
                      paper_bgcolor="white",
                      plot_bgcolor="ghostwhite",
                      legend={"title": {
                          "text": "<b>News Desks</b>"
                      }})

    return json.dumps(fig, cls=plotly.utils.PlotlyJSONEncoder)
Esempio n. 28
0
    facet_col="day",
    facet_col_wrap=3,
    category_orders={"day": ["Thur", "Fri", "Sat", "Sun"]},
)
fig.write_html(os.path.join(dir_name, "facet_wrap_ragged.html"))


# Area chart of "pop" over "year", coloured by continent with one line group
# per country, exported as a standalone HTML file.
gapminder = px.data.gapminder()
fig = px.area(
    gapminder,
    x="year",
    y="pop",
    color="continent",
    line_group="country",
)
html_path = os.path.join(dir_name, "area.html")
fig.write_html(html_path)

# #### Visualize Distributions


# Plain density contour of sepal width against sepal length.
iris = px.data.iris()
fig = px.density_contour(
    iris,
    x="sepal_width",
    y="sepal_length",
)
html_path = os.path.join(dir_name, "density_contour.html")
fig.write_html(html_path)


# Same axes split by species, with a rug plot along x and a histogram
# along y as marginals.
iris = px.data.iris()
fig = px.density_contour(
    iris, x="sepal_width", y="sepal_length", color="species",
    marginal_x="rug", marginal_y="histogram",
)
html_path = os.path.join(dir_name, "density_contour_marginal.html")
fig.write_html(html_path)

Esempio n. 29
0
                 y="Area Income",
                 color="Clicked",
                 marginal_y="histogram",
                 marginal_x="histogram",
                 trendline="ols")
# Title and axis labels for the figure built by the preceding call, then
# display it (update_layout returns the figure, so the calls can be chained).
fig.update_layout(
    title_text="Relation Between Customer Age and Area Income",
    xaxis_title_text="Age of Customer",
    yaxis_title_text="Area Income",
).show()

# **Create a jointplot showing the kde distributions of Daily Time Spent on Site vs. Age.**
fig = px.density_contour(
    ad_data,
    x="Age",
    y="Daily Time Spent on Site",
    marginal_y="histogram",
    marginal_x="histogram",
)
# Title and axis labels for the density contour, then display it.
fig.update_layout(
    title_text="Relation Between Customer Age and Time Spent on Site",
    xaxis_title_text="Age of Customer",
    yaxis_title_text="Time Spent on Site Daily",
).show()

# ** Create a jointplot of 'Daily Time Spent on Site' vs. 'Daily Internet Usage'**
fig = px.scatter(ad_data,
                 x="Daily Time Spent on Site",
                 y="Daily Internet Usage",
Esempio n. 30
0
import plotly.express as px
import pandas as pd

# Flights dataset shared by every chart below.
flights = pd.read_csv("data_science/datasets/flights.csv")

# Each entry pairs a plotly-express chart builder with its keyword arguments.
# The charts are built and shown one after another, in the order listed.
chart_specs = [
    # Distribution of the distances of the flights
    (px.histogram, {"x": "distance", "nbins": 20}),
    # Number of flights per origin
    (px.histogram, {"x": "origin"}),
    # Relationship between dep_time and arr_time
    (px.scatter, {"x": "dep_time", "y": "arr_time"}),
    # Relationship between dep_delay and origin
    (px.box, {"x": "origin", "y": "dep_delay"}),
    # Relationship between arr_delay and dep_delay
    (px.density_contour, {"x": "arr_delay", "y": "dep_delay"}),
    # Relationship between air_time, distance and origin
    (px.scatter, {"x": "air_time", "y": "distance", "color": "origin"}),
]

for make_chart, chart_kwargs in chart_specs:
    fig = make_chart(flights, **chart_kwargs)
    fig.show()