Esempio n. 1
0
def update_scatter_c1(hoverData,var1,var2,type):
    """Build the var1-vs-var2 scatter plot with an OLS trendline.

    Args:
        hoverData: Hover payload. It is accepted so the signature stays
            unchanged, but it does not influence the figure: the previous
            implementation looked up the hovered location and then never
            used the result, so that dead lookup has been removed.
        var1: Name of the variable plotted on the x axis.
        var2: Name of the variable plotted on the y axis.
        type: Granularity. ``'Precincts'`` selects precinct-level data; any
            other value selects zipcode-level data. The name shadows the
            builtin but is kept for backward compatibility.

    Returns:
        The Plotly Express scatter figure.

    Side effects:
        Prints the summary of the fitted trendline model.
    """
    # Both granularities share one code path; only the aggregation level and
    # the key column differ.
    if type == 'Precincts':
        level, key_col = 'Precincts', 'PrecinctKey'
    else:  # for zipcode mappings
        level, key_col = 'Zipcodes', 'zipcode'

    df1 = aggregate(level, var1, resultscatalog)[[key_col, var1]]
    df2 = aggregate(level, var2, resultscatalog)[[key_col, var2]]
    df = df1.merge(df2, on=key_col)

    scat = px.scatter(df, x=var1, y=var2, trendline='ols',
                      hover_data=[key_col])
    scat.update_layout(xaxis_title=var1, yaxis_title=var2)

    results = px.get_trendline_results(scat)
    print(results.px_fit_results.iloc[0].summary())
    return scat
Esempio n. 2
0
def test_trendline_results_passthrough(mode, options):
    """Check hover text and fit results of per-country trendlines.

    ``mode`` and ``options`` are forwarded to ``px.scatter`` as ``trendline``
    and ``trendline_options`` (presumably supplied by a parametrization that
    is outside this view). Only the ``"ols"`` mode is expected to expose fit
    results and an R-squared entry in the hover text.
    """
    oceania = px.data.gapminder().query("continent == 'Oceania'")
    fig = px.scatter(
        oceania,
        x="year",
        y="pop",
        color="country",
        trendline=mode,
        trendline_options=options,
    )
    is_ols = mode == "ols"

    assert len(fig.data) == 4
    # Traces alternate: marker traces at even positions, trendlines at odd.
    marker_traces = fig["data"][0::2]
    fit_traces = fig["data"][1::2]
    for marker in marker_traces:
        assert "trendline" not in marker.hovertemplate
    for fit in fit_traces:
        assert "trendline" in fit.hovertemplate
        if is_ols:
            assert "R<sup>2</sup>" in fit.hovertemplate

    results = px.get_trendline_results(fig)
    if not is_ols:
        assert len(results) == 0
        return

    assert len(results) == 2
    assert results["country"].values[0] == "Australia"
    au_result = results["px_fit_results"].values[0]
    assert len(au_result.params) == 2
Esempio n. 3
0
def update_capm_plot(logreturns):
    """Plot every series against IBOV and collect the OLS coefficients.

    ``logreturns`` is converted to a DataFrame, its ``Date`` column is parsed
    and the frame is merged with the month-start resampled sum of the
    module-level ``r_ibov``. The result is melted on ``IBOV`` so that each
    remaining column becomes one facet with its own OLS trendline.

    Returns:
        ``(fig, records)`` where ``records`` is a list of dicts with the keys
        ``ticker``, ``beta`` (``params[1]`` of the fit) and ``alpha``
        (``params[0]`` of the fit).
    """
    asset_returns = pd.DataFrame(logreturns).assign(
        Date=lambda frame: pd.to_datetime(frame['Date']))
    ibov_monthly = r_ibov.resample('MS').sum().reset_index()
    df = pd.merge(asset_returns, ibov_monthly).set_index('Date').melt('IBOV')

    axis_labels = {
        'value': 'Retorno excedente',
        'IBOV': 'Retorno excedente IBOV',
    }
    fig = px.scatter(df,
                     x='IBOV',
                     y='value',
                     trendline="ols",
                     facet_col='variable',
                     facet_col_wrap=4,
                     opacity=.5,
                     labels=axis_labels)
    fig.update_yaxes(matches=None, showticklabels=False)
    fig.update_xaxes(showticklabels=False)
    # Only the trendline traces are drawn in "lines" mode; make them dotted.
    fig.update_traces(line=dict(dash="dot"),
                      selector=dict(type="scatter", mode="lines"))

    fits = px.get_trendline_results(fig)
    fits['beta'] = fits['px_fit_results'].map(lambda model: model.params[1])
    fits['alpha'] = fits['px_fit_results'].map(lambda model: model.params[0])
    fits = fits.reset_index().rename(columns={'variable': 'ticker'})
    coefficients = fits[['ticker', 'beta', 'alpha']]

    def _bold_facet_title(annotation):
        # Keep only the text after the last "=" and render it in bold.
        label = annotation.text.split("=")[-1]
        return annotation.update(text='<b>' + label + '</b>')

    fig.for_each_annotation(_bold_facet_title)

    return fig, coefficients.to_dict('records')
Esempio n. 4
0
def test_ols_trendline_slopes():
    """Verify the hover equation and fitted parameters of simple OLS fits.

    Each case lists the x/y data, optional ``trendline_options``, the text
    that must appear in the trendline hover template and, where checked, the
    expected fit parameters.
    """
    no_const = dict(add_constant=False)
    cases = [
        # should be "y = 1 * x + 0" but sometimes is some tiny number instead
        ([0, 1], [0, 1], None, "y = 1 * x + ", [0, 1]),
        ([0, 1], [1, 2], None, "y = 1 * x + 1<br>", [1, 1]),
        ([0, 1], [1, 2], no_const, "y = 2 * x<br>", [2]),
        ([1, 1], [0, 0], no_const, "y = 0 * x<br>", [0]),
        ([1, 1], [0, 0], None, "y = 0<br>", [0]),
        # The remaining cases only check the hover text.
        ([1, 2], [0, 0], None, "y = 0 * x + 0<br>", None),
        ([0, 0], [1, 1], None, "y = 0 * x + 1<br>", None),
        ([0, 0], [1, 2], None, "y = 0 * x + 1.5<br>", None),
    ]

    for xs, ys, options, fragment, expected in cases:
        # Only forward trendline_options when a case actually sets it.
        extra = {} if options is None else {"trendline_options": options}
        fig = px.scatter(x=xs, y=ys, trendline="ols", **extra)
        assert fragment in fig.data[1].hovertemplate
        if expected is None:
            continue
        results = px.get_trendline_results(fig)
        params = results["px_fit_results"].iloc[0].params
        assert np.all(np.isclose(params, expected))
Esempio n. 5
0
def test_no_slope_ols_trendline():
    """Check OLS hover text and parameters, including flat and vertical data."""

    def fit(xs, ys):
        # Build the figure and hand back it plus the trendline hover text.
        figure = px.scatter(x=xs, y=ys, trendline="ols")
        return figure, figure.data[1].hovertemplate

    def params_of(figure):
        return px.get_trendline_results(figure)["px_fit_results"].iloc[0].params

    fig, hover = fit([0, 1], [0, 1])
    assert "y = 1" in hover  # then + x*(some small number)
    assert np.all(np.isclose(params_of(fig), [0, 1]))

    fig, hover = fit([1, 1], [0, 0])
    assert "y = 0" in hover
    assert np.all(np.isclose(params_of(fig), [0]))

    # Hover-text-only checks.
    hover_only_cases = (
        ([1, 2], [0, 0], "y = 0"),
        ([0, 0], [1, 1], "y = 0 * x + 1"),
        ([0, 0], [1, 2], "y = 0 * x + 1.5"),
    )
    for xs, ys, fragment in hover_only_cases:
        fig, hover = fit(xs, ys)
        assert fragment in hover
Esempio n. 6
0
def annual_subst_complaints_vs_prop_demo_reg(df,
                                             start,
                                             stop,
                                             figno,
                                             demo,
                                             ign_pcts=None):
    """Regress per-precinct 'excess' substantiated complaints on a demographic share.

    'Excess' is the observed mean annual substantiated complaints minus the
    value predicted from ``Annual_Mean_Crime_Reports`` using the module-level
    coefficients ``cb0s`` (intercept) and ``cb1s`` (slope). Those globals are
    assigned by ``annual_subst_complaints_vs_reported_crime_reg``, which must
    therefore have run first.

    Args:
        df: Per-precinct frame with ``Precinct``, ``Annual_Mean_Substantiated``,
            ``Annual_Mean_Crime_Reports`` and ``2010_Percent_{demo}_Residents``.
        start: First year, used only in the figure title.
        stop: Last year, used only in the figure title.
        figno: Figure label; it is capitalised in the title.
        demo: Demographic name used to pick the percentage column.
        ign_pcts: Accepted for signature parity with the sibling functions;
            not used here. Defaults to ``None`` rather than a shared mutable
            list.

    Returns:
        ``(df, summary)``: the prepared frame and the OLS summary of the fit.

    Side effects:
        Shows the figure.
    """
    x_col = f"2010 Percent {demo} Residents"
    y_col = "Mean Annual 'Excess' Substantiated Complaints"
    observed_col = "Mean Annual Substantiated Misconduct Complaints"

    # Copy after filtering so the column assignments below act on an
    # independent frame instead of a slice of the caller's data.
    df = df[df[f"2010_Percent_{demo}_Residents"].notna()].copy()
    df["Precinct"] = df["Precinct"].astype(int)
    df = df.rename(
        columns={
            f"2010_Percent_{demo}_Residents": x_col,
            "Annual_Mean_Substantiated": observed_col,
        })
    df["Annual_Mean_Substantiated_Pred"] = cb0s + cb1s * df[
        "Annual_Mean_Crime_Reports"]
    df[y_col] = df[observed_col] - df["Annual_Mean_Substantiated_Pred"]

    shapes = seaborn_conf_int(df, x_col, y_col)
    fig = px.scatter(df,
                     x=x_col,
                     y=y_col,
                     color="Precinct",
                     text="Precinct",
                     trendline="ols")
    fig.update_traces(textposition='top center', textfont_size=6)
    fig.update_layout(shapes=shapes)
    fig.update_xaxes(
        title_text=
        f"<span style='font-size: 12px;'>Percent {demo} Residents (2010 U.S. Census)</span>"
    )
    fig.update_yaxes(
        title_text=
        "<span style='font-size: 12px;'>Mean Annual Number of 'Excess' Substantiated Misconduct Complaints</span>"
    )
    fig.update_layout(
        title={
            'text':
            f"<b>Figure {figno.capitalize()}</b>: Per-Precinct Mean Annual 'Excess' Substantiated Misconduct Complaints vs. Percent {demo} Residents ({start}-{stop})",
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        })
    fig.show()

    results = px.get_trendline_results(fig)
    return df, results.px_fit_results.iloc[0].summary()
Esempio n. 7
0
def test_overall_trendline():
    """An 'overall' trendline must match the fit of the ungrouped figure.

    The fit of a plain scatter is compared with the overall-scope fit of a
    colour-grouped figure and of a row-faceted figure.
    """

    def first_fit_params(figure):
        fits = px.get_trendline_results(figure)
        return fits["px_fit_results"].iloc[0].params

    tips = px.data.tips()
    base_args = dict(x="total_bill", y="tip", trendline="ols")

    # Reference: a single markers trace plus one trendline trace.
    plain = px.scatter(tips, **base_args)
    assert len(plain.data) == 2
    assert "trendline" in plain.data[1].hovertemplate
    reference = first_fit_params(plain)

    # Two colour groups plus one shared trendline.
    by_colour = px.scatter(
        tips,
        color="sex",
        trendline_scope="overall",
        **base_args,
    )
    assert len(by_colour.data) == 3
    assert "trendline" in by_colour.data[2].hovertemplate
    assert np.all(np.array_equal(reference, first_fit_params(by_colour)))

    # With row facets the overall trendline is the last of four traces.
    by_facet = px.scatter(
        tips,
        facet_row="sex",
        trendline_scope="overall",
        **base_args,
    )
    assert len(by_facet.data) == 4
    assert "trendline" in by_facet.data[3].hovertemplate
    assert np.all(np.array_equal(reference, first_fit_params(by_facet)))
def make_plot(df, x_axis, y_axis):
    """Render a scatter plot with an OLS trendline and report its statistics.

    Writes to the Streamlit page, in order: the figure, the fitted line
    equation, the R-squared of the fit and the correlation between the two
    columns (or ``"_"`` when the correlation cannot be computed).

    Args:
        df: DataFrame holding ``x_axis``, ``y_axis`` and a ``date`` column;
            the latter is added to the hover box.
        x_axis: Name of the column plotted on the x axis.
        y_axis: Name of the column plotted on the y axis.
    """
    fig = px.scatter(df,
                     x=x_axis,
                     y=y_axis,
                     trendline="ols",
                     hover_data=["date", x_axis, y_axis])
    st.plotly_chart(fig, use_container_width=True)

    # Fetch the fitted model once and reuse it for all statistics.
    fit = px.get_trendline_results(fig).px_fit_results.iloc[0]
    # The OLS fit lists the constant first: params[0] is the intercept and
    # params[1] is the slope. The equation previously printed them swapped.
    intercept = fit.params[0]
    slope = fit.params[1]
    st.write(f"y =  {round(slope,4)} *x + {round(intercept,4)}")
    st.write(f"R2 = {fit.rsquared}")

    try:
        c = round(df[x_axis].corr(df[y_axis]), 3)
    except Exception:
        # Best effort: e.g. non-numeric columns cannot be correlated.
        st.write("_")
    else:
        st.write(f"Correlatie {x_axis} vs {y_axis}= {c}")
Esempio n. 9
0
    def scatterPlot(self, df, c):
        """Show a scatter plot of the per-x mean of y, with an OLS trendline.

        The two columns named in ``c`` are taken from ``df``, values above
        each column's 95th percentile are filtered out, zeros are replaced by
        NaN and incomplete rows are dropped. For every distinct x value the
        mean of the matching y values is computed, and those (x, mean y)
        pairs are plotted. The summary of the fitted trendline is printed.

        Params
            df: data with values to graph (pd.DataFrame)
            c: names of the two columns to use, x first and y second
               (list of str); the unpacking of the column names below
               requires exactly two

        Return:
            None
        """
        # work on a copy restricted to the requested columns
        localDF = df.copy()[c]

        # keep, per column, only values between the 0th and 95th percentile
        c1, c2 = localDF.iloc[:, 0], localDF.iloc[:, 1]
        c1 = c1[c1.between(c1.quantile(.0),
                           c1.quantile(.95))]  # without outliers
        c2 = c2[c2.between(c2.quantile(.0),
                           c2.quantile(.95))]  # without outliers

        # write the filtered (shorter) Series back into the frame
        # NOTE(review): presumably the filtered-out rows become NaN through
        # index alignment and are removed by dropna() below - confirm against
        # the pandas version in use
        localDF.iloc[:, 0], localDF.iloc[:, 1] = c1, c2

        #localDF = self.drop_numerical_outliers(localDF)

        # treat zeros as missing, then drop every row with a missing value
        localDF[c] = localDF[c].replace(0, np.nan)  # remove 0s
        #localDF = np.sqrt(localDF[c]).dropna()
        localDF = localDF[c].dropna()

        # x and y are the two column names; tempDict collects the plot data
        x, y = localDF.columns.values
        tempDict = {x: np.array([]), y: np.array([])}

        # one point per distinct x value: (x, mean of the matching y values)
        for i in localDF[x].unique():
            sub = localDF[localDF[x] == i][y]
            tempDict[x] = np.append(tempDict[x], i)
            tempDict[y] = np.append(tempDict[y], np.mean(sub))

        tempDF = pd.DataFrame(tempDict)

        # create and show plot
        fig = px.scatter(tempDF, x=c[0], y=c[1], trendline="ols")
        fig.show()

        # print the summary of the fitted trendline model
        results = px.get_trendline_results(fig)
        summary = results.px_fit_results.iloc[0].summary()
        print(summary)
Esempio n. 10
0
def annual_subst_complaints_vs_complaints_per_officer_reg(
        df, start, stop, figno, ign_pcts=None):
    """Regress per-precinct 'excess' substantiated complaints on complaints per officer.

    'Excess' is the observed mean annual substantiated complaints minus the
    value predicted from ``Annual_Mean_Crime_Reports`` using the module-level
    coefficients ``cb0s`` (intercept) and ``cb1s`` (slope). Those globals are
    assigned by ``annual_subst_complaints_vs_reported_crime_reg``, which must
    therefore have run first.

    Args:
        df: Per-precinct frame with ``Precinct``,
            ``Mean_Substantiated_per_Officer``, ``Annual_Mean_Substantiated``
            and ``Annual_Mean_Crime_Reports``.
        start: First year, used only in the figure title.
        stop: Last year, used only in the figure title.
        figno: Figure label; it is capitalised in the title.
        ign_pcts: Accepted for signature parity with the sibling functions;
            not used here. Defaults to ``None`` rather than a shared mutable
            list.

    Returns:
        ``(df, summary)``: the prepared frame and the OLS summary of the fit.

    Side effects:
        Shows the figure.
    """
    x_col = "Mean Substantiated Complaints Per Accused Officer"
    y_col = "Mean Annual 'Excess' Substantiated Complaints"
    observed_col = "Mean Annual Substantiated Misconduct Complaints"

    df = df.copy()
    df = df.rename(
        columns={
            "Mean_Substantiated_per_Officer": x_col,
            "Annual_Mean_Substantiated": observed_col,
        })
    df["Annual_Mean_Substantiated_Pred"] = cb0s + cb1s * df[
        "Annual_Mean_Crime_Reports"]
    df[y_col] = df[observed_col] - df["Annual_Mean_Substantiated_Pred"]

    shapes = seaborn_conf_int(df, x_col, y_col)
    fig = px.scatter(df,
                     x=x_col,
                     y=y_col,
                     text="Precinct",
                     trendline="ols")
    fig.update_traces(textposition='top center', textfont_size=6)
    fig.update_layout(shapes=shapes)
    fig.update_xaxes(
        title_text=
        "<span style='font-size: 12px;'>Mean Annual Number of Substantiated Misconduct Complaints Per Accused Officer</span>"
    )
    fig.update_yaxes(
        title_text=
        "<span style='font-size: 12px;'>Mean Annual Number of 'Excess' Substantiated Misconduct Complaints</span>"
    )
    fig.update_layout(
        title={
            'text':
            f"<b>Figure {figno.capitalize()}</b>: Per-Precinct Mean Annual 'Excess' Substantiated Misconduct Complaints vs. Mean Substantiated Misconduct Complaints Per Accused Officer ({start}-{stop})",
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        })
    fig.show()

    results = px.get_trendline_results(fig)
    return df, results.px_fit_results.iloc[0].summary()
Esempio n. 11
0
def annual_subst_complaints_vs_reported_crime_reg(df,
                                                  start,
                                                  stop,
                                                  figno,
                                                  ign_pcts=None):
    """Regress per-precinct substantiated complaints on reported crimes.

    Besides plotting, this stores the fitted coefficients in the module-level
    globals ``cb0s`` (``params[0]``) and ``cb1s`` (``params[1]``).

    Args:
        df: Per-precinct frame with ``Precinct``, ``Annual_Mean_Crime_Reports``
            and ``Annual_Mean_Substantiated``.
        start: First year, used only in the figure title.
        stop: Last year, used only in the figure title.
        figno: Figure label; it is capitalised in the title.
        ign_pcts: Accepted for signature parity with the sibling functions;
            not used here. Defaults to ``None`` rather than a shared mutable
            list.

    Returns:
        ``(df, summary)``: the renamed frame and the OLS summary of the fit.

    Side effects:
        Shows the figure and rebinds the globals ``cb0s`` and ``cb1s``.
    """
    global cb0s, cb1s

    x_col = "Mean Annual Reported Crimes"
    y_col = "Mean Annual Substantiated Misconduct Complaints"

    # rename() returns a new frame, so the caller's frame is left untouched.
    df = df.rename(
        columns={
            "Annual_Mean_Crime_Reports": x_col,
            "Annual_Mean_Substantiated": y_col,
        })

    shapes = seaborn_conf_int(df, x_col, y_col)
    fig = px.scatter(df,
                     x=x_col,
                     y=y_col,
                     text="Precinct",
                     trendline="ols")
    fig.update_traces(textposition='top center', textfont_size=6)
    fig.update_layout(shapes=shapes)
    fig.update_xaxes(
        title_text=
        "<span style='font-size: 12px;'>Mean Annual Number of Reported Crimes</span>"
    )
    fig.update_yaxes(
        title_text=
        "<span style='font-size: 12px;'>Mean Annual Number of Substantiated Misconduct Complaints</span>"
    )
    fig.update_layout(
        title={
            'text':
            f"<b>Figure {figno.capitalize()}</b>: Per-Precinct Mean Annual Substantiated Misconduct Complaints vs. Mean Annual Reported Crimes ({start}-{stop})",
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        })
    fig.show()

    fit = px.get_trendline_results(fig).px_fit_results.iloc[0]
    # Publish the intercept and slope for the later 'excess' computations.
    cb0s, cb1s = fit.params
    return df, fit.summary()
Esempio n. 12
0
def annual_subst_complaints_vs_officers_reg(dfa,
                                            start,
                                            stop,
                                            figno,
                                            ign_pcts=None):
    """Regress yearly substantiated complaints on the yearly officer count.

    Args:
        dfa: Complaint-level frame with ``Year``, ``Precinct``,
            ``Board Disposition``, ``Unique Id`` and
            ``Num_NYPD_Officers_Year``.
        start: First year to include (inclusive).
        stop: Last year to include (inclusive).
        figno: Figure label; it is capitalised in the title.
        ign_pcts: Precincts to exclude. ``None`` (the default) excludes
            nothing; a ``None`` default avoids a shared mutable list.

    Returns:
        ``(g, summary)``: the per-year frame with the columns ``Year``,
        ``Substantiated`` and ``NYPD Officers``, and the OLS summary.

    Side effects:
        Shows the figure.
    """
    excluded = [] if ign_pcts is None else ign_pcts

    dfa = dfa[(dfa["Year"] >= start) & (dfa["Year"] <= stop)]
    dfa = dfa[~dfa["Precinct"].isin(excluded)]

    # Substantiated complaints per year.
    dfs = dfa[dfa["Board Disposition"].str.contains("Substantiated ")]
    g = dfs.groupby("Year")["Unique Id"].count().reset_index().rename(
        columns={"Unique Id": "Substantiated"})

    # One officer count per year, taken from the complaint rows.
    officer_cols = ["Year", "Num_NYPD_Officers_Year"]
    og = dfa.drop_duplicates(officer_cols)[officer_cols].sort_values(
        by="Year")

    g = pd.merge(g, og, on="Year")
    g = g.rename(columns={"Num_NYPD_Officers_Year": "NYPD Officers"})

    shapes = seaborn_conf_int(g, "NYPD Officers", "Substantiated")
    fig = px.scatter(g,
                     x="NYPD Officers",
                     y="Substantiated",
                     color="Year",
                     text="Year",
                     trendline="ols")
    fig.update_traces(textposition='top center', textfont_size=6)
    fig.update_layout(shapes=shapes)
    fig.update_xaxes(
        title_text=
        "<span style='font-size: 12px;'>Number of Sworn NYPD Officers</span>")
    fig.update_yaxes(
        title_text=
        "<span style='font-size: 12px;'>Number of Substantiated Misconduct Complaints</span>"
    )
    fig.update_layout(
        title={
            'text':
            f"<b>Figure {figno.capitalize()}</b>: Number of Substantiated Misconduct Complaints vs. Number of Sworn NYPD Officers ({start}-{stop})",
            'x': 0.5,
            'xanchor': 'center',
            'yanchor': 'top'
        })
    fig.show()

    results = px.get_trendline_results(fig)
    return g, results.px_fit_results.iloc[0].summary()
Esempio n. 13
0
def updating_graph(feature1, feature2):
    """Return a trendline scatter plot and a table of its OLS summary header.

    Args:
        feature1: Column of the module-level ``df`` plotted on the x axis.
        feature2: Column of the module-level ``df`` plotted on the y axis.

    Returns:
        ``(fig, fig3)``: the scatter figure and a ``go.Table`` figure whose
        cells hold the first table of the fit summary, rounded to two
        decimals.
    """
    from io import StringIO  # local import: only needed for read_html below

    fig = px.scatter(df, x=feature1, y=feature2, trendline="ols")
    results = px.get_trendline_results(fig)
    results_summary = results.px_fit_results.iloc[0].summary()
    results_as_html = results_summary.tables[0].as_html()

    # pandas 2.1 deprecated passing literal HTML to read_html; a file-like
    # wrapper works on old and new versions alike.
    h = pd.read_html(StringIO(results_as_html))[0]
    h = h.round(2)

    # go.Table expects one list of cell values per column.
    vals = [list(h[col]) for col in h.columns]

    fig3 = go.Figure(data=[go.Table(cells=dict(values=vals))])
    fig3.update_layout(width=700, height=900)

    return fig, fig3
Esempio n. 14
0
def get_fig_continent2(df):
    """Plot total deaths against total cases per continent with OLS slopes.

    The last row of every (location, date_months) group is kept, the totals
    are summed per (continent, date_months) and drawn as a scatter plot with
    one trendline per continent. The slope of each fit, rounded to three
    decimals, is added as a coloured annotation at a fixed position.

    Returns:
        The annotated Plotly figure.
    """
    latest_rows = df.groupby(["location", "date_months"]).tail(1)
    value_cols = ["total_deaths", "total_cases"]
    totals = latest_rows.groupby(["continent",
                                  "date_months"])[value_cols].sum()
    totals = totals.reset_index()

    fig = px.scatter(totals,
                     x="total_cases",
                     y="total_deaths",
                     color="continent",
                     marginal_x="box",
                     trendline="ols",
                     template="simple_white")

    fits = px.get_trendline_results(fig)
    # params[1] of each fitted model is used as the slope.
    slopes = [round(model.params[1], 3) for model in fits["px_fit_results"]]

    palette = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b']
    # Hand-picked (x, y) anchors in data coordinates, one per annotation.
    anchors = [(-2 * 10**6, 19000), (1.255 * 10**7, 224264),
               (7 * 10**6, 307790), (9.85 * 10**6, 333600),
               (-2 * 10**6, 1000), (9 * 10**6, 280000)]

    # zip() stops at the shortest input, so surplus entries are ignored.
    for slope, colour, (x_pos, y_pos) in zip(slopes, palette, anchors):
        annotation = dict(font=dict(color=colour, size=12),
                          x=x_pos,
                          y=y_pos,
                          showarrow=False,
                          text=slope,
                          textangle=0,
                          xanchor='left',
                          xref="x",
                          yref="y")
        fig.add_annotation(annotation)
    return fig
Esempio n. 15
0
def calc_predictions(x):
    """Extrapolate ``Daily_Total`` 14 steps ahead with a linear OLS fit.

    An OLS trendline of ``Daily_Total`` against ``X_val`` is fitted through
    Plotly Express. The line is then evaluated at the 14 integer ``X_val``
    positions following the last row and paired with the 14 calendar days
    following the last ``Date``.

    Args:
        x: DataFrame with the columns ``X_val``, ``Daily_Total`` and ``Date``.

    Returns:
        DataFrame with the columns ``Dates`` (``YYYY-MM-DD`` strings) and
        ``OLS Values`` (predictions rounded to zero decimals).
    """
    # Fit the regression and pull out its coefficients.
    trend_fig = px.scatter(x, x="X_val", y="Daily_Total", trendline="ols")
    coeffs = px.get_trendline_results(trend_fig).px_fit_results.iloc[0].params
    intercept = coeffs[0]
    slope = coeffs[1]

    # The 14 positions right after the last observed one.
    first_future = x.X_val.iloc[-1] + 1
    future_steps = range(first_future, first_future + 14)
    predictions = [
        round((slope * step) + intercept, 0) for step in future_steps
    ]

    # 15 days starting at the last observed date; drop that first day.
    last_date = x.Date.iloc[-1]
    future_days = pd.date_range(start=str(last_date), periods=15)[1:].tolist()
    day_labels = [day.strftime('%Y-%m-%d') for day in future_days]

    rows = list(zip(day_labels, predictions))
    return pd.DataFrame(rows, columns=('Dates', 'OLS Values'))
Esempio n. 16
0
def plot_trendline_axis_known(x_axis: Tuple[str, List[float]],
                              y_axis: Tuple[str, List[float]],
                              show_plot: bool = True) -> List[float]:
    """Fit an OLS trendline through the given points and return its parameters.

    Each axis is passed as a ``(label, values)`` pair. The figure also carries
    a box plot on the x margin and a violin plot on the y margin, and is
    displayed unless ``show_plot`` is false.

    >>> x_axis_data = [1.0, 2.0, 3.0, 4.0, 5.0]
    >>> y_axis_data = [2.0, 4.0, 6.0, 8.0, 10.0]
    >>> plot_trendline_axis_known(('x-axis', x_axis_data), ('y-axis', y_axis_data), False)
    [-3.1086244689504383e-15, 2.0000000000000004]
    """
    x_label, x_values = x_axis[0], x_axis[1]
    y_label, y_values = y_axis[0], y_axis[1]
    points = pd.DataFrame({x_label: x_values, y_label: y_values})

    fig = px.scatter(points,
                     x=x_label,
                     y=y_label,
                     marginal_x="box",
                     marginal_y="violin",
                     trendline="ols")

    # Showing the plot is the default.
    if show_plot:
        fig.show()

    # Parameters of the first (and only) fitted model, as a plain list.
    fitted_model = px.get_trendline_results(fig).iloc[0]["px_fit_results"]
    return list(fitted_model.params)
Esempio n. 17
0
st.subheader("Relación entre Precio (US$) y Superficie (m²)")
# Scatter plot of price against surface with an OLS trendline
# (data_stat is presumably already filtered by zone - confirm upstream).
fig_scatter = px.scatter(data_stat,
                         x='Surface',
                         y='Price_USD',
                         trendline="ols",
                         color='Price_m2_USD',
                         labels=dict(Surface="Superficie en m²",
                                     Price_USD="Precio en US$",
                                     Price_m2_USD="Precio por m² (US$)"))
# Write the figure to the web app and make it responsive.
st.plotly_chart(fig_scatter, use_container_width=True)

# Get the results of the linear regression.
results = px.get_trendline_results(fig_scatter)
results_summary = results.px_fit_results.iloc[0].summary()

# `tables` is a list; the table at index 0 is the header table, which holds
# R-squared on its "Dep. Variable:" row. read_html returns a list of frames,
# so index 0 of that list is the one we want.
# Credit to: https://stackoverflow.com/questions/51734180/converting-statsmodels-summary-object-to-pandas-dataframe/52976810
from io import StringIO  # pandas 2.1 deprecated literal HTML in read_html

results_as_html = results_summary.tables[0].as_html()
reg_results = pd.read_html(StringIO(results_as_html), header=None,
                           index_col=0)[0]  # Read as a DataFrame
r_squared = reg_results.loc['Dep. Variable:'][3]  # Extract R-squared
st.write(
    '<html lang="es"><html translate="no">',
    "En función del modelo desplegado en el gráfico de dispersión, se puede notar que para la",
    selected_zone_stat,
    ", el",
    "{:.0%}".format(r_squared),
    "de la varianza en el precio puede ser predicha basándose en la cantidad de m² de la propiedad.",
Esempio n. 18
0
    dict(
        xref='paper',
        yref='paper',
        x=0.5,
        y=-0.22,
        xanchor='center',
        yanchor='top',
        font=dict(family='Arial', size=12, color=color_footer),
        showarrow=False,
        text=
        'twitter.com/vivekparasharr | github.com/vivekparasharr | vivekparasharr.medium.com'
    ))
fig.update_layout(template="plotly_dark")
fig.show()

################################################################################

# Plotted using plotly express
import plotly.express as px
# Select the rows with Code 'CHL' using one combined mask per frame. Chaining
# two boolean selections (df[m1][m2]) gives the same rows but makes pandas
# warn that the second mask is reindexed.
chl_daily = df2[(df2.Code == 'CHL') & (df2.Year <= 2002)].Daily.values
chl_total = df[(df.Code == 'CHL') & (df.Year >= 1980)].Total.values

# NOTE(review): x comes from df2 (Year <= 2002) and y from df (Year >= 1980);
# both selections must have the same length - confirm this is intended.
fig = px.scatter(
    x=chl_daily,
    y=chl_total,
    error_y_minus=chl_total,
    trendline="ols")
fig.show()

results = px.get_trendline_results(fig)
print(results)
#results.query("sex == 'Male' and smoker == 'Yes'").px_fit_results.iloc[0].summary()
# The summary is the value of the last expression; it is only displayed when
# this runs as the final statement of an interactive cell.
results.px_fit_results.iloc[0].summary()
Esempio n. 19
0
def create_xyplot(clickData, predictand, predictor, fc_time, bdnc, info):
    """Scatter the predictand against the original and fitted predictor.

    Args:
        clickData: Click payload carrying ``['points'][0]['x']`` (longitude)
            and ``['points'][0]['y']`` (latitude). ``None`` falls back to
            ``info['clickData']``.
        predictand: Key into ``info['variables_prad']``.
        predictor: Key into ``info['variables_pred']``.
        fc_time: String whose characters from index 5 onwards are the month
            number.
        bdnc: Path prefix of the NetCDF files.
        info: Dict with ``clickData``, ``variables_prad`` and
            ``variables_pred``.

    Returns:
        Plotly figure with one OLS trendline per series ("orig" and "fit").

    Side effects:
        Opens NetCDF datasets and prints intermediate data for debugging.
    """
    # np.int was merely an alias of the builtin int and was removed in
    # NumPy 1.24, so use the builtin directly.
    mo = int(fc_time[5:])
    if clickData is None:
        clickData = info['clickData']

    lat_click = clickData['points'][0]['y']
    lon_click = clickData['points'][0]['x']

    predictand = info['variables_prad'][predictand]
    predictor = info['variables_pred'][predictor]

    month_tag = str(mo).zfill(2)
    pred = xr.open_dataset(bdnc + 'predodata_3m_nc_' + predictand + '_' +
                           month_tag + '.nc')
    predfit = xr.open_dataset(bdnc + 'predodata_3m_fit_' + predictand + '_' +
                              month_tag + '.nc')
    if predictor == 'CO2':
        prad = xr.open_dataset(bdnc + 'predadata_v2_' + predictand + '.nc')
    else:
        prad = xr.open_dataset(bdnc + 'predadata_3m_nc_' + predictand + '_' +
                               month_tag + '.nc')

    # Select the clicked location and the rows of the forecast month.
    # NOTE(review): the month mask for predfit is built from pred's time axis,
    # as in the original - confirm both files share the same time coordinate.
    in_month = pred['time.month'] == mo
    try:
        # Predictor with lat/lon dimensions.
        pred1d = pred[predictor].sel(
            lon=lon_click, lat=lat_click,
            method='nearest').sel(time=in_month)
        pred1d_fit = predfit[predictor].sel(
            lon=lon_click, lat=lat_click,
            method='nearest').sel(time=in_month)
    except ValueError:
        # Predictor without spatial dimensions: select on time only.
        pred1d = pred[predictor].sel(time=in_month)
        pred1d_fit = predfit[predictor].sel(time=in_month)

    prad1d = prad.sel(
        lon=lon_click, lat=lat_click,
        method='nearest').sel(time=(prad['time.month'] == mo))

    print('prad1d', prad1d)
    print('pred1d', pred1d)
    print('pred1d_fit', pred1d_fit)

    data_orig = xr.merge([
        prad1d.to_array(name='predictand').squeeze(),
        pred1d.rename('orig'),
        pred1d_fit.rename('fit')
    ]).to_dataframe()
    print(data_orig)
    # Long format: one row per (predictand, series, value) for colouring.
    data_melt_orig = data_orig.dropna().melt(id_vars='predictand',
                                             value_vars=['orig', 'fit'])
    print(data_melt_orig)

    fig = px.scatter(data_melt_orig,
                     x='value',
                     y='predictand',
                     color='variable',
                     trendline='ols')
    fig.data[-1].name = 'Diner'
    fig.data[-1].showlegend = True

    results = px.get_trendline_results(fig)
    print(results.iloc[0])

    fig.update_layout(legend=go.layout.Legend(
        traceorder="normal",
        font=dict(family="sans-serif", size=12, color="black"),
        bordercolor="Black",
        borderwidth=2))

    fig.update_layout(
        go.Layout(
            title=
            'Correlation between burned area and observed and forecasted MDC (lat='
            + str(lat_click) + ', lon=' + str(lon_click) + ')',
            autosize=False,
            height=500,
        ))
    fig.update_yaxes(title_text=predictand)
    fig.update_xaxes(title_text=predictor)

    # NOTE(review): the message below names another function; it is left
    # unchanged because log output may be matched elsewhere.
    print(' ')
    print('>>> Finished create_cor_time_series <<<')
    print(' ')

    return fig
Esempio n. 20
0
)
st.plotly_chart(fig_map)

# --- Population vs. hospital count, restricted to the chosen states ---
selected_states = df[df['ST'].isin(states_choice)]

st.subheader('Population v/s Hospital Count')
st.text('''Scatter plot with trendline. See below graph for correlation 
coefficient if more than one state selected''')

fig_scatter = px.scatter(selected_states,
                         x="Population",
                         y="Hospital Count",
                         text="ST",
                         trendline="ols")
st.plotly_chart(fig_scatter)

# The R^2 of the fit is only reported when several states are selected.
if len(states_choice) > 1:
    trend_fit = px.get_trendline_results(fig_scatter).px_fit_results.iloc[0]
    r2_value = trend_fit.rsquared
    st.text(f'Correlation Coefficient (R^2 value): {r2_value:.5f}')

# --- Hospital count per state ---
st.subheader('Hospital Count by State')
st.text('Bar chart showing number of Hospitals by State')

fig_bar = px.bar(
    selected_states,
    x='ST',
    y='Hospital Count',
    text='Hospital Count',
    labels={'ST': 'State'},
)
st.plotly_chart(fig_bar)
    def __call__(
        self,
        screen_object: Any,
        mode: str = 'pointmutant',
        show_results: bool = False,
        replicate: int = -1,
        output_html: Union[None, str, Path] = None,
        **kwargs: Any,
    ) -> None:
        """
        Generate a scatter plot between object and a second object of the
        same class.

        Parameters
        ----------
        screen_object : object from class *Screen* to do the scatter with

        mode : str, default 'pointmutant'.
            Alternative set to "mean" for the mean of each position.

        show_results : boolean, default False
            If set to true, the summary of the linear fit is computed.

        replicate : int, default -1
            Set the replicate to plot. By default, the mean is plotted.
            First replicate start with index 0.
            If there is only one replicate, then leave this parameter
            untouched.

        output_html : str, default None
            If you want to export the generated graph into html, add
            the path and name of the file. Example: 'path/filename.html'.

        **kwargs : other keyword arguments

        Raises
        ------
        ValueError
            If ``mode`` is neither 'pointmutant' nor 'mean'.
        """
        temp_kwargs: Dict[str, Any] = self._update_kwargs(kwargs)

        # Chose mode:
        if mode == 'pointmutant':
            self.df_output = process_by_pointmutant(
                self.dataframes.df_notstopcodons[replicate],
                screen_object.dataframes.df_notstopcodons[replicate]
            )
        elif mode == 'mean':
            self.df_output = process_mean_residue(
                self.dataframes.df_notstopcodons[replicate],
                screen_object.dataframes.df_notstopcodons[replicate]
            )
            self.df_output['Variant'] = self.df_output['Position']
        else:
            # Without this guard an unknown mode would plot whatever
            # df_output a previous call left behind, or fail with an
            # unrelated AttributeError.
            raise ValueError(
                f"mode must be 'pointmutant' or 'mean', got {mode!r}"
            )

        # create figure
        self.fig = px.scatter(
            x=self.df_output['dataset_1'],
            y=self.df_output['dataset_2'],
            trendline="ols",
            trendline_color_override="red",
        )

        self._tune_plot(temp_kwargs)
        self._save_html(output_html, temp_kwargs)

        if show_results:
            # NOTE(review): the summary is built but its value is discarded
            # (the method returns None) - confirm whether it should be
            # printed or returned.
            px.get_trendline_results(self.fig).px_fit_results.iloc[0].summary()
Esempio n. 22
0
def create_cor_fires(clickData,base_time,valid_time,area_size,variable='MDC (from TP)'):
    """Build a scatter figure of burned area against observed/forecast values.

    Reads the MODIS burned-area dataset and the prediction dataset for the
    selected variable, reduces both to 1-D time series around the clicked
    location, and plots burned area against the ``obs`` and ``kprep`` series
    with an OLS trendline per series.

    Parameters
    ----------
    clickData : dict or None
        Click event; ``clickData['points'][0]['x']`` is used as longitude and
        ``clickData['points'][0]['y']`` as latitude. When None, the
        module-level ``clickData_start`` is used instead.

    base_time : key into the module-level ``base_times`` mapping
        Selects the prediction file to open.

    valid_time : key into the module-level ``valid_times`` mapping
        Selects the lead time of the prediction and the calendar month of
        the MODIS series.

    area_size : key into the module-level ``area_sizes`` mapping
        Size of the box around the clicked point over which data are
        aggregated (a 0.1 margin is added on every side).

    variable : str, default 'MDC (from TP)'
        Key into the module-level ``variables`` mapping; the mapped value is
        part of the prediction file name.

    Returns
    -------
    fig : plotly figure
        Scatter plot with trendlines, legend and axis titles configured.
    """
    print(' ')
    print('>>> Starting create_cor_fires <<<')
    print(' ')

    PRINT = False   # set to True for verbose debug output
    REGION = True   # True: aggregate over a box; False: nearest grid point

    if clickData is None:
        clickData = clickData_start

    st = base_times[base_time]
    lt = valid_times[valid_time]
    name2 = str(st)+'.nc'

    lat_click = clickData['points'][0]['y']
    lon_click = clickData['points'][0]['x']
    # Bounding box around the click. la1 > la2, so the latitude slice runs
    # from high to low (presumably the files store latitude descending --
    # TODO confirm).
    la1 = lat_click+0.1+area_sizes[area_size]/2
    la2 = lat_click-0.1-area_sizes[area_size]/2
    lo1 = lon_click-0.1-area_sizes[area_size]/2
    lo2 = lon_click+0.1+area_sizes[area_size]/2

    predictand = variables[variable]

    modis_path = bdnc+'2001-2018-MODIS_BA_r10.nc'
    pred_path = bdnc+'pred_v2_'+predictand+'_'+name2

    if REGION:
        # Total burned area inside the box, then keep only the months equal
        # to the selected lead time value.
        modis = xr.open_dataset(modis_path).sel(lon=slice(lo1,lo2),lat=slice(la1,la2)).sum(dim=['lat','lon'])
        modis_m = modis.sel(time=modis['time.month'] == lt)
        print(modis)
        print(modis_m)
        pred = xr.open_dataset(pred_path).sel(lon=slice(lo1,lo2),lat=slice(la1,la2),leadtime=lt,time=slice('2001','2019'))
        pred1d = pred.kprep.mean(dim=['lat','lon','ens'])
        obs1d = pred.obs.mean(dim=['lat','lon'])
    else:
        modis = xr.open_dataset(modis_path).sel(lon=lon_click,lat=lat_click,method='nearest')
        modis_m = modis.sel(time=modis['time.month'] == lt).load()
        pred = xr.open_dataset(pred_path).sel(lon=lon_click,lat=lat_click,method='nearest').sel(leadtime=lt,time=slice('2001','2019'))
        pred1d = pred.kprep.mean(dim='ens')
        obs1d = pred.obs

    if PRINT: print('create_cor_fires - modis time',modis_m.time)
    # Replace the time coordinate with the MODIS one: the prediction series
    # carried the base time instead of the valid time.
    pred1d = pred1d.assign_coords(time=modis_m.time)
    obs1d = obs1d.assign_coords(time=modis_m.time)

    if PRINT: print('create_cor_fires - modis burned area',modis_m['burned_area'])
    if PRINT: print('create_cor_fires - krpep values',pred1d)

    # Pearson correlations, computed with the last sample left out.
    # NOTE(review): these results are not used in the figure or returned;
    # kept so that the call (and any error it raises) still happens.
    burned = modis_m['burned_area'].values.squeeze()[:-1]
    cor_ba_kprep = scipy.stats.pearsonr(pred1d.values.squeeze()[:-1],burned)
    cor_ba_obs = scipy.stats.pearsonr(obs1d.values.squeeze()[:-1],burned)

    # Burned area is divided by 1e6 before plotting (presumably m2 -> km2,
    # matching the y-axis title below -- TODO confirm units of the file).
    data = xr.merge([modis_m / 1.e6,obs1d,pred1d]).drop('leadtime').to_dataframe()
    # Long format (one row per sample and series) so plotly express can
    # colour by series.
    data_melt = data.dropna().melt(id_vars='burned_area', value_vars=['obs', 'kprep'])

    fig = px.scatter(data_melt, x='value', y='burned_area', color='variable',trendline='ols')
    # NOTE(review): the last trace is renamed 'Diner' and forced into the
    # legend; the intent of this label is not evident from this function.
    fig.data[-1].name = 'Diner'
    fig.data[-1].showlegend = True

    results = px.get_trendline_results(fig)
    print(results.iloc[0])

    fig.update_layout(
        legend=go.layout.Legend(
            traceorder="normal",
            font=dict(
                family="sans-serif",
                size=12,
                color="black"
            ),
            bordercolor="Black",
            borderwidth=2
        )
    )

    fig.update_layout(go.Layout(
            title = 'Correlation between burned area and observed and forecasted MDC (lat='+str(lat_click)+', lon='+str(lon_click)+')',
            autosize=False,
            height=500,
            ))
    fig.update_yaxes(title_text="Burned Area [km2]")
    fig.update_xaxes(title_text="MDC [-]")

    print(' ')
    print('>>> Finished create_cor_fires <<<')
    print(' ')

    return fig