def update_graph2(data):
    """Return a scatter figure built from a JSON-serialised data frame.

    ``data`` is parsed with ``orient='split'``; the resulting frame is
    plotted with "material" on the x axis, "proportion" on the y axis and
    one colour per "material" value.
    """
    frame = pd.read_json(data, orient='split')
    figure = px.scatter(
        frame,
        x="material",
        y="proportion",
        color="material",
    )
    return figure
Exemple #2
0
                   y=selected_value,
                   title=f"{selected_country}の{selected_value}")


# Layout fragment: an animated gapminder scatter (drag mode preset to
# "select" through the figure template) followed by a heading element
# with id "show_text5".
handson5 = html.Div(
    children=[
        dcc.Graph(
            id="my_graph5",
            figure=px.scatter(
                gapminder,
                x="gdpPercap",
                y="lifeExp",
                size="pop",
                color="continent",
                hover_data=["country"],
                animation_frame="year",
                log_x=True,
                range_y=[20, 90],
                size_max=70,
                template=dict(layout=dict(dragmode="select")),
            ),
        ),
        html.H1(id="show_text5"),
    ],
    style=dict(margin="5%"),
)


@app.callback(Output("show_text5", "children"),
Exemple #3
0
# Source CSV for the plot (absolute path on the author's machine).
location = '/Users/Derrick-Vlad-/Desktop/Personal Projects/2019/Web Scraping/Monetary Authority Singapore (MAS)/Attempt 3/BackTests/CompileBackTest_4/Finalized_2/Plot_Data_3.csv'
typesss2 = pd.read_csv(location)

# Lower bound: all three measures must exceed ``min_all``.
typesss2 = typesss2.loc[
    (typesss2['Gross Claims'].astype(float) > min_all)
    & (typesss2['Gross Premiums'].astype(float) > min_all)
    & (typesss2['Operating Result'].astype(float) > min_all)
]
# Upper bound on claims and premiums.
typesss2 = typesss2.loc[
    (typesss2['Gross Claims'].astype(float) < max_Claims)
    & (typesss2['Gross Premiums'].astype(float) < max_Premiums)
]

# Coverage values excluded from the plot.
continents = ['Fire', 'Cargo & Hull', 'Work Injury', 'Misc']
typesss2 = typesss2.loc[~typesss2['Coverage'].isin(continents)]

# Treat blank / whitespace-only cells as missing, then drop incomplete rows.
typesss2 = typesss2.replace(r'^\s*$', np.nan, regex=True)
typesss2 = typesss2.dropna()

# Plotter: one animation frame per "Year", bubbles tracked by "Insurer Code".
fig5 = px.scatter(
    typesss2,
    x="Gross Claims",
    y="Gross Premiums",
    size="Operating Result",
    color="Coverage",
    hover_name="Insurer Code",
    animation_frame="Year",
    animation_group="Insurer Code",
    log_x=log_scaling,
    size_max=60,
    title='Evolution: 14-Years of Private General Insurance Data in SG (Logarithmic Scale)',
    # range_x=[100, 1000000], range_y=[-50000, 250000],
)
plotly.offline.plot(fig5, filename='file6.html')
Exemple #4
0
# Write the imputed weather values back into the merged frame.
merged[["temp", "humidity", "dew_point"]] = imp_values

# Notebook-style inspection of the rows that were previously null.
merged.iloc[null_indexes]

# Split meters into two groups: the first two meter ids vs. all the others.
meters = list(merged.METER_ID.unique())
_in_first_two = (merged.METER_ID == meters[0]) | (merged.METER_ID == meters[1])
group1 = merged[_in_first_two]
group2 = merged[~_in_first_two]

# EDA

# Correlation between the meter reading and the weather variables.
subset = merged[["READ_VALUE", "temp", "humidity", "dew_point"]]
sns.heatmap(subset.corr(), annot=True)
plt.title("Correlation Heatmap")

# One faceted scatter per weather variable: (x column, colour column, title).
for _x_col, _color_col, _title in (
    ("temp", "humidity", "Temp Against Meter Value"),
    ("humidity", "temp", "Humidity Against Meter Value"),
    ("dew_point", "humidity", "Dew Point Against Meter Value"),
):
    fig = px.scatter(
        merged,
        x=_x_col,
        y="READ_VALUE",
        color=_color_col,
        width=2000,
        height=500,
        title=_title,
        facet_col="METER_ID",
    )
    fig.show()

# Reading trend over time, one facet (and colour) per meter.
fig = px.line(
    merged,
    x="date",
    y="READ_VALUE",
    color="METER_ID",
    facet_col="METER_ID",
    width=2000,
    height=500,
    title="Meter Reading Trend",
)
fig.update_xaxes(tickangle=45)
fig.show()

"""# Stationary Data Check"""
Exemple #5
0
# Cast the draft year to float, then switch to display-friendly column
# names for the plot.
_display_names = {
    'draft_year': 'Draft Year',
    'career_WS': 'Career Win Shares',
    'career_earnings': 'Career Earnings',
    'name': 'Name',
}
nba_df = nba_df.astype({'draft_year': 'float64'}).rename(columns=_display_names)
print(nba_df.columns)
import plotly.express as px

# Career earnings against win shares, coloured by draft year.
fig = px.scatter(
    nba_df,
    x="Career Win Shares",
    y="Career Earnings",
    color='Draft Year',
    color_continuous_scale=px.colors.sequential.Jet,
    hover_data=['Career Win Shares', 'Career Earnings', 'Name', 'Draft Year'],
    title="Win Shares by Salary in NBA History",
)

# Axis titles, fonts, margins and background.
fig.update_layout(
    title="Win Shares by Salary in NBA History",
    xaxis_title="Career Win Shares",
    yaxis_title="Career Earnings ($)",
    height=500,
    font={"family": "Courier New, monospace", "size": 14, "color": "Black"},
    margin={"l": 150, "r": 60, "t": 60, "b": 60},
    paper_bgcolor="LightSteelBlue",
)

fig.update_traces(marker=dict(size=12,
Exemple #6
0
import plotly.express as px

# Sepal length vs. width for the bundled iris data set.
df = px.data.iris()
fig = px.scatter(
    df,
    x="sepal_length",
    y="sepal_width",
    title='Gráfico de Dispersão',
    width=800,
    height=500,
)

# In Jupyter, fig.show() can be used instead of writing a file.
fig.write_html('first_figure.html', auto_open=True)
# State pivot: summed project amount and property count per state.
pivot_state = df.pivot_table(
    index="state",
    values=["projectAmountMillions", "numberOfProperties"],
    aggfunc="sum",
).reset_index()
pivot_state["region"] = pivot_state["state"].apply(state_to_region)
# Drop rows whose region came back as "skip" (not one of the 50 states).
pivot_state = pivot_state.loc[pivot_state["region"] != "skip"]

# Pairplot of amount funded / properties covered, coloured by region.
# The first column ("state") is left out of the grid.
ax = sns.pairplot(pivot_state.iloc[:, 1:], kind="scatter", hue="region")
ax.fig.suptitle(
    "Total Project Amount and Number of Properties Covered In FEMA Projects by US State"
)
ax.fig.tight_layout()
ax.fig.subplots_adjust(top=0.95)  # leave room for the title
plt.show()

###plotly express scatter, for dash app later
import plotly.express as px
from plotly.offline import plot
# Distinct states present in the pivot (kept for the later dash app).
available_states = pivot_state['state'].unique()

# Properties covered vs. amount funded, one colour per region.
fig_px = px.scatter(
    pivot_state,
    x="numberOfProperties",
    y="projectAmountMillions",
    color="region",
    hover_data=['state', 'region'],
)
# plot(fig_px,filename='fema_state_propnumber_amountspent_scatter.html')
Exemple #8
0
import pandas as pd
import plotly.express as px

# Cases per date, one colour per country.
df = pd.read_csv("corona.csv")
fig = px.scatter(
    df,
    x="date",
    y="cases",
    color="country",
    title="corona cases per date",
)
fig.show()
# Fix the display order of the survey answer categories.
gss_clean['men_bettersuited'] = gss_clean['men_bettersuited'].cat.reorder_categories(['agree',
                                                         'disagree'])
gss_clean['child_suffer'] = gss_clean['child_suffer'].astype('category')
gss_clean['child_suffer'] = gss_clean['child_suffer'].cat.reorder_categories(['strongly agree',
                                                         'agree',
                                                         'disagree',
                                                         'strongly disagree'])
gss_clean['men_overwork'] = gss_clean['men_overwork'].astype('category')
gss_clean['men_overwork'] = gss_clean['men_overwork'].cat.reorder_categories(['strongly agree',
                                                         'agree',
                                                         'neither agree nor disagree',
                                                         'disagree',
                                                         'strongly disagree'])

# Income against occupational prestige with an OLS trendline per sex.
scat = px.scatter(gss_clean, x='job_prestige', y='income',
                color='sex',
                hover_data=['education', 'socioeconomic_index'],
                trendline='ols',
                labels={'job_prestige':'Occupational Prestige', 'income':'Income'})

# Long format so income and prestige can each be boxed by sex.
boxes = pd.melt(gss_clean, id_vars=['sex'], value_vars=['income', 'job_prestige'])
fig1 = px.box(boxes.loc[boxes['variable']=='income'], x='value', y='sex', color='sex',
             labels={'value':'Income', 'sex':''})
fig1.update_layout(showlegend=False)
fig2 = px.box(boxes.loc[boxes['variable']=='job_prestige'], x='value', y='sex', color='sex',
             labels={'value':'Occupational Prestige', 'sex':''})
fig2.update_layout(showlegend=False)

# Work on an explicit copy: assigning a column into a selection of
# ``gss_clean`` triggers pandas' SettingWithCopy warning.
gss6 = gss_clean[['income', 'sex', 'job_prestige']].copy()
# Bucket prestige into six equal-width bins, then drop incomplete rows.
gss6['job_prestige'] = pd.cut(gss6.job_prestige, bins=6)
gss6 = gss6.dropna()
box_grid = px.box(gss6, x='income', y='sex', color='sex', facet_col='job_prestige', facet_col_wrap=2,
            labels={'income':'Income', 'sex':'Sex', 'job_prestige':'Occupational Prestige'},
            color_discrete_map = {'male':'blue', 'female':'red'})
Exemple #10
0
def _counts_frame(counts, ascending=True):
    """Return a one-column ``Counts`` frame built from a label -> count mapping.

    The mapping's keys become the index; rows are sorted by count.
    """
    frame = pd.DataFrame(index=counts.keys(),
                         data=counts.values(),
                         columns=['Counts'])
    frame.sort_values(by='Counts', ascending=ascending, inplace=True)
    return frame


def perform_eda(data):
    """Render the 'Data Analysis' Streamlit page for the streaming-content data.

    Draws, in order: age and rating distributions, a pie of 'Really_Good'
    IMDb titles per platform, genre / country / language count bars, a
    per-platform release scatter and an interactive genre/platform chart.

    Args:
        data: DataFrame of titles. The columns read here are 'Age',
            'Rotten Tomatoes', 'IMDb', 'Netflix', 'Hulu', 'Disney+' and
            'Prime Video'; 'Genres', 'Country' and 'Language' are handed to
            the encoding helper.

    Side effects:
        Adds the columns 'Rotten_Tomatoes_Rounded' and 'IMDB_Rounded' to
        ``data`` in place, writes to the Streamlit page, and reads
        'keep_genres.pickle' and 'keep_lang.pickle' from the working
        directory.
    """
    st.title('Data Analysis')

    st.markdown('## Age Analysis')
    st.plotly_chart(plot_value_counts_bar(data, 'Age'))
    st.markdown('''#### Observation: 
    - Most of the movies/shows are targeted to adult audience''')

    st.markdown('## Rotten Tomatoes Ratings')
    data['Rotten_Tomatoes_Rounded'] = data['Rotten Tomatoes'].apply(round_fix)
    st.plotly_chart(plot_value_counts_bar(data, 'Rotten_Tomatoes_Rounded'))
    st.markdown('''#### Observations:
- Most of the content is high rated on streaming platforms''')

    st.markdown('## IMDB Ratings')
    data['IMDB_Rounded'] = data.IMDb.apply(round_fix_imdb)
    st.plotly_chart(plot_value_counts_bar(data, 'IMDB_Rounded'))
    st.markdown('''#### Observations:
- Most of content on streaming platforms has average ratings on IMDB.''')

    st.markdown('## Highest IMDb Movies/Shows')
    # Filter once, then sum each platform's indicator column.
    top_rated = data[data['IMDB_Rounded'] == 'Really_Good']
    netflix_count = top_rated['Netflix'].sum()
    hulu_count = top_rated['Hulu'].sum()
    disney_count = top_rated['Disney+'].sum()
    prime_count = top_rated['Prime Video'].sum()
    indexes = ['Netflix', 'Hulu', 'Disney', 'Amazon Prime']
    values = [netflix_count, hulu_count, disney_count, prime_count]
    # ``names`` labels the sectors; ``labels`` in plotly express is a
    # column-rename dict and does not name pie slices.
    fig = px.pie(names=indexes, values=values, title='Top content on OTT',
                 hover_name=indexes)
    st.plotly_chart(fig)

    st.markdown('## Most Popular Genre')
    temp_data = data.copy()
    kata, temp_data = apply_encoding(temp_data, ['Genres', 'Country', 'Language'], get_kata=1)
    base_counts = _counts_frame(get_counts(temp_data, 'Genres', kata['Genres']))
    # Grey for genres under 1000 titles, red for the rest.
    colors = ['#988D90' if i < 1000 else '#F00045' for i in base_counts.Counts]
    fig = px.bar(x=base_counts.index,
                 y=base_counts['Counts'],
                 title='Most Popular Genre',
                 color_discrete_sequence=colors,
                 color=base_counts.index)
    st.plotly_chart(fig)
    st.markdown('''#### Observations:
- Drama is most popular genre''')

    st.markdown('## Most Released Content')
    st.markdown('### Country')
    base_counts = _counts_frame(get_counts(temp_data, 'Country', kata['Country']),
                                ascending=False)
    # Ten largest country counts.
    fig = px.bar(x=base_counts.index[:10],
                 y=base_counts['Counts'][:10],
                 color=base_counts['Counts'][:10],
                 title='Most Released Content')
    st.plotly_chart(fig)
    st.markdown('''#### Observations:
- Most released content was in US''')

    st.markdown('### Language')
    base_counts = _counts_frame(get_counts(temp_data, 'Language', kata['Language']),
                                ascending=False)
    # Five largest language counts.
    fig = px.bar(x=base_counts.index[:5],
                 y=base_counts['Counts'][:5],
                 color=base_counts['Counts'][:5],
                 title='Most Released Content: Language')
    st.plotly_chart(fig)
    st.markdown('''#### Observations:
From the above visualizations we can conclude that:
- We can work with few genres with count more than 1000 and rest of the genres can be categorized as others.
- It is important to keep countries, but at continent level for better clarity.
- Most of the content is in english only. ''')

    st.markdown('## OTT Platforms')
    st.markdown('### Content Releases')
    release_scores = get_ott_counts(temp_data,
                                    ['Netflix', 'Hulu', 'Prime Video', 'Disney+'],
                                    'Year')
    fig = px.scatter(
        release_scores,
        x='Year',
        y='Count',
        size='Count',
        color='Platform',
        title='Content Per OTT Apps released in consecutive years',
        color_discrete_sequence=['#E50914', '#3DBB3D', '#00A8E1', '#048f70 '])
    st.plotly_chart(fig)
    st.markdown('''#### Observations:
    - Amazon Prime Video has the most modern as well as old content''')

    st.markdown('### Top Genres')
    genres = kata['Genres'].copy()
    genres.append('All')
    platform = ['Netflix', 'Hulu', 'Prime Video', 'Disney+', 'All']
    temp_data.IMDb = temp_data.IMDb.apply(replaceNAby1)
    temp_data.IMDb = temp_data.IMDb.astype(float)
    genre = st.selectbox('Genres', genres)
    plt_frm = st.selectbox('Platform', platform)
    st.plotly_chart(plot_genres(genre, plt_frm, temp_data))

    # NOTE(review): ``eata`` is not defined inside this function; presumably
    # a module-level frame -- confirm it exists wherever this is called.
    # NOTE(review): the three count frames below are not used by any code
    # visible in this function.
    # The pickle files are assumed to be trusted, locally produced artefacts;
    # ``pickle.load`` must never be pointed at untrusted data.
    with open('keep_genres.pickle', 'rb') as handle:
        keep_genres = pickle.load(handle)
    genre_counts = _counts_frame(get_counts(eata, 'Genres', keep_genres))

    cont_counts = _counts_frame(
        get_counts(eata, 'Continent',
                   ['Africa', 'Antarctica', 'Asia', 'Europe', 'North America', 'Oceania', 'South America', 'NA']),
        ascending=False)

    with open('keep_lang.pickle', 'rb') as handle:
        keep_lang = pickle.load(handle)
    lang_counts = _counts_frame(get_counts(eata, 'Language', keep_lang),
                                ascending=False)
Exemple #11
0
            This app was made to help answer these
            questions in a meaningful and usable way. It is simple to use and can give a recommendation backed
            by data on what the user should buy for their personal treatment.

            """),
        dcc.Link(dbc.Button('Find out what you need', color='primary'),
                 href='/predictions')
    ],
    md=4,
)

# 2007 gapminder snapshot: GDP per capita (log scale) against life expectancy.
gapminder = px.data.gapminder()
fig = px.scatter(
    gapminder.query("year==2007"),
    x="gdpPercap",
    y="lifeExp",
    size="pop",
    size_max=60,
    color="continent",
    hover_name="country",
    log_x=True,
)

# Right-hand column: the 'herb.jpg' asset scaled to 80% in both dimensions.
column2 = dbc.Col(
    [
        html.Div(
            html.Img(
                src=app.get_asset_url('herb.jpg'),
                style={'height': '80%', 'width': '80%'},
            )
        )
    ]
)

# Page layout: the two columns side by side.
layout = dbc.Row([column1, column2])
Exemple #12
0
import csv
import plotly.express as px
import pandas as pd

# Read the marks (second column) of every data row, skipping the header.
with open("class2.csv", newline="") as f:
    reader = csv.reader(f)
    next(reader, None)  # header row
    newData = [float(row[1]) for row in reader]

n = len(newData)
if n == 0:
    # Fail with a clear message instead of a bare ZeroDivisionError below.
    raise ValueError("class2.csv contains no data rows")

# Use the builtin ``sum`` rather than shadowing it with a running total.
mean = sum(newData) / n

# Scatter of marks per student with a horizontal line at the mean.
df = pd.read_csv("class2.csv")
fig = px.scatter(df, x='Student Number', y="Marks")
fig.update_layout(shapes=[dict(type="line", y0=mean, y1=mean, x0=0, x1=n)])
fig.update_yaxes(rangemode="tozero")
fig.show()

print(mean)
Exemple #13
0
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

if __name__ == "__main__":
    # Single-page dash app showing three charts of the Boston housing data.
    app = dash.Dash(__name__)

    df_boston_housing = pd.read_csv('./data/BostonHousing.csv')
    # The parameter file is read with explicit column names.
    df_lin_model_params = pd.read_csv(
        './data/lin-model-params.csv',
        names=['param', 'coef'],
    )

    bar_chart = px.bar(df_lin_model_params, x='param', y='coef')
    histogram = px.histogram(df_boston_housing, x="ptratio")
    scatter = px.scatter(df_boston_housing, x="age", y="medv")

    # One graph component per figure, in display order.
    graphs = [
        dcc.Graph(id=graph_id, figure=figure)
        for graph_id, figure in (
            ('bar', bar_chart),
            ('hist', histogram),
            ('scat', scatter),
        )
    ]
    app.layout = html.Div(children=graphs)
    app.run_server(debug=True)
Exemple #14
0
# Per product: summed profit ("Gewinn") and summed count ("Anzahl").
df_BCG_1 = (
    df.groupby(["Angebotenes Produkt"])[["Gewinn", "Anzahl"]]
    .sum()
    .reset_index()
)
# Per product: sum of "Anzahl" divided by its number of non-null entries.
df_BCG_2 = df.groupby(["Angebotenes Produkt"])["Anzahl"].apply(
    lambda units: units.sum() / units.count()
)

# Both inputs carry an "Anzahl" column, so the merge suffixes them _x / _y.
df_BCG = df_BCG_1.merge(df_BCG_2, on="Angebotenes Produkt").rename(
    columns={"Anzahl_x": "Anzahl", "Anzahl_y": "Kaufwahrscheinlichkeit"}
)
df_BCG["Kaufwahrscheinlichkeit in %"] = df_BCG["Kaufwahrscheinlichkeit"] * 100
df_BCG["Gewinn pro Verkauf in €"] = df_BCG["Gewinn"] / df_BCG["Anzahl"]

# Scatter plot of "Kaufwahrscheinlichkeit in %" against
# "Gewinn pro Verkauf in €", one colour per product.
fig = px.scatter(
    df_BCG,
    x=df_BCG["Kaufwahrscheinlichkeit in %"],
    y=df_BCG["Gewinn pro Verkauf in €"],
    color="Angebotenes Produkt",
)

# Overlay text labels for sections of the BCG matrix on the scatter plot
# to visualise the classification.
for _section_label, _label_y in (
    ("<b>Poor Dogs</b>", 900),
    ("<b>Questionmarks</b>", 1900),
):
    fig.add_trace(
        go.Scatter(
            x=[12.5, 12.5],
            y=[_label_y, _label_y],
            text=[_section_label],
            mode="text",
            showlegend=False,
        )
    )
Exemple #15
0
# Output directory for the generated HTML files: test/percy (relative path).
import os

dir_name = os.path.join("test", "percy")

import plotly.express as px

# Print the docstring of the iris loader and peek at the first rows
# (the .head() result is only displayed in an interactive session).
print(px.data.iris.__doc__)
px.data.iris().head()

# #### Scatter and Line plots

import plotly.express as px

# Plain scatter of sepal width vs. sepal length, written to scatter.html.
iris = px.data.iris()
fig = px.scatter(iris, x="sepal_width", y="sepal_length")
fig.write_html(os.path.join(dir_name, "scatter.html"))

import plotly.express as px

# Same scatter coloured by species, written to scatter_color.html.
iris = px.data.iris()
fig = px.scatter(iris, x="sepal_width", y="sepal_length", color="species")
fig.write_html(os.path.join(dir_name, "scatter_color.html"))

import plotly.express as px

iris = px.data.iris()
fig = px.scatter(
    iris,
    x="sepal_width",
    y="sepal_length",
Exemple #16
0
def predictor_processing(
    df, predicts, response, response_col, resp_type, resp_mean, response_col_uncoded
):
    """Analyse every predictor against the response and tabulate the results.

    For each predictor column this classifies it as categorical or
    continuous, writes a relationship plot, fits a single-predictor
    regression (Logit for a categorical response, OLS otherwise), and
    computes the unweighted and weighted "difference with mean of
    response" statistics together with their plot.

    Args:
        df: DataFrame holding the predictor columns.
        predicts: Names of the predictor columns; also used as the index of
            the results table.
        response: Name of the response, used for column labels, plot titles
            and output file names.
        response_col: Response values passed to the plots and models.
        resp_type: "Categorical" or "Continuous".
        resp_mean: Value subtracted from each bin's mean response.
        response_col_uncoded: Values used to colour the histogram when the
            response is categorical and the predictor continuous.

    Returns:
        tuple: ``(pred_proc, results)`` where ``pred_proc`` is the response
        concatenated column-wise with every processed predictor and
        ``results`` is the per-predictor results table.

    Side effects:
        Writes HTML plots under ``./hw4_plots/`` and prompts on stdin for a
        bin count for every continuous predictor.
    """
    # Predictor loop
    ########################################

    predicts_col = df[df.columns.intersection(predicts)]

    # Build preliminary results table
    results_cols = [
        "Response",
        "Predictor Type",
        "t Score",
        "p Value",
        "Regression Plot",
        "Diff Mean of Response (Unweighted)",
        "Diff Mean of Response (Weighted)",
        "Diff Mean Plot",
    ]
    results = pd.DataFrame(columns=results_cols, index=predicts)

    # ``items()`` replaces ``iteritems()``, which pandas 2.0 removed.
    for pred_name, pred_data in predicts_col.items():

        # Decide cat or cont
        ##########
        # Check the column's dtype: ``pred_data`` is a Series, so an
        # ``isinstance(..., str)`` test could never be true.
        pred_string_check = pd.api.types.is_string_dtype(pred_data)
        pred_unique_ratio = len(pred_data.unique()) / len(pred_data)
        if pred_string_check or pred_unique_ratio < 0.05:
            pred_type = "Categorical"

            # Encode as integer codes in order of first appearance.
            pred_index = pred_data.index
            pred_data = pd.Categorical(pred_data, categories=pred_data.unique())
            pred_data, _ = pd.factorize(pred_data)

            # Keep the original index so the concat below lines the codes up
            # with the response the same way the continuous branch does.
            pred_data = pd.DataFrame(
                pred_data, columns=[pred_name], index=pred_index
            )
            pred_data_uncoded = df[pred_name]

        else:
            pred_type = "Continuous"
            pred_data = pred_data.to_frame()

        # Bind response and predictor together again
        df_c = pd.concat([response_col, pred_data], axis=1)
        df_c.columns = [response, pred_name]

        # Relationship plot and correlations
        if resp_type == "Categorical" and pred_type == "Categorical":
            rel_matrix = confusion_matrix(pred_data, response_col)
            fig_relate = go.Figure(
                data=go.Heatmap(z=rel_matrix, zmin=0, zmax=rel_matrix.max())
            )
            fig_relate.update_layout(
                title=f"Relationship Between {response} and {pred_name}",
                xaxis_title=pred_name,
                yaxis_title=response,
            )

        elif resp_type == "Categorical" and pred_type == "Continuous":

            fig_relate = px.histogram(df_c, x=pred_name, color=response_col_uncoded)
            fig_relate.update_layout(
                title=f"Relationship Between {response} and {pred_name}",
                xaxis_title=pred_name,
                yaxis_title="count",
            )

        elif resp_type == "Continuous" and pred_type == "Categorical":

            fig_relate = px.histogram(df_c, x=response, color=pred_data_uncoded)
            fig_relate.update_layout(
                title=f"Relationship Between {response} and {pred_name}",
                xaxis_title=response,
                yaxis_title="count",
            )

        elif resp_type == "Continuous" and pred_type == "Continuous":

            fig_relate = px.scatter(y=response_col, x=pred_data, trendline="ols")
            fig_relate.update_layout(
                title=f"Relationship Between {response} and {pred_name}",
                xaxis_title=pred_name,
                yaxis_title=response,
            )

        # File names use the names with spaces stripped.
        response_html = response.replace(" ", "")
        pred_name_html = pred_name.replace(" ", "")

        relate_file_save = f"./hw4_plots/{response_html}_{pred_name_html}_relate.html"
        relate_file_open = f"./{response_html}_{pred_name_html}_relate.html"
        fig_relate.write_html(file=relate_file_save, include_plotlyjs="cdn")
        relate_link = (
            "<a target='blank' href="
            + relate_file_open
            + "><div>"
            + pred_type
            + "</div></a>"
        )

        # Regression
        ##########

        if resp_type == "Categorical":
            reg_model = sm.Logit(response_col, pred_data, missing="drop")

        else:
            reg_model = sm.OLS(response_col, pred_data, missing="drop")

        # Fit model
        reg_model_fitted = reg_model.fit()

        # Get t val and p score of the single predictor. Going through an
        # array avoids integer-key lookups on a label-indexed Series, which
        # pandas has deprecated.
        t_score = round(np.asarray(reg_model_fitted.tvalues)[0], 6)
        p_value = "{:.6e}".format(np.asarray(reg_model_fitted.pvalues)[0])

        # Plot regression
        reg_fig = px.scatter(y=df_c[response], x=df_c[pred_name], trendline="ols")
        # NOTE(review): this first write happens before the titles are set
        # and uses the raw predictor name; the results table links only to
        # the second file written below -- confirm whether it is still needed.
        reg_fig.write_html(
            file=f"./hw4_plots/{pred_name}_regression.html", include_plotlyjs="cdn"
        )
        reg_fig.update_layout(
            title=f"Regression: {response} on {pred_name}",
            xaxis_title=pred_name,
            yaxis_title=response,
        )

        reg_file_save = f"./hw4_plots/{response_html}_{pred_name_html}_reg.html"
        reg_file_open = f"./{response_html}_{pred_name_html}_reg.html"
        reg_fig.write_html(file=reg_file_save, include_plotlyjs="cdn")
        reg_link = "<a target='blank' href=" + reg_file_open + "><div>Plot</div></a>"

        # Diff with mean of response (unweighted and weighted)
        ##########

        if pred_type == "Continuous":
            # Get user input on number of mean diff bins to use; keep asking
            # until a positive integer is entered (``pd.cut`` rejects less).
            bin_n = None
            while bin_n is None:
                answer = input(
                    f"\nEnter number of bins to use for difference with mean of response for {pred_name}:\n"
                )
                try:
                    bin_n = int(answer)
                except ValueError:
                    continue
                if bin_n < 1:
                    bin_n = None
            df_c["bin_labels"] = pd.cut(df_c[pred_name], bins=bin_n, labels=False)
            binned_means = df_c.groupby("bin_labels").agg(
                {response: ["mean", "count"], pred_name: "mean"}
            )

        else:
            # One bin per category code.
            binned_means = df_c.groupby(pred_data.iloc[:, 0]).agg(
                {response: ["mean", "count"], pred_name: "mean"}
            )
            bin_n = len(np.unique(pred_data.iloc[:, 0].values))

        binned_means.columns = [f"{response} mean", "count", f"{pred_name} mean"]

        # Binning and mean squared difference calc
        binned_means["weight"] = binned_means["count"] / binned_means["count"].sum()
        binned_means["mean_sq_diff"] = (
            binned_means[f"{response} mean"].subtract(resp_mean, fill_value=0) ** 2
        )
        binned_means["mean_sq_diff_w"] = (
            binned_means["weight"] * binned_means["mean_sq_diff"]
        )

        # Diff with mean of response stat calculations (weighted and unweighted)
        msd_uw = binned_means["mean_sq_diff"].sum() * (1 / bin_n)
        msd_w = binned_means["mean_sq_diff_w"].sum()

        # Diff with mean of response plots: bin counts as bars, mean response
        # as a line on the secondary y axis.
        fig_diff = make_subplots(specs=[[{"secondary_y": True}]])
        fig_diff.add_trace(
            go.Bar(
                x=binned_means[f"{pred_name} mean"],
                y=binned_means["count"],
                name="Observations",
            )
        )
        fig_diff.add_trace(
            go.Scatter(
                x=binned_means[f"{pred_name} mean"],
                y=binned_means[f"{response} mean"],
                line=dict(color="red"),
                name=f"Relationship with {response}",
            ),
            secondary_y=True,
        )
        fig_diff.update_layout(
            title_text=f"Difference in Mean Response: {response} and {pred_name}",
        )
        fig_diff.update_xaxes(title_text=f"{pred_name} (binned)")
        fig_diff.update_yaxes(title_text="count", secondary_y=False)
        fig_diff.update_yaxes(title_text=f"{response}", secondary_y=True)

        fig_diff_file_save = f"./hw4_plots/{response_html}_{pred_name_html}_diff.html"
        fig_diff_file_open = f"./{response_html}_{pred_name_html}_diff.html"
        fig_diff.write_html(file=fig_diff_file_save, include_plotlyjs="cdn")
        diff_link = (
            "<a target='blank' href=" + fig_diff_file_open + "><div>Plot</div></a>"
        )

        # Create processed df: the first predictor starts it off together
        # with the response, later predictors are appended as columns.
        if pred_name == predicts_col.columns[0]:
            pred_proc = pd.concat([response_col, pred_data], axis=1)
        else:
            pred_proc = pd.concat([pred_proc, pred_data], axis=1)

        # Add to results table
        results.loc[pred_name] = pd.Series(
            {
                "Response": response,
                "Predictor Type": relate_link,
                "t Score": t_score,
                "p Value": p_value,
                "Regression Plot": reg_link,
                "Diff Mean of Response (Unweighted)": msd_uw,
                "Diff Mean of Response (Weighted)": msd_w,
                "Diff Mean Plot": diff_link,
            }
        )

    return pred_proc, results
Exemple #17
0
import plotly.express as px
# Load the tips sample data set that ships with plotly express.
tips = px.data.tips() # tips dataset can be loaded from plotly
# data_canada = px.data.gapminder().query("country == 'Canada'")

import pandas as pd
# Export a CSV copy (absolute path on the author's machine).
tips.to_csv('/Users/vivekparashar/Downloads/tips.csv')

import altair as alt

import statsmodels.api as sm

# Dot plot shows changes between two (or more) points in time or between two (or more) conditions.
# The numeric column is selected *before* aggregating: ``tips`` also has
# string columns, and ``groupby(...).mean()`` over the whole frame raises
# TypeError on them in pandas >= 2.0.
t = tips.groupby(['day', 'sex'])[['total_bill']].mean().reset_index()
px.scatter(t, x='day', y='total_bill', color='sex',
        title='Average bill by gender by day',
        labels={'day':'Day of the week', 'total_bill':'Average Bill in $'})

# Bar (vertical and horizontal)
tips.groupby('sex')['total_bill'].mean().plot(kind='bar') # using pandas plot
tips.groupby('sex')['tip'].mean().plot(kind='barh')

t = tips.groupby(['day', 'sex'])[['total_bill']].mean().reset_index()
px.bar(t, x='day', y='total_bill') # Using plotly
px.bar(t, x='total_bill', y="day", orientation='h')

# Stacked Bar - need to unstack one of the levels and fill na values
tips.groupby(['day', 'sex'])[['total_bill']].mean()\
        .unstack('sex').fillna(0)\
        .plot(kind='bar', stacked=True) # using pandas plot; kind='barh' for horizontal plot
import pandas as pd
import csv
import plotly.express as px

# Mean number of attempts for every (student, level) pair.
df = pd.read_csv("data.csv")
mean = (
    df.groupby(["student_id", "level"], as_index=False)["attempt"]
    .mean()
)

# Bubble chart: both size and colour encode the mean attempt count.
fig = px.scatter(
    mean,
    x="student_id",
    y="level",
    color="attempt",
    size="attempt",
)
fig.show()
Exemple #19
0
    df.washer = df.washer.map(yes_no_dict)
    df.cable_tv = df.cable_tv.map(yes_no_dict)
    df.kitchen = df.kitchen.map(yes_no_dict)

    # rename columns
    df.rename(columns={'neighborhood': 'Neighborhood', 'room_type': 'Room Type', 'accommodates': 'Accommodates',
                       'bedrooms': 'Bedrooms', 'number_of_reviews': 'Number of Reviews', 'wifi': 'Wifi',
                       'cable_tv': 'Cable TV',
                       'washer': 'Washer', 'kitchen': 'Kitchen', 'price': 'Price (US Dollars)'}, inplace=True)

    # remove outliers
    df = df[df['Price (US Dollars)'] < 501]

    return df


# Clean the raw listing data before plotting.
df = clean_data(df)


# Price per neighborhood; bubble size is the number of guests accommodated
# and colour is the room type.
fig = px.scatter(
    df,
    x='Neighborhood',
    y='Price (US Dollars)',
    color='Room Type',
    size='Accommodates',
    hover_data=['Bedrooms', 'Wifi', 'Cable TV', 'Kitchen', 'Washer', 'Number of Reviews'],
)
fig.update_layout(
    template='plotly_white',
    title='How much should you charge in a Berlin neighborhood?',
)
# fig.show() would display it locally instead.

# Write the figure to an HTML file and open it.
pio.write_html(fig, file='templates/visss.html', auto_open=True)
Exemple #20
0
import dash
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import pandas as pd

# Stylesheet applied to the page.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)

# 2007 GDP / life-expectancy table fetched from a public gist.
df = pd.read_csv(
    'https://gist.githubusercontent.com/chriddyp/5d1ea79569ed194d432e56108a04d188/raw/a9f9e8076b837d541398e999dcbac2b2826a81f8/gdp-life-exp-2007.csv'
)

# Bubble chart with a logarithmic GDP axis, one colour per continent.
fig = px.scatter(
    df,
    x='gdp per capita',
    y='life expectancy',
    color='continent',
    size='population',
    size_max=60,
    hover_name='country',
    log_x=True,
)

app.layout = html.Div(
    [
        dcc.Graph(id='life-exp-vs-gdp', figure=fig),
    ]
)

if __name__ == '__main__':
    app.run_server(debug=True)
from dash.dependencies import Input, Output

import json

gapminder = px.data.gapminder()

app = dash.Dash(__name__)

# Page: a title, the animated gapminder scatter ("my_graph") and a second,
# initially figure-less graph ("show_data") filled in by a callback.
app.layout = html.Div(
    [
        html.H1("Gapminder Data"),
        dcc.Graph(
            id="my_graph",
            figure=px.scatter(
                gapminder,
                x="gdpPercap",
                y="lifeExp",
                color="continent",
                size="pop",
                size_max=80,
                hover_data=["country"],
                animation_frame="year",
                log_x=True,
                log_y=True,
            ),
        ),
        dcc.Graph(id="show_data"),
    ]
)


@app.callback(Output("show_data", "figure"), [Input("my_graph", "clickData")])
def update_data(hoverData):
    if hoverData is None:
        jp_gap = gapminder[gapminder.country.isin(["Japan"])]
        return px.line(jp_gap,
                       x="year",
                       y="gdpPercap",
Exemple #22
0
import csv
# Load every row of the CSV, then discard the header row.
with open("class1.csv", newline="") as f:
    reader = csv.reader(f)
    data = list(reader)
data.pop(0)

# Mean of the second column (the marks) over all data rows.
n = len(data)
totalmarks = sum(float(row[1]) for row in data)
mean = totalmarks / n
print(mean)
import pandas as pd
import plotly.express as px
df = pd.read_csv("class1.csv")
fig = px.scatter(df, x="Student Number", y="Marks")
fig.update_layout(shapes=[dict(type="line", y0=mean, y1=mean, x0=0, x1=n)])
fig.show()
Exemple #23
0
# Last label of the input frame's index.
date_last = df.index[-1]

# Run the simulation; the code below relies on the result exposing the
# 'Closing Price' and 'Total Worth' columns.
df_simu = mymodule.mysimu(df, previous_days, money_ini, bitcoin_ini, prop_ini)

# Normalise both series by their first row. `.iloc[0]` is used instead of
# `[0]` because integer keys on a non-integer index (e.g. dates) are treated
# as labels by recent pandas, so `[0]` is deprecated/unsupported there.
df_simu['Relative Closing Price'] = (
    df_simu['Closing Price'] / df_simu['Closing Price'].iloc[0])
df_simu['Relative Total Worth'] = (
    df_simu['Total Worth'] / df_simu['Total Worth'].iloc[0])

# Position of each row within the period, scaled to [0, 1]. The divisor is
# clamped to 1 so a single-row frame yields 0.0 instead of dividing by zero.
_n_rows = len(df_simu.index)
_time_span = max(_n_rows - 1, 1)
df_simu['Relative Time'] = [step / _time_span for step in range(_n_rows)]

fig2 = px.scatter(
    df_simu,
    x='Relative Closing Price',
    y='Relative Total Worth',
    color='Relative Time',
    color_continuous_scale=px.colors.sequential.Viridis,
    title='Performance against "Buy and Hold" across selected period.')

# y = x reference line, drawn over the values of whichever relative series
# has the smaller maximum.
line = df_simu['Relative Closing Price'] if df_simu[
    'Relative Closing Price'].max() < df_simu['Relative Total Worth'].max(
    ) else df_simu['Relative Total Worth']
fig2.add_scatter(x=line,
                 y=line,
                 mode='lines',
                 opacity=0.5,
                 name='Unitary reference')
fig2.update(layout_showlegend=False)

app = dash.Dash(external_stylesheets=[dbc.themes.BOOTSTRAP])
# Expose the underlying server object at module level.
server = app.server
Exemple #24
0
def update_graph(subsidio_type_value, ayuda_type_value, xaxis_type,
                 yaxis_type):
    """Build the scatter figure for the selected subsidy / aid pair.

    The plotted data is looked up in the ``"precalculated_data"`` Redis entry
    under the key ``"<subsidio_type_value>_<ayuda_type_value>"``. On a miss it
    is computed with ``get_data_for_graphic`` and written back together with
    the time of computation.

    Args:
        subsidio_type_value: Value matched against ``available_x_axis``
            options to obtain the x-axis label / column name.
        ayuda_type_value: Value matched against ``available_y_axis`` options
            to obtain the y-axis label / column name.
        xaxis_type: ``'Lineal'`` for a linear x axis; anything else is log.
        yaxis_type: ``'Lineal'`` for a linear y axis; anything else is log.

    Returns:
        A ``(figure, None)`` tuple.

    Raises:
        IndexError: If no axis option matches one of the selected values.
    """
    # get the data
    data_key_name = "%s_%s" % (subsidio_type_value, ayuda_type_value)
    precalculated_data = _load_precalculated_data(
        redis.get("precalculated_data"))
    if data_key_name not in precalculated_data:
        precalculated_data[data_key_name] = {
            "data": get_data_for_graphic(subsidio_type_value,
                                         ayuda_type_value),
            # Explicit format: str(datetime) drops the fractional part when
            # microsecond == 0, which the strict "%f" parse cannot read back.
            "precalculation_date": datetime.datetime.now().strftime(
                _PRECALCULATION_DATE_FORMAT),
        }
        redis.set("precalculated_data",
                  _dump_precalculated_data(precalculated_data))
    data = precalculated_data[data_key_name]
    precalculation_date = _parse_precalculation_date(
        data["precalculation_date"])

    # get corresponding labels
    x_axis_label = [
        x_option["label"] for x_option in available_x_axis
        if x_option["value"] == subsidio_type_value
    ][0]
    y_axis_label = [
        y_option["label"] for y_option in available_y_axis
        if y_option["value"] == ayuda_type_value
    ][0]

    # create graphic
    df = pd.DataFrame(data["data"])
    fig = px.scatter(df,
                     x=x_axis_label,
                     y=y_axis_label,
                     title="Datos actualizados al %s" %
                     precalculation_date.strftime("%d/%m/%Y, a las %H:%M"))

    # linear regression (only drawn when both axes are linear)
    if xaxis_type == 'Lineal' and yaxis_type == 'Lineal':
        df['regression'] = sm.OLS(df[y_axis_label],
                                  sm.add_constant(
                                      df[x_axis_label])).fit().fittedvalues
        fig.add_trace(
            go.Scatter(name='Regresión lineal',
                       x=df[x_axis_label],
                       y=df['regression'],
                       mode='lines'))

    fig.update_xaxes(type='linear' if xaxis_type == 'Lineal' else 'log')

    fig.update_yaxes(type='linear' if yaxis_type == 'Lineal' else 'log')

    # px.scatter already returns a graph_objects Figure, so no re-wrapping.
    fig.update_traces(
        marker_size=10,
        marker_color='#3fa652',
    )

    return fig, None


# Timestamp layout used for the "precalculation_date" cache field.
_PRECALCULATION_DATE_FORMAT = '%Y-%m-%d %H:%M:%S.%f'


def _load_precalculated_data(raw):
    """Decode the cached payload; a missing entry yields an empty dict."""
    if raw is None:
        return {}
    text = raw.decode("utf-8") if isinstance(raw, bytes) else raw
    try:
        return json.loads(text)
    except json.JSONDecodeError:
        # Legacy entries were written with str(dict); retry with the
        # single-to-double quote substitution that format relied on.
        return json.loads(text.replace("'", '"'))


def _dump_precalculated_data(cache):
    """Serialise the cache as JSON, or as str(dict) if it is not JSON-safe."""
    try:
        return json.dumps(cache)
    except TypeError:
        return str(cache)


def _parse_precalculation_date(text):
    """Parse a cached timestamp, with or without fractional seconds."""
    try:
        return datetime.datetime.strptime(text, _PRECALCULATION_DATE_FORMAT)
    except ValueError:
        return datetime.datetime.strptime(text, '%Y-%m-%d %H:%M:%S')
Exemple #25
0
import plotly.express as px
import csv

# Stream the CSV rows (one dict per row, keyed by the header line) straight
# into Plotly Express and display the chart while the file is still open.
# NOTE(review): csv.DictReader yields every value as a string, so both axes
# are presumably treated as categories rather than numbers - confirm intent.
with open("cups of coffee vs hours of sleep.csv") as handle:
    fig = px.scatter(csv.DictReader(handle),
                     x="Coffee in ml",
                     y="sleep in hours")
    fig.show()
# df = pd.read_csv("Teacher refrence\data.csv")
# fig = px.scatter(df, x="Population", y="Per capita",
# 	          size="Percentage",color="Country",
#                    size_max=60)
# fig.show()
Exemple #26
0
 dcc.Dropdown(
     id="event-dd",
     options=[{
         "label": e,
         "value": e
     } for e in ["all", *touch_df["type"].unique()]],
 ),
 html.Div(
     children=[
         html.Div(
             children=[
                 dcc.Graph(
                     id="event-graph",
                     figure=px.scatter(touch_df,
                                       x="x",
                                       y="y",
                                       color="team",
                                       hover_data=["type"],
                                       render_mode="svg"),
                 )
             ],
             className="six columns",
         ),
         html.Div(
             children=[
                 dcc.Graph(
                     id="stat-graph",
                     figure=px.bar(
                         stat_df.pipe(
                             lambda df: df.groupby("stat")["value"].sum().
                             rename("sum").reset_index().merge(df)).assign(
                                 rel=lambda df: df["value"] / df["sum"]),
Exemple #27
0

# In[243]:


# Histogram of the "Player" column: one bar per player, height = row count.
figb = px.histogram(
    dt,
    x="Player",
    title="How many times has each player been mentioned in the Top 1000 posts this year?",
    template="plotly_dark",
)
figb.show()
figb.write_html("FigB.html")


# In[253]:


# Upvotes per post, grouped by player and coloured by the "Awards" column.
upvote_options = {
    "x": "Upvotes",
    "y": "Player",
    "color": "Awards",
    "hover_name": "Title",
    "color_continuous_scale": ["blue", "yellow", "purple", "red"],
    "template": "plotly_dark",
    "title": "Distribution of Upvotes on Posts about each Player",
}
figc = px.scatter(dt, **upvote_options)
figc.show()
figc.write_html("FigC.html")


# In[245]:


# Same view as above, with comment counts on the x axis instead of upvotes.
comment_options = {
    "x": "Comments",
    "y": "Player",
    "color": "Awards",
    "hover_name": "Title",
    "color_continuous_scale": ["blue", "yellow", "purple", "red"],
    "template": "plotly_dark",
    "title": "Distribution of Comments on Posts about each Player",
}
figd = px.scatter(dt, **comment_options)
figd.show()
figd.write_html("FigD.html")
import dash_core_components as dcc
import dash_html_components as html
import plotly.express as px
import plotly.graph_objects as go
import pandas as pd


# Read the Titanic table through the application's database engine and turn
# the passenger class into strings (presumably so it is coloured as a
# discrete category; the category order is fixed explicitly below).
df = pd.read_sql_table("dataset_Titanic_Dataset", db.engine)
df["Pclass"] = df["Pclass"].astype(str)

# Bubble chart of age against fare, sized by the "SibSp" column.
scatter_options = dict(
    x="Fare",
    y="Age",
    size="SibSp",
    size_max=60,
    color="Pclass",
    category_orders={"Pclass": ["1", "2", "3"]},
    hover_name="Name",
    hover_data=["Sex", "Survived"],
    log_x=False,
)
fig_scatter = px.scatter(df, **scatter_options)


# Column shortcuts reused by the graph_objects traces.
Fare, Age = df["Fare"], df["Age"]

fig_markers = go.Figure()
# Add traces
markers_trace = go.Scatter(x=Fare, y=Age, mode="markers", name="markers")
fig_markers.add_trace(markers_trace)
fig_markers.add_trace(
    go.Scatter(x=Fare, y=Age, mode="lines+markers", name="lines+markers")
Exemple #29
0
def plotFigure(datapath):
    """Show a scatter chart of "Days Present" against "Marks In Percentage".

    The CSV file at *datapath* is read with ``csv.DictReader`` and the rows
    are passed directly to Plotly Express; the chart is displayed before the
    file is closed. Nothing is returned.

    NOTE(review): ``csv.DictReader`` yields string values, so the axes are
    presumably categorical rather than numeric - confirm this is intended.
    """
    with open(datapath) as source:
        rows = csv.DictReader(source)
        chart = px.scatter(rows, x="Days Present", y="Marks In Percentage")
        chart.show()
def update_graph1(data, tab):
    """Return the stat-vs-overall scatter figure for the 'tab-gen' tab.

    Args:
        data: DataFrame serialised as JSON in pandas' ``orient='split'``
            layout (assumed to be a JSON string, not a path or URL).
        tab: Identifier of the active tab.

    Returns:
        ``None`` when *tab* is not ``'tab-gen'``; otherwise a Plotly Express
        scatter of "stat_value" against "Overall", coloured by "Position".
    """
    if tab != 'tab-gen':
        return None

    from io import StringIO

    # Passing a literal JSON string to read_json is deprecated in pandas;
    # wrapping it in a file-like object is the supported form. Non-string
    # inputs are passed through untouched.
    source = StringIO(data) if isinstance(data, str) else data
    dff = pd.read_json(source, orient='split')
    return px.scatter(dff, x="stat_value", y="Overall", color="Position")