コード例 #1
0
ファイル: chart.py プロジェクト: indiesoul2/stock
def balance_chart(input_ticker, balance_df):
    """Draw the two balance-sheet charts for one ticker in Streamlit.

    Chart 1 stacks the ``totalLiabilities`` and ``totalShareholderEquity``
    columns as bars on the primary y-axis and overlays the ``Debt/Equity``,
    ``QuickRatio`` and ``유동부채/자기자본`` columns as lines on the secondary
    y-axis.  Chart 2 shows ``현금성자산비율`` as bars (legend "Cash/Assets")
    and ``무형자산비율`` as a line (legend "intangible/Assets").

    Args:
        input_ticker: Ticker string placed in front of both chart titles.
        balance_df: DataFrame whose index supplies the x values and which
            holds the columns named above.
    """
    # ---- Chart 1: debt ratio, current ratio, quick ratio ----
    st.subheader('Asset, Liabilities, ShareholderEquity')
    periods = balance_df.index
    ticker_tag = '(' + input_ticker + ') '
    heading = dict(text=ticker_tag + '<b>Asset & Liabilities</b>',
                   x=0.5,
                   y=0.85)
    chart = make_subplots(specs=[[{'secondary_y': True}]])
    # Only the two components are stacked; 'totalAssets' is not plotted.
    bar_columns = ['totalLiabilities', 'totalShareholderEquity']
    line_columns = ['Debt/Equity', 'QuickRatio', '유동부채/자기자본']

    for column, shade in zip(bar_columns, marker_colors):
        values = balance_df[column]
        bar = go.Bar(name=column,
                     x=periods,
                     y=values,
                     text=values,
                     textposition='outside',
                     marker_color=shade)
        chart.add_trace(bar, secondary_y=False)

    for column, shade in zip(line_columns, marker_colors):
        values = balance_df[column]
        line = go.Scatter(mode='lines+markers+text',
                          name=column,
                          x=periods,
                          y=values,
                          text=values,
                          textposition='top center',
                          marker_color=shade)
        chart.add_trace(line, secondary_y=True)

    chart.update_traces(texttemplate='%{text:.3s}')

    # Primary axis spans twice the largest value of the first bar column so
    # the stacked bars and their outside labels have headroom.
    bar_peak = max(balance_df[bar_columns[0]])
    chart.update_yaxes(range=[0, bar_peak * 2], secondary_y=False)
    # Secondary axis is scaled from the first line column only.
    ratio_peak = max(balance_df[line_columns[0]])
    chart.update_yaxes(range=[-ratio_peak, ratio_peak * 1.2],
                       secondary_y=True)

    chart.update_yaxes(title_text="Liabilities Rate",
                       showticklabels=True,
                       showgrid=True,
                       zeroline=True,
                       zerolinecolor='LightPink',
                       ticksuffix="%",
                       secondary_y=True)
    chart.update_yaxes(title_text="Asset",
                       showticklabels=True,
                       showgrid=False,
                       zeroline=True,
                       tickprefix="$",
                       secondary_y=False)
    chart.update_layout(title=heading,
                        titlefont_size=15,
                        legend=dict(orientation="h"),
                        template=template)
    # The "myID" template is applied last and therefore wins.
    chart.update_layout(barmode='stack', template="myID")
    st.plotly_chart(chart)

    # ---- Chart 2: intangible-asset ratio and cash-asset ratio ----
    periods = balance_df.index
    heading = dict(
        text=ticker_tag +
        '<b>IntangibleAssets & Cash And ShortTermInvestments</b>',
        x=0.5,
        y=0.85)
    chart = make_subplots(specs=[[{'secondary_y': True}]])
    intangible_column, cash_column = '무형자산비율', '현금성자산비율'
    intangible_label, cash_label = 'intangible/Assets', 'Cash/Assets'

    cash_values = balance_df[cash_column]
    chart.add_trace(go.Bar(name=cash_label,
                           x=periods,
                           y=cash_values,
                           text=cash_values,
                           textposition='outside',
                           marker_color=marker_colors[0]),
                    secondary_y=False)
    intangible_values = balance_df[intangible_column]
    chart.add_trace(go.Scatter(mode='lines+markers+text',
                               name=intangible_label,
                               x=periods,
                               y=intangible_values,
                               text=intangible_values,
                               textposition='top center',
                               marker_color=marker_colors[2]),
                    secondary_y=True)

    chart.update_traces(texttemplate='%{text:.3s}')
    chart.update_yaxes(title_text="Cash/Assets",
                       showticklabels=True,
                       showgrid=True,
                       zeroline=True,
                       ticksuffix="%",
                       secondary_y=False)
    chart.update_yaxes(title_text="intangible/Assets",
                       showticklabels=True,
                       showgrid=False,
                       zeroline=True,
                       ticksuffix="%",
                       secondary_y=True)
    chart.update_layout(title=heading,
                        titlefont_size=15,
                        legend=dict(orientation="h"),
                        template=template)
    chart.update_layout(template="myID")
    st.plotly_chart(chart)
コード例 #2
0
ファイル: app2.py プロジェクト: Yadav-Shubham/digilab
def app():
    """Render the Streamlit page: upload, visualise, preprocess, split, train.

    The page is rebuilt top to bottom on every Streamlit rerun:

    1. "Upload" expander - reads the uploaded file into a DataFrame and can
       display it.
    2. "Let's Visualise" expander - draws the chart type picked by the user.
    3. Three columns holding the preprocessing, train/test split and
       classifier (LVQ or PNN) controls, plus a wide column for the "Train"
       button and the resulting scores.

    Each later stage needs the earlier ones to have been enabled through
    their checkboxes.  When a prerequisite is missing an ``st.error`` message
    is shown instead of failing with a ``NameError``.
    """

    def upload_data(uploaded_file):
        """Read a ';'-separated CSV.

        Returns the DataFrame, the list of float64/int64 column names and
        the index of object-dtype columns.
        """
        df = pd.read_csv(uploaded_file, sep=';')
        numeric_cols = list(df.select_dtypes(['float64', 'int64']).columns)
        text_data = df.select_dtypes(['object'])
        text_cols = text_data.columns
        return df, numeric_cols, text_cols

    st.subheader('Visualization')
    st.info(
        'Exploring the world of Machine Learning and Artificial Intelligence with the magic of data'
    )
    with st.beta_expander("Upload"):
        col1, col2 = st.beta_columns(2)
        with col1:
            uploaded_file = st.file_uploader(label="Upload your csv file:",
                                             type=['csv', 'xlsx'])
            if uploaded_file is not None:
                try:
                    df, numeric_cols, text_cols = upload_data(uploaded_file)
                except Exception:
                    # CSV parsing failed, so treat the upload as an Excel
                    # workbook.  Rewind first: the failed CSV attempt may
                    # have consumed part of the buffer.
                    uploaded_file.seek(0)
                    df = pd.read_excel(uploaded_file)
                    numeric_cols = list(
                        df.select_dtypes(['float', 'int']).columns)
        try:
            if uploaded_file is not None:
                if st.button('View Data'):
                    # Cosmetic progress counter shown before the table.
                    latest_iteration = st.empty()
                    for i in range(100):
                        latest_iteration.info(f' {i + 1} %')
                        time.sleep(0.05)
                    time.sleep(0.2)
                    latest_iteration.empty()
                    st.info(uploaded_file.name)
                    st.write(df)
                    x_val = df.shape[0]
                    y_val = df.shape[1]
                    st.write("Data-shape :", x_val, "Features :", y_val)
            else:
                st.error("Please Upload a File")
        except Exception as e:
            # Report the problem instead of silently swallowing it.
            st.error(f"Could not display the uploaded data: {e}")
    with st.beta_expander("Let's Visualise"):
        col3, col4 = st.beta_columns((1, 3))
        if uploaded_file is not None:
            # Settings live in the narrow column (col3); every chart is
            # rendered in the wide column (col4).
            with col3:
                chart_select = st.selectbox(label="Select the chart-type",
                                            options=[
                                                'Scatter-plots', 'Histogram',
                                                'Distplot', 'Box-plot',
                                                'Violin-plot', 'Line-chart',
                                                'Heat-map'
                                            ])
                if chart_select == 'Scatter-plots':
                    st.subheader("Scatter-plot Settings:")
                    x_values = st.selectbox('X-axis', options=numeric_cols)
                    y_values = st.selectbox('Y-axis', options=numeric_cols)
                    with col4:
                        plot = px.scatter(data_frame=df,
                                          x=x_values,
                                          y=y_values)
                        st.plotly_chart(plot)
                if chart_select == 'Histogram':
                    st.subheader("Histogram Settings:")
                    x_values = st.selectbox('value', options=numeric_cols)
                    x_val = np.array(df[x_values])
                    fig, ax = plt.subplots(figsize=(15, 9))
                    sns.set_style("dark")
                    sns.set_style("darkgrid")
                    sns.histplot(data=x_val, kde=True)
                    with col4:
                        st.pyplot(fig)
                if chart_select == 'Distplot':
                    st.subheader("Distplot Settings:")
                    x_values = st.selectbox('value', options=numeric_cols)
                    x_val = np.array(df[x_values])
                    fig, ax = plt.subplots(figsize=(15, 9))
                    sns.set_style("dark")
                    sns.set_style("darkgrid")
                    sns.distplot(x_val)
                    with col4:
                        st.pyplot(fig)
                if chart_select == 'Box-plot':
                    st.subheader("Box-plot Settings:")
                    x_values = st.selectbox('X-axis', options=numeric_cols)
                    y_values = st.selectbox('Y-axis', options=numeric_cols)
                    with col4:
                        plot = px.box(data_frame=df, x=x_values, y=y_values)
                        st.plotly_chart(plot)
                if chart_select == 'Violin-plot':
                    st.subheader("Violin-plot Settings:")
                    x_values = st.selectbox('X-axis', options=numeric_cols)
                    y_values = st.selectbox('Y-axis', options=numeric_cols)
                    with col4:
                        plot = px.violin(data_frame=df,
                                         x=x_values,
                                         y=y_values,
                                         points='all',
                                         box=True)
                        st.plotly_chart(plot)
                if chart_select == 'Heat-map':
                    st.subheader('Heat-map')

                    @st.cache
                    def create_data():
                        data_val = pd.DataFrame(df)
                        return data_val

                    data_val = create_data()
                    fig, ax = plt.subplots(figsize=(15, 9))
                    sns.set_style("darkgrid")
                    sns.set_style("dark")
                    sns.set_theme(style='darkgrid', palette='deep')
                    sns.heatmap(data_val.corr(),
                                ax=ax,
                                annot=True,
                                fmt='.3f',
                                linewidths=.9,
                                cbar_kws={"orientation": "horizontal"},
                                cmap='BuPu')
                    with col4:
                        st.pyplot(fig)
                if chart_select == 'Line-chart':
                    print(uploaded_file.name)
                    st.subheader("Line-3d-chart Settings:")
                    # The line chart is only offered for the two student
                    # datasets, identified by file name.
                    if uploaded_file.name == 'student-por.csv' or uploaded_file.name == 'student-mat.csv':
                        error_entry = st.success("Grade-column created!!")
                        time.sleep(0.1)
                        error_entry.empty()
                        # Bucket the G3 score into letter grades F/D/C/B/A.
                        grade = []
                        dgp = df
                        for i in dgp['G3'].values:
                            if i in range(0, 10):
                                grade.append('F')
                            elif i in range(10, 12):
                                grade.append('D')
                            elif i in range(12, 14):
                                grade.append('C')
                            elif i in range(14, 16):
                                grade.append('B')
                            else:
                                grade.append('A')
                        se = pd.Series(grade)
                        dgp['Grade'] = se.values
                        ncols = list(
                            dgp.select_dtypes(['float64', 'int64']).columns)
                        feature_selection = st.multiselect(
                            label="Features to plot",
                            options=ncols,
                            default=ncols[0])
                        # Options must match the letters generated above;
                        # there is no "E" grade, the lowest bucket is "F".
                        feature_ticker = st.selectbox(
                            'Feature ticker',
                            options=list(["A", "B", "C", "D", "F"]))
                        print(feature_selection)
                        if feature_selection:
                            df2 = dgp[dgp['Grade'] == feature_ticker]
                            df_features = df2[feature_selection]
                            with col4:
                                plot = px.line(data_frame=df_features,
                                               x=df_features.index,
                                               y=feature_selection)
                                st.plotly_chart(plot)
                        else:
                            st.error("Please select one Feature-selection")

        else:
            st.error("Please upload file in 'Upload' section")
    st.subheader("Pre-processing, Spliting, Training")
    col6, col7, col8 = st.beta_columns((1, 1, 1))
    col9, col10 = st.beta_columns((6, 1))
    if uploaded_file is not None:
        # Outputs of each stage.  They stay None until the user enables the
        # matching checkbox, which lets later stages detect a skipped step.
        d1 = X = Y = None
        xTrain = xTest = yTrain = yTest = None
        with col6:
            pg = st.beta_expander("Preprocessing")
            with pg:
                ppd = st.checkbox(label="Preprocess-data")
                if ppd:
                    # NOTE: `dataset`, `Data1` and `d1` all alias `df`; the
                    # encoding below modifies the uploaded frame in place.
                    dataset = df
                    sc = {
                        'GP': 1,
                        'MS': 2,
                    }
                    parent = {
                        'mother': 1,
                        'father': 2,
                        'other': 3,
                    }
                    reas = {
                        'home': 1,
                        'reputation': 2,
                        'course': 3,
                        'other': 4,
                    }
                    mjob = {
                        'teacher': 1,
                        'health': 2,
                        'services': 3,
                        'at_home': 4,
                        'other': 5,
                    }
                    fjob = {
                        'teacher': 1,
                        'health': 2,
                        'services': 3,
                        'at_home': 4,
                        'other': 5,
                    }
                    change = {
                        'yes': 1,
                        'no': 0,
                    }

                    dataset['address'].replace(to_replace="U",
                                               value=1,
                                               inplace=True)
                    dataset['address'].replace(to_replace="R",
                                               value=2,
                                               inplace=True)
                    dataset['famsize'].replace(to_replace="LE3",
                                               value=1,
                                               inplace=True)
                    dataset['famsize'].replace(to_replace="GT3",
                                               value=2,
                                               inplace=True)
                    dataset['Pstatus'].replace(to_replace="T",
                                               value=1,
                                               inplace=True)
                    dataset['Pstatus'].replace(to_replace="A",
                                               value=2,
                                               inplace=True)
                    dataset['romantic'] = dataset['romantic'].map(change)
                    dataset['internet'] = dataset['internet'].map(change)
                    dataset['famsup'] = dataset['famsup'].map(change)
                    dataset['schoolsup'] = dataset['schoolsup'].map(change)
                    dataset['sex'].replace(to_replace="M",
                                           value=1,
                                           inplace=True)
                    dataset['sex'].replace(to_replace="F",
                                           value=2,
                                           inplace=True)
                    dataset['Mjob'] = dataset['Mjob'].map(mjob)
                    dataset['Fjob'] = dataset['Fjob'].map(fjob)
                    dataset['activities'] = dataset['activities'].map(change)
                    dataset['paid'] = dataset['paid'].map(change)
                    dataset['nursery'] = dataset['nursery'].map(change)
                    dataset['higher'] = dataset['higher'].map(change)
                    dataset['reason'] = dataset['reason'].map(reas)
                    dataset['guardian'] = dataset['guardian'].map(parent)
                    dataset['school'] = dataset['school'].map(sc)
                    # Numeric grade label derived from G3 (4 = lowest
                    # bucket, 0 = highest bucket).
                    grade = []
                    for i in dataset['G3'].values:
                        if i in range(0, 10):
                            grade.append(4)
                        elif i in range(10, 12):
                            grade.append(3)
                        elif i in range(12, 14):
                            grade.append(2)
                        elif i in range(14, 16):
                            grade.append(1)
                        else:
                            grade.append(0)

                    Data1 = dataset
                    se = pd.Series(grade)
                    Data1['Grade'] = se.values
                    # Drop rows whose first-period or final grade is 0.
                    dataset.drop(dataset[dataset.G1 == 0].index, inplace=True)
                    dataset.drop(dataset[dataset.G3 == 0].index, inplace=True)
                    d1 = dataset
                    d1['All_Sup'] = d1['famsup'] & d1['schoolsup']

                    def max_parenteducation(d1):
                        return (max(d1['Medu'], d1['Fedu']))

                    d1['maxparent_edu'] = d1.apply(
                        lambda row: max_parenteducation(row), axis=1)
                    d1['more_high'] = d1['higher'] & (d1['schoolsup']
                                                      | d1['paid'])
                    d1['All_alc'] = d1['Walc'] + d1['Dalc']
                    d1['Dalc_per_week'] = d1['Dalc'] / d1['All_alc']
                    d1.drop(['Dalc'], axis=1, inplace=True)
                    d1.drop(['Walc'], axis=1, inplace=True)
                    d1['studytime_ratio'] = d1['studytime'] / (d1[[
                        'studytime', 'traveltime', 'freetime'
                    ]].sum(axis=1))
                    d1.drop(['studytime'], axis=1, inplace=True)
                    d1.drop(['Fedu'], axis=1, inplace=True)
                    d1.drop(['Medu'], axis=1, inplace=True)
                    # Feature/target selection is positional.
                    # NOTE(review): column 28 is presumably 'Grade' after
                    # the drops above - confirm against the dataset layout.
                    X = d1.iloc[:, [
                        1, 2, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,
                        19, 20, 21, 22, 23, 24, 25, 26, 27, 29, 30, 31, 32, 33,
                        34
                    ]]
                    Y = d1.iloc[:, [28]]
                    time.sleep(0.01)
                    dp = st.success("Data-Preprocessed")
                    time.sleep(1)
                    dp.empty()

        with col7:
            sg = st.beta_expander("Splitting")
            with sg:
                sd = st.checkbox(label="Splitting Training Data")
                if sd:
                    test_size = st.number_input('Test-size', value=0.3)
                    random_state = st.number_input('Random-state', value=42)
                    if X is None:
                        st.error(
                            "Please preprocess the data before splitting.")
                    else:
                        xTrain, xTest, yTrain, yTest = train_test_split(
                            X,
                            Y,
                            test_size=test_size,
                            random_state=random_state)

        with col8:
            tdd = st.beta_expander("Train")
            with tdd:
                classifier_name = st.selectbox("Select Classifier :",
                                               ("LVQ", "PNN"))
                if classifier_name == "LVQ":
                    n_inp1 = n_cla1 = step1 = None
                    check_box5 = st.checkbox(label="LVQ Classifier-Settings")
                    if check_box5:
                        if d1 is None:
                            st.error(
                                "Please preprocess the data before "
                                "configuring the classifier.")
                        else:
                            feat_range = d1.shape[1]
                            n_inp1 = st.selectbox('Features-inputs',
                                                  range(feat_range))
                            n_cla1 = st.number_input('Classes', 0)
                            step1 = st.number_input('Step', 0.01)
                    with col9:
                        t = st.button("Train")
                        if t:
                            if xTrain is None:
                                st.error(
                                    "Please preprocess and split the data "
                                    "before training.")
                            elif n_inp1 is None:
                                st.error(
                                    "Please enable 'LVQ Classifier-Settings' "
                                    "before training.")
                            else:
                                Lvq_net = algorithms.LVQ21(n_inputs=n_inp1,
                                                           n_classes=n_cla1,
                                                           verbose=False,
                                                           step=step1,
                                                           shuffle_data=False)
                                Lvq_net.train(xTrain, yTrain, epochs=100)
                                y_training = Lvq_net.predict(xTrain)
                                y_prediction = Lvq_net.predict(xTest)
                                time.sleep(0.1)
                                zz = st.balloons()
                                st.markdown(
                                    'Prediction accuracy of LVQ Train data : ',
                                    unsafe_allow_html=True)
                                st.write('{:.2%}\n'.format(
                                    metrics.accuracy_score(
                                        yTrain, y_training)))
                                st.markdown(
                                    'Prediction accuracy of LVQ Test data : ',
                                    unsafe_allow_html=True)
                                st.write('{:.2%}\n'.format(
                                    metrics.accuracy_score(
                                        yTest, y_prediction)))
                                cohen_score = cohen_kappa_score(
                                    yTest, y_prediction)
                                st.markdown('LVQ Cohen-Kappa Score :',
                                            unsafe_allow_html=True)
                                st.write(cohen_score)
                                time.sleep(1)
                                zz.empty()

                if classifier_name == "PNN":
                    std_dev = None
                    check_box5 = st.checkbox(label="PNN Classifier-Settings")
                    if check_box5:
                        std_dev = st.number_input("Standard-deviation", 5)
                    with col9:
                        p = st.button("Train")
                        if p:
                            if xTrain is None:
                                st.error(
                                    "Please preprocess and split the data "
                                    "before training.")
                            elif std_dev is None:
                                st.error(
                                    "Please enable 'PNN Classifier-Settings' "
                                    "before training.")
                            else:
                                pnn = algorithms.PNN(std=std_dev,
                                                     verbose=False)
                                pnn.train(xTrain, yTrain)
                                y_training1 = pnn.predict(xTrain)
                                y_prediction1 = pnn.predict(xTest)
                                time.sleep(0.1)
                                xy = st.balloons()
                                st.markdown(
                                    'Prediction accuracy of PNN Train data : ',
                                    unsafe_allow_html=True)
                                st.write('{:.2%}\n'.format(
                                    metrics.accuracy_score(
                                        yTrain, y_training1)))
                                st.markdown(
                                    'Prediction accuracy of PNN Test data : ',
                                    unsafe_allow_html=True)
                                st.write('{:.2%}\n'.format(
                                    metrics.accuracy_score(
                                        yTest, y_prediction1)))
                                cohen_score = cohen_kappa_score(
                                    yTest, y_prediction1)
                                st.markdown('PNN Cohen-Kappa Score :',
                                            unsafe_allow_html=True)
                                st.write(cohen_score)
                                time.sleep(1)
                                xy.empty()
    else:
        st.error("Please upload a file in 'Upload' section.")
コード例 #3
0
                 y='DECODER.acc',
                 facet_col=sort,
                 hover_data=[
                     'TAPE.stability.S_Corr', 'TAPE.fluorescence.S_Corr',
                     'DMS.protein_g.P_corr', 'DMS.1D5R.P_corr',
                     'DMS.2H11.P_corr'
                 ],
                 color='MODEL',
                 labels={
                     'DECODER.acc': 'Accuracy of decoder',
                     'MODEL_PARAMS.channels': 'chnls:'
                 },
                 barmode="group")
    fig.update_layout(title='Decoder Accuracy', width=900, height=700)

st.plotly_chart(fig)

if st.checkbox("Get  explanation of model names?"):
    st.markdown(expl_model_names())

#-------------------------------------------------------------
#             DOWNSTREAM MODEL COMPARISONS
#-------------------------------------------------------------

if display_mdl_compare:
    st.header("Downstream Model Performances ")

    # plotting options
    st.markdown('Options:')
    # add option to select models to plot
    options_models = ['all'] + [i for i in list(df['models'])]
コード例 #4
0
    # ay=-30,
    bordercolor="#c7c7c7",
    borderwidth=2,
    borderpad=4,
    bgcolor="#ff7f0e",
    opacity=0.7)
fig.update_shapes(dict(xref='x', yref='y'))

config = dict({
    "modeBarButtonsToRemove": ['autoScale2d', 'toggleSpikelines'],
})
fig.update_layout(autosize=False,
                  width=900,
                  height=550,
                  margin=dict(l=20, r=40, b=40, t=70))
st.plotly_chart(fig, config=config)

stem_df = update_stem(
    session_state)  #, taps_per_phase, gen_2X, max_fft, dsp_type)
try:
    # ipdb.set_trace()
    num_rows = len(stem_df)
    if num_rows > 1024:
        div = num_rows // 1024
        stem_df = stem_df.iloc[::div, :]

    fig = plotly_time_helper(stem_df,
                             opacity=[.8] * 2,
                             index_str='sig_idx',
                             y_name='Taps',
                             stem_plot=False,
コード例 #5
0
# New York-Presbyterian rows from each table.  The provider name is upper
# case in the hospital and inpatient tables and mixed case in the outpatient
# table, so each filter uses its own spelling.
is_nyp_hospital = df_hospital_2['hospital_name'] == 'NEW YORK-PRESBYTERIAN HOSPITAL'
is_nyp_inpatient = df_inpatient_2['provider_name'] == 'NEW YORK-PRESBYTERIAN HOSPITAL'
is_nyp_outpatient = df_outpatient_2['provider_name'] == 'New York-Presbyterian Hospital'
hospital_nyp = df_hospital_2[is_nyp_hospital]
inpatient_nyp = df_inpatient_2[is_nyp_inpatient]
outpatient_nyp = df_outpatient_2[is_nyp_outpatient]

# Hospital-type counts, shown as a table.
st.subheader('**Hospital Types Within NY**')
hospital_type_counts = hospitals_ny['hospital_type'].value_counts()
bar1 = hospital_type_counts.reset_index()
st.dataframe(bar1)

st.markdown('This chart shows the number of hospitals within NY that identify as acute care, psychiatric, critical access, acute care - department of defense, and childrens. The majority of hospitals identify as acute care with 144 hospitals, followed by psychiatric with 27 hospitals, critical access with 18 hospitals, acute care - department of defense with 1 hospital, and childrens with 1 hospital.')

# Same counts as a pie chart.
st.subheader('**Types of Hospitals Within NY**')
fig = px.pie(bar1, values='hospital_type', names='index')
st.plotly_chart(fig)

st.markdown('This pie chart visually displays the same data presented in the previous bar chart. The largest percentage of NY hospitals are classified as acute care with 75.4%.')

# Map of hospital locations.
st.subheader('**NY Hospital Locations**')

# Split the 'location' text on spaces into three pieces named Point/lon/lat,
# strip the remaining parentheses, drop incomplete rows and convert the
# coordinates to numbers so st.map can plot them.
location_parts = hospitals_ny['location'].str.strip('()').str.split(' ', expand=True)
hospitals_ny_gps = location_parts.rename(columns={0: 'Point', 1: 'lon', 2: 'lat'})
hospitals_ny_gps['lon'] = hospitals_ny_gps['lon'].str.strip('(')
hospitals_ny_gps = hospitals_ny_gps.dropna()
for coord in ('lon', 'lat'):
    hospitals_ny_gps[coord] = pd.to_numeric(hospitals_ny_gps[coord])

st.map(hospitals_ny_gps)

st.markdown('This is an interactive map that displays the locations for the NY hospital sites found within this dataset.')
コード例 #6
0
def show_graph2(df, n, cu):
    """Plot column(s) ``cu`` against ``'date'`` for the slice ``df[-n:]``.

    The figure is drawn as a line chart with markers and rendered with
    ``st.plotly_chart``.
    """
    recent_rows = df[-n:]
    line_fig = px.line(recent_rows, x='date', y=cu)
    line_fig.update_traces(mode="markers+lines")
    st.plotly_chart(line_fig)
コード例 #7
0
def main():
    """Build the ancestry-visualisation Streamlit page.

    Steps, in order:

    * render ``intro.md``;
    * load the 1000 Genomes samples and the AISNP set chosen in the sidebar;
    * encode the genotypes and reduce their dimensionality with the chosen
      method;
    * if the user uploads a genotype file that can be parsed, add the user's
      sample to the 3-D plot, report how many AISNPs were missing and show
      the k-NN population prediction with its probabilities;
    * otherwise show the 3-D plot of the reference samples only;
    * optionally show the reference genotype table, then render
      ``details.md``.
    """
    # Render the readme as markdown using st.markdown.
    st.markdown(get_file_content_as_string("intro.md"))

    # get the 1000 genomes samples
    dfsamples = get_1kg_samples_app()

    # Once we have the dependencies, add a selector for the app mode on the sidebar.
    st.sidebar.title("Visualization Settings")
    # select which set of SNPs to explore
    aisnp_set = st.sidebar.radio(
        "Set of ancestry-informative SNPs:",
        ("kidd et al. 55 aisnps", "seldin et al. 128 aisnps"),
    )
    if aisnp_set == "kidd et al. 55 aisnps":
        aisnps_1kg = vcf2df_app("data/aisnps/kidd.aisnp.1kg.vcf", dfsamples)
        n_aisnps = 55
    elif aisnp_set == "seldin et al. 128 aisnps":
        aisnps_1kg = vcf2df_app("data/aisnps/seldin.aisnp.1kg.vcf", dfsamples)
        n_aisnps = 128

    # Encode 1kg data
    X_encoded, encoder = encode_genotypes_app(aisnps_1kg)
    # Dimensionality reduction
    dimensionality_reduction_method = st.sidebar.radio(
        "Dimensionality reduction technique:", ("pca", "umap", "t-SNE"))
    # perform dimensionality reduction on the 1kg set
    X_reduced, reducer = dimensionality_reduction_app(
        X_encoded, algorithm=dimensionality_reduction_method)

    # Which population to plot
    population_level = st.sidebar.radio("Population Resolution:",
                                        ("super population", "population"))

    # predicted population
    knn = KNeighborsClassifier(n_neighbors=9, weights="distance", n_jobs=2)

    # upload the user genotypes file
    user_file = st.sidebar.file_uploader("Upload your genotypes:")
    # userdf stays None when nothing was uploaded or the upload could not be
    # parsed; in both cases only the reference samples are plotted.
    userdf = None
    if user_file is not None:
        try:
            with st.spinner("Uploading your genotypes..."):
                userdf = SNPs(user_file.getvalue()).snps
        except Exception as e:
            st.error(
                f"Sorry, there was a problem processing your genotypes file.\n {e}"
            )

    if userdf is not None:
        # filter and encode the user record
        user_record, aisnps_1kg = filter_user_genotypes_app(userdf, aisnps_1kg)
        user_n_missing = (user_record.drop(
            columns=["super population", "population", "gender"]).isnull().sum(
                axis=1)["your_sample"])
        user_encoded = encoder.transform(user_record)
        X_encoded = np.concatenate((X_encoded, user_encoded))
        del userdf

        # impute the user record and reduce the dimensions
        user_imputed = impute_missing(X_encoded)
        user_reduced = reducer.transform([user_imputed])
        # fit the knn before adding the user sample
        knn.fit(X_reduced, dfsamples[population_level])

        # concat the 1kg and user reduced arrays
        X_reduced = np.concatenate((X_reduced, user_reduced))
        dfsamples.loc["me"] = ["me"] * 3

        # plot
        plotly_3d = plot_3d(X_reduced, dfsamples, population_level)
        st.plotly_chart(plotly_3d, use_container_width=True)

        # missingness
        st.subheader("Missing AIsnps")
        st.text(
            f"Your file upload was missing {user_n_missing} ({round((user_n_missing / n_aisnps) * 100, 1)}%) of the {n_aisnps} total AIsnps.\nThese locations were imputed during prediction."
        )

        # predict the population for the user sample
        user_pop = knn.predict(user_reduced)[0]
        st.subheader(f"Your predicted {population_level}")
        st.text(
            f"Your predicted population using knn classifier is {user_pop}")
        # show the predicted probabilities for each population
        st.subheader(f"Your predicted {population_level} probabilities")
        user_pop_probs = knn.predict_proba(user_reduced)
        user_probs_df = pd.DataFrame([user_pop_probs[0]],
                                     columns=knn.classes_,
                                     index=["me"])
        st.dataframe(user_probs_df)

        show_user_gts = st.sidebar.checkbox("Show Your Genotypes")
        if show_user_gts:
            user_table_title = "Genotypes of Ancestry-Informative SNPs in Your Sample"
            st.subheader(user_table_title)
            st.dataframe(user_record)

    else:
        # plot
        plotly_3d = plot_3d(X_reduced, dfsamples, population_level)
        st.plotly_chart(plotly_3d, use_container_width=True)

    # Collapsable 1000 Genomes sample table
    show_1kg = st.sidebar.checkbox("Show 1k Genomes Genotypes")
    if show_1kg:
        table_title = (
            "Genotypes of Ancestry-Informative SNPs in 1000 Genomes Project Samples"
        )
        with st.spinner("Loading 1k Genomes DataFrame"):
            st.subheader(table_title)
            st.dataframe(aisnps_1kg)

    # Render the readme as markdown using st.markdown.
    st.markdown(get_file_content_as_string("details.md"))
コード例 #8
0
ファイル: app.py プロジェクト: MainakRepositor/Notebooker-Pro
# Sidebar section title shown for each chart type; insertion order is also the
# order in which the chart types are offered in the selector.
_CHART_SETTINGS_TITLES = {
    'Scatterplots': "Scatterplot Settings",
    'Lineplots': "Line Plot Settings",
    'Histogram': "Histogram Settings",
    'Boxplot': "Boxplot Settings",
    'Violinplot': "Violin Plot Settings",
    'Piechart': "Piechart Settings",
}


def _csv_download_link(df, filename):
    """Return an HTML anchor that downloads *df* as a CSV file named *filename*."""
    # NOTE(review): index=False drops the frame's index from the export; the
    # score tables are plotted by index, so confirm this is intended.
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # str -> bytes -> base64 str
    return (f'<a href="data:file/csv;base64,{b64}" download="{filename}">'
            f'Download {filename} File</a>')


def _figure_download_link(plt, filename):
    """Save the current pyplot figure as PDF, close it, and return a download anchor."""
    buffer = io.BytesIO()
    plt.savefig(buffer, format='pdf', bbox_inches='tight')
    plt.close()
    b64 = base64.b64encode(buffer.getvalue()).decode()
    # The payload is a PDF, so advertise it as one (it used to claim image/png).
    return (f'<a href="data:application/pdf;base64,{b64}" download="{filename}">'
            f'Download {filename} File</a>')


def _render_eda():
    """Render the EDA page: upload a CSV and toggle summary views of it."""
    st.subheader("Explanatory Data Analysis")

    data = st.file_uploader("Please upload a CSV dataset ", type=['csv'])

    st.warning('Your dataset goes here...')
    if data is None:
        return

    df = pd.read_csv(data)
    st.dataframe(df)
    st.info('Some useful data insights about your data')
    if st.checkbox("Display shape"):
        r, c = df.shape
        st.write('Rows = ', r, 'Columns = ', c)

    if st.checkbox('Display columns'):
        st.write(df.columns)

    if st.checkbox('Select multiple columns'):
        selected_col = st.multiselect('Select preferred columns', df.columns)
        st.dataframe(df[selected_col])

    if st.checkbox("Head"):
        st.write(df.head())

    if st.checkbox('Tail'):
        st.write(df.tail())

    if st.checkbox('Null values'):
        st.write(df.isnull().sum())

    if st.checkbox('Data types'):
        st.write(df.dtypes)

    if st.checkbox('Random sample'):
        # Sampling more rows than exist raises, so cap at the frame length.
        st.write(df.sample(min(20, len(df))))

    if st.checkbox('Display correlations'):
        # Correlate numeric/bool columns only; text columns make corr() raise
        # on recent pandas versions.
        st.write(df.select_dtypes(include=['number', 'bool']).corr())

    if st.checkbox('Summary'):
        st.write(df.describe(include='all').T)


def _build_chart(chart_select, df, numeric_columns, non_numeric_columns):
    """Render the sidebar controls for *chart_select* and return the Plotly figure."""
    sidebar = st.sidebar
    xy_color_charts = {
        'Scatterplots': px.scatter,
        'Lineplots': px.line,
        'Violinplot': px.violin,
    }
    if chart_select in xy_color_charts:
        x_values = sidebar.selectbox('X axis', options=numeric_columns)
        y_values = sidebar.selectbox('Y axis', options=numeric_columns)
        color_value = sidebar.selectbox("Color", options=non_numeric_columns)
        return xy_color_charts[chart_select](data_frame=df,
                                             x=x_values,
                                             y=y_values,
                                             color=color_value)

    if chart_select == 'Histogram':
        x = sidebar.selectbox('Feature', options=numeric_columns)
        bin_size = sidebar.slider("Number of Bins",
                                  min_value=10,
                                  max_value=100,
                                  value=40)
        color_value = sidebar.selectbox("Color", options=non_numeric_columns)
        # Pass the slider value on; it was previously read and then ignored.
        return px.histogram(x=x,
                            data_frame=df,
                            color=color_value,
                            nbins=bin_size)

    if chart_select == 'Boxplot':
        y = sidebar.selectbox("Y axis", options=numeric_columns)
        x = sidebar.selectbox("X axis", options=non_numeric_columns)
        color_value = sidebar.selectbox("Color", options=non_numeric_columns)
        return px.box(data_frame=df, y=y, x=x, color=color_value)

    # Remaining option: 'Piechart'.
    x_values = sidebar.selectbox('X axis', options=numeric_columns)
    y_values = sidebar.selectbox('Y axis', options=non_numeric_columns)
    return px.pie(data_frame=df, values=x_values, names=y_values)


def _render_visualization():
    """Render the Visualization page: upload a CSV and plot it with Plotly Express."""
    # Kept as module globals for backward compatibility with the original code.
    global numeric_columns
    global non_numeric_columns

    st.subheader("Data Visualization and Graphing")

    st.sidebar.subheader("File Upload")

    # Setup file upload
    uploaded_file = st.sidebar.file_uploader(
        label="Upload your CSV file. (200MB max)", type=['csv'])

    df = None
    if uploaded_file is None:
        st.warning('Data not found')
    else:
        st.success('Your data goes here')
        try:
            df = pd.read_csv(uploaded_file)
        except Exception:
            # Unreadable upload: report it and carry on without data.
            df = None
            st.warning('Data not found')

    numeric_columns = []
    non_numeric_columns = [None]
    if df is not None:
        st.write(df)
        numeric_columns = list(df.select_dtypes(['float', 'int']).columns)
        # None is offered as the "no colour grouping" choice.
        non_numeric_columns = list(df.select_dtypes(['object']).columns)
        non_numeric_columns.append(None)

    chart_select = st.sidebar.selectbox(label="Select the chart type",
                                        options=list(_CHART_SETTINGS_TITLES))

    st.info('The Graphs generated will be displayed here')

    st.sidebar.subheader(_CHART_SETTINGS_TITLES[chart_select])
    if df is None:
        # Nothing to plot yet; skip the controls instead of failing on them.
        return

    try:
        plot = _build_chart(chart_select, df, numeric_columns,
                            non_numeric_columns)
        # display the chart
        st.plotly_chart(plot)
    except Exception as e:
        # Show the failure to the user instead of only printing it server-side.
        st.error(f'Could not draw the chart: {e}')


def _split_and_describe(df, split_size, seed_number):
    """Show the X/Y breakdown of *df* and return its train/test split.

    The last column is the target (Y); every other column is a feature (X).
    Returns ``(X_train, X_test, Y_train, Y_test)``.
    """
    l = len(df)

    X = df.iloc[:, :-1]  # Using all column except for the last column as X
    Y = df.iloc[:, -1]  # Selecting the last column as Y

    st.markdown('**1.2. Dataset dimension**')
    st.write('X (Independent Axis)')
    st.info(X.shape)
    st.write('Y (Dependent Axis)')
    st.info(Y.shape)

    st.markdown('**1.3. Variable details**:')
    st.write('X variable (first few are shown)')
    st.info(list(X.columns[:int(l / 5)]))
    st.write('Y variable')
    st.info(Y.name)

    return train_test_split(X,
                            Y,
                            test_size=split_size,
                            random_state=seed_number)


def _show_score_tables(predictions_train, predictions_test):
    """Display the training/test score tables with CSV download links."""
    st.subheader('2.Model Performance Plot (Training Set)')

    st.write('Training set')
    st.write(predictions_train)
    st.markdown(_csv_download_link(predictions_train, 'training.csv'),
                unsafe_allow_html=True)

    st.write('Test set')
    st.write(predictions_test)
    st.markdown(_csv_download_link(predictions_test, 'test.csv'),
                unsafe_allow_html=True)

    st.subheader('3.Model Performance Plot(Test set)')


def _render_metric_plots(scores,
                         metric,
                         title,
                         file_stem,
                         tall_size,
                         wide_size,
                         unit_range=False):
    """Draw the tall and wide bar plots of *metric*, one bar per row of *scores*.

    The tall figure is only offered as a PDF download; the wide figure is shown
    on the page and offered as a download. With ``unit_range`` the metric axis
    is fixed to [0, 1].
    """
    st.markdown(title)
    # Tall
    plt.figure(figsize=tall_size)
    sns.set_theme(style="darkgrid")
    ax = sns.barplot(y=scores.index, x=metric, data=scores)
    if unit_range:
        ax.set(xlim=(0, 1))
    st.markdown(_figure_download_link(plt, f'plot-{file_stem}-tall.pdf'),
                unsafe_allow_html=True)
    # Wide
    plt.figure(figsize=wide_size)
    sns.set_theme(style="darkgrid")
    ax = sns.barplot(x=scores.index, y=metric, data=scores)
    if unit_range:
        ax.set(ylim=(0, 1))
    plt.xticks(rotation=90)
    st.pyplot(plt)
    st.markdown(_figure_download_link(plt, f'plot-{file_stem}-wide.pdf'),
                unsafe_allow_html=True)


def _build_regression_models(df, split_size, seed_number):
    """Fit LazyRegressor on *df* and report R-squared, RMSE and time per model."""
    l = len(df)
    X_train, X_test, Y_train, Y_test = _split_and_describe(
        df, split_size, seed_number)

    # Build lazy model
    reg = LazyRegressor(verbose=0, ignore_warnings=False, custom_metric=None)
    models_train, predictions_train = reg.fit(X_train, X_train, Y_train,
                                              Y_train)
    models_test, predictions_test = reg.fit(X_train, X_test, Y_train, Y_test)

    _show_score_tables(predictions_train, predictions_test)

    # Negative R-squared values are floored at 0 for plotting.
    predictions_test["R-Squared"] = predictions_test["R-Squared"].clip(
        lower=0)
    _render_metric_plots(predictions_test,
                         "R-Squared",
                         '**R-squared**',
                         'r2',
                         tall_size=(3, 9),
                         wide_size=(12, 3),
                         unit_range=True)

    # RMSE is capped at half the row count, as the section title states.
    predictions_test["RMSE"] = predictions_test["RMSE"].clip(upper=l / 2)
    _render_metric_plots(predictions_test,
                         "RMSE",
                         '**RMSE (capped at l/2)**',
                         'rmse',
                         tall_size=(3, 9),
                         wide_size=(12, 3))

    predictions_test["Time Taken"] = predictions_test["Time Taken"].clip(
        lower=0)
    _render_metric_plots(predictions_test,
                         "Time Taken",
                         '**Calculation time**',
                         'calculation-time',
                         tall_size=(3, 9),
                         wide_size=(9, 3))


def _build_classification_models(df, split_size, seed_number):
    """Fit LazyClassifier on *df* and report accuracy per model."""
    X_train, X_test, Y_train, Y_test = _split_and_describe(
        df, split_size, seed_number)

    # Build lazy model
    clf = LazyClassifier(verbose=0, ignore_warnings=False, custom_metric=None)
    models_train, predictions_train = clf.fit(X_train, X_train, Y_train,
                                              Y_train)
    models_test, predictions_test = clf.fit(X_train, X_test, Y_train, Y_test)

    _show_score_tables(predictions_train, predictions_test)

    predictions_test["Accuracy"] = predictions_test["Accuracy"].clip(lower=0)
    # Saved under "accuracy" names (previously mislabelled as plot-r2-*).
    _render_metric_plots(predictions_test,
                         "Accuracy",
                         '**Accuracy**',
                         'accuracy',
                         tall_size=(5, 12),
                         wide_size=(12, 5),
                         unit_range=True)


def _run_model_section(build_model):
    """Render the upload/parameter sidebar and run *build_model* on the data.

    *build_model* is called as ``build_model(df, split_size, seed_number)``
    with either the uploaded CSV or, on request, a preloaded sample dataset.
    """
    with st.sidebar.header('File Uploader Section'):
        uploaded_file = st.sidebar.file_uploader(
            "Upload an input as CSV file", type=["csv"])

    with st.sidebar.header(
            'Set the optimization parameters\n (Grab the slider and set to any suitable point)'
    ):
        # train_test_split rejects a test fraction of exactly 0 or 1, so keep
        # the slider strictly inside that interval.
        split_size = st.sidebar.slider('Data split ratio (in fraction):',
                                       0.05, 0.95, 0.7, 0.01)
        # Arguments were previously passed as (0, 1, 100, 5): a default of 100
        # outside the 0..1 range, which Streamlit rejects.
        seed_number = st.sidebar.slider('Set the random-seed-value :',
                                        min_value=0,
                                        max_value=100,
                                        value=0,
                                        step=5)

    with st.sidebar.header('Project made by:'):
        st.write("Made by: MAINAK CHAUDHURI")

    #---------------------------------#

    st.subheader('Dataset display')

    if uploaded_file is not None:
        df = pd.read_csv(uploaded_file)
        st.markdown('**Snap of the dataset**')
        st.write(df)
        build_model(df, split_size, seed_number)
        return

    st.info('Upload a file')
    st.info('OR')
    if st.button('Use preloaded data instead'):
        st.info("Dataset used : Pima diabetes")

        # NOTE(review): load_diabetes has a continuous target; confirm it is a
        # sensible sample for the Classification page as well.
        diabetes = load_diabetes()

        X = pd.DataFrame(diabetes.data,
                         columns=diabetes.feature_names).loc[:100]
        Y = pd.Series(diabetes.target, name='response').loc[:100]
        df = pd.concat([X, Y], axis=1)

        st.markdown('Displaying results form a sample preloaded data :')
        st.write(df.head(5))

        build_model(df, split_size, seed_number)


def _render_documentation():
    """Render the static usage guide."""
    st.subheader("How to use Notebooker Pro")
    st.markdown(
        '''The notebooker pro is a user-friendly software designed to help you make a good data science notebook in few steps.
Well, notebooker pro will not be making a notebook for you, but will provide you with all the data insights that you 
will need to put in your kernel. The notebooker pro has been provided with 4 major sections:

i.  **EDA (Explanatory Data Analysis)**  --> used to find important data and statistical insights from the uploaded files

ii. **Visualization** --> Used to perform data visualization with 5 basic important types of graphs

iii.**Regression** --> Loops through **30** different regression models and returns the complexity statistics of the result
		   of regression modelling for your dataset for chosen seed values and size. The only thing to keep in
		   mind while using this is that, the data must be fitting with a regression modelling. Datasets used
		   for classification algorithm might generate vague results. So use a proper dataset.
		   **[eg.: do not use iris,cancer,penguins etc. classifier dataset]**

iv. **Classification** --> Loops through **30** different classification models and returns the complexity statistics of the result
		   of classification modelling for your dataset for chosen seed values and size. The only thing to keep in
		   mind while using this is that, the data must be fitting with a classification modelling. Datasets used
		   for non-classification algorithm might generate vague results. So use a proper dataset.
		   

**Features:**

**Upload file** => Upload only csv files.

**Data split**  => This is a linear slidebar, that will let you choose split ratio between 0 to 1

**Random seed** => Helps to randomize the data in training and testing data samples. 
	       You may change to get the best accuracy of for a particular model.
	       ''')


def _render_about():
    """Render the About page with the author's contact button."""
    st.subheader("About Us 😊")
    st.markdown(
        '''This web application is made by Mainak Chaudhuri. He is a Computer Science and Engineering student of the SRM University, studying in the second year of B.Tech. The main idea of this application is to help beginners and data science enthusiasts chalk out a plan for preparing a good data science notebook, for college projects, online courses or to add in their portfolio. This application accepts a dataset from the user and displays useful insights about the data. Additionally, it also helps the user visualize the data, choose the best supervised machine learning model (regression & classifaction handled separately) and decide the best suit depending on the dataset size,split and seed values which can be set by the user with the help of the side panel. This application claims to be the first of it's kind ever developed till date by a single developer and also has a serving history and positive reports from 180+ users.
                    
                    
     👉   N.B. : This application is an intellectual property of Mainak Chaudhuri and hence holds a reserved copyright. Any form of illegal immitation of graphics, contents or documentation without prior permission of the owner if proved, can result in legal actions against the plagiarist.'''
    )

    st.success('For more info, feel free to contact @ : ')
    url = 'https://www.linkedin.com/in/mainak-chaudhuri-127898176/'

    if st.button('Mainak Chaudhuri'):
        # NOTE(review): this opens the tab in a browser on the machine running
        # the app, which is only the visitor's machine when run locally.
        webbrowser.open_new_tab(url)


def main():
    """Streamlit entry point: pick a section in the sidebar and render it.

    Sections: EDA, Visualization, Regression, Classification, Documentation
    and About Us. Each section is drawn by its own private helper.
    """
    activities = [
        'EDA', 'Visualization', 'Regression', 'Classification',
        'Documentation', 'About Us'
    ]
    option = st.sidebar.selectbox('Choose a section', activities)
    st.sidebar.markdown(
        '''Use this section for finding useful insights about your data,and feel free to use them in your notebooks
                                             
    🎯   Version : 1.0.2  ''')

    if option == 'EDA':
        _render_eda()

    elif option == 'Visualization':
        _render_visualization()

    elif option == 'Regression':
        st.subheader("Regression ML Model Builder")
        _run_model_section(_build_regression_models)

    elif option == 'Classification':
        st.subheader("Classifier ML Model Builder")
        _run_model_section(_build_classification_models)

    elif option == 'Documentation':
        _render_documentation()

    elif option == 'About Us':
        _render_about()
コード例 #9
0
        plotly_fig1 = px.bar(excluded_df,
                             x=excluded_df['Department'],
                             y=excluded_df['Average'],
                             title='Average grades by department',
                             labels=dict(x="Department", y="Average Grade"))
        plotly_fig1.update_traces(marker_color='rgb(221,55,55)')
        # Centre the plot title.
        plotly_fig1.update_layout(
            title={
                'text': 'Average grades by department',
                'y': 0.9,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top'
            })
        st.plotly_chart(plotly_fig1)

        plotly_fig2 = px.bar(levels,
                             x='Level',
                             y='Average',
                             hover_data=['Level', 'Average'],
                             color='Module Count',
                             labels={"Module Count": "Number of modules used"},
                             height=400,
                             color_continuous_scale=[
                                 "rgb(221,55,55)", "rgb(137,164,204)",
                                 "rgb(30,77,155)"
                             ])
        # Centre the plot title.
        plotly_fig2.update_layout(title={
            'text': 'Average grades by level',
コード例 #10
0
                 args=[{
                     'visible': [True, True]
                 }, {
                     'title': 'Linear scale',
                     'yaxis': {
                         'type': 'linear',
                         'domain': [0.35, 1]
                     }
                 }])
        ]),
    )
])

# Figure-level layout: fixed 900x700 canvas, the scale-switching menu, and the
# vertical share of each subplot (upper panel 0.35-1, lower panel 0-0.3).
layout = {
    'updatemenus': updatemenus,
    'title': 'Linear scale',
    'width': 900,
    'height': 700,
    'autosize': False,
    'yaxis1': {'domain': [0.35, 1]},
    'yaxis2': {'domain': [0, .3]},
}
fig.update_layout(layout)

# One trace per subplot row, both in the single column.
fig.add_trace(trace, row=1, col=1)
fig.add_trace(trace2, row=2, col=1)

# Render at the fixed size configured above rather than stretching to the page.
st.plotly_chart(fig, use_container_width=False)

# st.plotly_chart(go.Figure(go.Bar(x=table['date'], y=table['Δ'], name='diff. of COVID cases')))
コード例 #11
0
ファイル: Aukey_ranks.py プロジェクト: raceychan/Aukey
def main():
    """Streamlit entry point for the listings ranking dashboard.

    The sidebar picks one of four sections: a home page with static text, a
    ranking page (per-category pivot of the maximum ranking per ASIN and date,
    plus a per-ASIN line chart and a raw-data viewer), and a query page that
    filters the listings by category, date and ASIN. The fourth menu entry
    renders nothing.
    """
    sections = ['主页', '排名', '查询', '其他']
    selected = st.sidebar.selectbox('工具箱', sections)

    @st.cache
    def load_data():
        """Read the listings CSV and expose its snapshot time as a day-level 'date'."""
        frame = pd.read_csv('project_20_listings.csv')
        frame.rename(columns={'snapshotted_at': 'date'}, inplace=True)
        # Drop the time-of-day part, then convert back to datetime values.
        day_only = pd.to_datetime(frame['date']).dt.date
        frame['date'] = pd.to_datetime(day_only)
        return frame

    listings = load_data()

    if selected == '主页':
        st.title('傲基2.0品类管理')
        st.header('品类详情')
        st.markdown('针对每个asin,日排名规则为:')
        st.markdown('''
        每日排名取当日排名的最高值
        ''')
        return

    if selected == '排名':
        left_col, right_col = st.beta_columns(2)
        category = st.selectbox('CategoryID:',
                                listings['category_id'].unique())
        category_rows = listings[listings['category_id'] == category]

        # One row per ASIN, one column per date, holding the max ranking.
        pivot = pd.pivot_table(category_rows,
                               values=['ranking'],
                               index=['asin'],
                               columns=['date'],
                               aggfunc={'ranking': max})
        rankings = pivot['ranking']
        st.write(rankings)

        with left_col:
            asin_choice = st.selectbox('Asin', category_rows.asin.unique())
            st.write(rankings.loc[f'{asin_choice}'])

        with right_col:
            asin_series = rankings.loc[f'{asin_choice}']
            trend = px.line(asin_series,
                            x=asin_series.index,
                            y=asin_series,
                            title=f'{asin_choice}排名变化')
            st.subheader('当月排名变化情况')
            st.plotly_chart(trend)

        picked_day = st.slider('该品类下所有Asin的最高日排名',
                               value=datetime(2021, 1, 28),
                               format='MM/DD/YY')
        if picked_day not in rankings.columns:
            st.write('data is not yet available now')
        else:
            st.write(
                rankings[f'{picked_day}'],
                'and change in data compared to previous date is:',
            )

        with st.beta_expander('原始数据详情', expanded=True):
            row_limit = st.number_input('输入你想要看到的条数',
                                        min_value=1,
                                        value=50,
                                        step=50)
            st.write(listings.iloc[:row_limit, :])
        return

    if selected == '查询':
        # Each filter narrows the choices offered by the next one.
        chosen_categories = st.multiselect('品类ID',
                                           listings['category_id'].unique())
        in_category = listings['category_id'].isin(chosen_categories)

        chosen_dates = st.multiselect('日期',
                                      listings[in_category].date.unique())
        on_date = listings['date'].isin(chosen_dates)

        chosen_asins = st.multiselect(
            'ASIN', listings[in_category & on_date].asin.unique())
        has_asin = listings['asin'].isin(chosen_asins)

        st.write(listings[in_category & has_asin & on_date])
コード例 #12
0
def _regression_parameter_table(intercept, coefficients, feature_names):
    """Return a one-column table of fitted parameters labelled by name.

    The intercept comes first, followed by one coefficient per feature in the
    order given; *feature_names* is not modified.
    """
    labels = ['Intercept'] + list(feature_names)
    values = np.append(intercept, coefficients)
    return pd.DataFrame(data=values, index=labels, columns=['Parametros'])


def main():
    """Streamlit entry point: explore an uploaded CSV and fit a linear regression.

    After a file is uploaded the page shows a preview, column/type/missing-value
    summaries, optional descriptive statistics for one numeric column, a
    choice of plots for up to two attributes, and finally a LinearRegression
    fit of the chosen Y on the chosen X columns with its parameters.
    """
    st.title('APS Regressão Linear ')
    st.text(
        'Gabriel Oliveira Ramos do Nascimento RA: 21022939 \nJackson do Nascimento Silva RA: 21022770 \nLaura  Damaceno de Almeida  RA: 20964736 \nVictor Hugo Kawabata Fuzaro RA: 20760102'
    )
    st.image('image.png', width=900)
    file = st.file_uploader('Escolha seu arquivo', type='csv')

    if file is None:
        return

    slider = st.slider('Quantidade de linhas', 0, 100)
    df = pd.read_csv(file)
    st.dataframe(df.head(slider))
    st.markdown('**Nome das colunas**')
    st.write(df.columns)
    st.markdown('**Número de linhas**')
    st.write(df.shape[0])
    st.markdown('**Número de colunas**')
    st.write(df.shape[1])
    exploracao = pd.DataFrame({
        'nomes': df.columns,
        'tipos': df.dtypes,
        'NA #': df.isna().sum(),
        'NA %': df.isna().sum() / df.shape[0] * 100
    })
    st.markdown('**Contagem dos tipos de dados**')
    st.write(exploracao.tipos.value_counts())
    st.markdown('**Nome das colunas do tipo int64**')
    st.markdown(list(exploracao[exploracao['tipos'] == 'int64']['nomes']))
    st.markdown('**Nomes das colunas do tipo float64:**')
    st.markdown(list(exploracao[exploracao['tipos'] == 'float64']['nomes']))
    st.markdown('**Nomes das colunas do tipo object:**')
    st.markdown(list(exploracao[exploracao['tipos'] == 'object']['nomes']))
    st.markdown('**Tabela com coluna e percentual de dados faltantes :**')
    st.table(exploracao[exploracao['NA #'] != 0][['tipos', 'NA %']])
    st.markdown('**Descrição dos dados :**')

    st.table(df.describe())
    opcoes = df.columns
    aux = pd.DataFrame({"coluna": df.columns, "tipos": df.dtypes})
    colunas_numericas = list(aux[aux['tipos'] != 'object']['coluna'])
    st.subheader('Estatística descritiva')

    col = st.selectbox('Selecione a coluna', colunas_numericas)
    if col is not None:
        st.markdown('Selecione o que deseja analisar')
        # Each checkbox is followed directly by its value when ticked. All five
        # live inside this guard so none is evaluated without a column.
        estatisticas = (
            ('Média', 'mean'),
            ('Mediana', 'median'),
            ('Desvio Padrão', 'std'),
            ('Kurtosis', 'kurtosis'),
            ('Skewness', 'skew'),
        )
        for rotulo, metodo in estatisticas:
            if st.checkbox(rotulo):
                st.markdown(getattr(df[col], metodo)())

    st.subheader('Visualização dos dados')
    selected_atributos = st.multiselect('Selecione os atributos', opcoes)
    type_visualize = st.selectbox('Selecione o tipo de visualização', [
        'selecione', 'boxplot', 'scatter plot', 'barchart', 'histograma',
        'Matriz de correlação'
    ])

    df.dropna(inplace=True)
    # Only offer the 'Country' hover field when the file actually has it.
    hover = ['Country'] if 'Country' in df.columns else None

    if len(selected_atributos) > 2:
        st.markdown('Selecione no máximo 2 atributos')
    elif type_visualize == 'Matriz de correlação':
        # Correlate numeric/bool columns only; text columns make corr() raise
        # on recent pandas versions.
        st.write(df.select_dtypes(include=['number', 'bool']).corr())
    elif type_visualize != 'selecione' and not selected_atributos:
        # Every remaining plot needs at least one attribute to index into.
        st.markdown('Selecione ao menos 1 atributo')
    elif type_visualize == 'barchart':
        st.bar_chart(df[selected_atributos[0]])
    elif type_visualize == 'boxplot':
        if len(selected_atributos) == 1:
            fig = px.box(df, y=selected_atributos[0], hover_data=hover)
        else:
            fig = px.box(df,
                         x=selected_atributos[0],
                         y=selected_atributos[1],
                         hover_data=hover)
        st.plotly_chart(fig)
    elif type_visualize == 'scatter plot':
        if len(selected_atributos) == 1:
            fig = px.scatter(df, x=selected_atributos[0], hover_data=hover)
        else:
            fig = px.scatter(df,
                             x=selected_atributos[0],
                             y=selected_atributos[1],
                             hover_data=hover)
        st.plotly_chart(fig, use_container_width=True)
    elif type_visualize == 'histograma':
        sns.distplot(df[selected_atributos[0]])
        st.pyplot()

    st.markdown('**Regressão Linear**')
    Y = st.selectbox('Selecione a variável Y', opcoes)
    x = st.multiselect('Selecione a variável X', opcoes)
    if Y is not None and x:
        modelo = LinearRegression()
        X_train, X_test, y_train, y_test = train_test_split(
            df[x], df[Y], test_size=0.3, random_state=2811)
        modelo.fit(X_train, y_train)
        # Built-in round() works whether the score is a float or a NumPy scalar.
        st.text("R quadrado = {}".format(
            round(modelo.score(X_train, y_train), 2)))
        y_predict_train = modelo.predict(X_train)
        lm = modelo.predict(X_test)
        st.text("R quadrado de teste = {}".format(
            round(metrics.r2_score(y_test, lm), 2)))
        sns.regplot(x=y_predict_train, y=y_train)
        st.pyplot()
        st.markdown('**Formula da Regressão Linear**')
        st.image('formula.png', width=500)
        # Labels follow the value order (intercept first); previously the
        # intercept value was shown against the first feature name.
        st.table(
            _regression_parameter_table(modelo.intercept_, modelo.coef_, x))
コード例 #13
0
ファイル: chart.py プロジェクト: indiesoul2/stock
def earning_chart(input_ticker, earning_df, ea_df, price_df):
    """Draw the EPS-vs-price chart and a reported-EPS box plot in Streamlit.

    Args:
        input_ticker: Ticker symbol (str); only used to build chart titles.
        earning_df: DataFrame read for the 'reportedDate', 'reportedEPS',
            'estimatedEPS', 'surprise' and 'ttmEPS' columns.
        ea_df: Not used by this function; kept so existing callers keep
            working.
        price_df: DataFrame whose index is the x axis of the price line and
            whose 'Close' column supplies its values.

    Uses the module-level ``marker_colors`` and ``template`` objects and the
    Plotly template named "myID" (presumably registered elsewhere in the
    project).
    """
    # Price and EPS
    title = '(' + input_ticker + ') EPS & Price'
    titles = dict(text=title, x=0.5, y=0.9)
    x_data = earning_df['reportedDate']  # bars sit on the EPS report dates

    fig = make_subplots(specs=[[{'secondary_y': True}]])
    y_data_bar = ['reportedEPS', 'estimatedEPS', 'surprise', 'ttmEPS']

    # One bar series per EPS metric on the primary axis; zip() stops at the
    # shorter of the two sequences.
    for y_data, color in zip(y_data_bar, marker_colors):
        fig.add_trace(go.Bar(name=y_data,
                             x=x_data,
                             y=earning_df[y_data],
                             marker_color=color),
                      secondary_y=False)

    # Closing price as a line on the secondary axis.
    fig.add_trace(
        go.Scatter(mode='lines',
                   name='Close',
                   x=price_df.index,
                   y=price_df['Close'],
                   text=price_df['Close'],
                   textposition='top center',
                   marker_color='rgb(0,0,0)'),
        secondary_y=True)

    fig.update_traces(texttemplate='%{text:.3s}')
    # Style every y axis as the price axis first, then override the primary
    # axis for EPS.
    fig.update_yaxes(title_text='Close',
                     showticklabels=True,
                     showgrid=False,
                     zeroline=True,
                     tickprefix="$")
    fig.update_yaxes(title_text='EPS',
                     showticklabels=True,
                     showgrid=True,
                     zeroline=True,
                     tickprefix="$",
                     secondary_y=False)
    # `title_font_size` is the supported spelling of the deprecated
    # `titlefont_size` alias.
    fig.update_layout(title=titles, title_font_size=15, template=template)
    fig.update_layout(template="myID")
    fig.update_layout(
        showlegend=True,
        legend=dict(orientation="h",
                    yanchor="bottom",
                    y=1.02,
                    xanchor="right",
                    x=1),
        xaxis=go.layout.XAxis(
            rangeselector=dict(buttons=[
                dict(count=6, label="6m", step="month", stepmode="backward"),
                dict(count=1, label="YTD", step="year", stepmode="todate"),
                dict(count=1, label="1y", step="year", stepmode="backward"),
                dict(count=5, label="5y", step="year", stepmode="backward"),
                dict(count=10, label="10y", step="year",
                     stepmode="backward"),
                dict(step="all")
            ]),
            rangeslider=dict(visible=True),
            type="date"))
    st.plotly_chart(fig)

    # Distribution of the reported EPS values.
    fig2 = go.Figure()
    title = '(' + input_ticker + ') reportedEPS Statistics'
    titles = dict(text=title, x=0.5, y=0.9)
    fig2.add_trace(
        go.Box(x=earning_df.loc[:, 'reportedEPS'],
               name='reportedEPS',
               boxpoints='all',
               marker_color='indianred',
               boxmean='sd',
               jitter=0.3,
               pointpos=-1.8))
    fig2.update_layout(title=titles,
                       title_font_size=15,
                       legend=dict(orientation="h"),
                       template=template)
    st.plotly_chart(fig2)
コード例 #14
0
ファイル: chart.py プロジェクト: indiesoul2/stock
def kor_earning_chart(input_ticker, com_name, ttm_df, annual_df):
    """Render the EPS, valuation, margin and dividend charts in Streamlit.

    Six charts are drawn, in this order: TTM EPS vs. price, annual EPS vs.
    price, a box plot of TTM EPS, the PER/PBR/ROE trend, margins vs. ROE,
    and annual DPS vs. dividend yield.

    Args:
        input_ticker: Ticker symbol (str); used in the PER/PBR/ROE and
            margin chart titles.
        com_name: Company name (str); used in every chart title.
        ttm_df: DataFrame whose index is the x axis; read for the 'EPS',
            'Price', 'PER', 'PBR', 'ROE', 'OPM' and 'NPM' columns.
        annual_df: DataFrame whose index is the x axis; read for the 'EPS',
            'Price', 'DPS' and 'DY' columns.

    Uses the module-level ``marker_colors`` and ``template`` objects and the
    Plotly template named "myID" (presumably registered elsewhere in the
    project).
    """
    # Price vs. TTM EPS, then price vs. annual EPS.
    _kor_eps_price_chart(ttm_df, '(' + com_name + ') TTM EPS & Price',
                         'TTM EPS')
    _kor_eps_price_chart(annual_df, '(' + com_name + ') Annual EPS & Price',
                         'Annual EPS')

    # Distribution of the TTM EPS values.
    fig2 = go.Figure()
    fig2.add_trace(
        go.Box(x=ttm_df.loc[:, 'EPS'],
               name='EPS',
               boxpoints='all',
               marker_color='indianred',
               boxmean='sd',
               jitter=0.3,
               pointpos=-1.8))
    fig2.update_layout(title=dict(text='(' + com_name + ') EPS Statistics',
                                  x=0.5,
                                  y=0.9),
                       title_font_size=15,
                       legend=dict(orientation="h"),
                       template=template)
    st.plotly_chart(fig2)

    # PER, PBR and ROE trend.
    fig = make_subplots(specs=[[{'secondary_y': True}]])
    fig.add_trace(_kor_line_trace(ttm_df, 'PER', marker_colors[0]),
                  secondary_y=False)
    fig.add_trace(_kor_line_trace(ttm_df, 'PBR', marker_colors[1]),
                  secondary_y=True)
    fig.add_trace(_kor_text_bar(ttm_df, 'ROE', marker_colors[2]),
                  secondary_y=False)
    fig.update_traces(texttemplate='%{text:.3s}')
    # The primary axis carries both the PER line and the ROE bars. It used to
    # be titled 'ROE' and then immediately re-titled 'PER', which left ROE
    # unlabelled; name both series instead.
    fig.update_yaxes(title_text='PER & ROE', secondary_y=False)
    fig.update_yaxes(title_text='PBR', secondary_y=True)
    fig.update_yaxes(showticklabels=True, showgrid=False, zeroline=True)
    _kor_show_chart(fig,
                    com_name + '(' + input_ticker + ') TTM PER PBR & ROE',
                    0.85)

    # ROE and margin rates.
    fig = make_subplots(specs=[[{'secondary_y': True}]])
    for column, color in zip(['OPM', 'NPM'], marker_colors):
        fig.add_trace(_kor_line_trace(ttm_df, column, color),
                      secondary_y=True)
    fig.add_trace(_kor_text_bar(ttm_df, 'ROE', marker_colors[0]),
                  secondary_y=False)
    fig.update_traces(texttemplate='%{text:.3s}')
    # Series.max() skips NaN; the builtin max() over a Series can return NaN
    # depending on where the missing value sits, which breaks the axis range.
    roe_max = ttm_df['ROE'].max()
    opm_max = ttm_df['OPM'].max()
    fig.update_yaxes(title_text='ROE',
                     range=[0, roe_max * 2],
                     secondary_y=False)
    fig.update_yaxes(title_text='Margin Rate',
                     range=[-opm_max, opm_max * 1.2],
                     secondary_y=True)
    fig.update_yaxes(showticklabels=True,
                     showgrid=False,
                     zeroline=True,
                     ticksuffix="%")
    _kor_show_chart(fig, com_name + '(' + input_ticker + ') Margin & ROE',
                    0.85)

    # Dividends: DPS bars (primary axis) vs. dividend yield (secondary axis).
    fig = make_subplots(specs=[[{'secondary_y': True}]])
    fig.add_trace(go.Bar(name='DPS',
                         x=annual_df.index,
                         y=annual_df['DPS'],
                         marker_color=marker_colors[0]),
                  secondary_y=False)
    fig.add_trace(
        go.Scatter(mode='lines',
                   name='Dividend Yield',
                   x=annual_df.index,
                   y=annual_df['DY'],
                   text=annual_df['DY'],
                   textposition='top center',
                   marker_color=marker_colors[1]),
        secondary_y=True)
    fig.update_traces(texttemplate='%{text:.3s}')
    # Style every y axis as the yield axis first, then override the primary
    # axis for DPS.
    fig.update_yaxes(title_text='Dividend Yield',
                     showticklabels=True,
                     showgrid=False,
                     zeroline=True)
    fig.update_yaxes(title_text='Annual DPS',
                     showticklabels=True,
                     showgrid=True,
                     zeroline=True,
                     secondary_y=False)
    _kor_show_chart(fig, '(' + com_name + ') Annual DPS & DY', 0.9)


def _kor_show_chart(fig, title, title_y):
    """Apply the shared title/legend/template settings to *fig* and render it."""
    # `title_font_size` is the supported spelling of the deprecated
    # `titlefont_size` alias.
    fig.update_layout(title=dict(text=title, x=0.5, y=title_y),
                      title_font_size=15,
                      legend=dict(orientation="h"),
                      template=template)
    fig.update_layout(template="myID")
    st.plotly_chart(fig)


def _kor_line_trace(df, column, color):
    """Build a labelled line+marker trace for *column* of *df*."""
    return go.Scatter(mode='lines+markers+text',
                      name=column,
                      x=df.index,
                      y=df[column],
                      text=df[column],
                      textposition='top center',
                      marker_color=color)


def _kor_text_bar(df, column, color):
    """Build a bar trace for *column* of *df* with value labels outside."""
    return go.Bar(name=column,
                  x=df.index,
                  y=df[column],
                  text=df[column],
                  textposition='outside',
                  marker_color=color)


def _kor_eps_price_chart(df, title, eps_axis_title):
    """Draw EPS bars (primary axis) against the price line (secondary axis)."""
    fig = make_subplots(specs=[[{'secondary_y': True}]])
    fig.add_trace(go.Bar(name='EPS',
                         x=df.index,
                         y=df['EPS'],
                         marker_color=marker_colors[1]),
                  secondary_y=False)
    fig.add_trace(
        go.Scatter(mode='lines',
                   name='Close',
                   x=df.index,
                   y=df['Price'],
                   text=df['Price'],
                   textposition='top center',
                   marker_color='rgb(0,0,0)'),
        secondary_y=True)
    fig.update_traces(texttemplate='%{text:.3s}')
    # Style every y axis as the price axis first, then override the primary
    # axis for EPS.
    fig.update_yaxes(title_text='Close',
                     showticklabels=True,
                     showgrid=False,
                     zeroline=True)
    fig.update_yaxes(title_text=eps_axis_title,
                     showticklabels=True,
                     showgrid=True,
                     zeroline=True,
                     secondary_y=False)
    _kor_show_chart(fig, title, 0.9)
コード例 #15
0
def plot_raw_data():
    """Plot data['Open'] and data['Close'] against data['Date'] in Streamlit.

    Reads the module-level ``data`` object and shows the figure with an
    x-axis range slider.
    """
    figure = go.Figure()
    # (column to plot, legend label)
    for column, label in (('Open', "stock_open"), ('Close', "stock_close")):
        figure.add_trace(
            go.Scatter(x=data['Date'], y=data[column], name=label))
    figure.layout.update(title_text='Time Series data with Rangeslider',
                         xaxis_rangeslider_visible=True)
    st.plotly_chart(figure)
コード例 #16
0
def run_the_analysis():
    """Show one month's spending, its category breakdown and budget warnings.

    Reads ``myrecord.csv`` and ``mybudget.csv`` from the module-level ``path``
    directory. The record file is read for its 'Date', 'Category' and
    'Amount' columns; the budget file for 'Category' and 'Amount'.

    The final section writes a warning for every category whose
    spent/budgeted ratio is at least the fraction of the selected month that
    has elapsed as of today.
    """
    caching.clear_cache()

    stabledf = pd.read_csv(os.path.join(path, r'myrecord.csv'))
    stabledf['Date'] = pd.to_datetime(stabledf['Date'])
    stabledf = stabledf.set_index('Date')
    stabledf = stabledf.sort_index()
    # Bare expression kept on purpose: presumably displayed through
    # Streamlit's "magic" rendering of standalone expressions.
    stabledf

    monthtoview = st.selectbox('which month to view?',
                               stabledf.index.month.drop_duplicates())

    st.subheader('What you have spent this month?')

    # The filter is on month number only, so rows from every year match.
    month_records = stabledf.loc[stabledf.index.month == monthtoview]
    thismonthdf = month_records.groupby(['Category']).sum()
    thismonthdf
    # .iloc[0]: first column total by position; plain [0] on a label-indexed
    # Series is deprecated positional access.
    st.write('In total is $', round(thismonthdf.sum().iloc[0], 2))

    import plotly.express as px
    fig = px.pie(thismonthdf, values='Amount', names=thismonthdf.index)
    st.plotly_chart(fig)

    category_filter = st.selectbox('Which category to look deeper into?',
                                   stabledf['Category'].drop_duplicates())
    filterdata = month_records.loc[month_records['Category'] ==
                                   category_filter]

    st.subheader('selected')
    st.bar_chart(filterdata['Amount'])

    st.subheader('Compare with budgeting analysis')

    budgetdf = pd.read_csv(os.path.join(path, r'mybudget.csv'))

    analysisdf = thismonthdf.merge(budgetdf,
                                   left_index=True,
                                   right_on='Category',
                                   suffixes=('_spent', '_budgeted'))
    analysisdf = analysisdf.set_index('Category')
    analysisdf

    st.subheader("how you've spent this month")
    fig2 = {
        'data': [
            go.Bar(x=analysisdf.index,
                   y=analysisdf["Amount_budgeted"],
                   name="Amount_budgeted"),
            go.Bar(x=analysisdf.index,
                   y=analysisdf["Amount_spent"],
                   name="Amount_spent")
        ],
        'layout':
        go.Layout(barmode='overlay')
    }

    st.plotly_chart(fig2)

    st.subheader('Warning Section')
    # The selected category may have no rows in the selected month; taking
    # the year from `filterdata` alone then raised IndexError. Fall back to
    # the month's rows, and stop if there are none at all (in that case
    # there is nothing to compare either).
    year_source = month_records if filterdata.empty else filterdata
    if year_source.empty:
        return
    currentyear = int(year_source.index.year[0])

    # None of the date arithmetic depends on the row, so do it once.
    month_start = datetime(currentyear, int(monthtoview), 1)
    # Full month length (first of this month to first of the next). Using
    # last-day minus first-day gave N-1 days and overstated the elapsed share.
    month_length = (month_start + relativedelta(months=1)) - month_start
    elapsed_share = (datetime.today() - month_start) / month_length

    for index, row in analysisdf.iterrows():
        if elapsed_share <= row['Amount_spent'] / row['Amount_budgeted']:
            st.write('Category', index, 'is over the proportion limit!!!')
コード例 #17
0
# Build the ratings table. Each cell of the 'game' and 'earnings' columns is
# expanded with pd.Series into its own top-level columns (the cells are
# presumably dicts - confirm against get_ratings_all()).
ratings_all = get_ratings_all()
ratings_df = pd.DataFrame(ratings_all)
# `columns=` instead of a positional axis: pandas 2.x only accepts `axis` as
# a keyword argument in DataFrame.drop.
ratings_df = ratings_df.drop(columns='game').assign(
    **ratings_df.game.apply(pd.Series))
ratings_df = ratings_df.drop(columns='earnings').assign(
    **ratings_df.earnings.apply(pd.Series))

fig_all = px.scatter(ratings_df,
                     x='downloads',
                     y='revenue',
                     hover_name='name',
                     hover_data=['platform', 'publisher'],
                     color='genre',
                     template='plotly_dark',
                     title="All game revenue vs downloads with genre colormap")
st.plotly_chart(fig_all)

st.write(
    "From the combined chart above, you can interact with the following chart to focus on the top 10 ranking type and gaming platform:"
)

# `st.beta_columns` was the pre-release name of `st.columns` and is absent
# from later Streamlit releases; use whichever this installation provides.
make_columns = getattr(st, 'columns', None) or st.beta_columns
left_column, right_column = make_columns(2)
# Both radios are created through `st.sidebar`, so they are placed in the
# sidebar even though they are declared inside the column contexts.
with right_column:
    rank_type = st.sidebar.radio('Sorting rank type',
                                 ('top free', 'top paid', 'top grossing'))
with left_column:
    os_type = st.sidebar.radio('Sorting platform', ('android', 'iOS'))

# Rows matching both the selected ranking type and the selected platform.
ra_search = ratings_df.loc[(ratings_df['rank_type'] == rank_type)
                           & (ratings_df['platform'] == os_type)]
fig_r = px.scatter(
コード例 #18
0
ファイル: app.py プロジェクト: andrewng88/hdb
def main():
    # Menu
    menu = ['EDA', 'Interactive Charts', 'Prediction', 'About']
    choices = st.sidebar.selectbox('Select Menu', menu)

    # load dataframe
    data = load_data('data/data.csv')
    # we established from EDA that year post 2013 is more stable and more reflective of
    # current market trends
    data2013 = data.query('year_sold >=2013')

    # When 'EDA' is selected at the menu.
    if choices == 'EDA':

        st.title('EDA')

        st.header("Project Title : HDB Price Prediction ML App")
        st.subheader(" Problem Statement")
        st.markdown(
            'How do flat buyers know if they have snatched a good deal?\
        Alternatively, how do flat sellers benchmark their property reasonably?\
        In order to help flat buyers and flat sellers make an informed decision, we decided to find out more about resale flat prices in Singapore.\
        Ultimately, we want to predict the price of resale flats in Singapore.'
        )

        st.markdown(
            'Complete notebook can be found [here](https://nbviewer.jupyter.org/github/andrewng88/hdb/blob/master/2_Exploratory_Data_Analysis.ipynb)'
        )

        st.subheader("The Data")
        st.markdown(
            'Obtained from [Data.gov.sg](http://data.gov.sg/dataset/resale-flat-prices)\
        the dataset is from **1990 to 2019**.')

        if st.checkbox("Show Summary of the Dataset"):
            st.write(data.describe())

        # display overall hdb price trend chart
        table1 = data.groupby("year_sold")["resale_price"].agg(
            ["median"]).reset_index()
        table1.rename(columns={"median": "resale_price"}, inplace=True)
        resale_price = px.line(table1, x="year_sold", y="resale_price")
        resale_price.update_layout(
            title_text='HDB Resale Price trend (1990 - 2019)',
            template='ggplot2')
        st.plotly_chart(resale_price)

        # chart commentary
        st.markdown(
            'The decline in resale price and sudden surge in the number of units sold following 1997 is due to the 1997\
        [Asian financial crisis](https://www.todayonline.com/singapore/divergent-hdb-resale-private-home-price-trends-will-not-last).\
        With regards to the sharp spike in 2007 is because HDB has stopped Walk-In-Selection and replace it with Sale of Balance Flats\
        which is only twice per year and hence everyone went with the Resale')

        # display overall hdb transactions trend chart
        table2 = data.groupby(
            "year_sold")["resale_price"].count().reset_index()
        table2 = table2.rename(columns={"resale_price": "number_of_resale"})
        resale_transaction = px.line(table2,
                                     x="year_sold",
                                     y="number_of_resale")
        resale_transaction.update_layout(
            title_text='HDB Resale Transactions between (1990 - 2019)',
            template='ggplot2')
        st.plotly_chart(resale_transaction)

        # chart commentary
        st.markdown(
            'Implementation of the revised [cooling measures](https://www.srx.com.sg/cooling-measures) to cool the residential market from 2010 onwards\
        led to the drop in resale price and low number of units sold during this period.Specifically the lowering of LTV(Loan-To-Value) from \
        90% to 80% - meaning buyers have to pay more initally.')

        # display overall dollar per square meter based on flat type
        data['dollar_psf'] = data['resale_price'] / (data['floor_area_sqm'] *
                                                     10.764)
        table3 = data.groupby(["year_sold", 'flat_type'
                               ])["dollar_psf", ].agg(["median"
                                                       ]).reset_index()
        table3.rename(columns={"median": "dollar_psf"}, inplace=True)
        dollar_per_sq_f = px.line(table3,
                                  x="year_sold",
                                  y="dollar_psf",
                                  color='flat_type')
        dollar_per_sq_f.update_layout(
            title_text=
            'Median Dollar Per Square Feet between 1990 and 2019 based on flat type',
            template='ggplot2')
        st.plotly_chart(dollar_per_sq_f)

        # chart commentary
        st.markdown(
            'Similar trend if we break down based on flat type, the median went up by two fold from 2007 to 2013 and gradually\
        went down because of additional cooling measures')

        # display overall dollar per square meter based on storey
        table4 = data.groupby(["year_sold", 'storey_range'
                               ])["dollar_psf", ].agg(["median"
                                                       ]).reset_index()
        table4.rename(columns={"median": "dollar_psf"}, inplace=True)
        median_storey = px.line(table4,
                                x="year_sold",
                                y="dollar_psf",
                                color='storey_range')
        median_storey.update_layout(
            title_text=
            'Median Dollar Per Square Feet between 1990 and 2019 based on storey',
            template='ggplot2')
        st.plotly_chart(median_storey)

        st.markdown(
            'Similar trend if we break down based on storey, but for high storey more than 40, price is still climbing.\
        We can also notice that high rise flats ( > 30 storeys ) starts from around 2005 onwards( less 3 years)'
        )

        st.markdown(
            '**We decided to work with data from 2013**. This is because the 1997 Asian financial crisis is a once off event and does not provide an \
        accurate reflection of the current situation.In addition, with the cooling measures still in place, using data from 2013\
        will ensure consistency in this aspect.')

        st.subheader(
            'Complete notebook can be found [here](https://nbviewer.jupyter.org/github/andrewng88/hdb/blob/master/2_Exploratory_Data_Analysis.ipynb)'
        )

    # When 'Interactive Charts' is selected at the menu.

    if choices == 'Interactive Charts':
        st.title('Interactive Charts')

        # 3D map component
        st.subheader("HDB Transactions Visualized using 3D")
        # from 1990 to 2019, defaults to 2019
        year = st.slider('Year to look at', 1990, 2019, 2019)
        data = data[data['year_sold'] == year]

        st.markdown("HDB transactions in **%i**" % (year))
        midpoint = (np.average(data["latitude"]),
                    np.average(data["longitude"]))
        st.write(
            pdk.Deck(
                map_style="mapbox://styles/mapbox/light-v9",
                #display the mid of SG
                initial_view_state={
                    "latitude": midpoint[0],
                    "longitude": midpoint[1],
                    "zoom": 11,
                    "pitch": 50,
                },
                #displays the GPS of each HDB based on year_sold, GPS
                layers=[
                    pdk.Layer("HexagonLayer",
                              data=data[['year_sold', 'latitude',
                                         'longitude']],
                              get_position=["longitude", "latitude"],
                              auto_highlight=True,
                              radius=100,
                              extruded=True,
                              pickable=True,
                              elevation_scale=4,
                              elevation_range=[0, 1000]),
                ],
            ))

        #displays the Median price by Flat type

        st.subheader('View HDB Median price by Flat type')
        flat_type_values = sorted(list(data['flat_type'].unique()))
        flat_type_values.insert(0, 'ALL')
        flat_option = st.selectbox("Flat_type", flat_type_values)
        sort_option = st.radio("Sort by", ("Ascending", "Descending"))
        flat_type_display_text = f'<sup>You selected {flat_option} and {sort_option}</sup>'
        st.markdown(flat_type_display_text, unsafe_allow_html=True)

        sort_option_dict = {
            'Ascending': False,
            'Descending': True,
        }

        if flat_option == 'ALL':
            revenue_all = data.groupby(
                ['town'])['resale_price'].median().reset_index().sort_values(
                    by=['resale_price'],
                    ascending=sort_option_dict[sort_option])
            figure_to_plot = revenue_all
        else:
            revenue = data[data['flat_type'] == flat_option]
            revenue = revenue.groupby(
                ['town'])['resale_price'].median().reset_index().sort_values(
                    by=['resale_price'],
                    ascending=sort_option_dict[sort_option])
            figure_to_plot = revenue
        fig_median = px.bar(figure_to_plot,
                            x='resale_price',
                            y='town',
                            orientation="h",
                            height=600,
                            template='ggplot2')
        fig_median_title = f'HDB Median price for {flat_option} flats in {sort_option} order'
        fig_median.update_layout(title_text=fig_median_title)
        st.plotly_chart(fig_median)

        #displays the Median price by MRT

        st.subheader('View HDB Median price by MRT')
        mrt_values = sorted(list(data2013['mrt'].unique()))
        mrt_values.insert(0, 'ALL')
        mrt_option = st.selectbox("MRT", mrt_values)
        mrt_display_text = f'<sup>You selected {mrt_option}</sup>'
        st.markdown(mrt_display_text, unsafe_allow_html=True)

        if mrt_option == 'ALL':
            mrt_all = data2013.query('nearest_mrt_distance <1').groupby(
                ['mrt'])['resale_price'].median().reset_index().sort_values(
                    by=['resale_price'])
            fig_median = px.bar(mrt_all,
                                x='resale_price',
                                y='mrt',
                                orientation='h',
                                height=600,
                                template='ggplot2')
            st.write(mrt_all)
        else:
            mrt = data2013[data2013['mrt'] == mrt_option]
            mrt = mrt.query('nearest_mrt_distance <1').groupby([
                'mrt', 'flat_type'
            ])['resale_price'].median().reset_index().sort_values(
                by=['resale_price']).drop('mrt', axis=1)
            fig_median = px.bar(mrt,
                                x='flat_type',
                                y='resale_price',
                                height=400,
                                template='ggplot2')
            fig_median_title = f'HDB Median price for HDB flats near {mrt_option}'
            fig_median.update_layout(title_text=fig_median_title)
        st.plotly_chart(fig_median)

    # When 'Prediction' is selected at the menu.

    if choices == 'Prediction':
        st.subheader('Predictions')

        # load the unique database for speed
        df_unique_deploy = load_data('data/df_unique_deploy.csv')

        #obtain Postcode input from end user
        input_postcode = st.text_input("Postcode : ", 560216)  #560216
        postcode_list = df_unique_deploy['postcode'].unique().tolist()

        # we proceed with HDB transaction prediction, if the postcode is in the list
        if int(input_postcode) in postcode_list:

            input_postcode_results = f"Postcode is **{input_postcode}** "

            #auto retrieve the flat_type for selection based on postcode
            flat_type = df_unique_deploy[df_unique_deploy['postcode'] == int(
                input_postcode)]['flat_type'].unique().tolist()
            flat_type = st.selectbox("The flat_type", (flat_type))
            flat_type_results = f"Flat Type is **{flat_type}**."

            #auto retrieve the flat_model for selection based on postcode
            f_model = df_unique_deploy[df_unique_deploy['postcode'] == int(
                input_postcode)]['flat_model'].unique().tolist()
            flat_model = st.selectbox("The flat_model", (f_model))
            flat_model_results = f"Flat Model is **{flat_model}**."

            #auto retrieve town for selection based on postcode
            town = df_unique_deploy[df_unique_deploy['postcode'] == int(
                input_postcode)]['town'].unique()[0]
            town_results = f" and it is located in **{town }** town ."

            #storey requires input from end user as we're not mind reader :P
            storey = st.slider("Storey level : ", 1, 50, 6)  #8
            storey_results = f"Storey is **{storey}**."

            #auto retrieve floor_area_sqm for selection based on postcode
            area = df_unique_deploy[df_unique_deploy['postcode'] == int(
                input_postcode)]['floor_area_sqm'].unique().tolist()
            floor_area_sqm = st.selectbox("Floor_area_sqm", (area))
            area_results = f"Area is **{floor_area_sqm }**."

            # calculate remaining lease = start year + 99 - current year
            today = date.today()
            year_sold = today.year
            month_sold = today.month
            lease_commence_date = df_unique_deploy[
                df_unique_deploy['postcode'] == int(
                    input_postcode)]['lease_commence_date'].tolist()[0]
            remaining_lease = int(lease_commence_date) + 99 - year_sold
            remaining_lease_results = f"Remaining lease is **{remaining_lease}** years ."

            #auto retrieve nearest_mrt_distance for selection based on postcode
            nearest_mrt_distance = df_unique_deploy[
                df_unique_deploy['postcode'] == int(input_postcode)][
                    'nearest_mrt_distance'].unique().tolist()[0]
            nearest_mrt_distance_results = f"MRT is **{nearest_mrt_distance:.2f}** km away."

            #auto retrieve CBD_distance for selection based on postcode
            CBD_distance = df_unique_deploy[
                df_unique_deploy['postcode'] == int(
                    input_postcode)]['CBD_distance'].unique().tolist()[0]
            cbd_distance_results = f"CBD is **{CBD_distance:.2f}** km away."

            #auto retrieve nearest_mall_distance for selection based on postcode
            nearest_mall_distance = df_unique_deploy[
                df_unique_deploy['postcode'] == int(input_postcode)][
                    'nearest_mall_distance'].unique().tolist()[0]
            nearest_mall_distance_results = f"Nearest Mall is **{nearest_mall_distance:.2f}** km away."

            #auto retrieve nearest_school_distance
            nearest_school_distance = df_unique_deploy[
                df_unique_deploy['postcode'] == int(input_postcode)][
                    'nearest_school_distance'].unique().tolist()[0]
            nearest_school_distance_results = f"Nearest school is **{nearest_school_distance:.2f}** km away."

            #condolidate all data for prediction
            sample_data = [[
                floor_area_sqm, year_sold, month_sold, remaining_lease,
                nearest_mrt_distance, CBD_distance, nearest_mall_distance,
                nearest_school_distance, storey, town, flat_type, flat_model
            ]]

            list_columns = [
                'floor_area_sqm', 'year_sold', 'month_sold', 'remaining_lease',
                'nearest_mrt_distance', 'CBD_distance',
                'nearest_mall_distance', 'nearest_school_distance', 'storey',
                'town', 'flat_type', 'flat_model'
            ]

            sample_data = pd.DataFrame(sample_data, columns=list_columns)

            #load model and predict
            predictor = load_prediction_models('data/rf.sav')
            predictor.predict(sample_data)

            #display data input
            if st.checkbox('Verbose ON/OFF:'):
                st.markdown(input_postcode_results + town_results)
                st.markdown(flat_type_results)
                st.markdown(flat_model_results)
                st.markdown(storey_results)
                st.markdown(area_results)
                st.markdown(remaining_lease_results)
                st.markdown(nearest_mrt_distance_results)
                st.markdown(cbd_distance_results)
                st.markdown(nearest_mall_distance_results)
                st.markdown(nearest_school_distance_results)
                st.write('Data collated for prediction:')
                st.write(sample_data)

            #prefix $ and convert prediction to int
            prediction = "{} {}".format('$',
                                        int(predictor.predict(sample_data)))
            st.subheader('HDB valuation:')
            st.success(prediction)

            #display other HDB data from the same block
            st.subheader(
                "Other transactions from 2013 onwards(sorted by latest transaction)"
            )
            st.dataframe(data2013[data2013['postcode']==int(input_postcode)].sort_values(by='month', ascending=False)\
            [['resale_price','dollar_psf','month','flat_type','flat_model','storey_range','lease_commence_date','floor_area_sqm']])

        #message to display if Postcode does not exists
        else:
            st.warning('Please input valid Postcode')

    if choices == 'About':
        st.header('About')

        st.subheader('Project by:')
        st.markdown('**Andrew Ng** [email protected]')
        st.markdown('https://www.linkedin.com/in/sc-ng-andrew/')
        st.markdown('**Lau Lee Ling** [email protected]')
        st.markdown('https://www.linkedin.com/in/lauleeling/')
コード例 #19
0
# Price-distribution section: histogram of MEDV restricted to a
# user-selected price range.
st.subheader("Distribuição de imóveis por preço")

# Range slider; the lower bound comes from the data, the upper bound and the
# initial (10, 100) selection are fixed.
faixa_valores = st.slider("Faixa de preço", float(data.MEDV.min()), 150.,
                          (10.0, 100.0))

# Keep only the rows whose MEDV lies inside the selected range.
dados = data[data['MEDV'].between(left=faixa_valores[0],
                                  right=faixa_valores[1])]

# Plot the distribution of the filtered data.
f = px.histogram(dados, x="MEDV", nbins=100, title="Distribuição de Preços")
f.update_xaxes(title="MEDV")
f.update_yaxes(title="Total Imóveis")
st.plotly_chart(f)

st.sidebar.subheader("Defina os atributos do imóvel para predição")

# Map user input to each model attribute; every numeric input defaults to the
# mean of its own column.
crim = st.sidebar.number_input("Taxa de Criminalidade", value=data.CRIM.mean())
# Fixed: this input previously defaulted to the CRIM mean (copy-paste slip).
# NOTE(review): assumes `data` has an INDUS column — confirm against the loader.
indus = st.sidebar.number_input("Proporção de Hectares de Negócio",
                                value=data.INDUS.mean())
chas = st.sidebar.selectbox("Faz limite com o rio?", ("Sim", "Não"))

# Convert the yes/no answer into a binary flag.
chas = 1 if chas == "Sim" else 0

nox = st.sidebar.number_input("Concentração de óxido nítrico",
                              value=data.NOX.mean())
コード例 #20
0
        "Select countries", myData.countryNames.tolist(),
        myData.getTopDailyNewCasesByCountry("confirmed").index.tolist())

    st.latex(myData.getCumulativeDataSummary(countryNameOptions))

    #############################################################################
    st.markdown("""---""")

    daysOption = st.radio("", ("last 45 days", "last 60 days", "all days"), 0)
    st.write(
        '<style>div.Widget.row-widget.stRadio > div{flex-direction:row;}</style>',
        unsafe_allow_html=True)
    myData.setNumDays(option=daysOption)

    st.plotly_chart(
        myData.getTopCountriesNewCasesGraph(option="confirmed",
                                            numCountries=5))
    st.plotly_chart(
        myData.getTopCountriesNewCasesGraph(option="deaths", numCountries=5))

    #st.plotly_chart(myData.getTopCountriesActivePercentGraph(numCountries=5,numDays=45))

    #############################################################################
    st.markdown("""---""")
    countsOption = st.radio("Select an option",
                            ("confirmed", "active", "recovered", "deaths",
                             "activeRatio", "recoveredRatio", "deathsRatio"),
                            2)

    st.plotly_chart(myData.getGlobalCountsMap(countsOption))
    st.plotly_chart(myData.getGlobalCountsScatterPlot(countsOption))
コード例 #21
0
def app():
    """Render the IPL player-profile page.

    Reads a player name from the sidebar, resolves it with ``find_name``,
    shows the profile table returned by ``get_player_profile`` for the chosen
    discipline and year range and, when "Visualize" is ticked, a bar chart of
    two user-selected numeric columns. ``Footer()`` is always rendered last.
    """
    st.title("Welcome to IPL-cric-data!")
    st.sidebar.title('Find Player Profile')
    user_input_player = st.sidebar.text_input(
        label="Enter Cricketer's Name Eg. (Tendulkar)"
    )  #, value="SR Tendulkar")

    if not user_input_player:
        st.write(
            "You can try putting a cricket player's name in the left panel to see his profile as well as visualise the data."
        )
    else:
        player_name = find_name(user_input_player, ipl=True)
        #player_name = user_input_player

        if player_name is None:
            st.markdown('**' + user_input_player + '** ' + " is not found.")
        else:
            bat_bowl = st.sidebar.selectbox(label="Batting/Bowling Profile",
                                            options=("bat", "bowl"))

            # The discipline decides both the profile type and the column
            # pre-selected for the chart's y-axis.
            bat = bat_bowl != 'bowl'
            default_yaxis = 'Runs' if bat else 'Wickets'

            year_from = st.sidebar.number_input("Year from",
                                                min_value=2008,
                                                max_value=2021,
                                                value=2008,
                                                step=1)
            year_to = st.sidebar.number_input("Year to",
                                              min_value=2008,
                                              max_value=2021,
                                              value=2021,
                                              step=1)
            visualize = st.sidebar.checkbox(label="Visualize", value=False)

            st.markdown('**' + player_name + '**')
            df = get_player_profile(player_name,
                                    batsman=bat,
                                    year_from=year_from,
                                    year_to=year_to,
                                    ipl=True)
            st.table(df)

            if visualize:
                numeric_cols = list(
                    df.select_dtypes(include=np.number).columns.values)
                if not numeric_cols:
                    # Nothing to plot (e.g. an empty profile).
                    st.write("No numeric columns available to visualise.")
                else:
                    xaxis = st.sidebar.selectbox(label="x-axis",
                                                 options=numeric_cols)
                    # Fall back to the first column instead of raising
                    # ValueError when the preferred column is absent.
                    if default_yaxis in numeric_cols:
                        y_index = numeric_cols.index(default_yaxis)
                    else:
                        y_index = 0
                    yaxis = st.sidebar.selectbox(label="y-axis",
                                                 options=numeric_cols,
                                                 index=y_index)

                    fig = px.bar(df, x=xaxis,
                                 y=yaxis)  #, range_x=[year_from, year_to])
                    st.plotly_chart(fig)

    #st.sidebar.title('Team Profile')
    #all_ipl_teams=("Chennai Super Kings", "Delhi Capitals", "Punjab Kings", "Kolkata Knight Riders",
    #        "Mumbai Indians", "Rajasthan Royals", "Royal Challengers Bangalore", "Sunrisers Hyderabad")
    #team_name = st.sidebar.selectbox(label="Team name",
    #                                options=all_ipl_teams)
    Footer()
コード例 #22
0
st.markdown(
    'The majority of hospitals in NY are acute care, while the least are childrens hospitals. '
)

# Ownership breakdown for hospitals located in NY.
st.subheader('Hospital Ownership  - NY')
ownership_ny = df_hospital_2[df_hospital_2['state'] == 'NY']
# Build the counts table with explicit column names. The names produced by
# `value_counts().reset_index()` differ between pandas 1.x and 2.x, which
# broke the `values=`/`names=` lookups in the pie chart below.
ownership_counts = ownership_ny['hospital_ownership'].value_counts()
bar4 = pd.DataFrame({
    'index': ownership_counts.index,
    'hospital_ownership': ownership_counts.values
})
st.dataframe(bar4)

st.markdown(
    'The majority of hospitals in NY are Private non-profit voluntary, while the least are owned by the department of defense.  '
)

st.subheader('With a PIE Chart:')
fig = px.pie(bar4, values='hospital_ownership', names='index')
st.plotly_chart(fig)

st.subheader('Map of NY Hospital Locations')

# Split the `location` text on spaces into three parts and name them
# Point / lon / lat.
# NOTE(review): presumably the values look like "POINT (<lon> <lat>)" — confirm.
hospitals_ny_gps = hospitals_ny['location'].str.strip('()').str.split(
    ' ', expand=True).rename(columns={
        0: 'Point',
        1: 'lon',
        2: 'lat'
    })
# The opening parenthesis sits in the middle of the original string, so it
# survives the first strip and is removed here.
hospitals_ny_gps['lon'] = hospitals_ny_gps['lon'].str.strip('(')
hospitals_ny_gps = hospitals_ny_gps.dropna()
hospitals_ny_gps['lon'] = pd.to_numeric(hospitals_ny_gps['lon'])
hospitals_ny_gps['lat'] = pd.to_numeric(hospitals_ny_gps['lat'])

st.map(hospitals_ny_gps)
コード例 #23
0
ファイル: estado.py プロジェクト: teoria/covid_brasil_io
def monta_estados(taxa_mortalidade):
    """Render the per-state COVID page.

    Loads the data returned by ``load_data_brasil_io()``, lets the user pick
    a state in the sidebar and then shows: a text summary of the state's
    latest record, a pydeck map (state outline plus one extruded polygon per
    city) and a line chart of confirmed cases and deaths over time.

    Args:
        taxa_mortalidade: Reference mortality rate, in percent, used to
            estimate the number of infected people from the death count.

    Returns:
        None. Returns early when there are no states to choose from or the
        selected state has no row flagged ``is_last``.
    """
    df = load_data_brasil_io()
    states = df['state'].sort_values(ascending=True).unique()
    # `unique()` never returns None, so check for emptiness instead.
    if len(states) == 0:
        return

    state = st.sidebar.selectbox('Qual o estado você deseja visualizar?', states)

    in_state = df['state'] == state
    dados_estado = df[in_state & (df['place_type'] == 'state')]
    dados_estado_cities = df[in_state & (df['place_type'] != 'state')]

    st.subheader(f"Dados de COVID em {state}")

    # Latest state-level record; chaining reset_index avoids mutating a slice.
    hoje = dados_estado[dados_estado['is_last']].reset_index(drop=True)
    if hoje.empty:
        # Previously this crashed on the `[0]` lookups below.
        st.warning(f"Não há dados disponíveis para {state}.")
        return

    dia_atual = hoje['date'].dt.strftime('%d-%m-%Y')[0]
    confirmados = hoje['confirmed'][0]
    mortes = hoje['deaths'][0]
    # death_rate is a fraction; show it as a percentage with two decimals.
    taxa = round(hoje['death_rate'][0] * 10000) / 100

    st.markdown(f"O estado de **{state}** teve até o dia **{dia_atual}** "
                f"um total de **{confirmados}** casos confirmados e"
                f" **{mortes}** mortes com uma taxa de mortalidade de **{taxa}%**.")
    # Only estimate when there are deaths and the reference rate is usable
    # (a zero rate would divide by zero).
    if mortes > 0 and taxa_mortalidade > 0:
        quantidade_estimada = int(100 * mortes / taxa_mortalidade)
        st.markdown(f"Com base na taxa de mortalidade de outros países (**{taxa_mortalidade}%** dos infectados) "
                    f"a quantidade estimada de infectados seria de **{quantidade_estimada}** para a quantidade de mortos atual.")

    data_state = get_map_state(state)
    data_cities = get_map_city(state)
    view = get_view(state)

    # The slider value only feeds the green channel of the city outline colour.
    slide = st.slider('Semana epidemiológica', 0, 255, 1)

    dia_atual_mapa = dados_estado_cities[dados_estado_cities['is_last']]

    st.write(dia_atual_mapa)
    # Attach each city's latest row to its GeoJSON feature under
    # properties['dados'] so the layer accessors below can read it.
    for feature in data_cities['features']:
        id_city = feature['id']
        dados = dia_atual_mapa[dia_atual_mapa.city_ibge_code == id_city].reset_index().T.rename(columns={0: 'dados'})
        feature['properties'] = dados.to_dict()

    # Viewport: `view` is indexed as (latitude, longitude).
    view_state = pdk.ViewState(
        longitude=view[1],
        latitude=view[0],
        zoom=6,
        min_zoom=1,
        max_zoom=60,
        pitch=50,
        bearing=0)

    # Flat white outline of the state.
    geojson = pdk.Layer(
        'GeoJsonLayer',
        data_state,
        opacity=1,
        filled=True,
        get_fill_color=[255,  255, 255],
        get_line_color=[100, 100, 90],
    )

    # Cities: elevation from deaths, fill colour from confirmed cases.
    geojson2 = pdk.Layer(
        'GeoJsonLayer',
        data_cities,
        opacity=0.8,
        stroked=False,
        filled=True,
        extruded=True,
        wireframe=True,
        get_elevation='properties.dados.deaths*1000',
        get_fill_color='[255/2, properties.dados.confirmed  , 255]',
        get_line_color=[0, slide, 255],
        pickable=True
    )

    # Combine the layers and render the viewport.
    # NOTE(review): the Mapbox token is hard-coded; consider loading it from
    # configuration or an environment variable.
    r = pdk.Deck(layers=[geojson,geojson2],
                 tooltip={"html": f"<b>Color Value:</b> {state}", "style": {"color": "white"}},

                 initial_view_state=view_state,
                 height=800,
                 width=800,
                 map_style="mapbox://styles/mapbox/light-v9",
                 mapbox_key='pk.eyJ1IjoidGVvcmlhIiwiYSI6ImNqODRpNWJrNjA5dGIyd3FoMnZ6am13NjcifQ.OgxGf081lfoKQAOhlYh1Tg'
                 )
    st.pydeck_chart(r)

    # Long format (date, variable, value) for the time-series chart.
    dados_estado_melt = pd.melt(
        dados_estado[['date', 'confirmed', 'deaths']],
        id_vars=['date'],
        value_vars=['confirmed', 'deaths'])

    serie = dados_estado_melt.groupby(["date", 'variable']).sum().reset_index()

    fig = px.line(serie, x="date", y="value", color='variable')

    fig.update_layout(title=f'Casos de Covid em {state}',
                      xaxis_title='Data',
                      yaxis_title='Número de casos')
    st.plotly_chart(fig)
#
# """This app demonstrates the use of the awesome [deck.gl]() framework for visual
# exploratory data analysis of large datasets.
#
# Deck.gl is now (as of Streamlit v. 0.53) supported via the
# [`st.pydeck_chart`](https://docs.streamlit.io/api.html?highlight=pydeck#streamlit.pydeck_chart)
# function.
#
# We use data from the
# [Global Power Plant Database](http://datasets.wri.org/dataset/globalpowerplantdatabase) to
# illustrate the locations, fuel types and capacities of the world's power plants.
# """
#
#
# import pathlib
#
# import pandas as pd
# import pydeck as pdk
# import streamlit as st
#
# POWER_PLANT_PATH = (
#     pathlib.Path.cwd() / "gallery/global_power_plant_database/global_power_plant_database.csv"
# )
#
# POWER_PLANT_URL = (
#     "https://raw.githubusercontent.com/MarcSkovMadsen/awesome-streamlit/master/"
#     "gallery/global_power_plant_database/global_power_plant_database.csv"
# )
#
# LATITUDE_COLUMN = "latitude"
# LONGITUDE_COLUMN = "longitude"
#
# LOCATIONS = {
#     "Orsted Copenhagen HQ": {"latitude": 55.676098, "longitude": 12.568337},
#     "Orsted Boston": {"latitude": 42.361145, "longitude": -71.057083},
# }
# ORSTED_CPH_HQ = LOCATIONS["Orsted Copenhagen HQ"]
#
# FUEL_COLORS = {
#     "Oil": "black",
#     "Solar": "green",
#     "Gas": "black",
#     "Other": "gray",
#     "Hydro": "blue",
#     "Coal": "black",
#     "Petcoke": "black",
#     "Biomass": "green",
#     "Waste": "green",
#     "Cogeneration": "gray",
#     "Storage": "orange",
#     "Wind": "green",
# }
#
# COLORS_R = {"black": 0, "green": 0, "blue": 0, "orange": 255, "gray": 128}
#
# COLORS_G = {"black": 0, "green": 128, "blue": 0, "orange": 165, "gray": 128}
#
# COLORS_B = {"black": 0, "green": 0, "blue": 255, "orange": 0, "gray": 128}
#
#
# class ViewStateComponent:
#     """Component to let the user set the initial view state to for example Copenhagen or Boston"""
#
#     def __init__(self):
#         self.latitude = ORSTED_CPH_HQ["latitude"]
#         self.longitude = ORSTED_CPH_HQ["longitude"]
#         self.zoom = 1
#         self.pitch = 40.0
#
#     def edit_view(self):
#         """Lets the user edit the attributes"""
#         location = st.sidebar.selectbox("Location", options=list(LOCATIONS.keys()), index=0)
#         self.latitude = LOCATIONS[location]["latitude"]
#         self.longitude = LOCATIONS[location]["longitude"]
#
#         self.zoom = st.sidebar.slider("Zoom", min_value=0, max_value=20, value=self.zoom)
#         self.pitch = st.sidebar.slider(
#             "Pitch", min_value=0.0, max_value=100.0, value=self.pitch, step=10.0
#         )
#
#     @property
#     def view_state(self) -> pdk.ViewState:
#         """The ViewState according to the attributes
#
#         Returns:
#             pdk.ViewState -- [description]
#         """
#         return pdk.ViewState(
#             longitude=self.longitude,
#             latitude=self.latitude,
#             zoom=self.zoom,
#             min_zoom=0,
#             max_zoom=15,
#             pitch=self.pitch,
#             # bearing=-27.36,
#         )
#
#
# class GlobalPowerPlantDatabaseApp:
#     """The main app showing the Global Power Plant Database"""
#
#     def __init__(self):
#         self.view_state_component = ViewStateComponent()
#         self.data = self.get_data()
#         self.show_data = False
#
#     @staticmethod
#     @st.cache
#     def get_data() -> pd.DataFrame:
#         """The Global Power Plant data
#
#         Returns:
#             pd.DataFrame -- The Global Power Plant data cleaned and transformed
#         """
#         try:
#             data = pd.read_csv(POWER_PLANT_PATH)
#         except FileNotFoundError:
#             data = pd.read_csv(POWER_PLANT_URL)
#
#         # Clean
#         data.primary_fuel = data.primary_fuel.fillna("NA")
#         data.capacity_mw = data.capacity_mw.fillna(1)
#
#         # Transform
#         data["primary_fuel_color"] = data.primary_fuel.map(FUEL_COLORS)
#         data["primary_fuel_color"] = data["primary_fuel_color"].fillna("gray")
#         data["color_r"] = data["primary_fuel_color"].map(COLORS_R)
#         data["color_g"] = data["primary_fuel_color"].map(COLORS_G)
#         data["color_b"] = data["primary_fuel_color"].map(COLORS_B)
#         data["color_a"] = 140
#
#         return data[
#             [
#                 "capacity_mw",
#                 LATITUDE_COLUMN,
#                 LONGITUDE_COLUMN,
#                 "primary_fuel_color",
#                 "color_r",
#                 "color_g",
#                 "color_b",
#                 "color_a",
#             ]
#         ]
#
#     def _scatter_plotter_layer(self):
#         return pdk.Layer(
#             "ScatterplotLayer",
#             data=self.data,
#             get_position=[LONGITUDE_COLUMN, LATITUDE_COLUMN],
#             get_fill_color="[color_r, color_g, color_b, color_a]",
#             get_radius="capacity_mw*10",
#             pickable=True,
#             opacity=0.8,
#             stroked=False,
#             filled=True,
#             wireframe=True,
#         )
#
#     def _deck(self):
#         return pdk.Deck(
#             map_style="mapbox://styles/mapbox/light-v9",
#             initial_view_state=self.view_state_component.view_state,
#             layers=[self._scatter_plotter_layer()],
#             tooltip={"html": "<b>Color Value:</b> {primary_fuel}", "style": {"color": "white"}},
#         )
#
#     def view(self):
#         """Main view of the app"""
#         # self.view_state_component.edit_view() # Does not work
#         st.write(__doc__)
#
#         st.pydeck_chart(self._deck())
#
#         st.write(
#             """The maps shows the power plant
#
# - **location** by latitude, longitude coordinates
# - **fuel type** by color and
# - **capacity in MW** by bubble size
# """
#         )
#         st.json(FUEL_COLORS)
#
#         st.write(
#             """Unfortunately **tooltips are not supported**. And there are also other issues.
# See
#
# - [Issue 984](https://github.com/streamlit/streamlit/issues/984)
# - [Issue 985](https://github.com/streamlit/streamlit/issues/985)"""
#         )
#
#
# APP = GlobalPowerPlantDatabaseApp()
# APP.view()
コード例 #24
0
def compute_class(data, target):
    """Train several classifiers and render a Streamlit comparison report.

    String-valued columns and the target column are dropped from ``data``;
    the rest is standardised, split 80/20 and, when more than two feature
    columns remain, projected onto two principal components. Five
    classifiers are fitted and the page shows: a data preview, a correlation
    heatmap, a metrics table, one confusion matrix per model, ROC curves and
    a funnel chart ordered by accuracy.

    Args:
        data: DataFrame with the feature columns (may still contain the
            target column and string columns).
        target: Series with the class labels; ``target.name`` is the column
            removed from ``data``.
            NOTE(review): the confusion-matrix axis labels assume a binary
            0/1 target — confirm against callers.
    """
    st.title("Classification Report")
    #--------------------------------------preprocessing-------------------------------
    # A column is treated as text when its first value is a string.
    drop_str = [
        col for col in data.columns if isinstance(data[col].iloc[0], str)
    ]
    data_head = data.copy(deep=True)  # is used in presentation
    data = data.drop(drop_str, axis=1)
    # errors='ignore': a string-valued target was already removed above and
    # would otherwise raise KeyError here.
    data = data.drop(target.name, axis=1, errors='ignore')
    use_pca = len(data.columns) > 2

    #------------------------------scaling------------------------
    sc_1 = StandardScaler()
    x_scaled = sc_1.fit_transform(data)

    #----------------------------splits---------------------
    x_train, x_test, y_train, y_test = train_test_split(x_scaled,
                                                        target,
                                                        test_size=0.2,
                                                        random_state=177013)

    #-----------------------PCA only if >2 cols---------------
    if use_pca:
        reducer = PCA(n_components=2)
        x_train = reducer.fit_transform(x_train)
        x_test = reducer.transform(x_test)

    #----------------------------algorithms-----------------------------NB is disqualified
    # (has some reservations about neg values)

    classification_models = {
        "LR": LogisticRegression(),
        "SVC": SVC(kernel="rbf"),
        "DTC": DecisionTreeClassifier(),
        "RFC": RandomForestClassifier(n_estimators=500),
        "XGBC": XGBClassifier(n_estimators=500)
    }

    # Use one fixed positive class for every ROC curve. The previous
    # `pos_label=y_pred[0]` depended on each model's first prediction, so
    # curves could be computed against different classes.
    pos_label = max(y_test)

    metric_dict = {}
    accu_dict = {}
    for name, model in tqdm(classification_models.items()):
        model.fit(x_train, y_train)
        y_pred = model.predict(x_test)
        accuracy = accuracy_score(y_test, y_pred)
        # pos_label is ignored by precision/recall when average != 'binary',
        # so it is not passed for the micro-averaged scores.
        metric_dict[name] = {
            "precision":
            round(precision_score(y_test, y_pred, average="micro"), 2),
            "recall":
            round(recall_score(y_test, y_pred, average="micro"), 2),
            "f1_score":
            round(f1_score(y_test, y_pred, average='micro'), 2),
            "accuracy":
            accuracy,
            "confusion":
            confusion_matrix(y_test, y_pred),
            "ROC_Vals":
            roc_curve(y_test, y_pred, pos_label=pos_label)
        }
        accu_dict[name] = accuracy

    # Scalar metrics only; matrices and ROC arrays stay in metric_dict.
    metric_df = pd.DataFrame(metric_dict).drop(["confusion", "ROC_Vals"],
                                               axis=0).reset_index()
    model_names = metric_df.columns[1:].values

    #--------------------------------------------- presentation and graphs -----------------------------------0

    #-------------------------------------view data --------------------------

    st.header("Lets look at what we are dealing with ")
    st.dataframe(data_head.head())

    #-----------------------------corelation_plot----------------------------------

    st.header("Corelation Plot")
    st.markdown("zoom if intelligible")
    corr_val = data.corr()
    corr = ff.create_annotated_heatmap(y=corr_val.index.tolist(),
                                       x=corr_val.columns.tolist(),
                                       z=corr_val.values)
    # Shrink the cell labels and round them to 4 decimals.
    for annotation in corr.layout.annotations:
        annotation.font.size = 8
        annotation.text = str(round(float(annotation.text), 4))
    corr.update_layout(width=800, height=800)
    st.plotly_chart(corr)
    st.header("METRICS FOR CLASSIFICATION ALGORITHMS")

    #------------------------metric_table-----------------
    table = ff.create_table(metric_df)
    table.update_layout(width=1350)
    st.plotly_chart(table)

    #--------------heatmaps------------------------------
    st.markdown("### CONFUSION MATRICES")

    fig = make_subplots(rows=1,
                        cols=len(model_names),
                        shared_yaxes=True,
                        horizontal_spacing=0.05,
                        subplot_titles=model_names)

    annot_var = []
    # One subplot per model, left to right; the column number doubles as the
    # axis suffix used to re-anchor each heatmap's annotations.
    for axis_count, al in enumerate(model_names, start=1):
        # confusion_matrix orders rows/columns by sorted label, so index 0 is
        # class 0 (the labels were previously reversed).
        heatmap2 = ff.create_annotated_heatmap(
            z=metric_dict[al]["confusion"],
            x=["0_pred", "1_pred"],
            y=["0_true", "1_true"],
            annotation_text=metric_dict[al]["confusion"])
        fig.add_trace(heatmap2.data[0], 1, axis_count)
        annot_temp = list(heatmap2.layout.annotations)
        for annotation in annot_temp:
            annotation['xref'] = "x" + str(axis_count)
            annotation['yref'] = 'y' + str(axis_count)
        annot_var = annot_var + annot_temp

    # Keep the subplot titles and append the per-cell annotations.
    lo = list(fig['layout']["annotations"]) + annot_var
    fig.update_layout(annotations=lo, autosize=True, width=1350)

    st.plotly_chart(fig)

    #------------scatter plots----------------

    scatter_plot = go.Figure(
        go.Scatter(x=[0, 1], y=[0, 1], mode="lines", name="ref"))

    for al in model_names:
        fpr, tpr, _ = metric_dict[al]["ROC_Vals"]
        AUC_val = auc(fpr.tolist(), tpr.tolist())

        scat = go.Scatter(x=fpr.tolist(),
                          y=tpr.tolist(),
                          name=f"{al} - AUC val - {AUC_val:.2f}")
        scatter_plot.add_trace(scat)

    scatter_plot.update_layout(width=1300, height=500)
    st.header("ROC_curves")
    st.plotly_chart(scatter_plot)

    #-------------funnel-chart-----------------

    st.header("Recommendations")
    st.markdown(
        "the percent below classifier represents recommended probability for classifier"
    )
    # Highest accuracy first.
    accu_dict = dict(
        sorted(accu_dict.items(), key=lambda item: item[1], reverse=True))
    funnel = go.Figure(
        go.Funnelarea(values=list(accu_dict.values()),
                      text=list(accu_dict.keys())))
    funnel.update_layout(showlegend=False)
    st.plotly_chart(funnel)
コード例 #25
0
                              key='1')
# Tweets per sentiment as a two-column frame for plotting.
sentiment_count = data['airline_sentiment'].value_counts()
sentiment_count = pd.DataFrame({
    'Sentiment': sentiment_count.index,
    'Tweets': sentiment_count.values
})

# The chart is hidden by default; un-ticking "Hide" shows it.
if not st.sidebar.checkbox('Hide', True):
    st.markdown('### Number of Tweets by Sentiment')
    if select == 'Histogram':
        fig = ps.bar(sentiment_count,
                     x='Sentiment',
                     y='Tweets',
                     color='Tweets',
                     height=500)
        st.plotly_chart(fig)
    else:
        fig = ps.pie(sentiment_count,
                     names='Sentiment',
                     values='Tweets',
                     color='Tweets',
                     height=500)
        st.plotly_chart(fig)

#plotting interactive map
st.sidebar.subheader('When and where are users tweeeting from?')
hour = st.sidebar.slider('Hour of the day', 0, 23)
# `dt.hour` is 0-23. The numeric input used to span 1-24 and discarded the
# slider value; it now shares the slider's range and starts from its value.
hour = st.sidebar.number_input('Hour of the day',
                               min_value=0,
                               max_value=23,
                               value=hour)
modified_data = data[data['tweet_created'].dt.hour == hour]

if not st.sidebar.checkbox('Close', True):
コード例 #26
0
def compute_reg(data, target):
    """Train several regressors and render a Streamlit comparison report.

    String-valued columns and the target column are dropped from ``data``;
    features and target are standardised, split 80/20 and, when more than
    two feature columns remain, the features are projected onto two
    principal components. Five regressors are fitted and the page shows: a
    data preview, a correlation heatmap, a metrics table and a radar plot of
    the metrics per model.

    Args:
        data: DataFrame with the feature columns (may still contain the
            target column and string columns).
        target: Series with the regression target; ``target.name`` is the
            column removed from ``data``.
    """
    st.title("Regression Report")
    #------------------preprocessing-------------------------

    # A column is treated as text when its first value is a string.
    drop_str = [
        col for col in data.columns if isinstance(data[col].iloc[0], str)
    ]
    data_head = data.copy(deep=True)  # is used in presentation
    data = data.drop(drop_str, axis=1)
    # errors='ignore': do not fail when the target column is not (or no
    # longer) part of `data`.
    data = data.drop(target.name, axis=1, errors='ignore')
    # Correlation for the presentation. Text columns are dropped explicitly
    # because recent pandas raises on them instead of skipping them.
    corr_mat = data_head.drop(drop_str, axis=1).corr()
    use_pca = len(data.columns) > 2

    #-------------------feature scaling--------------------------------

    sc_1 = StandardScaler()
    sc_2 = StandardScaler()
    x_scaled = sc_1.fit_transform(data)
    y_scaled = sc_2.fit_transform(np.array(target).reshape(-1, 1))

    x_train, x_test, y_train, y_test = train_test_split(x_scaled,
                                                        y_scaled,
                                                        test_size=0.2,
                                                        random_state=177013)

    #----------PCA only if >2 cols---------------
    if use_pca:
        reducer = PCA(n_components=2)
        x_train = reducer.fit_transform(x_train)
        x_test = reducer.transform(x_test)

    #------------------------------------------model_building-------------------------
    #----------------POLYNOMIAL REGRESSION is disqualified for reasons

    regression_models = {
        "LINEAR_REG": LinearRegression(),
        "SVR": SVR(),
        "DTR": DecisionTreeRegressor(),
        "RFR": RandomForestRegressor(n_estimators=400),
        "XGBR": GradientBoostingRegressor(n_estimators=400)
    }

    # All metrics are computed on the standardised target.
    metric_dict = {}
    for name, model in tqdm(regression_models.items()):
        model.fit(x_train, y_train.ravel())
        y_pred = model.predict(x_test)
        mse = mean_squared_error(y_test, y_pred)
        metric_dict[name] = {
            "Max_error": round(max_error(y_test, y_pred), 5),
            "MAE": round(mean_absolute_error(y_test, y_pred), 3),
            "MSE": round(mse, 3),
            "R2-score": round(r2_score(y_test, y_pred), 5),
            # RMSE as sqrt(MSE): the `squared=False` keyword is no longer
            # available in recent scikit-learn releases.
            "RMSE": round(mse ** 0.5, 3),
            "MAPE": round(mean_absolute_percentage_error(y_test, y_pred), 3)
        }

    metric_df = pd.DataFrame(metric_dict).reset_index()

    #---------------------------Presentation----------------------------------

    #-------------------------------------view data --------------------------

    st.header("Lets look at what we are dealing with ")
    st.dataframe(data_head.head())
    #-----------------------------corelation_plot----------------------------------
    st.header("Corelation Plot")
    corr = ff.create_annotated_heatmap(y=corr_mat.index.tolist(),
                                       x=corr_mat.columns.tolist(),
                                       z=corr_mat.values)
    # Shrink the cell labels and round them to 4 decimals.
    for annotation in corr.layout.annotations:
        annotation.font.size = 8
        annotation.text = str(round(float(annotation.text), 4))
    corr.update_layout(width=800, height=800)
    st.plotly_chart(corr)

    #-------------------------------metric table----------------------------
    st.header("METRICS FOR REGRESSION ALGORITHMS")
    table = ff.create_table(metric_df)
    table.update_layout(width=1350)
    st.plotly_chart(table)
    st.markdown(
        "MAPE does not represent the output as a percentage in range [0, 100]. Instead, it represents in range [0, 1/eps]."
    )

    #------------------------------RADAR_plots------------------------------
    radar = go.Figure()
    # Row 0 is "Max_error"; it is left out of the radar plot.
    metric_df = metric_df.drop([0], axis=0)

    # One polygon per model, with the remaining metrics as the angular axis.
    for metric_lis in metric_df.columns[1:].values:
        radar.add_trace(
            go.Scatterpolar(r=metric_df[metric_lis].tolist(),
                            theta=metric_df["index"].tolist(),
                            fill='toself',
                            name=metric_lis))
    radar.update_layout(
        polar=dict(radialaxis=dict(visible=True, range=[0, 2])),
        showlegend=True,
        title="Radar Plot! (use legend to disable individual algorithms)",
        width=800,
        height=650)

    st.plotly_chart(radar)
コード例 #27
0
# Sidebar age-bracket label -> (male column, female column) of Map.csv.
_AGE_BRACKET_COLUMNS = {
    'under 18': ('mu18', 'fu18'),
    '18 - 34': ('m18', 'f18'),
    '35 - 64': ('m35', 'f35'),
    'over 65': ('mo65', 'fo65'),
}

# Values of homeCountry/destinationCountry that are removed before plotting.
_NON_COUNTRY_ROWS = ['Overall', 'Überseeische Länder und Hoheitsgebiete']


def _people_columns(gender, ages):
    """Return, in order, the count columns selected by the gender/age filter.

    An empty list is returned when gender is neither 'Female' nor 'Male' and
    no age bracket is chosen; the caller then uses the 'total' column.
    """
    if gender == 'Female':
        sexes, whole_gender = (1, ), ['womenTotal']
    elif gender == 'Male':
        sexes, whole_gender = (0, ), ['menTotal']
    else:
        sexes, whole_gender = (0, 1), []

    if not ages:
        return whole_gender

    columns = []
    for age in ages:
        bracket = _AGE_BRACKET_COLUMNS.get(age)
        if bracket is not None:  # labels that are not age brackets are ignored
            columns.extend(bracket[k] for k in sexes)
    return columns


def _hover_labels(names, counts, names_first):
    """Interleave '<name>: <count>' labels with placeholders, one pair per row.

    The line trace holds two points per row, so two hover entries are emitted
    per row: the label and a placeholder (the row's running number).
    ``names_first`` selects whether the label or the placeholder comes first
    in each pair. The final placeholder is set to None.
    """
    entries = []
    for position, (name, count) in enumerate(zip(names, counts)):
        label = name + ': ' + str(count)
        entries.extend((label, position) if names_first else (position,
                                                              label))
    if entries:
        entries[-1 if names_first else -2] = None
    return entries


def write():
    """Render the "Map view" page (called from app.py).

    Builds the sidebar filters and the year slider, filters the asylum data
    accordingly and draws a single Plotly figure containing a choropleth
    trace and two line-map traces; only the traces of the map type chosen in
    the sidebar are visible.
    """
    with st.spinner("Loading Map ..."):

        # Both maps are built from the same CSV, so it is downloaded once and
        # copied for the Line Map after the shared filtering below.
        df = pd.read_csv(
            "https://raw.githubusercontent.com/hannahkruck/visuasyl/master/src/datasets/Map.csv",
            encoding="utf8",
            sep=";")

        # Title
        st.title("Map view")

        #----------------- Side bar (filter options) -------------------

        # Select map (Choropleth or Line Map)
        selectedMapType = st.sidebar.radio("Map",
                                           ('Choropleth Map', 'Line Map'))
        showChoropleth = selectedMapType == 'Choropleth Map'
        showLine = not showChoropleth

        # General filter (Age, Gender)
        st.sidebar.header("Filters")
        selectedAge = st.sidebar.multiselect(
            "Select Age", ("under 18", "18 - 34", "35 - 64", "over 65"))
        selectedGender = st.sidebar.selectbox("Select Gender",
                                              ("All", "Male", "Female"))

        # --- Special filter for Choropleth Map --
        st.sidebar.header("Filter for Choropleth Map")
        # Drop down menu for Choropleth Map Information
        mapInformation = st.sidebar.selectbox(
            "Select Map Information", ('Applications to target countries',
                                       'Applicants by country of origin'))
        # Column, geo code and colour scale depend on the chosen information
        if 'target' in mapInformation:
            selectedMapChoropleth = 'destinationCountry'
            selectedCode = 'geoCodeDC'
            mapColor = 'Blues'
        else:
            selectedMapChoropleth = 'homeCountry'
            selectedCode = 'geoCodeHC'
            mapColor = 'Reds'

        # --- Special filter for Line Map ---
        st.sidebar.header("Filter for Line Map")
        # Show routes from a particular origin country or to a particular
        # target country
        selectedType = st.sidebar.radio("Select type",
                                        ('Target country', 'Origin country'))
        if selectedType == 'Target country':
            countryCategory = 'destinationCountry'
            namesToShow = 'homeCountry'
            selectedLon = 'lonDC'
            selectedLat = 'latDC'
        else:
            countryCategory = 'homeCountry'
            namesToShow = 'destinationCountry'
            selectedLon = 'lonHC'
            selectedLat = 'latHC'
        # The options are taken from the still unfiltered data
        countryOptions = df[countryCategory].unique()
        # Drop down menu for selected country
        selectedCountryMapLine = st.sidebar.selectbox("Select country",
                                                      countryOptions)

        #----------------- Website content (Year slider, i-Button) -------------------

        # --- Markdown for Info icon ---
        # CSS and HTML Code
        st.markdown('''
        <!-- https://www.w3schools.com/css/tryit.asp?filename=trycss_tooltip_transition & https://www.w3schools.com/css/tryit.asp?filename=trycss_tooltip_right-->
        <style>
            .tooltip {
              position: relative;
              display: inline-block;
              font-size:1.6rem;
              
            }
            
            .tooltip .tooltiptext {
              visibility: hidden;
              width: 50vw;
              background-color: #f1f3f7;
              color: #262730;
              text-align: justify;
              border-radius: 6px;
              padding: 5px;
              font-size:0.9rem;
              
              /* Position the tooltip */
              position: absolute;
              z-index: 1;
              top: -5px;
              left: 105%;
              
              opacity: 0;
              transition: opacity 0.8s;
            }
            
            .tooltip:hover .tooltiptext {
              visibility: visible;
              opacity: 1;
            }
        </style>
        ''',
                    unsafe_allow_html=True)

        # Text for tooltip
        st.markdown('''
        <div class="tooltip">&#x24D8
        <span class="tooltiptext">
        <b>Choropleth Map</b><br>The Choropleth Map shows the number of asylum applications per country in Europe and the number of refugees per country worldwide for the selected year (see filter 'Select Map Information' for Choropleth Map).
        <br><br>
        <b>Line Map</b><br>The Line Map presents the routes of the refugees depending on the selected type. The type 'target country' shows from which countries the asylum seekers originate based on a specific target country. The type 'origin country' indicates where the asylum seekers are fleeing to from a specific country of origin.
        <br><br>
        <b>Colour gradient</b><br> It should be noted here that the colour gradient adjusts to the maximum and minimum value, i.e. the colour changes with each filtering.
        
        </span></div>
        ''',
                    unsafe_allow_html=True)

        # Slider to choose the year
        selected_year = st.slider("", (int(df["year"].min())),
                                  (int(df["year"].max())))

        # Title for map regarding the chosen year
        st.subheader('Asylum seekers in the year %s' % selected_year)

        #----------------- Data preparation (general) -------------------

        # Remove the non-country rows and every year but the selected one
        unwanted = (df['destinationCountry'].isin(_NON_COUNTRY_ROWS)
                    | df['homeCountry'].isin(_NON_COUNTRY_ROWS)
                    | (df['year'] != selected_year))
        df.drop(df[unwanted].index, inplace=True)

        # Independent copy for the Line Map, which filters further on its own
        df2 = df.copy()

        # Columns that count the selected people; 'total' is used directly
        # when neither a gender nor an age bracket restricts the selection.
        countColumns = _people_columns(selectedGender, selectedAge)
        unrestricted = (not selectedAge
                        and selectedGender not in ('Female', 'Male'))
        valueColumn = 'total' if unrestricted else 'subtotal'

        #----------------- Data preparation (Choropleth Map) -------------------

        # Sum the selected gender/age columns into the new column 'subtotal'
        df['subtotal'] = 0
        for column in countColumns:
            df['subtotal'] = df['subtotal'] + df[column]

        # Per country (and year) sum of the value column in a new column 'sum'
        df['sum'] = df.groupby([selectedMapChoropleth,
                                'year'])[valueColumn].transform('sum')

        #----------------- Data preparation (Line Map) -------------------

        # Keep only the rows of the country chosen for the Line Map
        # (countryCategory is 'homeCountry' or 'destinationCountry')
        df2.drop(df2[df2[countryCategory] != selectedCountryMapLine].index,
                 inplace=True)

        df2['subtotal'] = 0

        if unrestricted:
            # All people are considered: only rows with a zero total go
            df2.drop(df2[df2['total'] == 0].index, inplace=True)
        for column in countColumns:
            # A row with a zero in any selected column is removed entirely
            df2.drop(df2[df2[column] == 0].index, inplace=True)
            df2['subtotal'] = df2['subtotal'] + df2[column]

        # Hover texts for the line trace. Its points alternate destination
        # (even index) and home country (odd index), see lons/lats below, so
        # home-country names go second in each pair and destination-country
        # names go first.
        nameList = _hover_labels(df2[namesToShow].values.tolist(),
                                 df2[valueColumn].values.tolist(),
                                 names_first=(namesToShow != 'homeCountry'))

        st.write(
            '<style>div.Widget.row-widget.stRadio > div{flex-direction:row;}</style>',
            unsafe_allow_html=True)

        #----------------Create Maps with Plotly (Choropleth and Line Map)---------------------------

        fig = go.Figure()

        # Choropleth Map
        fig.add_trace(
            go.Choropleth(
                locations=df[selectedCode],
                visible=showChoropleth,
                z=df['sum'],
                text=df[selectedMapChoropleth],
                colorscale=mapColor,
                autocolorscale=False,
                reversescale=False,
                name="",
                marker_line_color='darkgray',
                marker_line_width=0.5,
                colorbar_tickprefix='',
                colorbar_title='Number of<br>asylum<br>applications<br>',
            ))

        #--------- Line Map --------------
        # Marker for the selected country
        fig.add_trace(
            go.Scattergeo(
                locationmode='country names',
                lon=df2[selectedLon],
                lat=df2[selectedLat],
                hoverinfo='text',
                name=selectedCountryMapLine,
                text=df2[countryCategory],
                line=dict(width=1, color='red'),
                opacity=0.510,
                visible=showLine,
                mode='markers',
            ))

        # Interleave destination (even) and home (odd) coordinates so that
        # consecutive points form the route of one row
        lons = np.empty(2 * len(df2))
        lons[::2] = df2['lonDC']
        lons[1::2] = df2['lonHC']
        lats = np.empty(2 * len(df2))
        lats[::2] = df2['latDC']
        lats[1::2] = df2['latHC']

        # Set lines
        fig.add_trace(
            go.Scattergeo(locationmode='country names',
                          visible=showLine,
                          name='route and number <br>of asylum seekers',
                          hovertemplate=nameList,
                          lon=lons,
                          lat=lats,
                          mode='markers+lines',
                          line=dict(width=1, color='red'),
                          opacity=0.5))

        # Update layout choropleth map
        fig.update_layout(
            showlegend=True,
            geo=go.layout.Geo(
                scope='world',
                #projection_type = 'azimuthal equal area',
                showland=True,
                showcountries=True,
                landcolor='rgb(243, 243, 243)',
                countrycolor='rgb(105,105,105)',
            ),
        )

        # Update layout line map
        fig.update_layout(
            geo=dict(showframe=False,
                     showcoastlines=False,
                     projection_type='equirectangular'),
            autosize=True,
            margin=dict(
                l=0,
                r=0,
                b=0,
                t=20,
            ),
        )

        # Display figure
        st.plotly_chart(
            fig,
            use_container_width=True,
            config={
                'modeBarButtonsToRemove':
                ['lasso2d', 'select2d', 'pan2d', 'hoverClosestGeo']
            })
コード例 #28
0
# Cached with st.cache so the download is reused for the same ticker
@st.cache
def load_data(ticker):
  """Download the history of `ticker` from START to TODAY.

  The index of the downloaded frame is moved into a regular column.
  """
  downloaded = yf.download(ticker, START, TODAY)
  return downloaded.reset_index()

# Show a status line while the data is fetched, then mark it as finished.
# (Previously the same "Loading Data..." text was written again after the
# load, so completion was never visible.)
data_load_state = st.text("Loading Data...")
data = load_data(selected_stock)
data_load_state.text("Loading Data... Done!")

# Prediction of prices using Prophet.
# The training frame uses the column names 'ds' (date) and 'y' (value).
df_train = data[['Date', 'Close']]
df_train = df_train.rename(columns={"Date": "ds", "Close": 'y'})

m = Prophet()
m.fit(df_train)
# `period` (forecast horizon) and `n_years` are defined outside this snippet.
future = m.make_future_dataframe(periods=period)

forecast = m.predict(future)

st.write(f'Forecast plot for {n_years} years')
fig1 = plot_plotly(m, forecast)
st.plotly_chart(fig1)

# Extra Components graphs
st.write("COMPONENTS")
fig2 = m.plot_components(forecast)
st.write(fig2)
コード例 #29
0
# Demo page: draws one sample chart depending on `option` (set elsewhere).
if option == "Pyplot":
    array = np.random.normal(1, 1, size=100)
    # Draw on an explicit figure and pass it to Streamlit instead of relying
    # on matplotlib's global figure via a bare st.pyplot().
    hist_fig, hist_ax = plt.subplots()
    hist_ax.hist(array, bins=20)
    st.pyplot(hist_fig)

if option == "Plotly Chart":
    # Three samples of 200 normal draws, shifted by -2, 0 and +2
    x1 = np.random.randn(200) - 2
    x2 = np.random.randn(200)
    x3 = np.random.randn(200) + 2

    hist_data = [x1, x2, x3]
    group_labels = ['Group 1', 'Group 2', 'Group 3']

    fig = ff.create_distplot(hist_data, group_labels, bin_size=[.1, .25, .5])
    st.plotly_chart(fig, use_container_width=True)

if option == "Graphviz Chart":
    graph = graphviz.Digraph()
    graph.edge('run', 'intr')
    graph.edge('intr', 'runbl')
    graph.edge('runbl', 'run')
    graph.edge('run', 'kernel')
    graph.edge('kernel', 'zombie')
    graph.edge('kernel', 'sleep')
    graph.edge('kernel', 'runmem')
    graph.edge('sleep', 'swap')
    graph.edge('swap', 'runswap')
    graph.edge('runswap', 'new')
    graph.edge('new', 'runmem')
    graph.edge('sleep', 'runmem')
    # NOTE(review): the graph is built but never rendered in this snippet;
    # presumably st.graphviz_chart(graph) is meant to follow — confirm.
コード例 #30
0
def main():
    st.title("clustering using the top2vec")
    st.subheader("top words on complaint")
    st.write(wc(complaint_words()))
    st.subheader("tweet trends")
    st.write("this dataset based on tweets that has keyword 'koinworks'")

    st.subheader("visualization of the dataset")
    st.markdown("#### doc2vec")
    vectors, topic_vectors, model = load_vectors()
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(x=[a[0] for a in vectors], y=[a[1] for a in vectors], mode="markers")
    )
    fig.add_trace(
        go.Scatter(
            x=[a[0] for a in topic_vectors],
            y=[a[1] for a in topic_vectors],
            mode="markers",
        )
    )
    st.plotly_chart(fig)
    st.markdown("#### kmeans")
    C = _cluster()
    plot_df = C.plot_df()
    k_top, d_top = C.top_words()
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=plot_df["x"],
            y=plot_df["y"],
            marker_color=plot_df["kmeans_label"],
            mode="markers",
        )
    )
    st.plotly_chart(fig)
    st.write(k_top)
    st.markdown("#### dbscan")
    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=plot_df["x"],
            y=plot_df["y"],
            marker_color=plot_df["dbscan_label"],
            mode="markers",
        )
    )
    st.plotly_chart(fig)
    st.write(d_top)

    st.subheader("search tweets")
    query = st.text_input("keyword")
    result = ""
    if query is not "":
        try:
            result = model.search_documents_by_keywords(query.split(), 50)
        except ValueError as e:
            st.write("no tweets detected, maybe try another keyword")
            # print('word is not in vocab')
    s = df_wrapper(result)
    st.dataframe(s, width=1000)
    st.subheader("similar tweets by distance")