Example #1
chem_data = pd.json_normalize(chem_data)
chem_data = chem_data.fillna(0)

#Adding chem_data to original DataFrame
data = data.join(chem_data)

#Modifying Data According To The Needs
data = data.drop(columns=["Materials Id"])

#Writing The Data Into The App
st.subheader("Data Information")
st.dataframe(data)
st.write(data.describe())

#Displaying The Bar Graph
chart = st.bar_chart(data)


#Taking User Input
def get_user_input():
    #No. Of Atoms Chosen By User
    li = st.sidebar.slider("Number Of Lithium Atoms", 0, 20, 2)
    mn = st.sidebar.slider("Number Of Manganese Atoms", 0, 20, 1)
    si = st.sidebar.slider("Number Of Silicon Atoms", 0, 20, 2)
    o = st.sidebar.slider("Number Of Oxygen Atoms", 0, 50, 8)
    fe = st.sidebar.slider("Number Of Iron Atoms", 0, 10, 1)
    co = st.sidebar.slider("Number Of Cobalt Atoms", 0, 10, 0)
    formation_energy = st.sidebar.slider("Formation Energy In eV", -3.0, 0.0,
                                         -2.61)
    e_above_hull = st.sidebar.slider("Energy Above Hull in eV", 0.0, 0.2,
                                     0.0582)
    time.sleep(5)

    st.balloons()
    st.success('The results are ready for viewing')

    time.sleep(3)


    my_dataframe = pd.read_csv('dataset/creditcard.csv', nrows=20)
    st.dataframe(my_dataframe)
    # st.table(data.iloc[0:10])


    d_copy = my_dataframe.copy()
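    # The iloc-based drop below removes every column except the first and the
    # last two; for the standard credit-card fraud dataset that leaves the
    # Time, Amount and Class columns.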
    d_copy.drop(d_copy.iloc[:, 1:-2], inplace = True, axis = 1)
    st.dataframe(d_copy)

    st.area_chart(d_copy)
    st.bar_chart(d_copy)
    # st.altair_chart expects an altair.Chart object rather than a raw DataFrame.
    # A minimal sketch, assuming `import altair as alt` at the top of the script
    # and that the Time/Amount columns of the credit-card data survived the drop:
    st.altair_chart(
        alt.Chart(d_copy).mark_bar().encode(x='Time:Q', y='Amount:Q'))


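# Assumption: `pad_sequences` here is the already-padded input array for the
# BiLSTM (if it were the raw keras.preprocessing.sequence.pad_sequences
# function, predict() would fail), and `bilstm_model` is loaded elsewhere.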
y_pred = bilstm_model.predict(pad_sequences)

y_pred = np.argmax(y_pred, axis=1)

if st.button("Predict"):
    if y_pred[0] == 0:
        st.write("Fraud")
    elif y_pred[0] == 1:
        st.write("Normal")
Example #3
def main():

    df = pd.read_csv('base_ajustada.csv', sep=';', decimal=',')

    por_sexo = pd.DataFrame({
        'sexo': (df.SEXO.value_counts()),
        '%_Partic': (df.SEXO.value_counts() / df.shape[0]) * 100
    })
    faixa = pd.DataFrame({
        'faixa_etaria': (df.FAIXA_ETARIA.value_counts()),
        '%_Partic': (df.FAIXA_ETARIA.value_counts() / df.shape[0]) * 100
    })
    apoio_isol = pd.DataFrame({
        'apoio_isol': (df.APOIO_ISOL_SOCIAL.value_counts()),
        '%_Partic': (df.APOIO_ISOL_SOCIAL.value_counts() / df.shape[0]) * 100
    })
    comp_fam = pd.DataFrame({
        'comp_fam': (df.COMP_FAM_ISOL_SOCIAL.value_counts()),
        '%_Partic':
        (df.COMP_FAM_ISOL_SOCIAL.value_counts() / df.shape[0]) * 100
    })
    cruz = pd.DataFrame({
        'comp_fam': (df.COMP_FAM_ISOL_SOCIAL.value_counts()),
        'apoio_isol': (df.APOIO_ISOL_SOCIAL.value_counts())
    })
    part = pd.DataFrame({
        'id': (df.ID),
        'sexo': (df.SEXO),
        'faixa_etaria': (df.FAIXA_ETARIA)
    })
    #feminino = (pd.pivot_table(part,index=['sexo','faixa_etaria'],values=["id"],aggfunc=[len],margins=True))
    #masculino = (pd.pivot_table(part,index=['sexo','faixa_etaria'],values=["id"],aggfunc=[len],margins=True))

    st.sidebar.image('logo.png', width=200)
    st.sidebar.image('Instituto-Olhar-Azul.png', width=200)
    st.sidebar.markdown(
        '**Créditos:** Instituto Olhar https://www.institutoolhar.com.br/')
    st.sidebar.markdown(
        '**Veja a pesquisa na íntegra em:** https://blog.institutoolhar.com.br/termometro-da-crise-do-covid-19-3a-onda-rmbh/'
    )

    st.image('3onda.png', width=500)
    st.title('AceleraDev Data Science')
    st.header('**Termômetro da Crise Covid 19**')
    st.subheader(
        'Região Metropolitana de Belo Horizonte - 16 a 20 de Abril/2020')
    st.markdown(
        'O Instituto Olhar – Pesquisa e Informação Estratégica é uma empresa que se dedica a projetos de pesquisa e inteligência de mercado para auxiliar empresas e organizações governamentais e não-governamentais em seus processos de planejamento e tomada de decisão.'
    )
    st.subheader('Dimensões do Termômetro')
    st.markdown(
        'O Termômetro da Crise Covid-19 é formado por quatro medidas independentes, que variam de 0 a 10, sendo 0 para discorda totalmente e 10 para apoia totalmente:'
    )
    st.markdown(
        '**Isolamento Social**, que mede o quanto as pessoas apoiam e estão comprometidas com o isolamento; **Medo e da Presença do Covid-19** mede o quanto as pessoas estão temerosas e sentem a presença do Covid-19; **Atuação dos Governos** que mede a avaliação da população sobre a atuação dos governos Federal, Estadual e Municipais e **Economia**, referente a percepção sobre o impacto econômico gerado pelo Covid-19, em nível Mundial, Brasil, Estadual, Municipal e na Renda Familiar.'
    )
    st.markdown(
        'Neste exercício trabalharemos com a medida **Isolamento Social**.')

    st.subheader('Conhecendo os dados')
    st.markdown('')

    st.markdown('**Visualizando as informações**')
    check = st.checkbox("Clique aqui para exibir dados")
    if check:
        st.write(df)
        st.write('Quantidade de pessoas entrevistadas:', df.shape[0])
        st.markdown('')

    st.subheader('**Percentual de participação dos entrevistados:**')
    radio = st.radio('Escolha por:', ('Por Sexo', 'Por Faixa Etária'))
    if radio == 'Por Sexo':
        st.write(por_sexo)
        st.bar_chart(por_sexo['%_Partic'])
    if radio == 'Por Faixa Etária':
        st.write(faixa)
        st.bar_chart(faixa['%_Partic'])
        #st.area_chart (faixa['%_Partic'])
        #st.line_chart (faixa['%_Partic'])
        st.markdown('')

    st.write(pd.crosstab(part['sexo'], part['faixa_etaria'], margins=True))

    st.subheader('**Comportamento perante ao isolamento social**:')
    st.text('*Em quantidade de entrevistados, variando termômetro de 0 a 10')
    radio = st.radio(
        'Escolha por:',
        ('Apoio ao isolamento social', 'Comprometimento da família'))
    if radio == 'Apoio ao isolamento social':
        st.write(apoio_isol)
        st.area_chart(apoio_isol['%_Partic'])
    if radio == 'Comprometimento da família':
        st.write(comp_fam)
        st.area_chart(comp_fam['%_Partic'])
    st.markdown('')

    st.subheader('**Comprometimento Familiar x Apoio ao Isolamento Social**')
    st.text('*Em quantidade de entrevistados, variando termômetro de 0 a 10')
    st.write(cruz)
    st.line_chart(cruz)

    st.markdown('**Matriz de Correlação:**')
    sns.heatmap(cruz.corr(),
                vmin=0,
                vmax=1,
                fmt='.2f',
                square=True,
                linewidths=1,
                annot=True)
    st.pyplot()

    st.markdown('**Distribuição dos dados categóricos:**')
    sns.catplot(x="FAIXA_ETARIA",
                y="ID",
                hue="SEXO",
                kind='swarm',
                data=df,
                height=10,
                aspect=0.6)
    st.pyplot()

    st.markdown(
        '**Veja a pesquisa na íntegra em:** https://blog.institutoolhar.com.br/termometro-da-crise-do-covid-19-3a-onda-rmbh/'
    )

    st.sidebar.markdown('')
    st.sidebar.markdown('')
    st.sidebar.markdown('Desenvolvido por:')
    st.sidebar.markdown('**Juliana Figueiras de Souza**')
    st.sidebar.markdown('AceleraDev Data Science')
Example #4
DATE_COLUMN = 'date/time'
DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
            'streamlit-demo-data/uber-raw-data-sep14.csv.gz')

@st.cache
def load_data(nrows):
    data = pd.read_csv(DATA_URL, nrows=nrows)
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis='columns', inplace=True)
    data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
    return data

data_load_state = st.text('Loading data...')
data = load_data(10000)
data_load_state.text("Done! (using st.cache)")

if st.checkbox('Show raw data'):
    st.subheader('Raw data')
    st.write(data)

st.subheader('Number of pickups by hour')
hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24, range=(0,24))[0]
st.bar_chart(hist_values)
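# st.bar_chart on the bare histogram array uses the positional index 0-23 as
# the x-axis, which conveniently matches the hour of day here.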

# Some number in the range 0-23
hour_to_filter = st.slider('hour', 0, 23, 17)
filtered_data = data[data[DATE_COLUMN].dt.hour == hour_to_filter]

st.subheader('Map of all pickups at %s:00' % hour_to_filter)
st.map(filtered_data)
Example #5
def main():
    st.image('logo.png', width=200)
    st.title('AceleraDev Data Science')
    st.subheader('Semana 3 - Análise de dados exploratória')

    #data_load_state = st.text('Loading data...')
    #data = load_data(10000)
    #data_load_state.text("Done! (using st.cache)")

    file_in = st.file_uploader('Escolha um arquivo "csv":', type='csv')
    if file_in is not None:
        df = st.cache(pd.read_csv)(file_in)
        n_lin, n_col = df.shape
        aux = pd.DataFrame({
            'types': df.dtypes,
            'NA #': df.isna().sum(),
            'NA %': (df.isna().sum() / n_lin * 100)
        })
        aux.reset_index(inplace=True)
        aux.rename(columns={'index': 'names'}, inplace=True)
        num_cols = list(aux[aux['types'] != 'object']['names'])
        cat_cols = list(aux[aux['types'] == 'object']['names'])
        cols = list(df.columns)

        st.markdown('**Número de linhas e colunas**')
        st.markdown('{} linhas e {} colunas'.format(n_lin, n_col))

        max_lin_slider = n_lin if n_lin < 100 else 100
        slider = st.slider('Escolha a quantidade de linhas para espiar:',
                           min_value=1,
                           max_value=max_lin_slider,
                           value=10)
        st.dataframe(df.head(slider))

        st.markdown(f'**Nomes das {len(cat_cols)} colunas categóricas:**')
        st.text(str(cat_cols).strip('[]').replace('\'', ''))

        st.markdown(f'**Nomes das {len(num_cols)} colunas numéricas:**')
        st.text(str(num_cols).strip('[]').replace('\'', ''))

        st.markdown('**Informações das colunas:**')
        st.table(aux)

        st.markdown('**Detalhes das colunas numéricas:**')
        if st.checkbox('Transpor detalhes'):
            st.dataframe(df.describe().T)
        else:
            st.dataframe(df.describe())

        st.subheader('Análise univariada')
        selected_column = st.selectbox(
            'Escolha uma coluna para análise univariada:',
            list(aux[aux['types'] != 'object']['names']))
        hist_bin = 100 if (df[selected_column].nunique() <= 100)\
            else round(df[selected_column].nunique() / 10)
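        # np.histogram returns (counts, bin_edges); indexing the counts by the
        # left bin edges below lets st.bar_chart label the x-axis with the
        # start of each value range.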
        hist_values = np.histogram(df[selected_column], bins=hist_bin)
        hist_frame = pd.DataFrame(hist_values[0], index=hist_values[1][:-1])
        st.bar_chart(hist_frame)

        col_describe = df[selected_column].describe()
        col_more = pd.Series(
            {
                'skew': df[selected_column].skew(),
                'kurtosis': df[selected_column].kurtosis()
            },
            name=col_describe.name)
        st.write(pd.concat([col_more, col_describe]))

        st.subheader('Cálculos das colunas numéricas:')
        typeCalc = {
            'Média': lambda cols: cols.mean(),
            'Mediana': lambda cols: cols.median(),
            'Desvio Padrão': lambda cols: cols.std()
        }
        calc_choosed = st.selectbox(
            'Escolha o tipo de cálculo para as colunas numéricas:',
            ['', 'Média', 'Mediana', 'Desvio Padrão'])
        if calc_choosed != '':
            st.table(typeCalc[calc_choosed](df[num_cols]))

        st.markdown('**Percentual dos dados faltantes:**')
        st.table(aux[aux['NA #'] != 0][['types', 'NA %']])

        st.subheader('Imputação de dados numéricos faltantes')
        percentage = st.slider(
            'Escolha o limite percentual faltante das colunas a serem prenchidas:',
            min_value=0,
            max_value=100,
            value=0)
        col_list = list(aux[aux['NA %'] <= percentage]['names'])

        select_method = st.radio('Escolha um método de preenchimento:',
                                 ('Média', 'Mediana'))
        imputed_df = df[col_list].fillna(df[col_list].mean(
        ) if select_method == 'Média' else df[col_list].median())
        imputed_exploration = pd.DataFrame({
            'names': imputed_df.columns,
            'types': imputed_df.dtypes,
            'NA #': imputed_df.isna().sum(),
            'NA %': (imputed_df.isna().sum() / n_lin * 100)
        })
        st.table(imputed_exploration[
            imputed_exploration['types'] != 'object']['NA %'])

        st.subheader('Arquivo com os dados imputados:')
        st.markdown(get_table_download_link(imputed_df),
                    unsafe_allow_html=True)
Example #6
st.write('''
    ## 显示面积图
    streamlit.area_chart(data=None, width=0, height=0, use_container_width=True)
''')

chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c'])
st.area_chart(chart_data)

st.write('''
    ## 显示条形图
    streamlit.bar_chart(data=None, width=0, height=0, use_container_width=True)
''')

chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c'])
st.bar_chart(chart_data)

st.write('''
    ## 显示matplotlib.pyplot图
    streamlit.pyplot(fig=None, clear_figure=None, **kwargs)
    * Matplotlib支持几种不同类型的“后端”。如果使用Matplotlib和Streamlit遇到错误,请尝试将后端设置为“ TkAgg”
    * `echo "backend: TkAgg" >> ~/.matplotlib/matplotlibrc`
''')
import matplotlib.pyplot as plt
import numpy as np

arr = np.random.normal(1, 1, size=100)
plt.hist(arr, bins=20)
st.pyplot()
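# Note: recent Streamlit versions warn on bare st.pyplot() calls; the
# forward-compatible form passes the figure explicitly, e.g.
#     fig, ax = plt.subplots()
#     ax.hist(arr, bins=20)
#     st.pyplot(fig)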

st.write('''
Example #7
def main():
    """資料探勘App"""
    st.title("資料探勘App")
    st.subheader("製作第一個Data App")

    html_temp = """
	<div style="background-color:tomato;"><p style="color:white;font-size:40px;"> App by Streamlit</p></div>
	"""

    st.markdown(html_temp, unsafe_allow_html=True)

    # img_list = glob.glob("images/*.png")
    # # st.write(img_list)
    # # for i in img_list:
    # # 	c_image = Image.open(i)
    # # 	st.image(i)
    # all_image = [Image.open(i) for i in img_list]
    # st.image(all_image)

    # Helper to pick a data file path from the datasets folder
    def file_selector(folder_path='./datasets'):
        filenames = os.listdir(folder_path)
        selected_filename = st.selectbox('Select a file', filenames)
        return os.path.join(folder_path, selected_filename)

    #
    filename = file_selector()
    st.write('You selected `%s`' % filename)
    df = pd.read_csv(filename)

    # Show dataset
    if st.checkbox("顯示資料集"):
        number = int(st.number_input("Number of Rows to View", min_value=1, value=5))
        st.dataframe(df.head(number))
    # Show columns
    if st.button("顯示欄位"):
        st.write(df.columns)

    # Show Shape of Dataset
    if st.checkbox("資料集維度"):
        st.write(df.shape)
        data_dim = st.radio("Show Dimension by", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of  Rows")
            st.write(df.shape[0])
        elif data_dim == 'Columns':
            st.text("Number of Columns")
            st.write(df.shape[1])
    # Show Columns By Selection
    if st.checkbox("顯示選擇欄位"):
        all_columns = df.columns.tolist()
        selected_columns = st.multiselect('Select', all_columns)
        new_df = df[selected_columns]
        st.dataframe(new_df)

    # Datatypes
    if st.button("資料型態"):
        st.write(df.dtypes)

    # Value Counts
    if st.button("計數"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, -1].value_counts())

    # Summary
    if st.checkbox("摘要"):
        st.write(df.describe())

    st.subheader("Data Visualization")
    # Show Correlation Plots
    # Matplotlib Plot
    if st.checkbox("Correlation Plot [Matplotlib]"):
        plt.matshow(df.corr())
        st.pyplot()

    # Seaborn Plot
    if st.checkbox("Correlation Plot with Annotation[Seaborn]"):
        st.write(sns.heatmap(df.corr(), annot=True))
        st.pyplot()

    # Counts Plots
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target/Class")

        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox('Select Primary Column To Group By',
                                   all_columns_names)
        selected_column_names = st.multiselect('Select Columns',
                                               all_columns_names)
        if st.button("Plot"):
            st.text("Generating Plot for: {} and {}".format(
                primary_col, selected_column_names))
            if selected_column_names:
                vc_plot = df.groupby(
                    primary_col)[selected_column_names].count()
            else:
                vc_plot = df.iloc[:, -1].value_counts()
            st.write(vc_plot.plot(kind='bar'))
            st.pyplot()

    # Pie Plot
    if st.checkbox("Pie Plot"):
        all_columns_names = df.columns.tolist()
        # st.info("Please Choose Target Column")
        # int_column =  st.selectbox('Select Int Columns For Pie Plot',all_columns_names)
        if st.button("Generate Pie Plot"):
            # cust_values = df[int_column].value_counts()
            # st.write(cust_values.plot.pie(autopct="%1.1f%%"))
            st.write(df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
            st.pyplot()

    # Barh Plot
    if st.checkbox("BarH Plot"):
        all_columns_names = df.columns.tolist()
        st.info("Please Choose the X and Y Column")
        x_column = st.selectbox('Select X Columns For Barh Plot',
                                all_columns_names)
        y_column = st.selectbox('Select Y Columns For Barh Plot',
                                all_columns_names)
        barh_plot = df.plot.barh(x=x_column, y=y_column, figsize=(10, 10))
        if st.button("Generate Barh Plot"):
            st.write(barh_plot)
            st.pyplot()

    # Custom Plots
    st.subheader("Customizable Plots")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox("Select the Type of Plot",
                                ["area", "bar", "line", "hist", "box", "kde"])
    selected_column_names = st.multiselect('Select Columns To Plot',
                                           all_columns_names)
    # plot_fig_height = st.number_input("Choose Fig Size For Height",10,50)
    # plot_fig_width = st.number_input("Choose Fig Size For Width",10,50)
    # plot_fig_size =(plot_fig_height,plot_fig_width)
    cust_target = df.iloc[:, -1].name

    if st.button("Generate Plot"):
        st.success("Generating A Customizable Plot of: {} for :: {}".format(
            type_of_plot, selected_column_names))
        # Plot By Streamlit
        if type_of_plot == 'area':
            cust_data = df[selected_column_names]
            st.area_chart(cust_data)
        elif type_of_plot == 'bar':
            cust_data = df[selected_column_names]
            st.bar_chart(cust_data)
        elif type_of_plot == 'line':
            cust_data = df[selected_column_names]
            st.line_chart(cust_data)
        elif type_of_plot == 'hist':
            custom_plot = df[selected_column_names].plot(kind=type_of_plot,
                                                         bins=2)
            st.write(custom_plot)
            st.pyplot()
        elif type_of_plot == 'box':
            custom_plot = df[selected_column_names].plot(kind=type_of_plot)
            st.write(custom_plot)
            st.pyplot()
        elif type_of_plot == 'kde':
            custom_plot = df[selected_column_names].plot(kind=type_of_plot)
            st.write(custom_plot)
            st.pyplot()
        else:
            cust_plot = df[selected_column_names].plot(kind=type_of_plot)
            st.write(cust_plot)
            st.pyplot()

    st.subheader("Our Features and Target")

    if st.checkbox("Show Features"):
        all_features = df.iloc[:, 0:-1]
        st.text('Features Names:: {}'.format(all_features.columns))
        st.dataframe(all_features.head(10))

    if st.checkbox("Show Target"):
        all_target = df.iloc[:, -1]
        st.text('Target/Class Name:: {}'.format(all_target.name))
        st.dataframe(all_target.head(10))

    # Make Downloadable file as zip,since markdown strips to html
    st.markdown("""[google.com](iris.zip)""")

    st.markdown("""[google.com](./iris.zip)""")

    # def make_zip(data):
    # 	output_filename = '{}_archived'.format(data)
    # 	return shutil.make_archive(output_filename,"zip",os.path.join("downloadfiles"))

    def makezipfile(data):
        output_filename = '{}_zipped.zip'.format(data)
        with ZipFile(output_filename, "w") as z:
            z.write(data)
        return output_filename

    if st.button("Download File"):
        DOWNLOAD_TPL = f'[{filename}]({makezipfile(filename)})'
        # st.text(DOWNLOAD_TPL)
        st.text(DOWNLOAD_TPL)
        st.markdown(DOWNLOAD_TPL)
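        # Note: a plain markdown link to a local zip file is generally not
        # served by Streamlit, so this download usually will not work; newer
        # versions provide st.download_button for this purpose.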
        ]
        novas_colunas.insert(0, 'Taxonom')
        return novas_colunas


app = app_hub()

option = st.sidebar.selectbox('Escolha o exercicio: ', app.dicionario)
'Voce selecionou a opcao: ', option

if option == app.dicionario[0]:
    st.write(
        'Para cada coluna identique a quantidade de linhas com dados faltantes (em alguns casos, o dado faltante é uma string vazia, em outros casos é uma string contendo algum valor do tipo: "sem informação"). Faça um método que retorna a média de dados faltantes por coluna'
    )
    st.markdown('### Valores vazios ou faltantes: ')
    st.bar_chart(app.count_nulls(app.construir(app.linhas)))
    if st.checkbox('Mostrar lista de dados faltantes'):
        st.write(app.count_nulls(app.construir(app.linhas)))
    st.markdown('### Porcetagem de valores faltantes por coluna: ')
    st.bar_chart(app.media_nulls(app.count_nulls(app.construir(app.linhas))))
    if st.checkbox('Mostrar lista de porcetagem'):
        st.write(
            list(app.media_nulls(app.count_nulls(app.construir(app.linhas)))))

if option == app.dicionario[2]:
    mapa_bio = pd.read_csv('Arquivos/mapa_biodiversidade.csv', header=0)
    st.write(
        'Monte filtros de ocorrências por estados, nome de espécie (nome exato ou parte do nome) e categoria de ameaça, e outros filtros que julgar relevante.'
    )
    municipios = st.multiselect("Escolha os municipios",
                                list(set(mapa_bio['Municipio'])), ["Londrina"])
Example #9
        columns=["stock 1", "stock 2"],
        index=pd.date_range("1/2/2011", periods=20, freq="M"),
    )

    st.line_chart(chart_data2)

st.subheader("Example of area chart")

with st.echo():
    st.area_chart(chart_data)

st.subheader("Example of bar chart")

with st.echo():
    trimmed_data = chart_data[["pv", "uv"]].iloc[:10]
    st.bar_chart(trimmed_data)

st.subheader("Matplotlib")

st.write("You can use Matplotlib in Streamlit. "
         "Just use `st.pyplot()` instead of `plt.show()`.")
try:
    # noqa: F401
    with st.echo():
        from matplotlib import cm, pyplot as plt
        from mpl_toolkits.mplot3d import Axes3D

        # Create some data
        X, Y = np.meshgrid(np.arange(-5, 5, 0.25), np.arange(-5, 5, 0.25))
        Z = np.sin(np.sqrt(X**2 + Y**2))
def main():
    "Simple EDA App with Streamlist Components"
    menu = ["Home", 'EDA', "Sweetviz", "Custom Analysis", "ML", "About"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Home":
        image = Image.open('Data_science.jpg')
        #st.image(image, caption='commons.wikimedia.org' ,use_column_width=True)
        st.image(image, caption='commons.wikimedia.org')
        st.markdown(
            "Download here [data set](https://drive.google.com/file/d/1MAjahv92AkpGQ6-fPFrJbSXM8PkY6_00/view?usp=sharing) for checking stuff"
        )

    if choice == "EDA":
        st.title("Automated EDA with Pandas")
        st.markdown("You can upload your data in 'csv' format")
        data_file = st.file_uploader("Uplod CSV",
                                     type=['csv'],
                                     encoding=None,
                                     key='a')
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())
            profile = ProfileReport(df)
            st_profile_report(profile)

    elif choice == "Sweetviz":
        st.subheader("Automated EDA with Sweetviz")
        st.markdown("You can upload your data in 'csv' format")
        data_file = st.file_uploader("Uplod CSV",
                                     type=['csv'],
                                     encoding=None,
                                     key='a')
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())
            st.subheader("Analysis data with plots")
            if st.button("Sweetviz Report"):
                report = SV.analyze(df)
                report.show_html()
                st_display_sweetviz("SWEETVIZ_REPORT.html")
            # st.subheader("Compare data with plots")
            # if st.button("Compare"):
            #     report = SV.compare(df[100:], df[:100])
            #     report.show_html()
            #     st_display_sweetviz("Compare.html")

    elif choice == 'Custom Analysis':

        st.subheader("Data Visualization")
        data_file = st.file_uploader("Uplod CSV",
                                     type=['csv'],
                                     encoding=None,
                                     key='a')
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())

        if st.checkbox("Correlation Matrix"):
            st.write(sns.heatmap(df.corr(), annot=True))
            st.pyplot()

        if st.checkbox("Pie Chart"):
            all_columns = df.columns.to_list()
            columns_to_plot = st.selectbox("Select one Column", all_columns)
            pie_plot = df[columns_to_plot].value_counts().plot.pie(
                autopct="%1.1f%%")
            st.write(pie_plot)
            st.pyplot()

        all_columns = df.columns.to_list()
        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ['Area', 'Line', 'Bar', 'hist', 'box', 'kde'])
        selected_col_names = st.multiselect('Select Columns To plot Data',
                                            all_columns)

        if st.button("Produce Plot"):
            st.success(
                f"Creating Customizable Plot of {type_of_plot} for {selected_col_names}"
            )

            # Streamlit plots
            if type_of_plot == 'Area':
                custom_data = df[selected_col_names]
                st.area_chart(custom_data)

            elif type_of_plot == 'Line':
                custom_data = df[selected_col_names]
                st.line_chart(custom_data)

            elif type_of_plot == 'Bar':
                custom_data = df[selected_col_names]
                st.bar_chart(custom_data)

            # Custom Plots
            elif type_of_plot:
                custom_plt = df[selected_col_names].plot(kind=type_of_plot)
                st.write(custom_plt)
                st.pyplot()

    elif choice == "ML":
        st.title("Binary Classification")
        st.markdown("The is an basic idea about ML")
        st.sidebar.title("Binary Classification Web App")
        st.markdown("Are Mushrooms edible or poisonous? 🍄")
        #st.sidebar.markdown("Are your mushrooms edible or poisonous? 🍄")

        @st.cache(persist=True)
        #@st.cache(persist=True)
        def load_data():
            data = pd.read_csv("mushrooms.csv")
            labelEncoder = LabelEncoder()
            for col in data.columns:
                data[col] = labelEncoder.fit_transform(data[col])
            return data

        @st.cache(persist=True)
        def split(df):
            y = df.type
            X = df.drop("type", axis=1)
            X_train, X_test, y_train, y_test = train_test_split(X,
                                                                y,
                                                                test_size=0.2,
                                                                random_state=0)
            return X_train, X_test, y_train, y_test

        def plot_metrics(metrics_list):
            if "Confusion Matrix" in metrics_list:
                st.subheader("Confusion Matrix")
                plot_confusion_matrix(model, X_test, y_test)
                st.pyplot()

            if "ROC Curve" in metrics_list:
                st.subheader("ROC Curve")
                plot_roc_curve(model, X_test, y_test)
                st.pyplot()

            if "Precision-Recall Curve" in metrics_list:
                st.subheader("Precision-Recall Curve")
                plot_precision_recall_curve(model, X_test, y_test)
                st.pyplot()

        df = load_data()
        class_names = df['type']

        if st.sidebar.checkbox("Show row data", False):
            st.subheader("Mushroom Data Set (Classification)")
            st.write(df)
            st.write("The shape of data", df.shape)
            st.markdown(
                "This [data set](https://archive.ics.uci.edu/ml/datasets/Mushroom) includes descriptions of hypothetical samples corresponding to 23 species of gilled mushrooms "
                "in the Agaricus and Lepiota Family (pp. 500-525). Each species is identified as definitely edible, definitely poisonous, "
                "or of unknown edibility and not recommended. This latter class was combined with the poisonous one."
            )
            if st.checkbox("Show Summary"):
                st.write(df.describe().T)

            if st.checkbox("Show Columns"):
                all_columns = df.columns.to_list()
                st.write(all_columns)

            if st.checkbox("Select Columns To See Values"):
                all_columns = df.columns.to_list()
                selected_col = st.multiselect("Select Columns", all_columns)
                new_df = df[selected_col]
                st.dataframe(new_df)

            if st.checkbox("Show value counts"):
                st.write(df.iloc[:, 0].value_counts())

        X_train, X_test, y_train, y_test = split(df)

        st.sidebar.subheader("Choose a Classifier")
        Classifier = st.sidebar.selectbox(
            "Classifier", ("Support Vector Machine (SVM)",
                           "Logistic Regession", "Random Forest"))

        if Classifier == 'Support Vector Machine (SVM)':
            st.sidebar.subheader('Model Hyperparameters')
            ##Choose Parameters\
            C = st.sidebar.number_input("C (Regularization parameter)",
                                        0.01,
                                        10.0,
                                        step=0.01,
                                        key='C_SVM')
            kernel = st.sidebar.radio("Kernel", ("rbf", "linear"),
                                      key='kernel')
            gamma = st.sidebar.radio("Gamma (Kernel Coefficient)",
                                     ("scale", "auto"),
                                     key='gamma')

            metrics = st.sidebar.multiselect(
                "Whitch metrics to plot?",
                ("Confusion Matrix", "Roc-Curve", "Precision-Recall Curve"))

            if st.sidebar.button("Classify", key="classify"):
                st.subheader("Support Vector Machine (SVM) Results")
                model = SVC(C=C, kernel=kernel, gamma=gamma)
                model.fit(X_train, y_train)
                accuracy = model.score(X_test, y_test)
                y_pred = model.predict(X_test)
                st.write("Accuracy: ", accuracy.round(2))
                st.write(
                    "Precision: ",
                    precision_score(y_test, y_pred,
                                    labels=class_names).round(2))
                st.write(
                    "Recall: ",
                    recall_score(y_test, y_pred, labels=class_names).round(2))
                plot_metrics(metrics)

        if Classifier == 'Logistic Regression':
            st.sidebar.subheader("Model Hyperparameters")
            C = st.sidebar.number_input("C (Regularization parameter)",
                                        0.01,
                                        10.0,
                                        step=0.01,
                                        key='C_LR')
            max_iter = st.sidebar.slider("Maximum Number of iterations",
                                         100,
                                         500,
                                         key='max_iter')

            metrics = st.sidebar.multiselect(
                "Which metrics to plot?",
                ("Confusion Matrix", 'ROC-Curve', 'precision-Recall Curve'))

            if st.sidebar.button("Classify", key='Classify'):
                st.subheader("Logistc Regression Results")
                #model = LogisticRegression(C=C, penalty='12', max_iter=max_iter)
                model = LogisticRegression(C=C,
                                           penalty='l2',
                                           max_iter=max_iter)
                #model.fit(X_train, y_train)
                model.fit(X_train, y_train)
                accuracy = model.score(X_test, y_test)
                y_pred = model.predict(X_test)
                st.write("Accuracy: ", accuracy.round(2))
                st.write(
                    "Precision: ",
                    precision_score(y_test, y_pred,
                                    labels=class_names).round(2))
                st.write(
                    'Recall: ',
                    recall_score(y_test, y_pred, labels=class_names).round(2))
                plot_metrics(metrics)

        if Classifier == 'Random Forest':
            st.sidebar.subheader("Model Hyperparameters")
            n_estimators = st.sidebar.number_input(
                "The number of trees in the forest",
                10,
                5000,
                key='n_estimators')
            max_depth = st.sidebar.number_input(
                "The maximum depth of the tree",
                1,
                20,
                step=1,
                key='max_depth')
            bootstrap = st.sidebar.radio(
                "Bootstrap samples when builidng tees", ("True", 'False'),
                key='bootstrap')

            metrics = st.sidebar.multiselect(
                "Which metrics to plot?",
                ("Confusion Matrix", 'ROC-Curve', 'precision-Recall Curve'))

            if st.sidebar.button("Classify", key='classify'):
                st.subheader("Random Forest Classifer Results")
                model = RandomForestClassifier(n_estimators=n_estimators,
                                               max_depth=max_depth,
                                               bootstrap=bootstrap,
                                               n_jobs=-1)
                model.fit(X_train, y_train)
                accuracy = model.score(X_test, y_test)
                y_pred = model.predict(X_test)
                st.write("Accuracy: ", accuracy.round(2))
                st.write(
                    "Precision: ",
                    precision_score(y_test, y_pred,
                                    labels=class_names).round(2))
                st.write(
                    "Recall: ",
                    recall_score(y_test, y_pred, labels=class_names).round(2))
                plot_metrics(metrics)
#
# ------------------- Analise Data by Hour of Day --------------------------------------------------------------------
#

st.subheader('Analysis of Data by Hour of Day')

report_hist_values = np.histogram(report_df.time.dt.hour,
                                  bins=24,
                                  range=(0, 24))[0]
incident_hist_values = np.histogram(incident_df.time.dt.hour,
                                    bins=24,
                                    range=(0, 24))[0]

st.bar_chart(
    pd.DataFrame({
        'Reports': report_hist_values,
        'Accidents': incident_hist_values,
    }))

# Maps by Hour of Day

hour_to_filter = st.slider('hour', 0, 23, 8)  # min: 0h, max: 23h, default: 8h
period = st.slider('period', 0, 23, 1)  # half-width of the hour window, default: 1h

filtered_report_df = report_df[
    (report_df.time.dt.hour >= hour_to_filter - period)
    & (report_df.time.dt.hour <= hour_to_filter + period)]
report_map_data = filtered_report_df.loc[:, ['lat', 'lng']].copy()
report_map_data.columns = ['lat', 'lon']

filtered_incident_df = incident_df[
Example #12
DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
            'streamlit-demo-data/uber-raw-data-sep14.csv.gz')


@st.cache
def load_data(nrows):
    data = pd.read_csv(DATA_URL, nrows=nrows)
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis='columns', inplace=True)
    data[DATE_TIME] = pd.to_datetime(data[DATE_TIME])
    return data


data = load_data(100000)

# hour = 10
# hour = st.selectbox('selecciona la hora', range(0,24),1)
hour = st.slider('selecciona la hora', 0, 24, 10, 1)
data = data[data[DATE_TIME].dt.hour == hour]

if st.checkbox('Vista de datos'):
    st.subheader('Datos de solo %sh' % hour)
    st.write(data)

st.subheader('Datos por minutos a %sh' % hour)
st.bar_chart(
    np.histogram(data[DATE_TIME].dt.minute, bins=60, range=(0, 60))[0])

st.subheader('Mapa de datos a %sh' % hour)
st.map(data)
Example #13
    def _get_deltas_that_melt_dataframes(self):
        return [
            lambda df: st.line_chart(df),
            lambda df: st.bar_chart(df),
            lambda df: st.area_chart(df),
        ]
Example #14
import streamlit as st

st.bar_chart({"d6": [1, 5, 2, 6, 2, 1]})

with st.expander("See explanation"):
    st.write(
        """
        The chart above shows some numbers I picked for you.
        I rolled actual dice for these, so they're *guaranteed* to
        be random.
        """
    )
    st.image("https://static.streamlit.io/examples/dice.jpg", width=200)
    st.markdown("Photo by [@brett_jordon](https://unsplash.com/photos/4aB1nGtD_Sg)")
Example #15
CANVAS_SIZE = 192

col1, col2 = st.beta_columns(2)
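# st.beta_columns was the pre-1.0 name; in current Streamlit it is st.columns.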

with col1:
    canvas = st_canvas(
        fill_color='#000000',
        stroke_width=20,
        stroke_color='#FFFFFF',
        background_color='#000000',
        width=CANVAS_SIZE,
        height=CANVAS_SIZE,
        drawing_mode='freedraw',
        key='canvas'
    )

if canvas.image_data is not None:
    img = canvas.image_data.astype(np.uint8)
    img = cv2.resize(img, dsize=(28, 28))
    preview_img = cv2.resize(img, dsize=(CANVAS_SIZE, CANVAS_SIZE), interpolation=cv2.INTER_NEAREST)

    col2.image(preview_img)

    x = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    x = x.reshape((-1, 28, 28, 1))
    y = model.predict(x).squeeze()

    st.write('## Result: %d' % np.argmax(y))
    st.bar_chart(y)
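    # Assumption: `model` is a pre-loaded 28x28 grayscale digit classifier
    # (e.g. a Keras MNIST model) whose predict() returns per-class
    # probabilities, so st.bar_chart(y) draws one bar per digit class.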
Example #16
def app():
    #st.markdown('<img style="float: left;" src="https://virtual.usal.edu.ar/branding/themes/Usal_7Julio_2017/images/60usalpad.png" />', unsafe_allow_html=True)
    st.markdown(
        '<style>div[data-baseweb="select"] > div {text-transform: capitalize;}body{background-color:#008357;}</style>',
        unsafe_allow_html=True)

    st.markdown(
        "<h3 style='text-align: center; color: green;'>Salas Collaborate</h3>",
        unsafe_allow_html=True)

    #SHEET_ID = '12D4hfpuIkT7vM69buu-v-r-UYb8xx4wM1zi-34Fs9ck'
    df = pd.read_csv(
        'https://docs.google.com/spreadsheets/d/12D4hfpuIkT7vM69buu-v-r-UYb8xx4wM1zi-34Fs9ck/export?format=csv&gid=1912357955'
    )

    #df = pd.read_csv('/mydrive/MyDrive/multiapps/bbc204.csv')
    df = df.sort_values(by=['SessionOwner'])
    #options = ['USAL_lti_production', 'USAL_rest_production','josemarcucci']

    options = ['josemarcucci']
    # selecting rows based on condition
    df = df.loc[~df['SessionOwner'].isin(options)]
    countries = df['SessionOwner'].unique()
    duplit = df.drop_duplicates(subset=['SessionName'])
    df5 = pd.value_counts(duplit['SessionName'])

    df['Minutos'] = round(
        pd.to_timedelta(df['AttendeeTotalTimeInSession']).dt.total_seconds() /
        60)
    totales = df.groupby("SessionOwner")['Minutos'].sum()
    #st.table(totales)
    #st.table(duplit[['SessionOwner','SessionName']])

    buff, col, buff2 = st.beta_columns([1, 3, 1])
    '## Tipo de plataforma'
    if st.checkbox('Ver comparativo UAs'):
        st.bar_chart(totales)
    '## Tipo de plataforma'
    country = buff.selectbox('Usuario BBC o plataforma', countries)

    df[df['SessionOwner'] == country]

    #option = st.selectbox("Seleccionar Unidad", options=list(CHOICES.keys()), format_func=format_func)
    #st.write(f"Seleccionaste {format_func(option)}" )
    #column = format_func(option)
    above_352 = df["SessionOwner"] == country
    #moderador = df["AttendeeRole"] == "Moderator"

    bool_series = df[above_352]["SessionOwner"].str.startswith(country,
                                                               na=False)
    dupli = df[above_352][bool_series].drop_duplicates(subset=['SessionName'])
    #dupli=df[above_352][bool_series].drop_duplicates(['SessionName']).groupby('SessionName').agg({'AttendeeTotalTimeInSession':'sum'})
    sesiones = df[above_352][bool_series]['SessionName'].unique()
    df6 = pd.value_counts(sesiones)
    time = pd.DatetimeIndex(
        df[above_352][bool_series]['AttendeeTotalTimeInSession'])
    times1 = time.hour * 60 + time.minute + time.second / 60
    times = times1.values.sum()
    timeu = pd.DatetimeIndex(df['AttendeeTotalTimeInSession'])
    times1u = timeu.hour * 60 + timeu.minute + timeu.second / 60
    timesu = times1u.values.sum()

    times3t = df5.index
    aulast = len(times3t)
    times3 = df6.index
    aulas = len(times3)
    df['RoomOpened'] = pd.to_datetime(df['RoomOpened']).dt.strftime('%d-%m-%y')
    maxValue = df['RoomOpened'].max()
    minValue = df['RoomOpened'].min()
    st.write('Período:', minValue, ' al ', maxValue)
    st.write('Salas: ', aulas)
    st.write('Minutos usados: ', round(times, 1))
    st.sidebar.markdown(
        "<h3 style='text-align: left; color: black;font-weight:500;'>Minutos y salas (Semanal)</h3>",
        unsafe_allow_html=True)
    st.sidebar.write('Minutos: ', round(timesu, 1))
    st.sidebar.write('Salas: ', aulast)

    st.sidebar.markdown(
        "<h3 style='text-align: left; color: black;font-weight:500;'>Minutos totales</h3>",
        unsafe_allow_html=True)
    st.sidebar.write('Minutos: ', 23883910 + round(timesu, 1))
    #st.sidebar.write('Salas: ',aulast)

    dupli.index = [""] * len(dupli)
    #dupli.columns=['RoomClosed', 'SessionName']
    #dupli.rename(columns={'RoomClosed':'Fecha','SessioName':'Sala'})
    if st.checkbox('Mostrar Salas'):
        #st.table(df[above_352][bool_series][['RoomOpened','SessionName','NameOfAttendee','AttendeeTotalTimeInSession']])

        dupli = dupli.sort_values(by=['RoomClosed'])
        st.table(dupli[['RoomClosed', 'SessionName']])
Example #17
if st.sidebar.button('Generate Charts'):
    st.write("Fetching data for the %s %s!" % (option, codeSelect[:-1]))
    if (codeSelect == 'Traded Stock Codes'):
        data = get_history(symbol=option, start=startDate, end=endDate)
    else:
        data = get_history(symbol=option,
                           start=startDate,
                           end=endDate,
                           index=True)

    st.write("""### Closing Price Chart""")
    st.line_chart(data.Close)
    st.write("""### Opening Price Chart""")
    st.line_chart(data.Open)
    st.write("""### High Price Chart""")
    st.bar_chart(data.High)
    st.write("""### Low Price Chart""")
    st.bar_chart(data.Low)
    st.write("""### Opening/Closing Price Chart""")
    arr1 = np.vstack([data.Open, data.Close])
    st.line_chart(pd.DataFrame(arr1.T, columns=["Opening", "Closing"]))
    st.write("""### High/Low Price Chart""")
    arr2 = np.vstack([data.High, data.Low])
    st.line_chart(pd.DataFrame(arr2.T, columns=["High", "Low"]))
    st.write("""### Combined Price Chart""")
    arr = np.vstack([data.Open, data.Close, data.High, data.Low])
    st.line_chart(
        pd.DataFrame(arr.T, columns=["Opening", "Closing", "High", "Low"]))
    st.write("""### Volume""")
    st.line_chart(data.Volume)
Example #18
import numpy as np
import pandas as pd
import streamlit as st



with st.echo(code_location="below"):
    st.title('Python Project NYPD statistics')
    st.write("This is simple project with Python using NYPD data about incidents in New York city. Dataset is about gun involving accidents in period 2006-2019. https://www1.nyc.gov/site/nypd/stats/crime-statistics/crime-statistics-landing.page")
    a = pd.read_csv('NYPD_Shooting_Incident_Data__Historic_.csv',
                sep=';', header=0)
    st.write('Amount of attackers by race in New York city total by years')
# FIRST CHART WITH AMOUNT OF ATTACKS WITH GUNS TOTAL OVER YEARS
    suspects = pd.DataFrame({
        'lab': ['BLACK', 'ASIAN', 'WHITE HISPANIC', 'ASIAN / PACIFIC ISLANDER',
                'BLACK HISPANIC', 'UNKNOWN'],
        'Total attacks per perpetrator race:': [
            a.PERP_RACE.str.count("BLACK").sum(),
            a.PERP_RACE.str.count("ASIAN").sum(),
            a.PERP_RACE.str.count("WHITE HISPANIC").sum(),
            a.PERP_RACE.str.count("ASIAN / PACIFIC ISLANDER").sum(),
            a.PERP_RACE.str.count("BLACK HISPANIC").sum(),
            a.PERP_RACE.str.count("UNKNOWN").sum(),
        ]})
    st.bar_chart(suspects.set_index('lab'))
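    # Note: str.count("BLACK") also matches "BLACK HISPANIC" rows (and "ASIAN"
    # matches "ASIAN / PACIFIC ISLANDER"), so these category totals overlap.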


# SECOND CHART WITH AMOUNT OF VICTIMS TOTAL OVER YEARS

    victims = pd.DataFrame({
        'lab': ['BLACK', 'ASIAN', 'WHITE HISPANIC', 'ASIAN / PACIFIC ISLANDER',
                'BLACK HISPANIC', 'UNKNOWN'],
        'Total victims per race:': [
            a.VIC_RACE.str.count("BLACK").sum(),
            a.VIC_RACE.str.count("ASIAN").sum(),
            a.VIC_RACE.str.count("WHITE HISPANIC").sum(),
            a.VIC_RACE.str.count("ASIAN / PACIFIC ISLANDER").sum(),
            a.VIC_RACE.str.count("BLACK HISPANIC").sum(),
            a.VIC_RACE.str.count("UNKNOWN").sum(),
        ]})
    st.write('Amount of victims by race in New York city total by years')
    st.bar_chart(victims.set_index('lab'))


# MAP DATA,
    st.write("Map of areas where acts took place")
    data = pd.DataFrame({
        'lat': a.Latitude,
        'lon': a.Longitude
Example #19
import streamlit as st
import numpy as np

st.title("Wilde-Daten.de")
st.text(" Hier entsteht die Homepage von Wilde-Daten")
st.title("Ein Zufallsdiagramm")

chart = st.bar_chart(np.random.rand(10, 5))
btn = st.button("Zufallsgenerator!")

if btn:
    chart.bar_chart(np.random.rand(10, 5))
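# A sketch of the more predictable way to swap a chart in place across
# Streamlit versions is an st.empty() placeholder:
#     chart = st.empty()
#     chart.bar_chart(np.random.rand(10, 5))
#     if btn:
#         chart.bar_chart(np.random.rand(10, 5))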
Example #20
def app():
    st.title('c4project - Black Lives Matter')
    st.text("")
    st.text("")
    """
    """
    #st.write("Tweets")
    data = pd.read_csv('sample_tweets.csv')
    data_size = data.shape[0]

    st.write("There are " + str(data_size) + " tweets available on BLM")

    st.dataframe(data, width=None, height=None)

    #st.line_chart(data.Time)

    st.text("")
    st.text("")
    st.text("")
    #st.write("Web links from Tweets")
    data2 = pd.read_csv('urls.csv')
    data_size2 = data2.shape[0]
    st.write("There are " + str(data_size2) +
             " urls associated with the tweets")
    st.dataframe(data2, width=None, height=None)
    """
    """
    fig, ax = plt.subplots(figsize=(8, 6))

    # Create textblob objects of the tweets
    sentiment_objects = [TextBlob(tweet) for tweet in data['Tweet']]

    # Create list of polarity values and tweet text
    sentiment_values = [[tweet.sentiment.polarity,
                         str(tweet)] for tweet in sentiment_objects]

    # Create dataframe containing the polarity value and tweet text
    sentiment_df = pd.DataFrame(sentiment_values,
                                columns=["polarity", "tweet"])

    # plot polarities on a histogram:
    fig, ax = plt.subplots(figsize=(8, 6))
    st.text("")
    st.text("")
    st.text("")
    st.write("Current Sentiments on Tweets on BLM as of 2 PM CST")
    # Plot histogram of the polarity values
    st.bar_chart(sentiment_df.polarity)
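    # Note: st.bar_chart(sentiment_df.polarity) draws one bar per tweet; for an
    # actual histogram of polarities you would bin first, e.g.
    #     counts, edges = np.histogram(sentiment_df.polarity, bins=20)
    #     st.bar_chart(pd.DataFrame(counts, index=edges[:-1]))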

    st.text("")
    st.text("")
    st.text("")
    st.line_chart(data.Time)

    total_points = st.slider("Number of points in tweets", 1, 5000, 200)
    num_turns = st.slider("Number of turns in spiral", 1, 100, 9)
    Point = namedtuple('Point', 'x y')
    data = []
    points_per_turn = total_points / num_turns

    for curr_point_num in range(total_points):
        curr_turn, i = divmod(curr_point_num, points_per_turn)
        angle = (curr_turn + 1) * 2 * math.pi * i / points_per_turn
        radius = curr_point_num / total_points
        x = radius * math.cos(angle)
        y = radius * math.sin(angle)
        data.append(Point(x, y))

    st.altair_chart(
        alt.Chart(pd.DataFrame(data), height=500,
                  width=500).mark_circle(color='#0068c9',
                                         opacity=0.5).encode(x='x:Q', y='y:Q'))
Example #21
    def inicializar(self, app=app_hub()):

        opcoes = [
            'Quantidade de valores nao preenchidos',
            'Porcetagem de dados faltantes por coluna'
        ]

        #Exercicio 01 valores vazios
        if self.option == app.dicionario[0]:
            self.option_01 = st.selectbox('Exercicio 01: Escolha o grafico:',
                                          opcoes)
            st.write(
                'Para cada coluna identique a quantidade de linhas com dados faltantes (em alguns casos, o dado faltante é uma string vazia, em outros casos é uma string contendo algum valor do tipo: "sem informação"). Faça um método que retorna a média de dados faltantes por coluna'
            )
            if self.option_01 == opcoes[0]:
                st.markdown('### Valores vazios ou faltantes: ')
                st.bar_chart(app.count_nulls(app.construir(app.linhas)))
                if st.checkbox('Mostrar lista de dados faltantes'):
                    st.write(app.count_nulls(app.construir(app.linhas)))
            if self.option_01 == opcoes[1]:
                st.markdown('### Porcetagem de valores faltantes por coluna: ')
                st.bar_chart(
                    app.media_nulls(app.count_nulls(app.construir(
                        app.linhas))))
                if st.checkbox('Mostrar lista de porcetagem'):
                    st.write(
                        list(
                            app.media_nulls(
                                app.count_nulls(app.construir(app.linhas)))))

        #Exercicio 02 Nivel taxonomico
        if self.option == app.dicionario[1]:
            st.write(
                'Para cada item identifique até qual nível taxônomico a ocorrência foi identificada.'
            )
            #verificaTaxonomia metodo importado do felipe
            st.bar_chart(verificaTaxonomia(app.construir(app.linhas)))
            if st.checkbox('Valores por Coluna'):
                st.write(verificaTaxonomia(app.construir(app.linhas)))

        #Exercicio 03  Filtros
        if self.option == app.dicionario[2]:
            mapa_bio = pd.read_csv('Arquivos/mapa_biodiversidade.csv',
                                   header=0)
            st.write(
                'Monte filtros de ocorrências por estados, nome de espécie (nome exato ou parte do nome) e categoria de ameaça, e outros filtros que julgar relevante.'
            )
            municipios = st.multiselect("Escolha os municipios",
                                        list(set(mapa_bio['Municipio'])),
                                        ["Londrina"])
            data = mapa_bio.loc[mapa_bio['Municipio'].isin(municipios)]

            st.deck_gl_chart(viewport={
                'latitude': -23.37,
                'longitude': -51.28,
                'zoom': 11,
                'pitch': 50,
            },
                             layers=[{
                                 'type': 'HexagonLayer',
                                 'data': data,
                                 'radius': 200,
                                 'elevationScale': 4,
                                 'elevationRange': [0, 1000],
                                 'pickable': True,
                                 'extruded': True,
                             }, {
                                 'type': 'ScatterplotLayer',
                                 'data': data,
                             }])
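            # st.deck_gl_chart has since been removed from Streamlit; the
            # replacement API is st.pydeck_chart with a pydeck.Deck object.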

            if st.checkbox('Mostrar dados'):
                st.dataframe(data)

        #Exercicio 04 - Geocode - Verificar se dados batem
        if self.option == app.dicionario[3]:
            st.image('Arquivos/Erro.jpg')
# Create a text element and let the reader know the data is loading.
data_load_state = st.text('Loading data...')
# Load 10,000 rows of data into the dataframe.
data = load_data()
# Notify the reader that the data was successfully loaded.
data_load_state.text("Done! (using st.cache)")

NOW = dt.datetime(2011,12,10)

segmented_rfm = buildRFM(data, NOW)
st.subheader('RFM Score')
st.write(segmented_rfm.head(5))

st.subheader('Monetary value')
st.bar_chart(segmented_rfm.groupby('RFMScore').agg('monetary_value').mean())

st.subheader('Frequency')
st.bar_chart(segmented_rfm.groupby('RFMScore').agg('frequency').mean())

st.subheader('Recency')
st.bar_chart(segmented_rfm.groupby('RFMScore').agg('recency').mean())

df = pd.read_excel('Online Retail.xlsx', dtype={'CustomerID': str, 'InvoiceID': str}, parse_dates=['InvoiceDate'], infer_datetime_format=True)
df.dropna(subset=['CustomerID'], inplace=True)
n_orders = df.groupby(['CustomerID'])['InvoiceNo'].nunique()
mult_orders_perc = np.sum(n_orders > 1) / df['CustomerID'].nunique()
df = df[['CustomerID', 'InvoiceNo', 'InvoiceDate']].drop_duplicates()
df['order_month'] = df['InvoiceDate'].dt.to_period('M')
df['cohort'] = df.groupby('CustomerID')['InvoiceDate'] \
                 .transform('min') \
Example #23
def main():
    """Semi Automated ML App with Streamlit """

    activities = ["EDA", "Plots"]
    choice = st.sidebar.selectbox("Select Activities", activities)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis")

        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            if st.checkbox("Show Shape"):
                st.write(df.shape)

            if st.checkbox("Show Columns"):
                all_columns = df.columns.to_list()
                st.write(all_columns)

            if st.checkbox("Summary"):
                st.write(df.describe())

            if st.checkbox("Show Selected Columns"):
                selected_columns = st.multiselect("Select Columns",
                                                  all_columns)
                new_df = df[selected_columns]
                st.dataframe(new_df)

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts())

            if st.checkbox("Correlation Plot(Matplotlib)"):
                plt.matshow(df.corr())
                st.pyplot()

            if st.checkbox("Correlation Plot(Seaborn)"):
                st.write(sns.heatmap(df.corr(), annot=True))
                st.pyplot()

            if st.checkbox("Pie Plot"):
                all_columns = df.columns.to_list()
                column_to_plot = st.selectbox("Select 1 Column", all_columns)
                pie_plot = df[column_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pie_plot)
                st.pyplot()

    elif choice == 'Plots':
        st.subheader("Data Visualization")
        data = st.file_uploader("Upload a Dataset",
                                type=["csv", "txt", "xlsx"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts().plot(kind='bar'))
                st.pyplot()

            # Customizable Plot

            all_columns_names = df.columns.tolist()
            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                st.success("Generating Customizable Plot of {} for {}".format(
                    type_of_plot, selected_columns_names))

                # Plot By Streamlit
                if type_of_plot == 'area':
                    cust_data = df[selected_columns_names]
                    st.area_chart(cust_data)

                elif type_of_plot == 'bar':
                    cust_data = df[selected_columns_names]
                    st.bar_chart(cust_data)

                elif type_of_plot == 'line':
                    cust_data = df[selected_columns_names]
                    st.line_chart(cust_data)

                # Custom Plot
                elif type_of_plot:
                    cust_plot = df[selected_columns_names].plot(
                        kind=type_of_plot)
                    st.write(cust_plot)
                    st.pyplot()
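
# A minimal sketch (an assumption, not from the original example): newer
# Streamlit releases warn when st.pyplot() is called without a figure, so the
# correlation plots above can also be drawn on an explicit Matplotlib figure.
def correlation_heatmap(frame):
    """Render a Seaborn correlation heatmap on an explicit figure."""
    fig, ax = plt.subplots()
    sns.heatmap(frame.corr(), annot=True, ax=ax)
    st.pyplot(fig)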
Beispiel #24
0
st.write("Updated dataframe")
st.write(weed_df)

# df = pd.DataFrame(np.random.randn(200, 3), columns=['a', 'b', 'c'])

st.vega_lite_chart(
    weed_df, {
        'mark': 'circle',
        'encoding': {
            'x': {
                "field": "year_month",
                "type": "temporal"
            },
            'y': {
                "aggregate": "mean",
                "field": "sold",
                "type": "quantitative"
            },
        },
    })

bar_df = pd.DataFrame(weed_df[['region', 'sold']])

st.write("Chart of sold")
st.bar_chart(bar_df)

st.title("This one you can filter stuff with")
option = st.multiselect("Select a region", np.unique(weed_df['region']))
'You selected:', option

st.dataframe(weed_df[weed_df['region'].isin(option)])
def data_app():
    """ Data Processer and Visualizer  """
    st.title("Data Cake")
    st.subheader("A to Z  Data Analysis")

    file = ['./dataset/Ac1', [0, 1]]

    def file_selector():
        filename = st.file_uploader("Upload Excel File", type=['xls', 'xlsx'])
        if filename is not None:
            sheetnames = pd.ExcelFile(filename).sheet_names
            sheet = st.selectbox("Select Sheet", sheetnames)
            return [filename, sheet]

    file = file_selector()

    # Read Data
    try:
        df = pd.read_excel(file[0], sheet_name=file[1])
    except Exception as e:
        st.info("Please upload Excel file")

    # Show Data
    try:
        if st.checkbox("Show Dataset"):
            number = st.number_input("Number of Rows to View", 5, 10)
            st.dataframe(df.head(number))
    except Exception as e:
        st.info("Please upload Excel file")

    # Show Columns
    try:
        if st.button("Column Names"):
            st.write(df.columns)
    except Exception as e:
        st.info("Please upload Excel file")

    # Show Shape
    try:
        if st.checkbox("Shape of Dataset"):
            st.write(df.shape)
    except Exception as e:
        st.info("Please upload Excel file")

    # Select Columns
    try:
        if st.checkbox("Select Columns To Show"):
            all_columns = df.columns.tolist()
            selected_columns = st.multiselect("Select", all_columns)
            new_df = df[selected_columns]
            st.dataframe(new_df)
    except Exception as e:
        st.info("Please upload Excel file")

    # Show Datatypes
    try:
        if st.button("Data Types"):
            st.write(df.dtypes)
    except Exception as e:
        st.info("Please upload Excel file")

    # Show Summary
    try:
        if st.checkbox("Summary"):
            st.write(df.describe().T)
    except Exception as e:
        st.info("Please upload Excel file")

    ## Plot and Visualization

    st.subheader("Data Visualization")
    # Correlation
    # Seaborn Plot
    if st.checkbox("Correlation Plot[Seaborn]"):
        st.write(sns.heatmap(df.corr(), annot=True))
        st.pyplot()

    # Pie Chart
    if st.checkbox("Pie Plot"):
        all_columns_names = df.columns.tolist()
        if st.button("Generate Pie Plot"):
            st.success("Generating A Pie Plot")
            st.write(df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
            st.pyplot()

    # Count Plot
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox("Primary Column to GroupBy",
                                   all_columns_names)
        selected_columns_names = st.multiselect("Select Columns",
                                                all_columns_names)

        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(
                    primary_col)[selected_columns_names].count()
            else:
                vc_plot = df.iloc[:, -1].value_counts()
            st.write(vc_plot.plot(kind="bar"))
            st.pyplot()

    # Contour Plot
    if st.checkbox("Contour Plot"):
        st.text("3D Contour Plot")
        all_columns_names = df.columns.tolist()

        X = st.selectbox("Select X axis", all_columns_names)
        Y = st.selectbox("Select Y axis", all_columns_names, index=1)
        VS = st.selectbox("Select Z axis", all_columns_names, index=2)

        Z_F = df.pivot_table(index=X, columns=Y, values=VS).T.values

        X_unique = np.sort(df[X].unique())
        Y_unique = np.sort(df[Y].unique())
        X_F, Y_F = np.meshgrid(X_unique, Y_unique)
        pd.DataFrame(Z_F).round(3)
        pd.DataFrame(X_F).round(3)
        pd.DataFrame(Y_F).round(3)

        fig, ax = plt.subplots(1, 1)
        cp = ax.contourf(X_F, Y_F, Z_F)
        fig.colorbar(cp)  # Add a colorbar to a plot
        st.pyplot(fig=fig)


    # Customizable Plot
    try:
        st.subheader("Customizable Plot")
        all_columns_names = df.columns.tolist()
        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns_names = st.multiselect("Select Columns To Plot",
                                                all_columns_names)

        if st.button("Generate Plot"):
            st.success("Generating Customizable Plot of {} for {}".format(
                type_of_plot, selected_columns_names))

            # Plot By Streamlit
            if type_of_plot == 'area':
                cust_data = df[selected_columns_names]
                st.area_chart(cust_data)

            elif type_of_plot == 'bar':
                cust_data = df[selected_columns_names]
                st.bar_chart(cust_data)

            elif type_of_plot == 'line':
                cust_data = df[selected_columns_names]
                st.line_chart(cust_data)

            # Custom Plot (rendered via pandas/Matplotlib)
            elif type_of_plot:
                cust_plot = df[selected_columns_names].plot(kind=type_of_plot)
                st.write(cust_plot)
                st.pyplot()

        if st.button("Ready to ML !"):
            st.balloons()
    except Exception:
        st.info("Please upload Excel file")
    st.sidebar.header("Data Cake")

    st.sidebar.info("Built by Veera Ragavan")
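
# A minimal sketch (an assumption, not part of the original app, reusing the
# same st/pd aliases as above): the repeated try/except "Please upload Excel
# file" blocks can be collapsed into one guard that halts the run early.
def load_selected_sheet():
    uploaded = st.file_uploader("Upload Excel File", type=['xls', 'xlsx'])
    if uploaded is None:
        st.info("Please upload Excel file")
        st.stop()  # stop this run; Streamlit reruns once a file is uploaded
    sheet = st.selectbox("Select Sheet", pd.ExcelFile(uploaded).sheet_names)
    return pd.read_excel(uploaded, sheet_name=sheet)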
Beispiel #26
0
		retweets = spark_twitter.count_retweet(tweets)
		tags = spark_twitter.count_tags(tweets)

		if retweets != 0:
			t = "<div><span class='highlight blue'><span class='bold'>Number of retweets on the topic: " + str(retweets) + " </span></span></div>"
			st.markdown(t, unsafe_allow_html=True)

		if len(tags) > 0:
			df_tags = pd.DataFrame(tags, columns=['Tags', 'occurences'])
			df_tags = df_tags.rename(columns={'Tags': 'index'}).set_index('index')  # Set the tag column as the index so it is shown on the x-axis

			df_tags_limit = df_tags.head(20) 	
			st.dataframe(df_tags_limit)

			chart_data_tags = df_tags_limit  # already indexed by tag with an 'occurences' column
			st.bar_chart(chart_data_tags)

		dict_result = spark_twitter.word_count(tweets)

		#print(len(dict_result))

		if len(dict_result) > 0: 
			df = pd.DataFrame(dict_result, columns=['Hashtag', 'occurences'])
			df = df.rename(columns={'Hashtag': 'index'}).set_index('index')  # Set the hashtag column as the index so it is shown on the x-axis
		  
			st.dataframe(df)

			df_limit = df.head(10)
			#st.dataframe(df_limit)

			chart_data = df_limit  # already indexed by word with an 'occurences' column
Beispiel #27
0
#%%
import altair as alt
#%%
#selected_stock = st.selectbox("Please select the stock", Stocks)
selected_stock = st.sidebar.selectbox("Please select the stock", Stocks)
st.subheader('Selected Stock')
st.line_chart(df2[selected_stock])
n_test = st.sidebar.slider("Months of prediction:", 1, 40)
period = n_test * 30

#%%
st.subheader(selected_stock + ' Monthly Return Data')
monthly_return = df3[selected_stock].resample('M').ffill().pct_change()
df6 = pd.DataFrame(monthly_return)
st.bar_chart(df6)

#%%
st.header('The results of the forecast will be ready in a minute')

df2 = df2.reset_index()
#install prophet by: conda install -c conda-forge prophet
#from plotly import graph_objs as go
#from fbprophet.plot import plot_plotly

#forecasting
df_train = df2[['Date', selected_stock]]
df_train = df_train.rename(columns={"Date": "ds", selected_stock: "y"})


m = Prophet()
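
#%%
# A hedged sketch of how the forecast typically continues from here (an
# assumption, not part of the truncated snippet); it reuses df_train, period
# and selected_stock defined above and the ds/y column convention.
m.fit(df_train)
future = m.make_future_dataframe(periods=period)  # `period` days ahead
forecast = m.predict(future)

st.subheader(selected_stock + ' Forecast')
st.line_chart(forecast.set_index('ds')[['yhat', 'yhat_lower', 'yhat_upper']])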
Beispiel #28
0
def bar_of_nulls(data):
    st.write('Missing Values')
    st.bar_chart(
        data.isnull().sum().to_frame().rename(columns={0: 'Missing values'}))
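
# A minimal usage sketch (an assumption, not from the original snippet):
demo = pd.DataFrame({"a": [1, None, 3], "b": [None, None, 6]})
bar_of_nulls(demo)  # one bar per column, height = number of missing values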
Beispiel #29
0
cols = st.selectbox('Covid metric', metrics)

if cols in metrics:
    metricstoshow = cols
    if metricstoshow == 'confirmed':
        st.title("Confirmed cases")
        #confirmed_cases = st.slider("Number of confirmed cases", 1 , int(confirmed_melted["confirmed"].max()))
        fecha = st.selectbox("Select date", confirmed_melted['fecha'].unique())

        pais = st.selectbox("Select country to check data", confirmed_melted['Country/Region'].unique())
        confirmado_hasta_la_fecha = confirmed_melted[(confirmed_melted["Country/Region"] == pais) & (confirmed_melted["fecha"] == fecha)]
        st.text("Confirmed cases as of: " + fecha)
        st.write(confirmado_hasta_la_fecha)
        confirmado_por_pais = confirmed_melted[confirmed_melted["Country/Region"] == pais]  # [confirmed_melted["fecha"] == fecha]
        st.header("Covid cases in " + pais)
        st.bar_chart(confirmado_por_pais['confirmed'])
        st.text("Total cases across all dates for " + pais)
        st.write(confirmado_por_pais)
        total1 = confirmed_melted.loc[(confirmed_melted['Country/Region'] == pais) & (confirmed_melted["fecha"] == fecha), 'confirmed'].sum()
        st.text("Total cases in " + pais + " as of: " + fecha)
        st.text(total1)
        confirmed_melted['fecha'] = pd.to_datetime(confirmed_melted['fecha'], format='%m/%d/%y', errors="ignore")
        # fecha1 = datetime.date(20,1,22)
        view = pdk.ViewState(latitude=0, longitude=0, zoom=0.2)

        covidLayer1 = pdk.Layer(
             "ScatterplotLayer",
             data=confirmed_melted,
             pickable= True,
             opacity=0.3,
             stroked=True,
Beispiel #30
0
    initial_view_state=initial_view_state,
    layers=[
        pdk.Layer(
            'HexagonLayer',
            data=data,
            get_position='[longitude, latitude]',
            radius=10000,
            get_elevation='properties.brightness',
            elevation_scale=200,
            # elevation_range=[0, 1000],
            pickable=True,
            extruded=True,
        ),
        pdk.Layer(
            'ScatterplotLayer',
            data=data,
            get_position='[longitude, latitude]',
            get_color='[200, 30, 0, 160]',
            get_radius=10000,
        ),
    ],
    tooltip=tooltip
))

month_data = df[df[DATE_COLUMN].dt.month == date.month]
month_data = month_data.set_index(DATE_COLUMN, drop=True)

st.write(f'Fires per day in {month_name}')

st.bar_chart(month_data.groupby([month_data.index.day]).count().brightness)