Esempio n. 1
0
@st.cache
def fetch_data():
    """Load the hospital dataset and give its key columns short names.

    The CSV is read from the ArcGIS open-data URL; ``X``/``Y`` are renamed to
    ``lon``/``lat`` and ``PrimaryCarePtWARD`` to ``ward``.
    """
    source_url = 'https://opendata.arcgis.com/datasets/018890d7399245759f05c7932261ef44_7.csv'
    # Short names: required by the Streamlit map call and simpler to type.
    column_aliases = {'X': 'lon', 'Y': 'lat', 'PrimaryCarePtWARD': 'ward'}
    return pd.read_csv(source_url).rename(columns=column_aliases)

# Load the dataset; fetch_data() is wrapped in st.cache.
data = fetch_data()

# Chart 1: Interactive Map

st.write('## Map of Hospitals')
# st.map receives the whole frame, including the 'lat'/'lon' columns
# that fetch_data() renames from 'Y'/'X'.
st.map(data)

# Chart 2: Bar chart of hospitals by ward
@st.cache
def hospitals_by_ward(data):
    """Return the number of rows of ``data`` recorded for each ``ward`` value."""
    ward_column = data['ward']
    return ward_column.value_counts()

# Bar chart fed by the per-ward counts returned by hospitals_by_ward().
st.write('## Hospital count by ward')
st.bar_chart(hospitals_by_ward(data))

# Table 1: Raw data
st.write('## Raw hospital data')
# Show the complete dataframe as an interactive table.
st.dataframe(data)
Esempio n. 2
0
def _show_imputation_summary(df_inputado):
    """Show what is still missing after imputation and offer the data for download.

    Builds a per-column summary (name, dtype, NA count, NA percentage) of
    ``df_inputado``, displays the NA percentage of the non-object columns and
    renders the HTML link returned by ``get_table_download_link``.
    """
    exploracao_inputado = pd.DataFrame({
        'nomes': df_inputado.columns,
        'tipos': df_inputado.dtypes,
        'NA #': df_inputado.isna().sum(),
        'NA %': (df_inputado.isna().sum() / df_inputado.shape[0]) * 100
    })
    st.table(exploracao_inputado[
        exploracao_inputado['tipos'] != 'object']['NA %'])
    st.subheader('Dados Inputados faça download abaixo : ')
    st.markdown(get_table_download_link(df_inputado),
                unsafe_allow_html=True)


def main():
    """Render the CSV exploration and numeric-imputation page.

    After the user uploads a CSV the page shows its shape, a preview, the
    column names grouped by dtype and the share of missing values per column.
    The user then picks a missing-percentage limit and an imputation method
    ('Média' or 'Mediana'); columns below the limit are filled with the
    column mean or median respectively and offered for download.
    """
    st.image('logo.png', width=200)
    st.title('AceleraDev Data Science')
    st.subheader('Semana 2 - Pré-processamento de Dados em Python')
    st.image('https://media.giphy.com/media/KyBX9ektgXWve/giphy.gif',
             width=200)
    file = st.file_uploader(
        'Escolha a base de dados que deseja analisar (.csv)', type='csv')
    if file is None:
        # Nothing to analyse until a file has been uploaded.
        return

    st.subheader('Analisando os dados')
    df = pd.read_csv(file)
    st.markdown('**Número de linhas:**')
    st.markdown(df.shape[0])
    st.markdown('**Número de colunas:**')
    st.markdown(df.shape[1])
    st.markdown('**Visualizando o dataframe**')
    number = st.slider('Escolha o numero de colunas que deseja ver',
                       min_value=1,
                       max_value=20)
    st.dataframe(df.head(number))
    st.markdown('**Nome das colunas:**')
    st.markdown(list(df.columns))

    # One row per column: name, dtype, number and percentage of missing values.
    exploracao = pd.DataFrame({
        'nomes': df.columns,
        'tipos': df.dtypes,
        'NA #': df.isna().sum(),
        'NA %': (df.isna().sum() / df.shape[0]) * 100
    })
    st.markdown('**Contagem dos tipos de dados:**')
    st.write(exploracao.tipos.value_counts())
    st.markdown('**Nomes das colunas do tipo int64:**')
    st.markdown(list(exploracao[exploracao['tipos'] == 'int64']['nomes']))
    st.markdown('**Nomes das colunas do tipo float64:**')
    st.markdown(list(
        exploracao[exploracao['tipos'] == 'float64']['nomes']))
    st.markdown('**Nomes das colunas do tipo object:**')
    st.markdown(list(exploracao[exploracao['tipos'] == 'object']['nomes']))
    st.markdown('**Tabela com coluna e percentual de dados faltantes :**')
    st.table(exploracao[exploracao['NA #'] != 0][['tipos', 'NA %']])

    st.subheader('Inputaçao de dados númericos :')
    percentual = st.slider(
        'Escolha o limite de percentual faltante limite para as colunas vocë deseja inputar os dados',
        min_value=0,
        max_value=100)
    lista_colunas = list(
        exploracao[exploracao['NA %'] < percentual]['nomes'])
    select_method = st.radio('Escolha um metodo abaixo :',
                             ('Média', 'Mediana'))
    st.markdown('Você selecionou : ' + str(select_method))

    selecionadas = df[lista_colunas]
    # numeric_only=True keeps object columns from raising on recent pandas;
    # they are simply left unfilled, as older pandas did implicitly.
    if select_method == 'Média':
        preenchimento = selecionadas.mean(numeric_only=True)
    else:
        # 'Mediana': fill with the median (previously filled with the mean).
        preenchimento = selecionadas.median(numeric_only=True)
    _show_imputation_summary(selecionadas.fillna(preenchimento))
Esempio n. 3
0
# Page title and country picker shown in the sidebar.
st.title("Upcoming events Data")

dataset_name = st.sidebar.selectbox(
    "Select Country",
    ('Germany', 'France', 'Italy', 'The Netherlands', 'Switzerland', 'Poland',
     'Greece', 'Sweden', 'Austria', 'Spain', 'Russia', 'China', 'India',
     'Australia', 'Malaysia', 'Japan', 'South Korea', 'United Arab Emirates',
     'Turkey', 'Brazil', 'United States of America', 'Canada', 'Mexico'))

if dataset_name == 'Germany':

    st.subheader("""
    Explore upcoming Events in Germany
    """)
    df = pd.read_csv("Germany.csv")
    st.dataframe(df)

    def csv_downloader(data):
        """Render a link that downloads ``data`` as a CSV file.

        The frame is serialised with ``to_csv`` and embedded in the link as a
        base64 ``data:`` URI, so no file is written on the server.
        """
        csvfile = data.to_csv()
        b64 = base64.b64encode(csvfile.encode()).decode()
        # The name has no placeholder, so the former ``.format(timestr)`` call
        # changed nothing and only added a dependency on ``timestr``.
        new_filename = "Upcoming Events in Germany.csv"
        st.markdown("#### Download File ####")
        href = f'<a href="data:file/csv;base64, {b64}" download = "{new_filename}">Click Here!!</a>'
        st.markdown(href, unsafe_allow_html=True)

    csv_downloader(df)

elif dataset_name == 'France':
    st.subheader("""
    Explore Upcoming Events in France""")
    df = pd.read_csv("France.csv")
Esempio n. 4
0
def _draw_ggplot(fig):
    """Draw a plotnine figure on the page with the size settings shared by all plots."""
    st.pyplot(
        pn.ggplot.draw(fig),
        clear_figure=True,
        width=100,
        height=200,
        dpi=600,
    )


def search_room(dataframe: pd.DataFrame) -> bool:
    """Build the sidebar filters and render the plot the user requests.

    The sidebar offers a "top 100" checkbox, a price range slider, a column
    selector, two axis pickers and one button per plot type. The filtered
    frame is first handed to ``main_room_type`` and then used for whichever
    plot button was pressed.

    Args:
        dataframe: Listings table. The code reads its ``price`` column, and
            the latitude/longitude plot reads ``latitude``, ``longitude`` and
            ``room_type``.

    Returns:
        True after the scatterplot or the latitude/longitude plot has been
        drawn, False in every other case.
    """
    # Search top 100
    top100 = st.sidebar.checkbox(
        "Filter top 100 apartments",
        help="filter only the top 100 apartments by price",
    )

    # Search by price
    lowest_price = min(dataframe.price)
    highest_price = max(dataframe.price)
    min_price, max_price = st.sidebar.slider(
        "Search apartments by price",
        lowest_price,
        highest_price,
        (lowest_price, highest_price),
        help="Insert the min and max price",
    )

    # TODO: filters still to be implemented - review_scores_rating, room type,
    # beds, bathrooms, accommodates.

    # Select columns for plot (every column is selected by default)
    all_columns = list(dataframe.columns)
    to_select = st.sidebar.multiselect(
        "Seleziona le colonne che vuoi visualizzare",
        all_columns,
        all_columns,
        help="Seleziona le colonne che vuoi considerare",
    )

    if top100:
        # Keep the 100 highest-priced rows. The former groupby("price").head(100)
        # kept up to 100 rows for every distinct price instead.
        dataframe = dataframe.nlargest(100, "price")

    dataframe_filtered = dataframe[to_select]

    # The mask is built from the unprojected frame so the price filter still
    # works when "price" is not among the selected columns.
    dataframe_filtered = dataframe_filtered.loc[
        dataframe.price.between(min_price, max_price)
    ]
    # Launch the data visualization
    main_room_type(dataframe_filtered)

    st.sidebar.markdown("Select plot axis")
    axis_choices = list(dataframe_filtered.columns)
    axis1 = st.sidebar.selectbox("Select first axis", axis_choices)
    axis2 = st.sidebar.selectbox("Select second axis", axis_choices)

    scatterplot = st.sidebar.button(
        "Scatterplot", key="bscatterplot", help="Launch the scatterplot"
    )
    if scatterplot:
        fig = px.scatter(dataframe_filtered, x=axis1, y=axis2)
        st.markdown(f"Plot with: {axis1}, {axis2}")
        st.plotly_chart(fig)
        st.markdown("Raw data used")

        st.dataframe(
            dataframe_filtered.style.highlight_max(axis=0)
            .format({axis2: "{:.2%}"})
            .highlight_null(null_color="red")
            .set_caption("Result table with all the data filtered")
        )
        return True

    # NOTE(review): the bar, histogram and density branches below do not
    # return True, unlike the scatter and latitude/longitude branches.
    # Kept as-is because callers may rely on it - confirm the intent.
    barplot = st.sidebar.button(
        "Barplot", key="bggplot", help="Launch the ggplot"
    )
    if barplot:
        st.markdown(
            "To launch this plot please remember to select all the columns in the data"
        )
        fig = (
            pn.ggplot(dataframe_filtered)
            + pn.aes(x=axis1, fill=axis2)
            + pn.geom_bar()
            + pn.theme(axis_text_x=pn.element_text(angle=45, hjust=1))
        )

        st.markdown("### Barplot")
        st.markdown(f"Displaying: {axis1} over {axis2}")
        _draw_ggplot(fig)

    histogram = st.sidebar.button(
        "Histogram", key="bp9histogram", help="Launch the ggplot histogram"
    )
    if histogram:
        fig = (
            pn.ggplot(dataframe_filtered)
            + pn.aes(x="price")
            + pn.geom_histogram(fill="blue", colour="black", bins=30)
            + pn.xlim(0, 200)
        )

        st.markdown("### Histogram")
        # NOTE(review): the caption names the selected axes although the
        # histogram always plots "price".
        st.markdown(f"Displaying: {axis1} over {axis2}")
        _draw_ggplot(fig)

    density = st.sidebar.button(
        "Density", key="bp9density", help="Launch the ggplot density"
    )
    if density:
        # Only the first 1000 filtered rows are used for the density estimate.
        fig = (
            pn.ggplot(dataframe_filtered.head(1000))
            + pn.aes(x="price")
            + pn.geom_density(fill="blue", colour="black", alpha=0.5)
            + pn.xlim(0, 200)
        )

        st.markdown("### Density Plot")
        _draw_ggplot(fig)

    latlong = st.sidebar.button(
        "Latitude-Longitude",
        key="bp9latlon",
        help="Launch the ggplot latitude and longitude categorical comparison",
    )
    if latlong:
        # color categorical variable
        fig = (
            pn.ggplot(
                dataframe_filtered,
                pn.aes(x="latitude", y="longitude", colour="room_type"),
            )
            + pn.geom_point(alpha=0.5)
        )

        st.markdown("### Color categorical variable")
        _draw_ggplot(fig)

        return True

    return False
Esempio n. 5
0
def _render_model_building():
    """Page: fit LazyRegressor models on an uploaded CSV and chart their test scores."""
    st.subheader(
        'Build **AutoML** models with 30 different algorithms and corresponding evaluation'
    )
    uploaded_file = st.file_uploader('', type=['csv'])
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)
    with st.beta_expander('Expand dataframe'):
        st.dataframe(df)

    # 'cnt' is the target; 'instant' and 'dteday' are dropped from the features.
    X = df.drop(['cnt', 'instant', 'dteday'], axis=1)
    Y = df['cnt']

    X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                        Y,
                                                        test_size=0.25)
    reg = LazyRegressor(verbose=0,
                        ignore_warnings=False,
                        custom_metric=None)
    # Only the test-set evaluation is displayed, so the additional fit that
    # scored the models on the training data (results unused) was removed.
    _, predictions_test = reg.fit(X_train, X_test, Y_train, Y_test)

    st.subheader('2. Table of Model Performance on Test set')
    st.write(predictions_test)

    st.subheader('3. Plot of Model Performance (Test set)')

    with st.markdown('**R-squared**'):
        plt.figure(figsize=(9, 3))

        ax1 = sns.barplot(x=predictions_test.index,
                          y="R-Squared",
                          data=predictions_test)
        ax1.set(ylim=(0, 1))
        plt.xticks(rotation=90)
        st.pyplot(plt)

    # NOTE(review): the heading says "capped at 50" but no cap is applied.
    with st.markdown('**RMSE (capped at 50)**'):
        plt.figure(figsize=(9, 3))

        sns.barplot(x=predictions_test.index,
                    y="RMSE",
                    data=predictions_test)
        plt.xticks(rotation=90)
        st.pyplot(plt)


def _manual_input_features():
    """Collect one row of model features from sidebar widgets as a DataFrame."""
    season = st.sidebar.selectbox('Season', np.arange(1, 5))
    yr = st.sidebar.selectbox('Year', np.arange(0, 2))
    month = st.sidebar.selectbox('Month', np.arange(1, 13))
    holiday = st.sidebar.selectbox('Is Holiday', (0, 1))
    weekday = st.sidebar.selectbox('Number of day', np.arange(1, 8))
    workingday = st.sidebar.selectbox('Is workind day', (0, 1))
    weathersit = st.sidebar.selectbox('Weather Number', np.arange(1, 5))
    temp = st.sidebar.slider('Tempareture', 0.05, 0.86, 0.20)
    atemp = st.sidebar.slider('Atemp', 0.07, 0.84, 0.15)
    hum = st.sidebar.slider('Humadity', 0.0, 0.97, 0.55)
    windspeed = st.sidebar.slider('Windspeed', 0.02, 0.5, 0.08)
    casual = st.sidebar.slider('Casual', 2, 3410, 50)
    registered = st.sidebar.slider('Registered', 20, 6946, 5589)
    data = {
        'season': season,
        'yr': yr,
        'mnth': month,
        'holiday': holiday,
        'weekday': weekday,
        'workingday': workingday,
        'weathersit': weathersit,
        'temp': temp,
        'atemp': atemp,
        'hum': hum,
        'windspeed': windspeed,
        'casual': casual,
        'registered': registered
    }
    return pd.DataFrame(data, index=[0])


def _render_prediction():
    """Page: predict with the pickled model from an uploaded CSV or manual input."""
    st.subheader('Prediction for unseen data')
    st.sidebar.header('User Input Features')
    uploaded_file = st.sidebar.file_uploader("Upload your input CSV file",
                                             type=["csv"])
    if uploaded_file is not None:
        input_df = pd.read_csv(uploaded_file)
    else:
        st.sidebar.subheader('Or input your features manually')
        input_df = _manual_input_features()

    # Shown for both input paths. Previously this part lived inside the
    # manual-input branch, so an uploaded CSV was read and then ignored.
    st.subheader('User input features :')
    st.dataframe(input_df)

    if st.button('Start prediction'):
        # The pickle is a file shipped with the app; never point this at
        # untrusted data. The context manager closes the file handle.
        with open('LassoLarsIC.pkl', 'rb') as model_file:
            model = pickle.load(model_file)

        pred = model.predict(input_df)
        st.write('The prediction is :', pred)


def _render_eda():
    """Page: show the uploaded data, its profile report, summary and dtypes."""
    st.subheader('Explanatory data analysis')
    uploaded_file = st.file_uploader('', type=['csv'])
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)
    with st.beta_expander('Expand dataframe'):
        st.dataframe(df)

    with st.beta_expander('Full profile information'):
        st_profile_report(ProfileReport(df, explorative=True))

    with st.beta_expander('Display basic summary'):
        st.write(df.describe().T)
    with st.beta_expander('Display data type'):
        st.write(df.dtypes)


def _render_visualization():
    """Page: draw the fixed set of exploratory charts for the uploaded data."""
    st.subheader('Data Visualization')
    uploaded_file = st.file_uploader('', type=['csv'])
    if uploaded_file is None:
        return

    df = pd.read_csv(uploaded_file)
    with st.beta_expander('Expand dataframe'):
        st.dataframe(df)

    with st.beta_expander('Display bike rental along with time axis'):
        # Work on a copy so the date index does not leak into the other charts.
        df2 = df.copy(deep=True)
        df2.dteday = pd.to_datetime(df2.dteday)
        df2.set_index('dteday', inplace=True)
        plt.figure(figsize=(20, 6))
        df2['cnt'].plot()
        st.pyplot()
        st.write(
            'These shows that bike rental counts has seasonality and quite upwards trend.'
        )
    with st.beta_expander('Display heatmap'):
        plt.figure(figsize=(10, 6))
        sns.heatmap(df.corr(), annot=True)
        st.pyplot()
        st.write('There are some multicolliearity.')

    col1, col2 = st.beta_columns(2)
    with col1:
        with st.beta_expander(
                'Display total bike rental counts with different seasons'
        ):
            df.groupby('season')['cnt'].sum().plot(kind='bar')
            st.pyplot()
            st.write('Maximum bike rent was in season 3.')
        with st.beta_expander(
                'Display total bike rental counts along with months and years'
        ):
            df.groupby(['mnth',
                        'yr'])['cnt'].sum().unstack().plot(kind='bar')
            st.pyplot()
            st.write(
                'This plot shows the total bike rental count of every month of 2011 and 2012'
            )
            st.write(
                'From MAY to OCTOBER the total bike rental count was high in every year and total rental in every month has increased from 2011 to 2012'
            )
        with st.beta_expander(
                'Display the pie chart of weathersit based on bike rental'
        ):
            # Labels and explode are fixed at three entries, so the data is
            # expected to contain exactly three weathersit groups.
            plt.pie(df.groupby('weathersit')['cnt'].sum(),
                    labels=['1', '2', '3'],
                    explode=(0.05, 0, 0),
                    radius=1,
                    autopct='%0.2f%%',
                    shadow=True)
            plt.tight_layout()
            plt.legend(loc='upper left')
            plt.axis('equal')
            plt.show()
            st.pyplot()
            st.write(
                'we have found total out of total bike rental count, 68.57% count was in "Clear, Few clouds, Partly cloudy, Partly cloudy" weatherand 30.27% was in " Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist" weather.'
            )
        with st.beta_expander('Display the outliers'):
            num_var = [
                'temp', 'atemp', 'hum', 'windspeed', 'casual',
                'registered'
            ]
            # One boxplot per numeric feature.
            for i in num_var:
                sns.boxplot(y=i, data=df)
                plt.title('Boxplot of ' + i)
                plt.show()
                st.pyplot()
            st.write(
                'We have found some outliers on the features - casual,windspeed and humidity'
            )
    with col2:
        with st.beta_expander(
                'Display the relationship between bike rental count and temperature'
        ):
            sns.scatterplot(x='temp', y='cnt', data=df)
            st.pyplot()
            st.write(
                'We found almost linear relation between temp and count.'
            )
        with st.beta_expander(
                'Display the relationship between bike rental count and windspeed'
        ):
            sns.scatterplot(x='windspeed', y='cnt', data=df)
            st.pyplot()
            st.write('There is not much interpretation')
        with st.beta_expander(
                'Display violine plot of seasons along with bike rental count'
        ):
            sns.violinplot(x=df.season, y=df.cnt)
            st.pyplot()
            st.write(
                'Less count was in season 1 and it is right skewed and rest 3 seasons has not exactly any long tail and more or less season 2,3,4 have similar distribution'
            )


def _render_home():
    """Page: show the banner image and the application title."""
    image = Image.open('RentBike_25-09-17_02.jpg')
    st.image(image, use_column_width=True)
    st.title(
        'Bike rental analysis, visualization, model building, evaluation and prediction in a single window'
    )


def main():
    """Configure the page and render the section chosen in the sidebar.

    The sidebar select box offers five sections (Home, EDA, Visualization,
    Model building and evaluation, Prediction); each one is rendered by its
    own helper.
    """
    st.set_page_config(page_title="Bike rental prediction", layout='wide')

    options = [
        'Home', 'EDA', 'Visualization', 'Model building and evaluation',
        'Prediction'
    ]
    choice = st.sidebar.selectbox('Choose the followings', options)

    pages = {
        'Home': _render_home,
        'EDA': _render_eda,
        'Visualization': _render_visualization,
        'Model building and evaluation': _render_model_building,
        'Prediction': _render_prediction,
    }
    render_page = pages.get(choice)
    if render_page is not None:
        render_page()
                                    'Annual Profit%': str,
                                    'Max Drawdown%': str,
                                    'Winners%': str,
                                    'Recovery Factor': str,
                                    'Profit Factor': str,
                                    'Sharpe': str
                                })

    my_key = key_config + str(s_year_ini) + str(s_year_end) + str(s_axs_target)

    if df_strg_sumry[df_strg_sumry['id'] == my_key]['id'].count() == 0:
        today_is = datetime.now().strftime('%Y-%m-%d %H:%M:%S')
        next_row = df_strg_sumry.shape[0]
        df_strg_sumry.loc[next_row] = [
            my_key, today_is, s_contract, s_timetable,
            str(s_rng_years),
            str(s_rng_adjust),
            str(s_axs_target),
            str(s_year_ini) + '-' + str(s_year_end), b1, c1, e0, d1, d2, d0
        ]

    if reset_strg_tab_on:
        last = df_strg_sumry.shape[0]
        df_strg_sumry.drop(df_strg_sumry.index[range(last)], inplace=True)
        reset_strg_tab = False

    st.subheader('Registered Strategies')
    st.dataframe(df_strg_sumry.T)

    df_strg_sumry.to_csv(my_table, index=False)
st.title("Data App - Forescasting Voltage (p.u) bus 32")

# subtitle
st.markdown("This app forecast voltage values using Machine Learning for the problem of forescating voltages.")

# inspecting the dataset
st.subheader("Select a set of information")

# columns shown by default
defaultcols = ["Voltage_bus32"]

# let the user choose the columns through the multiselect
cols = st.multiselect("Information", data.columns.tolist(), default=defaultcols)

# show the first 10 rows of the selected columns
st.dataframe(data[cols].head(10))


st.subheader("Probability Density Function Voltages (p.u)")

# define the value range: the slider starts at the column minimum, ends at
# 150 and is initialised to the interval (10.0, 100.0)
faixa_valores = st.slider("Range Voltage", float(data.Voltage_bus32.min()), 150., (10.0, 100.0))

# filter the data to the rows whose voltage lies inside the selected range
dados = data[data['Voltage_bus32'].between(left=faixa_valores[0],right=faixa_valores[1])]

# plot the distribution of the filtered data
f = px.histogram(dados, x="Voltage_bus32", nbins=100, title="Probability Density Function")
f.update_xaxes(title="Voltage_bus32")
f.update_yaxes(title="PDF")
st.plotly_chart(f)
Esempio n. 8
0
def desc(data=None, pays=None):
	"""Render the descriptive-analysis section of the page.

	Everything is nested under checkboxes and buttons: a summary table, the
	correlation matrix with a per-year latitude/longitude regression plot,
	optional per-year event tables, a cached year-by-year summary, and
	fatalities broken down by event type and by administrative region.

	Args:
		data: DataFrame from which this function reads the columns YEAR,
			LATITUDE, LONGITUDE, FATALITIES, EVENT_TYPE, SUB_EVENT_TYPE,
			ADMIN1, LOCATION, NOTES and EVENT_DATE.
		pays: Label inserted into the header and plot titles
			(presumably the country name - confirm with the caller).
	"""
	# descriptive analysis
	st.header(f"Descriptive analysis: {pays}.")
	if st.checkbox('Descriptive analysis', key=3):
		if st.button('Describe data', key=14):
			st.subheader('Describe data')
			st.dataframe(data.describe())

		
		if st.checkbox('correlation', key=4):
			st.subheader('Correlation')
			st.dataframe(data.corr())
			with st.beta_expander('Learn more'):
				st.markdown(""" 
				Two variables are positive correlated if only if

				1. $corr >= 0.5$.
				2. $corr >= 0.8$ (strong).

				Two variables are negative correlated if only if

				1. $corr < -0.5$.
				2. $corr <= -0.8$ (strong).

				Assume that $x$ and $y$ are two independent variables; if we compute $corr(x,y) >= 0.5$ or $corr(x,y) < -0.5$ or 
				$corr(x,y) >= 0.8$ or $corr(x,y)<= -0.8$,
				we can plot this
				> $y = f(x)$ this means that the trend of $y$ depends on trend of $x$. 

				**N.B**: 

				1. Positive correlation means that $x$ and $y$ go together i.e if $x$ increase over time, $y$ increase over time.
				2. Negative correlation means that $x$ and $y$ does not go together if $x$ increase over time, $y$ decrease over time.

				""")

			# The selectable years are hard-coded to 1997..2020.
			year = st.selectbox('Select year:', range(1997,2021))

			# correlation between latitude and longitude for the selected year
			result = 'corr(latitude, longitude) = {}.'.format(data[data.YEAR==year].corr().loc['LATITUDE', ['LONGITUDE']].values[0])
			st.success(result)

			# regression plot (lowess smoothing) of latitude against longitude
			fig, ax = plt.subplots()
			sns.regplot(x='LONGITUDE', y='LATITUDE', data=data[data.YEAR==year], lowess=True)
			title = 'Regression plot latitude and longitude for year {}'.format(year)
			ax.set_title(title)
			st.pyplot(fig)


			# columns of the first per-year table shown under 'Geolocalization'
			cols = ['EVENT_TYPE','ADMIN1','LOCATION','SUB_EVENT_TYPE','FATALITIES', 'EVENT_DATE']
			if st.checkbox('Geolocalization',key=5):
				st.subheader('Geolocalization')
				st.dataframe(data[data.YEAR==year][cols])
				st.dataframe(data[data.YEAR==year][['EVENT_DATE','LOCATION','NOTES', 'FATALITIES']])

			@st.cache
			def curiosity():
				"""Build one summary row per year from 1997 to 2020.

				Each row holds the latitude/longitude correlation, the year,
				the fatalities total and the modes of ADMIN1, EVENT_TYPE and
				SUB_EVENT_TYPE. Reads ``data`` from the enclosing scope.
				"""

				# one accumulator list per output column
				corr = []
				years = []
				total_fata = []
				admin1 = []
				ev_tpe = []
				sub_type = []

				for u in range(1997,2021):

					corr.append(data[data.YEAR==u].corr().loc['LATITUDE', ['LONGITUDE']].values[0])
					years.append(u)
					# mode().values is an array, so ties keep every modal value
					admin1.append(data[data.YEAR==u].ADMIN1.mode().values)
					total_fata.append(data[data.YEAR==u].FATALITIES.sum())
					ev_tpe.append(data[data.YEAR==u].EVENT_TYPE.mode().values)
					sub_type.append(data[data.YEAR==u].SUB_EVENT_TYPE.mode().values)

				cdata = pd.DataFrame()

				cdata['corr(lat,long)'] = corr
				cdata['year'] = years
				cdata['total_fatalities'] = total_fata
				cdata['admin1_mode'] = admin1
				cdata['event_type_mode'] = ev_tpe
				cdata['sub_event_type_mode'] = sub_type

				return cdata

			if st.checkbox('Some curiosity',key=6):
				st.subheader('Relevant informative data')
				df = curiosity()
				# year-indexed copy, used only by the heatmap below
				cd = df.set_index('year')
				st.dataframe(df)

				if st.button('plot corr(lat,long) vs total_fatalities'):
					c = alt.Chart(df).mark_bar().encode(x='corr(lat,long)', y='total_fatalities', 
						tooltip=['corr(lat,long)', 'total_fatalities'])
					st.altair_chart(c, use_container_width=True)

				if st.button('Heatmap calendar'):
					
					fig, ax = plt.subplots()
					sns.heatmap(cd[['corr(lat,long)', 'total_fatalities']],center=0, annot=True, fmt='.6g')
					ax.set_title('Heatmap calendar.')
					st.pyplot(fig)

		# conflict is spreading
		if st.checkbox('is conflict spreading?', key=7):
			st.subheader('Conflict is spreading.')
			# total fatalities per year, excluding rows from 2021
			year_fata = data[data.YEAR!=2021] .groupby('YEAR')['FATALITIES'].agg('sum')
			# event type section
			if st.checkbox('event type', key=8):
				st.subheader('Event type')
				if st.button('fatalities barplot'):
					fig, ax = plt.subplots()
					year_fata.plot(kind='bar')
					ax.set_ylabel('cummulative fatalities')
					ax.set_title(F'Progresssive of fatalities caused by conflict in {pays}.')
					st.pyplot(fig)

				# fatalities summed per year (rows) and event type (columns)
				event_conflict = pd.pivot_table(data, values='FATALITIES', 
						columns='EVENT_TYPE', index='YEAR', aggfunc='sum')

				if st.button('calendar event type', key=15):
					fig, ax = plt.subplots()
					sns.heatmap(event_conflict, center=0, annot=True, fmt='.6g')
					ax.set_title(F'Heatmap of conflict in {pays}.')
					st.pyplot(fig)

					with st.beta_expander('Learn more'):
						st.markdown("""
						The blank space means that no data are recorded in that year corresponding to the event type. 
						 """)

				if st.button('event type describe', key=16):
					st.dataframe(event_conflict.describe())

				if st.button('event type similarity', key=17):
					st.dataframe(event_conflict.corr())
					with st.beta_expander('Learn more'):
						st.markdown("""
						Refer to correlation learn more section.
						 """)

				if st.button('sub event similarity', key=18):
					# same pivot as above, keyed by sub event type
					sub_conflict = pd.pivot_table(data, values='FATALITIES', index='YEAR',
					 columns='SUB_EVENT_TYPE', aggfunc='sum')

					st.dataframe(sub_conflict.corr())
					with st.beta_expander('Learn more'):
						st.markdown("""
						NaN: Not a Number.  
						 """)

			# Administrative region
			if st.checkbox('conflict administrative region', key=9):
				st.subheader('Conflict administrative region')
				# fatalities summed per year (rows) and ADMIN1 region (columns)
				region = pd.pivot_table(data, values='FATALITIES', columns='ADMIN1',
				 index='YEAR', aggfunc='sum')

				if st.checkbox('fatalities calendar'):
					# 'scaling' switches to a wider figure without annotations
					if st.button('scaling'):
						fig, ax = plt.subplots(figsize=(15, 5), dpi=150)
						fmt = '.2g'
						annot = False
					else:
						fig, ax = plt.subplots()
						fmt = '.3g'
						annot = True
					sns.heatmap(region, annot=annot, fmt=fmt)
					ax.set_title(f'Fatalities calandar conflict in {pays} ')
					st.pyplot(fig)

				if st.button('conflict describe'):
					st.dataframe(region.describe())

				if st.button('conflict similarity'):
					st.dataframe(region.corr())

					with st.beta_expander('Learn more'):
						st.markdown("""
						correlation give similarity between two variables for data going to 1997 to 2020.
						  """)
Esempio n. 9
0
def _send_automated_email(recipients):
    """Send the fixed automated message, with its image attachments, to ``recipients``.

    Args:
        recipients: List of e-mail address strings placed in the ``To`` header.
    """
    msg = EmailMessage()
    msg['Subject'] = "This an automated email"
    msg['From'] = '*****@*****.**'
    msg['To'] = ", ".join(recipients)
    msg.set_content("This is the body of an email")

    # For pdf create lst on similar line
    files = ['Avihs1.jpg', 'waymo_car.jpg']

    for file in files:
        # Open the file of the current iteration. The loop used to open
        # 'Avihs1.jpg' every time, attaching the same image twice.
        with open(file, 'rb') as f:
            file_data = f.read()
            # No need of file_type for pdf
            file_type = imghdr.what(f.name)
            file_name = f.name

        # For pdf, maintype="application" & subtype='octet-stream'
        msg.add_attachment(file_data,
                           maintype='image',
                           subtype=file_type,
                           filename=file_name)

    with smtplib.SMTP_SSL('smtp.gmail.com', 465) as smtp:
        smtp.login('*****@*****.**', '3')
        smtp.send_message(msg)


def main():
    """Render the Avihs Technologies app: sidebar plus the selected service page.

    * Internet Leased Line: upload a CSV, list rows whose "Point A City" is
      BANGALORE and whose "Location" is not, and chart company names by
      bandwidth.
    * Email Automation: send a fixed message with attachments to the
      addresses typed by the user.
    * Whatsap Automation: placeholder inputs only.
    """
    # Create Sidebar
    st.sidebar.title("Avihs Technologies")
    st.sidebar.subheader("Analytics/Automation Platform")
    sdbar_select = st.sidebar.radio(
        "What service do you want to use today",
        ["Internet Leased Line", "Email Automation", "Whatsap Automation"])

    # ILL Landing page
    if sdbar_select == "Internet Leased Line":
        st.title("Howdy!! Welcome to ILL Analytics World")
        img = mpimg.imread("Avihs1.jpg")
        st.image(img, width=700)

        # Upload the file
        data = st.file_uploader("Upload the file", type=["csv", "txt", "xls"])
        if data is not None:
            df = pd.read_csv(data)
            if st.checkbox("Check if database is loaded"):
                if st.dataframe(df.head()):
                    st.success("Loading Suceess")

                df['FAN Creation Date'] = pd.to_datetime(
                    df['FAN Creation Date'], format='%Y-%m-%d %H:%M:%S')
                # Newest entries first, so head(n) yields the latest n rows.
                df = df.sort_values(by='FAN Creation Date', ascending=False)

                # Both conditions are evaluated on the same frame (the second
                # one used to index df_new with a mask built from df), and the
                # result is copied so the column assignment below does not
                # write into a view of df.
                is_lead = ((df["Point A City"] == "BANGALORE")
                           & (df["Location"] != "BANGALORE"))
                df_new = df[is_lead].copy()
                df_new["Bandwidth"] = df_new["Bandwidth"].str.replace(
                    "Mbps", "")
                if st.checkbox("Fetch Probable Leads"):
                    number = st.slider("How many latest enties required?", 1,
                                       len(df_new))
                    st.dataframe(df_new.head(number))

                    if st.checkbox("Top Company names based on Bandwidth"):
                        st.info(
                            "These leads are published based on high Bandwidth requirements"
                        )
                        number = st.number_input(
                            "Choose the count you want to view", 1,
                            len(df_new))
                        top_ten = df_new.head(number)[
                            ["Company Name", "Bandwidth"]].copy()
                        top_ten['Bandwidth'] = pd.to_numeric(
                            top_ten["Bandwidth"])
                        top_ten = top_ten.sort_values(
                            by="Bandwidth",
                            ascending=False).reset_index(drop=True)

                        # Bars are ordered by descending bandwidth.
                        st.altair_chart(
                            alt.Chart(top_ten).mark_bar().encode(
                                x='Bandwidth',
                                y=alt.Y('Company Name', sort='-x')))
                        st.dataframe(top_ten)

                        # TODO: offer the table as a CSV download link.

    if sdbar_select == "Email Automation":
        st.title("Hi! Welcome to Email Automation Center")
        img = mpimg.imread("Avihs2.jpg")
        st.image(img, width=700)
        if st.checkbox("Insert Email Id's"):
            contacts = st.text_area("")
            if st.button("Send Email"):
                # The text area yields one string; split it on commas,
                # semicolons and whitespace. Joining the raw string put
                # ", " between every single character.
                recipients = contacts.replace(',', ' ').replace(
                    ';', ' ').split()
                if recipients:
                    _send_automated_email(recipients)
                else:
                    st.warning("Please enter at least one email address")

    if sdbar_select == "Whatsap Automation":
        st.title("Hello!! Welcome to Whatsap Automation")
        img = mpimg.imread("Avihs3.jpg")
        st.image(img, width=700)

        st.text_area("Enter phone numbers")
        st.button("Send")
Esempio n. 10
0
def _load_encoded_hr_data():
    """Load ``HR_data.csv`` and numerically encode its categorical columns.

    Returns:
        DataFrame holding the six feature columns plus the ``Left`` target,
        with ``Geography`` mapped to 1/2/3 and ``Gender`` mapped to 0/1.
    """
    df = pd.read_csv("HR_data.csv")
    df = df[["Geography", "Gender", "Age", "Tenure", "ActiveEmployee", "Salary", "Left"]]
    df['Geography'] = df['Geography'].replace({"Germany": 1, "France": 2, "Spain": 3})
    df['Gender'] = df['Gender'].replace({"Female": 0, "Male": 1})
    return df


def _build_model(model_choice):
    """Return an unfitted classifier for the selected label, or ``None``."""
    # Imports stay local so scikit-learn is only loaded when a prediction
    # is actually requested.
    if model_choice == "Logistic regression":
        from sklearn.linear_model import LogisticRegression
        return LogisticRegression()
    if model_choice == "K-NN":
        from sklearn.neighbors import KNeighborsClassifier
        return KNeighborsClassifier(n_neighbors=18)
    if model_choice == "Random forest":
        from sklearn.ensemble import RandomForestClassifier
        return RandomForestClassifier(n_estimators=600)
    return None


def _fit_and_report(model, data_sample):
    """Train ``model`` on the HR data and display its verdict for one sample.

    Args:
        model: Unfitted scikit-learn classifier exposing ``predict_proba``.
        data_sample: Single-row DataFrame with the same feature columns as
            the training data.
    """
    from sklearn.metrics import confusion_matrix
    from sklearn.model_selection import train_test_split
    from sklearn.preprocessing import MinMaxScaler

    df = _load_encoded_hr_data()
    X = df.drop('Left', axis=1)
    y = df['Left']
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

    # Scale every feature to [0, 1] using statistics of the training split only.
    scaler = MinMaxScaler()
    scaler.fit(X_train)
    X_train = scaler.transform(X_train)
    X_test = scaler.transform(X_test)

    model.fit(X_train, y_train)
    cm = confusion_matrix(y_test, model.predict(X_test))
    # Accuracy in percent: correctly classified (diagonal) over all samples.
    proc = round(cm.trace() / (cm.sum() / 100), 2)
    st.write('Accuracy: {}%'.format(proc))

    # Column 0 belongs to the first class in ``model.classes_``; the app
    # treats it as "stays" (presumably Left == 0 -- confirm against the data).
    proba = model.predict_proba(scaler.transform(data_sample))
    prob = round(proba[0, 0], 4)
    st.write('Probability the employee will stay at the company: {}%'.format(round(prob*100,2)))

    st.write(proba)
    if prob >= 0.5:
        st.success('The employee will stay :)')
    else:
        st.warning('The employee will leave :(')


def _plot_hr_distributions(df):
    """Render one chart per HR column: bar charts of counts or histograms."""
    # (column, histogram bins or None for a value-count bar chart, title)
    charts = [
        ('Geography', None, 'Geography'),
        ('Gender', None, 'Gender'),
        ('Age', 30, 'Age Distribution'),
        ('Tenure', 11, 'Tenure Distribution'),
        ('ActiveEmployee', None, 'Active Employee'),
        ('Salary', 100, 'Salary Distribution'),
        ('Left', None, 'People Left'),
    ]
    for column, bins, title in charts:
        fig = plt.figure()
        if bins is None:
            df[column].value_counts().plot(kind='bar', color='dodgerblue')
        else:
            df[column].hist(alpha=1, color='dodgerblue', bins=bins)
        plt.title(title)
        st.pyplot(fig)
        # Streamlit reruns the script on every interaction; close the figure
        # so open figures do not pile up in memory.
        plt.close(fig)


def main():
    """Prediction App.

    Streamlit page with three sidebar sections: a landing page ("Home"),
    a password-gated area ("Login") offering data visualisation and an
    employee-attrition prediction form, and an "About" page.
    """

    menu = ["Home", "Login", "About"]
    submenu = ["Visualisation", "Prediction"]

    choice = st.sidebar.selectbox("Menu", menu)
    if choice == "Home":
        st.title("Do you want to know your EMPLOYEES?")
        st.subheader("If so, please Log IN!")
        st.write("Special market situations require flexible and efficient solutions, be it for example the successful implementation of a new project, coping with a crisis in a timely manner or covering a sudden vacancy in a key management position. The rapid implementation of measures is crucial to success in today's dynamic economic environment!")
        st.text("")
        st.write("Copyright © 2021")

    elif choice == "Login":
        username = st.sidebar.text_input("User Name (Enter: Any name)")
        password = st.sidebar.text_input("Password (Enter: 123)", type='password')
        st.sidebar.write('Check to ENTER')
        if st.sidebar.checkbox("Login"):
            # NOTE(review): credentials are hard-coded demo values and the
            # user name is not checked at all; do not use this for real access control.
            if password in ["HR", "123"]:
                st.success("Welcome {}".format(username))

                activity = st.selectbox("CHOOSE:", submenu)
                if activity == "Visualisation":
                    st.subheader("Data Visualisation")
                    df = pd.read_csv("HR_data.csv")
                    st.dataframe(df)
                    _plot_hr_distributions(df)

                elif activity == "Prediction":
                    st.subheader("Please choose atributes:")

                    geography = st.radio("Geography:", tuple(geography_dict.keys()))
                    gender = st.radio("Gender:", tuple(gender_dict.keys()))
                    age = st.number_input("Age:",1,100,18)
                    tenure = st.number_input("Years at the firm:",1,45,1)
                    active = st.radio("Is active employee:", tuple(feature_dict.keys()))
                    salary = st.slider("Yearly salary (EUR):",10000,200000,10000)

                    feature_list = [get_geography_value(geography), get_gender_value(gender), age, tenure, get_feature_value(active), salary]
                    st.subheader("Output to be sent to the model:")
                    pretty_result = {"geography": geography, "gender": gender, "age": age, "tenure": tenure, "active": active, "salary": salary}
                    st.json(pretty_result)

                    # One-row frame whose column names match the training features.
                    data = np.array(feature_list).reshape(1,-1)
                    data_sample = pd.DataFrame(data).rename(columns = {0:"Geography", 1:"Gender", 2:"Age", 3:"Tenure", 4:"ActiveEmployee", 5:"Salary"})

                    # ML
                    model_choice = st.selectbox("Select model:", ["Logistic regression", "K-NN", "Random forest"])
                    if st.button("Predict"):
                        model = _build_model(model_choice)
                        if model is not None:
                            _fit_and_report(model, data_sample)

            else:
                st.warning("Incorrect Username or Password!")

    elif choice == "About":
        st.title("About me / My mission:")
        st.subheader("I can help you to predict, if and when your employees will leave your company!")
        # The literals below are concatenated by the parser; each paragraph
        # ends with a space so sentences do not run together.
        st.write("My name is Jaroslav Kotrba and I am very passionate about working with data and its visualization or transformation in an understandable manner. In year 2020 I wrote and defended my diploma thesis: 'The Systemic Risk at the Mortgage Market', which allowed me to unfold my skills in the data science area. "
                 "My passion for numbers, modeling, and predictions led me to courses linked to econometrics and statistics taught at the Faculty of Economics, where I got acquainted with data analytics. Then followed courses related to the statistics in computer science at the Faculty of Informatics that were not compulsory. However, I wanted to get a better insight and acquire more knowledge in practical data science from real IT experts. "
                 "The first working experience was obtained at the Czech Embassy in Vienna. My tasks were to compile statistics related to the Austrian economy and write summarisations on them accompanied by graphic elements predominantly sent to the headquarters in Prague. Writing about Prague gives me the opportunity to mention the membership in the Economics Commission for National Economy, where I discuss and elaborate possible changes, steps, and recommendations for the government to stabilize and improve the Czech Republic's economic situation. Nowadays, I devote my time to work with software R where I love to solve new problems and challenges. As I do like R very much, I am very keen on dealing with SQL, Python, Excel, or Tableau. I am currently working by Honeywell as Data Analyst.")
Esempio n. 11
0
def main():
	"""Render the UARIV daily-event-log explorer page.

	Lets the user pick a file from ``./data``, loads it as CSV using the
	encoding detected by ``chardet``, and offers column selection, a
	summary table and a few plots. The sidebar shows project options and
	team credits.
	"""
	st.image('unidadvictimas_logo2018-01.jpg', use_column_width=False)
	st.title("UARIV")
	st.subheader("Proyecto Data Science - DS4A Colombia 2.0")
	html_temp = """
	<div style="background-color:tomato;"><p style="color:white;font-size:15px;padding:8px">Automatic loading and classification of the daily event log (BDE) - Victims Attention and Repair Unit</p></div>
	"""
	st.markdown(html_temp,unsafe_allow_html=True)

	def file_selector(folder_path='./data'):
		"""Show a select box with the entries of ``folder_path``; return the chosen path."""
		filenames = os.listdir(folder_path)
		selected_filename = st.selectbox("Select A file",filenames)
		return os.path.join(folder_path,selected_filename)

	filename = file_selector()
	st.info("You Selected {}".format(filename))

	# Read Data: sniff the encoding from the raw bytes first, because the
	# input files are not guaranteed to be UTF-8.
	with open(filename, 'rb') as f:
		result = chardet.detect(f.read())  # or readline if the file is large
	df = pd.read_csv(filename, encoding=result['encoding'])
	st.dataframe(web_scrp())

	# Select Columns
	if st.checkbox("Select Columns To Show"):
		all_columns = df.columns.tolist()
		selected_columns = st.multiselect("Select",all_columns)
		# This assignment used to be commented out, which made the next
		# line fail with NameError as soon as the box was ticked.
		new_df = df[selected_columns]
		st.dataframe(new_df)

	# Show Summary
	if st.checkbox("Summary"):
		st.write(df.describe().T)

	## Plot and Visualization

	st.subheader("Data Visualization")
	# Correlation
	# Seaborn Plot
	if st.checkbox("Correlation Plot[Seaborn]"):
		# Correlation is only defined for numeric columns; text columns
		# make DataFrame.corr() raise on recent pandas versions.
		numeric_df = df.select_dtypes(include="number")
		if numeric_df.shape[1] == 0:
			st.warning("No numeric columns available for a correlation plot")
		else:
			heatmap_ax = sns.heatmap(numeric_df.corr(),annot=True)
			# Pass the figure explicitly instead of relying on the
			# deprecated global pyplot figure; clear it afterwards so the
			# next plot starts from an empty canvas.
			st.pyplot(heatmap_ax.figure, clear_figure=True)

	# Pie Chart
	if st.checkbox("Pie Plot"):
		if st.button("Generate Pie Plot"):
			st.success("Generating A Pie Plot")
			# Distribution of the last column (treated as the target).
			pie_ax = df.iloc[:,-1].value_counts().plot.pie(autopct="%1.1f%%")
			st.pyplot(pie_ax.figure, clear_figure=True)

	# Count Plot
	if st.checkbox("Plot of Value Counts"):
		st.text("Value Counts By Target")
		all_columns_names = df.columns.tolist()
		primary_col = st.selectbox("Primary Columm to GroupBy",all_columns_names)
		selected_columns_names = st.multiselect("Select Columns",all_columns_names)
		if st.button("Plot"):
			st.text("Generate Plot")
			if selected_columns_names:
				vc_plot = df.groupby(primary_col)[selected_columns_names].count()
			else:
				# Nothing selected: fall back to counts of the last column.
				vc_plot = df.iloc[:,-1].value_counts()
			bar_ax = vc_plot.plot(kind="bar")
			st.pyplot(bar_ax.figure, clear_figure=True)

	st.sidebar.image('ds4aColombia2.png', use_column_width=False)
	st.sidebar.header("Options")
	# NOTE(review): a button is only True for the single rerun right after
	# the click, so the nested checkbox probably never stays visible long
	# enough to be ticked -- confirm the intended interaction.
	if st.sidebar.button("Media tracking"):
		if st.sidebar.checkbox("El Heraldo"):
			st.dataframe(web_scrp())
	st.sidebar.button("News clasification")
	st.sidebar.button("Heatmap")

	st.sidebar.header("Team-68")
	st.sidebar.info("Nathalia Chaparro; Jesús Mannios; "
			" Raul Cuervo; Juan García; "
			" Rafael Nino; Jairo Maya")
	st.sidebar.text("2020 - Maintained by Jairo Maya")
Esempio n. 12
0
def write():
    """Render a small MongoDB collection browser.

    The user enters a connection address, picks a database and a
    collection, and can then preview the documents, inspect dtypes and
    shape, and plot selected columns.
    """
    st.title("Test with MongoDB")

    enter = st.text_input("Address", 'mongodb://localhost:27017')
    # Widget keys must be unique on a page; the original reused key=1 for
    # this button and for the preview checkbox below.
    if st.button("Enter", key="mongo-enter"):
        result = enter.title()
        st.success(result)

    # ------------- Collection selection --------------
    # Streamlit reruns this function on every interaction. All documents
    # are fetched eagerly inside the ``with`` block so the client is closed
    # again instead of leaking one connection pool per rerun.
    with MongoClient(enter) as client:
        db = client.list_database_names()
        st.write(db)
        select1 = st.selectbox("Select a database", db)
        if select1 is None:
            st.warning("No databases found at this address")
            return
        selected_database = client[select1]

        a = selected_database.list_collection_names()
        st.write(a)
        select = st.selectbox("Select a collection", a)
        if select is None:
            st.warning("The selected database has no collections")
            return
        selected_filename = selected_database[select]

        collection_select = pd.DataFrame(list(selected_filename.find()))

    # ------------------ Collection preview ---------------------

    if st.checkbox("Preview collection", key="preview-collection"):
        number = st.number_input("Number of Rows to View", 1, 200)
        st.dataframe(collection_select.head(number))

    # --------------- Show the whole collection -----------------

    if st.checkbox("Show all collection", key="show-all-collection"):
        st.write(collection_select)

    # ---------------------- Data types ------------------------

    if st.checkbox("Data Types in the collection", key="collection-dtypes"):
        st.write(collection_select.dtypes)

    # --------- Overall number of rows / columns ---------

    if st.checkbox("Shape of Dataset", key="collection-shape"):
        st.text("Number of Rows")
        st.write(collection_select.shape[0])
        st.text("Number of Columns")
        st.write(collection_select.shape[1])

    # --------------- Draw some plots ---------------

    if st.checkbox("Plot with select columns", key="plot-columns"):
        st.text("Works extremely unstable, needs to be improved")
        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ["Area", "Bar", "Line", "Hist", "Box", "Kde"])

        all_columns_names = collection_select.columns.tolist()
        selected_columns_names = st.multiselect("Select Columns To Plot",
                                                all_columns_names)

        # NOTE(review): the button only shows a message; the chart below is
        # redrawn live on every change. Confirm whether it should gate the plot.
        if st.button("Generate Plot"):
            st.success("Generating Customizable Plot of {} for {}".format(
                type_of_plot, selected_columns_names))

        # Plotting an empty column selection raises inside pandas, so only
        # draw once at least one column has been chosen.
        if selected_columns_names:
            cust_data = collection_select[selected_columns_names]

            if type_of_plot == 'Area':
                st.area_chart(cust_data)

            elif type_of_plot == 'Bar':
                st.bar_chart(cust_data)

            elif type_of_plot == 'Line':
                st.line_chart(cust_data)

            else:
                # pandas only accepts lower-case plot kinds ("hist", "box",
                # "kde"); the labels shown in the UI are capitalised.
                cust_plot = cust_data.plot(kind=type_of_plot.lower())
                # Hand the figure over explicitly instead of relying on the
                # deprecated global pyplot figure.
                st.pyplot(cust_plot.figure, clear_figure=True)
Esempio n. 13
0
def main():
    """Semi Automatic ML App powered by Streamlit.

    Offers four sidebar activities: exploratory data analysis, plotting,
    a cross-validated comparison of several classifiers, and an About page.
    Each data activity works on a CSV uploaded by the user; the last column
    is treated as the target.
    """

    st.title("Semi Automatic ML App")
    st.text("Using Streamlit v0.52.1+")

    activities = ["EDA", "Plot", "Model Building", "About"]

    choice = st.sidebar.selectbox("Select Activity", activities)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis")

        data = st.file_uploader("Upload Dataset", type = ["csv", "txt", "xls"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())
            # Computed up front: it used to be defined only inside the
            # "Show Columns" branch, so "Select Columns To Show" raised
            # NameError unless the other box was ticked first.
            all_columns = df.columns.to_list()

            if st.checkbox("Show Shape"):
                st.write(df.shape)

            if st.checkbox("Show Columns"):
                st.write(all_columns)

            if st.checkbox("Select Columns To Show"):
                selected_columns = st.multiselect("Select Columns", all_columns)
                new_df = df[selected_columns]
                st.dataframe(new_df)

            if st.checkbox("Show Summary"):
                st.write(df.describe())

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts())

    elif choice == 'Plot':
        st.subheader("Data Visualization")

        data = st.file_uploader("Upload Dataset", type = ["csv", "txt", "xls"])
        # Every plotting control needs ``df``; keeping them inside this
        # branch avoids a NameError before a file has been uploaded.
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            if st.checkbox("Correlation with Seaborn"):
                # Restrict to numeric columns; DataFrame.corr() raises on
                # text columns with recent pandas versions.
                heatmap_ax = sns.heatmap(df.select_dtypes(include="number").corr(), annot = True)
                plt.yticks(rotation = 0)
                # Pass the figure explicitly (global pyplot use is
                # deprecated) and clear it so the next plot starts empty.
                st.pyplot(heatmap_ax.figure, clear_figure=True)

            if st.checkbox("Pie Chart"):
                all_columns = df.columns.to_list()
                columns_to_plot = st.selectbox("Select 1 Column", all_columns)
                pie_plot = df[columns_to_plot].value_counts().plot.pie(autopct="%1.1f%%")
                st.pyplot(pie_plot.figure, clear_figure=True)

            all_columns_names = df.columns.tolist()
            type_of_plot = st.selectbox("Select Type of Plot", ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot", all_columns_names)

            if st.button("Generate Plot"):
                if not selected_columns_names:
                    # pandas cannot plot an empty selection.
                    st.warning("Select at least one column to plot")
                else:
                    st.success("Generating Customize Plot of {} for {}".format(type_of_plot, selected_columns_names))
                    cust_data = df[selected_columns_names]

                    # Plot By Streamlit
                    if type_of_plot == 'area':
                        st.area_chart(cust_data)

                    elif type_of_plot == 'bar':
                        st.bar_chart(cust_data)

                    elif type_of_plot == 'line':
                        st.line_chart(cust_data)

                    # Custom Plot
                    else:
                        cust_plot = cust_data.plot(kind=type_of_plot)
                        st.pyplot(cust_plot.figure, clear_figure=True)

    elif choice == 'Model Building':
        st.subheader("Building ML Model")

        data = st.file_uploader("Upload Dataset", type = ["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Model Building: all columns but the last are features.
            X = df.iloc[:, 0:-1]
            Y = df.iloc[:, -1]
            seed = 8

            # Candidate models
            models = [
                ("LR", LogisticRegression()),
                ("LDA", LinearDiscriminantAnalysis()),
                ("KNN", KNeighborsClassifier()),
                ("CART", DecisionTreeClassifier()),
                ("NB", GaussianNB()),
                ("SVM", SVC()),
            ]

            model_names = []
            model_mean = []
            model_std = []
            all_models = []
            scoring = 'accuracy'

            # scikit-learn rejects ``random_state`` without ``shuffle=True``;
            # shuffling also makes the seed meaningful. The splitter is the
            # same for every model, so it is built once.
            kfold = model_selection.KFold(n_splits = 10, shuffle = True, random_state = seed)

            # Evaluate each model in turn
            for name, model in models:
                cv_results = model_selection.cross_val_score(model, X, Y, cv = kfold, scoring = scoring)
                model_names.append(name)
                model_mean.append(cv_results.mean())
                model_std.append(cv_results.std())

                # Same numbers as a list of dicts for the JSON view.
                accuracy_results = {"model_name": name, "model_accuracy": cv_results.mean(), "standard_deviation":cv_results.std()}
                all_models.append(accuracy_results)

            if st.checkbox("Metrics as Table"):
                st.dataframe(pd.DataFrame(zip(model_names, model_mean, model_std), columns=["Model Name", "Model Accuracy", "Standard Deviation"]))

            # Create JSON box
            if st.checkbox("Metrics as JSON"):
                st.json(all_models)

    elif choice == 'About':
        st.subheader("About")
        st.text("")
    if len(price) == 0 or len(name) == 0:
        st.error('Name or price cannot be empty')
    else:
        if dict_to_append['Name'] in shop_list.data['Name'].tolist():
            st.write('An item with the same name already exists')
        else:
            shop_list.add_item(dict_to_append)
            st.write('Item ' + name + ' has been added to the list')

# Remove the most recently added entry from the shopping list.
if st.button('Delete last added item'):
    shop_list.delete_item(last_added='yes')

# Start over with a fresh list.
if st.button('New list'):
    shop_list.new_list()

# Full list as stored by ``shop_list``.
st.dataframe(shop_list.data)

# Price subtotal for every (Room, Category) pair.
st.dataframe(
    shop_list.data.pivot_table(values='Price',
                               index=['Room', 'Category'],
                               aggfunc='sum'))

st.write(f"Total price {shop_list.data['Price'].sum()}")

st.sidebar.header('Load')

# Optional upload that is handed to ``shop_list.read_list``.
file = st.sidebar.file_uploader('Select the file')
# Identity comparison is the idiomatic (and override-proof) None check.
if file is not None:
    shop_list.read_list(file)
    st.sidebar.info('File loaded')
Esempio n. 15
0
import streamlit as st

from aqua import ui, data, processing, ml, plots

# Streamlit page script: builds the UI, loads and processes the data, shows
# descriptive plots, then splits the data for modelling.
ui.make_title()
# The sidebar yields two option objects: one for processing, one for modelling.
processing_options, modelling_options = ui.make_sidebar()

# Load the raw data and pass it through ``processing.process_force_data``
# together with the processing options chosen in the sidebar.
raw_data = data.load_raw_data().pipe(processing.process_force_data,
                                     processing_options)

st.markdown("## 1. Data description")

# Optional table view of the processed data.
if st.checkbox("Show raw data", key="show-raw"):
    st.dataframe(raw_data)

# Separate targets from variables; the target selection comes from the
# "targets" entry of the modelling options.
targets, variables = ml.variables_targets_split(raw_data,
                                                modelling_options["targets"])

st.markdown("### 1.1 Variables")
plots.plot_anthropometry(variables)
plots.plot_forces(variables)

st.markdown("### 1.2 Targets")
plots.plot_targets(targets)

st.markdown("### 1.3 Correlation matrix")
plots.plot_correlation_matrix(variables, targets)

st.markdown("## 2. Data modelling")
# Train/test split sized by the "test_size" entry of the modelling options.
X_train, X_test, y_train, y_test = ml.train_test_split(
    variables, targets, modelling_options["test_size"])
Esempio n. 16
0
def main():
    activities = ['EDA', 'Visualisation', 'model', 'About us']
    option = st.sidebar.selectbox('Selection option:', activities)

    #DEALING WITH THE EDA PART

    if option == 'EDA':
        st.subheader("Exploratory Data Analysis")

        data = st.file_uploader("Upload dataset:",
                                type=['csv', 'xlsx', 'txt', 'json'])
        st.success("Data successfully loaded")
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head(50))

            if st.checkbox("Display shape"):
                st.write(df.shape)
            if st.checkbox("Display columns"):
                st.write(df.columns)
            if st.checkbox("Select multiple columns"):
                selected_columns = st.multiselect('Select preferred columns:',
                                                  df.columns)
                df1 = df[selected_columns]
                st.dataframe(df1)

            if st.checkbox("Display summary"):
                st.write(df1.describe().T)

            if st.checkbox('Display Null Values'):
                st.write(df.isnull().sum())

            if st.checkbox("Display the data types"):
                st.write(df.dtypes)
            if st.checkbox('Display Correlation of data variuos columns'):
                st.write(df.corr())

#DEALING WITH THE VISUALISATION PART

    elif option == 'Visualisation':
        st.subheader("Data Visualisation")

        data = st.file_uploader("Upload dataset:",
                                type=['csv', 'xlsx', 'txt', 'json'])
        st.success("Data successfully loaded")
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head(50))

            if st.checkbox('Select Multiple columns to plot'):
                selected_columns = st.multiselect(
                    'Select your preferred columns', df.columns)
                df1 = df[selected_columns]
                st.dataframe(df1)

            if st.checkbox('Display Heatmap'):
                st.write(
                    sns.heatmap(df1.corr(),
                                vmax=1,
                                square=True,
                                annot=True,
                                cmap='viridis'))
                st.pyplot()
            if st.checkbox('Display Pairplot'):
                st.write(sns.pairplot(df1, diag_kind='kde'))
                st.pyplot()
            if st.checkbox('Display Pie Chart'):
                all_columns = df.columns.to_list()
                pie_columns = st.selectbox("select column to display",
                                           all_columns)
                pieChart = df[pie_columns].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pieChart)
                st.pyplot()

    # DEALING WITH THE MODEL BUILDING PART

    elif option == 'model':
        st.subheader("Model Building")

        data = st.file_uploader("Upload dataset:",
                                type=['csv', 'xlsx', 'txt', 'json'])
        st.success("Data successfully loaded")
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head(50))

            if st.checkbox('Select Multiple columns'):
                new_data = st.multiselect(
                    "Select your preferred columns. NB: Let your target variable be the last column to be selected",
                    df.columns)
                df1 = df[new_data]
                st.dataframe(df1)

                #Dividing my data into X and y variables

                X = df1.iloc[:, 0:-1]
                y = df1.iloc[:, -1]

            seed = st.sidebar.slider('Seed', 1, 200)

            classifier_name = st.sidebar.selectbox(
                'Select your preferred classifier:',
                ('KNN', 'SVM', 'LR', 'naive_bayes', 'decision tree'))

            def add_parameter(name_of_clf):
                params = dict()
                if name_of_clf == 'SVM':
                    C = st.sidebar.slider('C', 0.01, 15.0)
                    params['C'] = C
                else:
                    name_of_clf == 'KNN'
                    K = st.sidebar.slider('K', 1, 15)
                    params['K'] = K
                    return params

            #calling the function

            params = add_parameter(classifier_name)

            #defing a function for our classifier

            def get_classifier(name_of_clf, params):
                clf = None
                if name_of_clf == 'SVM':
                    clf = SVC(C=params['C'])
                elif name_of_clf == 'KNN':
                    clf = KNeighborsClassifier(n_neighbors=params['K'])
                elif name_of_clf == 'LR':
                    clf = LogisticRegression()
                elif name_of_clf == 'naive_bayes':
                    clf = GaussianNB()
                elif name_of_clf == 'decision tree':
                    clf = DecisionTreeClassifier()
                else:
                    st.warning('Select your choice of algorithm')

                return clf

            clf = get_classifier(classifier_name, params)

            X_train, X_test, y_train, y_test = train_test_split(
                X, y, test_size=0.2, random_state=seed)

            clf.fit(X_train, y_train)

            y_pred = clf.predict(X_test)
            st.write('Predictions:', y_pred)

            accuracy = accuracy_score(y_test, y_pred)

            st.write('Nmae of classifier:', classifier_name)
            st.write('Accuracy', accuracy)


# DEALING WITH THE ABOUT US PAGE

    elif option == 'About us':

        st.markdown(
            'This is an interactive web page for our ML project, feel feel free to use it. This dataset is fetched from the UCI Machine learning repository. The analysis in here is to demonstrate how we can present our wok to our stakeholders in an interractive way by building a web app for our machine learning algorithms using different dataset.'
        )

        st.balloons()
Esempio n. 17
0
def app_main():
    """Render the AutoML Streamlit page.

    The sidebar drives the optional sections of the page: choosing a CSV data
    source, generating a profiling report, quick model training through the
    PyCaret regression / classification modules, viewing training logs, and
    browsing or applying models recorded by MLflow.
    """
    st.title("自动化机器学习平台")

    # Defaults so that every later section can rely on these names, even when
    # the chosen folder holds no CSV file (they used to be left undefined in
    # that case and the next section crashed with a NameError).
    file_selected_path = None
    nrows = 100

    if st.sidebar.checkbox('定义数据源'):
        file_folder = st.sidebar.text_input('文件夹', value="data")
        data_file_list = list_files(file_folder, 'csv')
        if len(data_file_list) == 0:
            st.warning('当路径无可用数据集')
        else:
            file_selected = st.sidebar.selectbox('选择文件', data_file_list)
            file_selected_path = concat_file_path(file_folder, file_selected)
            nrows = st.sidebar.number_input('行数', value=-1)
            # -1 is the "read everything" sentinel shown to the user as "all".
            n_rows_str = '全部' if nrows == -1 else str(nrows)
            st.info(f'已选择文件:{file_selected_path},读取行数为{n_rows_str}')
    else:
        st.warning('当前选择文件为空,请选择。')

    if st.sidebar.checkbox('探索性分析'):
        if file_selected_path is not None:
            if st.sidebar.button('一键生成报告'):
                df = load_csv(file_selected_path, nrows)
                pr = ProfileReport(df, explorative=True)
                st_profile_report(pr)
        else:
            st.info('没有选择文件,无法进行分析。')

    if st.sidebar.checkbox('快速建模'):
        if file_selected_path is not None:
            task = st.sidebar.selectbox('选择任务', ML_TASK_LIST)
            # NOTE(review): the classification branch offers RG_MODEL_LIST,
            # the same list as regression - confirm whether a dedicated
            # classification model list was intended here.
            if task == '回归':
                model = st.sidebar.selectbox('选取模型', RG_MODEL_LIST)
            elif task == '分类':
                model = st.sidebar.selectbox('选取模型', RG_MODEL_LIST)
            df = load_csv(file_selected_path, nrows)
            try:
                cols = df.columns.to_list()
                target_col = st.sidebar.selectbox('选取预测对象', cols)
            except Exception:
                # Exception rather than BaseException, so interpreter- and
                # framework-level control-flow signals are not swallowed.
                st.sidebar.warning('数据格式无法正确读取')
                target_col = None

            if target_col is not None and st.sidebar.button('训练模型'):
                # Both tasks follow the same PyCaret workflow; classification
                # additionally asks setup() to fix class imbalance.
                if task == '回归':
                    pycaret_api, extra_setup = pc_rg, {}
                elif task == '分类':
                    pycaret_api, extra_setup = pc_cl, {'fix_imbalance': True}
                else:
                    pycaret_api, extra_setup = None, {}

                if pycaret_api is not None:
                    st.success('数据预处理。。。')
                    pycaret_api.setup(df,
                                      target=target_col,
                                      log_experiment=True,
                                      experiment_name='ml_',
                                      log_plots=True,
                                      silent=True,
                                      verbose=False,
                                      profile=True,
                                      **extra_setup)
                    st.success('数据预处理完毕。')
                    st.success('训练模型。。。')
                    pycaret_api.create_model(model, verbose=False)
                    st.success('模型训练完毕。。。')
                    st.success('模型已经创建')

    if st.sidebar.checkbox('查看系统日志'):
        n_lines = st.sidebar.slider(label='行数', min_value=3, max_value=50)
        if st.sidebar.button("查看"):
            logs = get_model_training_logs(n_lines=n_lines)
            st.text('系统日志')
            st.write(logs)

    try:
        # NOTE(review): experiment_ids is documented as a list of IDs; the
        # bare 0 is kept as-is - confirm it selects the intended experiment.
        all_runs = mlflow.search_runs(experiment_ids=0)
    except Exception:
        # Best effort: without a reachable tracking store the page simply
        # reports that no trained model is available.
        all_runs = []

    if len(all_runs) != 0:
        if st.sidebar.checkbox('预览模型'):
            ml_logs = 'http://kubernetes.docker.internal:5000/  -->开启mlflow,命令行输入:mlflow ui'
            st.markdown(ml_logs)
            st.dataframe(all_runs)
        if st.sidebar.checkbox('选择模型'):
            saved_run_ids = all_runs[all_runs['tags.Source'] ==
                                     'create_model']['run_id'].tolist()
            selected_run_id = st.sidebar.selectbox('从已保存模型中选择',
                                                   saved_run_ids)
            # The selectbox yields None when no run was produced by
            # create_model; skip the details instead of indexing an empty
            # frame.
            if selected_run_id is not None:
                selected_run_info = all_runs[
                    all_runs['run_id'] == selected_run_id].iloc[0, :]
                st.code(selected_run_info)
                if st.sidebar.button('预测数据'):
                    if file_selected_path is None:
                        st.info('没有选择文件,无法进行分析。')
                    else:
                        model_uri = 'runs:/' + selected_run_id + '/model/'
                        model_loaded = mlflow.sklearn.load_model(model_uri)
                        # Same loader as the other sections; pd.read_csv
                        # rejects the -1 "all rows" sentinel (load_csv is
                        # assumed to handle it - confirm).
                        df = load_csv(file_selected_path, nrows)
                        pred = model_loaded.predict(df)
                        pred_df = pd.DataFrame(pred, columns=['预测值'])
                        st.dataframe(pred_df)
                        # Hand the figure over explicitly instead of relying
                        # on matplotlib's implicit global figure.
                        ax = pred_df.plot()
                        st.pyplot(ax.get_figure())
    else:
        st.sidebar.warning('没有找到训练好的模型')
Esempio n. 18
0
def main():
    """CNN Detector: Streamlit front end for the intrusion-detection model.

    Rebuilds the CNN architecture, restores its weights from
    ``Models/cnn_conv2d.h5`` and serves the menu pages: Home, Login (dataset
    details, training analysis, result analysis and the live detector),
    SignUp and Profiles.
    """
    # Rebuild the network layer by layer, then restore the trained weights.
    mo = Sequential()
    mo.add(
        Convolution2D(20,
                      4,
                      padding="valid",
                      activation="relu",
                      input_shape=(11, 11, 1)))
    mo.add(MaxPooling2D(pool_size=2, strides=2))
    mo.add(Convolution2D(10, 3, padding="valid", activation="relu"))
    mo.add(MaxPooling2D(2, 2))
    mo.add(Flatten())
    mo.add(Dense(50, activation="relu"))
    mo.add(Dropout(0.2))
    mo.add(Dense(20, activation="relu"))
    mo.add(Dropout(0.2))
    mo.add(Dense(5, activation="softmax"))
    mo.compile(loss="categorical_crossentropy",
               optimizer="adam",
               metrics=['accuracy'])
    mo.load_weights("Models/cnn_conv2d.h5")

    # One-hot prediction, as rendered by np.array_str, mapped to the attack
    # family it stands for. A single table keeps the detector messages in
    # sync across the sample buttons and the file upload.
    attack_names = {
        '[[1 0 0 0 0]]': 'Denial of Service',
        '[[0 0 0 0 1]]': 'User to Root',
        '[[0 0 1 0 0]]': 'Probe',
        '[[0 0 0 1 0]]': 'Root to Local',
    }
    normal_pattern = '[[0 1 0 0 0]]'

    st.title("IDS")
    menu = ["Home", "Login", "SignUp", "Profiles"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Home":
        st.subheader("Home")
        st.write("Hey *There!!!*:sunglasses:")
        st.write(" ### Go to Menu to proceed furthuer!")

    elif choice == "Login":
        st.subheader("Login Section")
        username = st.sidebar.text_input("User Name")
        password = st.sidebar.text_input("Password", type='password')
        if st.sidebar.checkbox("Login"):
            create_usertable()
            hashed_pswd = make_hashes(password)
            result = login_user(username, check_hashes(password, hashed_pswd))
            if result:
                st.success("Logged In as {}".format(username))
                task = st.selectbox("Get into the software", [
                    "Dataset details and Pre-processed output",
                    "Analyze Training Phase", "Result Analysis",
                    "Lets check the Detector"
                ])

                if task == "Dataset details and Pre-processed output":
                    st.subheader('Dataset')
                    st.write('NSL-KDD')
                    input_df = pd.read_csv(
                        'Samples and Details/Sample NSL-KDD.csv')
                    st.write(' ### Sample NSL-KDD')
                    st.write(input_df)
                    st.write('Features')
                    fea = pd.read_csv('Samples and Details/fea.csv')
                    st.write(' ### The Features Set')
                    st.write(fea)
                    st.markdown(
                        """[NSL-KDD loaded from here!](https://www.unb.ca/cic/datasets/nsl.html)"""
                    )
                    if st.button('Pre-processing Results'):
                        st.write(' ### Pre-processed features set (5 rows)')
                        x_head = pd.read_csv('Samples and Details/x.csv')
                        st.write(x_head)
                        st.write(' ### Pre-processed label set (5 rows)')
                        y_head = pd.read_csv('Samples and Details/y.csv')
                        st.write(y_head)

                elif task == "Analyze Training Phase":
                    st.subheader("Will observe Training Phase now")
                    if st.button('Analyze the Training'):
                        x_des = pd.read_csv('Samples and Details/x_des.csv')
                        st.write(' ### x (Features) Description')
                        st.write(x_des)
                        y_des = pd.read_csv('Samples and Details/y_des.csv')
                        st.write(' ### y (Labels) Description')
                        st.write(y_des)

                elif task == "Result Analysis":
                    st.subheader("Results")
                    testdata = pd.read_csv(
                        'Training and Testing Datasets/Modified Test.csv')
                    # Only the one-hot encoded labels are needed by the
                    # evaluation helper below.
                    y_test_attack = pd.get_dummies(
                        testdata['attack_type']).values
                    acc = Image.open('accuracy.PNG')
                    st.image(acc, caption='Training v/s Validation Accuracy')
                    loss = Image.open('loss.PNG')
                    st.image(loss, caption='Training v/s Validation Loss')
                    st.write("Training Results:")
                    test_1(mo)
                    st.write("Testing Results:")
                    test(mo, y_test_attack)

                elif task == "Lets check the Detector":
                    st.subheader("Test the Model")
                    test_attack = st.button('Test attack sample')
                    test_normal = st.button('Test normal sample')

                    if test_attack:
                        input_df = pd.read_csv(
                            'Training and Testing Datasets/Attack sample.csv')
                        predicted = np.array_str(packet_test(input_df, mo))
                        if predicted in attack_names:
                            st.warning('Its {} attack'.format(
                                attack_names[predicted]))

                    if test_normal:
                        input_df = pd.read_csv(
                            'Training and Testing Datasets/Normal sample.csv')
                        predicted = np.array_str(packet_test(input_df, mo))
                        if predicted == normal_pattern:
                            st.success('Yes Its normal')
                        else:
                            st.warning('Its some type of attack')

                    upload_file = st.sidebar.file_uploader(
                        "Upload your input CSV file (single row)",
                        type=["csv"])
                    if upload_file is not None:
                        input_df = pd.read_csv(upload_file)
                        predicted = np.array_str(packet_test(input_df, mo))
                        if predicted in attack_names:
                            st.warning('{} packet'.format(
                                attack_names[predicted]))
                        elif predicted == normal_pattern:
                            st.success('Normal')
            else:
                st.warning("Incorrect Username/Password")

    elif choice == "SignUp":
        st.subheader("Create New Account")
        new_user = st.text_input("Username")
        new_password = st.text_input("Password", type='password')
        if st.button("Signup"):
            if not new_user or not new_password:
                # Do not create accounts with blank credentials.
                st.warning("Username and password must not be empty")
            else:
                create_usertable()
                add_userdata(new_user, make_hashes(new_password))
                st.success("You have successfully created a valid Account")
                st.info("Go to Login Menu to login")

    elif choice == "Profiles":
        st.subheader("Below mentioned users are signed up here:")
        # NOTE(review): this page is reachable without logging in and lists
        # every stored username together with its password hash - confirm
        # this exposure is intended.
        user_result = view_all_users()
        clean_db = pd.DataFrame(user_result, columns=["Username", "Password"])
        st.dataframe(clean_db)
Esempio n. 19
0
def app():
    """Render the Falcon Management risk-range dashboard.

    Reads the date range, asset universe and window lengths from the sidebar,
    computes the latest risk range of every asset in the chosen universe and
    shows the result as a table whose rows are coloured by trend signal.
    """
    # Title and Image

    st.write("""
    # **Falcon Management Dashboard**
    Risk Ranges Tool
    """)
    # Sidebar Header

    st.sidebar.header('User Input')

    def highlight_data(val):
        """Colour a whole row green when its Signal is BULLISH, red otherwise."""
        colour = 'green' if val.Signal == 'BULLISH' else 'red'
        return ['background-color: ' + colour] * len(val)

    def get_input():
        """Read all user settings from the sidebar.

        Returns:
            tuple: (start_date, end_date, index, volume_weighted,
            trade_period, trend_period).
        """
        start_date = st.sidebar.text_input("Start Date", "2018-01-01")
        # Default end date is tomorrow.
        end_date = st.sidebar.text_input(
            "End Date",
            (dt.datetime.today() + dt.timedelta(days=1)).strftime("%Y-%m-%d"))
        index = st.sidebar.selectbox(
            "Indexes or Portfolio",
            ("Global Indices", "US Sectors", "Macrowise Portfolio", "Crypto",
             "Country ETFs"))
        volume_weighted = st.sidebar.selectbox("Volume Weighted",
                                               (True, False))
        trade_period = st.sidebar.slider("Trade Period",
                                         min_value=2,
                                         max_value=21,
                                         value=10,
                                         step=1)
        trend_period = st.sidebar.slider("Trend Period",
                                         min_value=21,
                                         max_value=130,
                                         value=63,
                                         step=1)

        return start_date, end_date, index, volume_weighted, trade_period, trend_period

    def get_data(symbol, Start, End, Trade, Trend, VW):
        """Build the latest risk-range table for one asset universe.

        Args:
            symbol: Name of the universe as offered in the sidebar.
            Start: Start date passed to the price download.
            End: End date passed to the price download.
            Trade: Trade window, used for both ``window`` and ``vol_window``.
            Trend: Trend length.
            VW: Whether the risk range is volume weighted.

        Returns:
            DataFrame indexed by 'Asset' with the columns 'Price', 'Trend',
            'Bottom RR', 'Top RR' and 'Mid RR' (one row per ticker).

        Raises:
            ValueError: If ``symbol`` is not a known universe.
        """
        universes = {
            "Global Indices": [
                '^GSPC', '^IXIC', '^RUT', '^GSPTSE', '^BVSP', '^STOXX50E',
                '^GDAXI', '^N225', '^HSI', '^AXJO'
            ],
            "US Sectors": [
                'XLK', 'XLP', 'XLY', 'XRT', 'XLI', 'XLV', 'XBI', 'XLB',
                'XLE', 'XLF', 'XLC', 'XLU'
            ],
            "Macrowise Portfolio": [
                'ADI', 'ASML', 'TSM', 'ERIC', 'NOK', 'EA', 'ILMN', 'INTC',
                'MCHP', 'COST', 'MELI', 'SLV', 'QGEN', 'EWT', 'SWKS', 'TXN',
                'MSFT', 'CCJ', 'GLD', 'XBI', 'PYPL', 'SQ'
            ],
            "Crypto": [
                'BTC-USD', 'ETH-USD', 'DOT1-USD', 'LINK-USD', 'KSM-USD',
                'XRP-USD', 'ATOM1-USD', 'ADA-USD'
            ],
            "Country ETFs": [
                'SPY', 'EWC', 'EWW', 'EWZ', 'ECH', 'EWI', 'DAX', 'EWN',
                'EWU', 'GREK', 'TUR', 'EZA', 'RSX', 'INDA', 'FXI', 'EWJ',
                'EWY', 'EWM', 'EWT', 'EWA'
            ],
        }
        if symbol not in universes:
            # Fail with a clear message instead of an unbound-variable error.
            raise ValueError(f"Unknown index or portfolio: {symbol!r}")

        latest_rows = []
        for ticker in universes[symbol]:
            prices = pd.DataFrame(yf.download(ticker, start=Start, end=End))
            ranges = pd.DataFrame(
                RiskRange(Price_Data=prices,
                          window=Trade,
                          length=Trend,
                          volume_weighted=VW,
                          vol_window=Trade))
            # Keep only the most recent row; copy so the insert below works
            # on an independent frame rather than on a slice.
            latest = ranges[-1:].copy()
            latest.insert(1, 'Asset', ticker)
            latest_rows.append(latest)

        # Concatenate once at the end: DataFrame.append is gone in pandas 2.x
        # and re-deriving the table inside the loop was wasted work.
        combined = pd.concat(latest_rows, ignore_index=True)
        columns = ['Price', 'Trend', 'Bottom RR', 'Top RR', 'Mid RR']
        # Copy so the caller can add columns without touching a slice.
        return combined.set_index('Asset')[columns].copy()

    # Get Data
    start, end, index, vw, trade, trend = get_input()

    Data = get_data(symbol=index,
                    Start=start,
                    End=end,
                    Trade=trade,
                    Trend=trend,
                    VW=vw)

    # An asset is bullish when the bottom of its risk range is above trend.
    Data['Signal'] = np.where(Data["Bottom RR"] > Data["Trend"], "BULLISH",
                              "BEARISH")

    ############################################################ Display ############################################################

    st.header("Risk Ranges")

    st.dataframe(Data.style.apply(highlight_data, axis=1))
Esempio n. 20
0
def main():
    """Render the Covid-19 data visualisation page.

    Loads the confirmed / death / recovery tables, shows the raw data, merges
    them into one table and then, for the chosen date range, draws either a
    per-country line chart or a global column map of confirmed cases.
    """
    st.set_page_config(layout="wide")
    st.markdown('<style>#vg-tooltip-element{z-index: 1000051}</style>',
                unsafe_allow_html=True)

    confirmed_df, death_df, recovery_df = wwConfirmedDataCollection()
    # The emoji is written as an escape: the literal had been mangled by a
    # wrong decoding (its UTF-8 bytes were read with a Cyrillic code page).
    st.title("Covid-19 \U0001F9A0 Pandemic Data Visualization")
    displayRawData(confirmed_df, death_df, recovery_df)
    confirmed_df, death_df, recovery_df = dataMassaging(
        confirmed_df, death_df, recovery_df
    )
    full_table = mergeDataAndDataCorrection(confirmed_df, death_df, recovery_df)

    st.write('\nData from "CSSEGISandData POST data massaging"')

    user_selectionbox_input = st.selectbox(
        "Select an option", ["Global", "Select from list of countries"]
    )
    min_date_found = full_table["date"].min()
    max_date_found = full_table["date"].max()

    selected_date = st.date_input(
        "Pick a date",
        (min_date_found, max_date_found)
    )
    # Both ends of the range must be picked before anything is drawn.
    if len(selected_date) == 2:

        if user_selectionbox_input == "Select from list of countries":
            in_range = ((full_table['date'] >= selected_date[0])
                        & (full_table['date'] <= selected_date[1]))
            full_table = full_table[in_range]

            list_of_countries = full_table["location"].unique()
            selected_country = st.selectbox("Select country", list_of_countries)

            # Copy the filtered rows so the new columns are added to an
            # independent frame rather than to a slice of the merged table.
            full_table = full_table[
                full_table["location"] == selected_country].copy()

            # Adding new cases to the table for graphing
            for total_col, new_col in (("confirmed", "new_confirmed"),
                                       ("recovered", "new_recovered"),
                                       ("deaths", "new_deaths")):
                full_table[new_col] = full_table[total_col].diff(1).fillna(0)

            user_input = st.selectbox(
                "Select an option", ["Total Number of Cases", "New Cases Per Day"]
            )
            st.write(full_table)
            if user_input == "New Cases Per Day":
                source = pd.DataFrame(
                    full_table,
                    columns=["date", "new_confirmed", "new_recovered", "new_deaths"]
                )
                title = f"New Cases Per Day for {selected_country}"
            else:
                source = pd.DataFrame(
                    full_table, columns=["date", "confirmed", "deaths", "recovered"]
                )
                title = f"Total reported cases for {selected_country}"

            st.altair_chart(altairLineChartGraphing(title, source), use_container_width=True)

        else:
            # Global view: snapshot of the last day of the selected range.
            full_table = full_table[full_table["date"] == selected_date[1]]
            confirmed_source = pd.DataFrame(full_table, columns=["location", "lat", "lon", "confirmed"])

            # Readable values
            confirmed_source["confirmed_readable"] = confirmed_source["confirmed"].apply(human_format)
            display_confirmed_source = pd.DataFrame(
                confirmed_source,
                columns=["location", "lat", "lon", "confirmed_readable"]
            ).reset_index(drop=True)
            display_confirmed_source = display_confirmed_source.rename(columns={"confirmed_readable": "confirmed"})
            st.dataframe(display_confirmed_source)

            INITIAL_VIEW_STATE = pdk.ViewState(
                latitude=55.3781,
                longitude=-3.436,
                zoom=1,
                pitch=25,
            )

            # Column height follows the raw confirmed count; the tooltip
            # shows the human-readable value from the same frame.
            column_layer = pdk.Layer(
                "ColumnLayer",
                data=confirmed_source,
                get_position=["lon", "lat"],
                radius=50000,
                get_elevation="confirmed",
                elevation_scale=0.25,
                get_fill_color=["255,255, confirmed*.01"],
                get_line_color=[255, 255, 255],
                filled=True,
                pickable=True,
                extruded=True,
                auto_highlight=True,
            )
            TOOLTIP = {
                "html": "{location}<br> <b>{confirmed_readable}</b> Confirmed Cases",
                "style": {
                    "background": "grey",
                    "color": "white",
                    "font-family": '"Helvetica Neue", Arial',
                    "z-index": "10000",
                },
            }

            r = pdk.Deck(
                column_layer,
                map_style="mapbox://styles/mapbox/satellite-streets-v11",
                map_provider="mapbox",
                initial_view_state=INITIAL_VIEW_STATE,
                tooltip=TOOLTIP,
            )
            st.write("## Total Number of Confirmed Cases All Time")
            st.pydeck_chart(r)
    else:
        st.write("Select Valid Dates to continue")
Esempio n. 21
0
# Market columns used for encoding and shown in the lead tables. Because the
# working frames are built from these columns first, they are also the first
# 18 columns of those frames, which the lead selection relies on.
_LEADS_BASE_COLUMNS = [
    'id', 'sg_uf', 'de_ramo', 'setor', 'nm_divisao', 'nm_segmento',
    'de_nivel_atividade', 'nm_meso_regiao', 'nm_micro_regiao',
    'de_faixa_faturamento_estimado', 'idade_empresa_anos',
    'de_natureza_juridica', 'fl_me', 'fl_sa', 'fl_epp', 'fl_mei',
    'fl_ltda', 'qt_filiais'
]

# Label-encoded features fed to KMeans.
_LEADS_TRAIN_COLUMNS = [
    'cod_de_natureza_juridica', 'cod_sg_uf', 'cod_de_ramo',
    'cod_setor', 'cod_nm_divisao', 'cod_nm_segmento',
    'cod_de_nivel_atividade', 'cod_nm_meso_regiao',
    'cod_de_faixa_faturamento_estimado'
]


def _leads_cluster(frame, clientes):
    """Encode *frame*, tag portfolio membership and attach KMeans labels.

    Args:
        frame: Market rows restricted to (at least) ``_LEADS_BASE_COLUMNS``.
        clientes: Frame with an ``id`` column and a ``portfolio`` column
            holding the portfolio number of each known customer.

    Returns:
        A new frame with ``cod_*`` encoded columns, a ``portfolio`` column
        (0 for rows that belong to no portfolio) and a ``kmeans`` column.
    """
    # Missing values become their own category instead of being dropped.
    frame = frame.fillna('SEM INFORMAÇÃO')

    categoricas = list(
        frame.select_dtypes(include=['object', 'bool']).columns)
    categoricas.remove('id')
    encoder = LabelEncoder()
    for coluna in categoricas:
        frame['cod_' + coluna] = encoder.fit_transform(frame[coluna])

    # Rows whose id is not in any portfolio get NaN from the join -> 0.
    frame = frame.join(clientes.set_index('id'), on='id')
    frame['portfolio'] = frame['portfolio'].fillna(0)

    # No random_state is set, so cluster numbering may differ between reruns.
    kmeans = KMeans(n_clusters=4)
    kmeans.fit(frame[_LEADS_TRAIN_COLUMNS])
    frame['kmeans'] = kmeans.labels_
    return frame


def _leads_for_portfolio(frame, portfolio_id):
    """Return market rows in the portfolio's dominant cluster, minus its own customers."""
    membros = frame['portfolio'] == portfolio_id
    classe = frame.loc[membros, 'kmeans'].value_counts().index[0]
    # Numeric comparison: the portfolio column is numeric, so a string
    # membership test such as `not in ("1")` never excludes anything.
    leads = frame.loc[(frame['kmeans'] == classe) & ~membros]
    return leads.iloc[:, 0:18]


def _leads_explore(leads):
    """Render the summary, charts, filters and download links for *leads*."""
    st.markdown('**Resumo dos Leads e variáveis disponívies**')
    st.dataframe(leads.head())

    st.markdown('**Analise Gráfica dos Leds**')
    if st.checkbox("Leads por UF"):
        sns.catplot(x="sg_uf", kind="count", palette="ch:.25", data=leads)
        plt.title('Quantidade de Leads por UF')
        plt.xlabel('UF')
        plt.ylabel('Qdt Leads')
        st.pyplot()
    if st.checkbox("Leads por Setor"):
        por_setor = leads['setor'].value_counts()
        sqy.plot(sizes=por_setor.values, label=por_setor.index, alpha=.8)
        plt.axis('off')
        st.pyplot()
    if st.checkbox("Top por Característica"):
        colunas = list(leads.iloc[:, 1:12].columns)
        opcao = st.selectbox('Selecione a opção de filtro', colunas)
        st.dataframe(leads[opcao].value_counts())

    st.markdown('**Seleção de Leads por Filtro**')
    if st.checkbox("Seleção de Leads"):
        colunas = list(leads.columns)
        opcao = st.selectbox('As colunas utilizadas para filtro', colunas)
        filtros = list(leads[opcao].unique())
        selecao = st.selectbox('O valor utilizado para filtro', filtros)
        filtrados = leads.loc[leads[opcao] == selecao]
        quantidade = st.slider('Quantos Leads?', 0, 100, 10)
        st.dataframe(filtrados.head(quantidade))

    # The leads are offered as two download links, each with about half the rows.
    st.markdown('**Download Leads**')
    metade = int(leads.shape[0] / 2)
    numerados = leads.reset_index()
    # .loc slicing is label-based and inclusive, hence the +1 for part 2.
    parte1 = numerados.loc[:metade, :]
    parte2 = numerados.loc[metade + 1:, :]
    st.markdown(get_table_download_link(parte1, 'Download Parte 1'),
                unsafe_allow_html=True)
    st.markdown(get_table_download_link(parte2, 'Download Parte 2'),
                unsafe_allow_html=True)


def main():
    """Streamlit page that recommends leads from the market base.

    The sidebar chooses between two modes:

    * "Exemplo com 3 Portfólios": clusters the market, lets the user pick one
      of the three bundled portfolios and shows the market companies that
      fall in that portfolio's most frequent cluster.
    * "Gere Leads com seu Portfólio": same flow, but the portfolio comes from
      a CSV uploaded by the user (it must contain an ``id`` column).

    In both modes companies that already belong to the chosen portfolio are
    excluded from the leads.
    """
    st.title('AceleraDev Data Science 2020')
    st.subheader('**Recommend leads**')

    # ---------- sidebar ----------
    st.sidebar.header("Recomendação de Leads")
    tela = st.sidebar.radio(
        "Selecione uma opção",
        options=["Exemplo com 3 Portfólios", "Gere Leads com seu Portfólio"],
        index=0)
    st.sidebar.markdown('Desenvolvido por: **Tiago Dias**')
    st.sidebar.markdown('Email para contato:')
    st.sidebar.markdown('*****@*****.**')
    st.sidebar.markdown('LinkedIn:')
    st.sidebar.markdown('https://www.linkedin.com/in/diasctiago')
    st.sidebar.markdown('GitHub:')
    st.sidebar.markdown('https://github.com/diasctiago')

    # ---------- market data ----------
    # pd.concat replaces the chained DataFrame.append calls (removed in
    # pandas 2.0); the resulting frame is the same.
    df = pd.concat([
        pd.read_csv('data/market1.csv'),
        pd.read_csv('data/market2.csv'),
        pd.read_csv('data/market3.csv'),
        pd.read_csv('data/market4.csv'),
    ])

    # ---------- bundled example ----------
    if tela == "Exemplo com 3 Portfólios":
        caminhos = ('data/port1.csv', 'data/port2.csv', 'data/port3.csv')
        # Tag every customer with the number of the portfolio it came from.
        df_clientes = pd.concat([
            pd.read_csv(caminho).assign(portfolio=numero)
            for numero, caminho in enumerate(caminhos, start=1)
        ])
        df_nao_nulos = _leads_cluster(df[_LEADS_BASE_COLUMNS], df_clientes)

        st.markdown('**Seleção do Portfólio Exemplo**')
        opcoes = ('Portfólio 1', 'Portfólio 2', 'Portfólio 3')
        select_analise = st.radio('Escolha um portfólio abaixo :',
                                  opcoes,
                                  index=1)
        portfolio_id = opcoes.index(select_analise) + 1
        _leads_explore(_leads_for_portfolio(df_nao_nulos, portfolio_id))

    # ---------- user-supplied portfolio ----------
    elif tela == "Gere Leads com seu Portfólio":
        st.markdown('**Gere Leads com seu Portfólio**')
        if st.checkbox("Exemplo arquivo CSV"):
            st.image('img/exemplo.png', format='PNG')
            exemplo = pd.read_csv('data/Exemplo.csv')
            st.markdown(get_table_download_link(exemplo,
                                                'Download Arquivo Exemplo'),
                        unsafe_allow_html=True)

        st.markdown('**Upload do seu Portfólio**')
        file = st.file_uploader('Selecione o seu portfolio (.csv)', type='csv')
        if file is None:
            return

        portfolio = pd.read_csv(file)
        if 'id' not in portfolio.columns:
            # Without ids the upload cannot be matched against the market.
            st.error('O arquivo enviado precisa conter a coluna "id".')
            return

        # The uploaded companies are clustered together with the market.
        df_exemplo = pd.concat([df[_LEADS_BASE_COLUMNS], portfolio])
        membros = portfolio[['id']].assign(portfolio=1)
        df_exemplo = _leads_cluster(df_exemplo, membros)
        _leads_explore(_leads_for_portfolio(df_exemplo, 1))
Esempio n. 22
0
def display_data(conn: Connection):
  """Render the output of ``get_data(conn)`` once the checkbox is ticked.

  Nothing is fetched or drawn while the "display raw data" checkbox is
  unchecked.
  """
  show_raw = st.checkbox("display raw data")
  if not show_raw:
    return
  raw = get_data(conn)
  st.dataframe(raw)
Esempio n. 23
0
def main():

    # set images in middle at the sidebar
    col1, col2, col3 = st.sidebar.beta_columns([1, 6, 1])
    with col1:
        st.write("")
    with col2:
        st.image("img/ggsipu.png", width=200)
    with col3:
        st.write("")

    # set images in middle at the homepage
    col1, col2, col3 = st.beta_columns([6, 6, 6])
    with col1:
        st.write("")
    with col2:
        st.image("img/ggsipu.png", width=200)
    with col3:
        st.write("")

    #to show title on the main page and sidebar
    # st.sidebar.title("Hepatitis Mortality Prediction Web App")
    st.title("Hepatitis Mortality Prediction Web App")

    # to show image at sidebar and main page
    # st.sidebar.image('ggsipu.png',width=200)
    # st.image('ggsipu.png')

    #list menu and submenu for setting options in the list
    sidebar_menu = [
        "1.Project-Introduction", "2.Home-Page", "3.Login-Form",
        "4.Sign-up-Form"
    ]
    homepage_menu = ["1.Check-Plot", "2.Check-Prediction"]

    #sidebar subheader

    #make a selectbox in the sidebar and pass menu list given above to show choices and return choice
    menu_selected = st.sidebar.selectbox(
        "To get started, Please select from the options given below:",
        sidebar_menu)

    if menu_selected == "1.Project-Introduction":
        st.subheader("Hello, Good Morning Users")
        st.subheader(
            "Welcome to the Hepatitis Mortality Prediction Web Application created using streamlit and other python libraries, which will take inputs of dignosed data and predicts whether patient would live or die using various various machine learning models like linear regression, KNN, etc"
        )
        st.subheader("Created by:-")
        st.subheader("1.Name: Ashwani Kumar  [MCA],  Roll no: 40216404518")
        st.subheader("2.Name: Nitin Sharma   [MCA],  Roll no: 40916404518")

    #if choice is home then show subheader and a text below
    elif menu_selected == "2.Home-Page":
        st.image("./img/Awareness-Banner.png", width=700)
        st.video('https://youtu.be/IxCelFhuhQo')
        st.header("what is hepatitis and which organ it effects?".upper())
        st.subheader(
            "Hepatitis is inflammation of the liver. Inflammation is swelling that happens when tissues of the body are injured or infected. It can damage your liver. This swelling and damage can affect how well your liver functions. Hepatitis can be an acute (short-term) infection or a chronic (long-term) infection. Some types of hepatitis cause only acute infections. Other types can cause both acute and chronic infections."
        )
        st.header("what are its causes?".upper())
        st.subheader(
            "There are different types of hepatitis, with different causes: Viral hepatitis is the most common type. It is caused by one of several viruses -- hepatitis viruses A, B, C, D, and E. In the United States, A, B, and C are the most common. Alcoholic hepatitis is caused by heavy alcohol use Toxic hepatitis can be caused by certain poisons, chemicals, medicines, or supplements Autoimmune hepatitis is a chronic type in which your body's immune system attacks your liver. The cause is not known, but genetics and your environment may play a role."
        )
        st.header("How is viral hepatitis spread?".upper())
        st.subheader(
            "Hepatitis A and hepatitis E usually spread through contact with food or water that was contaminated with an infected person's stool. You can also get hepatitis E by eating undercooked pork, deer, or shellfish. Hepatitis B, hepatitis C, and hepatitis D spread through contact with the blood of someone who has the disease. Hepatitis B and D may also spread through contact with other body fluids. This can happen in many ways, such as sharing drug needles or having unprotected sex."
        )
        st.image("./img/hepatitis_body.jpg", width=700)
        st.header("Who is at risk for hepatitis?".upper())
        st.subheader(
            "The risks are different for the different types of hepatitis. For example, with most of the viral types, your risk is higher if you have unprotected sex. People who drink a lot over long periods of time are at risk for alcoholic hepatitis."
        )
        st.header("What are the symptoms of hepatitis?".upper())
        st.subheader(
            "Some people with hepatitis do not have symptoms and do not know they are infected. If you do have symptoms, they may include Fever, Fatigue, Loss of appetite, Nausea and/or vomiting, Abdominal pain, Dark urine, Clay-colored bowel movements, Joint pain, Jaundice, yellowing of your skin and eyes. If you have an acute infection, your symptoms can start anywhere between 2 weeks to 6 months after you got infected. If you have a chronic infection, you may not have symptoms until many years later."
        )
        st.header("What other problems can hepatitis cause?".upper())
        st.subheader(
            "Chronic hepatitis can lead to complications such as cirrhosis (scarring of the liver), liver failure, and liver cancer. Early diagnosis and treatment of chronic hepatitis may prevent these complications."
        )
        st.header("How is hepatitis diagnosed?".upper())
        st.subheader(
            "To diagnose hepatitis, your health care provider 1.Will ask about your symptoms and medical history ,2.Will do a physical exam, 3.Will likely do blood tests, including tests for viral hepatitis , 4.Might do imaging tests, such as an ultrasound, CT scan, or MRI, 5.May need to do a liver biopsy to get a clear diagnosis and check for liver damage"
        )
        st.image("./img/Hepatitis_A_Symptoms.jpeg", width=700)
        st.header("What are the treatments for hepatitis?".upper())
        st.subheader(
            "Treatment for hepatitis depends on which type you have and whether it is acute or chronic. Acute viral hepatitis often goes away on its own. To feel better, you may just need to rest and get enough fluids. But in some cases, it may be more serious. You might even need treatment in a hospital. There are different medicines to treat the different chronic types of hepatitis. Possible other treatments may include surgery and other medical procedures. People who have alcoholic hepatitis need to stop drinking. If your chronic hepatitis leads to liver failure or liver cancer, you may need a liver transplant."
        )
        st.header("Can hepatitis be prevented?".upper())
        st.subheader(
            "There are different ways to prevent or lower your risk for hepatitis, depending on the type of hepatitis. For example, not drinking too much alcohol can prevent alcoholic hepatitis. There are vaccines to prevent hepatitis A and B. Autoimmune hepatitis cannot be prevented."
        )
        st.image("./img/Top-banner-april-2020-2.png", width=700)

    #if choice is login then show 2 inputs for username and password and get data from the login form
    elif menu_selected == "3.Login-Form":

        #get username and password from the form
        get_username = st.text_input("Username:"******"Password:"******"welcome '{}' to the hepatitis mortality prediction webapp"
                    .format(get_username.upper()))

                #show text and select box with options from the list of submenu given menu
                activity_selected = st.selectbox(
                    "please select from the options given below:".upper(),
                    homepage_menu)

                # if checkplot activity selected from options then following code executes
                if activity_selected == "1.Check-Plot":

                    #show subheader text
                    st.subheader(
                        "showing csv file as dataframe which is used for getting data"
                        .upper())

                    # read csv file from the folder and convert into pandas dataframe
                    pd_dataframe = pd.read_csv(
                        "data/clean_hepatitis_dataset.csv")

                    #show dataframe in webapp
                    st.dataframe(pd_dataframe)

                    # take class column from dataframe then count the number of values and plot bargraph
                    pd_dataframe['class'].value_counts().plot(kind='bar')

                    #show subheader text
                    st.subheader(
                        "showing bar graph for the number of patients belongs to class 1 and 2"
                        .upper())

                    #show pyplot in webapp
                    st.pyplot()

                    # read csv file and convert into dataframe
                    freq_dataframe = pd.read_csv(
                        "data/freq_df_hepatitis_dataset.csv")

                    #show subheader text
                    st.subheader(
                        "showing bar chart for count vs age(0-150 years)".
                        upper())

                    #show bar chart in web app having column name count
                    st.bar_chart(freq_dataframe['count'])

                    #*********************************bug portion 1******************************************
                    #make a multiselector and pass list column names as arguments for options list
                    columns_names = pd_dataframe.columns.to_list()
                    # feature_selected=st.multiselect("choose a feature from the following list to show more results:".upper(),columns_names)

                    #make a new list after getting values from the columns selected above
                    new_column_dataframe = pd_dataframe[columns_names]

                    #show dataframe in webapp
                    # st.dataframe(new_column_dataframe)
                    # st.dataframe(pd_dataframe[feature_selected])

                    #make an area chart using list values from above
                    st.subheader(
                        "showing a frequency graph of all the columns in the dataframe"
                        .upper())

                    st.area_chart(new_column_dataframe)
                    # st.area_chart(pd_dataframe[feature_selected])

#  ******************************bug portion 1 ending*****************************************
#  ******************************bug portion 2 starts*****************************************
                elif activity_selected == "2.Check-Prediction":

                    #show a subheader with text
                    st.subheader(
                        "Prediction Analytics, here you can input your dygnostic details to get prediction"
                        .upper())

                    #set range in input box
                    age = st.number_input("age".upper(), 7, 80)

                    #show radio buttons having options given in male_female_dict dictionary above
                    sex = st.radio("sex".upper(),
                                   tuple(male_female_dict.keys()))

                    #show radio buttons having options given in yes_no_dict above
                    steroid = st.radio("Do you take steroid?".upper(),
                                       tuple(yes_no_dict.keys()))

                    #show radio buttons having options given in yes_no_dict above
                    antivirals = st.radio("Do you take Antivirals?".upper(),
                                          tuple(yes_no_dict.keys()))

                    #show radio buttons having options given in yes_no_dict above
                    fatigue = st.radio("Do you have fatigue?".upper(),
                                       tuple(yes_no_dict.keys()))

                    #show radio buttons having options given in yes_no_dict above
                    spiders = st.radio("Presence of spider naevi".upper(),
                                       tuple(yes_no_dict.keys()))

                    #show select box having options given in yes_no_dict above
                    ascites = st.selectbox("Ascites".upper(),
                                           tuple(yes_no_dict.keys()))

                    #show select box having options given in yes_no_dict above
                    varices = st.selectbox("presence of varices".upper(),
                                           tuple(yes_no_dict.keys()))

                    #show range input
                    bilirubin = st.number_input("bilirubin content".upper(),
                                                0.0, 8.0)

                    #show range input
                    alk_phosphate = st.number_input(
                        "alkaline phosphate content".upper(), 0.0, 296.0)

                    #show range input
                    sgot = st.number_input("Sgot".upper(), 0.0, 648.0)

                    #show range input
                    albumin = st.number_input("albumin".upper(), 0.0, 6.4)

                    #show range input
                    Prothrombin = st.number_input("Prothrombin".upper(), 0.0,
                                                  100.0)

                    #show select box having options given in yes_no_dict
                    histology = st.selectbox("Histology".upper(),
                                             tuple(yes_no_dict.keys()))

                    #making a list of features using functions
                    st.subheader(
                        "showing list of values returned from above input form"
                        .upper())
                    feature_list = [
                        age,
                        get_sex_value(sex),
                        get_yes_no_value(steroid),
                        get_yes_no_value(antivirals),
                        get_yes_no_value(fatigue),
                        get_yes_no_value(spiders),
                        get_yes_no_value(ascites),
                        get_yes_no_value(varices), bilirubin, alk_phosphate,
                        sgot, albumin,
                        int(Prothrombin),
                        get_yes_no_value(histology)
                    ]
                    st.write(feature_list)

                    #dictionary of list
                    st.subheader(
                        "showing in json format after conversion from dictionary"
                        .upper())
                    st.json({
                        "age": age,
                        "sex": sex,
                        "steroid": steroid,
                        "antivirals": antivirals,
                        "spiders": spiders,
                        "ascites": ascites,
                        "varices": varices,
                        "bilirubin": bilirubin,
                        "alk_phosphate": alk_phosphate,
                        "sgot": sgot,
                        "albumin": albumin,
                        "Prothrombin": Prothrombin,
                        "histology": histology
                    })

                    #convert into numpy array and show in webapp
                    st.subheader("After converting into numpy array:".upper())
                    single_sample = np.array(feature_list).reshape(1, -1)
                    st.write(single_sample)

                    #make a selectbox carring options given below
                    model_choice = st.selectbox("select model",
                                                ["LR", "KNN", "DecisionTree"])

                    #make button and it returns true when clicked
                    if st.button("predict"):

                        #work if model is KNN
                        if model_choice == "KNN":

                            #load model file from models folder
                            loaded_model_ML = loading_ML_model(
                                "./models/knn_hepB_model.pkl")

                            #predict from loaded model and store
                            prediction = loaded_model_ML.predict(single_sample)

                            #predict probability from loaded model and store
                            pred_prob = loaded_model_ML.predict_proba(
                                single_sample)

                        #work if model is DecisionTree
                        elif model_choice == "DecisionTree":

                            loaded_model_ML = loading_ML_model(
                                "models/decision_tree_clf_hepB_model.pkl")

                            prediction = loaded_model_ML.predict(single_sample)

                            pred_prob = loaded_model_ML.predict_proba(
                                single_sample)

                        #work if model is LR
                        else:

                            loaded_model_ML = loading_ML_model(
                                "models/logistic_regression_hepB_model.pkl")

                            prediction = loaded_model_ML.predict(single_sample)

                            pred_prob = loaded_model_ML.predict_proba(
                                single_sample)

                        #if prediction came from above is 1 then patient dies
                        if prediction == 1:

                            #show warning message
                            st.warning("Patient dies".upper())

                        else:

                            #show success message
                            st.success("Patient lives".upper())

                            #make a dictionary to store percentage of living or die
                            pred_probability_score = {
                                "Die": pred_prob[0][0] * 100,
                                "Live": pred_prob[0][1] * 100
                            }

                            #show a subheader
                            st.subheader(
                                "Prediction probability score using {}".upper(
                                ).format(model_choice))

                            #show json format in webapp
                            st.json(pred_probability_score)

                        if st.checkbox("Interpret".upper()):
                            if model_choice == "KNN":
                                loaded_model_ML = loading_ML_model(
                                    "models/knn_hepB_model.pkl")

                            elif model_choice == "DecisionTree":
                                loaded_model_ML = loading_ML_model(
                                    "models/decision_tree_clf_hepB_model.pkl")

                            else:
                                loaded_model_ML = loading_ML_model(
                                    "models/logistic_regression_hepB_model.pkl"
                                )

                            # loaded_model_ML = loading_ML_model("models/logistic_regression_model.pkl")
                            # 1 Die and 2 Live
                            df = pd.read_csv(
                                "data/clean_hepatitis_dataset.csv")
                            x = df[[
                                'age', 'sex', 'steroid', 'antivirals',
                                'fatigue', 'spiders', 'ascites', 'varices',
                                'bilirubin', 'alk_phosphate', 'sgot',
                                'albumin', 'protime', 'histology'
                            ]]
                            feature_names = [
                                'age', 'sex', 'steroid', 'antivirals',
                                'fatigue', 'spiders', 'ascites', 'varices',
                                'bilirubin', 'alk_phosphate', 'sgot',
                                'albumin', 'protime', 'histology'
                            ]
                            class_names = ['Die(1)', 'Live(2)']
                            explainer = lime.lime_tabular.LimeTabularExplainer(
                                x.values,
                                feature_names=feature_names,
                                class_names=class_names,
                                discretize_continuous=True)
                            # The Explainer Instance
                            exp = explainer.explain_instance(
                                np.array(feature_list),
                                loaded_model_ML.predict_proba,
                                num_features=13,
                                top_labels=1)
                            exp.show_in_notebook(show_table=True,
                                                 show_all=False)
                            # exp.save_to_file('lime_oi.html')
                            st.write(exp.as_list())
                            new_exp = exp.as_list()
                            label_limits = [i[0] for i in new_exp]
                            # st.write(label_limits)
                            label_scores = [i[1] for i in new_exp]
                            plt.barh(label_limits, label_scores)
                            st.pyplot()
                            plt.figure(figsize=(20, 10))
                            fig = exp.as_pyplot_figure()
                            st.pyplot()
                        else:
                            st.warning("some error takes place".upper())

                else:
                    st.warning("some error takes place".upper())

            else:

                #set warning message with text
                st.warning("Incorrect username/Password".upper())

        else:
            st.warning(
                "either you did not login to the system or you did not sign up yet"
                .upper())

#    ********************************bug 2 ending*******************************************

    elif menu_selected == "4.Sign-up-Form":

        #get username from input box
        new_username = st.text_input("username".upper())

        #get password from input box
        new_password = st.text_input("password".upper(), type='password')

        #get confirm password from input box
        confirm_password = st.text_input("confirm password".upper(),
                                         type='password')

        #if password and confirm password same
        if new_password == confirm_password and new_password != '' and confirm_password != '':

            #show success message if both password same
            st.success("password completely matched".upper())

        elif new_password == '' or confirm_password == '':

            #show warning message if password not matched
            st.warning(
                "either password box or confirm password box is empty, please fill before going ahead"
                .upper())

        else:

            #show warning message if password not matched
            st.warning("password and confirm password did not matched".upper())

        #show submit button
        if st.button("Sign up".upper()):

            #cal function in manage_db file
            create_table_db()

            #call generate hashes function and return hashed password
            hashed_new_password = generate_hash_passwords(new_password)

            #call adduserdata function in manage_db file
            warning_if_any = add_user_db(new_username, hashed_new_password)

            if warning_if_any:
                st.warning(warning_if_any.upper())

            else:
                #show success message
                st.success("new account has been created".upper())

                #show info message
                st.info(
                    "please choose login-form option from sidebar to login and start your session"
                    .upper())

        else:
            st.warning(
                "either you did not click sign up button or some failure occurs"
                .upper())

    else:
        st.warning("wrong choice, please choose again".upper())
    size='COVID-19 Cases',
    title="Location Map of all COVID-19 Cases in Cavite")
#map_cavite.update_layout(mapbox_style="carto-darkmatter", title_font_size=24)
# Style the case map: "dark" Mapbox tiles, the access token, and a larger title.
map_cavite.update_layout(mapbox_style="dark",
                         mapbox_accesstoken=token,
                         title_font_size=24)
# Draw every marker in red.
map_cavite.update_traces(marker=dict(color='red'))

# `use_container_width` is the sizing flag st.plotly_chart understands
# (`use_column_width` belongs to st.image); this also matches the histogram
# call further down in this script.
st.plotly_chart(map_cavite, use_container_width=True)

# Graph: age distribution of the cases, split by sex.
cavite_hist = px.histogram(covid_cavite,
                           x='Age',
                           nbins=20,
                           color='Sex',
                           height=400,
                           width=800)
# The histogram bins `Age` along the x axis and counts patients on the y axis,
# so the axis titles must follow that orientation.
cavite_hist.update_layout(
    xaxis_title="Age",
    yaxis_title="Patients with COVID-19",
    title="Histogram: COVID-19 Cases in Cavite as to Age and Sex",
    title_font_size=20)
st.plotly_chart(cavite_hist, use_container_width=True)

# Raw data table with a link to its source.
st.markdown('## COVID-19 Cavite Data')
st.markdown(
    '#### Source: [DOH Data Drop](https://www.google.com/url?q=http://bit.ly/dohcovid19data&sa=D&ust=1587041816428000&usg=AFQjCNED0zWXo_krHyneN4hQQaEJaPXUxg)'
)

st.dataframe(covid_cavite)
# Pickled classifier loaded for each entry of the prediction model selector.
# The insertion order is the order shown in the select box.
_PREDICTION_MODEL_FILES = {
    "Logistic Regression": "resources/lr_model_.pkl",
    "KNN": "resources/knc_model_.pkl",
    "SVM": "resources/svm_model.pkl",
    "Naive Bayes": "resources/nb_model_.pkl",
    "LinearSVC": "resources/lsvc_model.pkl",
    "GBC": "resources/gbc_model.pkl",
    "SGDC": "resources/sgdc_model.pkl",
}

# Display names of the sentiment codes; every other code is shown as "News".
_SENTIMENT_LABELS = {1: "Pro", 0: "Neutral", -1: "Anti"}

# Sidebar help text for the classifiers that expose tunable parameters.
_PARAMETER_DESCRIPTIONS = {
    "LR": """**penalty: {‘l1’, ‘l2’, ‘elasticnet’, ‘none’}, default=’l2’** <br>
Used to specify the norm used in the penalization. The ‘newton-cg’, ‘sag’ and ‘lbfgs’ solvers support only l2 penalties. ‘elasticnet’ is only supported by the ‘saga’ solver. If ‘none’ (not supported by the liblinear solver), no regularization is applied.<br>

**C : float, default=1.0** <br>
Inverse of regularization strength; must be a positive float. Like in support vector machines, smaller values specify stronger regularization.<br>

**random_state: int, RandomState instance, default=None**<br>
Used when solver == ‘sag’, ‘saga’ or ‘liblinear’ to shuffle the data. See Glossary for details.<br>

**solver: {‘newton-cg’, ‘lbfgs’, ‘liblinear’, ‘sag’, ‘saga’}, default=’lbfgs’**<br>
Algorithm to use in the optimization problem.<br>

- For small datasets, ‘liblinear’ is a good choice, whereas ‘sag’ and ‘saga’ are faster for large ones.<br>

- For multiclass problems, only ‘newton-cg’, ‘sag’, ‘saga’ and ‘lbfgs’ handle multinomial loss; ‘liblinear’ is limited to one-versus-rest schemes.<br>

- ‘newton-cg’, ‘lbfgs’, ‘sag’ and ‘saga’ handle L2 or no penalty<br>

- ‘liblinear’ and ‘saga’ also handle L1 penalty<br>

- ‘saga’ also supports ‘elasticnet’ penalty<br>

- ‘liblinear’ does not support setting penalty='none'<br>

**max_iter : int, default=100**<br>
Maximum number of iterations taken for the solvers to converge.<br>
    """,
    "KNN": """**n_neighbors**: int, default=5<br>
Number of neighbors to use by default for kneighbors queries.<br>

**algorithm** : {‘auto’, ‘ball_tree’, ‘kd_tree’, ‘brute’}, default=’auto’<br>
- Algorithm used to compute the nearest neighbors:<br>

- `ball_tree` will use BallTree<br>

- `kd_tree` will use KDTree<br>

- `brute` will use a brute-force search.<br>

- `auto` will attempt to decide the most appropriate algorithm based on the values passed to fit method.<br>

**leaf_size**: int, default=30
Leaf size passed to BallTree or KDTree. This can affect the speed of the construction and query, as well as the memory required to store the tree. The optimal value depends on the nature of the problem.<br>


    """,
    "Random Forest": """**n_estimators : int, default=100**<br>
The number of trees in the forest.<br>

**random_state : int or RandomState, default=None**<br>
Controls both the randomness of the bootstrapping of the samples used when building trees (if bootstrap=True) and the sampling of the features to consider when looking for the best split at each node (if max_features < n_features).<br>
    """,
    "LinearSVC": """**C : float, default=1.0**<br>
Regularization parameter. The strength of the regularization is inversely proportional to C. Must be strictly positive.<br>

**random_state : int or RandomState instance, default=None**
Controls the pseudo random number generation for shuffling the data for the dual coordinate descent (if dual=True). When dual=False the underlying implementation of LinearSVC is not random and random_state has no effect on the results. Pass an int for reproducible output across multiple function calls.<br>

**max_iter : int, default=1000**<br>
The maximum number of iterations to be run.<br>
    """,
    "GBC": """**n_estimators : int, default=100**<br>
The number of boosting stages to perform. Gradient boosting is fairly robust to over-fitting so a large number usually results in better performance.<br>
    """,
    "SGDC": """**max_iter : int, default=1000**<br>
The maximum number of passes over the training data (aka epochs). It only impacts the behavior in the fit method, and not the partial_fit method.<br>    
	""",
}


def _sentiment_label(code):
    """Return the display name of a sentiment code ("News" for unknown codes)."""
    return _SENTIMENT_LABELS.get(code, "News")


def _common_words(token_lists, limit=20):
    """Return a DataFrame with the `limit` most frequent words in `token_lists`.

    The frame always has the columns ``Common_words`` and ``count``, even when
    there are no words at all (assigning column names afterwards would fail
    on an empty frame).
    """
    import collections

    counts = collections.Counter(
        word for words in token_lists for word in words)
    return pd.DataFrame(counts.most_common(limit),
                        columns=['Common_words', 'count'])


def _show_prediction_page():
    """Classify a user-supplied text with one of the pickled models."""
    st.info("Prediction with Classification ML Models")

    model_name = st.selectbox('Choose the model to make the Prediction with:',
                              tuple(_PREDICTION_MODEL_FILES))
    st.write('You selected:', model_name)

    # Text box for user input.
    tweet_text = st.text_area("Enter Text",
                              "Type Here or Copy and Paste Tweet Here")
    if not st.button("Classify"):
        return

    # Transform the user input with the vectorizer, then predict with the
    # model that belongs to the selected entry.
    vect_text = tweet_cv.transform([tweet_text]).toarray()
    # The context manager closes the pickle file once the model is loaded.
    with open(_PREDICTION_MODEL_FILES[model_name], "rb") as model_file:
        predictor = joblib.load(model_file)
    prediction = predictor.predict(vect_text)

    # Exactly one sample was passed in, so the first element is its label.
    st.success("Text Categorized as: {} ".format(
        _sentiment_label(prediction[0])))


def _show_eda_page():
    """Render the exploratory analysis of the training tweets.

    Works on a copy of the module-level ``train`` frame, so the original data
    is left untouched.
    """
    # NOTE(review): the uploader also accepts xlsx/text/json, but the upload is
    # always parsed as CSV - confirm which formats are really meant to work.
    data = st.file_uploader("Upload Dataset and View:",
                            type=['csv', 'xlsx', 'text', 'json'])
    if data is not None:
        df1 = pd.read_csv(data)
        # Report success only once a file was actually uploaded and parsed.
        st.success('Data uploaded successfully')
        st.dataframe(df1.head(10))

    df = train.copy()
    # Replace the numeric sentiment codes by their display names.
    df['sentiment'] = [_sentiment_label(code) for code in df['sentiment']]

    sent_count = df.groupby('sentiment').size().reset_index(name='counts')
    st.sidebar.subheader('Exploratory Data Analysis')
    if st.sidebar.checkbox('View sentiment count'):
        st.write(sent_count)

    # Flag the tweets that contain at least one emoji; every character is
    # inspected, not only the first one.
    df['emoji'] = df['message'].apply(
        lambda text: any(char in emoji.UNICODE_EMOJI for char in text))

    # Keep letters and '#' only. The pattern is a regular expression, so this
    # is requested explicitly instead of relying on the pandas default.
    df['message'] = df['message'].str.replace("[^a-zA-Z#]", " ", regex=True)
    # Drop words of three characters or fewer. Line breaks are gone at this
    # point as well: they were turned into spaces above and split() collapses
    # all whitespace.
    df['message'] = df['message'].apply(
        lambda text: ' '.join(w for w in text.split() if len(w) > 3))
    # Remove the literal text "https".
    df['message'] = df['message'].str.replace('https', '', regex=False)

    st.sidebar.subheader("Visualising the Dataset")
    if st.sidebar.checkbox('Sentiments'):
        fig, ax = plt.subplots()
        sns.countplot(x='sentiment', data=df, palette="Blues_d", ax=ax)
        st.pyplot(fig)

    if st.sidebar.checkbox("Tweet Length Distribution"):
        df['tweet length'] = df['message'].apply(len)
        # FacetGrid creates its own figure; that figure is what gets rendered.
        grid = sns.FacetGrid(df, col='sentiment')
        grid.map(plt.hist, 'tweet length')
        st.pyplot(grid.fig)

    # Most frequent words over all tweets.
    df['temp_list'] = df['message'].apply(lambda x: str(x).split())
    temp = _common_words(df['temp_list'])

    if st.sidebar.checkbox("Frequent Words"):
        fig = px.bar(temp, x="count", y="Common_words",
                     title='Commmon Words in Tweets', orientation='h',
                     width=700, height=700)
        # A Plotly figure is rendered by st.write alone.
        st.write(fig)

    # Most frequent words in the tweets labelled "Pro".
    pro_tweet = _common_words(df[df['sentiment'] == 'Pro']['temp_list'])

    if st.sidebar.checkbox("Figure representing common words in Pro tweets"):
        fig = px.bar(pro_tweet, x="count", y="Common_words",
                     title='Commmon used words in Pro tweet', orientation='h',
                     width=700, height=700)
        st.write(fig)


def _collect_parameters(name_of_cls):
    """Show the sidebar widgets of a classifier and return the chosen values.

    Returns a dict keyed by parameter name; it is empty for classifiers that
    have no tunable parameters here.
    """
    param = dict()
    if name_of_cls == "LR":
        param["C"] = st.sidebar.slider("C", 0.01, 1.0)
        param["max_iter"] = st.sidebar.slider("max_iter", 1, 1000)
        # NOTE(review): the "None" option is handed to scikit-learn as the
        # string "None"; the accepted spelling depends on the installed
        # scikit-learn version - confirm and map it accordingly.
        param["penalty"] = st.sidebar.radio(
            "penalty", ("l2", "l1", "elasticnet", "None"), key='l2')
        param["random_state"] = st.sidebar.number_input(
            "random_state", 1, 1000, step=1, key="random_state")
        param["solver"] = st.sidebar.radio(
            "solver", ("newton-cg", "lbfgs", "liblinear", "sag", "saga"),
            key='lbfgs')
    elif name_of_cls == "Random Forest":
        param["n_estimators"] = st.sidebar.slider("n_estimators", 1, 1000)
        param["random_state"] = st.sidebar.slider("random_state", 0, 100)
    elif name_of_cls == "SVM":
        # Both bounds are floats: Streamlit rejects mixed int/float bounds.
        param["C"] = st.sidebar.slider("C", 0.01, 15.0)
    elif name_of_cls == "KNN":
        param["n_neighbors"] = st.sidebar.slider("n_neighbors", 1, 100)
        param["algorithm"] = st.sidebar.radio(
            "algorithm", ("auto", "ball_tree", "kd_tree", "brute"), key='auto')
    elif name_of_cls == "LinearSVC":
        param["max_iter"] = st.sidebar.slider("max_iter", 1, 1000)
        param["random_state"] = st.sidebar.number_input(
            "random_state", 1, 1000, step=1, key="random_state")
        param["C"] = st.sidebar.slider("C", 0.01, 1.0)
    elif name_of_cls == "GBC":
        param["n_estimators"] = st.sidebar.slider("n_estimators", 1, 100)
    elif name_of_cls == "SGDC":
        param["max_iter"] = st.sidebar.slider("max_iter", 1, 5000)
    return param


def _build_classifier(name_of_cls, param):
    """Return an unfitted classifier for `name_of_cls`, or None if unknown.

    `param` is the dict produced by `_collect_parameters` for the same name.
    A warning is shown in the app when the name is not recognised.
    """
    if name_of_cls == "KNN":
        # leaf_size has no widget, so scikit-learn's default is used.
        return KNeighborsClassifier(n_neighbors=param["n_neighbors"],
                                    algorithm=param["algorithm"])
    if name_of_cls == "LR":
        return LogisticRegression(C=param["C"],
                                  max_iter=param["max_iter"],
                                  penalty=param["penalty"],
                                  solver=param["solver"],
                                  random_state=param["random_state"])
    if name_of_cls == "Naive Bayes":
        return MultinomialNB()
    if name_of_cls == "Random Forest":
        return RandomForestClassifier(n_estimators=param["n_estimators"],
                                      random_state=param["random_state"])
    if name_of_cls == "SVM":
        # Imported here because only this branch needs it.
        from sklearn.svm import SVC
        return SVC(C=param["C"])
    if name_of_cls == "LinearSVC":
        return LinearSVC(C=param["C"],
                         random_state=param["random_state"],
                         max_iter=param["max_iter"])
    if name_of_cls == "GBC":
        return GradientBoostingClassifier(n_estimators=param["n_estimators"],
                                          learning_rate=1.0, max_depth=1)
    if name_of_cls == "SGDC":
        return SGDClassifier(tol=0.01, max_iter=param["max_iter"])
    st.warning("Select your choice of algorithm.")
    return None


def _show_model_page():
    """Train the selected classifier on the tweets and show its evaluation."""
    st.header("Models Building")
    df = train.copy()
    if st.sidebar.checkbox("Data we will be working with"):
        st.write(df)

    # Divide the data into the X and y variables.
    X = df["message"]
    y = df["sentiment"]

    classifier_name = st.sidebar.selectbox(
        "Preferred Classifier:",
        ("LR", "KNN", "SVM", "Naive Bayes", "Random Forest", "LinearSVC",
         "GBC", "SGDC"))
    param = _collect_parameters(classifier_name)
    clf = _build_classifier(classifier_name, param)
    if clf is None:
        # Nothing to train; the warning has already been shown.
        return

    vectorizer = TfidfVectorizer(ngram_range=(1, 2), min_df=2,
                                 stop_words="english")
    X_vectorized = vectorizer.fit_transform(X)
    X_train, X_val, y_train, y_val = train_test_split(
        X_vectorized, y, test_size=.3, shuffle=True, stratify=y,
        random_state=11)

    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_val)

    # Computed up front so that every checkbox below works on its own.
    report = metrics.classification_report(y_val, y_pred)
    matrix = metrics.confusion_matrix(y_val, y_pred)

    st.write("### Name of classifier:", classifier_name)
    if st.checkbox("View the Prediction sentiments"):
        st.write("Prediction of the sentiments:", y_pred)

    if st.checkbox("View Classification Report"):
        st.write("Accuracy:", report)

    if st.checkbox("View the Confusion Matrix"):
        st.write("Confusion Matrix:", matrix)

    if st.checkbox("Display heatmap for Confusion Matrix"):
        fig, ax = plt.subplots()
        sns.heatmap(matrix, annot=True, ax=ax)
        st.pyplot(fig)

    # Classifiers without an entry have no description checkbox.
    description = _PARAMETER_DESCRIPTIONS.get(classifier_name)
    if description is not None and st.sidebar.checkbox(
            "Description of the model parameters"):
        st.markdown(description, unsafe_allow_html=True)


def main():
    """Entry point of the app: dispatch to the page chosen in the sidebar.

    The sidebar offers four pages: "Prediction" (classify a text with a
    pickled model), "EDA" (explore the training tweets), "model" (train and
    evaluate a classifier interactively) and "About us".
    """
    activities = ['Prediction', 'EDA', 'model', 'About us']
    option = st.sidebar.selectbox("Selection option:", activities)

    if option == 'Prediction':
        _show_prediction_page()
    elif option == "EDA":
        _show_eda_page()
    elif option == "model":
        _show_model_page()
    elif option == "About us":
        st.markdown(
            "<h1 style='text-align: center; color: blue;'>About Us</h1>",
            unsafe_allow_html=True)
        image = Image.open("images/About us page.png")
        st.image(image, use_column_width=True)
Esempio n. 26
0
                           df["batch_id"].unique().tolist())
        if bid:
            how_was_my_day = pd.DataFrame()
            trades = load_trades(day_to_analyze)
            start_time = df[df.batch_id == bid].start_time
            start_time = pytz.utc.localize(start_time.min()).astimezone(est)
            how_was_my_day["symbol"] = trades.loc[trades["batch_id"] ==
                                                  bid]["symbol"].unique()
            how_was_my_day["revenues"] = how_was_my_day["symbol"].apply(
                lambda x: calc_batch_revenue(x, trades, bid))
            how_was_my_day["count"] = how_was_my_day["symbol"].apply(
                lambda x: count_trades(x, trades, bid))
            st.text(
                f"batch_id:{bid}\nstart time:{start_time }\nrevenue=${round(sum(how_was_my_day['revenues']), 2)}"
            )
            st.dataframe(how_was_my_day)

            uploaded_file: UploadedFile = st.file_uploader(
                label="Select tradeplan file", type=["toml", "TOML"])
            if uploaded_file:
                byte_str = uploaded_file.read()
                toml_as_string = byte_str.decode("UTF-8")

                if len(toml_as_string):
                    conf_dict = toml.loads(toml_as_string)  # type: ignore

                    if not conf_dict:
                        st.error(
                            "Failed to load TOML configuration file, retry")
                        st.stop()
                else:
Esempio n. 27
0
    # our input image is now represented as a NumPy array of shape
    # (inputShape[0], inputShape[1], 3) however we need to expand the
    # dimension by making the shape (1, inputShape[0], inputShape[1], 3)
    # so we can pass it through the network
    image = np.expand_dims(image, axis=0)
    # pre-process the image using the appropriate function based on the
    # model that has been loaded (i.e., mean subtraction, scaling, etc.)
    image = preprocess(image)

    preds = model.predict(image)
    predictions = imagenet_utils.decode_predictions(preds)
    imagenetID, label, prob = predictions[0][0]

    st.image(bytes_data, caption=[f"{label} {prob*100:.2f}"])
    st.subheader(f"Top Predictions from {network}")
    st.dataframe(
        pd.DataFrame(predictions[0],
                     columns=["Network", "Classification", "Confidence"]))


# Download a single file and make its content available as a string.
@st.cache(show_spinner=False)
def get_file_content_as_string(path):
    """Return the UTF-8 decoded content of `path` from the demo repository.

    `path` is appended to the raw.githubusercontent.com base URL of the
    nithishr/streamlit-ml-demo repository (main branch). Errors raised by
    `urllib.request.urlopen` are propagated to the caller.
    """
    url = "https://raw.githubusercontent.com/nithishr/streamlit-ml-demo/main/" + path
    # The context manager closes the HTTP response once the body is read.
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8")


# When requested, display the source of the front-end script itself.
if show_code:
    frontend_source = get_file_content_as_string("ml_frontend.py")
    st.code(frontend_source)
Esempio n. 28
0
# Load the accident records from the database.
# NOTE(review): the connection settings, including the password, are
# hard-coded here; consider reading them from configuration instead.
acc_df = query_data(localhostname='localhost',
                    user='******',
                    pw='root',
                    db='accidents2',
                    port=8889,
                    table='Accidents',
                    nrow=nrow_acc)

# Reset the time-of-day part of every timestamp column to midnight.
for ts_col in ('start_time', 'end_time', 'weather_timestamp'):
    acc_df[ts_col] = acc_df[ts_col].dt.normalize()

# Calendar year / month of the accident start, used for later grouping.
acc_df['year'] = acc_df['start_time'].dt.year
acc_df['month'] = acc_df['start_time'].dt.month

c_box = st.checkbox("View Raw Data")
if c_box:
    s_acc_df = st.dataframe(acc_df)
    # NOTE(review): this bare expression is not a no-op if Streamlit "magic"
    # is enabled (it would be written to the page) - confirm it is intended.
    'accidents_df', s_acc_df

# ---- Severity bar chart ----
# One row per severity level; after count() every remaining column holds the
# number of non-null values, so "id" serves as the accident count.
severity_bar = acc_df.groupby('severity').count().reset_index()
severity_bar['severity'] = severity_bar['severity'].astype('category')

sev_fig = px.bar(severity_bar, x="severity", y="id")
sev_fig.update_layout(title='Count of accidents by severity',
                      xaxis_type='category',
                      xaxis_title='Severity Score',
                      yaxis_title='Count')
st.plotly_chart(sev_fig, use_container_width=True)

st.write("Most of the crashes from the dataset have a severity score of 2.")
Esempio n. 29
0
def run(st, data, mongocls, session_id):
    """Render the data-preparation, model-training and evaluation page.

    The page is drawn top to bottom: an optional sample preview, reload /
    reset / restrict-columns controls, then one expander each for
    imputation, encoding, sampling, scaling, model training and evaluation
    metrics. Every step that changes the dataframe stores the result in the
    ``<session_id>_ml_df`` session record; trained models and their
    train/test split are stored pickled in ``<session_id>_models_ran``.

    Args:
        st: Streamlit module (or an object with the same API) used to draw
            the widgets.
        data: pandas DataFrame to work on.
        mongocls: Session store exposing ``get_session_info``,
            ``write_session_info`` and ``delete_session``.
        session_id: Prefix of the session records belonging to this user.
    """
    df_session_id = session_id + '_ml_df'
    models_session_id = session_id + '_models_ran'

    def _store_dataframe(frame):
        # Replace any previously stored working dataframe with ``frame``.
        mongocls.delete_session({'session_id': df_session_id})
        mongocls.write_session_info(
            {'session_id': df_session_id, 'data_dict': frame.to_dict("records")})

    st.markdown("### Steps -")

    view_sample_data = st.checkbox("View Sample Data - ")
    if view_sample_data:
        # Prefer the dataframe saved by an earlier step, if there is one.
        stored_df = mongocls.get_session_info({'session_id': df_session_id})
        if stored_df is not None:
            st.dataframe(pd.DataFrame(stored_df["data_dict"]).head())
        else:
            st.dataframe(data.head())

    c_load, c_session, c_restrict_columns = st.beta_columns(3)
    if c_load.button("Reload Data!!!"):
        data = get_data(data, mongocls, session_id)

    if c_session.button("Reset Session!!!"):
        mongocls.delete_session({'session_id': df_session_id})

    if c_restrict_columns.checkbox("Restrict Columns: "):
        restrict_columns = st.multiselect("Restrict Columns ", data.columns.tolist())
        data = data[restrict_columns]

    expander = st.beta_expander("Data Preparation:", expanded=False)

    # NOTE(review): get_data is defined elsewhere; presumably it returns the
    # stored working dataframe when one exists - confirm how that interacts
    # with the column restriction applied just above.
    data = get_data(data, mongocls, session_id)
    columns = data.columns.tolist()
    with expander:
        c1, c2, _ = st.beta_columns(3)
        fs = c1.checkbox("Feature Selection")
        if fs:
            # TODO: offer SelectKBest / RFE / PCA / LDA once implemented.
            st.text("Currently under development!!!")

        dimpute = c2.checkbox("Data Imputers")
        if dimpute:
            impute_options = st.selectbox("Impute Option", ['SimpleImputer'])
            if impute_options:
                imputer = dp.Imputers(dataframe=data, select_imputer=impute_options)
                imputer.select_imputers(imputerSelect=impute_options)
                data = imputer.fit_transform()
                # Re-attach the column names captured before imputation.
                data = pd.DataFrame(data, columns=columns)
                _store_dataframe(data)
                st.dataframe(data.head())

    expander_enc = st.beta_expander("Data Encoding", expanded=False)
    with expander_enc:
        encode = st.checkbox("Apply Encoding")
        if encode:
            c1_encode, c2_encode, c3_encode = st.beta_columns(3)
            encoding_option = c1_encode.selectbox(
                "Select Encoder",
                ['LabelEncoder', 'OneHotEncoder', 'OrdinalEncoder', 'Binarizer',
                 'LabelBinarizer', 'MultiLabelBinarizer'])
            Y_col_encode = c2_encode.selectbox("Select Y (target) (Encoding)", data.columns.tolist())
            cat_columns = c3_encode.multiselect("Select Categorical Columns to Encode", data.columns.tolist())
            encode_btn = st.button("Encode Data!!!")
            if encode_btn:
                # Pass cat_columns only when the user picked some; otherwise
                # dp.Encoders is called without that argument.
                if len(cat_columns) == 0:
                    encode_cls = dp.Encoders(df=data, y=Y_col_encode)
                else:
                    encode_cls = dp.Encoders(df=data, y=Y_col_encode, cat_columns=cat_columns)
                encode_cls.select_encoder(encode_type=encoding_option)
                data = encode_cls.compile_encoding()
                st.dataframe(data.head())
                _store_dataframe(data)

    expander_sample = st.beta_expander("Data Sampling", expanded=False)
    with expander_sample:
        c1, c2, c3, c4 = st.beta_columns(4)
        sample_options = c1.selectbox("Sampling Options", ["Over", "Under", "RandomOverSampler"])
        # NOTE(review): the ratio is collected but never forwarded to
        # dp.Sampling below.
        sampling_ratio = c2.slider('Sampling Ratio', min_value=0.1, max_value=1.0, step=0.05)
        Y_col = c3.selectbox("Select Y (target) (Sampling)", data.columns.tolist())
        X_cols = None
        if Y_col is not None and Y_col != '':
            X_cols = c4.multiselect("Select X Columns (default is all)",
                                    [col for col in data.columns.tolist() if col != Y_col])
            if len(X_cols) <= 0:
                # Nothing picked: fall back to every column except the target.
                X_cols = [col for col in data.columns.tolist() if col != Y_col]
        else:
            st.warning("Please select Target column!!!")
        sampler_btn = st.button("Run Sampler")
        # Without a target there are no feature columns to sample from, so
        # the button is ignored instead of failing on an undefined X_cols.
        if sampler_btn and X_cols is not None:
            sample_cls = dp.Sampling(df=data[X_cols + [Y_col]], target=Y_col, sampling_option=sample_options)
            X_val, Y_val = sample_cls.run_sampler()
            data = pd.concat([X_val, Y_val], axis=1)
            st.dataframe(data.head())
            _store_dataframe(data)

    expander_scale = st.beta_expander("Data Scaling", expanded=False)
    with expander_scale:
        scale = st.checkbox("Apply Scaling")
        if scale:
            c1, c2, c3 = st.beta_columns(3)
            # Label fixed: this box selects the scaler, not a sampler.
            scaling_options = c1.selectbox(
                "Scaling Options",
                ["StandardScaler", "MaxAbsScaler", "MinMaxScaler", "RobustScaler",
                 "Normalizer", "PowerTransformer", "QuantileTransformer"])
            Y_col = c2.selectbox("Select Y (target) (Scaling)", data.columns.tolist())
            cat_columns = c3.multiselect("Categorical Columns",
                                         [col for col in data.columns.tolist() if col != Y_col])
            scale_btn = expander_scale.button("Scale Data!!!")
            if scale_btn:
                scaling = dp.scaling(df=data, cat_columns=cat_columns, scalar_type=scaling_options, y=Y_col)
                data = scaling.compile_scalar()
                st.dataframe(data.head())
                _store_dataframe(data)

    expander_model = st.beta_expander("Model Training", expanded=False)
    with expander_model:
        c1, c2, c3 = st.beta_columns(3)
        model_type = c1.selectbox("Model Type", ['Regression', 'Classification'])
        y_col = c2.selectbox("Select Target Variable", data.columns.tolist())
        model_exe = ml_models.MLmodels(df=data, y_column=y_col, problem_type=model_type)
        select_models = c3.multiselect("Select ML models", model_exe.get_model_list())
        run_models = st.button("Run Models - ")
        model_storage = {}
        if run_models:
            if not select_models:
                # Previously this path deleted the stored models and then
                # failed because no train/test split had been produced.
                st.warning("Please select at least one model to run!!!")
            else:
                for model in select_models:
                    model_exe.select_model_to_run(model_select=model)
                    model_storage[model] = model_exe.compile_modeling()
                    # The split kept is the one reported after the last model.
                    train_x, train_y, test_x, test_y = model_exe.get_train_test()
                mongocls.delete_session({'session_id': models_session_id})
                mongocls.write_session_info({
                    'session_id': models_session_id,
                    'models_trained': pickle.dumps(model_storage),
                    'train_test_data': pickle.dumps({
                        'train_x': train_x,
                        'train_y': train_y,
                        'test_x': test_x,
                        'test_y': test_y,
                    }),
                })
        if model_storage:
            st.write("Model Run completed for the below models - ")
            st.code(model_storage)

    expander_metrics = st.beta_expander("Evaluation Metrics", expanded=False)
    with expander_metrics:
        models_record = mongocls.get_session_info({'session_id': models_session_id})
        if models_record is None:
            # Nothing has been trained in this session yet.
            st.warning("No Models trained yet!!!")
        else:
            # NOTE(review): pickle.loads can execute arbitrary code; this is
            # only safe while the session store holds trusted data.
            models_trained = pickle.loads(models_record["models_trained"])
            loaded_info = pickle.loads(models_record["train_test_data"])
            test_x = loaded_info['test_x']
            test_y = loaded_info['test_y']

            metric_cls = ml_metrics.Metrics(y_test=test_y)
            metric_selected = {}
            for model in models_trained:
                st.text(model)
                metric_selected[model] = st.multiselect(
                    f'Select Metrics to see for the Model ({model})',
                    metric_cls.get_metric_list())
            metrics_btn = st.button("Click to see the metrics")
            if metrics_btn:
                for model, metrics in metric_selected.items():
                    for metric in metrics:
                        metric_cls.select_metrics(metric)
                        st.write(metric)
                        st.write(metric_cls.metrics_solve(estimator=models_trained[model], test_x=test_x))
Esempio n. 30
0
def main():
    """Run the "Dataset Explorer" Streamlit app.

    Lets the user choose a CSV file, inspect it (head, column names, shape,
    dtypes, summary statistics, value counts) and plot selected columns
    with Streamlit's native charts, Plotly and matplotlib.
    """
    st.info(
        "Stramlit documentaion are very much fun to read, it is not like the common hestic doc of other tech. I reccomand all our group members to read it"
    )
    st.info("link for doc:https://streamlit.io/docs/api.html")

    st.title("Dataset Explorer Steamlit App")
    image = Image.open("assistant.jpeg")

    st.image(image,
             caption='Hi iam your assistant! lets have a Tour over this App',
             use_column_width=True)

    user_name = st.text_input("Please Enter Your Name")

    st.write("Cool!{}, Lets go ahead".format(user_name))

    filename = file_selector()
    st.write("you selected {}".format(filename))
    df = pd.read_csv(filename)

    # ---- Basic inspection of the dataset ----
    if st.checkbox("Show datasets"):
        number = st.number_input("Number of Rows to view", 5, 10)
        st.dataframe(df.head(number))

    if st.button("Column names"):
        st.write(df.columns)

    if st.checkbox("Shape of the dataset"):
        st.write(df.shape)
        data_dim = st.radio("show Dimension By", ("Rows", "Column"))
        if data_dim == "Rows":
            st.text("Number of Rows")
            st.write(df.shape[0])
        elif data_dim == "Column":
            # Caption fixed: this branch reports the column count.
            st.text("Number of Columns")
            st.write(df.shape[1])

    if st.checkbox("Seelet cloumns To show"):
        all_columns = df.columns.tolist()
        selected_columns = st.multiselect("select", all_columns)
        new_df = df[selected_columns]
        st.dataframe(new_df)

    if st.button("Check the value count"):
        st.text("Value count by Target/Class")
        # The last column is treated as the target/class column.
        st.write(df.iloc[:, -1].value_counts())

    if st.button("Data Types"):
        st.write(df.dtypes)

    if st.button("Summary of the DataSet"):
        st.write(df.describe().T)

    # ---- Plots of user-selected columns ----
    st.subheader("Data Visualization")

    st.subheader("Plots")

    all_columns_names = df.columns.tolist()
    type_Of_plot = st.selectbox(
        "Select Type of pllot",
        ["area", "bar", "line", "hist", "box", "kde", "Scatter Plot"])
    selected_columns_names = st.multiselect("Select Columns To Plot",
                                            all_columns_names)

    st.write("Steamlit Comes With its in build Plots :sunglasses:")
    st.write(
        "if you want to save this plots you can do it by clicking the circle near the plot. This only works for Streamlit Generated plots"
    )

    if st.button("Generate Plot"):
        st.success("Produce plot for {}  and {}".format(
            type_Of_plot, selected_columns_names))

    if not selected_columns_names:
        # Plotting an empty column selection makes pandas raise, so wait
        # until the user has picked something.
        st.info("Select at least one column to plot")
    elif type_Of_plot == "Scatter Plot":
        # A scatter plot needs an x and a y column; it is drawn with Plotly
        # only, because "Scatter Plot" is not a valid pandas plot kind.
        if len(selected_columns_names) < 2:
            st.warning("Scatter Plot needs two columns (x and y)")
        else:
            x_col, y_col = selected_columns_names[:2]
            fig = px.scatter(df, x=x_col, y=y_col)
            st.plotly_chart(fig)
    else:
        cust_data = df[selected_columns_names]

        # area / bar / line also have a native Streamlit chart.
        native_charts = {
            'area': st.area_chart,
            'bar': st.bar_chart,
            'line': st.line_chart,
        }
        if type_Of_plot in native_charts:
            native_charts[type_Of_plot](cust_data)

        st.write("Below are matplotlib Plots")
        cust_plot = cust_data.plot(kind=type_Of_plot)
        st.write(cust_plot)
        st.pyplot()

    if st.button("Pie Plot"):
        st.success("Generating your plot")
        st.write(df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
        st.pyplot()

    if st.checkbox("Correlation Polt"):
        st.write(sns.heatmap(df.corr(), annot=True))
        st.pyplot()

    if st.checkbox("Value count"):
        st.text("Value count by Target")
        primary_col = st.selectbox("Primary column to GroupBy",
                                   all_columns_names)
        selected_columns_names = st.multiselect("Select Columns",
                                                all_columns_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(
                    primary_col)[selected_columns_names].count()
            else:
                # No columns chosen: fall back to counts of the last column.
                vc_plot = df.iloc[:, -1].value_counts()
            st.write(vc_plot.plot(kind='bar'))
            st.pyplot()

    if st.button("Click me to get Excited"):
        st.balloons()