if __name__ == '__main__':
    # Entry point of the stock analytics page: pick a ticker and a date
    # range, show the downloaded data, then chart and summarise one column.
    st.write('Hello welcome to the Stock Market Analytics Web App!!')

    # Default the start of the range to the first day of the current month.
    today = datetime.date.today()
    def_start_date = today.replace(day=1)

    option = st.selectbox(
        'Please choose the stock tickers you are interested to Analyze',
        ['SPY', 'AAPL', 'AMZN'])
    from_date = st.date_input('from date', def_start_date)
    to_date = st.date_input('to date')
    st.write('You have selected:', option)

    # `option` is already a single ticker string, so it is passed as-is.
    data = get_data(option, from_date, to_date)
    st.write(data)

    col_option = st.selectbox(
        'Select the column to perform Uni-Variate Analysis',
        ['Open', 'High', 'Low', 'Close', 'Volume'])
    st.write('column:', col_option)
    print('column selection', col_option)

    data = data.loc[:, col_option]
    st.write(data)
    st.line_chart(data)

    st.write('Univariate Analysis')
    values = data.values
    if len(values) == 0:
        # median/min/max raise on an empty sequence (e.g. a date range that
        # contains no rows), so report it instead of crashing the page.
        st.warning('No data available for the selected date range.')
    else:
        st.write('Mean', values.mean())
        st.write('Median', statistics.median(values))
        st.write('Mode', statistics.mode(values))
        st.write(f'Min: {min(values)} - Max: {max(values)}')
def _render_eda(data, lags):
    """Render the "Exploratory Data Analysis" page for the uploaded frame.

    Args:
        data: DataFrame read from the uploaded CSV.
        lags: Value of the sidebar lag slider, forwarded to ``tsplot``.
    """
    # Show dataset
    if st.checkbox("Show Dataset"):
        # number_input rejects a minimum above the maximum, so clamp the
        # lower bound for files with fewer than five rows.
        rows = st.number_input("Number of rows", min(5, len(data)), len(data))
        st.dataframe(data.head(rows))

    # Show columns
    if st.checkbox("Columns"):
        st.write(data.columns)

    # Data types
    if st.checkbox("Column types"):
        st.write(types(data))

    # Show shape
    if st.checkbox("Shape of Dataset"):
        data_dim = st.radio("Show by", ("Rows", "Columns", "Shape"))
        if data_dim == "Columns":
            st.text("Number of Columns: ")
            st.write(data.shape[1])
        elif data_dim == "Rows":
            st.text("Number of Rows: ")
            st.write(data.shape[0])
        else:
            st.write(data.shape)

    # Check null values in dataset
    if st.checkbox("Check null values"):
        nvalues = null_values(data)
        st.write(nvalues)

    # Show data summary
    if st.checkbox("Show Data Summary"):
        st.text("Datatypes Summary")
        st.write(data.describe())

    # Plot time series, ACF and PACF
    if st.checkbox("Select column as time series"):
        selected = st.multiselect("Choose", data.columns.tolist())
        series = data[selected]
        if st.button('Plot Time Series, ACF and PACF'):
            if selected:
                tsplot(series, lags=lags)
                st.pyplot()
            else:
                # Nothing selected means an empty frame; ask for a column
                # instead of handing it to the plotting helper.
                st.warning("Select at least one column to plot.")


def _render_plots(data):
    """Render the "Plotting and Visualization" page for the uploaded frame."""
    all_columns = data.columns.tolist()
    type_of_plot = st.selectbox("Select Type of Plot", [
        "area", "line", "scatter", "pie", "bar", "correlation", "distribution"
    ])

    # The three native Streamlit charts differ only in the function called.
    native_charts = {
        "line": st.line_chart,
        "area": st.area_chart,
        "bar": st.bar_chart,
    }

    if type_of_plot in native_charts:
        select_columns_to_plot = st.multiselect("Select columns to plot",
                                                all_columns)
        native_charts[type_of_plot](data[select_columns_to_plot])
    elif type_of_plot == "pie":
        select_columns_to_plot = st.selectbox("Select a column", all_columns)
        st.write(data[select_columns_to_plot].value_counts().plot.pie())
        st.pyplot()
    elif type_of_plot == "correlation":
        # Restrict to numeric columns: recent pandas raises when corr() meets
        # text/date columns instead of silently dropping them.
        numeric = data.select_dtypes(include="number")
        st.write(
            sns.heatmap(numeric.corr(),
                        annot=True,
                        linewidths=.5,
                        annot_kws={"size": 7}))
        st.pyplot()
    elif type_of_plot == "scatter":
        st.write("Scatter Plot")
        scatter_x = st.selectbox("Select a column for X Axis", all_columns)
        scatter_y = st.selectbox("Select a column for Y Axis", all_columns)
        st.write(sns.scatterplot(x=scatter_x, y=scatter_y, data=data))
        st.pyplot()
    elif type_of_plot == "distribution":
        select_columns_to_plot = st.multiselect("Select columns to plot",
                                                all_columns)
        st.write(sns.distplot(data[select_columns_to_plot]))
        st.pyplot()


def _render_model(data, seasonal):
    """Render the "Building Model" page for the uploaded frame.

    Args:
        data: DataFrame read from the uploaded CSV.
        seasonal: Value of the sidebar seasonal slider, forwarded to
            ``forecasting_autoarima``.
    """
    st.write("Select the columns to use for training")
    selected_column = st.multiselect("Select Columns", data.columns.tolist())
    new_df = data[selected_column]
    st.write(new_df)

    if not selected_column:
        # The split below reads the first selected column; without one it
        # would fail with an out-of-bounds indexer.
        st.info("Select at least one column to continue.")
        return

    # First selected column as a Series (no transpose, so dtype is preserved).
    target = new_df.iloc[:, 0]

    if st.checkbox("Train/Test Split"):
        y_train, y_test = temporal_train_test_split(target)
        st.text("Train Shape")
        st.write(y_train.shape)
        st.text("Test Shape")
        st.write(y_test.shape)
        plot_ys(y_train, y_test, labels=["y_train", "y_test"])
        st.pyplot()

    # The selector lives outside the button branch: a button is only True on
    # the run right after the click, so a widget created inside that branch
    # vanishes as soon as its value is changed.
    model_selection = st.selectbox("Model to train",
                                   ["AutoArima", "LSTM", "MLP", "RNN"])
    if st.button("Training a Model"):
        if model_selection == "AutoArima":
            y_train, y_test = temporal_train_test_split(target)
            forecasting_autoarima(y_train, y_test, seasonal)
        else:
            st.info("Training is only implemented for AutoArima.")


def main():
    """Build the sidebar and render the page for the selected activity.

    The "About" page is always available; the three data pages are rendered
    only once a CSV file has been uploaded.
    """
    st.sidebar.title("What to do")
    activities = [
        "Exploratory Data Analysis", "Plotting and Visualization",
        "Building Model", "About"
    ]
    choice = st.sidebar.selectbox("Select Activity", activities)

    # Slider whose value is passed as `lags` to the ACF/PACF plot.
    st.sidebar.markdown("# Lang")
    x = st.sidebar.slider('Select a lang for ACF and PACF analysis', 50, 60)

    # Slider whose value is passed to the AutoArima forecasting helper.
    st.sidebar.markdown("# Seasonal")
    s = st.sidebar.slider(
        'Select a seasonal parameter from previous ACF and PACF analysis', 24,
        48)

    # Cloud logo
    st.sidebar.title("Built on:")
    st.sidebar.image("src/ibmcloud_logo.png", width=200)

    # Upload file
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

    if choice == "About":
        st.title("About")
        st.write("The app developed by Alexander Robles.")
        st.write("Stack: Python, Streamlit, Docker, Kubernetes")
        return

    if uploaded_file is None:
        return

    # Read the upload once for whichever data page is selected.
    data = pd.read_csv(uploaded_file)
    st.subheader(choice)

    if choice == "Exploratory Data Analysis":
        _render_eda(data, x)
    elif choice == "Plotting and Visualization":
        _render_plots(data)
    elif choice == "Building Model":
        _render_model(data, s)
end_row = 0 # Start the date from top of the data set and go down to see if the user date is less than or equal to the date in the given data for i in range(0, len(df)): if start <= pd.to_datetime(df['Date'][i]): start_row = i break # Start the data set from top of the data set and go down to see if the user date is for j in range(0, len(df)): if end <= pd.to_datetime(df['Date'][len(df) - 1 - j]): end_row = len(df) - 1 - j df = df.set_index(pd.DatetimeIndex(df['Date'].values)) return df.iloc[start_row:end_row + 1, :] # Get the user input start, end, symbol = get_input() # Get the data df = get_data(symbol, start, end) # Get the company name company_name = get_company_name(symbol.upper()) # Display the close price st.header(company_name + ' Close\n') st.line_chart(df['Close']) # Display the volume st.header(company_name + " Volume\n") st.line_chart(df['Volume']) # Get stats on the data st.header('Data statistics') st.write(df.describe())
import yfinance as yf
import streamlit as st
import pandas as pd

# Page header. The space after '#' is required for Markdown to render the
# first line as a heading.
st.write("""
# Simple Stock Price

Shown are the stock closing price and volume of google
""")

# Ticker whose price history is charted.
tickerSymbol = 'GOOGL'
tickerData = yf.Ticker(tickerSymbol)

# The Ticker method is lower-case `history`; `History` raises AttributeError.
tickerDf = tickerData.history(period='1d', start='2010-5-31', end='2020-5-31')

# One line chart for the closing price and one for the traded volume.
st.line_chart(tickerDf.Close)
st.line_chart(tickerDf.Volume)
""") pd.set_option("display.max_rows", None, "display.max_columns", None) df = pd.read_csv('symbols.csv') df1 = df[['symbol']] result = df1.head(None) # result.to_csv('tickers.csv') # print(result) stock = st.selectbox( 'Which stock you would like to select?', (result)) stock_name = df.loc[df['symbol'] == stock, 'name'].iloc[0] # name = stock_name.head(None) st.write('You have selected:', stock_name) col1, col2 = st.columns(2) with col1: From = st.date_input("From", datetime.date(2020, 1, 1), min_value=None, max_value=None, key=None) with col2: To = st.date_input("To", value=None, min_value=None, max_value=None, key=None) stockSymbol = stock stockData = yf.Ticker(stockSymbol) stockDf = stockData.history(period='1d', start=From, end=To) st.line_chart(stockDf.Close) st.line_chart(stockDf.Volume) #x = st.slider('x') #st.write(x, 'squared is', x * x)
def _sliding_windows(series, time_steps):
    """Return every run of `time_steps` consecutive values that has a successor.

    `series` is a 1-D array; the result has shape
    (len(series) - time_steps, time_steps, 1).
    """
    windows = np.array(
        [series[i - time_steps:i] for i in range(time_steps, len(series))])
    return np.reshape(windows, (windows.shape[0], time_steps, 1))


def stock_predict_LSTM(stock, time_steps=3):
    """Train a stacked LSTM on a stock's closing prices and chart the result.

    The 'Close' series downloaded from stooq is split 90/10 in time order.
    The model is trained on windows of `time_steps` consecutive scaled
    prices, each labelled with the price that follows, and its predictions
    for the last 10% are charted next to the actual prices together with
    the RMSE.

    Args:
        stock: Ticker symbol passed to the stooq data reader.
        time_steps: Number of consecutive prices in each input window.

    Raises:
        ValueError: If the series is too short to build at least one
            training window and one test row.
    """
    # getting the data, oldest row first
    df = web.DataReader(stock, 'stooq')
    df.sort_values('Date', ascending=True, inplace=True)

    # `data` keeps the date index; `dataset` is the bare array of closes
    data = df.filter(['Close'])
    dataset = data.values
    training_data_len = math.ceil(len(dataset) * .9)

    if training_data_len <= time_steps or training_data_len >= len(dataset):
        raise ValueError(
            "Not enough price history for {!r}: {} rows with time_steps={}".
            format(stock, len(dataset), time_steps))

    # Fit the scaler on the training rows only, so the range of the held-out
    # prices does not leak into training; then scale the whole series.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(dataset[:training_data_len])
    scaled_data = scaler.transform(dataset)

    # training dataset: windows plus the value that follows each window
    training_data = scaled_data[:training_data_len, 0]
    x_train = _sliding_windows(training_data, time_steps)
    y_train = training_data[time_steps:]

    # test dataset: start `time_steps` rows early so that the first test
    # price already has a full window in front of it
    test_data = scaled_data[training_data_len - time_steps:, 0]
    x_test = _sliding_windows(test_data, time_steps)
    y_test = dataset[training_data_len:, :]

    # model
    model = Sequential()

    # 1st LSTM layer: 100 units; return_sequences=True hands the full
    # sequence to the next LSTM layer.
    model.add(
        LSTM(units=100,
             return_sequences=True,
             input_shape=(x_train.shape[1], 1)))
    # Dropout randomly zeroes 20% of this layer's outputs during training.
    model.add(Dropout(0.2))

    # 2nd LSTM layer: 50 units, still returning sequences.
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))

    # 3rd LSTM layer: 50 units, still returning sequences; 50% dropout.
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.5))

    # 4th LSTM layer: 50 units, returns only the last output; 50% dropout.
    model.add(LSTM(units=50))
    model.add(Dropout(0.5))

    # Dense layer that specifies an output of one unit
    model.add(Dense(units=1))

    # compiling model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # training the model
    model.fit(x_train, y_train, batch_size=1, epochs=1)

    # get predicted values, mapped back to price units
    predictions = model.predict(x_test)
    predictions = scaler.inverse_transform(predictions)

    # RMSE
    rmse = np.sqrt(np.mean(((predictions - y_test)**2)))
    print("Root Means Squared Error:", rmse)

    # Plot the data. Copy the slice so that adding a column does not write
    # into a view of `data`.
    valid = data[training_data_len:].copy()
    print(predictions.shape, valid.shape)
    valid['Predictions'] = predictions[:, 0]

    st.title('{} Long-Short-Term-Memory.'.format(stock.upper()))
    st.line_chart(valid[['Close', 'Predictions']])
    st.title("Model Metrics for Long-Short-Term-Memory.")
    st.subheader("Root Means Squared Error")
    st.subheader(rmse)
X_test, y_test = x[test_idx], y[test_idx] X_train, y_train = sampling.fit_resample(X_train, y_train) rf = RandomForestClassifier(**cfg, random_state=42) rf.fit(X_train, y_train) y_pred_prob = rf.predict_proba(X_test) result = metrics.roc_auc_score(y_test, y_pred_prob[:, 1]) results.append(result) result = np.mean(results) print(result) return result pb = st.progress(0) status_txt = st.empty() chart = st.line_chart() status_txt2 = st.empty() environment = Environment(rf_from_cfg, config_space=cs, mem_in_mb=2048, cpu_time_in_s=30, seed=42, data=x) mosaic = Search(environment=environment, policy_arg={ "c_ucb": 1.1, "coef_progressive_widening": 0.6 }, verbose=True)
def _with_date_index(datos):
    """Return a copy of `datos` indexed by its `date` column parsed as datetimes."""
    return datos.assign(date=pd.to_datetime(datos.date)).set_index('date')


def _pagina_principal(df):
    """Render the 'Principal' page: introduction, data dictionary and correlation map."""
    st.header('Introducción al COVID-19')
    st.write(
        'Coronavirus y Nuevo Coronavirus son los nombres de los virus, y CoViD-19 es el nombre de la enfermedad causada por el Nuevo Coronavirus. Identificados en la década de 1960, los Coronavirus pertenecen a la subfamilia taxonómica Orthocoronavirinae de la familia Coronaviridae, del orden Nidovirales. Se subdividen en varios géneros, especies y cepas. Algunos atacan a los animales, otros afectan a la especie humana, causando desde enfermedades leves con síntomas similares a un resfriado común, hasta enfermedades graves como el SARS, el MERS y el actual CoViD-19.'
    )
    st.write(
        'El coronavirus se dio a conocer en todo el mundo en 2002 cuando causó el Síndrome Respiratorio Agudo Severo (SARS), una enfermedad que se propagó a varios países desde China, causando alrededor de 800 víctimas, el 10% de las 8000 personas que se infectaron. Una nueva cepa de coronavirus, que apareció en Jordania en 2012, causó el Síndrome Respiratorio del Medio Oriente (MERS), una enfermedad que se propagó en el Medio Oriente y llegó a Corea del Sur, matando al 35% de las 2500 personas infectadas, una enfermedad que mostró una impresionante tasa de letalidad del 35%. Y en China, en 2019, surgió otra cepa del virus, el Nuevo Coronavirus, responsable de la enfermedad CoViD-19 que está causando esta pandemia. Su tasa de mortalidad es del 3 al 4%, pero su capacidad de transmisión es mucho mayor que la de otros virus, ya que se transmite incluso por personas sin ningún tipo de síntomas y tiene una capacidad y velocidad de reproducción mucho más altas que las de otros virus.'
    )
    st.header('¿Qué es COVID-19?')
    st.write(
        'COVID-19 es una enfermedad causada por el coronavirus SARS-CoV-2, que presenta un cuadro clínico que abarca desde infecciones asintomáticas hasta afecciones respiratorias graves. Según la Organización Mundial de la Salud (OMS), la mayoría de los pacientes con COVID-19 (aproximadamente el 80%) pueden ser asintomáticos y aproximadamente el 20% de los casos pueden requerir atención hospitalaria porque tienen dificultad para respirar y de estos casos aproximadamente el 5% puede necesitar apoyo para el tratamiento de la insuficiencia respiratoria (respirador artificial)'
    )
    st.subheader('¿Qué es el coronavirus?')
    st.write(
        'El coronavirus es una familia de virus que causan infecciones respiratorias. El nuevo agente de coronavirus fue descubierto el 31/12/19 después de los casos registrados en China. Causa la enfermedad llamada coronavirus (COVID-19). Los primeros coronavirus humanos se aislaron por primera vez en 1937. Sin embargo, fue en 1965 que el virus se describió como coronavirus, debido al perfil bajo microscopía, que parecía una corona. La mayoría de las personas se infectan con coronavirus comunes a lo largo de sus vidas, y los niños pequeños tienen más probabilidades de infectarse con el tipo más común del virus. Los coronavirus más comunes que infectan a los humanos son el coronavirus alfa 229E y NL63 y el coronavirus beta OC43, HKU1.'
    )
    st.header('Síntomas')
    st.write(
        'Los síntomas de COVID-19 pueden variar desde un simple resfriado hasta una neumonía grave. Los síntomas más comunes son: Tos, Fiebre, Rinitis, Dolor de garganta, Dificultades respiratorias'
    )
    st.header('Transmisión')
    st.write(
        'La transmisión ocurre de una persona enferma a otra o por contacto cercano a través de: Toque o apretón de manos; Gotitas de saliva; Estornudo; Tos; Catarro; Objetos o superficies contaminadas, como teléfonos celulares, mesas, manijas de puertas, juguetes, teclados de computadora, etc.'
    )
    st.header('Fuente de los Datos')
    st.write(
        'Los datos fueron tomados de Github de OurWorldInData, que es una organización sin fines de lucro que recopila y proporciona datos gratuitos sobre "pobreza, enfermedad, hambre, cambio climático, guerra, riesgos existenciales y desigualdad". Los datos mundiales sobre Covid-19 son actualizados diariamente por la organización, que es una de las más respetadas en el mundo en relación con el suministro de datos. Puede acceder a estos datos públicos actualizados aquí.'
    )
    st.markdown("[OurWorldInData](https://ourworldindata.org/)")
    st.header('Biblioteca')
    st.write(
        'Comprender los datos es tan importante como el análisis exploratorio, porque si no se comprende lo que significa y de dónde provienen los datos, no se podrá realizar un análisis completo.'
    )

    # Data dictionary: one description and one source per dataset column,
    # listed in the column order of the frame (including 'date').
    informacion = {
        'Descripción': [
            'Letras y códigos de los países en base a ISO 3166-1 alpha-3',
            'Continente',
            'Localización Geográfica',
            'Fecha de Observación',
            'Total de Casos Confirmados de COVID19',
            'Nuevos Casos Confirmados de COVID19',
            'Total de Muertes Atribuídas al COVID19',
            'Nuevas Muertes Atribuídas al COVID19',
            'Total de Casos Confirmados de COVID19 por millón de habitantes',
            'Nuevos Casos Confirmados de COVID19 por millón de habitantes',
            'Total de Muertes Atribuídas al COVID19 por millón de habitantes',
            'Nuevas Muertes Atribuídas al COVID19 por millón de habitantes',
            'Total de Pruebas para COVID19',
            'Nuevas Pruebas para COVID19',
            'Nuevas Pruebas de COVID19 (suavizado por 7 dias)',
            'Total de Pruebas de COVID19 por 1000 habitantes',
            'Nuevas Pruebas de COVID19 por 1000 habitantes',
            'Nuevas Pruebas de COVID19 (suavizado por 7 dias) por 1000 habitantes',
            'Unidades usadas para informar datos de pruebas',
            'Indice de Respuesta del Gobierno',
            'Población en 2020',
            'Densidad demográfica (en km cuadrados)',
            'Média de Edad de la Población',
            'Proporción de la población con 65 años o más',
            'Proporción de la población con 70 años o más',
            'PIB a la par con el poder de compra (dólares internacionales constantes de 2011)',
            'Porcentaje de la población que vive en extrema pobreza, año más reciente desde 2010',
            'Tasa de mortalidad por enfermedades cardiovasculares en 2017',
            'Prevalencia de diabetes (% de población entre 20 y 79 años) en 2017',
            'Porcentaje de mujeres que fuman',
            'Porcentaje de hombres que fuman',
            'Porcentaje de la población con instalaciones básicas de lavado de manos en las instalaciones, en el último año disponible',
            'Camas de hospital por cada 1,000 personas, último año disponible desde 2010',
            'Esperanza de vida al nacer en 2019'
        ],
        'Origen': [
            'International Organization for Standardization',
            'Our World in Data',
            'Our World in Data',
            'Our World in Data',
            'Centro Europeo para la Prevención y el Control de Enfermedades',
            'Centro europeo para la prevención y el control de enfermedades',
            'Centro europeo para la prevención y el control de enfermedades',
            'Centro europeo para la prevención y el control de enfermedades',
            'Centro europeo para la prevención y el control de enfermedades',
            'Centro europeo para la prevención y el control de enfermedades',
            'Centro europeo para la prevención y el control de enfermedades',
            'Centro europeo para la prevención y el control de enfermedades',
            'National government reports',
            'National government reports',
            'National government reports',
            'National government reports',
            'National government reports',
            'National government reports',
            'National government reports',
            ' Oxford COVID-19 Government Response Tracker, Blavatnik School of Government',
            'United Nations',
            'World Bank',
            'UN Population Division',
            'Banco Mundial',
            'Naciones Unidas, Departamento de Asuntos Económicos y Sociales, División de Población (2017)',
            ' Banco Mundial',
            ' Banco Mundial',
            'Red Global Colaborativa de Carga de Enfermedades',
            ' Banco Mundial',
            ' Banco Mundial',
            ' Banco Mundial',
            'División de Estadística de las Naciones Unidas',
            'OCDE, Eurostat, Banco Mundial, registros del gobierno nacional y otras fuentes',
            'James C. Riley, Clio Infra, División de Población de las Naciones Unidas'
        ]
    }
    # Label each description with its column name; this uses the columns of
    # the frame as loaded, i.e. before 'date' becomes the index.
    df_info = pd.DataFrame(informacion,
                           columns=['Descripción', 'Origen'],
                           index=df.columns)
    st.table(df_info)

    st.subheader("Base de Datos del COVID19 mostrando los 5 primeras filas")
    datos = _with_date_index(df)
    st.dataframe(datos[datos.location == 'World'].head(5))

    # Description
    if st.checkbox("Mostrar Descripcion de la Base de Datos"):
        st.write(datos.describe())

    # Shape
    if st.checkbox("Mostrar Dimensiones de la Base de Datos"):
        st.write(datos.shape)
        data_dim = st.radio("Mostrar Dimensiones por: ",
                            ("Filas", "Columnas"))
        if data_dim == "Filas":
            st.write("Numero de Filas: ")
            st.write(datos.shape[0])
        elif data_dim == "Columnas":
            st.write("Numero de Columnas: ")
            st.write(datos.shape[1])

    # Correlation
    st.header("Correlación")
    st.write(
        "La correlación indica la fuerza y la dirección de una relación lineal y proporcionalidad entre dos variables estadísticas."
    )
    columnas = datos.columns
    st.info(
        "El siguiente mapa de calor representa una correlación visual, que varía de -1 a 1, donde -1 se considera sin correlación y 1 se considera una correlación perfecta."
    )
    columnas_seleccionadas = st.multiselect(
        "Selecciona las columnas para generar el mapa de correlacion: ",
        columnas)
    if len(columnas_seleccionadas) > 0:
        # Keep numeric columns only: recent pandas raises when corr() meets
        # text columns such as 'location' instead of dropping them.
        correlacion = datos[columnas_seleccionadas].select_dtypes(
            include='number').corr()
        fig, ax = plt.subplots(figsize=(12, 12))
        st.write(
            sns.heatmap(correlacion, annot=True, fmt='.2f', ax=ax,
                        square=True))
        st.pyplot()

    st.success(
        'En el Menú Lateral Puedes Seleccionar y Empezar con el Análisis Exploratorio del COVID-19'
    )
    st.write("Desarrollado por: Juan Minango, David Minango y Rafael Bolsoni")
    st.markdown("[JD-TECHN](https://jdtechn.com/)")
    st.write("Con el auspicio de:")
    st.markdown(
        "[Capacitate Ecuador](https://www.facebook.com/Capacitate.Ecu/)")


def _pagina_pais(df):
    """Render the per-country page: flag, data, charts and comparisons."""
    st.header("Análisis Exploratorio por País")
    datos = _with_date_index(df)

    # Country selection. Exact equality is required here: a substring/regex
    # match would, for example, mix "Nigeria" into "Niger".
    list_paises = datos.location.unique()
    paises = st.selectbox("Escoja el Pais", list_paises)
    paises_df = datos[datos['location'] == paises]

    # Flag file stem -> location name as it appears in the dataset.
    countries_images = {
        'af': 'Afghanistan', 'al': 'Albania', 'dz': 'Algeria',
        'as': 'American Samoa', 'ab': 'Antigua and Barbuda', 'ad': 'Andorra',
        'ao': 'Angola', 'ai': 'Anguilla', 'aq': 'Antarctica',
        'ag': 'Antigua And Barbuda', 'ar': 'Argentina', 'am': 'Armenia',
        'aw': 'Aruba', 'au': 'Australia', 'at': 'Austria',
        'az': 'Azerbaijan', 'bs': 'Bahamas', 'bh': 'Bahrain',
        'bd': 'Bangladesh', 'bb': 'Barbados', 'by': 'Belarus',
        'be': 'Belgium', 'bz': 'Belize', 'bj': 'Benin', 'bm': 'Bermuda',
        'bt': 'Bhutan', 'bo': 'Olivia', 'ba': 'Bosnia and Herzegovina',
        'bw': 'Botswana', 'bv': 'Bouvet Island', 'br': 'Brazil',
        'io': 'British Indian Ocean Territory', 'bn': 'Brunei',
        'bg': 'Bulgaria', 'bf': 'Burkina Faso', 'bi': 'Burundi',
        'kh': 'Cambodia', 'cm': 'Cameroon', 'ca': 'Canada',
        'cv': 'Cape Verde', 'ky': 'Cayman Islands',
        'cf': 'Central African Republic', 'td': 'Chad', 'cl': 'Chile',
        'cn': 'China', 'cx': 'Hristmas Island',
        'cc': 'Cocos (Keeling) Islands', 'co': 'Colombia', 'km': 'Comoros',
        'cg': 'Congo', 'cd': 'Democratic Republic of Congo',
        'ck': 'Cook Islands', 'cr': 'Costa Rica', 'ci': "Cote d'Ivoire",
        'hr': 'Croatia', 'cu': 'Cuba', 'cy': 'Cyprus',
        'cz': 'Czech Republic', 'dk': 'Denmark', 'dj': 'Djibouti',
        'dm': 'Dominica', 'do': 'Dominican Republic', 'ec': 'Ecuador',
        'eg': 'Egypt', 'eh': 'Western Sahara', 'sv': 'El Salvador',
        'gq': 'Equatorial Guinea', 'er': 'Eritrea', 'ee': 'Estonia',
        'et': 'Ethiopia', 'fk': 'Falkland Islands', 'fo': 'Aroe Islands',
        'fj': 'Fiji', 'fi': 'Finland', 'fr': 'France',
        'gf': 'French Guiana', 'pf': 'French Polynesia',
        'tf': 'French Southern Territories', 'ga': 'Gabon', 'gm': 'Gambia',
        'ge': 'Georgia', 'de': 'Germany', 'gh': 'Ghana', 'gi': 'Gibraltar',
        'gr': 'Greece', 'gl': 'Greenland', 'gd': 'Grenada',
        'gp': 'Guadeloupe', 'gu': 'Guam', 'gt': 'Guatemala', 'gn': 'Guinea',
        'gw': 'Guinea-Bissau', 'gy': 'Guyana', 'ht': 'Haiti',
        'hm': 'Heard Island And Mcdonald Islands', 'hn': 'Honduras',
        'hk': 'Hong Kong', 'hu': 'Hungary', 'is': 'Iceland', 'in': 'India',
        'id': 'Indonesia', 'ir': 'Iran', 'iq': 'Iraq', 'ie': 'Ireland',
        'il': 'Israel', 'it': 'Italy', 'jm': 'Jamaica', 'jp': 'Japan',
        'jo': 'Jordan', 'kz': 'Kazakhstan', 'ke': 'Kenya', 'ki': 'Kiribati',
        'kp': "Korea, Democratic People'S Republic Of", 'kr': 'South Korea',
        'kw': 'Kuwait', 'kg': 'Kyrgyzstan', 'la': 'Laos', 'lv': 'Latvia',
        'lb': 'Lebanon', 'ls': 'Lesotho', 'lr': 'Liberia', 'ly': 'Libya',
        'li': 'Liechtenstein', 'lt': 'Lithuania', 'lu': 'Luxembourg',
        'mo': 'Macao', 'mk': 'Macedonia', 'mg': 'Madagascar',
        'mw': 'Malawi', 'my': 'Malaysia', 'mv': 'Maldives', 'ml': 'Mali',
        'mt': 'Malta', 'mh': 'Marshall Islands', 'mq': 'Martinique',
        'mr': 'Mauritania', 'mu': 'Mauritius', 'yt': 'Mayotte',
        'mx': 'Mexico', 'fm': 'Micronesia, Federated States Of',
        'md': 'Moldova', 'mc': 'Monaco', 'mn': 'Mongolia',
        'ms': 'Montserrat', 'ma': 'Morocco', 'mz': 'Mozambique',
        'mm': 'Myanmar', 'na': 'Namibia', 'nr': 'Nauru', 'np': 'Nepal',
        'nl': 'Netherlands', 'an': 'Netherlands Antilles',
        'nc': 'New Caledonia', 'nz': 'New Zealand', 'ni': 'Nicaragua',
        'ne': 'Niger', 'ng': 'Nigeria', 'nu': 'Niue',
        'nf': 'Norfolk Island', 'mp': 'Northern Mariana Islands',
        'no': 'Norway', 'om': 'Oman', 'pk': 'Pakistan', 'pw': 'Palau',
        'ps': 'Palestine', 'pa': 'Panama', 'pg': 'Papua New Guinea',
        'py': 'Paraguay', 'pe': 'Peru', 'ph': 'Philippines',
        'pn': 'Pitcairn', 'pl': 'Poland', 'pt': 'Portugal',
        'pr': 'Puerto Rico', 'qa': 'Qatar', 're': 'Réunion',
        'ro': 'Romania', 'ru': 'Russia', 'rw': 'Rwanda',
        'sh': 'Saint Helena', 'kn': 'Saint Kitts and Nevis',
        'lc': 'Saint Lucia', 'pm': 'Saint Pierre And Miquelon',
        'vc': 'Saint Vincent and the Grenadines', 'ws': 'Samoa',
        'sm': 'San Marino', 'st': 'Sao Tome and Principe',
        'sa': 'Saudi Arabia', 'sn': 'Senegal', 'cs': 'Montenegro',
        'sc': 'Seychelles', 'sl': 'Sierra Leone', 'sg': 'Singapore',
        'sk': 'Slovakia', 'si': 'Slovenia', 'sb': 'Solomon Islands',
        'so': 'Somalia', 'za': 'South Africa',
        'gs': 'South Georgia And South Sandwich Islands', 'es': 'Spain',
        'lk': 'Sri Lanka', 'sd': 'Sudan', 'sr': 'Suriname',
        'sj': 'Svalbard And Jan Mayen', 'sz': 'Swaziland', 'se': 'Sweden',
        'ch': 'Switzerland', 'sy': 'Syria', 'tw': 'Taiwan',
        'tj': 'Tajikistan', 'tz': 'Tanzania', 'th': 'Thailand',
        'tl': 'Timor', 'tg': 'Togo', 'tk': 'Tokelau', 'to': 'Tonga',
        'tt': 'Trinidad and Tobago', 'tn': 'Tunisia', 'tr': 'Turkey',
        'tm': 'Turkmenistan', 'tc': 'Turks and Caicos Islands',
        'tv': 'Tuvalu', 'ug': 'Uganda', 'ua': 'Ukraine',
        'ae': 'United Arab Emirates', 'gb': 'United Kingdom',
        'us': 'United States', 'um': 'United States Minor Outlying Islands',
        'uy': 'Uruguay', 'uz': 'Uzbekistan', 've': 'Venezuela',
        'vu': 'Vanuatu', 'vn': 'Vietnam', 'vg': 'British Virgin Islands',
        'vi': 'U.S. Virgin Islands', 'wf': 'Wallis And Futuna',
        'ye': 'Yemen', 'zw': 'Zimbabwe', 'bl': 'Bolivia',
        'bonaire': 'Bonaire Sint Eustatius and Saba', 'cura': 'Curacao',
        'faeroe': 'Faeroe Islands', 'guy': 'Guernsey', 'ilm': 'Isle of Man',
        'jey': 'Jersey', 'ksv': 'Kosovo', 'ser': 'Serbia',
        'sint': 'Sint Maarten (Dutch part)', 'ssd': 'South Sudan',
        'usav': 'United States Virgin Islands', 'vtc': 'Vatican',
        'zmb': 'Zambia', 'wrl': 'World', 'int': 'International'
    }
    # Show the flag of every entry whose name equals the selected location.
    for k, v in countries_images.items():
        if v == paises:
            temp_images = 'cflags/{}.png'.format(k)
            img = Image.open(os.path.join(temp_images)).convert('RGB')
            st.image(img)

    st.dataframe(paises_df)
    st.subheader('Descripción de la Base de Datos:')
    st.dataframe(paises_df.describe())

    # World totals, used as the reference series in the comparison charts.
    df_World = datos[datos['location'] == 'World']

    # Charts for the selected country
    st.subheader('Gráficos Estadísticos de {}'.format(paises))
    if st.checkbox(
            "Gráfico del Total de Casos y Muertes de COVID-19 en {}".format(
                paises)):
        df_temporal = pd.DataFrame({
            'Total_Casos': paises_df['total_cases'],
            'Total_Muertes': paises_df['total_deaths']
        })
        st.line_chart(df_temporal, use_container_width=True)
    if st.checkbox(
            "Gráfico de Nuevos Casos Diarios de COVID-19 en {}".format(
                paises)):
        df_temporal = pd.DataFrame(
            {'Nuevos_Casos_Diarios': paises_df['new_cases']})
        st.bar_chart(data=df_temporal, use_container_width=True)
    if st.checkbox(
            "Gráfico de Nuevas Muertes Diarias de COVID-19 en {}".format(
                paises)):
        df_temporal = pd.DataFrame(
            {'Nuevas_Muertes_Diarias': paises_df['new_deaths']})
        st.bar_chart(data=df_temporal, use_container_width=True)
    if st.checkbox(
            "Gráfico de Tasa de Mortalidad de COVID-19 en {}".format(paises)):
        # Single column only; no empty placeholder series in the chart.
        df_temporal = pd.DataFrame({
            'Tasa de Mortalidad en %':
            (paises_df.total_deaths / paises_df.total_cases) * 100
        })
        st.bar_chart(data=df_temporal, use_container_width=True)

    # Compared with the world
    st.subheader(
        'Gráficos Estadísticos de {} en Comparacion al Mundo'.format(paises))
    if st.checkbox(
            "Gráfico del Total de Casos a Nivel Mundial y de {}".format(
                paises)):
        plt.plot(paises_df['total_cases'],
                 label='Total de Casos {}'.format(paises))
        plt.plot(df_World['total_cases'],
                 label='Total de Casos a Nivel Mundial')
        plt.yscale("log")
        plt.title('Escala Logaritmica')
        plt.ylabel('Cantidad')
        plt.xlabel('Fechas/Mes')
        plt.grid()
        plt.legend()
        st.pyplot()
    if st.checkbox(
            "Gráfico del Total de Muertes a Nivel Mundial y de {}".format(
                paises)):
        # These series are deaths, so the legend says so.
        plt.plot(paises_df['total_deaths'],
                 label='Total de Muertes {}'.format(paises))
        plt.plot(df_World['total_deaths'],
                 label='Total de Muertes a Nivel Mundial')
        plt.yscale("log")
        plt.title('Escala Logaritmica')
        plt.ylabel('Cantidad')
        plt.xlabel('Fechas/Mes')
        plt.grid()
        plt.legend()
        st.pyplot()
    if st.checkbox(
            "Gráfico de Tasa de Mortalidad a Nivel Mundial y de {}".format(
                paises)):
        df_mort_pais = (paises_df.total_deaths / paises_df.total_cases) * 100
        # Death rate for the world
        df_mort_mundo = (df_World.total_deaths / df_World.total_cases) * 100
        plt.plot(df_mort_pais,
                 label='Tasa de mortalidad en {}'.format(paises))
        plt.plot(df_mort_mundo, label='Tasa de mortalidad en el Mundo')
        plt.title('Tasa de Mortalidad en {} vs Mundo (Mensual)'.format(paises))
        plt.ylabel('% de Mortalidad')
        plt.legend()
        plt.grid()
        st.pyplot()

    # Compared with other countries
    st.subheader('Gráficos Comparativos de {} con Otros Paises'.format(paises))
    paises_seleccionados = datos.location.unique().tolist()
    selected_columns = st.multiselect("Seleccionar los Paises a Comparar: ",
                                      paises_seleccionados)
    if len(selected_columns) > 0:
        st.warning(
            "ADVERTENCIA: Es recomendable seleccionar pocos paises para agilizar el procesamiento"
        )
        if st.checkbox("Realizar los Gráficos Comparativos"):
            # Add the page's country once, even if it was also picked above.
            comparados = list(selected_columns)
            if paises not in comparados:
                comparados.append(paises)
            df_comparacion = datos[datos['location'].isin(comparados)]

            # Death rate
            st.subheader(
                "Comparativa de la Tasa de Mortalidad de COVID-19 de {}".
                format(paises))
            for pais in comparados:
                filas = df_comparacion[df_comparacion['location'] == pais]
                df_mortalidad = (filas.total_deaths / filas.total_cases) * 100
                plt.plot(df_mortalidad, label=pais)
            plt.legend()
            plt.grid()
            plt.xlabel("Fechas/Mes")
            plt.ylabel(
                "% Mortalidad [Numero Total de Muertes/Numero Total de Casos]"
            )
            st.pyplot()

            # Totals on the last date available for the selected country.
            indice_paises = paises_df.index[-1]
            st.subheader(
                "Total de Casos y Muertes de COVID-19 de {} al {}".format(
                    paises, indice_paises))
            # A list of labels always yields a DataFrame, even when only a
            # single row matches that date.
            df_aux = datos.loc[[indice_paises], [
                'location', 'total_cases', 'total_deaths',
                'total_cases_per_million', 'total_deaths_per_million'
            ]]
            df_aux = df_aux[df_aux['location'].isin(comparados)]

            # x/y are passed by keyword: newer seaborn releases no longer
            # accept them positionally.
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 6))
            sns.barplot(x=df_aux.location,
                        y=df_aux.total_cases,
                        palette='Paired',
                        ax=ax1)
            ax1.set_xlabel('Paises')
            ax1.set_ylabel('Total de Casos')
            ax1.set_title('Total de Casos al {}'.format(indice_paises))
            sns.barplot(x=df_aux.location,
                        y=df_aux.total_deaths,
                        palette='Paired',
                        ax=ax2)
            ax2.set_xlabel('Paises')
            ax2.set_ylabel('Total de Muertes')
            ax2.set_title('Total de Muertes al {}'.format(indice_paises))
            plt.tight_layout()
            st.pyplot()

            st.subheader(
                "Total de Casos y Muertes de COVID-19 por millon de habitantes de {} al {}"
                .format(paises, indice_paises))
            fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 6))
            sns.barplot(x=df_aux.location,
                        y=df_aux.total_cases_per_million,
                        palette='Paired',
                        ax=ax1)
            ax1.set_xlabel('Paises')
            ax1.set_ylabel('Total de Casos por Millon de Habitantes')
            ax1.set_title('Total de Casos/millon al {}'.format(indice_paises))
            sns.barplot(x=df_aux.location,
                        y=df_aux.total_deaths_per_million,
                        palette='Paired',
                        ax=ax2)
            ax2.set_xlabel('Paises')
            ax2.set_ylabel('Total de Muertes por Millon de Habitantes')
            ax2.set_title(
                'Total de Muertes/millon al {}'.format(indice_paises))
            plt.tight_layout()
            st.pyplot()


def _pagina_mundial(df):
    """Render the worldwide page: three top-10 bar charts for the latest date."""
    st.header("Análisis Exploratorio a Nivel Mundial")
    datos = _with_date_index(df)

    # Last date on which the 'World' aggregate has a row.
    df_world = datos[datos.location == 'World']
    indice = df_world.index[-1]

    # A list of labels always yields a DataFrame; row 0 after sorting is
    # skipped, as before, so that rows 1-10 are charted.
    st.subheader(
        "TOP 10 Paises con Más Casos de COVID-19 al {}".format(indice))
    df_aux = datos.loc[[indice], ['location', 'total_cases']].sort_values(
        by='total_cases', ascending=False).iloc[1:11]
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.grid()
    sns.barplot(x=df_aux.location,
                y=df_aux.total_cases,
                palette='Paired',
                ax=ax)
    ax.set_xlabel('Países')
    ax.set_ylabel('Total de Casos')
    plt.tight_layout()
    st.pyplot()

    st.subheader(
        "TOP 10 Paises con Más Muertes de COVID-19 al {}".format(indice))
    df_aux = datos.loc[[indice], ['location', 'total_deaths']].sort_values(
        by='total_deaths', ascending=False).iloc[1:11]
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=df_aux.location,
                y=df_aux.total_deaths,
                palette='Paired',
                ax=ax)
    ax.grid()
    ax.set_xlabel('Países')
    ax.set_ylabel('Total de Muertes')
    plt.tight_layout()
    st.pyplot()

    st.subheader(
        "Tasa de Mortalidad de COVID-19 del TOP 10 Paises {}".format(indice))
    df_aux = datos.loc[[indice],
                       ['location', 'total_deaths', 'total_cases'
                        ]].sort_values(by='total_deaths',
                                       ascending=False).iloc[1:11]
    # assign() returns a new frame, so nothing is written into a slice.
    df_aux = df_aux.assign(
        mortalidad=(df_aux.total_deaths / df_aux.total_cases) * 100)
    fig, ax = plt.subplots(figsize=(10, 6))
    sns.barplot(x=df_aux.location,
                y=df_aux.mortalidad,
                palette='Paired',
                ax=ax)
    ax.set_xlabel('Países')
    ax.set_ylabel('Tasa de Mortalidad')
    ax.grid()
    plt.tight_layout()
    st.pyplot()


def main():
    """Render the COVID-19 app: title, sidebar menu and the selected page.

    Reads the module-level frame `df` and hands it to the page helpers,
    which work on date-indexed copies so that `df` itself is left untouched.
    """
    st.title("COVID-19")
    st.sidebar.title('Menú COVID-19')
    add_selectbox = st.sidebar.selectbox(
        'Por favor, selecciona una opción: ',
        ('Principal', 'Análisis Exploratorio por País',
         'Análisis Exploratorio a Nivel Mundial'))

    if add_selectbox == 'Principal':
        _pagina_principal(df)
    elif add_selectbox == 'Análisis Exploratorio por País':
        _pagina_pais(df)
    elif add_selectbox == 'Análisis Exploratorio a Nivel Mundial':
        _pagina_mundial(df)
# evaluating accuracy for model built on full featureset full_feat_acc = cross_val_score(lr, bc_X, bc_y, scoring='accuracy', cv=5) # evaluating accuracy for model built on selected featureset sel_feat_acc = cross_val_score(lr, bc_SX, bc_y, scoring='accuracy', cv=5) """) lr = LogisticRegression(max_iter=100000) # accuracy of full feat validation (extracted from the reaserch notebook) full_feat_acc = pd.Series([0.938596, 0.947368, 0.982456, 0.929825, 0.955752]) # evaluating accuracy for model built on selected featureset sel_feat_acc = pd.Series( cross_val_score(lr, bc_SX, bc_y, scoring='accuracy', cv=5)) df_acc = pd.concat([full_feat_acc, sel_feat_acc], axis=1) st.line_chart(df_acc) st.write(""" ### **Accuracy** """) st.dataframe( pd.DataFrame([np.average(full_feat_acc), np.average(sel_feat_acc) ]).T.rename({ 0: "30 features", 1: "{} features".format(k) }, axis=1)) st.write(""" ## Conclusion
st.write( f"""**{top_host_1.iloc[0].host_name}** is at the top with {listingcounts.iloc[0]} property listings. **{top_host_2.iloc[1].host_name}** is second with {listingcounts.iloc[1]} listings. Following are randomly chosen listings from the two displayed as JSON using [`st.json`](https://streamlit.io/docs/api.html#streamlit.json).""" ) st.json({top_host_1.iloc[0].host_name: top_host_1\ [["name", "neighbourhood", "room_type", "minimum_nights", "price"]]\ .sample(2, random_state=4).to_dict(orient="records"), top_host_2.iloc[0].host_name: top_host_2\ [["name", "neighbourhood", "room_type", "minimum_nights", "price"]]\ .sample(2, random_state=4).to_dict(orient="records")}) #diaplay linechart st.write("line chart") st.line_chart(df['price'].head(100)) #display area chart st.write("area chart") st.area_chart(df[['price', 'number_of_reviews']].head(100)) #display bar chart st.write("bar chart") st.bar_chart(df[['price', 'number_of_reviews']].head(100)) # display pydeck_chart # st.subheader("pydeck chart") # st.pydeck_chart(pdk.Deck( # map_style='mapbox://styles/mapbox/light-v9', # initial_view_state=pdk.ViewState( # latitude=37.76,
def main():
    """Render the "Delhi and Dhaka Data" dashboard.

    Loads ``datasets/final.csv``, draws an area chart of the population
    columns, offers optional table / column views and finally draws a
    user-configured plot of the selected columns.

    The city and indicator chosen in the sidebar are only used in the
    sub-headers; they do not filter the data.
    """
    st.title("Delhi and Dhaka Data")
    st.markdown("Select a city to view different charts of various indicators. ")

    city = st.sidebar.selectbox(label="Select a City",
                                index=0,
                                options=list(COUNTRY_MAPPINGS.values()))
    indicator = st.sidebar.selectbox("Select the Indicators",
                                     index=0,
                                     options=list(INDICATORS_MAPPINGS.values()))

    st.subheader(city + ' area chat with all the indicators')

    folder_path = 'datasets'
    selected_filename = 'final.csv'
    filename = os.path.join(folder_path, selected_filename)

    # Only these columns are used; indexing with the same list after the
    # read fixes the column order regardless of the order in the CSV file.
    selected_columns_df = [
        'Ward_No', 'Ward_Name', 'geometry', 'No_HH', 'TOT_P', 'TOT_M',
        'TOT_F', 'ch_t_t', 'tenure_o', 'l_elect', 'hh_with_lat', 'no_latr',
        'latr_pub', 'have_bath', 'cf_fw', 'cf_lpg', 'kf_t', 'hh_bank',
        'asset_bic', 'asset_2w', 'asset_4w', 'asset_tv_c',
    ]
    df = pd.read_csv(filename, usecols=selected_columns_df)
    df = df[selected_columns_df]

    # First chart: total / male / female population.
    st.area_chart(df[['TOT_P', 'TOT_M', 'TOT_F']],
                  use_container_width=False,
                  width=800)

    # Optional views of the raw data.
    if st.checkbox("Select to see first 10 Dataset"):
        st.dataframe(df.head(10))

    if st.button("Click to see all the column names"):
        st.write(df.columns)

    if st.checkbox("Selected Columns to Show"):
        all_columns = df.columns.tolist()
        selected_columns = st.multiselect("Select", all_columns)
        new_df = df[selected_columns]
        st.dataframe(new_df)

    st.subheader(city + ' - ' + indicator)

    st.set_option('deprecation.showPyplotGlobalUse', False)

    st.subheader("Customizable Plot")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox("Select Type of Plot",
                                ["area", "bar", "line", "hist", "box", "kde"])
    selected_columns_names = st.multiselect("Select Columns to Plot",
                                            all_columns_names)

    if st.button("Generate Customizable Plot"):
        if not selected_columns_names:
            # Plotting an empty column selection is meaningless and makes
            # the pandas-based plot kinds raise.
            st.warning("Select at least one column to plot.")
        else:
            st.success("Generating plot of {} for {}".format(
                type_of_plot, selected_columns_names))
            custom_data = df[selected_columns_names]

            # Chart kinds drawn with a progress bar and a closing message.
            charts_with_progress = {
                'area': st.area_chart,
                'bar': st.bar_chart,
            }

            if type_of_plot in charts_with_progress:
                my_bar = st.progress(0)
                for percent_complete in range(100):
                    time.sleep(0.1)
                    my_bar.progress(percent_complete + 1)
                charts_with_progress[type_of_plot](custom_data)
                st.success('{} plot created'.format(type_of_plot))
            elif type_of_plot == 'line':
                st.line_chart(custom_data)
            else:
                # 'hist', 'box' and 'kde' are drawn by pandas/matplotlib.
                # Hand the figure itself to Streamlit instead of relying on
                # the global pyplot state.
                custom_plot = custom_data.plot(kind=type_of_plot)
                st.pyplot(custom_plot.get_figure())
st.dataframe(df_a) st.text(' ') st.text(' ') st.text('----------------------------------------------------') # In[5]: st.markdown(''' < 日平均気温データ・グラフ > ''') selected_targets_a = st.multiselect('Month - average', df_a.columns) view_a = df_a[selected_targets_a] st.line_chart(view_a) st.text(' ') st.text(' ') st.text('----------------------------------------------------') # In[6]: # 日最高気温データ(Excel)の読み込み df_h1= pd.read_excel('data_h.xlsx') # 'Year'をindexに設定 df_h = df_h1.set_index('Year')
import streamlit as st
import yfinance as yf

st.write("""
# Stock Exchange
""")

# Date window shared by all three charts.
START_DATE = '2020-01-01'
END_DATE = '2021-01-31'

googleStock = yf.Ticker('GOOGL')
appleStock = yf.Ticker('AAPL')
# Samsung Electronics as listed on the Korea Exchange; the previous 'AAPL'
# value made this chart a duplicate of the Apple one.
samsungStock = yf.Ticker('005930.KS')

# Daily bars over an explicit start/end window. No `period` is passed: it is
# redundant once start and end are given, and the old value 'id' was a typo.
googleStockData = googleStock.history(interval='1d',
                                      start=START_DATE,
                                      end=END_DATE)
appleStockData = appleStock.history(interval='1d',
                                    start=START_DATE,
                                    end=END_DATE)
samsungStockData = samsungStock.history(interval='1d',
                                        start=START_DATE,
                                        end=END_DATE)

# One closing-price chart per company.
st.line_chart(googleStockData.Close)
st.line_chart(appleStockData.Close)
st.line_chart(samsungStockData.Close)
generate_plot = st.button('Generate Plot') if generate_plot: st.success("Generating {} plot for {}".format( type_of_plot, selected_columns_names)) if type_of_plot == 'area': custom_data_1 = data[selected_columns_names] st.area_chart(custom_data_1) elif type_of_plot == 'bar': custom_data_2 = data[selected_columns_names] st.bar_chart(custom_data_2) elif type_of_plot == 'line': custom_data_3 = data[selected_columns_names] st.line_chart(custom_data_3) # Custom Plot elif type_of_plot: cust_plot = data[selected_columns_names].plot( kind=type_of_plot) st.write(cust_plot) st.pyplot() elif choice == 'ML Models': st.subheader('ML Models') uploaded_file_2 = st.file_uploader('Upload dataset', type=['csv']) if uploaded_file_2 is not None: data1 = pd.read_csv(uploaded_file_2) st.dataframe(data1)
""" @st.cache() def get_data(): df_s = pd.read_csv('data/housing.csv') return df_s df = get_data() st.header('MVP предсказание стоимости жилья') if st.checkbox('Отобразить данные'): st.write(df) st.line_chart(df) _test_size = st.selectbox('размер тестовой выборки: ', (0.1, 0.15, 0.2, 0.25, 0.3, 0.35), index=3, format_func=lambda x: f"{x*100:0.0f}%") if st.button('Создать модель'): X_train, X_test, y_train, y_test = train_test_split(df.drop('MEDV', axis=1), df['MEDV'], test_size=_test_size, random_state=0) st.text('Размер данных-' + str(X_train.shape) + str(X_test.shape)) st.text('Старт модели')
dim=1) == y.to(device)).float().sum().item() n += y.shape[0] return acc_sum / n ''' # 训练模型 ''' num_epochs = st.slider(label='迭代周期', min_value=5, max_value=200, value=5, step=5) st.subheader("损失折线图") st_chart_loss = st.line_chart() bar = st.progress(0) with st.echo(): def train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, params=None, lr=None, optimizer=None, device=None): process = 0
if option != "": for i in top_stories_articles["results"]: str1 = str1 + i["abstract"] words = word_tokenize(str1) words_no_punc = [] for w in words: if w.isalpha(): words_no_punc.append(w.lower()) stopwords = stopwords.words("english") clean_words = [] for w in words_no_punc: if w not in stopwords: clean_words.append(w) fdist = FreqDist(clean_words) chart_data = pd.DataFrame(fdist.most_common(10), fdist.most_common(10)) st.line_chart(chart_data) else: st.write("You have not selected a topic!") st.subheader("III - Wordcloud") if st.checkbox("Click here to generate wordcloud"): if option != "": wordcloud = WordCloud().generate(str1) plt.figure(figsize=(7, 7)) plt.imshow(wordcloud) plt.axis("off") plt.savefig("top_stories_cloud.png") image1 = Image.open("top_stories_cloud.png") st.image(image1) else:
import streamlit as st
import pandas_datareader.data as web
from datetime import datetime as dt, timedelta as td

# Page heading.
st.title('Crypto Tracker')

# Symbols offered in the drop-down: crypto pairs first, then stocks.
pair_choices = (
    'BTC-USD',
    'ETH-USD',
    'BNB-USD',
    'DOGE-USD',
    # stocks below
    'RELIANCE.NS',
    'IDFCFIRSTB.NS',
    'SBIN.NS',
    'ITC.NS',
    'ZOMATO.NS',
)
opts = st.selectbox('Select Pair', pair_choices)

# Fetch the trailing 365 days for the chosen symbol and chart every
# returned column.
window_start = dt.now() - td(days=365)
window_end = dt.now()
prices = web.get_data_yahoo(opts, start=window_start, end=window_end)

st.line_chart(prices)
def stock_viz_volume(stock):
    """Show a titled line chart of the 'Volume' column for *stock*.

    The quotes are requested from the 'stooq' data source and ordered by
    ascending 'Date' before charting.
    """
    quotes = web.DataReader(stock, 'stooq').sort_values('Date', ascending=True)
    # filter() keeps only the listed columns that exist in the frame.
    volume_only = quotes.filter(['Volume'])

    st.title(f'{stock.upper()} volume.')
    st.line_chart(volume_only)
st.text("Bat Deterrent Installation /per Turbine") # st.subheader("TBD") st.subheader("${:,.2f}".format(float(TotalBDS))) with col3: st.text("NPV Curtailment Losses") st.subheader("${:,.2f}".format( float(NPVCurtLosses) * float(data["Value"]["Number of Turbines"]))) with col4: st.text("NPV Curtailment Losses/Turbine") st.subheader("${:,.2f}".format(float(NPVCurtLosses))) # st.header("Cumulative Loss Due to Curtailment (USD per Turbine)") # st.line_chart(ProjFinancials[["Cummulative Loss"]]) col1, col2 = st.beta_columns(2) with col1: st.header("Site Information") st.table(data) with col2: st.header("Site Location") st.map(windTurbines.loc[windTurbines["p_name"] == option]) st.header("Annual Revenue and Loss Due to Curtailment (USD per Turbine)") st.line_chart(ProjFinancials[["Annual Revenue", "Annual Curt Loss"]]) st.table(ProjFinancials)
st.header('Crop Classification Demo') st.subheader("Upload Crop NDVI Data") k = st.number_input("Maximum No. of Rows to Read", min_value=10, max_value=1000, step=1, value=10, key='readinput') results = None uploaded_file = st.file_uploader( "Choose a CSV file (Maximum 1000 Rows for Performance)", type="csv", key='test') st.subheader("Upload Ground Truth Label (Only for Testing)") ground_truth_file = st.file_uploader( "Choose a CSV file (Should Match the NDVI CSV)", type="csv", key='truth') if uploaded_file is not None: data = pd.read_csv(uploaded_file, nrows=k) st.write(data) st.subheader("Curve Visualization") st.line_chart(data.T.to_numpy()) max_row = data.shape[0]-1 st.subheader("Plot single NDVI curve") ndvi_nrow = st.number_input( "Pick up a row", min_value=0, max_value=max_row, step=1, value=0, key='singleinput') picked_ndvi = data.iloc[ndvi_nrow] show_ndvi = st.button("Show single NDVI Curve") if show_ndvi: play_line_plots(picked_ndvi) st.subheader("Crop Classification") run_model = st.button("Run ML model") if run_model: with st.spinner('Model Running, Input Curve Row No.{}'.format(ndvi_nrow)): picked_input = data.iloc[[ndvi_nrow]] # scaler_info = read_scaler('./standard_scaler.npy') model_input = prepare_input(picked_input)
else: st.sidebar.error('Error: End date must fall after start date.') if sidebar == "tools": ticker = st.text_input("Please enter ticker below. (For SPX use ^GSPC and for VIX use ^VIX)") status_radio = st.radio('Please click Search when you are ready.', ('Entry', 'Search')) options = ['historical regime', 'smoothed variance probability', 'continuous wavelet transform', 'all'] series_type = ['Close', 'Adjusted Close'] if status_radio == "Search": df = yf.download(ticker, start_date, end_date) df_plot = df[['Close', 'Adj Close']] st.line_chart(df_plot) st.write(df) time_series_type = st.radio("Please select which time series you would like", series_type) time_series_options = st.selectbox("Please select what kind of analysis", options) time_series_start = st.radio("Please Select Run when ready", ("Stop", "Run")) if time_series_start == "Run": timeseries = TimeSeries(df, time_series_type, ticker) if time_series_options == "historical regime": output = timeseries.get_regimes() if time_series_options == "smoothed variance probability": output = timeseries.smoothed_probability()
st.markdown( "### Here is a chart for the data fitted with the test data and the regression line" ) errTotal = polyreg.GetBestPolynomial(XTrain, yTrain, XTest, yTest, polynomial) if st.checkbox("Show Code"): showRegression() data = pd.DataFrame(errTotal, columns=["Train Error", "Test Error"]) errTrain = errTotal[:, :1] errTest = errTotal[:, :2] st.markdown("## Error vs. Model Complexity") lastrowTrain = errTrain[0] lastrowTest = errTrain[0] chart = st.line_chart(errTotal[:1], 800, 800) for i in range(errTotal.shape[0]): newrowTrain = errTotal[i] chart.add_rows(newrowTrain) time.sleep(0.1) st.markdown( "### Let's look into the train and test errors producing the line chart above" ) st.write(data) st.markdown(''' As we can see, as the polynomial value increases, the regression line begins to overfit the data, resulting in a lower train error, but increase in test error. ''') elif option == "Logistic Regression":
key="1").upper() name2 = st.sidebar.text_input("Enter first name:", value="Jason", key="2").upper() name3 = st.sidebar.text_input("Enter first name:", value="Brittany", key="3").upper() name4 = st.sidebar.text_input("Enter first name:", value="Karen", key="4").upper() name5 = st.sidebar.text_input("Enter first name:", value="Gary", key="5").upper() names = [ name for name in [name1, name2, name3, name4, name5] if name in total_df.columns ] st.line_chart(total_df[names].rename(columns=capwords)) st.subheader("Guessing ages using first names") st.markdown(""" The popularity of some names spike very quickly, and the age range of people with that name may be very small. Below are confidence intervals of the age of someone, given their first name¹. """) YEAR = 2019 PERCENT = st.slider("", min_value=25, max_value=90, value=50, step=5, format="%d%%")
y_range_3 = [(x, x**3) for x in x_range] show_graphic = st.button(label="Show") y_x = st.checkbox("y = x") y_x2 = st.checkbox("y = x ^ 2") y_x3 = st.checkbox("y = x ^ 3") df = pd.DataFrame(y_range) df2 = pd.DataFrame(y_range_2) df3 = pd.DataFrame(y_range_3) if show_graphic and y_x: my_bar = st.progress(0) for percent_complete in range(100): time.sleep(0.1) my_bar.progress(percent_complete + 1) st.line_chart(data=df) st.info("График y = x нарисован") elif show_graphic and y_x2: my_bar = st.progress(0) for percent_complete in range(100): time.sleep(0.1) my_bar.progress(percent_complete + 1) st.line_chart(data=df2) st.info("График y = x^2 нарисован") elif show_graphic and y_x3: my_bar = st.progress(0) for percent_complete in range(100): time.sleep(0.1) my_bar.progress(percent_complete + 1) st.line_chart(data=df3) st.info("График y = x^3 нарисован")
def cs_body():
    """Render the body of the Streamlit cheat sheet.

    Every section is a sub-header followed by an ``st.code`` block holding
    sample calls. The samples are shown to the reader as text and are never
    executed here, so they must be valid code on their own.

    Returns:
        None
    """
    # Magic commands
    st.subheader('Magic commands')
    st.markdown('''Magic commands allow you to implicitly `st.write()`''')
    st.code('''
\'\'\' _This_ is some __Markdown__ \'\'\'
a=3
'a', a
'dataframe:', data
''')

    # Display text
    # The backslash in the LaTeX sample is doubled so that it reaches the
    # page as a literal "\\pi" rather than as an invalid escape sequence.
    st.subheader('Display text')
    st.code('''
st.text('Fixed width text')
st.markdown('_Markdown_') # see *
st.latex(r\'\'\' e^{i\\pi} + 1 = 0 \'\'\')
st.write('Most objects') # df, err, func, keras!
st.write(['st', 'is <', 3]) # see *
st.title('My title')
st.header('My header')
st.subheader('My sub')
st.code('for i in range(8): foo()')
* optional kwarg unsafe_allow_html = True
''')

    # Display data
    st.subheader('Display data')
    st.code('''
st.dataframe(data)
st.table(data.iloc[0:10])
st.json({'foo':'bar','fu':'ba'})
''')

    # Display charts
    st.subheader('Display charts')
    st.code('''
st.line_chart(data)
st.area_chart(data)
st.bar_chart(data)
st.pyplot(fig)
st.altair_chart(data)
st.vega_lite_chart(data)
st.plotly_chart(data)
st.bokeh_chart(data)
st.pydeck_chart(data)
st.deck_gl_chart(data)
st.graphviz_chart(data)
st.map(data)
''')

    # Display media
    st.subheader('Display media')
    st.code('''
st.image('./header.png')
st.audio(data)
st.video(data)
''')

    # Display interactive widgets
    st.subheader('Display interactive widgets')
    st.code('''
st.button('Hit me')
st.checkbox('Check me out')
st.radio('Radio', [1,2,3])
st.selectbox('Select', [1,2,3])
st.multiselect('Multiselect', [1,2,3])
st.slider('Slide me', min_value=0, max_value=10)
st.text_input('Enter some text')
st.number_input('Enter a number')
st.text_area('Area for textual entry')
st.date_input('Date input')
st.time_input('Time entry')
st.beta_color_picker('Pick a color')
st.file_uploader('File uploader')
''')
    st.write("Use widgets' returned values in variables:")
    st.code('''
>>> for i in range(int(st.number_input('Num:'))): foo()
>>> if st.sidebar.selectbox('I:',['f']) == 'f': b()
>>> my_slider_val = st.slider('Quinn Mallory', 1, 88)
>>> st.write(my_slider_val)
''')

    # Control flow
    st.subheader('Control flow')
    st.code('''
st.stop()
''')

    # Display code
    st.subheader('Display code')
    st.code('''
st.echo()
>>> with st.echo():
>>>     # Code below both executed and printed
>>>     foo = 'bar'
>>>     st.write(foo)
''')

    # Display progress and status
    st.subheader('Display progress and status')
    st.code('''
st.progress(progress__variable_1_to_100)
st.spinner()
>>> with st.spinner(text='In progress'):
>>>     time.sleep(5)
>>>     st.success('Done')
st.balloons()
st.error('Error message')
st.warning('Warning message')
st.info('Info message')
st.success('Success message')
st.exception(e)
''')

    # Placeholders, help, and options
    st.subheader('Placeholders, help, and options')
    st.code('''
st.empty()
>>> my_placeholder = st.empty()
>>> my_placeholder.text('Replaced!')
st.help(pandas.DataFrame)
st.get_option(key)
st.set_option(key, value)
st.beta_set_page_config(layout='wide')
''')

    # Mutate data
    st.subheader('Mutate data')
    st.code('''
DeltaGenerator.add_rows(data)
>>> my_table = st.table(df1)
>>> my_table.add_rows(df2)
>>> my_chart = st.line_chart(df1)
>>> my_chart.add_rows(df2)
''')

    # Optimize performance
    st.subheader('Optimize performance')
    st.code('''
@st.cache
>>> @st.cache
... def foo(bar):
...     # Mutate bar
...     return data
...
>>> d1 = foo(ref1)
>>> # Executes as first time
>>>
>>> d2 = foo(ref1)
>>> # Does not execute; returns cached value, d1==d2
>>>
>>> d3 = foo(ref2)
>>> # Different arg, so function executes
''')

    return None
def main():
    """Semi Auto ML App with Streamlit.

    The sidebar selects one of four activities:

    * ``EDA`` - upload a dataset and inspect shape, columns, summary and the
      value counts of the last column.
    * ``Plot`` - upload a dataset and draw a correlation heat map, a pie
      chart or a custom plot of selected columns.
    * ``Model Building`` - upload a dataset whose last column is the target
      and compare six classifiers with 10-fold cross-validation.
    * ``About`` - a short description of the app.
    """
    st.title("Semi Auto ML App")
    st.text("Using Streamlit == 0.52.1+")

    activities = ["EDA", "Plot", "Model Building", "About"]
    choice = st.sidebar.selectbox("Select Activity", activities)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis")

        # NOTE(review): "xls" is accepted by the uploader but the file is
        # always parsed with read_csv - confirm whether Excel input is
        # really meant to be supported.
        data = st.file_uploader("Upload Dataset", type=["csv", "txt", "xls"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Bound unconditionally: the column picker below needs it even
            # when the "Show Columns" box is left unticked.
            all_columns = df.columns.to_list()

            if st.checkbox("Show Shape"):
                st.write(df.shape)

            if st.checkbox("Show Columns"):
                st.write(all_columns)

            if st.checkbox("Select Columns To Show"):
                selected_columns = st.multiselect("Select Columns",
                                                  all_columns)
                new_df = df[selected_columns]
                st.dataframe(new_df)

            if st.checkbox("Show Summary"):
                st.write(df.describe())

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts())

    elif choice == 'Plot':
        st.subheader("Data Visualization")

        data = st.file_uploader("Upload Dataset", type=["csv", "txt", "xls"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            if st.checkbox("Correlation with Seaborn"):
                # Correlation is only defined for numeric columns; passing
                # text columns makes recent pandas raise.
                numeric_df = df.select_dtypes(include='number')
                fig, ax = plt.subplots()
                sns.heatmap(numeric_df.corr(), annot=True, ax=ax)
                ax.tick_params(axis='y', labelrotation=0)
                st.pyplot(fig)

            if st.checkbox("Pie Chart"):
                all_columns = df.columns.to_list()
                columns_to_plot = st.selectbox("Select 1 Column",
                                               all_columns)
                fig, ax = plt.subplots()
                df[columns_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%", ax=ax)
                st.pyplot(fig)

            all_columns_names = df.columns.tolist()
            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                if not selected_columns_names:
                    # The pandas-based plot kinds raise on an empty frame.
                    st.warning("Select at least one column to plot.")
                else:
                    st.success(
                        "Generating Customize Plot of {} for {}".format(
                            type_of_plot, selected_columns_names))
                    cust_data = df[selected_columns_names]

                    # Plot by Streamlit
                    if type_of_plot == 'area':
                        st.area_chart(cust_data)
                    elif type_of_plot == 'bar':
                        st.bar_chart(cust_data)
                    elif type_of_plot == 'line':
                        st.line_chart(cust_data)
                    # Custom plot (hist / box / kde) via pandas + matplotlib
                    elif type_of_plot:
                        fig, ax = plt.subplots()
                        cust_data.plot(kind=type_of_plot, ax=ax)
                        st.pyplot(fig)

    elif choice == 'Model Building':
        st.subheader("Building ML Model")

        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Every column but the last is a feature; the last is the target.
            X = df.iloc[:, 0:-1]
            Y = df.iloc[:, -1]
            seed = 8

            models = [
                ("LR", LogisticRegression()),
                ("LDA", LinearDiscriminantAnalysis()),
                ("KNN", KNeighborsClassifier()),
                ("CART", DecisionTreeClassifier()),
                ("NB", GaussianNB()),
                ("SVM", SVC()),
            ]

            model_names = []
            model_mean = []
            model_std = []
            all_models = []
            scoring = 'accuracy'

            # random_state only has an effect (and is only accepted by
            # current scikit-learn) together with shuffle=True. A single
            # splitter is shared so every model is scored on the same folds.
            kfold = model_selection.KFold(n_splits=10,
                                          shuffle=True,
                                          random_state=seed)

            # Evaluate each model in turn.
            for name, model in models:
                cv_results = model_selection.cross_val_score(model,
                                                             X,
                                                             Y,
                                                             cv=kfold,
                                                             scoring=scoring)
                model_names.append(name)
                model_mean.append(cv_results.mean())
                model_std.append(cv_results.std())

                # Same figures as a dict per model, for the JSON view.
                accuracy_results = {
                    "model_name": name,
                    "model_accuracy": cv_results.mean(),
                    "standard_deviation": cv_results.std()
                }
                all_models.append(accuracy_results)

            if st.checkbox("Metrics as Table"):
                st.dataframe(
                    pd.DataFrame(zip(model_names, model_mean, model_std),
                                 columns=[
                                     "Model Name", "Model Accuracy",
                                     "Standard Deviation"
                                 ]))

            if st.checkbox("Metrics as JSON"):
                st.json(all_models)

    elif choice == 'About':
        st.subheader("About")
        st.text(
            "This is a Drag and Drop Semi Auto Machine Learning App built using Streamlit and Python"
        )
def app():
    """Render the "Statistics" page.

    The user picks a date range in the sidebar and a stock symbol from
    ``./stock symbols.csv``. The page then shows the company name and
    sector, line charts of the closing price and the volume, and
    ``describe()`` statistics for the downloaded quotes.
    """
    # Add title and intro text
    st.write("""
# Statistics
Select a stock and date range to view statistics on that stock.
""")

    # Create a sidebar header
    st.sidebar.header('Date Range:')

    def get_input():
        """Collect the user's choices.

        Returns:
            tuple: ``(start_date, end_date, stock_symbol, company_name,
            sector)``. The last three are the string renderings of the
            'Symbol', 'Name' and 'Sector' cells of the row(s) whose
            'Symbol' equals the selected value.
        """
        start_date = st.sidebar.date_input("Starting Date:",
                                           value=(datetime.today() -
                                                  timedelta(days=365)),
                                           min_value=datetime(1817, 3, 8),
                                           max_value=datetime.today())
        end_date = st.sidebar.date_input("Ending Date:",
                                         min_value=datetime(1817, 3, 8),
                                         max_value=datetime.today())

        with open('./stock symbols.csv', 'r',
                  encoding='utf-8-sig') as stock_file:
            stock_list = pd.read_csv(stock_file)

        # The first CSV column supplies the options; it is matched against
        # the 'Symbol' column below.
        symbols = stock_list.iloc[:, 0]
        selected = st.selectbox(label="", options=symbols)

        index = stock_list[stock_list['Symbol'] == selected].index.values
        stock_symbol = stock_list['Symbol'][index].to_string(index=False)
        company_name = stock_list['Name'][index].to_string(index=False)
        sector = stock_list['Sector'][index].to_string(index=False)
        return start_date, end_date, stock_symbol.strip(), company_name, sector

    def get_data(stock_symbol, start_date, end_date):
        """Download quotes for *stock_symbol* limited to the chosen dates.

        Returns:
            pandas.DataFrame: the rows whose date lies in
            ``[start_date, end_date]`` (both inclusive), indexed by a
            ``DatetimeIndex`` in ascending order.
        """
        start = pd.to_datetime(start_date)
        end = pd.to_datetime(end_date)

        df = dr.DataReader(stock_symbol,
                           data_source='yahoo',
                           start=start,
                           end=end)
        df = df.set_index(pd.DatetimeIndex(df.index.values))

        # Label slicing on a sorted DatetimeIndex keeps exactly the rows
        # inside the range. It replaces two hand-written scans, the second
        # of which indexed with the first loop's variable and therefore
        # never searched from the end of the frame.
        return df.sort_index().loc[start:end]

    # Get user input
    start_date, end_date, stock_symbol, company_name, sector = get_input()

    # Get the data
    df = get_data(stock_symbol, start_date, end_date)

    # Display stock name and sector header
    st.header("Company:" + company_name + "\n")
    st.header("Market Sector:" + sector + "\n")

    # Display the close price
    st.header("Closing Prices from " + str(start_date) + " to " +
              str(end_date))
    st.line_chart(df['Close'])

    # Display the volume
    st.header("Volume from " + str(start_date) + " to " + str(end_date))
    st.line_chart(df['Volume'])

    # Get statistics on the data
    st.header("Statistics from " + str(start_date) + " to " + str(end_date))
    st.write(df.describe())
df = pd.read_csv(uploaded_file) date_col = df.columns[0] val_col = df.columns[1] df[date_col] = pd.to_datetime(df[date_col]) df['Week_name'] = df[date_col].dt.week df['Month_name'] = df[date_col].dt.month df['Quarter_name'] = df[date_col].dt.quarter df = df.set_index(date_col) st.markdown("### Dataset") st.write(df[[val_col]]) st.markdown("### Plot") st.line_chart(df[val_col]) st.markdown("### Correlation") sn.heatmap(df.corr(), annot=True) st.pyplot() ########################## Train-Test split split_value = int(0.9 * len(df)) train_df = df.iloc[:split_value] test_df = df.iloc[split_value:] st.markdown("## Train-Test split into 90:10 ratio") st.markdown("### Length of Training set: " + str(len(train_df))) st.markdown("### Length of Test set: " + str(len(test_df))) best_p, best_aic = run_auto_var(train_df, 12)
st.subheader(f"Analysing #{hashtag} from {date}") d = {"Positive": [pos_count], "Negative": [neg_count]} df = pd.DataFrame(data=d) hashtag = f"#{hashtag}" with st.spinner("Getting tweets..."): tweets = tw.Cursor(api.search, q=hashtag, lang="en", since=date).items() total_tweets = st.empty() pos_tweets = st.empty() neg_tweets = st.empty() sentiments = ["Positive", "Negative"] chart = st.line_chart(df) barchart = st.empty() for idx, tweet in enumerate(tweets): tweets_count += 1 output = requests.post("http://backend:8000/api", json={"tweet": tweet.text}) output = output.content.decode("utf8") output = json.loads(output).get("sentiment") if output == "Negative": neg_count += 1 elif output == "Positive": pos_count += 1 total_tweets.text("Tweets Analysed: %d" % tweets_count)