Ejemplo n.º 1
0
if __name__ == '__main__':
    # Streamlit entry point: pick a ticker and a date range, show the fetched
    # data, then run a simple univariate analysis on one selected column.
    st.write('Hello welcome to the Stock Market Analytics Web App!!')
    # Default the start of the range to the first day of the current month.
    today = datetime.date.today()
    def_start_date = today.replace(day=1)
    option = st.selectbox(
        'Please choose the stock tickers you are interested to Analyze',
        ['SPY', 'AAPL', 'AMZN'])
    from_date = st.date_input('from date', def_start_date)
    to_date = st.date_input('to date')
    st.write('You have selected:', option)
    # `option` is already a single ticker string, so it is passed as-is.
    data = get_data(option, from_date, to_date)
    st.write(data)

    # col_option = st.multiselect('Select the column to perform Uni-Variate Analysis', ['Open','High', 'Low', 'Close', 'Volume'])
    col_option = st.selectbox(
        'Select the column to perform Uni-Variate Analysis',
        ['Open', 'High', 'Low', 'Close', 'Volume'])
    st.write('column:', col_option)
    print('column selection', col_option)
    # From here on `data` holds only the selected column.
    data = data.loc[:, col_option]
    st.write(data)
    st.line_chart(data)

    st.write('Univariate Analysis')
    st.write('Mean', data.values.mean())
    st.write('Median', statistics.median(data.values))
    st.write('Mode', statistics.mode(data.values))
    st.write(f'Min: {min(data.values)} - Max: {max(data.values)}')
Ejemplo n.º 2
0
def main():
    """Render the time-series analysis Streamlit app.

    Builds the sidebar (activity selector, lag and seasonal sliders, logo)
    and a CSV uploader, then renders the page for the selected activity:
    exploratory data analysis, plotting, model building, or "About".
    Every activity except "About" requires an uploaded CSV file; without
    one nothing further is rendered.
    """
    st.sidebar.title("What to do")
    activities = [
        "Exploratory Data Analysis", "Plotting and Visualization",
        "Building Model", "About"
    ]
    choice = st.sidebar.selectbox("Select Activity", activities)
    # Number of lags forwarded to tsplot() below.
    st.sidebar.markdown("# Lag")
    x = st.sidebar.slider('Select a lag for ACF and PACF analysis', 50, 60)
    # Seasonal parameter forwarded to forecasting_autoarima() below.
    st.sidebar.markdown("# Seasonal")
    s = st.sidebar.slider(
        'Select a seasonal parameter from previous ACF and PACF analysis', 24,
        48)
    # cloud logo
    st.sidebar.title("Built on:")
    st.sidebar.image("src/ibmcloud_logo.png", width=200)
    # Upload file
    uploaded_file = st.file_uploader("Choose a CSV file", type="csv")

    if uploaded_file is not None and choice == "Exploratory Data Analysis":
        data = pd.read_csv(uploaded_file)
        st.subheader(choice)
        # Show dataset
        if st.checkbox("Show Dataset"):
            # Clamp the lower bound so a file with fewer than 5 rows does not
            # yield a default above max_value, which Streamlit rejects.
            rows = st.number_input("Number of rows", min(5, len(data)),
                                   len(data))
            st.dataframe(data.head(rows))
        # Show columns
        if st.checkbox("Columns"):
            st.write(data.columns)
        # Data types
        if st.checkbox("Column types"):
            # NOTE(review): `types` is not defined in the visible source;
            # presumably a project helper returning column dtypes - confirm.
            st.write(types(data))
        # Show Shape
        if st.checkbox("Shape of Dataset"):
            data_dim = st.radio("Show by", ("Rows", "Columns", "Shape"))
            if data_dim == "Columns":
                st.text("Number of Columns: ")
                st.write(data.shape[1])
            elif data_dim == "Rows":
                st.text("Number of Rows: ")
                st.write(data.shape[0])
            else:
                st.write(data.shape)
        # Check null values in dataset
        if st.checkbox("Check null values"):
            nvalues = null_values(data)
            st.write(nvalues)
        # Show Data summary
        if st.checkbox("Show Data Summary"):
            st.text("Datatypes Summary")
            st.write(data.describe())
        # Plot time series, ACF and PACF
        if st.checkbox("Select column as time series"):
            columns = data.columns.tolist()
            selected = st.multiselect("Choose", columns)
            series = data[selected]
            if st.button('Plot Time Series, ACF and PACF'):
                tsplot(series, lags=x)
                st.pyplot()

    elif uploaded_file is not None and choice == "Plotting and Visualization":
        st.subheader(choice)
        df = pd.read_csv(uploaded_file)
        all_columns = df.columns.tolist()
        type_of_plot = st.selectbox("Select Type of Plot", [
            "area", "line", "scatter", "pie", "bar", "correlation",
            "distribution"
        ])

        # The three native Streamlit charts share the same column picker.
        native_charts = {
            "line": st.line_chart,
            "area": st.area_chart,
            "bar": st.bar_chart,
        }

        if type_of_plot in native_charts:
            select_columns_to_plot = st.multiselect("Select columns to plot",
                                                    all_columns)
            cust_data = df[select_columns_to_plot]
            native_charts[type_of_plot](cust_data)

        elif type_of_plot == "pie":
            select_columns_to_plot = st.selectbox("Select a column",
                                                  all_columns)
            st.write(df[select_columns_to_plot].value_counts().plot.pie())
            st.pyplot()

        elif type_of_plot == "correlation":
            # Restrict to numeric/bool columns: with pandas >= 2.0,
            # DataFrame.corr() raises on text columns instead of silently
            # dropping them.
            numeric_df = df.select_dtypes(include=["number", "bool"])
            st.write(
                sns.heatmap(numeric_df.corr(),
                            annot=True,
                            linewidths=.5,
                            annot_kws={"size": 7}))
            st.pyplot()

        elif type_of_plot == "scatter":
            st.write("Scatter Plot")
            scatter_x = st.selectbox("Select a column for X Axis", all_columns)
            scatter_y = st.selectbox("Select a column for Y Axis", all_columns)
            st.write(sns.scatterplot(x=scatter_x, y=scatter_y, data=df))
            st.pyplot()

        elif type_of_plot == "distribution":
            select_columns_to_plot = st.multiselect("Select columns to plot",
                                                    all_columns)
            st.write(sns.distplot(df[select_columns_to_plot]))
            st.pyplot()

    elif uploaded_file is not None and choice == "Building Model":
        st.subheader(choice)
        df = pd.read_csv(uploaded_file)
        st.write("Select the columns to use for training")
        columns = df.columns.tolist()
        selected_column = st.multiselect("Select Columns", columns)
        new_df = df[selected_column]
        st.write(new_df)

        if st.checkbox("Train/Test Split"):
            if selected_column:
                # Only the first selected column is used as the series.
                y_train, y_test = temporal_train_test_split(new_df.T.iloc[0])
                st.text("Train Shape")
                st.write(y_train.shape)
                st.text("Test Shape")
                st.write(y_test.shape)
                plot_ys(y_train, y_test, labels=["y_train", "y_test"])
                st.pyplot()
            else:
                # Without a selection new_df has no columns and
                # new_df.T.iloc[0] would raise IndexError.
                st.warning("Select at least one column first.")

        # The model picker lives outside the button branch: st.button is only
        # True for the single rerun triggered by the click, so a widget
        # created inside that branch loses its value on the next interaction.
        model_selection = st.selectbox("Model to train",
                                       ["AutoArima", "LSTM", "MLP", "RNN"])
        if st.button("Training a Model"):
            if not selected_column:
                st.warning("Select at least one column first.")
            elif model_selection == "AutoArima":
                y_train, y_test = temporal_train_test_split(new_df.T.iloc[0])
                forecasting_autoarima(y_train, y_test, s)
            else:
                # Only the AutoArima path is implemented in this function.
                st.warning("Only AutoArima training is available.")

    elif choice == "About":
        st.title("About")
        st.write("The app developed by Alexander Robles.")
        st.write("Stack: Python, Streamlit, Docker, Kubernetes")
Ejemplo n.º 3
0
    end_row = 0
    # Start the date from top of the data set and go down to see if the user date is less than or equal to the date in the given data
    for i in range(0, len(df)):
        if start <= pd.to_datetime(df['Date'][i]):
            start_row = i
            break
    # Scan from the bottom of the data set upward; every row whose date is on or
    # after the user's end date overwrites end_row (the loop has no break)
    for j in range(0, len(df)):
        if end <= pd.to_datetime(df['Date'][len(df) - 1 - j]):
            end_row = len(df) - 1 - j
    df = df.set_index(pd.DatetimeIndex(df['Date'].values))
    return df.iloc[start_row:end_row + 1, :]


# Get the user input: get_input() returns the start date, end date and ticker symbol, in that order
start, end, symbol = get_input()
# Get the data for the requested symbol and date range
df = get_data(symbol, start, end)
# Get the company name; the lookup is done with the upper-cased symbol
company_name = get_company_name(symbol.upper())
# Display the close price as a line chart under a "<company> Close" header
st.header(company_name + ' Close\n')
st.line_chart(df['Close'])
# Display the volume as a line chart under a "<company> Volume" header
st.header(company_name + " Volume\n")
st.line_chart(df['Volume'])

# Show summary statistics of the data (DataFrame.describe())
st.header('Data statistics')
st.write(df.describe())
import yfinance as yf
import streamlit as st
import pandas as pd

# Page heading; the space after '#' is required for Markdown to render a
# heading rather than literal text.
st.write("""
# Simple Stock Price

Shown are the stock closing price and volume of google

""")

# Ticker whose history is charted below.
tickerSymbol = 'GOOGL'

tickerData = yf.Ticker(tickerSymbol)

# yfinance exposes this method as lowercase `history`; `History` does not
# exist and raises AttributeError.
tickerDf = tickerData.history(period='1d', start='2010-5-31', end='2020-5-31')

# One line chart for the closing price and one for the traded volume.
st.line_chart(tickerDf.Close)
st.line_chart(tickerDf.Volume)
Ejemplo n.º 5
0
""")
# Lift pandas' display limits on rows and columns.
pd.set_option("display.max_rows", None)
pd.set_option("display.max_columns", None)

# Load the symbol table and keep a one-column frame of ticker symbols
# to feed the selectbox.
df = pd.read_csv('symbols.csv')
df1 = df[['symbol']]
result = df1.head(None)

stock = st.selectbox('Which stock you would like to select?', result)

# Look up the name stored for the chosen symbol (first matching row).
matching_names = df.loc[df['symbol'] == stock, 'name']
stock_name = matching_names.iloc[0]
st.write('You have selected:', stock_name)

# Two side-by-side date pickers for the requested range.
col1, col2 = st.columns(2)
From = col1.date_input("From", datetime.date(2020, 1, 1))
To = col2.date_input("To", value=None)

# Download the price history for the range and chart close and volume.
stockSymbol = stock
stockData = yf.Ticker(stockSymbol)
stockDf = stockData.history(period='1d', start=From, end=To)
for chart_column in ('Close', 'Volume'):
    st.line_chart(stockDf[chart_column])

#x = st.slider('x')
#st.write(x, 'squared is', x * x)
def stock_predict_LSTM(stock, time_steps=3):
    """Train a stacked LSTM on a stock's closing prices and chart the result.

    Loads the price history for *stock* with ``web.DataReader(stock, 'stooq')``,
    trains on the first 90% of the closing prices using sliding windows of
    *time_steps* values, predicts the remaining prices and renders the
    predictions and the RMSE with Streamlit.

    Args:
        stock: Ticker symbol passed to the data reader.
        time_steps: Number of consecutive closing prices in each input window.

    Raises:
        ValueError: If the series is too short to build at least one training
            window and a non-empty test split.
    """
    # getting the data, oldest row first
    df = web.DataReader(stock, 'stooq')
    df.sort_values('Date', ascending=True, inplace=True)

    # `data` keeps the closing prices as a DataFrame (with its index);
    # `dataset` is the same column as a 2-D array.
    data = df.filter(['Close'])
    dataset = data.values
    training_data_len = math.ceil(len(dataset) * .9)

    if training_data_len <= time_steps or training_data_len >= len(dataset):
        raise ValueError(
            "Not enough price history for {!r}: {} rows with time_steps={}".
            format(stock, len(dataset), time_steps))

    # Fit the scaler on the training rows only, so the min/max of the held-out
    # prices do not leak into training; then scale the whole series with it.
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(dataset[:training_data_len])
    scaled_data = scaler.transform(dataset)

    # training dataset: each sample is `time_steps` prices, the target is the
    # price that follows them
    training_data = scaled_data[0:training_data_len, :]
    x_train = []
    y_train = []
    for i in range(time_steps, len(training_data)):
        x_train.append(training_data[i - time_steps:i, 0])
        y_train.append(training_data[i, 0])
    x_train, y_train = np.array(x_train), np.array(y_train)
    # reshape to (samples, time_steps, 1) as expected by the LSTM layers
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))

    # test dataset: start `time_steps` rows early so the first test target
    # has a full input window
    test_data = scaled_data[training_data_len - time_steps:, :]
    x_test = []
    y_test = dataset[training_data_len:, :]

    for i in range(time_steps, len(test_data)):
        x_test.append(test_data[i - time_steps:i, 0])
    # convert into numpy array
    x_test = np.array(x_test)
    # reshaping the data into 3 dimensions
    x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

    # model
    model = Sequential()
    # 1st LSTM layer: 100 units; return_sequences=True so the next LSTM layer
    # receives a three-dimensional sequence input
    model.add(
        LSTM(units=100,
             return_sequences=True,
             input_shape=(x_train.shape[1], 1)))
    # Dropout regularisation with rate 0.2
    model.add(Dropout(0.2))
    # 2nd LSTM layer: 50 units, still returning sequences
    model.add(LSTM(units=50, return_sequences=True))
    model.add(Dropout(0.2))
    # 3rd LSTM layer: 50 units, still returning sequences
    model.add(LSTM(units=50, return_sequences=True))
    # Dropout regularisation with rate 0.5
    model.add(Dropout(0.5))
    # 4th LSTM layer: 50 units, returns only the final output
    model.add(LSTM(units=50))
    model.add(Dropout(0.5))
    # Dense layer that specifies an output of one unit
    model.add(Dense(units=1))

    # compiling model
    model.compile(optimizer='adam', loss='mean_squared_error')

    # training the model
    model.fit(x_train, y_train, batch_size=1, epochs=1)

    # get predicted values, converted back to price units
    predictions = model.predict(x_test)
    predictions = scaler.inverse_transform(predictions)

    # RMSE
    rmse = np.sqrt(np.mean(((predictions - y_test)**2)))
    print("Root Means Squared Error:", rmse)

    # Plot the data. Copy the slice so that adding the predictions column
    # does not write into a view of `data`.
    valid = data[training_data_len:].copy()
    print(predictions.shape, valid.shape)

    valid['Predictions'] = predictions[:, 0]
    st.title('{} Long-Short-Term-Memory.'.format(stock.upper()))
    st.line_chart(valid[['Close', 'Predictions']])
    st.title("Model Metrics for Long-Short-Term-Memory.")
    st.subheader("Root Means Squared Error")
    st.subheader(str(rmse))
Ejemplo n.º 7
0
        X_test, y_test = x[test_idx], y[test_idx]
        X_train, y_train = sampling.fit_resample(X_train, y_train)
        rf = RandomForestClassifier(**cfg, random_state=42)
        rf.fit(X_train, y_train)
        y_pred_prob = rf.predict_proba(X_test)
        result = metrics.roc_auc_score(y_test, y_pred_prob[:, 1])
        results.append(result)
    result = np.mean(results)
    print(result)

    return result


# Streamlit elements created up front, in display order: a progress bar
# starting at 0, an empty placeholder, an empty line chart, and a second
# empty placeholder.
pb = st.progress(0)
status_txt = st.empty()
chart = st.line_chart()
status_txt2 = st.empty()

# Environment built around the rf_from_cfg function and the configuration
# space `cs`, with a fixed seed.
# NOTE(review): mem_in_mb / cpu_time_in_s look like resource limits;
# confirm against the Environment API.
environment = Environment(rf_from_cfg,
                          config_space=cs,
                          mem_in_mb=2048,
                          cpu_time_in_s=30,
                          seed=42,
                          data=x)

# Search object over that environment, configured through policy_arg and
# with verbose output enabled.
mosaic = Search(environment=environment,
                policy_arg={
                    "c_ucb": 1.1,
                    "coef_progressive_widening": 0.6
                },
                verbose=True)
def main():
    st.title("COVID-19")
    st.sidebar.title('Menú COVID-19')
    add_selectbox = st.sidebar.selectbox(
        'Por favor, selecciona una opción: ',
        ('Principal', 'Análisis Exploratorio por País',
         'Análisis Exploratorio a Nivel Mundial'))
    ##NUEVO MENU: PRINCIPAL
    if add_selectbox == 'Principal':
        st.header('Introducción al COVID-19')
        st.write(
            'Coronavirus y Nuevo Coronavirus son los nombres de los virus, y CoViD-19 es el nombre de la enfermedad  causada por el Nuevo Coronavirus. Identificados en la década de 1960, los Coronavirus pertenecen a la  subfamilia taxonómica Orthocoronavirinae de la familia Coronaviridae, del orden Nidovirales.  Se subdividen en varios géneros, especies y cepas. Algunos atacan a los animales, otros afectan a la  especie humana, causando desde enfermedades leves con síntomas similares a un resfriado común, hasta enfermedades graves como el SARS, el MERS y el actual CoViD-19.'
        )
        st.write(
            'El coronavirus se dio a conocer en todo el mundo en 2002 cuando causó el Síndrome Respiratorio Agudo Severo (SARS), una enfermedad que se propagó a varios países desde China, causando alrededor de 800 víctimas, el 10% de las 8000 personas que se infectaron. Una nueva cepa de coronavirus, que apareció en Jordania en 2012, causó el Síndrome Respiratorio del Medio Oriente (MERS), una enfermedad que se propagó en el Medio Oriente y llegó a Corea del Sur, matando al 35% de las 2500 personas infectadas, una enfermedad que mostró una impresionante tasa de letalidad del 35%. Y en China, en 2019, surgió otra cepa del virus, el Nuevo Coronavirus, responsable de la enfermedad CoViD-19 que está causando esta pandemia. Su tasa de mortalidad es del 3 al 4%, pero su capacidad de transmisión es mucho mayor que la de otros virus, ya que se transmite incluso por personas sin ningún tipo de síntomas y tiene una capacidad y velocidad de reproducción mucho más altas que las de otros virus.'
        )
        st.header('¿Qué es COVID-19?')
        st.write(
            'COVID-19 es una enfermedad causada por el coronavirus SARS-CoV-2, que presenta un cuadro clínico que abarca desde infecciones asintomáticas hasta afecciones respiratorias graves. Según la Organización Mundial de la Salud (OMS), la mayoría de los pacientes con COVID-19 (aproximadamente el 80%) pueden ser asintomáticos y aproximadamente el 20% de los casos pueden requerir atención hospitalaria porque tienen dificultad para respirar y de estos casos aproximadamente el 5% puede necesitar apoyo para el tratamiento de la insuficiencia respiratoria (respirador artificial)'
        )
        st.subheader('¿Qué es el coronavirus?')
        st.write(
            'El coronavirus es una familia de virus que causan infecciones respiratorias. El nuevo agente de coronavirus fue descubierto el 31/12/19 después de los casos registrados en China. Causa la enfermedad llamada coronavirus (COVID-19). Los primeros coronavirus humanos se aislaron por primera vez en 1937. Sin embargo, fue en 1965 que el virus se describió como coronavirus, debido al perfil bajo microscopía, que parecía una corona. La mayoría de las personas se infectan con coronavirus comunes a lo largo de sus vidas, y los niños pequeños tienen más probabilidades de infectarse con el tipo más común del virus. Los coronavirus más comunes que infectan a los humanos son el coronavirus alfa 229E y NL63 y el coronavirus beta OC43, HKU1.'
        )
        st.header('Síntomas')
        st.write(
            'Los síntomas de COVID-19 pueden variar desde un simple resfriado hasta una neumonía grave. Los síntomas más comunes son: Tos, Fiebre, Rinitis, Dolor de garganta, Dificultades respiratorias'
        )
        st.header('Transmisión')
        st.write(
            'La transmisión ocurre de una persona enferma a otra o por contacto cercano a través de: Toque o apretón de manos; Gotitas de saliva; Estornudo; Tos; Catarro; Objetos o superficies contaminadas, como teléfonos celulares, mesas, manijas de puertas, juguetes, teclados de computadora, etc.'
        )
        st.header('Fuente de los Datos')
        st.write(
            'Los datos fueron tomados de Github de OurWorldInData, que es una organización sin fines de lucro que recopila y proporciona datos gratuitos sobre "pobreza, enfermedad, hambre, cambio climático, guerra, riesgos existenciales y desigualdad". Los datos mundiales sobre Covid-19 son actualizados diariamente por la organización, que es una de las más respetadas en el mundo en relación con el suministro de datos. Puede acceder a estos datos públicos actualizados aquí.'
        )
        st.markdown("[OurWorldInData](https://ourworldindata.org/)")

        st.header('Biblioteca')
        st.write(
            'Comprender los datos es tan importante como el análisis exploratorio, porque si no se comprende lo que significa y de dónde provienen los datos, no se podrá realizar un análisis completo.'
        )
        #Datos tabla
        informacion = {
            'Descripción': [
                'Letras y códigos de los países en base a ISO 3166-1 alpha-3',
                'Continente', 'Localización Geográfica',
                'Fecha de Observación',
                'Total de Casos Confirmados de COVID19',
                'Nuevos Casos Confirmados de COVID19',
                'Total de Muertes Atribuídas al COVID19',
                'Nuevas Muertes Atribuídas al COVID19',
                'Total de Casos Confirmados de COVID19 por millón de habitantes',
                'Nuevos Casos Confirmados de COVID19 por millón de habitantes',
                'Total de Muertes Atribuídas al COVID19 por millón de habitantes',
                'Nuevas Muertes Atribuídas al COVID19 por millón de habitantes',
                'Total de Pruebas para COVID19', 'Nuevas Pruebas para COVID19',
                'Nuevas Pruebas de COVID19 (suavizado por 7 dias)',
                'Total de Pruebas de COVID19 por 1000 habitantes',
                'Nuevas Pruebas de COVID19 por 1000 habitantes',
                'Nuevas Pruebas de COVID19 (suavizado por 7 dias) por 1000 habitantes',
                'Unidades usadas para informar datos de pruebas',
                'Indice de Respuesta del Gobierno', 'Población en 2020',
                'Densidad demográfica (en km cuadrados)',
                'Média de Edad de la Población',
                'Proporción de la población con 65 años o más',
                'Proporción de la población con 70 años o más',
                'PIB a la par con el poder de compra (dólares internacionales constantes de 2011)',
                'Porcentaje de la población que vive en extrema pobreza, año más reciente desde 2010',
                'Tasa de mortalidad por enfermedades cardiovasculares en 2017',
                'Prevalencia de diabetes (% de población entre 20 y 79 años) en 2017',
                'Porcentaje de mujeres que fuman',
                'Porcentaje de hombres que fuman',
                'Porcentaje de la población con instalaciones básicas de lavado de manos en las instalaciones, en el último año disponible',
                'Camas de hospital por cada 1,000 personas, último año disponible desde 2010',
                'Esperanza de vida al nacer en 2019'
            ],
            'Origen': [
                'International Organization for Standardization',
                'Our World in Data', 'Our World in Data', 'Our World in Data',
                'Centro Europeo para la Prevención y el Control de Enfermedades',
                'Centro europeo para la prevención y el control de enfermedades',
                'Centro europeo para la prevención y el control de enfermedades',
                'Centro europeo para la prevención y el control de enfermedades',
                'Centro europeo para la prevención y el control de enfermedades',
                'Centro europeo para la prevención y el control de enfermedades',
                'Centro europeo para la prevención y el control de enfermedades',
                'Centro europeo para la prevención y el control de enfermedades',
                'National government reports', 'National government reports',
                'National government reports', 'National government reports',
                'National government reports', 'National government reports',
                'National government reports',
                '	Oxford COVID-19 Government Response Tracker, Blavatnik School of Government',
                'United Nations', 'World Bank', 'UN Population Division',
                'Banco Mundial',
                'Naciones Unidas, Departamento de Asuntos Económicos y Sociales, División de Población (2017)',
                '	Banco Mundial', '	Banco Mundial',
                'Red Global Colaborativa de Carga de Enfermedades',
                '	Banco Mundial', '	Banco Mundial', '	Banco Mundial',
                'División de Estadística de las Naciones Unidas',
                'OCDE, Eurostat, Banco Mundial, registros del gobierno nacional y otras fuentes',
                'James C. Riley, Clio Infra, División de Población de las Naciones Unidas'
            ]
        }
        df_info = pd.DataFrame(informacion, columns=['Descripción', 'Origen'])
        df_info = pd.DataFrame(informacion,
                               columns=['Descripción', 'Origen'],
                               index=df.columns)
        st.table(df_info)
        st.subheader(
            "Base de Datos del COVID19 mostrando los 5 primeras filas")
        #Transformar la columna "date" no formato datetime
        df.date = pd.to_datetime(df.date)
        df.date.max()
        df.set_index('date', inplace=True)
        st.dataframe(df[df.location == 'World'].head(5))
        #Description
        if st.checkbox("Mostrar Descripcion de la Base de Datos"):
            st.write(df.describe())
        #Shape
        if st.checkbox("Mostrar Dimensiones de la Base de Datos"):
            st.write(df.shape)
            data_dim = st.radio("Mostrar Dimensiones por: ",
                                ("Filas", "Columnas"))
            if data_dim == "Filas":
                st.write("Numero de Filas: ")
                st.write(df.shape[0])
            elif data_dim == "Columnas":
                st.write("Numero de Columnas: ")
                st.write(df.shape[1])
        #Correlacion
        st.header("Correlación")
        st.write(
            "La correlación indica la fuerza y la dirección de una relación lineal y proporcionalidad entre dos variables estadísticas."
        )
        columnas = df.columns  #Una lista con las columnas
        st.info(
            "El siguiente mapa de calor representa una correlación visual, que varía de -1 a 1, donde -1 se considera sin correlación y 1 se considera una correlación perfecta."
        )
        columnas_seleccionadas = st.multiselect(
            "Selecciona las columnas para generar el mapa de correlacion: ",
            columnas)
        if len(columnas_seleccionadas) > 0:
            correlacion = df[columnas_seleccionadas].corr()
            fig, ax = plt.subplots(figsize=(12, 12))
            st.write(
                sns.heatmap(correlacion,
                            annot=True,
                            fmt='.2f',
                            ax=ax,
                            square=True))
            st.pyplot()

        st.success(
            'En el Menú Lateral Puedes Seleccionar y Empezar con el Análisis Exploratorio del COVID-19'
        )

        st.write(
            "Desarrollado por: Juan Minango, David Minango y Rafael Bolsoni")
        st.markdown("[JD-TECHN](https://jdtechn.com/)")
        st.write("Con el auspicio de:")
        st.markdown(
            "[Capacitate Ecuador](https://www.facebook.com/Capacitate.Ecu/)")

    ##NUEVO MENU: ANALISIS EXPLORATORIO POR PAIS
    elif add_selectbox == 'Análisis Exploratorio por País':
        #Transformar la columna "date" no formato datetime
        st.header("Análisis Exploratorio por País")
        df.date = pd.to_datetime(df.date)
        df.date.max()
        df.set_index('date', inplace=True)
        #Seleccion de Paises
        list_paises = df.location.unique()
        paises = st.selectbox("Escoja el Pais", list_paises)
        paises_df = df[df['location'].str.contains(paises)]
        countries_images = {
            'af': 'Afghanistan',
            'al': 'Albania',
            'dz': 'Algeria',
            'as': 'American Samoa',
            'ab': 'Antigua and Barbuda',
            'ad': 'Andorra',
            'ao': 'Angola',
            'ai': 'Anguilla',
            'aq': 'Antarctica',
            'ag': 'Antigua And Barbuda',
            'ar': 'Argentina',
            'am': 'Armenia',
            'aw': 'Aruba',
            'au': 'Australia',
            'at': 'Austria',
            'az': 'Azerbaijan',
            'bs': 'Bahamas',
            'bh': 'Bahrain',
            'bd': 'Bangladesh',
            'bb': 'Barbados',
            'by': 'Belarus',
            'be': 'Belgium',
            'bz': 'Belize',
            'bj': 'Benin',
            'bm': 'Bermuda',
            'bt': 'Bhutan',
            'bo': 'Olivia',
            'ba': 'Bosnia and Herzegovina',
            'bw': 'Botswana',
            'bv': 'Bouvet Island',
            'br': 'Brazil',
            'io': 'British Indian Ocean Territory',
            'bn': 'Brunei',
            'bg': 'Bulgaria',
            'bf': 'Burkina Faso',
            'bi': 'Burundi',
            'kh': 'Cambodia',
            'cm': 'Cameroon',
            'ca': 'Canada',
            'cv': 'Cape Verde',
            'ky': 'Cayman Islands',
            'cf': 'Central African Republic',
            'td': 'Chad',
            'cl': 'Chile',
            'cn': 'China',
            'cx': 'Hristmas Island',
            'cc': 'Cocos (Keeling) Islands',
            'co': 'Colombia',
            'km': 'Comoros',
            'cg': 'Congo',
            'cd': 'Democratic Republic of Congo',
            'ck': 'Cook Islands',
            'cr': 'Costa Rica',
            'ci': "Cote d'Ivoire",
            'hr': 'Croatia',
            'cu': 'Cuba',
            'cy': 'Cyprus',
            'cz': 'Czech Republic',
            'dk': 'Denmark',
            'dj': 'Djibouti',
            'dm': 'Dominica',
            'do': 'Dominican Republic',
            'ec': 'Ecuador',
            'eg': 'Egypt',
            'eh': 'Western Sahara',
            'sv': 'El Salvador',
            'gq': 'Equatorial Guinea',
            'er': 'Eritrea',
            'ee': 'Estonia',
            'et': 'Ethiopia',
            'fk': 'Falkland Islands',
            'fo': 'Aroe Islands',
            'fj': 'Fiji',
            'fi': 'Finland',
            'fr': 'France',
            'gf': 'French Guiana',
            'pf': 'French Polynesia',
            'tf': 'French Southern Territories',
            'ga': 'Gabon',
            'gm': 'Gambia',
            'ge': 'Georgia',
            'de': 'Germany',
            'gh': 'Ghana',
            'gi': 'Gibraltar',
            'gr': 'Greece',
            'gl': 'Greenland',
            'gd': 'Grenada',
            'gp': 'Guadeloupe',
            'gu': 'Guam',
            'gt': 'Guatemala',
            'gn': 'Guinea',
            'gw': 'Guinea-Bissau',
            'gy': 'Guyana',
            'ht': 'Haiti',
            'hm': 'Heard Island And Mcdonald Islands',
            'hn': 'Honduras',
            'hk': 'Hong Kong',
            'hu': 'Hungary',
            'is': 'Iceland',
            'in': 'India',
            'id': 'Indonesia',
            'ir': 'Iran',
            'iq': 'Iraq',
            'ie': 'Ireland',
            'il': 'Israel',
            'it': 'Italy',
            'jm': 'Jamaica',
            'jp': 'Japan',
            'jo': 'Jordan',
            'kz': 'Kazakhstan',
            'ke': 'Kenya',
            'ki': 'Kiribati',
            'kp': "Korea, Democratic People'S Republic Of",
            'kr': 'South Korea',
            'kw': 'Kuwait',
            'kg': 'Kyrgyzstan',
            'la': 'Laos',
            'lv': 'Latvia',
            'lb': 'Lebanon',
            'ls': 'Lesotho',
            'lr': 'Liberia',
            'ly': 'Libya',
            'li': 'Liechtenstein',
            'lt': 'Lithuania',
            'lu': 'Luxembourg',
            'mo': 'Macao',
            'mk': 'Macedonia',
            'mg': 'Madagascar',
            'mw': 'Malawi',
            'my': 'Malaysia',
            'mv': 'Maldives',
            'ml': 'Mali',
            'mt': 'Malta',
            'mh': 'Marshall Islands',
            'mq': 'Martinique',
            'mr': 'Mauritania',
            'mu': 'Mauritius',
            'yt': 'Mayotte',
            'mx': 'Mexico',
            'fm': 'Micronesia, Federated States Of',
            'md': 'Moldova',
            'mc': 'Monaco',
            'mn': 'Mongolia',
            'ms': 'Montserrat',
            'ma': 'Morocco',
            'mz': 'Mozambique',
            'mm': 'Myanmar',
            'na': 'Namibia',
            'nr': 'Nauru',
            'np': 'Nepal',
            'nl': 'Netherlands',
            'an': 'Netherlands Antilles',
            'nc': 'New Caledonia',
            'nz': 'New Zealand',
            'ni': 'Nicaragua',
            'ne': 'Niger',
            'ng': 'Nigeria',
            'nu': 'Niue',
            'nf': 'Norfolk Island',
            'mp': 'Northern Mariana Islands',
            'no': 'Norway',
            'om': 'Oman',
            'pk': 'Pakistan',
            'pw': 'Palau',
            'ps': 'Palestine',
            'pa': 'Panama',
            'pg': 'Papua New Guinea',
            'py': 'Paraguay',
            'pe': 'Peru',
            'ph': 'Philippines',
            'pn': 'Pitcairn',
            'pl': 'Poland',
            'pt': 'Portugal',
            'pr': 'Puerto Rico',
            'qa': 'Qatar',
            're': 'Réunion',
            'ro': 'Romania',
            'ru': 'Russia',
            'rw': 'Rwanda',
            'sh': 'Saint Helena',
            'kn': 'Saint Kitts and Nevis',
            'lc': 'Saint Lucia',
            'pm': 'Saint Pierre And Miquelon',
            'vc': 'Saint Vincent and the Grenadines',
            'ws': 'Samoa',
            'sm': 'San Marino',
            'st': 'Sao Tome and Principe',
            'sa': 'Saudi Arabia',
            'sn': 'Senegal',
            'cs': 'Montenegro',
            'sc': 'Seychelles',
            'sl': 'Sierra Leone',
            'sg': 'Singapore',
            'sk': 'Slovakia',
            'si': 'Slovenia',
            'sb': 'Solomon Islands',
            'so': 'Somalia',
            'za': 'South Africa',
            'gs': 'South Georgia And South Sandwich Islands',
            'es': 'Spain',
            'lk': 'Sri Lanka',
            'sd': 'Sudan',
            'sr': 'Suriname',
            'sj': 'Svalbard And Jan Mayen',
            'sz': 'Swaziland',
            'se': 'Sweden',
            'ch': 'Switzerland',
            'sy': 'Syria',
            'tw': 'Taiwan',
            'tj': 'Tajikistan',
            'tz': 'Tanzania',
            'th': 'Thailand',
            'tl': 'Timor',
            'tg': 'Togo',
            'tk': 'Tokelau',
            'to': 'Tonga',
            'tt': 'Trinidad and Tobago',
            'tn': 'Tunisia',
            'tr': 'Turkey',
            'tm': 'Turkmenistan',
            'tc': 'Turks and Caicos Islands',
            'tv': 'Tuvalu',
            'ug': 'Uganda',
            'ua': 'Ukraine',
            'ae': 'United Arab Emirates',
            'gb': 'United Kingdom',
            'us': 'United States',
            'um': 'United States Minor Outlying Islands',
            'uy': 'Uruguay',
            'uz': 'Uzbekistan',
            've': 'Venezuela',
            'vu': 'Vanuatu',
            'vn': 'Vietnam',
            'vg': 'British Virgin Islands',
            'vi': 'U.S. Virgin Islands',
            'wf': 'Wallis And Futuna',
            'ye': 'Yemen',
            'zw': 'Zimbabwe',
            'bl': 'Bolivia',
            'bonaire': 'Bonaire Sint Eustatius and Saba',
            'cura': 'Curacao',
            'faeroe': 'Faeroe Islands',
            'guy': 'Guernsey',
            'ilm': 'Isle of Man',
            'jey': 'Jersey',
            'ksv': 'Kosovo',
            'ser': 'Serbia',
            'sint': 'Sint Maarten (Dutch part)',
            'ssd': 'South Sudan',
            'usav': 'United States Virgin Islands',
            'vtc': 'Vatican',
            'zmb': 'Zambia',
            'wrl': 'World',
            'int': 'International'
        }
        # Reverse lookup: for every flag code whose country name equals the
        # selected country, load the matching image from cflags/ and show it.
        for codigo, nombre in countries_images.items():
            if nombre != paises:
                continue
            ruta_bandera = os.path.join('cflags/{}.png'.format(codigo))
            bandera = Image.open(ruta_bandera).convert('RGB')
            st.image(bandera)
        st.dataframe(paises_df)
        st.subheader('Descripción de la Base de Datos:')
        st.dataframe(paises_df.describe())
        df_World = df[df['location'] == 'World']  #Total Mundial
        indice_World = df_World.index[-2]  #El penuntilmo indice para asegurar
        #st.dataframe(df_World)
        #Graficos
        st.subheader('Gráficos Estadísticos de {}'.format(paises))
        # Each checkbox below renders one chart for the selected country.
        # The frames are built directly from the source columns; seeding an
        # empty frame first left a stray all-NaN 'Tasa de Mortalidad' series
        # in the mortality chart because the seeded and assigned column names
        # differed.
        if st.checkbox(
                "Gráfico del Total de Casos y Muertes de COVID-19 en {}".
                format(paises)):
            # Cumulative cases and deaths
            df_temporal = pd.DataFrame({
                'Total_Casos': paises_df['total_cases'],
                'Total_Muertes': paises_df['total_deaths'],
            })
            st.line_chart(df_temporal, use_container_width=True)
        if st.checkbox(
                "Gráfico de Nuevos Casos Diarios de COVID-19 en {}".format(
                    paises)):
            # New cases per day
            df_temporal = pd.DataFrame(
                {'Nuevos_Casos_Diarios': paises_df['new_cases']})
            st.bar_chart(data=df_temporal, use_container_width=True)
        if st.checkbox(
                "Gráfico de Nuevas Muertes Diarias de COVID-19 en {}".format(
                    paises)):
            # New deaths per day
            df_temporal = pd.DataFrame(
                {'Nuevas_Muertes_Diarias': paises_df['new_deaths']})
            st.bar_chart(data=df_temporal, use_container_width=True)
        if st.checkbox(
                "Gráfico de Tasa de Mortalidad de COVID-19 en {}".format(
                    paises)):
            # Mortality rate: total deaths as a percentage of total cases
            df_temporal = pd.DataFrame({
                'Tasa de Mortalidad en %':
                (paises_df.total_deaths / paises_df.total_cases) * 100
            })
            st.bar_chart(data=df_temporal, use_container_width=True)
        #EN COMPARACION AL MUNDO
        st.subheader(
            'Gráficos Estadísticos de {} en Comparacion al Mundo'.format(
                paises))
        if st.checkbox(
                "Gráfico del Total de Casos a Nivel Mundial y de {}".format(
                    paises)):
            plt.plot(paises_df['total_cases'],
                     label='Total de Casos {}'.format(paises))
            plt.plot(df_World['total_cases'],
                     label='Total de Casos a Nivel Mundial')
            plt.yscale("log")
            plt.title('Escala Logaritmica')
            plt.ylabel('Cantidad')
            plt.xlabel('Fechas/Mes')
            plt.grid()
            plt.legend()
            st.pyplot()
        if st.checkbox(
                "Gráfico del Total de Muertes a Nivel Mundial y de {}".format(
                    paises)):
            # Cumulative deaths of the selected country vs. the 'World' rows,
            # drawn on a logarithmic y axis. The legend labels now say
            # "Muertes"; they previously read "Casos", copied from the
            # total-cases chart.
            plt.plot(paises_df['total_deaths'],
                     label='Total de Muertes {}'.format(paises))
            plt.plot(df_World['total_deaths'],
                     label='Total de Muertes a Nivel Mundial')
            plt.yscale("log")
            plt.title('Escala Logaritmica')
            plt.ylabel('Cantidad')
            plt.xlabel('Fechas/Mes')
            plt.grid()
            plt.legend()
            st.pyplot()
        if st.checkbox(
                "Gráfico de Tasa de Mortalidad a Nivel Mundial y de {}".format(
                    paises)):
            df_mort_pais = (paises_df.total_deaths /
                            paises_df.total_cases) * 100
            # Tasas de muertes en el mundo
            df_mort_mundo = (df_World.total_deaths /
                             df_World.total_cases) * 100
            #plt.style.use('ggplot')
            plt.plot(df_mort_pais,
                     label='Tasa de mortalidad en {}'.format(paises))
            plt.plot(df_mort_mundo, label='Tasa de mortalidad en el Mundo')
            plt.title(
                'Tasa de Mortalidad en {} vs Mundo (Mensual)'.format(paises))
            plt.ylabel('% de Mortalidad')
            #plt.xticks(rotation = 45)
            plt.legend()
            plt.grid()
            st.pyplot()
        #EN COMPARACION con OTROS PAISES
        st.subheader(
            'Gráficos Comparativos de {} con Otros Paises'.format(paises))
        paises_seleccionados = df.location.unique().tolist()
        selected_columns = st.multiselect(
            "Seleccionar los Paises a Comparar: ", paises_seleccionados)
        #st.write(selected_columns)
        if len(selected_columns) > 0:
            st.warning(
                "ADVERTENCIA: Es recomendable seleccionar pocos paises para agilizar el procesamiento"
            )
            if st.checkbox("Realizar los Gráficos Comparativos"):
                # Countries to compare: the user's picks plus the country
                # under analysis. A new, de-duplicated list is built instead
                # of appending to the widget's return value, so the current
                # country is not plotted twice when it was also selected.
                paises_comparados = list(
                    dict.fromkeys(list(selected_columns) + [paises]))
                df_comparacion = df[df['location'].isin(paises_comparados)]

                # Mortality rate (total deaths / total cases, in %) per country
                st.subheader(
                    "Comparativa de la Tasa de Mortalidad de COVID-19 de {}".
                    format(paises))
                for pais in paises_comparados:
                    df_pais = df_comparacion[df_comparacion['location'] ==
                                             pais]
                    df_mortalidad = (df_pais.total_deaths /
                                     df_pais.total_cases) * 100
                    plt.plot(df_mortalidad, label=pais)
                plt.legend()
                plt.grid()
                plt.xlabel("Fechas/Mes")
                plt.ylabel(
                    "% Mortalidad [Numero Total de Muertes/Numero Total de Casos]"
                )
                st.pyplot()

                # Totals at the last index entry of the selected country
                indice_paises = paises_df.index[-1]
                st.subheader(
                    "Total de Casos y Muertes de COVID-19 de {} al {}".format(
                        paises, indice_paises))
                df_aux = df.loc[indice_paises, [
                    'location', 'total_cases', 'total_deaths',
                    'total_cases_per_million', 'total_deaths_per_million'
                ]]
                df_aux = df_aux[df_aux['location'].isin(paises_comparados)]

                # x/y are passed by keyword: seaborn >= 0.12 no longer accepts
                # them positionally.
                fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 6))
                sns.barplot(x=df_aux.location,
                            y=df_aux.total_cases,
                            palette='Paired',
                            ax=ax1)
                ax1.set_xlabel('Paises')
                ax1.set_ylabel('Total de Casos')
                ax1.set_title('Total de Casos al {}'.format(indice_paises))
                sns.barplot(x=df_aux.location,
                            y=df_aux.total_deaths,
                            palette='Paired',
                            ax=ax2)
                ax2.set_xlabel('Paises')
                ax2.set_ylabel('Total de Muertes')
                ax2.set_title('Total de Muertes al {}'.format(indice_paises))
                plt.tight_layout()
                st.pyplot()

                # Same comparison, normalised per million inhabitants
                st.subheader(
                    "Total de Casos y Muertes de COVID-19 por millon de habitantes de {} al {}"
                    .format(paises, indice_paises))
                fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 6))
                sns.barplot(x=df_aux.location,
                            y=df_aux.total_cases_per_million,
                            palette='Paired',
                            ax=ax1)
                ax1.set_xlabel('Paises')
                ax1.set_ylabel('Total de Casos por Millon de Habitantes')
                ax1.set_title(
                    'Total de Casos/millon al {}'.format(indice_paises))
                sns.barplot(x=df_aux.location,
                            y=df_aux.total_deaths_per_million,
                            palette='Paired',
                            ax=ax2)
                ax2.set_xlabel('Paises')
                ax2.set_ylabel('Total de Muertes por Millon de Habitantes')
                ax2.set_title(
                    'Total de Muertes/millon al {}'.format(indice_paises))
                plt.tight_layout()
                st.pyplot()

    ##NUEVO MENU: ANALISIS EXPLORATORIO Mundial
    elif add_selectbox == 'Análisis Exploratorio a Nivel Mundial':
        #Transformar la columna "date" no formato datetime
        st.header("Análisis Exploratorio a Nivel Mundial")
        df.date = pd.to_datetime(df.date)
        df.date.max()
        df.set_index('date', inplace=True)
        df_world = df[df.location == 'World']  #Pega de todo el mundo
        indice = df_world.index[-1]
        st.subheader(
            "TOP 10 Paises con Más Casos de COVID-19 al {}".format(indice))
        df_world = df[df.location == 'World']  #Pega de todo el mundo
        df_aux = df.loc[indice, ['location', 'total_cases']].sort_values(
            by='total_cases', ascending=False)[1:11]
        fig, ax = plt.subplots(figsize=(10, 6))
        ax.grid()
        sns.barplot(df_aux.location,
                    df_aux.total_cases,
                    palette='Paired',
                    ax=ax)
        #ax.set_title('TOP 5 Países com mais casos de COVID-19')
        ax.set_xlabel('Países')
        ax.set_ylabel('Total de Casos')
        plt.tight_layout()
        st.pyplot()
        st.subheader(
            "TOP 10 Paises con Más Muertes de COVID-19 al {}".format(indice))
        df_aux = df.loc[indice, ['location', 'total_deaths']].sort_values(
            by='total_deaths', ascending=False)[1:11]
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(df_aux.location,
                    df_aux.total_deaths,
                    palette='Paired',
                    ax=ax)
        ax.grid()
        #ax.set_title('TOP 5 Países com mais casos de COVID-19')
        ax.set_xlabel('Países')
        ax.set_ylabel('Total de Muertes')
        plt.tight_layout()
        st.pyplot()
        st.subheader(
            "Tasa de Mortalidad de COVID-19 del TOP 10 Paises {}".format(
                indice))
        df_aux = df.loc[
            indice, ['location', 'total_deaths', 'total_cases']].sort_values(
                by='total_deaths', ascending=False)[1:11]
        df_aux['mortalidad'] = (df_aux.total_deaths / df_aux.total_cases) * 100
        fig, ax = plt.subplots(figsize=(10, 6))
        sns.barplot(df_aux.location,
                    df_aux.mortalidad,
                    palette='Paired',
                    ax=ax)
        #ax.set_title('TOP 5 Países com mais casos de COVID-19')
        ax.set_xlabel('Países')
        ax.set_ylabel('Tasa de Mortalidad')
        ax.grid()
        plt.tight_layout()
        st.pyplot()
Ejemplo n.º 9
0
# evaluating accuracy for model built on full featureset
full_feat_acc = cross_val_score(lr, bc_X, bc_y, scoring='accuracy', cv=5)
# evaluating accuracy for model built on selected featureset
sel_feat_acc = cross_val_score(lr, bc_SX, bc_y, scoring='accuracy', cv=5)
""")

# Classifier scored below with 5-fold cross-validation.
lr = LogisticRegression(max_iter=100000)

# Per-fold accuracy of the full-feature model; hard-coded values taken from
# the research notebook instead of being recomputed here.
full_feat_acc = pd.Series([0.938596, 0.947368, 0.982456, 0.929825, 0.955752])

# Per-fold accuracy of the model built on the selected feature set.
selected_fold_scores = cross_val_score(lr,
                                       bc_SX,
                                       bc_y,
                                       scoring='accuracy',
                                       cv=5)
sel_feat_acc = pd.Series(selected_fold_scores)

# Side-by-side fold accuracies (column 0: full, column 1: selected).
df_acc = pd.concat([full_feat_acc, sel_feat_acc], axis=1)
st.line_chart(df_acc)

st.write(""" 
### **Accuracy**
""")

# One-row table holding the mean accuracy of each model.
mean_accuracies = pd.DataFrame(
    [[np.average(full_feat_acc),
      np.average(sel_feat_acc)]],
    columns=["30 features", "{} features".format(k)])
st.dataframe(mean_accuracies)

st.write(""" 
## Conclusion
Ejemplo n.º 10
0
# Columns shown for each sampled listing.
listing_fields = ["name", "neighbourhood", "room_type", "minimum_nights", "price"]

# Host names are read from the first row of each host's listings. The text
# used to read row 1 for the second host while the JSON key read row 0; both
# now use row 0 so they always agree.
top_host_1_name = top_host_1.iloc[0].host_name
top_host_2_name = top_host_2.iloc[0].host_name

st.write(
    f"""**{top_host_1_name}** is at the top with {listingcounts.iloc[0]} property listings.
**{top_host_2_name}** is second with {listingcounts.iloc[1]} listings. Following are randomly chosen
listings from the two displayed as JSON using [`st.json`](https://streamlit.io/docs/api.html#streamlit.json)."""
)

# Two listings per host, sampled with a fixed seed so reruns show the same rows.
st.json({
    top_host_1_name: top_host_1[listing_fields]
    .sample(2, random_state=4).to_dict(orient="records"),
    top_host_2_name: top_host_2[listing_fields]
    .sample(2, random_state=4).to_dict(orient="records"),
})

# display line chart
st.write("line chart")
st.line_chart(df['price'].head(100))
#display area chart
st.write("area chart")
st.area_chart(df[['price', 'number_of_reviews']].head(100))
#display bar chart
st.write("bar chart")
st.bar_chart(df[['price', 'number_of_reviews']].head(100))

# display pydeck_chart

# st.subheader("pydeck chart")

# st.pydeck_chart(pdk.Deck(
#     map_style='mapbox://styles/mapbox/light-v9',
#     initial_view_state=pdk.ViewState(
#         latitude=37.76,
Ejemplo n.º 11
0
def _show_progress_bar(delay=0.1):
    """Animate a Streamlit progress bar from 0 to 100 in 100 steps."""
    my_bar = st.progress(0)
    for percent_complete in range(100):
        time.sleep(delay)
        my_bar.progress(percent_complete + 1)


def main():
    """Render the "Delhi and Dhaka Data" page.

    Reads ``datasets/final.csv``, draws an area chart of the population
    columns, offers optional views of the raw data and its columns, and lets
    the user build a custom plot from any subset of columns.
    """
    st.title("Delhi and Dhaka Data")
    st.markdown("Select a city to view different charts of various indicators. ")

    city = st.sidebar.selectbox(label = "Select a City", index = 0,
                               options = list(COUNTRY_MAPPINGS.values()))

    indicator = st.sidebar.selectbox("Select the Indicators", index = 0,
                               options = list(INDICATORS_MAPPINGS.values()))

    st.subheader( city + ' area chat with all the indicators')

    folder_path='datasets'
    selected_filename = 'final.csv'
    filename = os.path.join(folder_path, selected_filename)

    # Reading data: load only the columns that are used, then index with the
    # same list because read_csv returns columns in file order.
    selected_columns_df = ['Ward_No','Ward_Name', 'geometry','No_HH','TOT_P','TOT_M','TOT_F','ch_t_t','tenure_o','l_elect','hh_with_lat', 'no_latr', 'latr_pub', 'have_bath','cf_fw', 'cf_lpg', 'kf_t','hh_bank', 'asset_bic', 'asset_2w', 'asset_4w', 'asset_tv_c']
    df = pd.read_csv(filename, usecols = selected_columns_df)
    df = df[selected_columns_df]

    # First chart: total / male / female population
    st.area_chart(df[['TOT_P','TOT_M','TOT_F']], use_container_width = False, width = 800)

    # Show the first rows of the dataset
    if st.checkbox("Select to see first 10 Dataset"):
        st.dataframe(df.head(10))

    # Show all column names
    if st.button("Click to see all the column names"):
        st.write(df.columns)

    # Show only the columns picked by the user
    if st.checkbox("Selected Columns to Show"):
        all_columns         =   df.columns.tolist()
        selected_columns    =   st.multiselect("Select", all_columns)
        new_df              =   df[selected_columns]
        st.dataframe(new_df)

    st.subheader( city + ' - ' + indicator)

    # Customizable plot
    st.subheader("Customizable Plot")
    all_columns_names       = df.columns.tolist()
    type_of_plot            = st.selectbox("Select Type of Plot", ["area", "bar", "line","hist", "box", "kde"])
    selected_columns_names  = st.multiselect("Select Columns to Plot", all_columns_names)

    if st.button("Generate Customizable Plot"):
        st.success("Generating plot of {} for {}".format(type_of_plot, selected_columns_names ))
        custom_data = df[selected_columns_names]

        # Native Streamlit charts
        if type_of_plot == 'area':
            _show_progress_bar()
            st.area_chart(custom_data)
            st.success('{} plot created'.format(type_of_plot))

        elif type_of_plot == 'bar':
            _show_progress_bar()
            st.bar_chart(custom_data)
            st.success('{} plot created'.format(type_of_plot))

        elif type_of_plot == 'line':
            st.line_chart(custom_data)

        else:
            # "hist", "box" and "kde" are drawn through pandas' matplotlib
            # backend. Previously only "box" was handled and it ended in the
            # misspelled `st.plyplot()`, which raised AttributeError; "hist"
            # and "kde" did nothing. The figure is passed explicitly to
            # st.pyplot instead of writing the Axes object.
            custom_plot = custom_data.plot(kind=type_of_plot)
            st.pyplot(custom_plot.get_figure())
Ejemplo n.º 12
0
    st.dataframe(df_a)
st.text('                            ')
st.text('                            ')
st.text('----------------------------------------------------')


# In[5]:


st.markdown('''
< 日平均気温データ・グラフ >
''')

selected_targets_a = st.multiselect('Month - average', df_a.columns)
view_a = df_a[selected_targets_a]
st.line_chart(view_a)
st.text('                            ')
st.text('                            ')
st.text('----------------------------------------------------')


# In[6]:


# Load the daily maximum temperature data (Excel)
df_h1= pd.read_excel('data_h.xlsx')

# Set 'Year' as the index
df_h = df_h1.set_index('Year')

Ejemplo n.º 13
0
import streamlit as st
import yfinance as yf

st.write("""
# Stock Exchange

""")

# Yahoo Finance tickers. Samsung used to reuse Apple's 'AAPL' symbol, so its
# chart duplicated Apple's; '005930.KS' is Samsung Electronics on the Korea
# Exchange (prices in KRW).
googleStock = yf.Ticker('GOOGL')
appleStock = yf.Ticker('AAPL')
samsungStock = yf.Ticker('005930.KS')

# Daily bars over a fixed date range. The former `period='id'` was a typo for
# '1d' and is redundant once start/end are given, so only the bar interval is
# passed.
history_kwargs = dict(interval='1d', start='2020-01-01', end='2021-01-31')

googleStockData = googleStock.history(**history_kwargs)
appleStockData = appleStock.history(**history_kwargs)
samsungStockData = samsungStock.history(**history_kwargs)

# One closing-price chart per company
st.line_chart(googleStockData.Close)
st.line_chart(appleStockData.Close)
st.line_chart(samsungStockData.Close)
Ejemplo n.º 14
0
        generate_plot = st.button('Generate Plot')
        if generate_plot:
            st.success("Generating {} plot for {}".format(
                type_of_plot, selected_columns_names))

            if type_of_plot == 'area':
                custom_data_1 = data[selected_columns_names]
                st.area_chart(custom_data_1)

            elif type_of_plot == 'bar':
                custom_data_2 = data[selected_columns_names]
                st.bar_chart(custom_data_2)

            elif type_of_plot == 'line':
                custom_data_3 = data[selected_columns_names]
                st.line_chart(custom_data_3)

            # Custom Plot
            elif type_of_plot:
                cust_plot = data[selected_columns_names].plot(
                    kind=type_of_plot)
                st.write(cust_plot)
                st.pyplot()

elif choice == 'ML Models':
    st.subheader('ML Models')
    uploaded_file_2 = st.file_uploader('Upload dataset', type=['csv'])
    if uploaded_file_2 is not None:
        data1 = pd.read_csv(uploaded_file_2)
        st.dataframe(data1)
Ejemplo n.º 15
0
"""


@st.cache()
def get_data():
    """Read ``data/housing.csv`` into a DataFrame and return it."""
    return pd.read_csv('data/housing.csv')


df = get_data()

st.header('MVP предсказание стоимости жилья')

if st.checkbox('Отобразить данные'):
    st.write(df)
    st.line_chart(df)

_test_size = st.selectbox('размер тестовой выборки: ',
                          (0.1, 0.15, 0.2, 0.25, 0.3, 0.35),
                          index=3,
                          format_func=lambda x: f"{x*100:0.0f}%")

if st.button('Создать модель'):
    X_train, X_test, y_train, y_test = train_test_split(df.drop('MEDV',
                                                                axis=1),
                                                        df['MEDV'],
                                                        test_size=_test_size,
                                                        random_state=0)
    st.text('Размер данных-' + str(X_train.shape) + str(X_test.shape))

    st.text('Старт модели')
                dim=1) == y.to(device)).float().sum().item()
            n += y.shape[0]

        return acc_sum / n


'''
# 训练模型
'''
num_epochs = st.slider(label='迭代周期',
                       min_value=5,
                       max_value=200,
                       value=5,
                       step=5)
st.subheader("损失折线图")
st_chart_loss = st.line_chart()
bar = st.progress(0)

with st.echo():

    def train_ch3(net,
                  train_iter,
                  test_iter,
                  loss,
                  num_epochs,
                  batch_size,
                  params=None,
                  lr=None,
                  optimizer=None,
                  device=None):
        process = 0
Ejemplo n.º 17
0
    if option != "":
        for i in top_stories_articles["results"]:
            str1 = str1 + i["abstract"]
        words = word_tokenize(str1)
        words_no_punc = []
        for w in words:
            if w.isalpha():
                words_no_punc.append(w.lower())
        stopwords = stopwords.words("english")
        clean_words = []
        for w in words_no_punc:
            if w not in stopwords:
                clean_words.append(w)
        fdist = FreqDist(clean_words)
        chart_data = pd.DataFrame(fdist.most_common(10), fdist.most_common(10))
        st.line_chart(chart_data)
    else:
        st.write("You have not selected a topic!")

st.subheader("III - Wordcloud")

if st.checkbox("Click here to generate wordcloud"):
    if option != "":
        wordcloud = WordCloud().generate(str1)
        plt.figure(figsize=(7, 7))
        plt.imshow(wordcloud)
        plt.axis("off")
        plt.savefig("top_stories_cloud.png")
        image1 = Image.open("top_stories_cloud.png")
        st.image(image1)
    else:
Ejemplo n.º 18
0
import streamlit as st
import pandas_datareader.data as web
from datetime import datetime as dt, timedelta as td

st.title('Crypto Tracker')

# Symbols offered in the selector: crypto pairs first, then stocks.
crypto_pairs = ('BTC-USD', 'ETH-USD', 'BNB-USD', 'DOGE-USD')
stock_symbols = (
    'RELIANCE.NS',
    'IDFCFIRSTB.NS',
    'SBIN.NS',
    'ITC.NS',
    'ZOMATO.NS',
)
opts = st.selectbox('Select Pair', crypto_pairs + stock_symbols)

# Quotes for the chosen symbol over the 365 days ending now.
prices = web.get_data_yahoo(opts,
                            start=dt.now() - td(days=365),
                            end=dt.now())

st.line_chart(prices)
def stock_viz_volume(stock):
    """Chart the 'Volume' column of *stock* under an upper-cased title.

    Quotes are read with ``web.DataReader(stock, 'stooq')`` and sorted by
    'Date' in ascending order before plotting.
    """
    quotes = web.DataReader(stock, 'stooq')
    quotes = quotes.sort_values('Date', ascending=True)
    volume = quotes.filter(['Volume'])
    st.title('{} volume.'.format(stock.upper()))
    st.line_chart(volume)
Ejemplo n.º 20
0
    st.text("Bat Deterrent Installation /per Turbine")
    # st.subheader("TBD")
    st.subheader("${:,.2f}".format(float(TotalBDS)))

with col3:
    st.text("NPV Curtailment Losses")
    st.subheader("${:,.2f}".format(
        float(NPVCurtLosses) * float(data["Value"]["Number of Turbines"])))

with col4:
    st.text("NPV Curtailment Losses/Turbine")
    st.subheader("${:,.2f}".format(float(NPVCurtLosses)))

# st.header("Cumulative Loss Due to Curtailment (USD per Turbine)")
# st.line_chart(ProjFinancials[["Cummulative Loss"]])

col1, col2 = st.beta_columns(2)

with col1:
    st.header("Site Information")
    st.table(data)

with col2:
    st.header("Site Location")
    st.map(windTurbines.loc[windTurbines["p_name"] == option])

st.header("Annual Revenue and Loss Due to Curtailment (USD per Turbine)")
st.line_chart(ProjFinancials[["Annual Revenue", "Annual Curt Loss"]])

st.table(ProjFinancials)
Ejemplo n.º 21
0
    st.header('Crop Classification Demo')
    st.subheader("Upload Crop NDVI Data")
    k = st.number_input("Maximum No. of Rows to Read", min_value=10,
                        max_value=1000, step=1, value=10, key='readinput')
    results = None
    uploaded_file = st.file_uploader(
        "Choose a CSV file (Maximum 1000 Rows for Performance)", type="csv", key='test')
    st.subheader("Upload Ground Truth Label (Only for Testing)")
    ground_truth_file = st.file_uploader(
        "Choose a CSV file (Should Match the NDVI CSV)", type="csv", key='truth')
    if uploaded_file is not None:
        data = pd.read_csv(uploaded_file, nrows=k)
        st.write(data)
        st.subheader("Curve Visualization")
        st.line_chart(data.T.to_numpy())
        max_row = data.shape[0]-1
        st.subheader("Plot single NDVI curve")
        ndvi_nrow = st.number_input(
            "Pick up a row", min_value=0, max_value=max_row, step=1, value=0, key='singleinput')
        picked_ndvi = data.iloc[ndvi_nrow]
        show_ndvi = st.button("Show single NDVI Curve")
        if show_ndvi:
            play_line_plots(picked_ndvi)
        st.subheader("Crop Classification")
        run_model = st.button("Run ML model")
        if run_model:
            with st.spinner('Model Running, Input Curve Row No.{}'.format(ndvi_nrow)):
                picked_input = data.iloc[[ndvi_nrow]]
                # scaler_info = read_scaler('./standard_scaler.npy')
                model_input = prepare_input(picked_input)
Ejemplo n.º 22
0
else:
    st.sidebar.error('Error: End date must fall after start date.')
    
if sidebar == "tools":
    
    ticker = st.text_input("Please enter ticker below. (For SPX use ^GSPC and for VIX use ^VIX)")
    status_radio = st.radio('Please click Search when you are ready.', ('Entry', 'Search'))
    
    options = ['historical regime', 'smoothed variance probability', 'continuous wavelet transform', 'all']
    series_type = ['Close', 'Adjusted Close']

    if status_radio == "Search":
    
        df = yf.download(ticker, start_date, end_date)
        df_plot = df[['Close', 'Adj Close']]
        st.line_chart(df_plot)
        st.write(df)
    
        time_series_type = st.radio("Please select which time series you would like", series_type)
        time_series_options = st.selectbox("Please select what kind of analysis", options)
        time_series_start = st.radio("Please Select Run when ready", ("Stop", "Run"))
    
        if time_series_start == "Run":
            timeseries = TimeSeries(df, time_series_type, ticker)
    
            if time_series_options == "historical regime":
                output = timeseries.get_regimes()
                
            if time_series_options == "smoothed variance probability":
                output = timeseries.smoothed_probability()
                
Ejemplo n.º 23
0
        st.markdown(
            "### Here is a chart for the data fitted with the test data and the regression line"
        )
        errTotal = polyreg.GetBestPolynomial(XTrain, yTrain, XTest, yTest,
                                             polynomial)
        if st.checkbox("Show Code"):
            showRegression()

        data = pd.DataFrame(errTotal, columns=["Train Error", "Test Error"])
        errTrain = errTotal[:, :1]
        errTest = errTotal[:, :2]
        st.markdown("## Error vs. Model Complexity")
        lastrowTrain = errTrain[0]
        lastrowTest = errTrain[0]

        chart = st.line_chart(errTotal[:1], 800, 800)
        for i in range(errTotal.shape[0]):
            newrowTrain = errTotal[i]
            chart.add_rows(newrowTrain)
            time.sleep(0.1)

        st.markdown(
            "### Let's look into the train and test errors producing the line chart above"
        )
        st.write(data)
        st.markdown('''
        As we can see, as the polynomial value increases, the regression line begins to overfit the data, resulting in a 
        lower train error, but increase in test error.
        ''')

    elif option == "Logistic Regression":
Ejemplo n.º 24
0
                              key="1").upper()
name2 = st.sidebar.text_input("Enter first name:", value="Jason",
                              key="2").upper()
name3 = st.sidebar.text_input("Enter first name:", value="Brittany",
                              key="3").upper()
name4 = st.sidebar.text_input("Enter first name:", value="Karen",
                              key="4").upper()
name5 = st.sidebar.text_input("Enter first name:", value="Gary",
                              key="5").upper()

names = [
    name for name in [name1, name2, name3, name4, name5]
    if name in total_df.columns
]

st.line_chart(total_df[names].rename(columns=capwords))

st.subheader("Guessing ages using first names")

st.markdown("""
The popularity of some names spike very quickly, and the age range of people with that name may be very small. Below are
confidence intervals of the age of someone, given their first name¹.
""")

YEAR = 2019
PERCENT = st.slider("",
                    min_value=25,
                    max_value=90,
                    value=50,
                    step=5,
                    format="%d%%")
Ejemplo n.º 25
0
y_range_3 = [(x, x**3) for x in x_range]
show_graphic = st.button(label="Show")
y_x = st.checkbox("y = x")
y_x2 = st.checkbox("y = x ^ 2")
y_x3 = st.checkbox("y = x ^ 3")

df = pd.DataFrame(y_range)
df2 = pd.DataFrame(y_range_2)
df3 = pd.DataFrame(y_range_3)

def _fill_progress_bar(delay=0.1):
    """Advance a Streamlit progress bar from 0 to 100, one step per *delay* s.

    The update call sits inside the loop. It used to be dedented out of it,
    so the bar stayed at 0 for the whole wait and then jumped to 100.
    """
    my_bar = st.progress(0)
    for percent_complete in range(100):
        time.sleep(delay)
        my_bar.progress(percent_complete + 1)


# Draw the first checked function once "Show" is pressed (y = x takes
# precedence over y = x^2, which takes precedence over y = x^3).
if show_graphic and y_x:
    _fill_progress_bar()
    st.line_chart(data=df)
    st.info("График y = x нарисован")
elif show_graphic and y_x2:
    _fill_progress_bar()
    st.line_chart(data=df2)
    st.info("График y = x^2 нарисован")
elif show_graphic and y_x3:
    _fill_progress_bar()
    st.line_chart(data=df3)
    st.info("График y = x^3 нарисован")
Ejemplo n.º 26
0
def cs_body():
    """Render the body of the Streamlit cheat sheet.

    Emits a sequence of sub-headers, each followed by an ``st.code`` block
    listing example Streamlit calls for that topic. The snippets are display
    text only; none of them is executed here.

    Returns:
        None.
    """
    # Magic commands

    st.subheader('Magic commands')
    st.markdown('''Magic commands allow you to implicitly `st.write()`''')
    st.code('''
\'\'\' _This_ is some __Markdown__ \'\'\'
a=3
'a', a
'dataframe:', data
    ''')

    # Display text

    # The backslash before "pi" is doubled so the literal contains no invalid
    # escape sequence; the rendered text is unchanged.
    st.subheader('Display text')
    st.code('''
st.text('Fixed width text')
st.markdown('_Markdown_') # see *
st.latex(r\'\'\' e^{i\\pi} + 1 = 0 \'\'\')
st.write('Most objects') # df, err, func, keras!
st.write(['st', 'is <', 3]) # see *
st.title('My title')
st.header('My header')
st.subheader('My sub')
st.code('for i in range(8): foo()')

* optional kwarg unsafe_allow_html = True
    ''')

    # Display data

    st.subheader('Display data')
    st.code('''
st.dataframe(data)
st.table(data.iloc[0:10])
st.json({'foo':'bar','fu':'ba'})
    ''')

    # Display charts

    st.subheader('Display charts')
    st.code('''
st.line_chart(data)
st.area_chart(data)
st.bar_chart(data)
st.pyplot(fig)
st.altair_chart(data)
st.vega_lite_chart(data)
st.plotly_chart(data)
st.bokeh_chart(data)
st.pydeck_chart(data)
st.deck_gl_chart(data)
st.graphviz_chart(data)
st.map(data)
    ''')

    # Display media

    st.subheader('Display media')
    st.code('''
st.image('./header.png')
st.audio(data)
st.video(data)
    ''')

    # Display interactive widgets

    st.subheader('Display interactive widgets')
    st.code('''
st.button('Hit me')
st.checkbox('Check me out')
st.radio('Radio', [1,2,3])
st.selectbox('Select', [1,2,3])
st.multiselect('Multiselect', [1,2,3])
st.slider('Slide me', min_value=0, max_value=10)
st.text_input('Enter some text')
st.number_input('Enter a number')
st.text_area('Area for textual entry')
st.date_input('Date input')
st.time_input('Time entry')
st.beta_color_picker('Pick a color')
st.file_uploader('File uploader')
    ''')
    st.write('Use widgets\' returned values in variables:')
    st.code('''
>>> for i in range(int(st.number_input('Num:'))): foo()
>>> if st.sidebar.selectbox('I:',['f']) == 'f': b()
>>> my_slider_val = st.slider('Quinn Mallory', 1, 88)
>>> st.write(my_slider_val)
    ''')

    # Control flow

    st.subheader('Control flow')
    st.code('''
st.stop()
    ''')

    # Display code

    st.subheader('Display code')
    st.code('''
st.echo()

>>> with st.echo():
>>>     # Code below both executed and printed
>>>     foo = 'bar'
>>>     st.write(foo)
    ''')

    # Display progress and status

    st.subheader('Display progress and status')
    st.code('''
st.progress(progress__variable_1_to_100)

st.spinner()

>>> with st.spinner(text='In progress'):
>>>     time.sleep(5)
>>>     st.success('Done')

st.balloons()
st.error('Error message')
st.warning('Warning message')
st.info('Info message')
st.success('Success message')
st.exception(e)
    ''')

    # Placeholders, help, and options

    st.subheader('Placeholders, help, and options')
    st.code('''
st.empty()

>>> my_placeholder = st.empty()
>>> my_placeholder.text('Replaced!')

st.help(pandas.DataFrame)

st.get_option(key)
st.set_option(key, value)

st.beta_set_page_config(layout='wide')
    ''')

    # Mutate data

    st.subheader('Mutate data')
    st.code('''
DeltaGenerator.add_rows(data)

>>> my_table = st.table(df1)
>>> my_table.add_rows(df2)

>>> my_chart = st.line_chart(df1)
>>> my_chart.add_rows(df2)
    ''')

    # Optimize performance

    st.subheader('Optimize performance')
    st.code('''
@st.cache

>>> @st.cache
... def foo(bar):
...     # Mutate bar
...     return data
...
>>> d1 = foo(ref1)
>>> # Executes as first time
>>>
>>> d2 = foo(ref1)
>>> # Does not execute; returns cached value, d1==d2
>>>
>>> d3 = foo(ref2)
>>> # Different arg, so function executes
    ''')
Ejemplo n.º 27
0
Archivo: app.py Proyecto: Remxy/AutoML
def main():
    """Semi Auto ML App with Streamlit.

    Shows one of four activities chosen in the sidebar:

    * ``EDA`` - preview an uploaded dataset, its shape, columns, summary
      statistics and the value counts of its last column.
    * ``Plot`` - correlation heatmap, pie chart and a user-selected plot of
      chosen columns.
    * ``Model Building`` - 10-fold cross-validated accuracy of six
      classifiers, treating the last column as the target.
    * ``About`` - a short description of the app.

    Every dataset-dependent widget is rendered only once a file has been
    uploaded, so the page never references an undefined dataframe.
    """

    st.title("Semi Auto ML App")
    st.text("Using Streamlit == 0.52.1+")

    activities = ["EDA", "Plot", "Model Building", "About"]

    choice = st.sidebar.selectbox("Select Activity", activities)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis")

        data = st.file_uploader("Upload Dataset", type=["csv", "txt", "xls"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Computed once up front: both column-related checkboxes need it,
            # and the second must work even when the first is not ticked.
            all_columns = df.columns.to_list()

            if st.checkbox("Show Shape"):
                st.write(df.shape)

            if st.checkbox("Show Columns"):
                st.write(all_columns)

            if st.checkbox("Select Columns To Show"):
                selected_columns = st.multiselect("Select Columns",
                                                  all_columns)
                new_df = df[selected_columns]
                st.dataframe(new_df)

            if st.checkbox("Show Summary"):
                st.write(df.describe())

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts())

    elif choice == 'Plot':
        st.subheader("Data Visualization")

        data = st.file_uploader("Upload Dataset", type=["csv", "txt", "xls"])
        # Everything below reads `df`, so it all lives under this guard.
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            all_columns_names = df.columns.tolist()

            if st.checkbox("Correlation with Seaborn"):
                st.write(sns.heatmap(df.corr(), annot=True))
                plt.yticks(rotation=0)
                st.pyplot()

            if st.checkbox("Pie Chart"):
                columns_to_plot = st.selectbox("Select 1 Column",
                                               all_columns_names)
                pie_plot = df[columns_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pie_plot)
                st.pyplot()

            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                st.success("Generating Customize Plot of {} for {}".format(
                    type_of_plot, selected_columns_names))

                cust_data = df[selected_columns_names]

                # Plot by Streamlit's native chart elements where available
                if type_of_plot == 'area':
                    st.area_chart(cust_data)

                elif type_of_plot == 'bar':
                    st.bar_chart(cust_data)

                elif type_of_plot == 'line':
                    st.line_chart(cust_data)

                # Remaining kinds (hist, box, kde) go through pandas plotting
                elif type_of_plot:
                    cust_plot = cust_data.plot(kind=type_of_plot)
                    st.write(cust_plot)
                    st.pyplot()

    elif choice == 'Model Building':
        st.subheader("Building ML Model")

        data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Features are every column but the last; the last is the target.
            X = df.iloc[:, 0:-1]
            Y = df.iloc[:, -1]
            seed = 8

            models = [
                ("LR", LogisticRegression()),
                ("LDA", LinearDiscriminantAnalysis()),
                ("KNN", KNeighborsClassifier()),
                ("CART", DecisionTreeClassifier()),
                ("NB", GaussianNB()),
                ("SVM", SVC()),
            ]

            # Per-model results, kept both as parallel lists (for the table)
            # and as a list of dicts (for the JSON view).
            model_names = []
            model_mean = []
            model_std = []
            all_models = []
            scoring = 'accuracy'

            # random_state only takes effect together with shuffle=True;
            # recent scikit-learn rejects a seed on an unshuffled KFold.
            # One seeded splitter gives every model the same folds.
            kfold = model_selection.KFold(n_splits=10,
                                          shuffle=True,
                                          random_state=seed)

            for name, model in models:
                cv_results = model_selection.cross_val_score(model,
                                                             X,
                                                             Y,
                                                             cv=kfold,
                                                             scoring=scoring)
                mean_accuracy = cv_results.mean()
                accuracy_std = cv_results.std()

                model_names.append(name)
                model_mean.append(mean_accuracy)
                model_std.append(accuracy_std)
                all_models.append({
                    "model_name": name,
                    "model_accuracy": mean_accuracy,
                    "standard_deviation": accuracy_std
                })

            if st.checkbox("Metrics as Table"):
                st.dataframe(
                    pd.DataFrame(zip(model_names, model_mean, model_std),
                                 columns=[
                                     "Model Name", "Model Accuracy",
                                     "Standard Deviation"
                                 ]))

            if st.checkbox("Metrics as JSON"):
                st.json(all_models)

    elif choice == 'About':
        st.subheader("About")
        st.text(
            "This is a Drag and Drop Semi Auto Machine Learning App built using Streamlit and Python"
        )
Ejemplo n.º 28
0
def app():
    """Render the "Statistics" page for a user-selected stock and date range.

    Collects a date range (sidebar) and a stock symbol (main area), downloads
    the price history for that symbol, and displays the company name and
    sector, line charts of the closing price and volume, and the summary
    statistics of the downloaded frame.
    """
    # Add title and image
    st.write("""
    # Statistics
    Select a stock and date range to view statistics on that stock.
    """)

    #Create a sidebar header
    st.sidebar.header('Date Range:')

    def get_input():
        """Read the date range and stock selection from the UI.

        Returns:
            Tuple ``(start_date, end_date, stock_symbol, company_name,
            sector)``. The symbol is stripped of surrounding whitespace; the
            company name and sector are returned as rendered by
            ``Series.to_string``.
        """
        start_date = st.sidebar.date_input("Starting Date:",
                                           value=(datetime.today() -
                                                  timedelta(days=365)),
                                           min_value=datetime(1817, 3, 8),
                                           max_value=datetime.today())
        end_date = st.sidebar.date_input("Ending Date:",
                                         min_value=datetime(1817, 3, 8),
                                         max_value=datetime.today())

        # utf-8-sig drops a leading byte-order mark if the CSV has one.
        with open('./stock symbols.csv', 'r',
                  encoding='utf-8-sig') as stock_file:
            stock_list = pd.read_csv(stock_file)

        symbols = stock_list.iloc[:, 0]
        selected = st.selectbox(label="", options=symbols)
        index = stock_list[stock_list['Symbol'] == selected].index.values
        stock_symbol = stock_list['Symbol'][index].to_string(index=False)
        company_name = stock_list['Name'][index].to_string(index=False)
        sector = stock_list['Sector'][index].to_string(index=False)

        return start_date, end_date, stock_symbol.strip(), company_name, sector

    def get_data(stock_symbol, start_date, end_date):
        """Download price history and keep rows inside the requested window.

        Args:
            stock_symbol: Ticker passed to the data reader.
            start_date: First date to include (inclusive).
            end_date: Last date to include (inclusive).

        Returns:
            DataFrame indexed by a ``DatetimeIndex`` holding only the rows
            whose date lies in ``[start_date, end_date]``; empty when no row
            falls inside the window.
        """
        #Get the date range
        start = pd.to_datetime(start_date)
        end = pd.to_datetime(end_date)

        #Load the data
        df = dr.DataReader(stock_symbol,
                           data_source='yahoo',
                           start=start,
                           end=end)
        df = df.set_index(pd.DatetimeIndex(df.index.values))

        # A boolean mask applies both bounds directly and needs no sorted
        # index, replacing the earlier row scans that never used the end date.
        in_window = (df.index >= start) & (df.index <= end)
        return df.loc[in_window]

    #Get user input
    start_date, end_date, stock_symbol, company_name, sector = get_input()

    #Get the data
    df = get_data(stock_symbol, start_date, end_date)

    #Display stock name and sector header
    st.header("Company:" + company_name + "\n")
    st.header("Market Sector:" + sector + "\n")

    #Display the close price
    st.header("Closing Prices from " + str(start_date) + " to " +
              str(end_date))
    st.line_chart(df['Close'])

    #Display the volume
    st.header("Volume from " + str(start_date) + " to " + str(end_date))
    st.line_chart(df['Volume'])

    #Get statistics on the data
    st.header("Statistics from " + str(start_date) + " to " + str(end_date))
    st.write(df.describe())
Ejemplo n.º 29
0
    # Load the uploaded CSV; the first column is treated as the date and the
    # second as the value series.
    df = pd.read_csv(uploaded_file)

    date_col = df.columns[0]
    val_col = df.columns[1]
    df[date_col] = pd.to_datetime(df[date_col])

    # Calendar features derived from the date. Despite the "_name" suffix
    # these hold the numeric week / month / quarter.
    # NOTE(review): Series.dt.week is deprecated in newer pandas in favour of
    # dt.isocalendar().week — confirm the pandas version this targets.
    df['Week_name'] = df[date_col].dt.week
    df['Month_name'] = df[date_col].dt.month
    df['Quarter_name'] = df[date_col].dt.quarter

    df = df.set_index(date_col)

    st.markdown("### Dataset")
    st.write(df[[val_col]])
    st.markdown("### Plot")
    st.line_chart(df[val_col])

    # Heatmap of pairwise column correlations; st.pyplot() is called without
    # a figure, so it presumably renders the current global matplotlib
    # figure — confirm for the Streamlit version in use.
    st.markdown("### Correlation")
    sn.heatmap(df.corr(), annot=True)
    st.pyplot()

    # Train-Test split: the first 90% of rows (in file order) become the
    # training set and the remaining rows the test set; no shuffling.
    split_value = int(0.9 * len(df))
    train_df = df.iloc[:split_value]
    test_df = df.iloc[split_value:]

    st.markdown("## Train-Test split into 90:10 ratio")
    st.markdown("### Length of Training set: " + str(len(train_df)))
    st.markdown("### Length of Test set: " + str(len(test_df)))

    # run_auto_var is defined elsewhere; its result is unpacked as a pair.
    # Presumably 12 is the maximum lag order searched and the pair is the
    # best lag with its AIC — TODO confirm against run_auto_var.
    best_p, best_aic = run_auto_var(train_df, 12)
Ejemplo n.º 30
0
    st.subheader(f"Analysing #{hashtag} from {date}")
    d = {"Positive": [pos_count], "Negative": [neg_count]}
    df = pd.DataFrame(data=d)

    hashtag = f"#{hashtag}"
    with st.spinner("Getting tweets..."):
        tweets = tw.Cursor(api.search, q=hashtag, lang="en",
                           since=date).items()

    total_tweets = st.empty()
    pos_tweets = st.empty()
    neg_tweets = st.empty()

    sentiments = ["Positive", "Negative"]
    chart = st.line_chart(df)
    barchart = st.empty()

    for idx, tweet in enumerate(tweets):
        tweets_count += 1
        output = requests.post("http://backend:8000/api",
                               json={"tweet": tweet.text})
        output = output.content.decode("utf8")
        output = json.loads(output).get("sentiment")

        if output == "Negative":
            neg_count += 1
        elif output == "Positive":
            pos_count += 1

        total_tweets.text("Tweets Analysed: %d" % tweets_count)