Example #1
0
# CSV file backing each entry of the "separate datasets" selector.
_ARCHIVOS_POR_CLASE = {
    'Microficciones': 'microficciones.csv',
    'Noticias': 'noticias.csv',
    'Frases/Reflexiones': 'frases_pensamientos.csv',
}

# Matplotlib colour-cycle names applied to the bars of the frequency chart.
_COLORES_BARRAS = [
    'C0', 'C1', 'C2', 'C3', 'C4', 'C5', 'C6', 'C7', 'C8', 'C9'
]


def _mostrar_nube_palabras(textos):
    """Render a word cloud built from the text series *textos*."""
    nube = hero.wordcloud(textos)
    st.pyplot(nube)


def _mostrar_frecuencia_palabras(textos):
    """Render a horizontal bar chart of the 30 top words in *textos*."""
    fig, ax = plt.subplots()
    # Draw on the explicit axes so the chart always lands on the figure that
    # is handed to Streamlit, regardless of pyplot's "current axes" state.
    hero.top_words(textos, normalize=False).head(n=30).plot(
        kind='barh', color=_COLORES_BARRAS, ax=ax)
    st.pyplot(fig)


def _explorar_corpus_preprocesado():
    """Show the preprocessed corpus plus per-class tables and charts."""
    datos = carga(archivo='corpus_preprocesado.csv')
    st.dataframe(data=datos)
    if st.button(label='Generar nube de palabras'):
        _mostrar_nube_palabras(datos['Texto limpio'])
    if st.button(label='Frecuencia de palabras', key=1):
        _mostrar_frecuencia_palabras(datos['Texto limpio'])

    opcion_tipo = st.selectbox(
        label='Seleciona la clase de microtexto ya preprocesado',
        options=['Microficción', 'Noticia', 'Frase/Pensamiento'])

    # The selected option doubles as the name of the flag column that marks
    # the rows of that class; every class is rendered the same way.
    datos_seleccionados = datos.loc[datos[opcion_tipo] == True]  # noqa: E712
    st.dataframe(data=datos_seleccionados)
    opcion_uno, opcion_dos = st.beta_columns(2)
    with opcion_uno:
        if st.checkbox(label='Nube de palabras', key=1):
            _mostrar_nube_palabras(datos_seleccionados['Texto limpio'])
    with opcion_dos:
        if st.checkbox(label='Frecuencia de palabras', key=2):
            _mostrar_frecuencia_palabras(datos_seleccionados['Texto limpio'])


def eda():
    """Render the exploratory-data-analysis page.

    The sidebar chooses between browsing one of the individual datasets and
    browsing the combined corpus. For the preprocessed corpus the page also
    offers a word cloud and a word-frequency chart, both for the whole corpus
    and for the rows of a single microtext class.
    """
    with st.beta_expander(label='Descripción de la sección', expanded=True):
        st.subheader(
            'En esta sección, el usuario puede examinar y visualizar los conjuntos de datos que conforman al corpus de este estudio.'
        )

    opcion_general = st.sidebar.radio(
        'Selecciona el conjunto de datos, o el corpus general para su análisis',
        options=['Conjuntos de datos separados', 'Corpus general'])

    if opcion_general == 'Conjuntos de datos separados':
        opcion_clase = st.selectbox(
            label='Seleciona uno de los conjuntos de datos disponibles',
            options=['Microficciones', 'Noticias', 'Frases/Reflexiones'])
        archivo = _ARCHIVOS_POR_CLASE.get(opcion_clase)
        if archivo is not None:
            datos = carga(archivo=archivo)
            st.dataframe(data=datos)

    elif opcion_general == 'Corpus general':
        opcion_corpus = st.sidebar.radio(
            label='Seleciona el corpus sin procesar, o ya preprocesado',
            options=['corpus', 'corpus preprocesado'],
            key=1)
        if opcion_corpus == 'corpus':
            datos = carga(archivo='corpus.csv')
            st.dataframe(data=datos)
        elif opcion_corpus == 'corpus preprocesado':
            _explorar_corpus_preprocesado()
Example #2
0
def main():
    '''
    Streamlit page that fits a small LSTM to a stock's closing prices.

    The sidebar collects a ticker and a horizon (one, two or three months,
    taken as 30, 60 or 90 rows). The last ``horizon`` rows are held out as
    the test split, a one-step look-back LSTM is trained on the rest, and the
    page shows a chart of original / train / test predictions followed by a
    table of the test predictions. Nothing is rendered until the user has
    replaced the placeholder text with a non-blank ticker.
    '''
    st.sidebar.title("type in the stock you want to predict")
    stock = st.sidebar.text_input('stock name', 'Type Here')

    app_mode = st.sidebar.selectbox(
        "Choose how many months to predict",
        ["one month", "two months", "three months"])

    # A lookup cannot leave the horizon unbound the way an if/elif chain can.
    horizon_by_mode = {"one month": 30, "two months": 60, "three months": 90}
    horizon = horizon_by_mode[app_mode]

    # Skip the untouched placeholder and a cleared (blank) input alike.
    if stock == 'Type Here' or not stock.strip():
        return

    df = data_load(stock)

    df_lstm = pd.DataFrame()
    df_lstm['close'] = df.Close

    scaler = MinMaxScaler(feature_range=(0, 1))
    dataset = scaler.fit_transform(df_lstm)

    train, test = dataset[:-horizon], dataset[-horizon:]
    look_back = 1
    trainX, trainY = create_dataset(train, look_back)
    testX, _ = create_dataset(test, look_back)

    # Reshape to the (samples, time steps, features) layout given to the LSTM.
    trainX = np.reshape(trainX, (trainX.shape[0], 1, trainX.shape[1]))
    testX = np.reshape(testX, (testX.shape[0], 1, testX.shape[1]))

    model = Sequential()
    model.add(LSTM(4, input_shape=(1, look_back)))
    model.add(Dense(1))
    model.compile(loss='mean_squared_error', optimizer='adam')
    model.fit(trainX, trainY, epochs=1, batch_size=1, verbose=0)

    # Predictions come back in scaled units; map them to prices again.
    trainPredict = scaler.inverse_transform(model.predict(trainX))
    testPredict = scaler.inverse_transform(model.predict(testX))

    # NaN-filled canvases the size of the full series, so each prediction
    # run can be drawn at its own position on the shared date axis.
    trainPredictPlot = np.empty_like(dataset)
    trainPredictPlot[:, :] = np.nan
    trainPredictPlot[look_back:len(trainPredict) + look_back, :] = trainPredict

    # shift test predictions for plotting
    testPredictPlot = np.empty_like(dataset)
    testPredictPlot[:, :] = np.nan
    test_start = len(trainPredict) + (look_back * 2) + 1
    testPredictPlot[test_start:len(dataset) - 1, :] = testPredict

    # plot baseline and predictions
    fig, ax = plt.subplots()
    ax.plot(df_lstm.index,
            scaler.inverse_transform(dataset),
            label='original')
    ax.plot(df_lstm.index, trainPredictPlot, label='train prediction')
    ax.plot(df_lstm.index, testPredictPlot, label='test prediction')
    ax.set_xlabel('Date')
    ax.set_ylabel('Close Price')
    ax.set_title(f'{app_mode} price prediction')
    ax.legend()

    st.pyplot(fig)

    # Table of the test-window predictions, indexed by their dates.
    window = slice(-horizon + 1, -1)
    lstm = pd.DataFrame(testPredictPlot[window])
    lstm.index = df_lstm.index[window]
    lstm.index.name = 'Date'
    lstm.columns = ['price']

    st.table(lstm)
Example #3
0
# Grab the axes of the figure that is later handed to Streamlit.
ax = fig.gca()

# Frame the plot on [-1, 1] x [-1, 1], keep the aspect ratio square so the
# circle is not distorted, and draw the circle before the sampled points.
# NOTE(review): the plt.* calls act on pyplot's current axes; this assumes
# those are the axes of `fig` - confirm where `fig` is created.
plt.xlim(-1,1)
plt.ylim(-1,1)
plt.grid(linestyle='--')
ax.set_aspect(1)
ax.add_artist(circle)

# Overlay the sampled points as small red markers.
plt.scatter(x_list, y_list,s=1, color='r')

ax.set_ylabel("y-value", fontsize = 10)
ax.set_xlabel("x-value", fontsize = 10)

st.pyplot(fig)

# Display the estimate of pi, the true value, and the percent error between
# the two. The estimate is 4 * inside_count / iterations; the original note
# says the radius is 1 here, so no division by r is needed.
estimated_pi = 4*inside_count/iterations
st.write("Your Estimation of Pi:", estimated_pi)
st.write("True Value of Pi:", np.pi)
# Percent error relative to the true value: |value - true_value| / |true_value| * 100%
diff_percent = abs(estimated_pi-np.pi)/np.pi*100
st.write("The percent error between your estimation and the true value is:", round(diff_percent,3), "%")

# Next section: track how the estimate changes with the number of
# iterations, adding a new point to a graph for every new estimate of pi.
st.header("How the Total Number of Points Affects Your Estimate")
st.write("One really cool thing about Monte Carlo is as you increase \
the total number of points you use in your simulation, the more accurate your results. \
Example #4
0
# Information of mnist dataset
# (x_train, y_train), (x_test, y_test) = mnist.load_data()
# Optional panel: print the array shapes of the train/test splits.
if st.checkbox('Show images sizes'):
    st.write(f'##### X Train Shape: {x_train.shape}')
    st.write(f'##### X Test Shape: {x_test.shape}')
    st.write(f'##### Y Train Shape: {y_train.shape}')
    st.write(f'##### Y Test Shape: {y_test.shape}')

# display one random image from our training set:
# st.subheader('Inspecting dataset')
# if st.checkbox('Show random image from the train set'):
#     num = np.random.randint(0, x_train.shape[0])
#     image = x_train[num]
#     st.image(image, caption=class_names[y_train[num]], width=96)
st.write('***')
# Optional panel: show the first training image of every distinct label.
if st.checkbox('Show 10 different image from the train set'):
    # return_index=True yields, per distinct label, the index of its first
    # occurrence in y_train; those indices pick one image per label.
    num_10 = np.unique(y_train, return_index=True)[1]
    images = x_train[num_10]
    # Use a dedicated figure instead of pyplot's global one: calling
    # st.pyplot() without a figure is deprecated and can pick up artists
    # left behind by earlier plots.
    digits_fig = plt.figure()
    for i, digit_image in enumerate(images):
        # One cell of a 2 x 5 grid per image, without axis ticks.
        digit_ax = digits_fig.add_subplot(2, 5, 1 + i)
        digit_ax.imshow(digit_image)
        digit_ax.set_title(class_names[i])
        digit_ax.set_xticks([])
        digit_ax.set_yticks([])
    digits_fig.suptitle("10 different numbers", fontsize=18)
    st.pyplot(digits_fig)
# Section heading, a one-line summary of the table's dimensions, and the
# table of the selected teams itself.
st.header('Display Player Stats of Selected Team(s)')
st.write('Data Dimension: {} rows and {} columns.'.format(
    df_selected_team.shape[0], df_selected_team.shape[1]))
st.dataframe(df_selected_team)


# Download NBA player stats data
def filedownload(df, filename='playerstats.csv'):
    """Return an HTML anchor that downloads *df* as a CSV file.

    The frame is serialised without its index, base64-encoded and embedded
    in a ``data:`` URI, so the link works without any server-side file.

    Args:
        df: Object exposing ``to_csv(index=False)`` (a pandas DataFrame).
        filename: Name suggested to the browser for the downloaded file.

    Returns:
        The ``<a ...>Download CSV File</a>`` markup as a string.
    """
    import html  # local import: only needed to escape the filename

    csv = df.to_csv(index=False)
    # base64 works on bytes: text -> bytes -> base64 bytes -> text.
    b64 = base64.b64encode(csv.encode()).decode()
    # The filename lands inside a quoted HTML attribute, so escape it.
    safe_name = html.escape(filename, quote=True)
    # text/csv is the registered media type for CSV data.
    href = (f'<a href="data:text/csv;base64,{b64}" '
            f'download="{safe_name}">Download CSV File</a>')
    return href


# Offer the currently selected rows as a downloadable CSV link.
st.markdown(filedownload(df_selected_team), unsafe_allow_html=True)

# Heatmap
if st.button('Intercorrelation Heatmap'):
    import io  # only needed for the in-memory CSV round trip below

    st.header('Intercorrelation Matrix Heatmap')
    # Round-trip the table through CSV text so read_csv re-infers the column
    # dtypes (presumably the reason for the original write/read pair). An
    # in-memory buffer replaces the shared 'output.csv' file, which
    # concurrent sessions would overwrite for each other.
    df = pd.read_csv(io.StringIO(df_selected_team.to_csv(index=False)))

    # Correlate numeric/boolean columns only; text columns cannot be
    # correlated and make DataFrame.corr raise on recent pandas versions.
    corr = df.select_dtypes(include=['number', 'bool']).corr()
    # Mask the upper triangle (diagonal included) so each pair shows once.
    mask = np.zeros_like(corr)
    mask[np.triu_indices_from(mask)] = True
    with sns.axes_style("white"):
        f, ax = plt.subplots(figsize=(7, 5))
        ax = sns.heatmap(corr, mask=mask, vmax=1, square=True, ax=ax)
    st.pyplot(f)
Example #6
0
 def speed_accuracy_test(self, path_x_train, path_y_train):
     """Score the network on an HDF5 test set and report the errors.

     Reads inputs and targets from the ``dataset_1`` entry of the two HDF5
     files, runs ``self.Predict_Neural_Network()``, stores summary
     statistics on ``self`` and writes them, followed by a density plot of
     the prediction errors, to the Streamlit page.

     Args:
         path_x_train: Path of the HDF5 file holding the model inputs.
         path_y_train: Path of the HDF5 file holding the target values.
     """
     # Open read-only and close the handles as soon as the data is copied
     # into memory (the [:] slice materialises the dataset).
     with h5py.File(path_x_train, 'r') as h5f:
         self.x_train = h5f['dataset_1'][:]
     with h5py.File(path_y_train, 'r') as h5f:
         self.y_train = h5f['dataset_1'][:]

     # NOTE(review): presumably this call sets self.prediction, self.start
     # and self.stop - confirm against Predict_Neural_Network.
     self.Predict_Neural_Network()
     self.prediction = np.squeeze(self.prediction)

     n_options = self.x_train.shape[0]
     residual = self.y_train - self.prediction

     # Error statistics are scaled by one million before rounding, matching
     # the "notional of a million dollars" wording shown to the user.
     self.average_error = round((np.mean(residual)) * 1000000, 2)
     self.average_absolute_error = round(
         (np.mean(np.abs(residual))) * 1000000, 2)
     self.maximum_error = round(np.max((np.abs(residual)) * 1000000), 2)
     self.NN_Time_Taken = round(float(self.stop - self.start), 2)
     self.Average_Time_Taken = round(
         (self.NN_Time_Taken / n_options) * 1000000, 2)
     self.std_dev = round((np.std(residual)) * 1000000, 2)

     # Count, per error band, how many predictions overshoot the target by
     # more than the limit and how many undershoot it by more than the limit.
     over_prediction = self.prediction - self.y_train
     bands = (("1%", 0.01), ("0.5%", 0.005), ("0.25%", 0.0025))
     above = {
         label: np.sum(over_prediction > limit) for label, limit in bands
     }
     below = {
         label: np.sum(over_prediction < -limit) for label, limit in bands
     }

     def share_of_options(count):
         """Return *count* as a percentage of all priced options."""
         return (count / n_options) * 100.0

     st.write(
         "For the testing dataset of ", n_options,
         " Options, we look at the statistics of the error distribution. Therefore an error of $10,000 would correspond to a 1% error"
     )
     st.markdown(
         "The error values are quoted on a notional of a million dollars")
     st.write("Average Absolute error: $", self.average_absolute_error,
              " per million")
     st.write("Average error: $", self.average_error, " per million")
     st.write("Maximum error: $", self.maximum_error, " per million")
     st.write("Total Time Taken to price ", n_options, " options is ",
              self.NN_Time_Taken, " seconds")
     st.write("Time Taken is ", self.Average_Time_Taken,
              "microseconds per option")
     for label, _ in bands:
         st.write("Percentage of Options with error > +/-" + label + " : " +
                  "{:.5%}".format((above[label] + below[label]) / n_options))
     st.write(
         "The plot below is a distribution of the error values along with vertical lines marking the 1%, 0.5% and 0.25% error values along with "
         "the number and percentage of total options which have error values that exceed these bounds. Please enlarge the chart by clicking the enlarge option on the top right corner of the chart"
     )

     a4_dims = (20, 15)
     fig, ax = plt.subplots(figsize=a4_dims)
     # The plotted errors are expressed in percent, hence the factor 100.
     error = pd.DataFrame(over_prediction) * 100.0
     error.columns = ['Error']
     sns.kdeplot(ax=ax, data=error['Error'], shade=True)

     # Full-height markers at each +/- band limit.
     for limit, color in ((0.25, "red"), (0.5, "green"), (1, "blue")):
         ax.axvline(-limit, color=color)
         ax.axvline(limit, color=color)

     # (x of the count label, x of the percentage label, count). The small
     # x offsets are hand-tuned so the text sits beside its marker line.
     annotations = (
         (-1.0 + 0.02, -1.0 + 0.02, below["1%"]),
         (-0.5, -0.5, below["0.5%"]),
         (-0.25, -0.25, below["0.25%"]),
         (0.25 - 0.11, 0.25 - 0.12, above["0.25%"]),
         (0.5 - 0.07, 0.5 - 0.09, above["0.5%"]),
         (1.0 - 0.05, 1.0 - 0.05, above["1%"]),
     )
     for x_count, x_share, count in annotations:
         ax.text(x_count, 10.0, str(count), fontsize=16)
         ax.text(x_share,
                 9.0,
                 str(round(share_of_options(count), 3)),
                 fontsize=16)

     # Hand Streamlit the figure explicitly; the argument-less form relies
     # on pyplot's global figure and is deprecated.
     st.pyplot(fig)
Example #7
0
    # Collapsible panel listing the tags configured as excluded.
    with st.beta_expander("Excluded tags"):
        st.write(config.EXCLUDED_TAGS)

    # Number of tags per project: count how many projects carry each tag
    # count, then draw one bar per distinct tag count.
    st.write("---")
    st.subheader("Exploratory Data Analysis")
    num_tags_per_project = [len(tags) for tags in df.tags]
    num_tags, num_projects = zip(*Counter(num_tags_per_project).items())
    plt.figure(figsize=(10, 3))
    ax = sns.barplot(list(num_tags), list(num_projects))
    plt.title("Tags per project", fontsize=20)
    plt.xlabel("Number of tags", fontsize=16)
    # NOTE(review): the tick labels are forced to 1..len(num_tags); this
    # assumes the distinct tag counts are exactly 1..n with no gaps - confirm.
    ax.set_xticklabels(range(1, len(num_tags) + 1), rotation=0, fontsize=16)
    plt.ylabel("Number of projects", fontsize=16)
    # NOTE(review): plt.show() is unlikely to be needed under Streamlit, and
    # st.pyplot is given the pyplot module rather than a Figure - consider
    # passing the figure object explicitly.
    plt.show()
    st.pyplot(plt)

    # Distribution of tags: flatten every project's tag list and plot the
    # tags from most to least common.
    tags = list(itertools.chain.from_iterable(df.tags.values))
    tags, tag_counts = zip(*Counter(tags).most_common())
    plt.figure(figsize=(10, 3))
    ax = sns.barplot(list(tags), list(tag_counts))
    plt.title("Tag distribution", fontsize=20)
    plt.xlabel("Tag", fontsize=16)
    ax.set_xticklabels(tags, rotation=90, fontsize=14)
    plt.ylabel("Number of projects", fontsize=16)
    plt.show()
    st.pyplot(plt)

    # Plot word clouds for the top tags
    plt.figure(figsize=(20, 8))
def _read_uploaded_text(text_file):
    """Return the text of an uploaded PDF, plain-text or other (docx) file."""
    if text_file.type == "application/pdf":
        return read_pdf(text_file)
    if text_file.type == "text/plain":
        # The upload is read as bytes; decode it to text.
        return str(text_file.read(), "utf-8")
    return docx2txt.process(text_file)


def _render_text_analysis(raw_text, num_of_most_common, insufficient_msg):
    """Render every analysis panel for *raw_text*.

    Args:
        raw_text: The text to analyse.
        num_of_most_common: How many top tokens to list and plot.
        insufficient_msg: Warning shown when a best-effort plot fails.
    """
    with st.beta_expander("Original Text"):
        st.write(raw_text)

    with st.beta_expander("Text Analysis"):
        token_result_df = text_analyzer(raw_text)
        st.dataframe(token_result_df)

    with st.beta_expander("Entities"):
        entity_result = render_entities(raw_text)
        stc.html(entity_result, height=1000, scrolling=True)

    # Layouts
    col1, col2 = st.beta_columns(2)

    with col1:
        with st.beta_expander("Word Stats"):
            st.info("Word Statistics")
            docx = nt.TextFrame(raw_text)
            st.write(docx.word_stats())

        with st.beta_expander("Top Keywords"):
            st.info("Top Keywords/Tokens")
            processed_text = nfx.remove_stopwords(raw_text)
            keywords = get_most_common_tokens(
                processed_text, num_of_most_common
            )
            st.write(keywords)

        with st.beta_expander("Sentiment"):
            sent_result = get_sentiment(raw_text)
            st.write(sent_result)

    with col2:
        with st.beta_expander("Plot Word Freq"):
            # Reuse the keyword counts computed for the "Top Keywords" panel
            # instead of recomputing them with identical arguments.
            fig, ax = plt.subplots()
            ax.bar(list(keywords.keys()), list(keywords.values()))
            ax.tick_params(axis="x", labelrotation=45)
            st.pyplot(fig)

        with st.beta_expander("Plot Part of Speech"):
            # Best effort: a plotting failure becomes a warning. Catching
            # Exception (not a bare except) leaves BaseException-derived
            # signals such as KeyboardInterrupt untouched.
            try:
                fig, ax = plt.subplots()
                # Pass the column by keyword; newer seaborn releases treat
                # the first positional argument as `data`.
                sns.countplot(x=token_result_df["PoS"], ax=ax)
                ax.tick_params(axis="x", labelrotation=45)
                st.pyplot(fig)
            except Exception:
                st.warning(insufficient_msg)

        with st.beta_expander("Plot Wordcloud"):
            try:
                plot_wordcloud(raw_text)
            except Exception:
                st.warning(insufficient_msg)

    with st.beta_expander("Download Text Analysis Result"):
        make_downloadable(token_result_df)


def main():
    """A Simple Summarization NLP App.

    The sidebar menu switches between analysing text typed into the page
    ("Home"), analysing an uploaded pdf/docx/txt file ("NLP(files)") and an
    "About" page. Both analysis modes render the same set of panels.
    """
    st.title("NLP App with Streamlit")
    menu = ["Home", "NLP(files)", "About"]
    choice = st.sidebar.selectbox("Menu", menu)
    if choice == "Home":
        st.subheader("Home: Analyse Text")
        raw_text = st.text_area("Enter Text Here")
        num_of_most_common = st.sidebar.number_input("Most Common Tokens", 5, 15)
        if st.button("Analyze"):
            _render_text_analysis(
                raw_text,
                num_of_most_common,
                "Insufficient Data: Must be more than 2",
            )

    elif choice == "NLP(files)":
        st.subheader("NLP Task")

        text_file = st.file_uploader("Upload Files", type=["pdf", "docx", "txt"])
        num_of_most_common = st.sidebar.number_input("Most Common Tokens", 5, 15)

        if text_file is not None:
            raw_text = _read_uploaded_text(text_file)
            _render_text_analysis(
                raw_text, num_of_most_common, "Insufficient Data"
            )

    else:
        st.subheader("About")
Example #9
0
# (cat_id, label) pairs shown in each column of the per-city breakdown,
# listed in display order.
_LEFT_CITY_CATEGORIES = (
    (1, "飲料沖泡"),
    (3, "美食生鮮"),
    (5, "居家生活"),
    (7, "家電"),
    (9, "零食"),
    (13, "菸酒"),
    (15, "生活休閒"),
)
_RIGHT_CITY_CATEGORIES = (
    (2, "麵食料理"),
    (4, "保健生機"),
    (6, "美容保養"),
    (8, "箱包服飾"),
    (10, "其他"),
    (12, "3C"),
    (14, "寵物專區"),
)


def _plot_city_counts(column, city_size_df, categories):
    """Write one labelled per-city bar chart into ``column`` per category."""
    with column:
        for cat_id, label in categories:
            column.write(label)
            city_size_df.loc[city_size_df['cat_id'] == cat_id,
                             ['city', 'size']].plot.bar(x='city',
                                                        y='size',
                                                        rot=85)
            column.pyplot()


def _render_group_cards(column, template, title, items, color, user_counts):
    """Write a titled list of HTML cards, one per group entry, into ``column``."""
    with column:
        column.markdown(template.title_temp.format(title),
                        unsafe_allow_html=True)
        for entry in items:
            group_name = entry['group_name']
            card_body = "".join(imageListShow(entry['result']))
            card_html = (template.card_temp_up + card_body +
                         template.card_temp_down)
            column.markdown(card_html.format(color, group_name,
                                             user_counts[group_name]),
                            unsafe_allow_html=True)


def main():
    """Render the invoice dashboard sections enabled in ``btn_dic``.

    Three independent sections are driven by the module-level ``btn_dic``
    flags: the raw data view with category charts (``btn_datashow``), the
    product-name classification result (``btn_predict``) and the
    purchase recommendation cards (``btn_rec``).
    """
    frontendtemplate = FrontEndTemplate()
    init_invoice_df = getInitDataFrame()
    plot_one_df = get_city_size(init_invoice_df)
    showSideBar()

    if btn_dic['btn_datashow']:
        st.markdown(frontendtemplate.title_temp.format('Data View'),
                    unsafe_allow_html=True)
        # 'invoice_number' used to be listed twice, which produced a
        # duplicated column in the displayed frame.
        st.dataframe(init_invoice_df[[
            'invoice_number',
            'product_name',
            'quantity',
            'unit_price',
            'invoice_date',
            'store_address',
        ]])

        # Number of invoices per category.
        st.markdown(frontendtemplate.title_temp.format('各類別數量'),
                    unsafe_allow_html=True)
        plot_cat_cal = init_invoice_df.groupby(
            'cat_id')['invoice_number'].count()
        plot_cat_cal.plot(kind='bar', rot=85, figsize=(10, 6))
        st.set_option('deprecation.showPyplotGlobalUse', False)
        st.pyplot()

        # Number of rows per (carrier, category) pair.
        st.markdown(frontendtemplate.title_temp.format('各載具在各類別的數量'),
                    unsafe_allow_html=True)
        plot_catcnid_cal = init_invoice_df.groupby(
            ['carrier_number', 'cat_id'], as_index=False).size()
        st.dataframe(plot_catcnid_cal)

        # Per-city counts for every category, split over two columns.
        st.markdown(frontendtemplate.title_temp.format('各類別在各縣市上的數量'),
                    unsafe_allow_html=True)
        left_column, right_column = st.beta_columns(2)
        _plot_city_counts(left_column, plot_one_df, _LEFT_CITY_CATEGORIES)
        _plot_city_counts(right_column, plot_one_df, _RIGHT_CITY_CATEGORIES)

    if btn_dic['btn_predict']:
        classifierName = ClassifierByName(df=init_invoice_df)
        result = classifierName.testByData([getproductword])
        st.markdown(frontendtemplate.title_temp.format('預測結果'),
                    unsafe_allow_html=True)
        # NOTE(review): the accuracy shown here is a hard-coded literal, not
        # a value computed from the classifier above.
        st.markdown(
            """<h3 style="color:#0877E3;font-weight:bold;">準確率為:0.9633427718680646</h3>""",
            unsafe_allow_html=True)
        outputHtml = predictShow(getproductword, result[0])
        st.markdown(outputHtml, unsafe_allow_html=True)
        st.markdown(
            """<h3 style="color:#0877E3;font-weight:bold;">各類別數據量</h3>""",
            unsafe_allow_html=True)
        catval_df = dfValueCount(init_invoice_df)
        catval_df.value_counts().plot(kind='bar', rot=85, figsize=(10, 6))
        st.set_option('deprecation.showPyplotGlobalUse', False)
        st.pyplot()

    if btn_dic['btn_rec']:
        invoice_afterFilter_df, resdf, final_dict = getFilterDataFrame(
            init_invoice_df, selected_cat_list, getwordListStr)
        # Summary of the current query.
        st.markdown(frontendtemplate.title_temp.format('此次查詢Infos'),
                    unsafe_allow_html=True)
        st.subheader("此次查詢共{}筆".format(invoice_afterFilter_df.shape[0]))
        st.dataframe(invoice_afterFilter_df[[
            'invoice_number',
            'product_name',
            'store_address',
            'quantity',
            'unit_price',
        ]])

        # Lookup of user count per group, used in each card header.
        # The former per-group escaping of resdf['users'] only fed
        # commented-out code and could raise on non-string values, so it
        # has been dropped.
        group_users_count_dict = resdf.set_index(['group_name'
                                                  ])['users_count'].to_dict()

        left_column, right_column = st.beta_columns(2)
        _render_group_cards(left_column, frontendtemplate, '曾經購買',
                            final_dict['used_buy'], '#F6800A',
                            group_users_count_dict)
        _render_group_cards(right_column, frontendtemplate, '未來可能購買',
                            final_dict['feature_buy'], '#03b6fc',
                            group_users_count_dict)
Example #10
0
def _series_stem(file_name):
    """Return ``file_name`` up to its first dot, with trailing digits removed."""
    return re.sub(r'\d+$', '', file_name.split('.')[0])


def _series_step(path):
    """Return the step number encoded in the base name of ``path``.

    The trailing digits of the stem are preferred because they are what
    ``_series_stem`` strips; otherwise the first digit run in the base name
    is used. Directory components are ignored so digits in the base
    directory cannot affect ordering. Returns 0 when no digits are found.
    """
    name = os.path.basename(path)
    found = re.search(r'\d+$', name.split('.')[0]) or re.search(r'\d+', name)
    return int(found.group()) if found else 0


def page_viewer():
    """Streamlit page that displays a series of VTK unstructured-grid files.

    The files come either from a single upload or from a numbered series
    in a base directory. The selected value/system is drawn either for one
    step ('Frame') or for all steps in sequence ('Animation').
    """
    st.write(
        'Viewer for VTK file in [XML format](https://vtk.org/Wiki/VTK_XML_Formats) (Unstructured grid)'
    )

    base_dir = st.text_input('Input base dir', '.')

    file_buffer = st.file_uploader("or upload .vtu file",
                                   type=None,
                                   encoding=None)

    if file_buffer:
        # Persist the upload unchanged: writing bytes avoids a UTF-8 decode
        # failure and platform-dependent text-mode re-encoding.
        file_path = './result.vtu'
        with open(file_path, mode='wb') as f:
            f.write(file_buffer.read())
        files = [file_path]
    else:
        try:
            names = [
                f.name for f in os.scandir(base_dir)
                if re.search(r'\d+', f.name)
            ]
        except OSError:
            st.error('Cannot read directory: {}'.format(base_dir))
            return

        # Sorted so the selectbox order is stable between runs.
        headers = sorted({_series_stem(name) for name in names})
        header = st.sidebar.selectbox('Select header', headers)

        # Compare literally: the header is a file-name prefix, not a regex.
        files = [
            os.path.join(base_dir, name) for name in names
            if _series_stem(name) == header
        ]
        files.sort(key=_series_step)

    # VTK Reader
    reader = VtkReader()

    # Read first data (an empty file list also ends up here via IndexError)
    try:
        reader.read(files[0])
    except Exception:
        st.error('Invalid file format')
        return
    c0_xmin = np.min(reader.Coords[0, :])
    c0_xmax = np.max(reader.Coords[0, :])
    c0_ymin = np.min(reader.Coords[1, :])
    c0_ymax = np.max(reader.Coords[1, :])
    # Read last data
    reader.read(files[-1])
    cn_xmin = np.min(reader.Coords[0, :])
    cn_xmax = np.max(reader.Coords[0, :])
    cn_ymin = np.min(reader.Coords[1, :])
    cn_ymax = np.max(reader.Coords[1, :])

    # Display area: bounding box of first and last step plus a 10% margin.
    domain = {
        'xmin': min(c0_xmin, cn_xmin),
        'xmax': max(c0_xmax, cn_xmax),
        'ymin': min(c0_ymin, cn_ymin),
        'ymax': max(c0_ymax, cn_ymax),
    }
    len_x = domain['xmax'] - domain['xmin']
    len_y = domain['ymax'] - domain['ymin']
    domain['xmin'] -= 0.1 * len_x
    domain['xmax'] += 0.1 * len_x
    domain['ymin'] -= 0.1 * len_y
    domain['ymax'] += 0.1 * len_y
    domain['aspect'] = (domain['ymax'] - domain['ymin']) / (domain['xmax'] -
                                                            domain['xmin'])

    # Get data list
    data_dict = reader.get_data_dict()

    # Get value list
    values = list(data_dict.keys())
    value = st.sidebar.selectbox('Select value', values)

    if value is None:
        # Without a value there is no system to pick and nothing to draw;
        # previously this path crashed on an unbound ``system``.
        st.warning('No data values available in the selected file')
        return

    # Systems are numbered from 1; data_dict[value] is used as their count.
    systems = [i + 1 for i in range(data_dict[value])]
    system = st.sidebar.selectbox('Select system', systems)

    display_mode = st.sidebar.selectbox('Select mode', ['Frame', 'Animation'])

    if display_mode == 'Frame':

        step = st.sidebar.number_input(
            f'Step No. 0 to {len(files) - 1}',
            min_value=0,
            max_value=len(files) - 1,
            value=0,
            step=1,
        )

        st.info('Value: {} System: {} Step: {}/{}'.format(
            value, system, step,
            len(files) - 1))

        reader.read(files[step])
        val = reader.get_value(value, system=system)

        drawer = DrawMesh()
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        drawer.set_domain(ax, domain=domain)
        drawer.draw(coords=reader.Coords,
                    connectivity=reader.Lnodes,
                    value=val)

        st.pyplot(plt)

    elif display_mode == 'Animation':

        drawer = DrawMesh()
        fig = plt.figure()
        ax = fig.add_subplot(1, 1, 1)
        drawer.set_domain(ax, domain=domain)

        latest_iteration = st.empty()
        bar = st.progress(0)

        # Placeholder element that each animation step redraws in place.
        stplot = st.pyplot(plt)

        def animate(istep):
            reader.read(files[istep])
            val = reader.get_value(value, system=system)
            drawer.draw(coords=reader.Coords,
                        connectivity=reader.Lnodes,
                        value=val)
            stplot.pyplot(plt)
            latest_iteration.text(f'Step: {istep}')
            bar.progress((istep + 1) / len(files))

        for istep in range(len(files)):
            animate(istep)
            time.sleep(0.005)
def plot_wordcloud(my_text):
    """Generate a word cloud from ``my_text`` and show it in the Streamlit app.

    Any exception raised by ``WordCloud().generate`` propagates to the
    caller before a figure is created.
    """
    cloud = WordCloud().generate(my_text)
    canvas = plt.figure()
    # Axes are left visible on purpose (axis('off') is not applied).
    plt.imshow(cloud, interpolation="bilinear")
    st.pyplot(canvas)
Example #12
0
# Main Panel
# Echo the input parameters the prediction is based on.
st.header('Specified Input parameters')
st.write(df)
st.write('---')


# Load the fitted model; the context manager closes the file handle as soon
# as unpickling finishes (it was previously left open).
# NOTE(review): unpickling executes code from the file - only load a
# trusted 'doc/boston_clf.pkl'.
with open('doc/boston_clf.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
# Prediction
prediction = model.predict(df)

st.header('Prediction of MEDV')
st.write(prediction)
st.write('---')

# Explaining the model's predictions using SHAP values
# https://github.com/slundberg/shap
explainer = shap.TreeExplainer(model)
shap_values = explainer.shap_values(X)

st.header('Feature Importance')
# The plots below are rendered through the global pyplot figure.
st.set_option('deprecation.showPyplotGlobalUse', False)
plt.title('Feature importance based on SHAP values')
shap.summary_plot(shap_values, X)
st.pyplot(bbox_inches='tight')
st.write('---')

plt.title('Feature importance based on SHAP values (Bar)')
shap.summary_plot(shap_values, X, plot_type="bar")
st.pyplot(bbox_inches='tight')
Example #13
0
def _render_clustermap(scores):
    """Draw a row-clustered heat map of ``scores`` into the Streamlit page."""
    st.set_option('deprecation.showPyplotGlobalUse', False)
    st.write(sns.clustermap(scores, col_cluster=False, cmap='Blues', linewidth=.5))
    st.pyplot()


def run_cluster_app():
    """Streamlit page showing a clustered heat map of student scores.

    Uses an uploaded CSV (indexed by its 'ID' column) when one is provided,
    otherwise the bundled sample data set. Any failure while handling the
    data is reported as a data-format error on the page.

    The previously computed-and-discarded summary statistics and k-means
    labels were removed: none of them was ever rendered.
    """
    st.header('■Cluster Analysis')
    st.write('- To group students with similar learning characteristics.')

    st.sidebar.subheader('Data upload')
    df_edu = pd.read_csv("data/eng_sample_data_cluster.csv")

    def download_link(object_to_download, download_filename, download_link_text):
        """Return an HTML link embedding a DataFrame as base64 CSV, else None."""
        if not isinstance(object_to_download, pd.DataFrame):
            return None
        csv_text = object_to_download.to_csv(index=False, encoding='utf_8_sig')
        b64 = base64.b64encode(csv_text.encode()).decode()
        return f'<a href="data:file/txt;base64,{b64}" download="{download_filename}">{download_link_text}</a>'

    tmp_download_link = download_link(df_edu, 'sample_cluster.csv', 'Download sample csv file.')
    st.sidebar.markdown(tmp_download_link, unsafe_allow_html=True)

    try:
        uploaded_file = st.sidebar.file_uploader("File upload (Drag and drop or use [Browse files] button to import csv file. Only utf-8 format is available.)", type=["csv"])

        if uploaded_file is not None:
            df_edu = pd.read_csv(uploaded_file)
            # Rewind so the buffer can be read again on a later use.
            uploaded_file.seek(0)
            if st.sidebar.checkbox(label='Show uploaded data'):
                st.dataframe(df_edu)

            _render_clustermap(df_edu.set_index('ID'))
        else:
            # The sample frame loaded above is reused instead of re-reading
            # the same CSV from disk.
            if st.sidebar.checkbox('Show DataFreme'):
                st.write(df_edu)

            df_edu.columns = ['', 'Literature', 'Reading', 'History', 'Math', 'Physics']
            _render_clustermap(df_edu.set_index(''))

            st.write('This example suggests that there are two groups: those who are good at humanities subjects and those who are good at science subjects.')

    except Exception as e:
        # Top-level UI boundary: show a friendly message but keep the cause
        # visible in the server log instead of discarding it.
        st.header('ERROR: Data inconsistency. Check data format to be uploaded.')
        print('Data inconsistency error:', e)
Example #14
0
def files(data):
    """Interactive exploration page for a CSV source.

    Args:
        data: Anything ``pandas.read_csv`` accepts, or ``None``, in which
            case nothing is rendered.

    Renders optional summaries (shape, columns, statistics, unique and
    value counts), previews of missing-value imputation, a correlation
    heat map and two families of plots. The frame itself is never
    modified; imputation results are only displayed.
    """
    if data is None:
        return

    df = pd.read_csv(data)
    st.dataframe(df.head())
    if st.checkbox("Show Shape"):
        st.write(df.shape)
    if st.checkbox("Show Columns"):
        st.write(df.columns)
    if st.checkbox("Show Full dataframe"):
        pd.set_option("display.max_rows", None, "display.max_columns",
                      None)
        st.write(df)
    if st.checkbox("Show Statistics"):
        st.write(df.describe())
    if st.checkbox("Show Selected Columns"):
        mult_col = st.multiselect("Select Columns", df.columns)
        st.write(df[mult_col])
    if st.checkbox("Show Count of Unique values"):
        st.write(df.nunique().to_frame('counts'))
    if st.checkbox("Show Value Counts"):
        option = st.selectbox("Select Columns", df.columns)
        if option is not None:
            st.write(df[option].value_counts().rename_axis(
                'unique values').to_frame('counts'))
    if st.checkbox("Check Null values"):
        st.write(df.isnull().sum())
        var = ["Numerical", "Categorical"]
        st.info("Handle Missing values")
        select = st.selectbox("Select Varaibles", var)
        if var[0] == select:
            opt = st.radio("Fill with ", ("Mean", "Median"))
            # Call the statistic: passing the bound method itself (as
            # before) never fills with the column mean/median.
            if opt == "Mean":
                st.write(df.fillna(df.mean(numeric_only=True)))
            else:
                st.write(df.fillna(df.median(numeric_only=True)))
        else:
            opt = st.radio("Fill with ",
                           ("Most common class", "Unknown variable"))
            if opt == "Most common class":

                def fill_with_mode(col):
                    # An all-NaN column has no most common value; leave it.
                    counts = col.value_counts()
                    if counts.empty:
                        return col
                    return col.fillna(counts.index[0])

                st.write(df.apply(fill_with_mode))
            else:
                text = st.text_input("Enter a variable or text to fill")
                st.write(df.fillna(text))
    if st.checkbox("Correlation Plot"):
        # Correlation is only defined for numeric/boolean columns.
        numeric_df = df.select_dtypes(include=['number', 'bool'])
        st.write(sns.heatmap(numeric_df.corr(), annot=True))
        st.pyplot()

    st.subheader("Plots")

    def plots(df):
        """Render the column plots and the x/y relationship plots."""
        type_of_plot = st.selectbox("Select Type of Plot",
                                    ["Area", "Bar", "Line", "Hist", "Box"])
        selected_columns_names = st.multiselect("Select Columns To Plot",
                                                df.columns)
        if st.button("Plot"):
            st.success("Plotted {} plot for {}".format(
                type_of_plot, selected_columns_names))
            cust_data = df[selected_columns_names]
            if type_of_plot == 'Area':
                st.area_chart(cust_data)
            elif type_of_plot == 'Bar':
                st.bar_chart(cust_data)
            elif type_of_plot == 'Line':
                st.line_chart(cust_data)
            elif type_of_plot:
                # pandas plot kinds are lower-case ('hist', 'box').
                cust_plot = cust_data.plot(kind=type_of_plot.lower())
                st.write(cust_plot)
                st.pyplot()

        st.markdown("#### Relationship")
        plot_type = st.selectbox("Select Type of Plot",
                                 ["Count", "Box", "Violin", "Swarm"])
        column1 = st.selectbox("Select the x axis", df.columns)
        column2 = st.selectbox("Select the y axis", df.columns)
        if st.button("Generate Plot"):
            # Report the relationship plot type, not the column-plot type
            # chosen in the section above.
            st.success("Plotted {} plot".format(plot_type))
            if plot_type == "Count":
                st.warning(
                    "Note: You dont need to select y axis for this plot")
                st.write(sns.countplot(data=df, x=column1))
            elif plot_type == "Box":
                st.write(sns.boxplot(data=df, x=column1, y=column2))
            elif plot_type == "Violin":
                st.write(sns.violinplot(data=df, x=column1, y=column2))
            else:
                st.write(sns.swarmplot(data=df, x=column1, y=column2))
            st.pyplot()

    plots(df)
Example #15
0
def main():
    """BPX Energy LE forecast page: dataset browser and plotting tool.

    Lets the user pick a CSV from ``./datasets``, inspect it (rows,
    columns, shape, dtypes, summary) and draw several charts from it.
    """
    st.title("BPX Energy LE Forecast Tool")
    st.subheader("SoHa Change Dev/Test")

    html_temp = """
	<div style="background-color:green;"><p style="color:white;font-size:50px;padding:10px">LE Forecast Application</p></div>
	"""
    st.markdown(html_temp, unsafe_allow_html=True)

    def file_selector(folder_path='./datasets'):
        """Return the path of the file chosen from ``folder_path``."""
        filenames = os.listdir(folder_path)
        selected_filename = st.selectbox("Select A file", filenames)
        return os.path.join(folder_path, selected_filename)

    filename = file_selector()
    st.info("You Selected {}".format(filename))

    # Read Data
    df = pd.read_csv(filename)
    all_columns_names = df.columns.tolist()

    # Show Dataset
    if st.checkbox("Show Dataset"):
        # The input is now honoured; the default keeps the former
        # fixed limit of 1000 rows.
        number = st.number_input("Number of Rows to View",
                                 min_value=0,
                                 value=1000,
                                 step=1)
        st.dataframe(df.head(n=int(number)))

    # Show Columns
    if st.button("Column Names"):
        st.write(df.columns)

    # Show Shape
    if st.checkbox("Shape of Dataset"):
        data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of Rows")
            st.write(df.shape[0])
        else:
            st.text("Number of Columns")
            st.write(df.shape[1])

    # Select Columns
    if st.checkbox("Select Columns To Show"):
        selected_columns = st.multiselect("Select", all_columns_names)
        st.dataframe(df[selected_columns])

    # Show Values
    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, -1].value_counts())

    # Show Datatypes
    if st.button("Data Types"):
        st.write(df.dtypes)

    # Show Summary
    if st.checkbox("Summary"):
        st.write(df.describe().T)

    ## Plot and Visualization

    st.subheader("Data Visualization")
    # GFOz
    if st.checkbox("GFOz[Bokeh]"):
        st.text("GFOz vs actuals [LE]")
        # Defaults so the "Chart" button also works when "Chart LE" is
        # unchecked (previously a NameError).
        primary_col = None
        selected_columns_names = []
        if st.checkbox("Chart LE"):
            st.text("Value of LE Curve")
            # Explicit keys: the same label/options pair is used again in
            # the value-count section below.
            primary_col = st.selectbox("Primary Columm to GroupBy",
                                       all_columns_names,
                                       key="gfoz_primary_col")
            selected_columns_names = st.multiselect("Select Columns",
                                                    all_columns_names,
                                                    key="gfoz_columns")
        if st.button("Chart"):
            st.text("Generate Chart")
            if selected_columns_names:
                line_data = df.groupby(
                    primary_col)[selected_columns_names].count()
            else:
                line_data = df.iloc[:, -1].value_counts()
            st.write(line_data.plot(kind="line"))
            # The chart is a matplotlib plot, so it is rendered with
            # st.pyplot; st.bokeh_chart() without a figure cannot work.
            st.pyplot()

    # Pie Chart
    if st.checkbox("Pie Plot"):
        if st.button("Generate Pie Plot"):
            st.success("Generating A Pie Plot")
            st.write(df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
            st.pyplot()

    # Count Plot
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        primary_col = st.selectbox("Primary Columm to GroupBy",
                                   all_columns_names,
                                   key="value_counts_primary_col")
        selected_columns_names = st.multiselect("Select Columns",
                                                all_columns_names,
                                                key="value_counts_columns")
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(
                    primary_col)[selected_columns_names].count()
            else:
                vc_plot = df.iloc[:, -1].value_counts()
            st.write(vc_plot.plot(kind="bar"))
            st.pyplot()

    # Customizable Plot

    st.subheader("Customizable Plot")
    type_of_plot = st.selectbox("Select Type of Plot",
                                ["area", "bar", "line", "hist", "box", "kde"])
    selected_columns_names = st.multiselect("Select Columns To Plot",
                                            all_columns_names)

    if st.button("Generate Plot"):
        st.success("Generating Customizable Plot of {} for {}".format(
            type_of_plot, selected_columns_names))
        cust_data = df[selected_columns_names]

        # Native Streamlit charts where available, matplotlib otherwise.
        if type_of_plot == 'area':
            st.area_chart(cust_data)
        elif type_of_plot == 'bar':
            st.bar_chart(cust_data)
        elif type_of_plot == 'line':
            st.line_chart(cust_data)
        elif type_of_plot:
            cust_plot = cust_data.plot(kind=type_of_plot)
            st.write(cust_plot)
            st.pyplot()

    if st.button("SoHa Change"):
        st.balloons()

    st.sidebar.header("LE Utility Tool")
    st.sidebar.info(
        "The LE & Forecast Utility is a tool meant to improve the speed & consistency of production reporting for operations PEs."
    )

    st.sidebar.header("Get Datasets")
    st.sidebar.markdown("[Common ML Dataset Repo](" ")")

    st.sidebar.header("Developed By")
    st.sidebar.info("*****@*****.**")
    st.sidebar.info("*****@*****.**")
    st.sidebar.text("SoHa Change Team lead: Kellen McLoughlin")
    st.sidebar.text("Web App maintained by MarkusJBPX")
Example #16
0
def main():
    """Draw a McCabe-Thiele-style stage diagram for a binary distillation.

    Sidebar sliders provide the bottoms, feed and top concentrations, the
    relative volatility, the value used for the feed-line slope and the
    multiple applied to the minimum reflux ratio. The page plots the
    equilibrium curve, the diagonal, the feed and operating lines and the
    stage steps, and reports the number of stages found.
    """
    # Apply the style before anything is drawn so it also affects the
    # first render.
    sns.set_style("darkgrid")

    xw = st.sidebar.slider('bottoms concentration', .04, .99, .058)
    xf = st.sidebar.slider('Feed concentration', .04, .99, .45)
    yd = st.sidebar.slider('Top concentration', .04, .99, .9)
    rv = st.sidebar.slider('Relative volatality', 1.5, 10.0, 3.0)
    q = st.sidebar.slider('vapour liquid ratio', .04, .99, .33)
    mul = st.sidebar.slider('Multiplies R mini', 1.0, 10.0, 2.0)

    # Equilibrium curve and the y = x diagonal.
    diagonal = np.linspace(0, 1, 11)
    xe = np.linspace(0, 1, 100)
    ye = rv * xe / (1 + (rv - 1) * xe)
    plt.plot(xe, ye)
    plt.plot(diagonal, diagonal)

    # Feed line y = m1 * x + n1.
    m1 = -q / (1 - q)
    n1 = xf / (1 - q)

    # Its intersection with the equilibrium curve solves
    # a * x**2 + b * x + n1 = 0. Both roots are evaluated (they used to be
    # computed with the same sign) and the first positive one is used.
    a = m1 * rv - m1
    b = m1 + n1 * rv - n1 - rv
    root_term = math.sqrt(b ** 2 - 4 * a * n1)
    roots = ((-b - root_term) / (2 * a), (-b + root_term) / (2 * a))
    positive_roots = [root for root in roots if root > 0]
    if not positive_roots:
        st.error("Feed line does not intersect the equilibrium curve")
        return
    p = positive_roots[0]
    l = m1 * p + n1

    xd = yd
    yw = xw
    yf = xf

    # Minimum and actual reflux ratio, then the upper operating line
    # y = m2 * x + n2 and its intersection (mx, my) with the feed line.
    rm = (xd - l) / (l - p)
    R = rm * mul
    n2 = xd / (R + 1)
    m2 = (yd - n2) / yd
    mx = (n1 - n2) / (m2 - m1)
    my = m1 * mx + n1

    # Lower operating line from (xw, yw) to (mx, my).
    lower_slope = (my - yw) / (mx - xw)
    xs = np.linspace(xw, mx, 11)
    ys = lower_slope * (xs - xw) + yw
    xo = np.linspace(mx, yd, 11)
    yo = m2 * xo + n2
    xfe = np.linspace(mx, xf, 11)
    yfe = m1 * xfe + n1
    plt.plot(xfe, yfe)
    plt.plot(xo, yo)
    plt.plot(xs, ys)
    plt.xlabel("x")
    plt.ylabel("y")
    plt.annotate("yd", (xd, yd))
    plt.annotate("xw", (xw, yw))
    plt.annotate("xf", (xf, yf))

    # Step off stages starting from (yd, yd): h holds the x coordinates of
    # the steps, t the y coordinates.
    max_steps = 20
    h = np.zeros(max_steps)
    t = np.zeros(max_steps)
    h[0] = yd
    t[0] = yd
    upper_slope = (yd - my) / (yd - mx)
    current_y = yd
    gh = None
    for i in range(1, max_steps):
        # Horizontal move to the equilibrium curve.
        h[i] = current_y / (rv * (1 - current_y) + current_y)
        if h[i] < xw:
            gh = i
            st.write("Number of stages in distillation cloumn=", gh - 1)
            break
        # Vertical move to whichever operating line applies at this x.
        if h[i] > mx:
            t[i] = upper_slope * (h[i] - mx) + my
        else:
            t[i] = lower_slope * (h[i] - xw) + xw
        current_y = t[i]

    if gh is None:
        # The bottoms concentration was not reached within the available
        # steps (e.g. at minimum reflux); draw what was computed instead of
        # failing on an unbound stage count.
        st.warning("Stage stepping did not reach the bottoms concentration "
                   "within {} steps".format(max_steps - 1))
        gh = max_steps

    for i in range(gh - 1):
        # Horizontal then vertical segment of each step.
        plt.plot([h[i + 1], h[i]], [t[i], t[i]], 'r')
        plt.plot([h[i + 1], h[i + 1]], [t[i], t[i + 1]], 'r')

    st.pyplot()
Example #17
0
def app(im):
    """Show ``im`` in the page after applying the filter picked in the sidebar.

    Args:
        im: The source image, or ``None`` when nothing has been loaded yet
            (in which case only the sidebar is drawn). The code relies on
            ``im.convert(...)`` and ``ImageEnhance``, so this is presumably
            a ``PIL.Image.Image`` -- confirm against the caller.

    The display width and the filter name both come from ``app_sidebar()``.
    Modes that do not produce an image of their own ("Hist", or any choice
    not handled below) fall back to displaying the untouched input.
    """
    width, choice = app_sidebar()
    if im is None:
        return

    # Stays None whenever the chosen mode yields no processed image.
    img = None

    if choice == "Bilevel":
        img = im.convert("1")

    elif choice == "Greyscale":
        img = im.convert("L")

    elif choice == "Contrast":
        factor = st.sidebar.slider(choice, 0.0, 3.0, 1.0)
        img = ImageEnhance.Contrast(im).enhance(factor)

    elif choice == "Brightness":
        factor = st.sidebar.slider(choice, 0.0, 3.0, 1.0)
        img = ImageEnhance.Brightness(im).enhance(factor)

    elif choice == "Colorpick":
        arr = np.array(im.convert("RGB"))
        # The array is in RGB order (it comes from PIL), so the RGB->HSV
        # code is required; the BGR code would swap red and blue first.
        hsv = cv2.cvtColor(arr, cv2.COLOR_RGB2HSV)

        a, b = st.sidebar.slider(choice, 0, 255, (0, 255))
        # The same lower/upper bound is applied to all three HSV channels.
        img_mask = cv2.inRange(hsv, np.ones(3) * a, np.ones(3) * b)
        img = cv2.bitwise_and(arr, arr, mask=img_mask)

    elif choice == "Canny":
        arr = np.array(im.convert("RGB"))
        # RGB order again: use the RGB->gray weights, not the BGR ones.
        gray = cv2.cvtColor(arr, cv2.COLOR_RGB2GRAY)
        a, b = st.sidebar.slider(choice, 0, 500, (200, 400))
        img = cv2.Canny(gray, a, b)
        # NOTE(review): `signature` is defined elsewhere; presumably it
        # stamps the result -- confirm.
        img = signature(img)

    elif choice == "Hist":
        from matplotlib import pyplot as plt

        arr = np.array(im.convert("RGB"))

        # Channel i of an RGB array is R, G, B, so colour the curves to match.
        for i, col in enumerate(("r", "g", "b")):
            histr = cv2.calcHist([arr], [i], None, [256], [0, 256])
            plt.plot(histr, color=col)
            plt.xlim([0, 256])

        st.pyplot()

    elif choice == "Kuwahara":
        # https://en.wikipedia.org/wiki/Kuwahara_filter
        # https://qiita.com/Cartelet/items/5c1c012c132be3aa9608
        r = st.sidebar.slider(choice, 5, 50, 5, 5)
        arr = np.array(im.convert("RGB"))
        h, w, _ = arr.shape
        img = np.empty_like(arr)
        arr = np.pad(arr, ((r, r), (r, r), (0, 0)), "edge")
        # Integral images of the values and of their squares give every
        # (r + 1) x (r + 1) window sum from four corner look-ups.
        ave, var = cv2.integral2(arr)
        ave_mask = (ave[:-r - 1, :-r - 1] + ave[r + 1:, r + 1:] -
                    ave[r + 1:, :-r - 1] - ave[:-r - 1, r + 1:])
        ave = ave_mask / (r + 1)**2

        var_mask = (var[:-r - 1, :-r - 1] + var[r + 1:, r + 1:] -
                    var[r + 1:, :-r - 1] - var[:-r - 1, r + 1:])
        var = (var_mask / (r + 1)**2 - ave**2).sum(axis=2)

        for i in range(h):
            for j in range(w):
                # Four windows touching the pixel: keep the mean of the one
                # with the smallest variance.
                window_means = np.array([
                    ave[i, j],
                    ave[i + r, j],
                    ave[i, j + r],
                    ave[i + r, j + r],
                ])
                window_vars = np.array([
                    var[i, j],
                    var[i + r, j],
                    var[i, j + r],
                    var[i + r, j + r],
                ])
                img[i, j] = window_means[window_vars.argmin()]
        img = signature(img)

    # Explicit fallback instead of catching UnboundLocalError.
    st.image(img if img is not None else im, width=width)
Example #18
0
from db_handler import get_market_tracker

# Build the game client and look up the player this page is about.
# NOTE(review): `st` exposes Game()/get_user() here, so it is not Streamlit;
# `sl` looks like the UI (Streamlit) alias -- confirm against the imports.
game = st.Game()
user = st.get_user("JimHawkins")

sl.title("Hi, " + user.username)

# Display the different planets in a table
sl.title("OE System")
OE = pd.DataFrame(game.systems[0]['locations'])
# Bare expression: presumably rendered by Streamlit "magic" -- confirm.
OE

# Scatter the locations by their x/y columns.
fig, ax = plt.subplots(figsize=(12,12))
ax.scatter(OE.x, OE.y)
# apply() is used only for its side effect: one ax.text(x, y, name) per row.
OE[['x','y','name']].apply(lambda x: ax.text(*x),axis=1)
sl.pyplot(fig)

# Display the entire Market Tracker Database
sl.title("Market Tracker")
market = get_market_tracker()
# Bare expression again; presumably displayed the same way as `OE` above.
market

# Plot how the sell price of a good on each planet has changed over time
# The bare string below is runtime text, not a docstring; do not edit it as
# if it were a comment.
"""
# Sell Price of Goods Over Time
*Use the dropdown to select the good to graph*
"""
# Offer every distinct symbol and build a boolean row mask for the chosen one.
good = sl.selectbox('Select', market.symbol.unique())
good_mask = market['symbol'] == good

# New empty figure; the code that draws on it is outside this excerpt.
fig = plt.figure()
# Age vs Pregnancies
# Two scatter layers share the current axes: the whole dataset `df`
# (coloured by the 'Outcome' column) and one large marker for the values in
# `user_data`. `df`, `user_data` and `color` are defined outside this excerpt.
st.header('Pregnancy count Graph (Others vs Yours)')
fig_preg = plt.figure()
ax1 = sns.scatterplot(x='Age',
                      y='Pregnancies',
                      data=df,
                      hue='Outcome',
                      palette='Greens')
ax2 = sns.scatterplot(x=user_data['age'],
                      y=user_data['pregnancies'],
                      s=150,
                      color=color)
# Fixed tick grids: x every 5 from 10 to 95, y every 2 from 0 to 18.
plt.xticks(np.arange(10, 100, 5))
plt.yticks(np.arange(0, 20, 2))
plt.title('0 - Healthy & 1 - Unhealthy')
st.pyplot(fig_preg)

# Age vs Glucose
# Same layout as the pregnancies plot, with the 'Glucose' column and the
# 'magma' palette.
st.header('Glucose Value Graph (Others vs Yours)')
fig_glucose = plt.figure()
ax3 = sns.scatterplot(x='Age',
                      y='Glucose',
                      data=df,
                      hue='Outcome',
                      palette='magma')
ax4 = sns.scatterplot(x=user_data['age'],
                      y=user_data['glucose'],
                      s=150,
                      color=color)
# x every 5 from 10 to 95, y every 10 from 0 to 210.
# NOTE(review): no st.pyplot(fig_glucose) call is visible in this excerpt.
plt.xticks(np.arange(10, 100, 5))
plt.yticks(np.arange(0, 220, 10))
Example #20
0
    lb_dict = {}
    lb_name = []
    lb_num = []
    for i in lb:
        lb_dict.update({i: lb_list.count(i)})
    lb2 = sorted(lb_dict.items(), key=lambda lb_dict: lb_dict[1], reverse=True)
    for i in range(0, len(lb2)):
        lb_num.append(lb2[i][1])
        lb_name.append(lb2[i][0])
    fig = plt.figure(figsize=(15, 8))
    plt.bar(range(0, len(lb_num)), lb_num, color='pink')
    plt.plot(range(0, len(lb_num)), lb_num, color='lightblue')
    plt.title("当日反馈类型统计", fontsize=24)
    plt.xticks(range(0, len(lb_num)), lb_name, rotation=60, fontsize=12)
    plt.rcParams['font.sans-serif'] = ['SimHei']
    st.pyplot(fig)

    fig2 = plt.figure(figsize=(10, 6))
    lb = list(set(df2['记录人'].values))
    lb_list = list(df2['记录人'].values)
    lb_dict = {}
    lb_name = []
    lb_num = []
    for i in lb:
        lb_dict.update({i: lb_list.count(i)})
    lb2 = sorted(lb_dict.items(), key=lambda lb_dict: lb_dict[1], reverse=True)
    for i in range(0, len(lb2)):
        lb_num.append(lb2[i][1])
        lb_name.append(lb2[i][0])
    colors = [
        '#F08080', '#FFB6C1', '#B0E0E6', '#ADD8E6', 'lightblue', 'pink', 'cyan'
Example #21
0
def main():
    """Render the Iris exploratory-data-analysis Streamlit page.

    Loads ``iris.csv`` from the working directory and exposes, through
    checkboxes, radios and buttons: DataFrame previews, column names,
    row/column counts, summary statistics, single-column views, a few
    Matplotlib/Seaborn plots, per-species images and an "about" section.
    Takes no arguments and returns ``None``; all output goes to the page.
    """
    st.title('Iris EDA APP')
    st.subheader('EDA Web App with Streamlit')
    st.markdown("""
    	#### Description
    	+ This is a simple Exploratory Data Analysis  of the Iris Dataset depicting the various species built with Streamlit.
    	#### Purpose
    	+ To show a simple EDA of Iris using Streamlit framework. 
    	""")

    my_dataset = "iris.csv"

    @st.cache(persist=True)
    def explore_data(dataset):
        """Read the CSV at ``dataset`` into a DataFrame (cached by Streamlit)."""
        return pd.read_csv(os.path.join(dataset))

    # Load the dataset
    data = explore_data(my_dataset)

    if st.checkbox("Preview DataFrame"):
        # Draw both buttons first so neither disappears when the other is
        # pressed, then show exactly one preview. The two-row fallback is
        # shown only when no button was pressed.
        show_head = st.button("Head")
        show_tail = st.button("Tail")
        if show_head:
            st.write(data.head())
        elif show_tail:
            st.write(data.tail())
        else:
            st.write(data.head(2))

    # Show entire DataFrame
    if st.checkbox("Show All DataFrame"):
        st.dataframe(data)

    # Show all column names
    if st.checkbox("Show All Column Names"):
        st.text("Columns:")
        st.write(data.columns)

    data_dim = st.radio('What Dimension Do you want to show', ('Rows', 'Columns'))
    if data_dim == 'Rows':
        st.text("Showing Length of Rows")
        st.write(len(data))
    elif data_dim == 'Columns':
        # Must equal the radio option exactly, otherwise this never runs.
        st.text("Showing Length of Columns")
        st.write(data.shape[1])

    # Show summary of dataset
    if st.checkbox("Show Summary of Dataset"):
        st.write(data.describe())

    column_names = ('sepal_length', 'sepal_width', 'petal_length',
                    'petal_width', 'species')
    species_option = st.selectbox('Select Columns', column_names)
    if species_option in column_names:
        st.write(data[species_option])
    else:
        st.write("Select A Column")

    # Show plots
    if st.checkbox("Simple bar plot with Matplotlib"):
        data.plot(kind='bar')
        st.pyplot()

    # Correlations only make sense for numeric columns; selecting them
    # explicitly keeps corr() from failing on the string 'species' column.
    if st.checkbox("Simple Correlation with Matplotlib"):
        plt.matshow(data.select_dtypes(include='number').corr())
        st.pyplot()

    if st.checkbox("Simple Correlation with Seaborn"):
        st.write(sns.heatmap(data.select_dtypes(include='number').corr(),
                             annot=True))
        st.pyplot()

    # Show plots
    if st.checkbox("Bar plot of Groups or Counts"):
        # Chart the number of rows per species; a bare GroupBy object is not
        # chartable data.
        v_counts = data.groupby('species').size()
        st.bar_chart(v_counts)

    # Iris image manipulation
    @st.cache
    def load_img(img):
        """Open the image file at ``img`` with PIL (cached by Streamlit)."""
        return Image.open(os.path.join(img))

    species_type = st.radio('What is the Iris Species do you want to see?',
                            ('Setosa', 'Versicolor', 'Virginica'))
    if species_type == 'Setosa':
        st.text("Showing Iris Setosa")
        st.image(load_img('imgs/iris_setosa.jpg'))

    elif species_type == 'Versicolor':
        st.text("Showing Iris Versicolor")
        st.image(load_img('imgs/iris_versicolor.jpg'))

    elif species_type == 'Virginica':
        st.text("Showing Iris Virginica")
        st.image(load_img('imgs/iris_virginica.jpg'))

    # Show image or hide image with checkbox
    if st.checkbox("Show Image/Hide Image"):
        # NOTE(review): the other images live under 'imgs/'; confirm that
        # this bare file name is the intended path.
        my_image = load_img('iris_setosa.jpg')
        enh = ImageEnhance.Contrast(my_image)
        num = st.slider("Set Your Contrast Number", 1.0, 3.0)
        img_width = st.slider("Set Image Width", 300, 500)
        st.image(enh.enhance(num), width=img_width)

    # About
    if st.button("About App"):
        st.subheader("Iris Dataset EDA App")
        st.text("Built with Streamlit")
        st.text("Thanks to the Streamlit Team Amazing Work")

    if st.checkbox("By"):
        st.text("Ehtisham Raza")
        st.text("*****@*****.**")
Example #22
0
     var3 = group_by_1[mask]['Girl Power'].unique()[0]
     var4 = group_by_1[mask]['Gotheim'].unique()[0]
     var5 = group_by_1[mask]['Boy Power'].unique()[0]

     # Pie chart
     labels = ['Les Gaulois','Girl Power','Gotheim','Boy Power' , 'Just For Fun' ]

     sizes = [var1,var2,var3,var4,var5]
     colors = ['red','lightcoral','darkorange','cornflowerblue','royalblue']

     fig1, ax1 = plt.subplots()
     
     title1 = "<h2 style='text-align: center; color: grey;'>Répartition des votes par groupe politique</h2></br><h3 style='text-align: center; color: grey;'>Département : "+dept+" </br> Année "+str(annee)+" - "+tour_label+"</h3>"
     st.markdown(title1, unsafe_allow_html=True)
     ax1 = plt.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors, startangle=90)
     st.pyplot(clear_figure=False)
     # Equal aspect ratio ensures that pie is drawn as a circle

     title2 = "<h2 style='text-align: center; color: grey;'>Abstention</h2></br><h3 style='text-align: center; color: grey;'>Département : "+dept+" </br> Année "+str(annee)+" - "+tour_label+"</h3>"
     st.markdown(title2, unsafe_allow_html=True)
     var1 = toto[mask]['Inscrits'].unique()[0] - toto[mask]['Votants'].unique()[0]
     var2 = toto[mask]['Inscrits'].unique()[0]
     # Pie chart
     labels = ['Abstentionnistes','Votants']
     sizes = [var1,var2]
     colors = ['mediumpurple','cadetblue']
     fig, ax2 = plt.subplots()
     ax2 = plt.pie(sizes, labels=labels, autopct='%1.1f%%', colors=colors, startangle=90)
     st.pyplot(clear_figure=False)
     # Equal aspect ratio ensures that pie is drawn as a circle
Example #23
0
###############################################----------------------------------
elif page == options[1]:  #if the second page is selected
    st.header('Firearm Alarm in Action')
    st.text('For how long would you like Firearm Alarm to listen?')
    t = st.slider('Select the time (hours).',
                  min_value=1,
                  max_value=24,
                  step=1)

    x = np.arange(0, 4, 1 / int(args.samplerate))
    fig, ax = plt.subplots()
    ax.set_ylim(-1, 1)
    line, = ax.plot(x, np.zeros(len(x)), color='m', linewidth=2)
    plt.xlabel('Time (s)')
    plt.ylabel('Sound Wave')
    the_plot = st.pyplot(plt)

    if st.button('Start listening with Firearm Alarm'):
        with st.spinner("Listening..."):
            for i in range(0, int(t * 3600)):

                recording = record()

                ## run it through the model
                mfcc = wav2mfcc(recording)

                X_test = np.reshape(mfcc, (1, 20, 170, 1))
                Y_predict = cnn_model.predict(X_test)

                if Y_predict.round()[0][0] == 1:
                    plt.text(0,
Example #24
0
# Plotly scatter of the two selected columns, coloured by 'Outcome'.
# `df`, `corr_x` and `corr_y` are defined outside this excerpt.
corr2_fig = px.scatter(
    df,
    x=corr_x,
    y=corr_y,  # the corr_x / corr_y columns the user selected
    color='Outcome',
    color_discrete_map=dict(noDM='green', DM='red'),
    template="plotly_white")
st.write(corr2_fig)

# heatmap
st.subheader('Heatmap of correlation')
fig4 = plt.figure(figsize=(6, 5))
sns.heatmap(df.corr(), annot=True, vmin=-1, vmax=1,
            cmap='coolwarm')  # drawn from the correlation-matrix function
# vmin/vmax pinned to -1 and 1 to cover the full range / negative
# correlation is "cool", positive correlation is "warm"
st.pyplot(fig4)
# st.write(fig4)

# Source: https://rfriend.tistory.com/409 [R, Python analysis and programming friend (by R Friend)]

# correlation heatmap
st.subheader('Heatmap of selected parameters')
fig5 = plt.figure(figsize=(5, 4))
# Every column is offered; those whose name contains "Outcome" are left out
# of the default selection.
hmap_params = st.multiselect(
    "Select parameters to include on heatmap",
    options=list(df.columns),
    default=[p for p in df.columns if "Outcome" not in p])
sns.heatmap(df[hmap_params].corr(),
            annot=True,
            vmin=-1,
            vmax=1,
Example #25
0
    ss.Agent01.play_bernoulli_bandit(0)
# Each `goN` flag (defined outside this excerpt, presumably a button state)
# plays the bandit with zero-based index N-1 on the shared agent.
if go2:
    ss.Agent01.play_bernoulli_bandit(1)
if go3:
    ss.Agent01.play_bernoulli_bandit(2)
if go4:
    ss.Agent01.play_bernoulli_bandit(3)
if go5:
    ss.Agent01.play_bernoulli_bandit(4)

## Output

st.header("Expected Distributions of Bandits")
st.text("Played so far:")
# One summary line per bandit, derived from its Alpha/Beta parameters.
# NOTE(review): the "-2" / "-1" offsets presumably assume each bandit starts
# at Alpha = Beta = 1 -- confirm against the agent's initialisation.
for no in range(5):
    a = ss.Agent01.bandit_beta_expectations[no].Alpha
    b = ss.Agent01.bandit_beta_expectations[no].Beta
    st.text(f"Bandit {no+1}: {a+b-2} times with {a-1} successes.")

# Configure Plot
fig, ax = plt.subplots()
x = np.linspace(0, 1, 200)

# Get expectations
# NOTE(review): `expectations` is never filled in the visible code.
expectations = {}
# Draw one Beta(a, b) density curve per bandit over x in [0, 1]. No `ax` is
# passed, so seaborn draws on the current axes.
for no in range(5):
    a = ss.Agent01.bandit_beta_expectations[no].Alpha
    b = ss.Agent01.bandit_beta_expectations[no].Beta
    sns.lineplot(x=x, y=beta.pdf(x, a, b))
st.pyplot(fig)
Example #26
0
def main():
    """Render the Chinese GDP (2000-2016) spatial-analysis Streamlit page.

    Loads the GDP table via ``load_data()``, lets the user pick a year and
    shows, behind checkboxes: the spatial distribution and a bar chart for
    that year, per-district time series, the spatial lag, a thresholded
    binary map, the Moran scatter plot with Moran's I, and several local
    (LISA) maps. Helper columns (``<year>_lag``, ``<year>_std``,
    ``<year>_lag_std``, ``binary``, ``Is``, ``cl``) are added to the loaded
    table as a side effect. Takes no arguments and returns ``None``.
    """
    st.title("Chinese GDP 2000 to 2016")
    st.text("Execept Hong Kong, Macau and Taiwanfrom")
    st.header("Spatial/Time Distribution")
    china_gdp = load_data()
    year = st.slider("Year: ", value=2000, min_value=2000, max_value=2016)

    # Column names derived from the selected year.
    year_col = str(year)
    lag_col = year_col + "_lag"
    std_col = year_col + "_std"
    lag_std_col = year_col + "_lag_std"

    if st.checkbox("Spatial Distribution by Year"):
        st.bokeh_chart(bokeh_plot_map(china_gdp, year_col))
        x = china_gdp["ID"]
        y = china_gdp[year_col]
        st.plotly_chart(plot_bar_chart(x, y))

    if st.checkbox("Time Trend by District"):
        district_selected = st.multiselect("District:", china_gdp["ID"])
        years = np.arange(2000, 2017).astype(str)
        st.plotly_chart(plot_time_series(china_gdp, years, district_selected))

    st.header("Global Spatial Autocorrelation")
    # Spatial weights: 8 nearest neighbours, row-standardised ('R').
    w = weights.KNN.from_dataframe(china_gdp, k=8)
    w.transform = 'R'
    # The lag must be taken from the selected year's values; it was
    # previously always computed from the "2000" column while being stored
    # under the selected year's name.
    china_gdp[lag_col] = weights.spatial_lag.lag_spatial(w, china_gdp[year_col])
    # z-scores of the values and of their spatial lag, used by the Moran plot.
    china_gdp[std_col] = (
        (china_gdp[year_col] - china_gdp[year_col].mean())
        / china_gdp[year_col].std())
    china_gdp[lag_std_col] = (
        (china_gdp[lag_col] - china_gdp[lag_col].mean())
        / china_gdp[lag_col].std())

    if st.checkbox("Spatial Lag"):
        st.bokeh_chart(bokeh_plot_map(china_gdp, lag_col))

    if st.checkbox("Binary Classification"):
        raw_threshold = st.text_input("larger than:", 8000)
        gdp_min = china_gdp[year_col].min()
        gdp_max = china_gdp[year_col].max()
        # Free-text input: treat anything that is not a number like an
        # out-of-range value instead of crashing the page.
        try:
            gdp_threshold = float(raw_threshold)
        except ValueError:
            gdp_threshold = None
        if gdp_threshold is not None and gdp_min < gdp_threshold < gdp_max:
            china_gdp["binary"] = np.where(
                china_gdp[year_col] > gdp_threshold, 1, 0)
            st.bokeh_chart(bokeh_plot_map(china_gdp, "binary"))
        else:
            st.info("Please input a number between {}  and {}".format(
                gdp_min, gdp_max))

    # Moran plot
    if st.checkbox("Moran Plot"):
        st.write(plot_moranI(china_gdp[std_col], china_gdp[lag_std_col]))

        # Moran's I (w is already row-standardised above)
        moran = esda.moran.Moran(china_gdp[year_col], w)
        st.text("Moran's I: {}".format(moran.I))
        plot_moran(moran)
        st.pyplot()

    # Local spatial autocorrelation
    st.header("Local Spatial Autocorrelation")
    lisa = esda.moran.Moran_Local(china_gdp[year_col], w)
    if st.checkbox("LISA"):
        china_gdp['Is'] = lisa.Is
        st.bokeh_chart(bokeh_plot_map(china_gdp, 'Is'))

    if st.checkbox("The Location of the LISA "):
        # lisa.q holds quadrant numbers 1..4; map them to 'Q1'..'Q4'.
        q_labels = ['Q1', 'Q2', 'Q3', 'Q4']
        china_gdp["cl"] = [q_labels[i - 1] for i in lisa.q]
        palette = ['red', 'lightblue', 'blue', 'pink']
        st.bokeh_chart(plot_moran_map(china_gdp, "cl", palette))

    if st.checkbox("The significant observations "):
        # 1 where the simulated p-value is below 0.05, otherwise 0.
        sig = 1 * (lisa.p_sim < 0.05)
        sig_labels = ['non-sig.', 'significant']
        china_gdp["cl"] = [sig_labels[i] for i in sig]
        palette = ['white', 'black']
        st.bokeh_chart(plot_moran_map(china_gdp, "cl", palette))

    if st.checkbox("LISA statistics"):
        sig = 1 * (lisa.p_sim < 0.05)
        # Significant observations keep their quadrant number (1..4);
        # everything else collapses to 0, which is labelled '0 ns'.
        hotspot = 1 * (sig * lisa.q == 1)
        coldspot = 3 * (sig * lisa.q == 3)
        doughnut = 2 * (sig * lisa.q == 2)
        diamond = 4 * (sig * lisa.q == 4)
        spots = hotspot + coldspot + doughnut + diamond
        spot_labels = [
            '0 ns', '1 hot spot', '2 doughnut', '3 cold spot', '4 diamond'
        ]
        china_gdp["cl"] = [spot_labels[i] for i in spots]
        palette = ['grey', 'red', 'lightblue', 'blue', 'pink']
        st.bokeh_chart(plot_moran_map(china_gdp, "cl", palette))
Example #27
0
    ax.tick_params(axis='x', colors='#777777', labelsize=12)
    ax.set_yticks([])
    ax.margins(0, 0.01)
    ax.grid(which='major', axis='x', linestyle='-')
    ax.set_axisbelow(True)
    ax.text(0,
            1.15,
            'Casos confirmados por región covid Chile',
            transform=ax.transAxes,
            size=24,
            weight=600,
            ha='left',
            va='top')
    ax.text(1,
            0,
            'by Monin Leonor @joquitz; credit @pratapvardhan',
            transform=ax.transAxes,
            color='#777777',
            ha='right',
            bbox=dict(facecolor='white', alpha=0.8, edgecolor='white'))
    plt.box(False)


# Create a simple app
# The slider runs from day 1 up to the largest value in df_region's `ndia`
# column (`df_region` is defined outside this excerpt).
ultimodia = df_region.ndia.max()
st.title("Evolucion del Covid19 en Chile por región")
day = st.slider("Selecciones día", 1, ultimodia)
st.markdown("Chile 2020 luego de %i días desde el primer caso" % day)

# NOTE(review): the visible tail of the preceding function has no `return`,
# so draw_barchart(day) presumably returns None and st.pyplot() falls back
# to the current global figure -- confirm.
plot = st.pyplot(draw_barchart(day))
Example #28
0
def _fetch_tweets(screen_name):
	"""Return up to 100 recent timeline posts of ``screen_name`` (extended text)."""
	return api.user_timeline(screen_name=screen_name,count=100,lang='en',tweet_mode='extended')


def _clean_tweet(textt):
	"""Strip @mentions, '#' signs, retweet markers and links from one tweet."""
	# ASCII hyphen in the digit range (an en dash there only matched the
	# literal characters '0', '–' and '9'); '_' is also valid in handles.
	textt = re.sub(r'@[A-Za-z0-9_]+', '', textt) # Removing @mentions
	textt = re.sub('#', '', textt) # Removing '#' hash tag
	textt = re.sub(r'RT[\s]+', '', textt) # Removing RT
	textt = re.sub(r'https?:\/\/\S+', '', textt) # Removing hyperlink
	return textt


def _polarity_label(score):
	"""Map a polarity score to 'Negative', 'Neutral' or 'Positive'."""
	if score < 0:
		return 'Negative'
	if score == 0:
		return 'Neutral'
	return 'Positive'


def _sentiment_frame(screen_name):
	"""Build the Tweets/Subjectivity/Polarity/Analysis table for one user."""
	posts = _fetch_tweets(screen_name)
	df = pd.DataFrame([tw.full_text for tw in posts],columns=['Tweets'])
	df['Tweets'] = df['Tweets'].apply(_clean_tweet)
	df['Subjectivity'] = df['Tweets'].apply(lambda t: TextBlob(t).sentiment.subjectivity)
	df['Polarity'] = df['Tweets'].apply(lambda t: TextBlob(t).sentiment.polarity)
	df['Analysis'] = df['Polarity'].apply(_polarity_label)
	return df


def _load_speech():
	"""Extract speech text from the saved Reserve Bank of India HTML page.

	Returns the whitespace-normalised text of the last element whose class
	is 'tablecontent2', or an empty string when no such element exists.
	"""
	with open('Reserve Bank of India - Speeches.html',encoding="utf8") as html_file:
		htmlfile = html_file.read()
	soup = BeautifulSoup(htmlfile)
	speech_1 = ''
	for speech_text_1 in soup.findAll(attrs={'class' : 'tablecontent2'}):
		speech_text_1 = speech_text_1.text.strip()
		lines = (line.strip() for line in speech_text_1.splitlines())
		# Break on double spaces and drop empty pieces, one piece per line.
		chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
		speech_1 = '\n'.join(chunk for chunk in chunks if chunk)
	return speech_1


def app():
	"""Render the tweet-analysis page.

	The sidebar selects between two activities. 'Tweet Analyzer' asks for a
	Twitter username and, when 'Analysis' is pressed, shows the five most
	recent tweets, a word cloud, a sentiment table with a CSV download
	link, or a count plot of sentiment classes. The other activity shows
	text scraped from a saved HTML file, or its tokens. Takes no arguments
	and returns ``None``.
	"""
	img = Image.open("ana.jpg")

	st.image(img,width=200)

	activity = ['Tweet Analyzer','Generate CSV']

	choice = st.sidebar.selectbox("Select What to want",activity)

	if choice == 'Tweet Analyzer':

		html_temp1 = """
		<div style="background-color:#FFE4C4;padding:10px">
		<h3 style="color:#0080ff;text-align:center;">Analysis Tweets</h3>
		</div>
		"""
		st.markdown(html_temp1,unsafe_allow_html=True)
		st.subheader('Enter Username (without @) :')

		text = st.text_input('')

		user_choice = st.selectbox("Select Option",['Recent Tweet','WordCloud','View Data and Download','Visualize types of comments'])

		if st.button('Analysis'):

			if user_choice == 'Recent Tweet':
				st.success('Fetching Recent Tweets')
				posts = _fetch_tweets(text)
				st.write([tw.full_text for tw in posts[:5]])

			elif user_choice == 'WordCloud':
				st.success('Generating Cloud')
				posts = _fetch_tweets(text)
				all_words = ' '.join(tw.full_text for tw in posts)
				wordCloud = WordCloud(width=500, height=300, random_state=21, max_font_size=110).generate(all_words)
				# The cloud is drawn with Matplotlib, saved to disk and
				# re-opened as an image for display.
				plt.imshow(wordCloud,interpolation='bicubic')
				plt.axis('off')
				plt.savefig('WC.jpg')
				plt.show()
				st.image(Image.open('WC.jpg'))

			elif user_choice == 'View Data and Download':
				st.success("Generating CSV and Downloading Link")
				df = _sentiment_frame(text)
				st.write(df)
				# Embed the CSV in a base64 data URI so it can be downloaded
				# straight from the page.
				csv = df.to_csv(index=False)
				b64 = base64.b64encode(csv.encode()).decode()
				link = f'<a href="data:file/csv;base64,{b64}" download="xyz.csv">Download csv file</a>'
				st.markdown(link,unsafe_allow_html=True)

			else:
				st.success("Generating Visualisation for Sentiment Analysis")
				df = _sentiment_frame(text)
				st.write(sns.countplot(x=df["Analysis"],data=df))
				st.pyplot(use_container_width=True)

	else:
		html_temp4 = """
		<div style="background-color:#FFE4C4;padding:10px">
		<h3 style="color:#0080ff;text-align:center;">Analysis Using NLP</h3>
		</div>
		"""
		st.markdown(html_temp4,unsafe_allow_html=True)
		st.subheader('Click On button to get data from web')
		if st.button('Web Scrapping'):
			st.success('Scrapping Done')
			status = st.radio("See data",("Data","Hide Data"))
			if status == 'Data':
				st.write(_load_speech())

		elif st.button('Tokenize'):
			# Button state does not survive a rerun, so the speech is loaded
			# again here rather than read from the other branch.
			st.write(word_tokenize(_load_speech()))
                              x='airline',
                              y='airline_sentiment',
                              histfunc='count',
                              color='airline_sentiment',
                              facet_col='airline_sentiment',
                              labels={'airline_sentiment': 'tweets'},
                              height=600,
                              width=800)
    st.plotly_chart(fig_choice)

# Sidebar controls for the word-cloud section.
st.sidebar.header("Word Cloud")
word_sentiment = st.sidebar.radio('Display Word Cloud for which sentiment?',
                                  ('positive', 'neutral', 'negative'))

# The "Hide" checkbox is ticked by default, so the cloud is drawn only after
# the user unticks it.
if not st.sidebar.checkbox("Hide", True, key='3'):
    st.header('Word cloud for %s sentiment' % (word_sentiment))
    # Keep the rows of `data` (defined outside this excerpt) whose
    # 'airline_sentiment' equals the chosen sentiment.
    df = data[data['airline_sentiment'] == word_sentiment]
    words = ' '.join(df['text'])
    # Drop tokens containing 'http', tokens starting with '@' and the bare
    # token 'RT' before building the cloud.
    processed_words = ' '.join([
        word for word in words.split()
        if 'http' not in word and not word.startswith('@') and word != 'RT'
    ])
    wordcloud = WordCloud(stopwords=STOPWORDS,
                          background_color='white',
                          height=640,
                          width=800).generate(processed_words)
    # Show the cloud as an image with both axes' ticks removed.
    plt.imshow(wordcloud)
    plt.xticks([])
    plt.yticks([])
    st.pyplot()
Example #30
0
import streamlit as st
import network_stability

st.title('Network Stability')

# One free-text field per duration component, created in display order.
hours, minutes, seconds = (
    st.text_input(label=unit, value=0)
    for unit in ('Hours', 'Minutes', 'Seconds')
)

speed_button = st.button(label='Run Speed Test')
connection_button = st.button(label='Run Connection Test')

# Each row: (button pressed?, NetworkTest method that runs the test,
# success message, NetworkTest method that builds the report, report file).
_test_runs = (
    (speed_button, 'speed_test_interval', 'Speed test completed.',
     'report_speed', 'speed.png'),
    (connection_button, 'connection_test_interval',
     'Connection test completed.', 'report_connection', 'connection.png'),
)

for _pressed, _run_name, _done_message, _report_name, _image_file in _test_runs:
    if not _pressed:
        continue
    # The test itself runs under the spinner; the report is built afterwards.
    with st.spinner('Wait for it...'):
        net = network_stability.NetworkTest()
        getattr(net, _run_name)(hours=int(hours),
                                minutes=int(minutes),
                                seconds=int(seconds))
    st.success(_done_message)
    fig = getattr(net, _report_name)(_image_file)
    st.pyplot(fig, bbox_inches='tight')