Example 1
def main():
    global al_score, dl_score
    st.title("MNIST - An Ng.")

    X_train, X_test, y_train, y_test = prepare_data()
    st.markdown('**Shape**')
    st.write('\nTraining set :', X_train.shape, "\nTest set :", X_test.shape)

    X_train, y_train = preprocess_data(X_train, y_train.reshape(-1, 1))
    X_test, y_test = preprocess_data(X_test, y_test.reshape(-1, 1))

    # general_config()
    radio_btn = st.radio("Approach", ("Deep Learning", "Active Learning"))

    if radio_btn == "Deep Learning":
        col1, col2 = st.beta_columns([1, 2])

        #params
        with col1:
            dl_expander = st.beta_expander("Params", expanded=True)
            with dl_expander:
                lr, epochs, batch_size = general_config()
        #display

        with col2:
            if st.button("Train"):
                #training
                with st.beta_container():
                    model = PassiveLearner(X_train, y_train, X_test, y_test,
                                           epochs, batch_size, lr)
                    with st.spinner('Training...'):
                        model.train()
                    st.balloons()
                    st.success("Train Successfully")

                    dl_score = model.evaluate(X_test, y_test)
                    st.write("Accuracy of Deep learning: ", dl_score)

    else:
        col1, col2 = st.beta_columns([1, 2])

        #params
        with col1:
            al_expander = st.beta_expander("Params", expanded=True)
            with al_expander:
                lr, epochs, batch_size = general_config()
                n_initial, n_queries, query_strategy = al_config()

                if query_strategy == 'uncertainty_sampling':
                    query_strategy = uncertainty_sampling
                else:
                    query_strategy = entropy_sampling
        #display
        with col2:
            if st.button("Train"):
                #training
                with st.beta_container():
                    model = CustomActiveLearner(X_train, y_train, X_test,
                                                y_test, epochs, batch_size, lr,
                                                n_initial, n_queries,
                                                query_strategy)

                    with st.spinner('Training...'):
                        model.train()

                    st.balloons()
                    st.success("Train Successfully")

                    al_score = model.evaluate(X_test, y_test)
                    st.write("Accuracy of Active learning: ", al_score)
Example 2
def input_values():
    data2 = pd.read_csv('data.csv', header=0)

    if st.sidebar.checkbox('Work for this week'):
        selected_name = st.sidebar.selectbox('Name', options=data2['Members'])
        days_selected = st.sidebar.multiselect('Days free to work',
                                               options=days)
        hours = st.sidebar.slider('No. of hours per week able to work',
                                  1.0, 8.0, 1.0)
        team_willing = st.sidebar.multiselect('Team willing to work in',
                                              options=teams)
        password = str(st.sidebar.text_input('enter the passphrase')).lower()

        if st.sidebar.button('Submit details'):
            y = data2.loc[data2.Members == str(selected_name)]
            z = y.iloc[:, -1].values
            if password == str(z[0]):
                st.balloons()
                input_data = {
                    'Name': [str(selected_name)],
                    'Time': [str(datetime.datetime.today())],
                    'Days': [str(days_selected)],
                    'Hours': [str(hours)],
                    'Reason': ['None'],
                    'Team': [str(team_willing)]
                }
                input_df = pd.DataFrame(input_data)
                input_df.to_csv('record.csv',
                                mode='a',
                                header=False,
                                index=None)
                input_df.to_sql('table1',
                                if_exists='append',
                                con=engine,
                                index=False,
                                index_label=None)
                record_changed = pd.read_sql('table1',
                                             con=engine,
                                             index_col=None)
                record_reverse = record_changed.iloc[::-1]
                st.subheader('Continuous Log')
                st.write(record_reverse.head())
                input_df.to_csv('week_log.csv',
                                mode='a',
                                header=False,
                                index=None)
                input_df.to_sql('table2',
                                if_exists='append',
                                con=engine,
                                index=False,
                                index_label=None)
                record_changed_wl = pd.read_sql('table2',
                                                con=engine,
                                                index_col=None)
                record_reverse_wl = record_changed_wl.iloc[::-1]
                st.subheader('Weekly Log')
                st.write(record_reverse_wl.head())
            else:
                st.sidebar.warning('Wrong passphrase')
    elif st.sidebar.checkbox('Cannot Work this week'):
        selected_name = st.sidebar.selectbox('Name', options=data2['Members'])
        reason = st.sidebar.text_input('Reason')
        password = str(st.sidebar.text_input('enter the passphrase')).lower()
        if st.sidebar.button('Submit details'):
            y = data2.loc[data2.Members == str(selected_name)]
            z = y.iloc[:, -1].values
            if password == str(z[0]):
                st.balloons()
                input_data = {
                    'Name': [str(selected_name)],
                    'Time': [str(datetime.datetime.today())],
                    'Days': ['None'],
                    'Hours': 0,
                    'Reason': [str(reason)],
                    'Team': ['None']
                }
                input_df = pd.DataFrame(input_data)
                input_df.to_csv('record.csv',
                                mode='a',
                                header=False,
                                index=None)
                input_df.to_sql('table1',
                                if_exists='append',
                                con=engine,
                                index=False,
                                index_label=None)
                record_changed = pd.read_sql('table1',
                                             con=engine,
                                             index_col=None)
                record_reverse = record_changed.iloc[::-1]
                st.subheader('Continuous Log')
                st.write(record_reverse.head())
                input_df.to_csv('week_log.csv',
                                mode='a',
                                header=False,
                                index=None)
                input_df.to_sql('table2',
                                if_exists='append',
                                con=engine,
                                index=False,
                                index_label=None)
                record_changed_wl = pd.read_sql('table2',
                                                con=engine,
                                                index_col=None)
                record_reverse_wl = record_changed_wl.iloc[::-1]
                st.subheader('Weekly Log')
                st.write(record_reverse_wl.head())
            else:
                st.sidebar.warning('Wrong passphrase')
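# input_values() relies on module-level objects that are not shown: the days and
# teams option lists and a SQLAlchemy engine. A plausible sketch (names and values
# are assumptions; any SQLAlchemy-supported database works):
from sqlalchemy import create_engine

days = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday']
teams = ['Backend', 'Frontend', 'Data']         # placeholder team names
engine = create_engine('sqlite:///records.db')  # placeholder local database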
Example 3
#import libraries
import matplotlib.pyplot as plt
import pandas as pd
import streamlit as st
import numpy as np
import matplotlib
#matplotlib.use('Agg')
import seaborn as sns
st.balloons()
#suppress the pyplot global-use deprecation warning
st.set_option('deprecation.showPyplotGlobalUse', False)
st.title("Tips_Dataset")

#import dataset
df = pd.read_csv('tips.csv')
#First thirty rows
tips = df.head(30)
#Display the table
st.table(tips)
st.header("Visualisation Using Seaborn")

#bar plot
st.subheader("Bar Plot")
tips.plot(kind='bar')
st.pyplot()
#Displot
st.subheader("Displot")
sns.displot(tips['total_bill'])
st.pyplot()
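
#jointplot: a minimal sketch, since the original code for this plot is not
#shown; the column names follow the standard tips dataset
st.subheader("Jointplot")
sns.jointplot(x='total_bill', y='tip', data=tips)
st.pyplot()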

Example 4
def run_process():
    # generate input payload
    input_schema = ENDPOINTS.input_schema
    output_schema = ENDPOINTS.output_schema

    st.markdown("## Schemas")

    st.markdown("### Input Schema")
    st.json(input_schema['properties'])
    st.markdown("### Output Schema")
    st.json(output_schema['properties'])

    st.markdown("## Payload")

    default_payload = PayloadGenerator(
        images_as_base64=False).generate(DEFAULT_PAYLOAD_IN)

    input_payload = {}
    for key in input_schema["properties"]:

        key_type = input_schema['properties'][key].get(
            "type") or input_schema['properties'][key].get("$ref")

        if key_type == "#/definitions/Image":
            val = st.file_uploader(label="{}: {}".format(
                key, input_schema['properties'][key]),
                                   type=["png", "jpg", "tif", "jp2"])
        else:
            val = st.text_input(
                label="{}: {}".format(key, input_schema['properties'][key]),
                value=default_payload[key],
            )

        if key_type == "#/definitions/Image" and val is not None:
            # Convert image as base64 for processing
            val = streamlit_utils.encode_image_from_upload(val)

        if val is not None:
            try:
                input_payload[key] = json.loads(val)
            except (ValueError, TypeError):
                # keep the raw value when it is not valid JSON
                input_payload[key] = val

    if st.button(label="SEND PAYLOAD"):
        st.json(truncate_dict_for_debug(input_payload))
        try:
            jsonschema.validate(input_payload, input_schema)
            st.text("Input payload is valid!")
        except (jsonschema.ValidationError, jsonschema.SchemaError) as e:
            st.text(e)

        @st.cache(show_spinner=False)
        def _process(input_payload):
            return ENDPOINTS.process(input_payload)

        t1 = time.time()
        with st.spinner("Processing"):
            response = _process(input_payload)
        t2 = time.time()

        st.balloons()

        st.markdown("## Display")
        image, result = streamlit_utils.parse_payloads(input_payload, response)
        st.image(image,
                 channels="RGB",
                 use_column_width=True,
                 caption="RGB image clipped to 8 bits")

        st.markdown("## Response")
        st.success("Response computed in {:.02f}s".format(t2 - t1))
        st.json(response)
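# run_process assumes project-specific objects (ENDPOINTS, PayloadGenerator,
# streamlit_utils, DEFAULT_PAYLOAD_IN) that are not shown. One small helper it
# calls, truncate_dict_for_debug, might look like this sketch (the 200-char
# cutoff is an assumption):
def truncate_dict_for_debug(d, max_len=200):
    # shorten long string values (e.g. base64 images) so st.json stays readable
    return {
        k: (v[:max_len] + "...") if isinstance(v, str) and len(v) > max_len else v
        for k, v in d.items()
    }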
Example 5
def write(state):
    
    if state.task == "Regression":
        from pycaret.regression import predict_model, finalize_model, save_model
    elif state.task == "Classification":
        from pycaret.classification import predict_model, finalize_model, save_model
    else:
        from pycaret.clustering import predict_model, save_model
        
    def online_predict(model, input_df, target_type):
        """Make a prediction on online (form) data.

        Args:
            model (object): a trained model
            input_df (pd.DataFrame): the input dataframe for predictions
            target_type (str): the type of training target

        Returns:
            str: prediction
        """
        prediction_df = predict_model(model, data=input_df)
        if target_type in ('Regression', 'Classification'):
            predictions = prediction_df['Label'][0]
        else:
            predictions = prediction_df['Cluster'][0]
        return predictions

    if state.trained_model is not None:
        st.header("Make a Prediction on Given Input or Upload a File.")

        add_selectbox = st.sidebar.selectbox(
            "How would you like to predict?",
            ("Online", "Batch", "SaveModel")
        )

        X_before_preprocess = state.X_before_preprocess
        target_name = state.y_before_preprocess
        ignore_columns = state.ignore_columns
        trained_model = state.trained_model      
        
        if add_selectbox == "Online":
            with st.spinner("Predicting ..."):
                input_df = retrieve_train_element(X_before_preprocess, target_name, ignore_columns,state.task)
                output = ""
                if st.button("Predict"):
                    output = online_predict(trained_model, input_df,state.task)
                    output = str(output)
                    st.success(f'The Prediction is **{output}**')
        
        if add_selectbox == 'Batch':
            file_upload = st.file_uploader('Upload a csv or xlsx file for prediction', type=["csv", "xlsx"])
            if file_upload is not None:
                file_extension = file_upload.name.split('.')[-1]
                if file_extension == "csv":
                    data = pd.read_csv(file_upload)
                else:
                    data = pd.read_excel(file_upload)
                predictions = predict_model(trained_model, data=data)
                st.write(predictions)  
                
                is_download = st.checkbox("Do You Want to Download the Prediction File?", value=False)
                if is_download:
                    file_extension = st.selectbox("Choose Csv or Excel File to Download", options=[".csv", ".xlsx"])
                    file_name = st.text_input("File Name", value="prediction", key=1)
                    if file_name:
                        href = download_button(predictions, file_name, "Download", file_extension)
                        st.markdown(href, unsafe_allow_html=True)
                    else:
                        st.error("File Name cannot be empty!")
        
        if add_selectbox == "SaveModel":
            is_download = st.checkbox("Do You Want to Download the Model?", value=False)
            if is_download:
                file_name = st.text_input("File Name",value="",key=2)
                is_finalize = st.checkbox("Do You Want to Finalize the Model (not for clustering)?", value=False)
                if file_name:
                    if is_finalize:
                        finalized_model = finalize_model(trained_model)
                        _,name = save_model(finalized_model, file_name)
                    else:
                        _,name = save_model(trained_model, file_name)
                    with open(name, "rb") as f:
                        e = joblib.load(f)
                    href = download_button(e, file_name, "Download",".pkl",pickle_it=True)
                    st.markdown(href, unsafe_allow_html=True)
                    
                    remove_cache = st.checkbox("Remove the Cache?", value=False)
                    if remove_cache:
                        p = Path(".").glob("*.pkl")
                        for filename in p:
                            filename.unlink()
                        if len(list(p)) == 0:
                            st.success("Delete the Cache File from Local Filesystem!")
                            st.balloons()
                else:
                    st.error("Please Give a File Name first!")
                

    else:
        st.error("Please Train a Model first!")
Example 6
def write():
    """ Test Dataset """
    st.title("First test with data (csv)")
    st.header("I hope everything works out")

    html_temp = """ <div style = "background-color:orange"><p align="center" style ="color:white;font_size:30px">(´・ᴗ・ )</p></div>"""
    st.markdown(html_temp, unsafe_allow_html=True)

    def file_selector(
        folder_path='C:/Users/tred1/Desktop/infectious_disease_modelling-master/data'
    ):
        filenames = os.listdir(folder_path)
        selected_filename = st.selectbox("Select a file", filenames)
        return os.path.join(folder_path, selected_filename)

    filename = file_selector()
    st.info("You Selected {}".format(filename))

    # ---------------------- Read from file ---------------------

    df = pd.read_csv(filename)

    # --------------------- Show the data ---------------------

    # ------ Show a chosen number of table rows -------
    if st.checkbox("Show Dataset"):
        number = st.number_input("Number of Rows to View", 1, 200)
        st.dataframe(df.head(number))

# ---------- Show the table column names ---------------
    if st.button("Column Names"):
        st.write(df.columns)

# --------- Row and column counts ---------
    if st.checkbox("Shape of Dataset"):
        st.write(df.shape)
        data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of Rows")
            st.write(df.shape[0])
        elif data_dim == 'Columns':
            st.text("Number of Columns")
            st.write(df.shape[1])
        else:
            st.write(df.shape)

# ---------------- Select dataset columns ---------------
    if st.checkbox("Select Columns to Show"):
        all_columns = df.columns.tolist()
        selected_columns = st.multiselect("Select", all_columns)
        new_df = df[selected_columns]
        st.dataframe(new_df)

# ------- Count value occurrences in a column -------
    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, 1].value_counts())

# ---------------- Column data types -----------------
    if st.button("Data Types"):
        st.write(df.dtypes)

    if st.checkbox("Summary"):
        st.write(df.describe().T)

        st.subheader("Data Visualization")
        st.subheader("Customizable Plot")
        all_columns_names = df.columns.tolist()
        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns_names = st.multiselect("Select Columns To Plot",
                                                all_columns_names)

    if st.button("Generate Plot"):
        st.success("Generating Customizable Plot of {} for {}".format(
            type_of_plot, selected_columns_names))

        if type_of_plot == 'area':
            cust_data = df[selected_columns_names]
            st.area_chart(cust_data)

        elif type_of_plot == 'bar':
            cust_data = df[selected_columns_names]
            st.bar_chart(cust_data)

        elif type_of_plot == 'line':
            cust_data = df[selected_columns_names]
            st.line_chart(cust_data)

        elif type_of_plot:
            cust_plot = df[selected_columns_names].plot(kind=type_of_plot)
            st.write(cust_plot)
            st.pyplot()

# ---------------- Pie Plot -------------------
    if st.checkbox("Pie Plot"):
        all_columns_names = df.columns.tolist()
        if st.button("Generate Pie Plot", key=1):
            st.success("Generating A Pie Plot")
            st.write(df.iloc[:, 1].value_counts().plot.pie(autopct="%1.1f%%"))
            st.pyplot()

# ---------------- Seaborn -------------------
    if st.checkbox("Correlation Plot[Seaborn]"):
        st.write(sns.heatmap(df.corr(), annot=True))
        st.pyplot()

# --------------- Count Plot ------------------
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox("Primary Column to GroupBy",
                                   all_columns_names)
        selected_columns_names = st.multiselect("Select Columns",
                                                all_columns_names)
        if st.button("Plot", key=2):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(
                    primary_col)[selected_columns_names].count()
            else:
                vc_plot = df.iloc[:, 1].value_counts()
            st.write(vc_plot.plot(kind="bar"))
            st.pyplot()

    if st.button("Шарики"):
        st.balloons()
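# Example 6 assumes these imports at module level (a minimal sketch):
import os
import pandas as pd
import seaborn as sns
import streamlit as st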
Example 7
def main():

    # Text/Title
    st.title('Streamlit Concepts')

    # Header/Subheader
    st.header("To Create Header")
    st.subheader("To Create subheader")

    st.subheader("Do you want to build the GUI using web app")
    st.code('pip install streamlit')

    #text
    st.text("hello Streamlit")

    html_temp = """
	<div style="background-color:tomato;padding:10px">
	<h2 style="color:white;text-align:center;">Streamlit ML App </h2>
	</div>

	"""
    st.markdown(html_temp, unsafe_allow_html=True)
    st.markdown('<i class="material-icons">{}</i>'.format("people"),
                unsafe_allow_html=True)
    st.latex(r''' e^{i\pi} + 1 = 0 ''')
    st.latex(r'''
    a + ar + a r^2 + a r^3 + \cdots + a r^{n-1} =
    \sum_{k=0}^{n-1} ar^k =
    a \left(\frac{1-r^{n}}{1-r}\right)
    ''')
    st.write(['st', 'is <', 3])
    st.write("✔� Time up!")
    st.code('s="Happy" for i in range(0,10): print(s)')

    df1 = pd.DataFrame(np.random.randn(50, 5),
                       columns=('col %d' % i for i in range(5)))
    my_table = st.table(df1)

    df = st.cache(
        pd.read_csv
    )("https://github.com/SurendraRedd/StreamlitProjects/raw/master/lang.csv")
    is_check = st.checkbox("Display Data")
    if is_check:
        st.write(df)

    st.write('Dataframe example')
    st.dataframe(df1)

    #Markdown
    st.markdown("### This is a Markdown")

    st.markdown("### 🎲 Demo on streamlit")
    st.markdown("Streamlit python package is used to develop applications"
                "with out knowing much web application concepts.")
    st.markdown("**♟ Examples ♟**")
    st.markdown("* Happly learning!.")

    #Will be used for displaying the Error Messages in a colourful format
    st.success("Successful")
    st.info("Information!")
    st.warning('this is a warning')
    st.error("this is an error Danger")

    data = {'1': "True", '2': "True", '3': "False"}
    st.json(data)

    # Exception handling
    st.exception(IndexError('list index out of range'))

    place_holder = st.empty()
    place_holder.text('Replaced!')

    #help of the function
    st.help(range)

    st.write("Text with write")

    st.write(range(10))

    st.line_chart({"data": [1, 5, 2, 6, 2, 1]})
    st.area_chart({"data": [1, 5, 2, 6, 2, 1]})
    st.bar_chart({"data": [1, 5, 2, 6, 2, 1]})

    arr = np.random.normal(1, 1, size=100)
    fig, ax = plt.subplots()
    ax.hist(arr, bins=20)
    st.pyplot(fig)
    '''
    # Markdown magic

    This is some _**text**_.
    '''

    df = pd.DataFrame({'col1': [1, 2, 3]})
    df  # <-- Draw the dataframe

    x = 10
    'x', x  # <-- Draw the string 'x' and then the value of x

    # Select box
    exp = st.selectbox("Select your experience: ", np.arange(1, 40, 1))

    # Slider
    exp = st.slider("Select your experience: ",
                    min_value=1,
                    max_value=40,
                    value=1,
                    step=1)

    # Multiselect
    movies = st.multiselect(
        "Select Balayya Favourite movies?",
        ["SamaraSimhaReddy", "Simha", "NarasimhaNaidu", "Legend"])

    # Will only run once if already cached
    df = load_data()
    st.write(df)

    st.button('Click')
    st.checkbox('Check the checkbox')
    st.radio('Radio Button', [1, 2, 3])
    st.selectbox('Select', [1, 2, 3])
    st.multiselect('Multiselect', [1, 2, 3])
    st.slider('slide', min_value=0, max_value=10)
    st.text_input('Enter Username')
    st.number_input('Enter a Number')
    st.text_area('Enter Text Here!')
    st.date_input('Date Input')
    st.time_input('Time entry')
    st.file_uploader('File Uploader')
    st.beta_color_picker('Select color')

    with st.echo():
        text = 's = "Happy Learning!"\nfor i in range(0, 10):\n    print(s)'
        st.write(text)

    #Image opening
    #img = Image.open("download.jfif") #open the image stored in specified location
    img = Image.open(
        urllib.request.urlopen(
            "https://github.com/SurendraRedd/ChallengeDeploy/raw/main/singlefile/Solution.png"
        ))  # Opens the image from the url
    #response = requests.get("https://github.com/SurendraRedd/Techgig/blob/main/images/Solution.png")
    #img = Image.open(BytesIO(response.content))
    #img = Image.open(urllib.request.urlopen("https://github.com/SurendraRedd/Techgig/blob/main/images/Solution.png"))
    st.image(img, width=300, caption="Simple Image")

    # Video playing
    vid_file = open("sample-mp4-file.mp4",
                    "rb").read()  #play the video stored in specified location
    st.video(vid_file)
    #videoUserDefined("https://www.youtube.com/embed/B2iAodr0fOo")

    #widgets
    if st.checkbox("Show/hide"):
        st.text("Showing or Hiding Widget")

    # Radio
    status = st.radio("What is your status", ("Married", "Single"))
    if status == 'Married':
        st.success("You are Married")

    # Add a selectbox to the sidebar:
    add_selectbox = st.sidebar.selectbox('Navigation',
                                         ('Home', 'About', 'Help'))

    if add_selectbox == 'About':
        st.write('You have selected about page')
    elif add_selectbox == 'Home':
        st.write('you have selected Home page')
    else:
        st.write('you have selected help page')

    # Sample Progress bar
    my_bar = st.progress(0)

    for percent_complete in range(100):
        time.sleep(0.1)
        my_bar.progress(percent_complete + 1)

    with st.spinner('Wait for it...'):
        time.sleep(5)
    st.success('Done!')

    st.balloons()

    st.write('Happy Streamlit App Learning')
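# Example 7 calls load_data() without defining it; with st.cache the data loads
# once and is reused across reruns. A minimal sketch (the CSV URL reuses the one
# already fetched above; the real helper may load something else):
@st.cache
def load_data():
    return pd.read_csv(
        "https://github.com/SurendraRedd/StreamlitProjects/raw/master/lang.csv")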
Example 8
    def Question_Answer(qna):
        # Render up to ten questions, collect the answers, then grade on submit.
        # (The original unrolled one near-identical branch per question count;
        # a single loop covers every case.)
        questions = [sub["question"] for sub in qna][:10]
        answers = [sub["answer"] for sub in qna][:10]

        user_answer = []
        for idx, question in enumerate(questions, start=1):
            st.write(question)
            user_answer.append(
                st.text_area("Enter the Answer:", height=2, key=idx))

        if st.button("Submit"):
            correct_wrong = get_similarity(answers, user_answer)
            total_marks = (sum(correct_wrong) / len(correct_wrong)) * 100
            st.write("Awesome!!!", str(total_marks))
            # report mistakes inside the submit branch so correct_wrong is
            # always defined when it is read
            if 0 in correct_wrong:
                st.write(
                    "You have made some mistakes; here is where you went wrong..."
                )
                for i, flag in enumerate(correct_wrong):
                    if flag == 0:
                        st.write("Mistaken Question:", questions[i])
                        st.write("Correct answer: ", answers[i])
            else:
                st.balloons()
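# Question_Answer assumes a get_similarity(answers, user_answers) helper that
# returns a list of 0/1 grades. A minimal sketch using difflib string similarity
# (the real project may use a semantic model; the 0.6 threshold is an assumption):
import difflib

def get_similarity(answers, user_answers, threshold=0.6):
    return [
        1 if difflib.SequenceMatcher(None, a.lower(), u.lower()).ratio() >= threshold
        else 0
        for a, u in zip(answers, user_answers)
    ]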
Example 9
def check_answer(questionNum, answer):
    params = {'key': my_key, 'question': questionNum, 'answer': answer}
    result = requests.get(url, params).text
    if 'correct' in result[0:8].lower():
        st.balloons()
    return result
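# check_answer assumes module-level my_key and url globals; hypothetical placeholders:
my_key = "YOUR_API_KEY"          # placeholder credential
url = "https://example.com/api"  # placeholder grading endpoint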
Example 10
def write():
    st.title('Glassdoor Salary Predictor')
    st.header('Predict a potential salary')
    st.markdown('''Uses Sci-Kit Learn Random Forest Regressor based on the following specifiable traits:''')

    option = st.selectbox(
        'Select Job Type',
        ('Software Engineering', 'Data Science', 'Product Design'))

    if option == 'Software Engineering':

        df = pd.read_csv('swe_cleaned.csv')
        swe_model = joblib.load('model.pkl')

        st.subheader('Company Details: \n Check Glassdoor for exact values, if unsure')

        rating = st.slider('Glassdoor Rating of the Company',
                        min_value=0.0,
                        max_value=5.0,
                        step=0.1)
        age = st.number_input('Age of the Company', step=1.0, min_value=0.0)

        st.subheader('Details about the Job:')

        jobhq = st.radio("Is the Job at Headquarters? (0 for No, 1 for Yes)",
                        options=[0, 1])
        job_type_num = st.selectbox("Job Type", options=df["job_simp"].unique())


        def title_number_simplifier(title):
            if 'reg' in title.lower():
                return 1
            elif 'back-end' in title.lower():
                return 2
            elif 'full-stack' in title.lower():
                return 3
            elif 'web' in title.lower():
                return 4
            elif 'data' in title.lower():
                return 5
            elif 'mobile' in title.lower():
                return 6
            elif 'systems' in title.lower():
                return 7


        job_type_num1 = title_number_simplifier(job_type_num)


        def seniority_number(title):
            # return numeric codes so the feature vector stays numeric
            if 'senior' in title.lower():
                return 1
            elif 'junior' in title.lower():
                return 2
            else:
                return 3


        seniority_num = st.radio("Senior role?", options=["Senior", "Not Senior"])
        # the feature vector below has three seniority slots, so the same code
        # fills all three
        seniority_num1 = seniority_number(seniority_num)
        seniority_num2 = seniority_number(seniority_num)
        seniority_num3 = seniority_number(seniority_num)


        st.subheader('Your skills:')
        python_yn = st.radio("Python (0 for No, 1 for Yes)", options=[0, 1])
        java_yn = st.radio("Java (0 for No, 1 for Yes)", options=[0, 1])
        javascript_yn = st.radio("Javascript (0 for No, 1 for Yes)", options=[0, 1])
        c_yn = st.radio("C (0 for No, 1 for Yes)", options=[0, 1])
        html_yn = st.radio("HTML/CSS (0 for No, 1 for Yes)", options=[0, 1])

        features = [
            rating, jobhq, age, python_yn, java_yn, javascript_yn, c_yn, html_yn,
            job_type_num1, seniority_num1, seniority_num2, seniority_num3
        ]
        final_features = np.array(features).reshape(1, -1)

        if st.button('Predict'):
            prediction = swe_model.predict(final_features)
            st.balloons()
            st.success(f'Your predicted salary is US$ {prediction[0] * 1000:,.2f}')


    elif option == 'Data Science':

        df = pd.read_csv('data_cleaned.csv')

        data_model = joblib.load('data_model.pkl')

        st.subheader('Company Details: \n Check Glassdoor for exact values, if unsure')

        rating = st.slider('Glassdoor Rating of the Company',
                        min_value=0.0,
                        max_value=5.0,
                        step=0.1)
        age = st.number_input('Age of the Company', step=1.0, min_value=0.0)

        st.subheader('Details about the Job:')

        jobhq = st.radio("Is the Job at Headquarters? (0 for No, 1 for Yes)",
                        options=[0, 1])
        job_type_num = st.selectbox("Job Type", options=['Data Scientist', 'Data Engineer', 'Analyst', 'Director', 'Manager', 'Machine Learning Engineer', 'Research', 'Software'])


        def number_simplifier(title):
            if "data scientist" in title.lower():
                return 3
            elif "data engineer" in title.lower():
                return 2
            elif "analyst" in title.lower():
                return 1
            elif "director" in title.lower():
                return 4
            elif "manager" in title.lower():
                return 5
            elif "machine learning engineer" in title.lower():
                return 6
            elif "unspecified" in title.lower():
                return 7
            elif "research" in title.lower():
                return 8
            elif "software" in title.lower():
                return 9

        job_type_num1 = number_simplifier(job_type_num)

        def senior_simplifier(title):
            if title == "Senior":
                return 1
            else:
                return 2


        seniority_num = st.radio("Senior role?", options=["Senior", "Not Senior"])
        seniority_num1 = senior_simplifier(seniority_num)


        st.subheader('Your skills:')
        python_yn = st.radio("Python (0 for No, 1 for Yes)", options=[0, 1])
        R_yn = st.radio("R (0 for No, 1 for Yes)", options=[0, 1])
        aws = st.radio("AWS (0 for No, 1 for Yes)", options=[0, 1])
        spark = st.radio("Spark (0 for No, 1 for Yes)", options=[0, 1])
        excel = st.radio("Excel (0 for No, 1 for Yes)", options=[0, 1])

        features = [
            rating, jobhq, age, python_yn, R_yn, aws, spark, excel,
            job_type_num1, seniority_num1
        ]
        final_features = np.array(features).reshape(1, -1)

        if st.button('Predict'):
            prediction = data_model.predict(final_features)
            st.balloons()
            st.success(f'Your predicted salary is US$ {prediction[0] * 1000:,.2f}')


    elif option == 'Product Design':

        df = pd.read_csv('dsgn_cleaned.csv')

        dsgn_model = joblib.load('dsgn_model.pkl')

        st.subheader(
            'Company Details: \n Check Glassdoor for exact values, if unsure')

        rating = st.slider('Glassdoor Rating of the Company',
                           min_value=0.0,
                           max_value=5.0,
                           step=0.1)
        age = st.number_input('Age of the Company', step=1.0, min_value=0.0)

        st.subheader('Details about the Job:')

        jobhq = st.radio("Is the Job at Headquarters? (0 for No, 1 for Yes)",
                         options=[0, 1])
        job_type_num = st.selectbox("Job Type",
                                    options=[
                                        'Product Designer', 'UI/UX Designer',
                                        'Graphic Designer', 'Structural Designer', 'Web Designer',
                                        'Unspecified'
                                    ])

        def number_simplifier(title):
            if "product designer" in title.lower():
                return 6
            elif "ui/ux designer" in title.lower():
                return 5
            elif "graphic designer" in title.lower():
                return 2
            elif "structural designer" in title.lower():
                return 3
            elif "web designer" in title.lower():
                return 4
            elif "unspecified" in title.lower():
                return 1

        job_type_num1 = number_simplifier(job_type_num)

        def senior_simplifier(title):
            if title == "Senior":
                return 1
            else:
                return 2

        seniority_num = st.radio("Senior role?",
                                 options=["Senior", "Not Senior"])
        seniority_num1 = senior_simplifier(seniority_num)

        st.subheader('Your skills:')
        figma_yn = st.radio("Figma (0 for No, 1 for Yes)", options=[0, 1])
        adobe_yn = st.radio("Adobe Creative Suite (0 for No, 1 for Yes)", options=[0, 1])
        cad_yn = st.radio("CAD Software (0 for No, 1 for Yes)", options=[0, 1])
        html_css_js_yn = st.radio("HTML/CSS/JavaScript (0 for No, 1 for Yes)", options=[0, 1])
        photo_yn = st.radio("Photography (0 for No, 1 for Yes)", options=[0, 1])
        graphic_yn = st.radio("Graphics (0 for No, 1 for Yes)", options=[0, 1])

        features = [
            rating, jobhq, age, figma_yn, adobe_yn, cad_yn, html_css_js_yn,
            photo_yn, graphic_yn, job_type_num1, seniority_num1
        ]
        final_features = np.array(features).reshape(1, -1)

        if st.button('Predict'):
            prediction = dsgn_model.predict(final_features)
            st.balloons()
            st.success(
                f'Your predicted salary is US$ {prediction[0] * 1000:,.2f}')
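# The *.pkl files are assumed to be scikit-learn regressors saved with joblib.
# A sketch of how such a model could be produced (file and column names are
# assumptions; the feature order must match the features list built above):
import joblib
import pandas as pd
from sklearn.ensemble import RandomForestRegressor

def train_and_save(csv_path, feature_cols, target_col, out_path):
    df = pd.read_csv(csv_path)
    model = RandomForestRegressor(n_estimators=100, random_state=0)
    model.fit(df[feature_cols], df[target_col])
    joblib.dump(model, out_path)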
Example 11
def main():
    st.title('MACHINE LEARNING FOR YOU..')

    options = ['WELCOME', 'EXPLORE']
    option = st.sidebar.selectbox('Select option: ', options)

    if option == options[0]:
        welcome_text = st.markdown(get_content("README.md"))
    elif option == options[1]:

        # ensure reproducibility
        seed = st.sidebar.slider('SEED', 1, 50, step=1)

        np.random.seed(seed=seed)
        # np.random.RandomState(seed=seed)

        # welcome_text.empty()
        # initialize so the None-check below cannot hit an unbound name
        train_df, test_df = None, None
        try:
            train_df = st.file_uploader("Upload Train dataset: ",
                                        type=['csv', 'xlsx'])
            test_df = st.file_uploader("Upload Test dataset: ",
                                       type=['csv', 'xlsx'])
        except Exception as e:
            st.warning(e)
        if train_df is not None and test_df is not None:
            # ##st.code("""
            # df.select_dtypes(include=[np.number]).shape
            # """, language='python')
            st.success('Upload complete. Status: SUCCESS')
            train = pd.read_csv(train_df)
            test = pd.read_csv(test_df)
            train.columns = map(str.lower, train.columns)
            test.columns = map(str.lower, test.columns)
            train["marker"] = "train"
            test["marker"] = "test"
            df = pd.concat([train, test], axis=0)
            df, mem_reduced = reduce_mem_usage(df)
            st.write("MEMORY SAVED: ", mem_reduced, "MB")
            df = df.loc[:, ~df.columns.duplicated()].drop_duplicates()
            keep_cols = df.columns
            datetime_ = st.multiselect('SELECT FEATURES OF TYPE DATE: ',
                                       df.columns.tolist(), date_catcher(df))
            if datetime_:
                datetime_ = list(datetime_)
                for col_ in datetime_:
                    try:
                        df[col_] = pd.to_datetime(df[col_],
                                                  infer_datetime_format=True,
                                                  format="%y%m%d")
                    except Exception as e:
                        st.write("EXCEPTION (can be ignored): ", str(e))
                    # else:
                st.write("DATETIME COLUMNS PARSED SUCCESSFULLY.")
            else:
                st.write("NO DATE COLUMN FOUND.")

            full_df = None
            full_train = None
            full_test = None

            st.dataframe(df)

            #show code
            df_head()

            st.write("SHAPE: ", df.shape)

            #show code
            df_shape()

            id_ = st.multiselect(
                'SELECT *ONE* FEATURE FOR FINAL TEST FILE (ex: ID): ',
                test.columns.tolist(),
                ["id" if "id" in test.columns else test.columns.tolist()[0]])
            if not id_:
                st.warning(
                    "YOU REALLY SHOULD PICK AN IDENTIFY FOR YOUR TEST SUBMISSION FILE."
                )
            test_id = test[id_]  #store ID for test dataframe
            train_data = df[df["marker"] == "train"]
            # test_data = df[df["marker"] == "test"]
            target_col = st.multiselect(
                "Choose preferred target column: ", train.columns.tolist(), [
                    "target" if "target" in train.columns else
                    train.columns.tolist()[-1]
                ])

            # st.write(target_col)
            if target_col:
                target_col = list(target_col)
                target_cp, ax = plt.subplots()
                sns.countplot(data=train_data, x=target_col[0])
                st.pyplot(target_cp)
                plot_target()
            else:
                st.warning("TARGET VARIABLE NOT YET DECLARED")

            if len(datetime_) < 1:
                st.write("NO DATETIME COLUMN FOUND. SKIPPING......")
            else:
                st.write(
                    "INITIALIZING DATE FEATURE ENGINEERING VIA SANGO SHRINE...."
                )
                date_parser_v1(df, datetime_)

            df = df.apply(lambda col: col.str.lower()
                          if (col.dtype == 'object') else col)

            st.dataframe(df)

            st.write("DATE FEATURE ENGINEERING COMPLETE")

            num_df = df.select_dtypes(include=[np.number]).shape[1]
            obj_df = df.select_dtypes(include='object').shape[1]
            if num_df:
                st.write('Numerical column count: ', num_df)
                st.code('''df.select_dtypes(include=[np.number])''',
                        language='python')
            if obj_df:
                cat_cols = [
                    col for col in df.columns
                    if col not in list(df.select_dtypes(include=[np.number]))
                ]
                st.write('Categorical column count: ', obj_df)

                #show code
                st.code('''#see categorical columns
df.select_dtypes(include=['object'])
                ''',
                        language='python')

                st.write(cat_cols[:5])
            st.subheader("Data Summary")
            st.write(df.describe().T)

            #show code
            st.code('''
            df.describe()
            ''',
                    language='python')

            train_data = df[df["marker"] == "train"]
            test_data = df[df["marker"] == "test"]

            train_data = train_data.dropna(subset=[target_col[0]])
            test_data.loc[test_data["marker"] == "test",
                          target_col[0]] = "N/A"  #
            pre_miss_df = pd.concat([train_data, test_data], axis=0)
            target_var = train_data[target_col[0]]
            missing_df = pd.DataFrame(data=np.round(
                (pre_miss_df.isnull().sum() / pre_miss_df.shape[0]) * 100, 1),
                                      columns=["missing (%)"])

            #show code
            st.code('''
             pd.DataFrame(data=np.round((train.isnull().sum()/train.shape[0])*100,1), columns=["missing (%)"])
             ''',
                    language='python')

            st.dataframe(missing_df.T)
            if missing_df["missing (%)"].any():  #check for nans (True if any)
                keep = st.slider("KEEP COLUMNS WITH MISSING DATA (%)", 0, 100,
                                 50, 10)

                keep_cols = missing_df[
                    missing_df["missing (%)"] <= int(keep)].index
                keep_cols = list(keep_cols)

                handle_nan = st.selectbox(label="HANDLE NANs",
                                          options=["MODE", "MEDIAN", "MEAN"])
                """Read on SimpleImputer"""
                if handle_nan == "MODE":
                    full_train = train_data[keep_cols].fillna(
                        train_data[keep_cols].mode().iloc[0])
                    full_test = test_data[keep_cols].fillna(
                        test_data[keep_cols].mode().iloc[0])

                elif handle_nan == "MEDIAN":
                    full_train = train_data[keep_cols].fillna(
                        train_data[keep_cols].median().iloc[0])
                    full_test = test_data[keep_cols].fillna(
                        test_data[keep_cols].median().iloc[0])

                elif handle_nan == "MEAN":
                    full_train = train_data[keep_cols].fillna(
                        train_data[keep_cols].mean().iloc[0])
                    full_test = test_data[keep_cols].fillna(
                        test_data[keep_cols].mean().iloc[0])

                else:
                    st.write("NO SELECTED WAY TO HANDLE NAN")  #precaution

                st.write("MISSING DATA PADDED")
            else:
                st.write("NO MISSING DATA")

            #conserve memory
            df = None

            if full_train is not None and full_test is not None:
                new_df = pd.concat([full_train, full_test],
                                   axis=0)  #use padded data
            else:
                new_df = pre_miss_df  #use this since missing data wasn't present
            st.dataframe(new_df.head(50))
            st.write("SHAPE: ", new_df.shape)
            if new_df.shape[1] > 50:
                st.write("ABSOLUTE CORRELATION WITH TARGET VARIABLE")
                st.write(new_df[new_df["marker"] == "train"].corr()[
                    target_col[0]].abs().sort_values(ascending=False))
                st.write("[correlation is not causation]")

                #show code
                heatmap_code()

            else:
                heatmap_fig, ax = plt.subplots()
                sns.heatmap(new_df[new_df["marker"] == "train"].corr(),
                            annot=True,
                            linewidth=.5,
                            fmt='.1f',
                            ax=ax)
                st.pyplot(heatmap_fig)

                #show code
                heatmap_sns()

            new_df_cols = list(new_df.columns)
            if target_col[0] in list(new_df.columns):
                new_df_cols.remove(target_col[0])
            if id_[0] in list(new_df.columns):
                new_df_cols.remove(id_[0])

            st.subheader("PLOTTING POSSIBLE RELATIONSHIP WITH TARGET FEATURE")
            check_relationship(new_df_cols, target_col[0],
                               new_df[new_df["marker"] == "train"])

            #handle features excluded
            remove_feat = st.multiselect("SELECT FEATURE(S) TO DROP",
                                         new_df_cols)
            if remove_feat:
                new_df = remove_features(new_df, remove_feat)

                #show code
                st.code('''
                df.drop([list_of_columns_to_drop], axis=1, inplace=True)
                ''',
                        language='python')

            else:
                st.write("KEEPING ALL FEATURES")

            st.dataframe(new_df.head(50))
            st.write(new_df.shape)

            #test_id = new_df[new_df["marker"] == "test"][id_] #store ID for test dataframe

            #remove monotonic or unique features
            new_df = remove_mono_unique(dataframe=new_df, cols=new_df.columns)
            st.dataframe(new_df.head(50))
            st.write(new_df.shape)
            st.write("MONOTONIC AND UNIQUE FEATURES REMOVED")

            NOT_DUMMY = [
                target_col[0], "target", "marker", "claim", "prediction",
                "response"
            ]  #features we do not need the dummy for

            exclude_cols = [
                col for col in new_df.columns if col not in NOT_DUMMY
            ]
            exclude_cols = list(
                set(exclude_cols).intersection(list(new_df.columns)))
            dum_df = pd.get_dummies(new_df[exclude_cols], drop_first=True)
            dum_df["marker"] = new_df["marker"].copy()
            dum_df[target_col[0]] = new_df[target_col[0]].copy()
            st.dataframe(dum_df.head(100))
            st.write(dum_df.shape)
            st.write("CATEGORICAL FEATURES ENCODED")

            new_df = None

            dum_train = dum_df[dum_df["marker"] == "train"].drop(
                [target_col[0], "marker"], axis=1)
            dum_train_y = pd.DataFrame(
                dum_df[dum_df["marker"] == "train"][target_col[0]].astype(
                    'int').values,
                columns=["target"])
            dum_test = dum_df[dum_df["marker"] == "test"].drop(
                [target_col[0], "marker"], axis=1)

            #feature scaling
            train_scaled, test_scaled = feature_scaling(dum_train, dum_test)
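
            # feature_scaling is defined elsewhere; a typical sketch wraps
            # sklearn's StandardScaler, fitting on train only and reusing that
            # fit for test (an assumption -- the real helper may differ):
            #
            #   from sklearn.preprocessing import StandardScaler
            #   scaler = StandardScaler().fit(dum_train)
            #   train_scaled = pd.DataFrame(scaler.transform(dum_train),
            #                               columns=dum_train.columns)
            #   test_scaled = pd.DataFrame(scaler.transform(dum_test),
            #                              columns=dum_test.columns)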

            st.subheader("Train Data")

            st.dataframe(train_scaled.head(200))
            st.write(train_scaled.shape)
            st.markdown(download_csv(train_scaled,
                                     "cpt_train.csv",
                                     info="DOWNLOAD TRAIN FILE"),
                        unsafe_allow_html=True)

            st.subheader("Test Data")

            st.dataframe(test_scaled.head(200))
            st.write(test_scaled.shape)
            st.markdown(download_csv(test_scaled,
                                     "cpt_test.csv",
                                     info="DOWNLOAD TEST FILE"),
                        unsafe_allow_html=True)

            #downsample/upsample
            # _train = _target = None
            if len(set(dum_train_y["target"])) == 2:  #binary classification
                classones = int((dum_train_y["target"] == 1).mean() * 100)
                classzeroes = int((dum_train_y["target"] == 0).mean() * 100)
                if classones >= 70 or classzeroes >= 70:
                    st.warning("IMBALANCED TRAINING SET DETECTED!")
                    st.write("CLASS 1(", classones, "%) to CLASS 0(",
                             classzeroes, "%)")
                    _train, _target, balance_type = balance_out(
                        train_scaled, dum_train_y, seed)

                    if balance_type != "DEFAULT":
                        st.subheader("Train Data (BALANCED)")

                        st.dataframe(_train.head(50))
                        st.write(_train.shape)
                        st.markdown(download_csv(
                            _train,
                            "cpt_train_balanced.csv",
                            info="DOWNLOAD BALANCED TRAIN FILE"),
                                    unsafe_allow_html=True)

                else:
                    _train, _target = train_scaled.copy(), dum_train_y.copy()
            else:
                #multiclass target: skip balancing, keep the scaled data as-is
                _train, _target = train_scaled.copy(), dum_train_y.copy()

            st.header('TRAINING/TESTING SECTION')

            model = st.sidebar.selectbox('Select Algorithm: ', MODELS)

            #algorithm selection and hyperparameter tuning
            params = model_parameter(model)
            model_ = build_model(model, params, seed)


            train_, val_, test_, test_resp = initialize_model(model=model_, Xtrain_file=_train, ytrain_file=_target["target"], \
                                                        test_file=test_scaled, test_dataframe=test_id, target_var_=target_col[0], seed=seed)

            if test_resp is not None:
                # st.write("Train Accuracy (on train data: ", sklearn.metrics.accuracy_score(train_[0], train_[1]))
                st.write(
                    "VALIDATION Accuracy (on train data): ",
                    np.round(
                        sklearn.metrics.accuracy_score(val_[0], val_[1]) * 100,
                        1), '(%)')
                st.write(
                    "TEST Accuracy (on train data): ",
                    np.round(
                        sklearn.metrics.accuracy_score(test_[0], test_[1]) *
                        100, 1), '(%)')

                st.write(
                    "TEST F1 SCORE (on train data): ",
                    np.round(
                        sklearn.metrics.f1_score(test_[0], test_[1]) * 100, 1),
                    '(%)')

                st.write(test_resp.head(1000))
                st.write(test_resp.shape)
                st.write("")
                st.markdown(download_csv(test_resp,
                                         "MLCPT_TEST_PRED.csv",
                                         info="DOWNLOAD TEST PREDICTION FILE"),
                            unsafe_allow_html=True)
                st.write("MODEL ESTABLISHED. YAY!")
                st.balloons()

                train_scaled = test_scaled = None
            else:
                st.write("YOUR MODEL FAILED TO COMPLETE")

        elif train_df:
            st.write("YOU NEED THE TEST DATASET TOO")
        elif test_df:
            st.write("YOU NEED THE TRAIN DATASET AS WELL")
        else:
            st.write("PLEASE UPLOAD BOTH THE TRAIN AND TEST DATASETS")
    else:
        st.write('INVALID ARGUMENT! ')

    st.markdown(
        "<h5 style='text-align: center'>Made with <span style='color:red'>&hearts;</span> By <a href='https://www.twitter.com/__oemmanuel__'>Emmanuel</a> </h5>",
        unsafe_allow_html=True)
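
The example above leans on a common tabular-competition pattern: concatenate train and test with a "marker" column, run every preprocessing step once on the combined frame, then split back on the marker. A minimal, self-contained sketch of that pattern (the column names here are illustrative, not from the app):

import pandas as pd

train = pd.DataFrame({"feat": [1, 2], "target": [0, 1]})
test = pd.DataFrame({"feat": [3, 4]})
train["marker"], test["marker"] = "train", "test"

df = pd.concat([train, test], axis=0)  # preprocess once on the combined frame
df["feat"] = df["feat"] * 10           # ...any shared transformation...

train_out = df[df["marker"] == "train"].drop(columns="marker")
test_out = df[df["marker"] == "test"].drop(columns=["marker", "target"])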
Esempio n. 12
0
def main():

	menu = ["Team","Lieu d'intérêt", "Evénement", "Produit", "Itinéraire", "Maintenance"]
	choice = st.sidebar.radio("Menu", menu)

	

	if choice == "Team":
		


		image = Image.open('DATAtourisme.png')
		st.image(image, use_column_width = True, output_format = 'PNG')


		st.markdown("<h1 style='text-align: center; font-size:15px; color:#A11F40;'>Qu'est ce que DATAtourisme ?</h1>", unsafe_allow_html=True)   

		st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True)   
		

		col1, col2, col3 = st.beta_columns((1,13,1))   		
   				
		with col1:
   		   
   		   st.markdown("")

		with col2:
			st.markdown("DATAtourisme est un dispositif national visant à faciliter l’accès aux données publiques d’information touristique produites à travers les territoires par les offices de tourisme et les comités départements ou régionaux du tourisme. Il se matérialise par une plateforme de collecte, de normalisation et de diffusion de données en open data, directement reliée aux bases de données territoriales, et repose sur l’animation d’une communauté d’utilisateurs. Le dispositif est copiloté par la Direction générale des entreprises et la fédération ADN Tourisme. Les données collectées sont relatives au recensement de l’ensemble des événements et points d’intérêt touristiques de France (musées, monuments, sites naturels, activités, itinéraires, expos et concerts, etc)", unsafe_allow_html=True)   


		with col3:
			st.markdown("")



		st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True)   


		st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True)


		st.markdown("<h1 style='text-align: center; font-size:15px; color:#A11F40;'>Qui sommes-nous ?</h1>", unsafe_allow_html=True)   

		st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True)   
		


		col1, col2, col3 = st.beta_columns((1,13,1))   		
   				
		with col1:
   		   
   		   st.markdown("")

		with col2:
			st.markdown("A propos de ce projet : Dans le cadre de notre formation professionnelle de Data Analyst, notre équipe de 5 s'est alliée à ADN Tourisme pour proposer un état des lieux, à jour, du projet DATAtourisme, ce qui n'existait pas jusqu'alors.", unsafe_allow_html=True)   

		with col3:
			st.markdown("")   		   



		st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True)   
		

		col1, col2, col3 = st.beta_columns((3,0.72,3))

		with col1:
			st.markdown("")

		with col2:
			if st.button("Team"):
			   st.balloons()

		with col3:
			st.markdown("")


		st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True)   
		





#		st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'>Team</h1>", unsafe_allow_html=True)   

		



		col1, col2, col3, col4, col5 = st.beta_columns(5)   		
   				
		with col1:
   		   
   		   st.image("cm1.jpg", use_column_width=True)
   		   st.markdown("""**Carla&#8239;Moreno**""")
   		   st.markdown("""*Scrum Master*""")
   		   st.markdown(link1, unsafe_allow_html=True)

		with col2:
   		   
   		   st.image("cc.jpg", use_column_width=True)
   		   st.markdown("""**Corentin&#8239;Guillo**""")
   		   st.markdown("""*Product Owner*""")
   		   st.markdown(link4, unsafe_allow_html=True)


		with col3:
   		   
   		   st.image("Yvanne.jpg", use_column_width=True)	
   		   st.markdown("""**Yvanne&#8239;Euchin**""")
   		   st.markdown("""*Equipe Tech*""")
   		   st.markdown(link2, unsafe_allow_html=True)



		with col4:
		   st.image("md.jpg", use_column_width=True)
		   st.markdown("""**Michael&#8239;Desforges**""")
		   st.markdown("""*Equipe Tech*""")
		   st.markdown(link, unsafe_allow_html=True)


		with col5:
		   st.image("ab.jpg", use_column_width=True)
		   st.markdown("""**Amar&#8239;Barache**""")
		   st.markdown("""*Equipe Tech*""")
		   st.markdown(link5, unsafe_allow_html=True)



		image = Image.open('WCS.png')
		st.image(image, use_column_width = True, output_format = 'PNG')


		page_bg_img = '''
		<style>
		body {
		background-image: url("https://i.ibb.co/cD9CndX/nuages2.jpg");
		background-size: cover;
		}
		</style>
		'''

		st.markdown(page_bg_img, unsafe_allow_html=True)


	if choice == "Produit":
		data = data1




		image = Image.open('DATAtourisme.png')
		st.image(image, use_column_width = True, output_format = 'PNG')

		st.markdown("<h1 style='text-align:center; font-size:29px; color: #57565B;'>Produit</h1>", unsafe_allow_html=True)




		if st.checkbox('view dataframe'):
		   st.write(data)
#		   st.write(data.iloc[0:100,:])
		 
				 


		f" POI : **{len(data1.index)}** sur **{len(data1.index)+len(data2.index)+len(data3.index)+len(data4.index)}** au total"

		f" Créateurs de données : **{len(data1.createur_donnée.unique())}**"

		f" Fournisseurs : **{len(data1.fournisseur.unique())}**"

		f" Villes : **{len(data1.ville.unique())}**"

		f" POI avec photo :  **{int(round(data1.photo.sum()/len(data1.photo.index)*100))}%**"


				 

		st.markdown(""" # **Densité de POI** """)

		fig = px.density_mapbox(data, lat='latitude', lon='longitude', radius=4,
                        center={"lat": 46.037763, "lon": 4.4}, zoom=4, color_continuous_midpoint = 5,
                        mapbox_style='carto-positron', color_continuous_scale=['grey','darkgrey','grey','red','red'])
		fig.update_layout(coloraxis_showscale=False,margin=dict( l=0, r=0, b=0, t=0, pad = 4 ))
		fig.update_traces(hoverinfo='skip', hovertemplate=None)
		st.plotly_chart(fig)

		



		st.markdown("""# **Par départements**""")


		fig = px.choropleth_mapbox(data, 
                           geojson=france_regions_geo, 
                           color=data.code_departement.value_counts(),
                           locations=data.code_departement.value_counts().index.tolist(), 
                           featureidkey='properties.code',
                           opacity=1,
                           center={"lat": 46.037763, "lon": 2.062783},
                           mapbox_style="carto-positron", zoom=4)

		fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
		st.plotly_chart(fig)







	elif choice == "Evénement":
		data = data2



		image = Image.open('DATAtourisme.png')
		st.image(image, use_column_width = True, output_format = 'PNG')

		st.markdown("<h1 style='text-align:center; font-size:29px; color: #57565B;'>Evénement</h1>", unsafe_allow_html=True)



		if st.checkbox('view dataframe'):
		   st.write(data.iloc[0:100,:])
		 

		f" POI : **{len(data2.index)}** sur **{len(data1.index)+len(data2.index)+len(data3.index)+len(data4.index)}** au total"

		f" Créateurs de données : **{len(data2.createur_donnée.unique())}**"

		f" Fournisseurs : **{len(data2.fournisseur.unique())}**"

		f" Villes : **{len(data2.ville.unique())}**"

		f" POI avec photo :  **{int(round(data2.photo.sum()/len(data2.photo.index)*100))}%**"
			
		

		st.markdown(""" # **Densité de POI** """)

		fig = px.density_mapbox(data, lat='latitude', lon='longitude', radius=4,
                        center={"lat": 46.037763, "lon": 4.4}, zoom=4, color_continuous_midpoint = 5,
                        mapbox_style='carto-positron', color_continuous_scale=['grey','darkgrey','grey','red','red'])
		fig.update_layout(coloraxis_showscale=False,margin=dict( l=0, r=0, b=0, t=0, pad = 4 ))
		fig.update_traces(hoverinfo='skip', hovertemplate=None)
		st.plotly_chart(fig)

		st.markdown("""# **Par départements**""")

		fig = px.choropleth_mapbox(data, 
                           geojson=france_regions_geo, 
                           color=data.code_departement.value_counts(),
                           locations=data.code_departement.value_counts().index.tolist(), 
                           featureidkey='properties.code',
                           opacity=1,
                           center={"lat": 46.037763, "lon": 2.062783},
                           mapbox_style="carto-positron", zoom=4)

		fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
		st.plotly_chart(fig)





	elif choice == "Lieu d'intérêt":
		data = data3



		image = Image.open('DATAtourisme.png')
		st.image(image, use_column_width = True, output_format = 'PNG')


		st.markdown("<h1 style='text-align:center; font-size:29px; color: #57565B;'>Lieux d'intérêt</h1>", unsafe_allow_html=True)



		if st.checkbox('view dataframe'):
		   st.write(data.iloc[0:100,:])
		   
		   
		



		f" POI : **{len(data3.index)}** sur **{len(data1.index)+len(data2.index)+len(data3.index)+len(data4.index)}** au total"

		f" Créateurs de données : **{len(data3.createur_donnée.unique())}**"

		f" Fournisseurs : **{len(data3.fournisseur.unique())}**"

		f" Villes : **{len(data3.ville.unique())}**"

		f" POI avec photo :  **{int(round(data3.photo.sum()/len(data3.photo.index)*100))}%**"

				

		st.markdown(""" # **Densité de POI** """)

		fig = px.density_mapbox(data, lat='latitude', lon='longitude', radius=4,
                        center={"lat": 46.037763, "lon": 4.4}, zoom=4, color_continuous_midpoint = 5,
                        mapbox_style='carto-positron', color_continuous_scale=['grey','darkgrey','grey','red','red'])
		fig.update_layout(coloraxis_showscale=False,margin=dict( l=0, r=0, b=0, t=0, pad = 4 ))
		fig.update_traces(hoverinfo='skip', hovertemplate=None)
		st.plotly_chart(fig)

		st.markdown("""# **Par départements**""")

		fig = px.choropleth_mapbox(data, 
                           geojson=france_regions_geo, 
                           color=data.code_departement.value_counts(),
                           locations=data.code_departement.value_counts().index.tolist(), 
                           featureidkey='properties.code',
                           opacity=1,
                           center={"lat": 46.037763, "lon": 2.062783},
                           mapbox_style="carto-positron", zoom=4)

		fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
		st.plotly_chart(fig)




	elif choice == "Itinéraire":
		data = data4




		image = Image.open('DATAtourisme.png')
		st.image(image, use_column_width = True, output_format = 'PNG')


		st.markdown("<h1 style='text-align:center; font-size:29px; color: #57565B;'>Itinéraire</h1>", unsafe_allow_html=True)



		if st.checkbox('view dataframe'):
		   st.write(data.iloc[0:100,:])
		 		   

		f" **POI** : **{len(data4.index)}** sur **{len(data1.index)+len(data2.index)+len(data3.index)+len(data4.index)}** au total"

		f" Créateurs de données : **{len(data4.createur_donnée.unique())}**"

		f" Fournisseurs : **{len(data4.fournisseur.unique())}**"

		f" Villes : **{len(data4.ville.unique())}**"

		f" POI avec photo :  **{int(round(data4.photo.sum()/len(data4.photo.index)*100))}%**"

				 	

		st.markdown(""" # **Densité de POI** """)

		fig = px.density_mapbox(data, lat='latitude', lon='longitude', radius=4,
                        center={"lat": 46.037763, "lon": 4.4}, zoom=4, color_continuous_midpoint = 5,
                        mapbox_style='carto-positron', color_continuous_scale=['grey','darkgrey','grey','red','red'])
		fig.update_layout(coloraxis_showscale=False,margin=dict( l=0, r=0, b=0, t=0, pad = 4 ))
		fig.update_traces(hoverinfo='skip', hovertemplate=None)
		st.plotly_chart(fig)

		st.markdown("""# **Par départements**""")

		fig = px.choropleth_mapbox(data, 
                           geojson=france_regions_geo, 
                           color=data.code_departement.value_counts(),
                           locations=data.code_departement.value_counts().index.tolist(), 
                           featureidkey='properties.code',
                           opacity=1,
                           center={"lat": 46.037763, "lon": 2.062783},
                           mapbox_style="carto-positron", zoom=4)

		fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0})
		st.plotly_chart(fig)

		

	elif choice == "Maintenance":

		image = Image.open('DATAtourisme.png')
		st.image(image, use_column_width = True, output_format = 'PNG')


		mdp = st.text_input("Mot de passe ?")


		st.write()
		if mdp == "Les+tour1stes.":
			if st.checkbox('view dataframe'):
				st.write(data_erreur)
				st.markdown("")
				download = st.button('download')
				if download:
					csv = data_erreur.to_csv(index=False)
					b64 = base64.b64encode(csv.encode()).decode()  
					linko= f'<a href="data:file/csv;base64,{b64}" download="data_erreur.csv">Download csv file</a>'
					st.markdown(linko, unsafe_allow_html=True)
				 	

			f" Départements sans fournisseurs : **{data_erreur[data_erreur.fournisseur.isna()].code_departement.unique()}**"
			f" Départements sans créateur : **{data_erreur[data_erreur.createur_donnée.isna()].code_departement.unique()}**"
			f" Fournisseurs sans région : **{data_erreur[data_erreur.region.isna()].fournisseur.unique()}**"
			st.markdown("")
			st.markdown(""" # **Carte des erreurs latitude & longitude** """)
			st.markdown("")
			st.map(data_erreur_map)

			st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'>Répartition des sous-categories de la categorie Lieu d'intérêt</h1>", unsafe_allow_html=True)
			x = list(data3.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17])
			y=list(data3.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().iloc[0:17])
			fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40'])
			fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category"))
			st.plotly_chart(fig)

			st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'>Répartition des sous-categories de la categorie Lieu Evénement</h1>", unsafe_allow_html=True)
			x = list(data2.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17])
			y=list(data2.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().iloc[0:17])
			fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40'])
			fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category"))
			st.plotly_chart(fig)

			st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'>Répartition des sous-categories de la categorie Produit</h1>", unsafe_allow_html=True)
			x = list(data1.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17])#.drop("HébergementProduit",axis=0).index[0:17])
			y=list(data1.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().iloc[0:17])#.drop("HébergementProduit",axis=0).iloc[0:17])
			fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40'])
			fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category"))
			st.plotly_chart(fig)

			st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'>Répartition des sous-categories de la categorie Itinéraire</h1>", unsafe_allow_html=True)
			x = list(data4.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17])
			y=list(data4.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().iloc[0:17])
			fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40'])
			fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category"))
			st.plotly_chart(fig)
		
		
		
   
		



				



	else:
		st.subheader("""  """)
Esempio n. 13
0
@st.cache(suppress_st_warning=True)
def showBalloons():
  # cached so the balloons display only once; reruns reuse the cached call
  st.balloons()
Esempio n. 14
0
def main():

    st.title('Resolve errors smoothly with Pylance')

    st.markdown('To get to the point where you can resolve Python errors on your own, you can read the error message, use a debugger, and so on. But pinpointing the faulty spot in hundreds of lines of code takes time.')

    st.markdown('The VSCode extension **Pylance** visualizes problem spots so you can locate errors quickly.')

    st.markdown('Here we walk through how to resolve errors using Pylance.')

    im('img/head_img.png')

    st.subheader('Which menu would you like?')
    option = st.selectbox(
        '',
        ('How to install Pylance ← start here!', 'How to find errors with Pylance'))

    if option == 'How to install Pylance ← start here!':
        st.subheader('How to install Pylance')

        st.markdown('To install Pylance, type "pylance" into the VSCode extensions search box,')
        
        im('img/inst1.png')
        
        st.markdown('and install the one that comes up.')
        im('img/inst2.png')

        st.markdown('After that, click "Manage" via the gear button at the bottom right of VSCode')
        im('img/kanri.png')
        st.markdown('and open "Settings".')
        im('img/inst4.png')

        st.markdown('There, search for "**python.analysis.typeCheckingMode**" and set it to **basic**.')
        im('img/inst5.png')

        st.markdown('That completes the Pylance installation.')

        if st.button('Click when done!'):
            st.markdown('Congratulations!!😆😆')
            im('img/happy.png')
            st.balloons()

    if option == 'How to find errors with Pylance':
        st.subheader('How to find errors with Pylance')

        st.markdown('With Pylance installed, some spots get a red squiggly underline, as in the image below. To find the error, hover the cursor over the red squiggle and inspect it.')

        im('img/error.png')

        st.subheader('Select the error message you want to resolve')
        option = st.selectbox(
        '',
        ('Expected indented block', 'A is not defined', 'Unexpected indentation', 'Expected ")"', 'Expected expression', 'String literal is unterminated', 'Invalid character in token ""', 'Expected ":"', 'A is possibly unbound'))
        
        
        if option == 'Expected indented block':
            
            st.subheader('Expected indented block')
            im('img/expindent.png')

            st.markdown('This message means **some block is missing its indentation**. "Expected" means the parser is waiting for something, so "Expected indented block" means **an indented block is expected here**.')

            st.markdown('For example, in the following case the print statement after the if statement is not indented.')

            im('img/erim6.png')

            st.subheader('How to fix it')

            st.markdown('Find the part where the indentation was forgotten and indent it.')

            im('img/erim7.png')

            st.subheader('Where this error often occurs')
            st.markdown('It often occurs around function definitions, if statements, for loops, and try-except blocks.')

            im('img/er1.png')
            st.markdown('If you get this error, review the code around those statements.')

            st.subheader('Did you resolve the error?')
            if st.button('Click when the red squiggle is gone'):
                st.markdown('Excellent!! You have picked up another error-resolving skill!!😆😆')
                im('img/happy.png')
                
                st.balloons()           
                
            
        if option == 'A is not defined':
            st.subheader('A is not defined')
            
            im('img/er2.png')
            st.markdown('This message means either **the variable A has not been defined** or **the module A has not been imported**. If A is a module name, you may have forgotten to write the import.')

            st.subheader('How to fix it')
            st.markdown('Check the following three things.')
            st.markdown('**1. If an import statement is missing, write it.**')
            st.markdown('**2. If the variable is not defined, define it.**')
            st.markdown('**3. Check the spelling for mistakes.**')

            st.subheader('Where this error often occurs')

            st.subheader('Undefined variable')
            im('img/meri1.png')
            st.markdown('In the example above, the variable test has not been defined.')

            st.markdown('[Fix] → define the variable beforehand.')

            im('img/meri2.png')

            st.subheader('Typos')
            im('img/m1.png')

            st.markdown('In the example above, print has been mistyped as prnt.')

            st.markdown('[Fix] → correct the typo.')

            st.subheader('Variables inside a function')

            im('img/m2.png')
            st.markdown('In the example above, initial appears for the first time inside the function, which raises the error.')
            st.markdown('[Fix] → pass it in as a parameter.')

            im('img/emm.png')

            st.subheader('Did you resolve the error?')
            if st.button('Click when the red squiggle is gone'):
                st.markdown('Excellent!! You have picked up another error-resolving skill!!😆😆')
                im('img/beach.jpg')
                
                st.balloons()
                
        if option == 'Unexpected indentation':
            st.subheader('Unexpected indentation')
            
            im('img/er3.png')
            
            st.markdown('This message means you may have indented somewhere that does not need indentation.')

            im('img/er4.png')

            st.subheader('How to fix it')
            st.markdown('Remove the indentation. For example, the second print above does not follow an if, a for, a def, or similar, so it needs no indentation; un-indent it.')

            im('img/er5.png')

            st.subheader('Did you resolve the error?')
            if st.button('Click when the red squiggle is gone'):
                st.markdown('Excellent!! You have picked up another error-resolving skill!!😆😆')
                im('img/happy.png')
                
                st.balloons()
                
        if option == 'Expected ")"':
            st.subheader('Expected ")"')
            
            im('img/mml1.png')
            
            st.markdown('This message means a **closing ) is probably missing** on a nearby line.')

            st.markdown('For example, this↓ forgets the final ) of the print under the for loop.')
            im('img/erim.png')

            st.subheader('How to fix it')
            st.markdown('Find the spot where the ) is missing and add it.')

            im('img/elm1.png')

            st.subheader('Did you resolve the error?')
            if st.button('Click when the red squiggle is gone'):
                st.markdown('Excellent!! You have picked up another error-resolving skill!!😆😆')
                im('img/smile.jpg')
                
                st.balloons()                    

        if option == 'Expected expression':
            st.subheader('Expected expression')
        
            im('img/er8.png')
            
            st.markdown('This message means **an "expression" is expected** here.')

            st.markdown('In Python, an "expression" is something that evaluates to a result when executed.')

            st.markdown('For example,')

            st.markdown('1 + 5')

            st.markdown('[3, 5, 7, 9]')

            st.markdown('anything that yields some result or data is an expression.')

            st.markdown('As an example of the error, consider this:')
            im('img/erim2.png')

            st.markdown('"1 + 2 +" is not a complete expression, so it is an error.')


            st.subheader('How to fix it')
            st.markdown('Find the part that is not a complete expression and fix it.')

            im('img/erml.png')

            st.markdown('Adding 5 turns it into a complete expression.')

            st.subheader('Did you resolve the error?')
            if st.button('Click when the red squiggle is gone'):
                st.markdown('Excellent!! You have picked up another error-resolving skill!!😆😆')
                im('img/banzai.png')
                
                st.balloons() 

        if option == 'String literal is unterminated':
            
            st.subheader('String literal is unterminated')
        
            im('img/er6.png')
            
            st.markdown('This message means you probably **forgot to close a quotation mark** on a string.')

            st.markdown('For example, this one forgets the closing " of the string.')
            im('img/erim3.png')

            st.subheader('How to fix it')
            st.markdown('Add the missing quotation mark.')

            im('img/errm.png')

            st.subheader('Did you resolve the error?')
            if st.button('Click when the red squiggle is gone'):
                st.markdown('Excellent!! You have picked up another error-resolving skill!!😆😆')
                im('img/jumping.jpg')
                
                st.balloons()      

        if option == 'Invalid character in token ""':
            st.subheader('Invalid character in token ""')
            
            im('img/er9.png')
            
            st.markdown('This message means a **full-width space** has probably slipped in somewhere.')

            st.markdown('For example, below there is a full-width space after the : of the for statement.')
            im('img/erim4.png')

            st.subheader('How to fix it')
            st.markdown('Delete the full-width space after the :. As shown above, to put a # after a statement, leave two half-width spaces before the # and it is fine.')

            im('img/6em.png')

            st.subheader('Did you resolve the error?')
            if st.button('Click when the red squiggle is gone'):
                st.markdown('Excellent!! You have picked up another error-resolving skill!! You leveled up😆😆')
                im('img/jump.jpg')
                
                st.balloons() 
                
        if option == 'Expected ":"':
            st.subheader('Expected ":"')
            
            im('img/ex1.png')
            
            st.markdown('This message means a colon : has been forgotten somewhere.')

            st.markdown('For example, below the colon : of a function definition is missing.')
            im('img/ex2.png')

            st.subheader('How to fix it')
            st.markdown('Add the colon : where it is missing.')

            im('img/ex3.png')

            st.markdown('Where this error often occurs')
            st.markdown('It often occurs around function definitions, if statements, for loops, and try-except blocks.')

            im('img/er1.png')

            st.subheader('Did you resolve the error?')
            if st.button('Click when the red squiggle is gone'):
                st.markdown('Excellent!! You have picked up another error-resolving skill!! You leveled up😆😆')
                im('img/happy.png')
                
                st.balloons()
            
        if option == 'A is possibly unbound':
            st.subheader('A is possibly unbound')
            
            im('img/unbound.png')
            
            st.markdown('This message means the variable A may be left undefined on some execution path.')

            st.markdown('For example, below info_dict is defined only when the try block runs, and not on the except path. So if the except block runs, info_dict is undefined and an error occurs.')
            im('img/unbound2.png')

            st.subheader('How to fix it')
            st.markdown('Define info_dict somewhere it gets defined on both the try and except paths.')

            im('img/unbound3.png')

            st.subheader('Did you resolve the error?')
            if st.button('Click when the red squiggle is gone'):
                st.markdown('Excellent!! You have picked up another error-resolving skill!! You leveled up😆😆')
                im('img/gutts.jpg')
                
                st.balloons()                      
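
The last case above ("A is possibly unbound") is easy to reproduce and fix in a few lines; a minimal sketch of the pattern the screenshots describe (fetch() stands in for any call that may raise):

# Buggy: info_dict is bound only if the try body succeeds.
#
#   try:
#       info_dict = fetch()   # may raise
#   except Exception:
#       pass                  # info_dict is unbound on this path
#   print(info_dict)          # Pylance: "info_dict" is possibly unbound

# Fixed: bind the name before branching, so every path sees it.
info_dict = {}
try:
    info_dict = {"status": "ok"}
except Exception:
    pass
print(info_dict)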
Esempio n. 15
0
def main():
    side_img = Image.open("images/emotion3.jpg")
    with st.sidebar:
        st.image(side_img, width=300)
    st.sidebar.subheader("Menu")
    website_menu = st.sidebar.selectbox(
        "Menu", ("Emotion Recognition", "Project description", "Our team",
                 "Leave feedback", "Relax"))
    st.set_option('deprecation.showfileUploaderEncoding', False)

    if website_menu == "Emotion Recognition":
        st.sidebar.subheader("Model")
        model_type = st.sidebar.selectbox("How would you like to predict?",
                                          ("mfccs", "mel-specs"))
        em3 = em6 = em7 = gender = False
        st.sidebar.subheader("Settings")

        st.markdown("## Upload the file")
        with st.beta_container():
            col1, col2 = st.beta_columns(2)
            # audio_file = None
            # path = None
            with col1:
                audio_file = st.file_uploader("Upload audio file",
                                              type=['wav', 'mp3', 'ogg'])
                if audio_file is not None:
                    if not os.path.exists("audio"):
                        os.makedirs("audio")
                    path = os.path.join("audio", audio_file.name)
                    if_save_audio = save_audio(audio_file)
                    if if_save_audio == 1:
                        st.warning("File size is too large. Try another file.")
                    elif if_save_audio == 0:
                        # extract features
                        # display audio
                        st.audio(audio_file, format='audio/wav', start_time=0)
                        try:
                            wav, sr = librosa.load(path, sr=44100)
                            Xdb = get_melspec(path)[1]
                            mfccs = librosa.feature.mfcc(wav, sr=sr)
                            # # display audio
                            # st.audio(audio_file, format='audio/wav', start_time=0)
                        except Exception as e:
                            audio_file = None
                            st.error(
                                f"Error {e} - wrong format of the file. Try another .wav file."
                            )
                    else:
                        st.error("Unknown error")
                else:
                    if st.button("Try test file"):
                        wav, sr = librosa.load("test.wav", sr=44100)
                        Xdb = get_melspec("test.wav")[1]
                        mfccs = librosa.feature.mfcc(wav, sr=sr)
                        # display audio
                        st.audio("test.wav", format='audio/wav', start_time=0)
                        path = "test.wav"
                        audio_file = "test"
            with col2:
                if audio_file is not None:
                    fig = plt.figure(figsize=(10, 2))
                    fig.set_facecolor('#d1d1e0')
                    plt.title("Wave-form")
                    librosa.display.waveplot(wav, sr=44100)
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.get_xaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    plt.gca().axes.spines["bottom"].set_visible(False)
                    plt.gca().axes.set_facecolor('#d1d1e0')
                    st.write(fig)
                else:
                    pass
            #     st.write("Record audio file")
            #     if st.button('Record'):
            #         with st.spinner(f'Recording for 5 seconds ....'):
            #             st.write("Recording...")
            #             time.sleep(3)
            #         st.success("Recording completed")
            #         st.write("Error while loading the file")

        if model_type == "mfccs":
            em3 = st.sidebar.checkbox("3 emotions", True)
            em6 = st.sidebar.checkbox("6 emotions", True)
            em7 = st.sidebar.checkbox("7 emotions")
            gender = st.sidebar.checkbox("gender")

        elif model_type == "mel-specs":
            st.sidebar.warning("This model is temporarily disabled")

        else:
            st.sidebar.warning("This model is temporarily disabled")

        # with st.sidebar.beta_expander("Change colors"):
        #     st.sidebar.write("Use this options after you got the plots")
        #     col1, col2, col3, col4, col5, col6, col7 = st.beta_columns(7)
        #
        #     with col1:
        #         a = st.color_picker("Angry", value="#FF0000")
        #     with col2:
        #         f = st.color_picker("Fear", value="#800080")
        #     with col3:
        #         d = st.color_picker("Disgust", value="#A52A2A")
        #     with col4:
        #         sd = st.color_picker("Sad", value="#ADD8E6")
        #     with col5:
        #         n = st.color_picker("Neutral", value="#808080")
        #     with col6:
        #         sp = st.color_picker("Surprise", value="#FFA500")
        #     with col7:
        #         h = st.color_picker("Happy", value="#008000")
        #     if st.button("Update colors"):
        #         global COLOR_DICT
        #         COLOR_DICT = {"neutral": n,
        #                       "positive": h,
        #                       "happy": h,
        #                       "surprise": sp,
        #                       "fear": f,
        #                       "negative": a,
        #                       "angry": a,
        #                       "sad": sd,
        #                       "disgust": d}
        #         st.success(COLOR_DICT)

        if audio_file is not None:
            st.markdown("## Analyzing...")
            if not audio_file == "test":
                st.sidebar.subheader("Audio file")
                file_details = {
                    "Filename": audio_file.name,
                    "FileSize": audio_file.size
                }
                st.sidebar.write(file_details)

            with st.beta_container():
                col1, col2 = st.beta_columns(2)
                with col1:
                    fig = plt.figure(figsize=(10, 2))
                    fig.set_facecolor('#d1d1e0')
                    plt.title("MFCCs")
                    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    st.write(fig)
                with col2:
                    fig2 = plt.figure(figsize=(10, 2))
                    fig2.set_facecolor('#d1d1e0')
                    plt.title("Mel-log-spectrogram")
                    librosa.display.specshow(Xdb,
                                             sr=sr,
                                             x_axis='time',
                                             y_axis='hz')
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    st.write(fig2)

            if model_type == "mfccs":
                st.markdown("## Predictions")
                with st.beta_container():
                    col1, col2, col3, col4 = st.beta_columns(4)
                    mfccs = get_mfccs(path, model.input_shape[-1])
                    mfccs = mfccs.reshape(1, *mfccs.shape)
                    pred = model.predict(mfccs)[0]

                    with col1:
                        if em3:
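                            # collapse the 6-way prediction into positive /
                            # neutral / negative buckets; the index-to-emotion
                            # mapping is assumed to follow CAT6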
                            pos = pred[3] + pred[5] * .5
                            neu = pred[2] + pred[5] * .5 + pred[4] * .5
                            neg = pred[0] + pred[1] + pred[4] * .5
                            data3 = np.array([pos, neu, neg])
                            txt = "MFCCs\n" + get_title(data3, CAT3)
                            fig = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig,
                                               predictions=data3,
                                               categories=CAT3,
                                               title=txt,
                                               colors=COLORS)
                            # plot_polar(fig, predictions=data3, categories=CAT3,
                            # title=txt, colors=COLORS)
                            st.write(fig)
                    with col2:
                        if em6:
                            txt = "MFCCs\n" + get_title(pred, CAT6)
                            fig2 = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig2,
                                               predictions=pred,
                                               categories=CAT6,
                                               title=txt,
                                               colors=COLORS)
                            # plot_polar(fig2, predictions=pred, categories=CAT6,
                            #            title=txt, colors=COLORS)
                            st.write(fig2)
                    with col3:
                        if em7:
                            model_ = load_model("model/model4.h5")
                            mfccs_ = get_mfccs(path, model_.input_shape[-2])
                            mfccs_ = mfccs_.T.reshape(1, *mfccs_.T.shape)
                            pred_ = model_.predict(mfccs_)[0]
                            txt = "MFCCs\n" + get_title(pred_, CAT7)
                            fig3 = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig3,
                                               predictions=pred_,
                                               categories=CAT7,
                                               title=txt,
                                               colors=COLORS)
                            # plot_polar(fig3, predictions=pred_, categories=CAT7,
                            #            title=txt, colors=COLORS)
                            st.write(fig3)
                    with col4:
                        if gender:
                            with st.spinner('Wait for it...'):
                                gmodel = load_model("model/model_mw.h5")
                                gmfccs = get_mfccs(path,
                                                   gmodel.input_shape[-1])
                                gmfccs = gmfccs.reshape(1, *gmfccs.shape)
                                gpred = gmodel.predict(gmfccs)[0]
                                gdict = [["female", "woman.png"],
                                         ["male", "man.png"]]
                                ind = gpred.argmax()
                                txt = "Predicted gender: " + gdict[ind][0]
                                img = Image.open("images/" + gdict[ind][1])

                                fig4 = plt.figure(figsize=(3, 3))
                                fig4.set_facecolor('#d1d1e0')
                                plt.title(txt)
                                plt.imshow(img)
                                plt.axis("off")
                                st.write(fig4)

            # if model_type == "mel-specs":
            # st.markdown("## Predictions")
            # st.warning("The model in test mode. It may not be working properly.")
            # if st.checkbox("I'm OK with it"):
            #     try:
            #         with st.spinner("Wait... It can take some time"):
            #             global tmodel
            #             tmodel = load_model_cache("tmodel_all.h5")
            #             fig, tpred = plot_melspec(path, tmodel)
            #         col1, col2, col3 = st.beta_columns(3)
            #         with col1:
            #             st.markdown("### Emotional spectrum")
            #             dimg = Image.open("images/spectrum.png")
            #             st.image(dimg, use_column_width=True)
            #         with col2:
            #             fig_, tpred_ = plot_melspec(path=path,
            #                                         tmodel=tmodel,
            #                                         three=True)
            #             st.write(fig_, use_column_width=True)
            #         with col3:
            #             st.write(fig, use_column_width=True)
            #     except Exception as e:
            #         st.error(f"Error {e}, model is not loaded")

    elif website_menu == "Project description":
        import pandas as pd
        import plotly.express as px
        st.title("Project description")
        st.subheader("GitHub")
        link = '[GitHub repository of the web-application]' \
               '(https://github.com/CyberMaryVer/speech-emotion-webapp)'
        st.markdown(link, unsafe_allow_html=True)

        st.subheader("Theory")
        link = '[Theory behind - Medium article]' \
               '(https://talbaram3192.medium.com/classifying-emotions-using-audio-recordings-and-python-434e748a95eb)'
        st.markdown(link + ":clap::clap::clap: Tal!", unsafe_allow_html=True)
        with st.beta_expander("See Wikipedia definition"):
            components.iframe(
                "https://en.wikipedia.org/wiki/Emotion_recognition",
                height=320,
                scrolling=True)

        st.subheader("Dataset")
        txt = """
            This web-application is a part of the final **Data Mining** project for **ITC Fellow Program 2020**. 

            Datasets used in this project
            * Crowd-sourced Emotional Multimodal Actors Dataset (**Crema-D**)
            * Ryerson Audio-Visual Database of Emotional Speech and Song (**Ravdess**)
            * Surrey Audio-Visual Expressed Emotion (**Savee**)
            * Toronto emotional speech set (**Tess**)    
            """
        st.markdown(txt, unsafe_allow_html=True)

        df = pd.read_csv("df_audio.csv")
        fig = px.violin(df,
                        y="source",
                        x="emotion4",
                        color="actors",
                        box=True,
                        points="all",
                        hover_data=df.columns)
        st.plotly_chart(fig, use_container_width=True)

        st.subheader("FYI")
        st.write(
            "Since we are currently using a free tier instance of AWS, "
            "we disabled mel-spec and ensemble models.\n\n"
            "If you want to try them we recommend to clone our GitHub repo")
        st.code(
            "git clone https://github.com/CyberMaryVer/speech-emotion-webapp.git",
            language='bash')

        st.write(
            "After that, just uncomment the relevant sections in the app.py file "
            "to use these models:")

    elif website_menu == "Our team":
        st.subheader("Our team")
        st.balloons()
        col1, col2 = st.beta_columns([3, 2])
        with col1:
            st.info("*****@*****.**")
            st.info("*****@*****.**")
            st.info("*****@*****.**")
        with col2:
            liimg = Image.open("images/LI-Logo.png")
            st.image(liimg)
            st.markdown(
                f""":speech_balloon: [Maria Startseva](https://www.linkedin.com/in/maria-startseva)""",
                unsafe_allow_html=True)
            st.markdown(
                f""":speech_balloon: [Tal Baram](https://www.linkedin.com/in/tal-baram-b00b66180)""",
                unsafe_allow_html=True)
            st.markdown(
                f""":speech_balloon: [Asher Holder](https://www.linkedin.com/in/asher-holder-526a05173)""",
                unsafe_allow_html=True)

    elif website_menu == "Leave feedback":
        st.subheader("Leave feedback")
        user_input = st.text_area("Your feedback is greatly appreciated")
        user_name = st.selectbox(
            "Choose your personality",
            ["checker1", "checker2", "checker3", "checker4"])

        if st.button("Submit"):
            st.success(f"Message\n\"\"\"{user_input}\"\"\"\nwas sent")

            if user_input == "log123456" and user_name == "checker4":
                with open("log0.txt", "r", encoding="utf8") as f:
                    st.text(f.read())
            elif user_input == "feedback123456" and user_name == "checker4":
                with open("log.txt", "r", encoding="utf8") as f:
                    st.text(f.read())
            else:
                log_file(user_name + " " + user_input)
                thankimg = Image.open("images/sticky.png")
                st.image(thankimg)

    else:
        import requests
        import json

        url = 'http://api.quotable.io/random'
        if st.button("get random mood"):
            with st.beta_container():
                col1, col2 = st.beta_columns(2)
                n = np.random.randint(1, 1000, 1)[0]
                with col1:
                    quotes = {
                        "Good job and almost done":
                        "checker1",
                        "Great start!!":
                        "checker2",
                        "Please make corrections base on the following observation":
                        "checker3",
                        "DO NOT train with test data":
                        "folk wisdom",
                        "good work, but no docstrings":
                        "checker4",
                        "Well done!":
                        "checker3",
                        "For the sake of reproducibility, I recommend setting the random seed":
                        "checker1"
                    }
                    if n % 5 == 0:
                        a = np.random.choice(list(quotes.keys()), 1)[0]
                        quote, author = a, quotes[a]
                    else:
                        try:
                            r = requests.get(url=url)
                            text = json.loads(r.text)
                            quote, author = text['content'], text['author']
                        except Exception as e:
                            a = np.random.choice(list(quotes.keys()), 1)[0]
                            quote, author = a, quotes[a]
                    st.markdown(f"## *{quote}*")
                    st.markdown(f"### ***{author}***")
                with col2:
                    st.image(image=f"https://picsum.photos/800/600?random={n}")
Esempio n. 16
def main():
    st.title("NLP App with Streamlit")
    st.markdown(
        "Welcome! This is a simple NLP application created using Streamlit and deployed on Heroku."
    )
    st.markdown(
        "In the box below, you can type custom text or paste a URL from which text is extracted. Once you have the text, open the sidebar and choose any of the four applications. Currently, we have applications to tokenize text, extract entities, analyze sentiment, and summarize text (and a surprise! :wink:)."
    )
    st.markdown(
        "You can preview a percentage of your text by selecting a value on the slider and clicking on \"Preview\""
    )

    nlp = load_spacy()

    text = fetch_text(
        st.text_area(
            "Enter Text (or URL) and select application from sidebar",
            "Here is some sample text. When inputing your custom text or URL make sure you delete this text!"
        ))

    pct = st.slider("Preview length (%)", 0, 100)
    length = (len(text) * pct) // 100
    preview_text = text[:length]

    if st.button("Preview"):
        st.write(preview_text)

    apps = [
        'Show tokens & lemmas', 'Extract Entities', 'Show sentiment',
        'Summarize text', 'Surprise'
    ]
    choice = st.sidebar.selectbox("Select Application", apps)
    if choice == "Show tokens & lemmas":
        if st.button("Tokenize"):
            st.info("Using spaCy for tokenization and lemmatization")
            st.json([(f"Token: {token.text}, Lemma: {token.lemma_}")
                     for token in analyze_text(nlp, text)])
    elif choice == 'Extract Entities':
        if st.button("Extract"):
            st.info("Using spaCy for NER")
            doc = analyze_text(nlp, text)
            html = displacy.render(doc, style='ent')
            html = html.replace('\n\n', '\n')
            st.write(html, unsafe_allow_html=True)
    elif choice == "Show sentiment":
        if st.button("Analyze"):
            st.info("Using TextBlob for sentiment analysis")
            blob = TextBlob(text)
            sentiment = {
                'polarity': np.round(blob.sentiment[0], 3),
                'subjectivity': np.round(blob.sentiment[1], 3),
            }
            st.write(sentiment)
            st.info(
                "Polarity is between -1 (negative) and 1 (positive) indicating the type of sentiment\nSubjectivity is between 0 (objective) and 1 (subjective) indicating the bias of the sentiment"
            )
    elif choice == "Summarize text":
        summarizer_type = st.sidebar.selectbox("Select Summarizer",
                                               ['Gensim', 'Sumy Lex Rank'])
        if summarizer_type == 'Gensim':
            summarizer = gensim_summarizer
        elif summarizer_type == 'Sumy Lex Rank':
            summarizer = sumy_summarizer

        if st.button(f"Summarize using {summarizer_type}"):
            st.success(summarizer(text))
    elif choice == 'Surprise':
        st.balloons()

    st.markdown(
        "The code for this app can be found in [this](https://github.com/sudarshan85/streamlit_nlp) Github repository."
    )
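For context, a minimal standalone sketch of the TextBlob sentiment call used above (assumes textblob is installed; the sample sentence and printed values are illustrative):

from textblob import TextBlob

blob = TextBlob("Streamlit makes building NLP demos pleasant.")
# blob.sentiment is a namedtuple: Sentiment(polarity, subjectivity)
print(round(blob.sentiment.polarity, 3))      # in [-1, 1]; above 0 means positive
print(round(blob.sentiment.subjectivity, 3))  # in [0, 1]; closer to 1 means subjective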
Esempio n. 17
def app():
    st.title('Enter your student ID and try to find it in pi 🐈')

    number = st.number_input(label='',
                             min_value=0,
                             max_value=99999999,
                             format="%d")

    if number != 0:

        f'Searching for {number} in Pi'

        # Add a placeholder
        latest_iteration = st.empty()
        bar = st.progress(0)

        for i in range(100):
            # Update the progress bar with each iteration.
            latest_iteration.text(f'{i+1}%')
            bar.progress(i + 1)
            nd = random.random()
            if nd > 0.3:
                time.sleep(0.01)
            elif nd < 0.03:
                time.sleep(0.9)
            else:
                time.sleep(0.1)
        # with st.spinner(text='In progress'):
        #     time.sleep(5)
        #     st.success('Done')

        n = number
        r = requests.get('http://subidiom.com/pi/piday.asp', params={'s': n})

        html_doc = r.text

        # print(
        # float(re.findall(pattern="Search time was (.*?) second", string=html_doc)[0]))

        rank = re.findall(pattern="appears at the ([0-9,]*?)(st|nd|rd|th| )",
                          string=html_doc)[0][0]

        b = re.findall(pattern=r"<font size=4>(\d*?)<font color=0f00ff>",
                       string=html_doc)[0]
        a = re.findall(pattern=r"</font>(\d*?)<br>", string=html_doc)[0]

        st.success(f'{n} appears at position {rank} of $\pi$!')
        st.balloons()

        img_path = 'picat.jpg'
        W, H = (637, 637)
        msg = rank
        im = Image.open(img_path)
        draw = ImageDraw.Draw(im)
        w, h = draw.textsize(msg)
        # myFont = ImageFont.truetype("SimHei.ttf", 30, encoding="utf-8")
        draw.text(((W - w) / 2, (H - h) / 2), msg, fill="black")
        st.image(im, caption='', use_column_width=True)

        with open('style.html', 'r', encoding='utf-8') as f:
            pretty_number = f.read()
            pretty_number += "<br>"
            pretty_number += "<div align=\"center\" class=\"pi\">"
            if b:
                pretty_number += f"<font color=\"gray\" size=\"5\">...{b}</font>"
            else:
                pretty_number += f"<font color=\"gray\" size=\"5\">3.{b}</font>"
            pretty_number += "".join([
                f"<span class=\"d{x}\"><font size=\"6\">{x}</font></span>"
                for x in str(number)
            ])
            pretty_number += f"<font color=\"gray\" size=\"5\">{a}...</font>"
            pretty_number += "</div>"
            components.html(pretty_number)

    st.write("""
    ***
    ## π 中一定包含你的学号么?
    ### 一定
    如果你的学号是 8 位数,那么它一定会在 π 的前 ~18.168 亿位中出现。(认真脸.jpg)
    """)

    st.write("""
    ***
    ## π 中包含了所有可能的数字组合吗?
    ### 不好说
    $\pi$ 是一个无理数,此外 $\pi$ 还是一个超越数——它不是任何有理数系数多项式的根。$\pi$ 的数字序列被认为是随机分布的,但至今未能证明,同样 $\pi$ 的合取性(disjunctive)与正规性(normal)也未在十进制下得到证明。
    """)

    st.write('''
    ***
    ### Reference📄
    1. [Pi (Chinese Wikipedia)](https://zh.wikipedia.org/wiki/%E5%9C%93%E5%91%A8%E7%8E%87)
    2. [Does 𝜋 contain all possible number combinations?](https://math.stackexchange.com/questions/216343/does-pi-contain-all-possible-number-combinations)
    3. [Pi Does NOT Contain the Universe](http://justinparrtech.com/JustinParr-Tech/pi-does-not-contain-the-universe/)
    4. [Does π contain all possible number combinations? (Guokr)](https://www.guokr.com/article/439682/)
    ''')
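Instead of querying the remote subidiom.com service, the digit search itself can be done locally; a minimal sketch assuming mpmath is installed (the student ID below is made up):

from mpmath import mp

mp.dps = 100_001                      # compute pi to ~100k decimal digits
digits = mp.nstr(mp.pi, mp.dps)[2:]   # digit string with the leading "3." dropped
idx = digits.find("20240001")         # hypothetical student ID
print(idx + 1 if idx >= 0 else "not within the first 100k digits")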
Esempio n. 18
def main():
    st.sidebar.header("Settings")

    # GET DATA
    data = load_data()
    data_unpivoted = clean_and_reshape_data(data)

    # >> DISPLAY WIDGETS <<

    # FILTER TO SELECTED LOCATION & YEAR
    locations_list = list(
        data_unpivoted.loc[:, 'Location'].sort_values(ascending=True).unique())
    location = st.sidebar.selectbox(
        'Select location',
        locations_list,
        index=locations_list.index('United Kingdom'))
    indicator_slot = st.sidebar.empty()  # reserve slot for indicator selector
    year = st.sidebar.slider('Select year',
                             min_value=2015,
                             max_value=2022,
                             value=2020)

    data_view = data_unpivoted.loc[(data_unpivoted.loc[:,
                                                       'Location'] == location)
                                   & (data_unpivoted.loc[:, 'Year'] == year)]

    if st.checkbox('Show data', False):
        '''
        ### Data

        _The sample data used in this application is property of Oxford Economics and provided for personal use and_
        _educational purposes only. A 5-year rolling mean transformation has been applied to the original data series values,_
        _so the data is still representative of actual level values. Please do not redistribute this data without the express_
        _permission of the owner, Oxford Economics._
        '''
        # TABLE
        st.sidebar.subheader('Data view')
        if st.sidebar.checkbox('Show DataFrame', True):
            data_view

        if st.sidebar.checkbox('Show Table'):
            st.table(data_view)
    '''
    ### Chart
    '''
    # chart data is calculated in two steps
    # step 1 (using data_unpivoted, filtered by location)
    chart_data = data_unpivoted[(data_unpivoted['Location'] == location)]

    indicators_list = list(
        chart_data.loc[:, 'Indicator'].sort_values(ascending=True).unique())
    # this selection box is put into the reserved widget slot created above
    indicator = indicator_slot.selectbox('Select indicator', indicators_list)

    # step 2 (using chart_data, filtered by location's indicators)
    chart_data = chart_data[(chart_data['Indicator'] == indicator)]

    fig = alt.Chart(chart_data, title=f'{location} | {indicator}').mark_bar().encode(
        alt.X('Year:O', axis=alt.Axis(domain=False, tickSize=0)),
        alt.Y('Value', axis=alt.Axis(domain=False, tickSize=0, title='Value')),
        color='Value',
        tooltip=['Id', 'Year', 'Value']).properties(width=600).interactive()
    st.altair_chart(fig)

    # ABOUT
    st.sidebar.header('About')
    st.sidebar.info('Using Streamlit to build a Web App.\n\n' + \
        '(c) 2020. Oxford Economics Ltd. All rights reserved.')
    st.sidebar.markdown('---')

    # Display Readme.md
    if st.sidebar.checkbox('Readme', False):
        st.markdown('---')
        '''
        ### Readme
        '''
        with open('./README.md', 'r', encoding='utf-8') as f:
            readme = f.read()
            st.markdown(readme)

    # TESTS
    if st.sidebar.checkbox('Run Tests', False):
        st.markdown('---')
        st.title('Test Suite')
        '''
        ### Data Load Test
        '''
        suite = unittest.TestLoader().loadTestsFromModule(TestFixtures)
        result = unittest.TextTestRunner(verbosity=2).run(suite)
        if result.wasSuccessful():
            st.info('Test PASSED :-)')
            st.balloons()
        else:
            st.error('Test FAILED :-(')

    # Style
    st.sidebar.markdown('---')
    if st.sidebar.checkbox('Configure Style'):
        BlockContainerStyler().block_container_styler()
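The indicator_slot = st.sidebar.empty() trick above reserves a widget slot that is filled only after later code has computed the options; a minimal sketch of the same pattern:

import streamlit as st

slot = st.sidebar.empty()                  # reserve the slot now
st.sidebar.slider('Some other widget', 0, 10)
options = ['a', 'b', 'c']                  # computed later in a real app
choice = slot.selectbox('Filled in afterwards', options)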
Esempio n. 19
def layout(page):
    '''Layout Streamlit commands'''
    
    st.title(page)
    st.write('''This page lists the layout commands (currently in beta) that are available in Streamlit. They are not yet integrated 
    into the basic Streamlit functions and therefore may not always work in unique (edge) cases. If you believe you have 
    encountered such a case, please let us know on the [Streamlit Community Platform.](https://discuss.streamlit.io/)''')
    st.markdown("---")

# ***************** COLUMNS SECTION ****************
    st.header('Columns')
    st.subheader('Columns of Equal Size:')
    st.code('''
col1,col2 = st.beta_columns(2)
col1.image('img/brain.png', caption= "This is a blue brain!")
data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns = ['a', 'b', 'c'])
col2.write(data)''')
    
    col1,col2 = st.beta_columns(2)
    col1.image('img/brain.png', caption= "This is a blue brain!", use_column_width=True)
    data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns = ['a', 'b', 'c'])
    col2.write('__A Dataframe__')
    col2.write(data)

    st.subheader('Columns of Different Sizes:')
    st.code('''
col3,col4,col5 = st.beta_columns([1,2,3]) 
# 3 columns where first is the smallest, the second is 2x the size of the first and 3rd is 3x the first
col3.image('img/MC.png',use_column_width = True, caption="A Streamlit Sharing App")
with col4: 
    st.image('img/MC.png',use_column_width = True, caption="A Streamlit Sharing App")
with col5: 
    st.image('img/MC.png',use_column_width = True, caption="A Streamlit Sharing App")
    ''')
    col3,col4,col5 = st.beta_columns([1,2,3]) 
    # 3 columns where first is the smallest, the second is 2x the size of the first and 3rd is 3x the first
    col3.image('img/MC.png',use_column_width = True, caption="A Streamlit Sharing App")
    with col4: 
        st.image('img/MC.png',use_column_width = True, caption="A Streamlit Sharing App")
    with col5: 
        st.image('img/MC.png',use_column_width = True, caption="A Streamlit Sharing App")

    st.subheader('Columns to Make a Grid:')
    st.code('''
for i in range(1,3): # number of rows in your table! = 2
    cols = st.beta_columns(2) # number of columns in each row! = 2
    # first column of the ith row
    cols[0].image('img/row_%i_col_0.png' %i, use_column_width=True)
    cols[1].image('img/row_%i_col_1.jpg' %i, use_column_width=True)
    ''')
    for i in range(1,3): # number of rows in your table! = 2
        cols = st.beta_columns(2) # number of columns in each row! = 2
        # first column of the ith row
        cols[0].image('img/row_%i_col_0.png' %i, use_column_width=True)
        cols[1].image('img/row_%i_col_1.jpg' %i, use_column_width=True)

# ***************** CONTAINERS SECTION *************************
    st.markdown("---")
    st.header('Containers')
    st.write('''You may want to create a container for a couple of reasons. These include:

- Creating invisible structure to your app that can help with coding and flow
- They allow you to write objects 'out of order' 
- They serve as a conceptual block of code''')
    st.subheader('Container using `with`:')
    st.code(''' 
with st.beta_container():
    st.write("This bar graph is inside the container")
    # You can call any Streamlit command, including custom components:
    st.bar_chart(np.random.randn(50, 3))
    ''')
    with st.beta_container():
        st.write("This bar graph is inside the container")
        st.bar_chart(np.random.randn(50, 3))

    st.subheader('Container out of order:')
    st.code(''' 
container = st.beta_container() 
container.write("This button is inside a container")
button = container.button('Press Me and see something to blow your mind!')
if button:
    st.header("Voila!! The order is backwards!")
   
container.write("This is _after_ the if button statement, but comes _before_ the 'Voila!!'")
    ''')
    container = st.beta_container() 
    container.write("This button is inside a container")
    button = container.button('Press Me and see something to blow your mind!', key='container_button_run')
    if button:
        st.header("Voila!! The order is backwards!")
   
    container.write("This is _after_ the if button statement, but comes _before_ the 'Voila!!'")

# ********************* EXPANDER SECTION *******************
    st.markdown("---")
    st.header("Expander")
    st.write('''The expander allows you to hide sections that you may not always want expanded. 
    When the user clicks the expander, it *__does not__* rerun the script, so this can be useful 
    for housing additional widgets.''')
    st.code(''' 
with st.beta_expander('Expand Me'): 
    st.write('Well hello there!')
    st.balloons()''')

    with st.beta_expander('Expand Me'): 
        st.write('Well hello there!')
        st.balloons()

# ********************* SIDEBAR SECTION *******************
    st.markdown("---")
    st.header('Sidebar')
    st.write(''' 
You may have noticed the handy sidebar to your left :point_left:

If you would like to create your own you simply need to add `sidebar` before you call a streamlit function. For example:''')

    st.code('''
# use st.sidebar.<widget> notation
sidebar_button = st.sidebar.button("Click here to remove your button")
if not sidebar_button: # if the button is NOT clicked display this message
    st.sidebar.markdown('You added a widget to the sidebar!')

    ''')
    add_sidebar = st.button('Run this code to add to the sidebar')
    if add_sidebar: 
        a = st.sidebar.button("Click here to remove your button", key='sidebar_button_run')
        if not a:
            st.sidebar.markdown('You added a widget to the sidebar!')
    
    st.write('''
NOTE: The `st.sidebar.<function>` notation works for basically ALL the Streamlit functions. However, there are a few that it _doesn't_
work with; we have listed those here (as it's shorter to list the few it doesn't work with): 

functions that will cause an error (and their workarounds):

-  :exclamation: `st.sidebar.echo()`
    - :white_check_mark: `st.sidebar.code()`
- :exclamation: `st.sidebar.spinner()`
    - :white_check_mark: no current workarounds :disappointed:
    ''') #:heavy_multiplication_x: ideal: :X: (big red X from slack)

# *************************** SET PAGE CONFIG ***********************
    st.markdown("---")
    st.header("Set Page Configuration")

    buff, config1, mid, config2, buff = st.beta_columns([1,20,0.5,20,1])

    with config1: 
        st.subheader("Set the layout")
        st.write('''
You can change the layout of your app in two ways, the default is centered, with one `centered` column down the 
centre (surprise!) of the app. The other option is `wide`, which this app already is! If you would like to see what 
centered looks like click the 'Change the Layout' button.
        ''')
        st.code('''
st.set_page_config(page_title="Streamlit cheat sheet",
    layout="centered")
        ''')

        layout_change = st.button('Change the Layout')
        if layout_change: 
            code_to_save = 'st.set_page_config(page_title="Streamlit cheat sheet",layout="centered")'
            pkle.dump(code_to_save, open('format.txt', 'wb'))
            st.experimental_rerun()
        
        st.markdown("---")
        st.subheader("Set the App Name")
        st.write('''
This setting allows you to change the name that appears in your browser tab. If you would like to change the name 
of the app yourself, then click the 'Set the app name' button.
        ''')
        st.code('''
usr_name = st.text_input('Pick a page name')
st.set_page_config(page_title="%s" % usr_name, layout="wide")
        ''')
        usr_name = st.text_input('Pick a page name')
        if len(usr_name) == 0: 
            usr_name = "Pick a Name!"

        page_name = st.button('Set the app name')
        if page_name: 
            code_to_save = 'st.set_page_config(page_title="%s",layout="wide")' %usr_name
            pkle.dump(code_to_save, open('format.txt', 'wb'))
            st.experimental_rerun()
        
    with config2: 
        st.subheader("Set the Icon")
        st.write('''
In the tab that houses this app, there is a default icon of a black and white Streamlit logo. BUT we wanted you to be 
able to change this to any emoji you want! To do this, check out the code below. If you would like to see 
what the emoji looks like, click the 'Change the Emoji' button.
        ''')
        st.code('''
st.set_page_config(page_title="Streamlit cheat sheet",
    layout="wide", page_icon=":monkey:")
        ''')
        #st.set_page_config() page_icon=None
        emoji_change = st.button("Change the Emoji")
        if emoji_change: 
            code_to_save = 'st.set_page_config(page_title="Streamlit cheat sheet",layout="wide", page_icon=":monkey:")'
            pkle.dump(code_to_save, open('format.txt', 'wb'))
            st.experimental_rerun()


        st.markdown("---")
        st.subheader("Set the Sidebar")
        # initial_sidebar_state='auto'
        st.write('''
With this setting, you can change the sidebar to be either `expanded`, `collapsed` or `auto` when a user first arrives at 
your app. If this is not specified then the default is `auto`, which collapses the sidebar on a mobile device and shows it
 on all other devices. If you would like to set the sidebar state, click the 'Change the Sidebar State' button.

        ''')
        st.code('''
st.set_page_config(page_title="Streamlit cheat sheet",
    layout="wide", initial_sidebar_state="collapsed")
        ''')

        #initial_sidebar_state='collapsed'
        sidebar_change = st.button("Change the Sidebar State")
        if sidebar_change: 
            code_to_save = 'st.set_page_config(page_title="Streamlit cheat sheet",layout="wide", initial_sidebar_state="collapsed")'
            pkle.dump(code_to_save, open('format.txt', 'wb'))
            st.experimental_rerun()
    return
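The configuration buttons above persist a set_page_config line to format.txt via pkle.dump and then call st.experimental_rerun(); presumably the top of the script reads the file back and applies it on the next run. A hedged sketch of that read side (assumed, not shown in this excerpt):

import os
import pickle as pkle
import streamlit as st

if os.path.exists('format.txt'):
    saved = pkle.load(open('format.txt', 'rb'))   # e.g. 'st.set_page_config(...)'
    exec(saved)                                   # apply the saved configuration line
else:
    st.set_page_config(page_title="Streamlit cheat sheet", layout="wide")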
Esempio n. 20
def main():

    st.title("DataHacks 2021")

    st.write("## Predict any country's pillar and prosperity scores!")

    st.write(
        "#### Using our machine learning models, predict any country's prosperity score"
    )

    countries_df = pd.read_csv(
        'https://raw.githubusercontent.com/Andrewl7127/UCSD-DataHacks-2021/main/Data/merged.csv'
    )
    countries_df = list(countries_df['country'].unique())

    names = [
        'busi', 'econ', 'educ', 'envi', 'gove', 'heal', 'pers', 'safe', 'soci',
        'prosperity'
    ]

    def pillar(name='busi', countries=['Chad']):
        url = 'https://raw.githubusercontent.com/Andrewl7127/UCSD-DataHacks-2021/main/Data/'
        df = pd.read_csv(url + name + '_train.csv')
        df = df.drop(['Unnamed: 0'], axis=1)
        for i in df.columns:
            if i.find('year') > -1:
                df = df.drop([i], axis=1)
        y = df[name]

        df = df.drop(['rank_' + name, name], axis=1)

        df = remove_low_information_features(df)

        df = remove_highly_null_features(df)

        df = remove_single_value_features(df)

        df = remove_highly_correlated_features(df)

        X = df
        problem_type = 'regression'
        objective = 'auto'

        automl = evalml.automl.AutoMLSearch(problem_type=problem_type,
                                            objective=objective)

        best_pipeline = automl.load(name + '_best_pipeline')

        df = pd.read_csv(url + name + '_test.csv')
        df = df.drop(['Unnamed: 0'], axis=1)

        for i in df.columns:
            if i.find('year') > -1:
                df = df.drop([i], axis=1)

        df = remove_low_information_features(df)

        df = remove_highly_null_features(df)

        df = remove_single_value_features(df)

        df = remove_highly_correlated_features(df)

        predictions = best_pipeline.predict(df)

        result = pd.DataFrame()

        result[name] = predictions

        df = pd.read_csv(url + name + '_test.csv')
        temp = df[['country', 'year']]

        result = pd.merge(left=temp,
                          right=result,
                          how="left",
                          on=[temp.index, result.index])
        result = result.drop(['key_0', 'key_1'], axis=1)

        result['rank_' + name] = result.groupby("year")[name].rank(
            "dense", ascending=False)
        result['rank_' + name] = result['rank_' + name].astype('int')

        result = result[result['country'].isin(countries)]
        metric = pd.read_csv(
            'https://raw.githubusercontent.com/Andrewl7127/UCSD-DataHacks-2021/main/Metrics/'
            + name + '_metrics.csv')

        return result, metric

    def prosperity(
            countries=['Chad', 'Togo', 'Zimbabwe', 'Ivory Coast', 'Georgia']):

        url = 'https://raw.githubusercontent.com/Andrewl7127/UCSD-DataHacks-2021/main/Data/'
        df = pd.read_csv(url + 'merged.csv')
        df = df.drop(['Unnamed: 0'], axis=1)

        metrics = [
            'educ', 'soci', 'heal', 'pers', 'busi', 'econ', 'safe', 'gove',
            'envi'
        ]
        ranks = ['rank_' + metric for metric in metrics]
        drop = metrics + ranks + ['year', 'prosperity_score']

        y = df['prosperity_score']

        df = df.drop(drop, axis=1)

        df = remove_low_information_features(df)

        df = remove_highly_null_features(df)

        df = remove_single_value_features(df)

        df = remove_highly_correlated_features(df)

        X = df

        problem_type = 'regression'
        objective = 'auto'

        automl = evalml.automl.AutoMLSearch(problem_type=problem_type,
                                            objective=objective)

        #automl.search(X,y)
        #best_pipeline = automl.best_pipeline
        #best_pipeline.fit(X,y)
        #best_pipeline.save('prosperity_best_pipeline')

        best_pipeline = automl.load('prosperity_best_pipeline')

        test = pd.read_csv(url + 'test.csv', index_col=0)

        drop = ['year']
        df = test.copy()
        df = df.drop(drop, axis=1)

        df = remove_low_information_features(df)

        df = remove_highly_null_features(df)

        df = remove_single_value_features(df)

        df = remove_highly_correlated_features(df)

        X = df

        predictions = best_pipeline.predict(X)

        result = pd.DataFrame()

        result['prosperity'] = predictions

        df = pd.read_csv(url + 'test.csv')
        temp = df[['country', 'year']]

        result = pd.merge(left=temp,
                          right=result,
                          how="left",
                          on=[temp.index, result.index])
        result = result.drop(['key_0', 'key_1'], axis=1)

        result['rank_prosperity'] = result.groupby("year")["prosperity"].rank(
            "dense", ascending=False)
        result['rank_prosperity'] = result['rank_prosperity'].astype('int')

        result = result[result['country'].isin(countries)]

        metric = pd.read_csv(
            'https://raw.githubusercontent.com/Andrewl7127/UCSD-DataHacks-2021/main/Metrics/prosperity_metrics.csv'
        )

        return result, metric

    country_sel = st.multiselect(
        "Select which countries you want to learn more about!", countries_df)
    name = st.selectbox('Score', names)

    if st.button("Submit"):
        st.balloons()
        if len(list(country_sel)) < 1:
            country_sel = [
                'Chad', 'Togo', 'Zimbabwe', 'Ivory Coast', 'Georgia'
            ]

        if name == 'prosperity':
            r, m = prosperity(list(country_sel))
        else:
            r, m = pillar(name, list(country_sel))

        st.write(r)
        st.write(m)
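Both helpers above rank predictions within each year using pandas' dense ranking; a small self-contained example of that single call:

import pandas as pd

df = pd.DataFrame({'year': [2020, 2020, 2020, 2021, 2021],
                   'score': [3.0, 1.0, 3.0, 2.0, 5.0]})
# 'dense' ranking: tied scores share a rank and no rank numbers are skipped
df['rank'] = df.groupby('year')['score'].rank('dense', ascending=False).astype(int)
print(df)  # 2020 scores 3.0, 1.0, 3.0 -> ranks 1, 2, 1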
Esempio n. 21
def eda_analysis():

    global documentation_string
    global documentation_substring
    global df
    global df_categorical
    global df_numeric
    global df_date
    # Utilizing a documentation platform to see all the changes we would be using (Useful for pipelining)

    st.write("")
    st.write("")
    st.write(
        "This streamlined EDA shows a high-level analysis of your data, with just a few clicks!"
    )
    st.write(
        "The datasets below have their own unique attributes that touch on specific concepts that I wanted to highlight."
    )
    st.write("")

    st.write('## Data Input')
    #read_file()
    st.info(
        'NOTE: You can also upload your own CSV data to play around with through the <Experimental Reading Data> option below'
    )
    option = st.selectbox('Choose which type of data', files.name)
    st.write("You have chosen " + option)
    option_index = files.index[files['name'] == option]
    # st.write(files.loc[option_index,'file_name'].item())
    option_name = files.loc[option_index, 'file_name'].item()
    st.write(files.loc[option_index, 'description'].item())
    if (option_name == '<Experimental Reading data>'):
        read_file()
    else:
        df = read_data("", option_name, ",")

    if st.button('1. Initial features'):
        initial_features(df)

    if st.button('2. Check for duplicated values'):
        check_duplicated(df)

    # if st.checkbox('Drop Duplicates?'):
    #     #Drop the duplicate
    #     documentation_substring= f"Dropped {len(df[df.duplicated()])} values\n"
    #     df.drop_duplicates(inplace=True)
    #     logging.info(documentation_substring)
    #     documentation_string+=documentation_substring+'\n'
    #     st.write(documentation_substring)

    # if st.button('Check for duplicated values 2'):
    #     if len(df[df.duplicated()]) > 0:
    #         st.write("No. of duplicated entries: ", len(df[df.duplicated()]))
    #         st.write(df[df.duplicated(keep=False)].sort_values(by=list(df.columns)))
    #     else:
    #         st.write("No duplicated entries found")
    # Function to calculate missing values by column# Funct

    if st.button('3a. In-depth analysis on missing values'):
        missing_values = missing_values_table(df)
        st.write("### Missing value rows:")
        st.write(missing_values)

    if st.button('3b. Visualize missing values'):
        # Visualize missing values as a matrix
        # Checks if the missing values are localized
        visualize_missing_values(df)

    # if st.button("(1) Drop Missing Rows"):
    #     st.write(1)

    # if st.button("(1) Drop Missing Rows"):
    #     st.write(1)

    # if st.button("(1) Drop Missing Rows"):
    #     st.write(1)

    if st.button("4. Check the data type of each column with an example"):
        check_data_type(df)

    if st.button('5. Column-wise analysis'):
        column_analysis(df)

    # if st.button("Convert numeric to categorical feature <Pending>"):
    #     pass

    # if st.button("Convert string to datetime feature <Pending>"):
    #     pass

    # if st.button("Overview of summary based on the target variable <Pending>"):
    #     pass

    # if st.button("Rename columns if needed <Pending>"):
    #     pass

    # if st.button("Drop the target variable from the dataframe <Pending>"):
    #     pass

    seperate_features()
    # df_numeric=df.select_dtypes(include=['float64', 'int64'])
    # df_date=df.select_dtypes(include=['datetime64'])
    # df_categorical=df.select_dtypes(exclude=['float64', 'int64','datetime64'])

    if st.button("6. Get implied numeric, categorical and datetime features"):
        get_column_types(df_categorical, df_numeric, df_date)
    # if st.button("Remove extra white space in text columns <Pending>"):
    #     pass
    st.write("### Define the target variable")
    st.write("")
    st.info(
        'Make sure you define the target variable for bivariate classification'
    )
    if st.checkbox('Find the target variable'):
        if (files.loc[option_index, 'name'].item()
                == "<Experimental Reading data>") and (
                    files.loc[option_index,
                              'target'].item() == "Find your target variable"):
            st.info("Search for the target variable from your dataset")
            st.write(df.head())
        else:
            st.write("For this dataset, it is {0}".format(
                files.loc[option_index, 'target'].item()))
        target_name = st.text_input("Enter the target name",
                                    files.loc[option_index, 'target'].item())
        target = find_target(target_name)
        st.write("Target: ", target_name)
        st.write("Target type: ", type(target))
        st.write("### Overview")
        st.write(target.head())
        st.write(target.value_counts())
        sns.countplot(x=target, data=df)
        st.pyplot()

    st.write("### Finding the data variables")
    st.write(
        "You can manually change the categorical, numeric and date-time variables"
    )
    if (files.loc[option_index,
                  'name'].item() == "<Experimental Reading data>"):
        st.info(
            "You would need to manually extract the date-time variables yourself"
        )
    if (option == "Cat Shelter information"):
        st.info(
            "The variables: date_of_birth and datetime  should be manually changed to date-time variables"
        )

    if st.checkbox("Choose data types"):
        choose_data_types()

    st.markdown("## Categorical columns")

    if st.button("Information on categorical columns"):
        st.write("### Categorical Column names")
        st.write(df_categorical.columns)
        st.write("### Categorical Info")
        buffer = io.StringIO()
        df_categorical.info(buf=buffer)
        s = buffer.getvalue()
        st.text(s)
        #st.write(df_numeric)

    categorical_selector = st.radio(
        "Choose what type of categorical analysis to conduct:", [
            "Select one of the two",
            "Univariate analysis of categorical feature",
            "Bivariate analysis of categorical feature"
        ])
    categorical_names = df_categorical.columns.tolist()
    categorical_names.append("All columns")
    if (categorical_selector == "Univariate analysis of categorical feature"):
        categorical_option = st.selectbox("Choose which column",
                                          categorical_names)
        if (categorical_option == "All columns"):
            for col in df_categorical.columns:
                categorical_summarized(df_categorical, y=col)
        else:
            categorical_summarized(df_categorical, y=categorical_option)

    if (categorical_selector == "Bivariate analysis of categorical feature"):
        st.info(
            "**Make sure that you have defined the target variable from the checkbox above**"
        )
        categorical_option = st.selectbox("Choose which column",
                                          categorical_names)
        if (categorical_option == "All columns"):
            for col in df_categorical.columns:
                categorical_summarized(df_categorical, y=col, hue=target)
        else:
            categorical_summarized(df_categorical,
                                   y=categorical_option,
                                   hue=target)

    # if st.button("Categorical Data Imputation <Pending>"):
    #     pass

    # if st.button("Chi square analysis <Pending>"):
    #     pass

    # if st.button("Encoding categorical data <Pending>"):
    #     pass

    if st.button(" View Finalized Categorical columns"):
        st.write(df_categorical.head(10))

    st.markdown("## Date-time columns")

    date_selector = st.radio("Choose what type of Date analysis to conduct:",
                             ["Select one:", "Breakdown of date features"])
    df_date = df_date.apply(pd.to_datetime)
    date_names = df_date.columns.tolist()
    date_names.append("All columns")
    if date_selector == 'Breakdown of date features':
        date_option = st.selectbox("Choose which column", date_names)
        if (date_option == "All columns"):
            for col in df_date.columns:
                time_summarized(df_date, x=col)
        else:
            time_summarized(df_date, x=date_option)

    st.markdown("## Numeric columns")

    if st.button("Initial numeric features"):
        st.write("### Numeric Overviews")
        st.write(df_numeric.head())
        df_numeric.hist(figsize=(20, 20), bins=10, xlabelsize=8, ylabelsize=8)
        st.pyplot()

    colsize = len(df_numeric.columns) - 5
    if st.button("Correlation matrix"):
        plt.figure(figsize=(15, 15))
        sns.heatmap(df_numeric.corr(), annot=True)
        st.pyplot()

    numeric_selector = st.radio(
        "Choose what type of numeric analysis to conduct:", [
            "Select one of the two", "Univariate analysis of numeric feature",
            "Bivariate analysis of numeric feature"
        ])
    numeric_names = df_numeric.columns.tolist()
    numeric_names.append("All columns")
    if (numeric_selector == "Univariate analysis of numeric feature"):
        numeric_option = st.selectbox("Choose which column", numeric_names)
        if (numeric_option == "All columns"):
            for col in df_numeric.columns:
                quantitative_summarized(df_numeric, y=col)
        else:
            quantitative_summarized(df_numeric, y=numeric_option)

    if (numeric_selector == "Bivariate analysis of numeric feature"):
        st.info(
            "**Make sure that you have defined the target variable from the checkbox above**"
        )
        numeric_option = st.selectbox("Choose which column", numeric_names)
        if (numeric_option == "All columns"):
            for col in df_numeric.columns:
                quantitative_summarized(dataframe=df_numeric,
                                        y=col,
                                        palette=c_palette,
                                        x=target,
                                        verbose=False)
        else:
            quantitative_summarized(dataframe=df_numeric,
                                    y=numeric_option,
                                    palette=c_palette,
                                    x=target,
                                    verbose=False)

    # if (numeric_selector=="Multivariate variate analysis of numeric feature"):
    #     st.info("**Make sure that you have defined the target variable from the checkbox above**")
    #     st.write(df_numeric.head())
    #     var1 = st.text_input("Enter the first variable")
    #     var2 = st.text_input("Enter the second variable")
    #     quantitative_summarized(dataframe= df_numeric, y = var1, x = var2, hue = target, palette=c_palette3, verbose=False)

    st.write("")
    st.write("")

    if st.button("You're done!! Click here to celebrate"):
        st.balloons()
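The EDA above calls helpers such as missing_values_table(df) whose definitions are not shown; a plausible minimal implementation (an assumption, not the original code):

import pandas as pd

def missing_values_table(df: pd.DataFrame) -> pd.DataFrame:
    # count and percentage of missing values per column, worst offenders first
    miss = df.isnull().sum()
    pct = 100 * miss / len(df)
    table = pd.concat([miss, pct], axis=1, keys=['Missing', '% of Total'])
    return table[table['Missing'] > 0].sort_values('% of Total', ascending=False)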
Esempio n. 22
def get_dataset(name):
    wh = None

    if name == 'Suside Statistics':
        st.markdown(
            '<p style="margin-top: 30px">Here, we are analyzing the <b>World Health Organization</b> (<i><b>WHO</b></i>) dataset on suicide statistics in the world.</p><p style="margin-bottom: 30px">We can visualize the data and identify the unequal distribution of this phenomenon across countries, and we can understand the differences depending on the age of the victims and through the years</p><hr style="border:1px solid black">',
            unsafe_allow_html=True)
        wh = pd.read_csv('data/who_suicide_statistics.csv')
        "\n\n"
        wh
        # st.sidebar.write("Check any option to visualize the related data")
        st.sidebar.markdown(
            '<h2 style="text-align: center; color: #f0ad4e"><b>Check any option to visualize the related data</b></h2>',
            unsafe_allow_html=True)
        st.sidebar.markdown("<br>", unsafe_allow_html=True)

        if st.sidebar.checkbox('Show description for suicide dataset'):
            st.markdown(
                '<h2 style="text-align: center; color: #d9534f"><b>Here is a Description of the data Series.</b></h2>',
                unsafe_allow_html=True)
            st.write(wh.describe(include="all").T)

        if st.sidebar.checkbox('Show global suicides over the years'):
            st.markdown(
                '<h2 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Global suicides through the years.</b></h2>',
                unsafe_allow_html=True)
            st.set_option('deprecation.showPyplotGlobalUse', False)
            sns.set(style="darkgrid")
            sns.set(rc={'figure.figsize': (15, 10)})
            ax = sns.regplot(data=wh,
                             x='year',
                             y='suicides_no',
                             x_jitter=0.2,
                             order=4)
            ax.set_yscale('log')
            st.pyplot()

        if st.sidebar.checkbox('Show suicide number by age and country'):
            st.markdown(
                '<h2 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Suicide number by age and country.</b></h2>',
                unsafe_allow_html=True)
            wh.groupby(['country', 'age'
                        ]).suicides_no.sum().nlargest(10).plot(kind='barh')
            st.pyplot()

        if st.sidebar.checkbox('Show suicide number by age and sex'):
            st.markdown(
                '<h2 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Suicide number by age and sex.</b></h2>',
                unsafe_allow_html=True)
            ax = sns.catplot(x="sex",
                             y="suicides_no",
                             col='age',
                             data=wh,
                             estimator=median,
                             height=4,
                             aspect=.7,
                             kind='bar')
            st.pyplot()

        if st.sidebar.checkbox('Show suicide number by age interval and sex'):
            st.markdown(
                '<h2 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Suicide number by age interval and sex.</b></h2>',
                unsafe_allow_html=True)
            wh['age'] = wh.age.astype(
                pd.api.types.CategoricalDtype(categories=[
                    '5-14 years', '15-24 years', '25-34 years', '35-54 years',
                    '55-74 years', '75+ years'
                ]))
            wh.pivot_table(index='age',
                           columns='sex',
                           values='suicides_no',
                           aggfunc='sum').plot(kind='barh')
            st.pyplot()

        # if st.sidebar.checkbox('Show Suside number by age, sex and for each year from 1979 to 2016s'):
        # 	st.markdown('<h4 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Suside number by age, sex and for each year from 1979 to 2016.</b></h4>', unsafe_allow_html=True)
        # 	df = wh.groupby(['year','age']).suicides_no.sum().reset_index()
        # 	df['age'] = df.age.astype(pd.api.types.CategoricalDtype(categories = ['5-14 years','15-24 years','25-34 years','35-54 years','55-74 years','75+ years']))
        # 	sns.set(rc={'figure.figsize':(15,10)})
        # 	st.pyplot()

        if st.sidebar.checkbox(
                'Show suicide number by age, sex and for each year from 1979 to 2016'
        ):
            st.markdown(
                '<h4 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Suicide number by age, sex and for each year from 1979 to 2016.</b></h4>',
                unsafe_allow_html=True)
            sns.catplot('age',
                        'suicides_no',
                        hue='sex',
                        col='year',
                        data=wh,
                        kind='bar',
                        col_wrap=3,
                        estimator=sum)
            st.pyplot()

        if st.sidebar.checkbox('Show evolution of suicide number by sex'):
            st.markdown(
                '<h2 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Evolution of suicide number by sex.</b></h2>',
                unsafe_allow_html=True)
            sns.set(style="darkgrid")
            g = sns.FacetGrid(wh, row="sex", col="age", margin_titles=True)
            g.map(plt.scatter, "suicides_no", "population", edgecolor="w")
            st.pyplot()

    # elif name == 'Nutrition and Population Statistics':
    # 	# health = pd.read_csv('data/data.csv')
    # 	# st.write(health.head())
    # 	# health
    # 	"# aaaaaaaaaaaaa"

    elif name == 'Survey on Mental Health in the Tech Workplace':
        st.sidebar.markdown(
            '<h5 style="margin-top: 30px; margin-bottom: 30px; text-align: center; color: #d9534f; font-family: cursive"><b>Pick an option to make the diagram display</b></h5>',
            unsafe_allow_html=True)
        st.markdown(
            '<h3 style="margin-top: 20px; color: #fbe25d"><b><u><i>Survey on Mental Health in the Tech Workplace</i></u></b></h3>',
            unsafe_allow_html=True)
        st.markdown(
            '	<p style="margin-top: 0px">Here, we are analyzing the <b>World Health Organization</b> (<u><b>WHO</b></u>) dataset on mental health statistics in the world.</p><p style="margin-bottom: 30px">We will then be able to visualize data coming from thousands of people all around the world, and we will classify them depending on some criteria.</p><ul><li>Their employment status</li><li>Their gender</li><li>Their anonymity</li><li>Their care options</li><li>Their treatment, if they are ill</li></ul>',
            unsafe_allow_html=True)
        mtech = pd.read_csv('data/survey.csv')
        st.markdown(
            '<h3 style="margin-top: 20px; color: #d9534f; font-family: cursive"><b>DATASET</b></h3>',
            unsafe_allow_html=True)
        mtech
        "\n"
        st.markdown(
            '<h3 style="margin-top: 20px; color: #d9534f; font-family: cursive"><b>Description of the data series.</b></h3>',
            unsafe_allow_html=True)
        # "### Description of the data series."
        st.write(mtech.describe(include='all'))

        if st.sidebar.checkbox('Diagram depending on the occupation'):
            st.markdown(
                '<h3 style="margin-top: 30px; color: #d9534f; font-family: cursive"><b>Diagram depending on the occupation</b></h3>',
                unsafe_allow_html=True)
            st.markdown(
                '<p style="margin-top: 10px">Here, we can visualize the mental health survey depending on whether the people are employed or self-employed, on-site or remote workers, and whether they work for a tech or non-tech company</p>',
                unsafe_allow_html=True)
            sns.catplot(x='self_employed',
                        hue='remote_work',
                        col='tech_company',
                        kind='count',
                        data=mtech)
            st.pyplot()

        if st.sidebar.checkbox('Anonymity around mental illness'):
            st.markdown(
                '<h3 style="margin-top: 30px; color: #d9534f; font-family: cursive"><b>anonymity around mental illness</b></h3>',
                unsafe_allow_html=True)
            st.markdown(
                '<p style="margin-top: 10px">The following visualization shows the answers to these questions:</p><ul><li>Will the worker\'s anonymity be protected if they choose to take advantage of mental health or substance abuse treatment resources?</li><li>Would they be willing to discuss a mental health issue with their coworkers?</li><li>Would they be willing to discuss a mental health issue with their direct supervisor(s)?</li></ul><br>The answers are not really positive',
                unsafe_allow_html=True)
            sns.catplot(x='anonymity',
                        hue='leave',
                        col='supervisor',
                        row='coworkers',
                        kind='count',
                        data=mtech)
            st.pyplot()

        if st.sidebar.checkbox('Consequences of mental illness subject'):
            st.markdown(
                '<h3 style="margin-top: 30px; color: #d9534f; font-family: cursive"><b>Consequences of the mental illness subject</b></h3>',
                unsafe_allow_html=True)
            st.markdown(
                '<p style="margin-top: 10px">The following visualization shows the answers to these questions:</p><ul><li>Does your employer provide resources to learn more about mental health issues and how to seek help?</li><li>Do you think that discussing a mental health issue with your employer would have negative consequences?</li><li>Do they have a family history of mental illness?</li><li>Have they sought treatment for a mental health condition?</li></ul><br>The answers are not really positive; they show that the subject of mental illness is still a real taboo in society',
                unsafe_allow_html=True)
            sns.catplot(x='seek_help',
                        hue='mental_health_consequence',
                        col='treatment',
                        row='family_history',
                        kind='count',
                        data=mtech)
            st.pyplot()

        if st.sidebar.checkbox('Care options and Consequences'):
            st.markdown(
                '<h3 style="margin-top: 30px; color: #d9534f; font-family: cursive"><b>Diagram showing the Care options and Consequences</b></h3>',
                unsafe_allow_html=True)
            sns.catplot(x='benefits',
                        hue='treatment',
                        col='wellness_program',
                        row='care_options',
                        kind='count',
                        data=mtech)
            st.pyplot()

    elif name == 'Suicide Statistics over years':
        data = pd.read_csv("data/who_suicide_statistics.csv")
        numeric_columns = data.select_dtypes(
            ['float64', 'float32', 'int32', 'int64']).columns

        # checkbox widget
        checkbox = st.sidebar.checkbox("Reveal data.")

        if checkbox:
            # st.write(data)
            st.dataframe(data=data)

        # create jointplot
        st.sidebar.markdown('<br>', unsafe_allow_html=True)
        st.sidebar.markdown('<h3><font color="#5cb85c">Joint plot</font></h3>',
                            unsafe_allow_html=True)
        select_box3 = st.sidebar.selectbox(label='x', options=numeric_columns)
        select_box4 = st.sidebar.selectbox(label="y", options=numeric_columns)
        sns.jointplot(x=select_box3, y=select_box4, data=data)
        st.pyplot()

        # create histograms
        # st.sidebar.subheader("Histogram")
        st.sidebar.markdown('<br>', unsafe_allow_html=True)
        st.sidebar.markdown('<h3><font color="#5cb85c">Histogram</font></h3>',
                            unsafe_allow_html=True)
        select_box3 = st.sidebar.selectbox(label="Feature",
                                           options=numeric_columns)
        histogram_slider = st.sidebar.slider(label="Number of Bins",
                                             min_value=5,
                                             max_value=100,
                                             value=30)
        sns.distplot(data[select_box3], bins=histogram_slider)
        st.pyplot()

        # create scatterplots
        # st.sidebar.subheader("Scatter plot setup")
        st.sidebar.markdown('<br>', unsafe_allow_html=True)
        st.sidebar.markdown(
            '<h3><font color="#5cb85c">Scatter plot setup</font></h3>',
            unsafe_allow_html=True)
        # add select widget
        select_box1 = st.sidebar.selectbox(label='X axis',
                                           options=numeric_columns)
        select_box2 = st.sidebar.selectbox(label="Y axis",
                                           options=numeric_columns)
        sns.relplot(x=select_box1, y=select_box2, data=data)
        st.pyplot()

    else:
        st.balloons()
        "### About the application"
    x = wh
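This example relies on the deprecated global-figure form of st.pyplot() (hence the st.set_option('deprecation.showPyplotGlobalUse', False) call). Passing the figure explicitly avoids the warning; a minimal sketch with toy stand-in data:

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import streamlit as st

toy = pd.DataFrame({'suicides_no': [1, 4, 2, 8, 5, 3]})   # stand-in data
fig, ax = plt.subplots(figsize=(8, 5))
sns.histplot(data=toy, x='suicides_no', ax=ax)
st.pyplot(fig)   # pass the figure explicitly instead of using the global one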
Esempio n. 23
def start_ui():

    st.title("Sysmon Extractor")
    st.subheader("Extract sysmon data based off the event type")

    st.write("Supported data types are csv, json and parquet.")
    st.write("Load your data by specifying the full path.")

    st.info("HDFS is supported. Specify hdfs://HOST:PORT/path/to/file for either input or outputh path.")

    up_file = st.text_input("Input File:")

    if up_file is not None:
        if ".csv" in up_file:
            headers = st.checkbox(
                "For CSV files: does the first row contain headers?",
                value=True)
        else:
            headers = False

    st.write("Specify the output path below. If writing to a local directory, the full path must be specified. If no path is provided, it will write the file to the current working directory.")
    out_file = st.text_input("Output file")

    st.write("If you have multiple log sources in one file, please enter the column that specifies the log source for each row.")
    log_col = st.text_input("Log name columns:")

    st.write("If your Sysmon data is nested in another column, please enter the column below.")
    event_col = st.text_input("Event column: ")

    selection = st.multiselect(
        "Select events to extract",
        EVENT_DICTIONARY,
        format_func=_format_rules
    )

    st.write("If you want to extract any other columns from the data, specify them below separated by a comma")
    additional_cols = st.text_area("Additional columns", "col1, col2.nested_col")

    st.write("If you want the resulting file as a single file, check the box below.")
    st.warning(
        "Make sure that you have enough memory to hold the data, otherwise this will fail")
    single_file = st.checkbox("Output as a single file", value=False)

    st.write("If you have an existing spark cluster you would like to connect to, enter it below.")
    st.info("For a cluster, spark://HOST:PORT, for a mesos/yarn cluster mesos://HOST:PORT")
    master = st.text_input("Spark Instance", "local")

    if st.button("Extract!"):

        if not up_file:
            st.error("Must choose a file for upload.")
            return
        if not selection:
            st.error("Please select the rules you would like to extract.")
            return

        # Assign default values
        # master = "local" if not master else master
        additional_cols = [] if not additional_cols else re.sub(r"\s+", "", additional_cols).split(",")
        out_file = f"{os.getcwd()}/sysmon-output.csv" if not out_file else out_file

        with st.spinner(text="Extracting logs..."):
            extract(
                up_file.strip(),
                selection,
                ouput_file=out_file,
                header=headers,
                log_column=log_col,
                event_column=event_col,
                additional_columns=additional_cols,
                single_file=single_file,
                master=master
            )

        st.success("Done!")
        st.balloons()

    st.header("Event Mapping")
    st.subheader("See what each event is.")

    sysmon_df = SYSMON_SCHEMA.set_index("Event")
    st.table(sysmon_df) 

    st.header("Event Dictionary Help")
    st.subheader("Display the schema information for each Sysmon rule")

    event = st.selectbox("Select an event.", EVENT_DICTIONARY, format_func=_format_rules)

    df = ossem.getEventDf("windows", "sysmon", f"event-{event}")

    st.table(df[["event_code", "title", "name",
                "type", "description", "sample_value"]])
Esempio n. 24
def main():
    st.set_page_config(
        page_title="Oil Spill Dashboard",
        page_icon=":ship:",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    def load_raw_data(x):
        # data
        time_series_df = pd.read_csv(x)
        # stable in-place sort by timestamp (mergesort keeps equal timestamps in order)
        time_series_df.sort_values(by=['timestamp'],
                                   inplace=True,
                                   kind="mergesort")
        time_series_df['timestamp'] = pd.to_datetime(
            time_series_df['timestamp'])
        return time_series_df

    def load_data(x):
        time_series_df = load_raw_data(x)
        # cleaning and indexing
        time_series_df.drop([
            "call_sign", "flag", "draught", "ship_and_cargo_type", "length",
            "width", "eta", "destination", "status", "maneuver", "accuracy",
            "collection_type", 'mmsi_label'
        ],
                            axis=1,
                            inplace=True)
        time_series_df.drop(['created_at', 'imo', 'name'],
                            axis=1,
                            inplace=True)
        time_series_df = time_series_df[time_series_df['speed'].notna()]
        time_series_df = time_series_df.reset_index(drop=True)
        time_series_df.drop(time_series_df[time_series_df['speed'] == 0].index,
                            inplace=True)
        return time_series_df

    st.title("Oil spill prediction Dashboard :rocket:")
    st.sidebar.title("Enter Parameters :paperclip:")
    st.sidebar.markdown("Powered by AIS Data set")
    load_csv_data = st.sidebar.checkbox("Upload Csv")

    if (load_csv_data):
        uploaded_file = st.file_uploader("Choose Csv file")
    else:
        uploaded_file = None
    if uploaded_file is not None:
        uploaded_file.seek(0)
        time_series_df1 = load_raw_data(uploaded_file)
        time_series_df = load_data(uploaded_file)
    else:
        time_series_df1 = load_raw_data(
            '../Data/main/Maritius_AOI_20200701_0731_full.csv')
        time_series_df = load_data(
            '../Data/main/Maritius_AOI_20200701_0731_full.csv')

    raw = st.sidebar.checkbox("Show Raw Dataset")
    not_raw = st.sidebar.checkbox("Show cleaned Dataset")
    if (raw):
        st.subheader("AIS Dataset (Raw)")
        st.dataframe(time_series_df1[:500].style.highlight_max(axis=0))
    if (not_raw):
        st.subheader("AIS Dataset (Cleaned)")
        st.dataframe(time_series_df[:500].style.highlight_max(axis=0))

    vessels = time_series_df.mmsi.unique()
    st.markdown("Anomaly detection with time series data of: ", len(vessels))
    classifier = st.sidebar.selectbox(
        "Classifier",
        ("Select one model", "Code", "Benchmark model(IQR)",
         "K-Means clustering", "Isolation Forest", "All of the above(Best)"))

    mv_value = st.sidebar.selectbox("Select vessel", vessels)
    st.write("Selected Vessel: ", mv_value)
    param = st.sidebar.radio("Vessel Parameter",
                             ("speed", "course", "heading", "rot"),
                             key='param')
    mv_data = time_series_df[time_series_df['mmsi'] == mv_value]

    if st.button("Plot all basic graphs"):
        p = figure(title='Speed Vs Time',
                   x_axis_label='Timestamp',
                   y_axis_label='Speed')

        p.line(mv_data['timestamp'],
               mv_data['speed'],
               legend='Speed Trend',
               line_width=2)
        st.bokeh_chart(p, use_container_width=True)

        q = figure(title='Course Vs Time',
                   x_axis_label='Timestamp',
                   y_axis_label='Course')
        q.line(mv_data['timestamp'],
               mv_data['course'],
               legend='Course Trend',
               line_width=2)
        st.bokeh_chart(q, use_container_width=True)

        r = figure(title='Heading Vs Time',
                   x_axis_label='Timestamp',
                   y_axis_label='Heading')
        r.line(mv_data['timestamp'],
               mv_data['heading'],
               legend='Heading Trend',
               line_width=2)
        st.bokeh_chart(r, use_container_width=True)

        s = figure(title='Rot Vs Time',
                   x_axis_label='Timestamp',
                   y_axis_label='Rot')
        s.line(mv_data['timestamp'],
               mv_data['rot'],
               legend='Rot Trend',
               line_width=2)
        st.bokeh_chart(s, use_container_width=True)
    map_df = mv_data[mv_data['latitude'].notna()]
    map_df = map_df[map_df['longitude'].notna()]
    if st.button("Plot Map"):
        map_df = map_df.filter(['latitude', 'longitude'])
        st.map(map_df)

    mv_data = mv_data.drop(['mmsi', 'msg_type', 'latitude', 'longitude'],
                           axis=1)
    mv_data = mv_data[mv_data['speed'].notna()]
    mv_data = mv_data.set_index(['timestamp'])
    mv_data.index = pd.to_datetime(mv_data.index, unit='s')
    names = mv_data.columns
    rollmean = mv_data.resample(rule='D').mean()
    rollstd = mv_data.resample(rule='D').std()

    if classifier == "Benchmark model: Interquartile Range (IQR)":

        df2 = mv_data
        names = df2.columns
        x = mv_data[names]
        scaler = StandardScaler()
        pca = PCA()
        pipeline = make_pipeline(scaler, pca)
        pipeline.fit(x)

        features = range(pca.n_components_)

        pca = PCA(n_components=2)
        principalComponents = pca.fit_transform(x)
        principalDf = pd.DataFrame(data=principalComponents,
                                   columns=['pc1', 'pc2'])
        mv_data['pc1'] = pd.Series(principalDf['pc1'].values,
                                   index=mv_data.index)
        mv_data['pc2'] = pd.Series(principalDf['pc2'].values,
                                   index=mv_data.index)
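
        # The anomaly detectors below run on this 2-D PCA projection, so
        # "anomalous" means far from the bulk of points in (pc1, pc2) space.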

        result = adfuller(principalDf['pc1'])
        st.write("p value", result[1])
        pca1 = principalDf['pc1'].pct_change()
        autocorrelation = pca1.dropna().autocorr()
        st.write('Autocorrelation(pc1) is: ', autocorrelation)
        plot_acf(pca1.dropna(), lags=20, alpha=0.05)
        pca2 = principalDf['pc2'].pct_change()
        autocorrelation = pca2.autocorr()
        st.write('Autocorrelation(pc2) is: ', autocorrelation)
        plot_acf(pca2.dropna(), lags=20, alpha=0.05)
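        # Tukey's rule: points outside [Q1 - 1.5*IQR, Q3 + 1.5*IQR] on either
        # principal component are flagged as anomalies.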
        q1_pc1, q3_pc1 = mv_data['pc1'].quantile([0.25, 0.75])
        iqr_pc1 = q3_pc1 - q1_pc1
        lower_pc1 = q1_pc1 - (1.5 * iqr_pc1)
        upper_pc1 = q3_pc1 + (1.5 * iqr_pc1)
        q1_pc2, q3_pc2 = mv_data['pc2'].quantile([0.25, 0.75])
        iqr_pc2 = q3_pc2 - q1_pc2
        lower_pc2 = q1_pc2 - (1.5 * iqr_pc2)
        upper_pc2 = q3_pc2 + (1.5 * iqr_pc2)
        mv_data['anomaly_pc1'] = ((mv_data['pc1'] > upper_pc1) |
                                  (mv_data['pc1'] < lower_pc1)).astype('int')
        mv_data['anomaly_pc2'] = ((mv_data['pc2'] > upper_pc2) |
                                  (mv_data['pc2'] < lower_pc2)).astype('int')
        total_anomaly = mv_data['anomaly_pc1'].value_counts(
        ) + mv_data['anomaly_pc2'].value_counts()
        outliers_pc1 = mv_data.loc[(mv_data['pc1'] > upper_pc1) |
                                   (mv_data['pc1'] < lower_pc1), 'pc1']
        outliers_pc2 = mv_data.loc[(mv_data['pc2'] > upper_pc2) |
                                   (mv_data['pc2'] < lower_pc2), 'pc2']
        st.write("Outlier Propotion(pc1): ", len(outliers_pc1) / len(mv_data))
        st.write("Outlier Propotion(pc2): ", len(outliers_pc2) / len(mv_data))

        a = mv_data[mv_data['anomaly_pc1'] == 1]  #anomaly
        b = mv_data[mv_data['anomaly_pc2'] == 1]  #anomaly
        fig = plt.figure()
        plt.plot(mv_data[param], color='blue', label='Normal')
        plt.plot(a[param],
                 linestyle='none',
                 marker='X',
                 color='red',
                 markersize=12,
                 label='Anomaly1')
        plt.plot(b[param],
                 linestyle='none',
                 marker='X',
                 color='green',
                 markersize=12,
                 label='Anomaly2')
        plt.xlabel('Date and Time')
        plt.ylabel(param)
        plt.title('Anomalies with given MMSI')
        plt.legend(loc='best')
        plt.gcf().autofmt_xdate()
        st.pyplot(fig)
        data1 = a
        data2 = b

    if classifier == "K-Means clustering":
        df2 = mv_data
        names = df2.columns
        x = mv_data[names]
        scaler = StandardScaler()
        pca = PCA()
        pipeline = make_pipeline(scaler, pca)
        pipeline.fit(x)

        features = range(pca.n_components_)

        pca = PCA(n_components=2)
        principalComponents = pca.fit_transform(x)
        principalDf = pd.DataFrame(data=principalComponents,
                                   columns=['pc1', 'pc2'])
        mv_data['pc1'] = pd.Series(principalDf['pc1'].values,
                                   index=mv_data.index)
        mv_data['pc2'] = pd.Series(principalDf['pc2'].values,
                                   index=mv_data.index)
        fraction = st.number_input("Fraction",
                                   0.00,
                                   1.00,
                                   step=0.01,
                                   key='fraction')
        kmeans = KMeans(n_clusters=2, random_state=42)
        kmeans.fit(principalDf.values)
        labels = kmeans.predict(principalDf.values)
        unique_elements, counts_elements = np.unique(labels,
                                                     return_counts=True)
        clusters = np.asarray((unique_elements, counts_elements))

        # no of points in each clusters
        fig = plt.figure()
        plt.bar(clusters[0], clusters[1], tick_label=clusters[0])
        plt.xlabel('Clusters')
        plt.ylabel('Number of points')
        plt.title('Number of points in each cluster')
        st.pyplot(fig)

        # cluster graph
        fig = plt.figure()
        plt.scatter(principalDf['pc1'], principalDf['pc2'], c=labels)
        plt.xlabel('pc1')
        plt.ylabel('pc2')
        plt.title('K-means clustering')
        st.pyplot(fig)

        # distance function to be used
        def getDistanceByPoint(data, model):
            distance = []
            for i in range(0, len(data)):
                Xa = np.array(data.loc[i])
                Xb = model.cluster_centers_[model.labels_[i]]
                distance.append(np.linalg.norm(Xa - Xb))
            return pd.Series(distance, index=data.index)
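
        # Rank every point by distance to its assigned cluster centre and flag
        # the `fraction` most distant points as anomalies; e.g. fraction=0.05
        # marks the farthest 5% of points (illustrative choice, not a tuned value).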

        outliers_fraction = fraction
        distance = getDistanceByPoint(principalDf, kmeans)
        number_of_outliers = int(outliers_fraction * len(distance))
        threshold = distance.nlargest(number_of_outliers).min()
        principalDf['anomaly1'] = (distance >= threshold).astype(int)

        st.write("Anomaly Count by Kmeans",
                 principalDf['anomaly1'].value_counts())

        mv_data['anomaly1'] = pd.Series(principalDf['anomaly1'].values,
                                        index=mv_data.index)
        a = mv_data[mv_data['anomaly1'] == 1]  #anomaly
        fig = plt.figure(figsize=(18, 6))
        plt.plot(mv_data[param], color='blue', label='Normal')
        plt.plot(a[param],
                 linestyle='none',
                 marker='X',
                 color='red',
                 markersize=12,
                 label='Anomaly')
        plt.xlabel('Date and Time')
        plt.ylabel(param)
        plt.title('Anomalies with given MMSI')
        plt.legend(loc='best')
        plt.gcf().autofmt_xdate()
        st.pyplot(fig)
        data3 = a

    if classifier == "Isolation Forest":
        df2 = mv_data
        names = df2.columns
        x = mv_data[names]
        scaler = StandardScaler()
        pca = PCA()
        pipeline = make_pipeline(scaler, pca)
        pipeline.fit(x)

        features = range(pca.n_components_)

        pca = PCA(n_components=2)
        principalComponents = pca.fit_transform(x)
        principalDf = pd.DataFrame(data=principalComponents,
                                   columns=['pc1', 'pc2'])
        mv_data['pc1'] = pd.Series(principalDf['pc1'].values,
                                   index=mv_data.index)
        mv_data['pc2'] = pd.Series(principalDf['pc2'].values,
                                   index=mv_data.index)
        fraction = st.number_input("Fraction",
                                   0.00,
                                   1.00,
                                   step=0.01,
                                   key='fraction')
        kmeans = KMeans(n_clusters=2, random_state=42)
        kmeans.fit(principalDf.values)
        labels = kmeans.predict(principalDf.values)
        unique_elements, counts_elements = np.unique(labels,
                                                     return_counts=True)
        clusters = np.asarray((unique_elements, counts_elements))

        # IsolationForest method 3 (checkpoint)
        outliers_fraction = fraction
        model = IsolationForest(contamination=outliers_fraction)
        model.fit(principalDf.values)
        principalDf['anomaly2'] = pd.Series(model.predict(principalDf.values))
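        # IsolationForest.predict returns +1 for inliers and -1 for outliers,
        # so rows with anomaly2 == -1 are treated as anomalies below.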

        # visualization
        mv_data['anomaly2'] = pd.Series(principalDf['anomaly2'].values,
                                        index=mv_data.index)
        a = mv_data.loc[mv_data['anomaly2'] == -1]  #anomaly
        # anomaly count method 3
        st.write("Anomaly count isolated forest: ",
                 mv_data['anomaly2'].value_counts())
        fig = plt.figure()
        plt.plot(mv_data[param], color='blue', label='Normal')
        plt.plot(a[param],
                 linestyle='none',
                 marker='X',
                 color='red',
                 markersize=12,
                 label='Anomaly')
        plt.xlabel('Date and Time')
        plt.ylabel('Reading')
        plt.title('Anomalies with given MMSI')
        plt.legend(loc='best')
        plt.gcf().autofmt_xdate()
        st.pyplot(fig)
        data4 = a

    if classifier == "All of the above(Best)":

        df2 = mv_data
        names = df2.columns
        x = mv_data[names]
        scaler = StandardScaler()
        pca = PCA()
        pipeline = make_pipeline(scaler, pca)
        pipeline.fit(x)
        features = range(pca.n_components_)
        pca = PCA(n_components=2)
        principalComponents = pca.fit_transform(x)
        principalDf = pd.DataFrame(data=principalComponents,
                                   columns=['pc1', 'pc2'])
        mv_data['pc1'] = pd.Series(principalDf['pc1'].values,
                                   index=mv_data.index)
        mv_data['pc2'] = pd.Series(principalDf['pc2'].values,
                                   index=mv_data.index)
        result = adfuller(principalDf['pc1'])
        st.write("p value", result[1])
        pca1 = principalDf['pc1'].pct_change()
        autocorrelation = pca1.dropna().autocorr()
        st.write('Autocorrelation(pc1) is: ', autocorrelation)
        pca2 = principalDf['pc2'].pct_change()
        autocorrelation = pca2.autocorr()
        st.write('Autocorrelation(pc2) is: ', autocorrelation)
        q1_pc1, q3_pc1 = mv_data['pc1'].quantile([0.25, 0.75])
        iqr_pc1 = q3_pc1 - q1_pc1
        lower_pc1 = q1_pc1 - (1.5 * iqr_pc1)
        upper_pc1 = q3_pc1 + (1.5 * iqr_pc1)
        q1_pc2, q3_pc2 = mv_data['pc2'].quantile([0.25, 0.75])
        iqr_pc2 = q3_pc2 - q1_pc2
        lower_pc2 = q1_pc2 - (1.5 * iqr_pc2)
        upper_pc2 = q3_pc2 + (1.5 * iqr_pc2)
        mv_data['anomaly_pc1'] = ((mv_data['pc1'] > upper_pc1) |
                                  (mv_data['pc1'] < lower_pc1)).astype('int')
        mv_data['anomaly_pc2'] = ((mv_data['pc2'] > upper_pc2) |
                                  (mv_data['pc2'] < lower_pc2)).astype('int')
        total_anomaly = mv_data['anomaly_pc1'].value_counts(
        ) + mv_data['anomaly_pc2'].value_counts()
        outliers_pc1 = mv_data.loc[(mv_data['pc1'] > upper_pc1) |
                                   (mv_data['pc1'] < lower_pc1), 'pc1']
        outliers_pc2 = mv_data.loc[(mv_data['pc2'] > upper_pc2) |
                                   (mv_data['pc2'] < lower_pc2), 'pc2']
        st.write("Outlier Propotion(pc1): ", len(outliers_pc1) / len(mv_data))
        st.write("Outlier Propotion(pc2): ", len(outliers_pc2) / len(mv_data))
        a = mv_data[mv_data['anomaly_pc1'] == 1]  #anomaly
        b = mv_data[mv_data['anomaly_pc2'] == 1]  #anomaly
        data1 = a
        data2 = b

        fraction = st.number_input("Fraction",
                                   0.00,
                                   1.00,
                                   step=0.01,
                                   key='fraction')
        kmeans = KMeans(n_clusters=2, random_state=42)
        kmeans.fit(principalDf.values)
        labels = kmeans.predict(principalDf.values)
        unique_elements, counts_elements = np.unique(labels,
                                                     return_counts=True)
        clusters = np.asarray((unique_elements, counts_elements))

        # distance function to be used
        def getDistanceByPoint(data, model):
            distance = []
            for i in range(0, len(data)):
                Xa = np.array(data.loc[i])
                Xb = model.cluster_centers_[model.labels_[i]]
                distance.append(np.linalg.norm(Xa - Xb))
            return pd.Series(distance, index=data.index)

        outliers_fraction = fraction
        distance = getDistanceByPoint(principalDf, kmeans)
        number_of_outliers = int(outliers_fraction * len(distance))
        threshold = distance.nlargest(number_of_outliers).min()
        principalDf['anomaly1'] = (distance >= threshold).astype(int)

        st.write("Anomaly Count by Kmeans",
                 principalDf['anomaly1'].value_counts())

        mv_data['anomaly1'] = pd.Series(principalDf['anomaly1'].values,
                                        index=mv_data.index)
        a = mv_data[mv_data['anomaly1'] == 1]  #anomaly
        data3 = a
        outliers_fraction = fraction
        model = IsolationForest(contamination=outliers_fraction)
        # fit on the principal components only, so the anomaly1 flag added
        # above does not leak into the Isolation Forest's feature set
        model.fit(principalDf[['pc1', 'pc2']].values)
        principalDf['anomaly2'] = pd.Series(
            model.predict(principalDf[['pc1', 'pc2']].values))

        # visualization
        mv_data['anomaly2'] = pd.Series(principalDf['anomaly2'].values,
                                        index=mv_data.index)
        a = mv_data.loc[mv_data['anomaly2'] == -1]  #anomaly
        # anomaly count method 3
        st.write("Anomaly count isolated forest: ",
                 mv_data['anomaly2'].value_counts())
        data4 = a

        def intersection(lst1, lst2, lst3, lst4):
            lst5 = [value for value in lst2 if value in lst1]
            lst6 = [value for value in lst3 if value in lst5]
            lst7 = [value for value in lst4 if value in lst6]
            return lst7
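
        # Consensus step: keep only timestamps flagged by all four detectors
        # (IQR on pc1, IQR on pc2, K-means distance, Isolation Forest).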

        time_common = intersection(data1.index.unique(), data2.index.unique(),
                                   data3.index.unique(), data4.index.unique())
        time_df = pd.DataFrame(columns=mv_data.columns, index=time_common)
        for time in time_common:
            time_df.loc[time] = mv_data.loc[time]

        # visualization
        fig = plt.figure()
        plt.plot(mv_data[param], color='blue', label='Normal')
        plt.plot(time_df[param],
                 linestyle='none',
                 marker='X',
                 color='red',
                 markersize=12,
                 label='Anomaly')
        plt.xlabel('Date and Time')
        plt.ylabel('Reading')
        plt.title('Anomalies')
        plt.legend(loc='best')
        plt.gcf().autofmt_xdate()
        st.pyplot(fig)
        st.dataframe(time_df)

    code = ''' 
	# data
	time_series_df=pd.read_csv('../Data/main/Maritius_AOI_20200701_0731_full.csv')
	time_series_df.sort_values(by=['timestamp'], inplace=True)
	time_series_df['timestamp'] = pd.to_datetime(time_series_df['timestamp'])


	# cleaning and indexing
	time_series_df.drop(["call_sign", "flag" ,"draught" , "ship_and_cargo_type",
       "length", "width","eta" , "destination",  "status", "maneuver",  "accuracy" ,
       "collection_type" ,'mmsi_label'], axis=1, inplace=True)
	time_series_df.drop(['created_at','imo', 'name'], axis=1, inplace=True)
	time_series_df = time_series_df[time_series_df['speed'].notna()]
	time_series_df = time_series_df.reset_index(drop=True)
	time_series_df.drop(time_series_df[time_series_df['speed'] == 0].index, inplace = True) 


	# list mmsi
	time_series_df.mmsi.unique() 

	# parameters -> speed, course, rot, heading
	mv_value = 477269900
	param = 'speed'
	fraction = 0.05

	# Data after parameters and everything
	len(time_series_df[time_series_df["mmsi"]==mv_value])
	mv_data = time_series_df[time_series_df['mmsi']==mv_value]


	# param plotting after selecting vessel
	plt.plot( mv_data['timestamp'], mv_data['speed'])
	plt.gcf().autofmt_xdate()
	plt.show()

	plt.plot( mv_data['timestamp'], mv_data['course'])
	plt.gcf().autofmt_xdate()
	plt.show()

	plt.plot( mv_data['timestamp'], mv_data['rot'])
	plt.gcf().autofmt_xdate()
	plt.show()

	plt.plot( mv_data['timestamp'], mv_data['heading'])
	plt.gcf().autofmt_xdate()
	plt.show()

	# data cleaning again
	mv_data = mv_data.drop(['mmsi','msg_type','latitude', 'longitude'], axis=1)
	mv_data = mv_data[mv_data['speed'].notna()]
	mv_data = mv_data.set_index(['timestamp'])
	mv_data.index = pd.to_datetime(mv_data.index, unit='s')
	names=mv_data.columns
	rollmean = mv_data.resample(rule='D').mean()
	rollstd = mv_data.resample(rule='D').std()


	# Method 1 (checkpoint)
	df2 = mv_data
	names=df2.columns
	x = mv_data[names]
	scaler = StandardScaler()
	pca = PCA()
	pipeline = make_pipeline(scaler, pca)
	pipeline.fit(x)


	# PCA feature graph (Not done)
	features = range(pca.n_components_)
	plt.figure(figsize=(15, 5))
	plt.bar(features, pca.explained_variance_)
	plt.xlabel('PCA feature')
	plt.ylabel('Variance')
	plt.xticks(features)
	plt.title("Importance of the Principal Components based on inertia")
	plt.show()


	pca = PCA(n_components=2)
	principalComponents = pca.fit_transform(x)
	principalDf = pd.DataFrame(data = principalComponents, columns = ['pc1', 'pc2'])
	mv_data['pc1']=pd.Series(principalDf['pc1'].values, index=mv_data.index)
	mv_data['pc2']=pd.Series(principalDf['pc2'].values, index=mv_data.index)

	# p value and pc1 and pc2 autocorrelation
	result = adfuller(principalDf['pc1'])
	print(result[1])
	pca1 = principalDf['pc1'].pct_change()
	autocorrelation = pca1.dropna().autocorr()
	print('Autocorrelation is: ', autocorrelation)
	plot_acf(pca1.dropna(), lags=20, alpha=0.05)
	pca2 = principalDf['pc2'].pct_change()
	autocorrelation = pca2.autocorr()
	print('Autocorrelation is: ', autocorrelation)
	plot_acf(pca2.dropna(), lags=20, alpha=0.05)

	# model calculations method 1
	q1_pc1, q3_pc1 = mv_data['pc1'].quantile([0.25, 0.75])
	iqr_pc1 = q3_pc1 - q1_pc1
	lower_pc1 = q1_pc1 - (1.5*iqr_pc1)
	upper_pc1 = q3_pc1 + (1.5*iqr_pc1)
	q1_pc2, q3_pc2 = mv_data['pc2'].quantile([0.25, 0.75])
	iqr_pc2 = q3_pc2 - q1_pc2
	lower_pc2 = q1_pc2 - (1.5*iqr_pc2)
	upper_pc2 = q3_pc2 + (1.5*iqr_pc2)
	mv_data['anomaly_pc1'] = ((mv_data['pc1']>upper_pc1) | (mv_data['pc1']<lower_pc1)).astype('int')
	mv_data['anomaly_pc2'] = ((mv_data['pc2']>upper_pc2) | (mv_data['pc2']<lower_pc2)).astype('int')
	total_anomaly = mv_data['anomaly_pc1'].value_counts() + mv_data['anomaly_pc2'].value_counts()
	outliers_pc1 = mv_data.loc[(mv_data['pc1']>upper_pc1) | (mv_data['pc1']<lower_pc1), 'pc1']
	outliers_pc2 = mv_data.loc[(mv_data['pc2']>upper_pc2) | (mv_data['pc2']<lower_pc2), 'pc2']
	len(outliers_pc1)/len(mv_data)
	len(outliers_pc2)/len(mv_data)

	# plotting anomaly method 1
	a = mv_data[mv_data['anomaly_pc1'] == 1] #anomaly
	b = mv_data[mv_data['anomaly_pc2'] == 1] #anomaly
	plt.figure(figsize=(18,6))
	plt.plot(mv_data[param], color='blue', label='Normal')
	plt.plot(a[param], linestyle='none', marker='X', color='red', markersize=12, label='Anomaly1')
	plt.plot(b[param], linestyle='none', marker='X', color='green', markersize=12, label='Anomaly2')
	plt.xlabel('Date and Time')
	plt.ylabel(param)
	plt.title(param + ' Anomalies with MMSI: ' + str(mv_value))
	plt.legend(loc='best')
	plt.show();
	data1 = a
	data2 = b


	#  Method 2 K means (checkpoint)
	kmeans = KMeans(n_clusters=2, random_state=42)
	kmeans.fit(principalDf.values)
	labels = kmeans.predict(principalDf.values)
	unique_elements, counts_elements = np.unique(labels, return_counts=True)
	clusters = np.asarray((unique_elements, counts_elements))

	# no of points in each clusters
	plt.figure(figsize = (9, 7))
	plt.bar(clusters[0], clusters[1], tick_label=clusters[0])
	plt.xlabel('Clusters')
	plt.ylabel('Number of points')
	plt.title('Number of points in each cluster')
	plt.show()

	# cluster graph
	plt.figure(figsize=(9,7))
	plt.scatter(principalDf['pc1'], principalDf['pc2'], c=labels)
	plt.xlabel('pc1')
	plt.ylabel('pc2')
	plt.title('K-means clustering')
	plt.show()

	# function to be used
	def getDistanceByPoint(data, model):
	    distance = []
	    for i in range(0, len(data)):
	        Xa = np.array(data.loc[i])
	        Xb = model.cluster_centers_[model.labels_[i]]
	        distance.append(np.linalg.norm(Xa - Xb))
	    return pd.Series(distance, index=data.index)

	# method 2 calculations
	outliers_fraction = fraction
	distance = getDistanceByPoint(principalDf, kmeans)
	number_of_outliers = int(outliers_fraction*len(distance))
	threshold = distance.nlargest(number_of_outliers).min() 
	principalDf['anomaly1'] = (distance >= threshold).astype(int)

	# Anomaly count
	principalDf['anomaly1'].value_counts()

	# K means anomaly plots
	mv_data['anomaly1'] = pd.Series(principalDf['anomaly1'].values, index=mv_data.index)
	a = mv_data[mv_data['anomaly1'] == 1] #anomaly
	plt.figure(figsize=(18,6))
	plt.plot(mv_data[param], color='blue', label='Normal')
	plt.plot(a[param], linestyle='none', marker='X', color='red', markersize=12, label='Anomaly')
	plt.xlabel('Date and Time')
	plt.ylabel(param)
	plt.title(param + ' Anomalies with MMSI: ' + str(mv_value))
	plt.legend(loc='best')
	plt.show();
	data3 = a


	# IsolationForest method 3 (checkpoint)
	outliers_fraction = fraction
	model =  IsolationForest(contamination=outliers_fraction)
	model.fit(principalDf.values) 
	principalDf['anomaly2'] = pd.Series(model.predict(principalDf.values))

	# visualization
	mv_data['anomaly2'] = pd.Series(principalDf['anomaly2'].values, index=mv_data.index)
	a = mv_data.loc[mv_data['anomaly2'] == -1] #anomaly
	plt.figure(figsize=(18,6))
	plt.plot(mv_data[param], color='blue', label='Normal')
	plt.plot(a[param], linestyle='none', marker='X', color='red', markersize=12, label='Anomaly')
	plt.xlabel('Date and Time')
	plt.ylabel(param + ' Reading')
	plt.title(param + ' Anomalies with MMSI: ' + str(mv_value))
	plt.legend(loc='best')
	plt.show();
	data4 = a

	# anomaly count method 3 
	mv_data['anomaly2'].value_counts()


	# Method 4 
	def intersection(lst1, lst2,lst3,lst4): 
	    lst5 = [value for value in lst2 if value in lst1]
	    lst6 = [value for value in lst3 if value in lst5]
	    lst7 = [value for value in lst4 if value in lst6]
	    return lst7
	 
	time_common = intersection(data1.index.unique() , data2.index.unique() , data3.index.unique() , data4.index.unique() )
	time_df = pd.DataFrame(columns = mv_data.columns, index = time_common) 
	for time in time_common:
	    time_df.loc[time] = mv_data.loc[time]

	# visualization
	plt.figure(figsize=(18,6))
	plt.plot(mv_data[param], color='blue', label='Normal')
	plt.plot(time_df[param], linestyle='none', marker='X', color='red', markersize=12, label='Anomaly')
	plt.xlabel('Date and Time')
	plt.ylabel(param + ' Reading')
	plt.title('Anomalies')
	plt.legend(loc='best')
	plt.show();
'''
    if classifier == "Code":
        st.code(code, language='python')
    st.balloons()
Esempio n. 25
0
def main():
    """Common ML Dataset Explorer"""
    st.title("Common Machine Learning Dataset Explorer")
    st.subheader("Simple Data Science Explorer with streamlit")

    html_temp = """
	<div style="background-color:tomato;"><p style="color:white; font-weight:bold;">By IMRAN S M</p></div>
	"""
    st.markdown(html_temp, unsafe_allow_html=True)

    def file_selector(folder_path="./datasets"):
        filenames = os.listdir(folder_path)
        selected_filename = st.selectbox("Select a File", filenames)
        return os.path.join(folder_path, str(selected_filename))

    filename = file_selector()
    st.info("You Selected {}".format(filename))

    # Read Data
    df = pd.read_csv(filename)

    # Show Dataset
    if st.checkbox("Show Dataset"):
        number = st.number_input("Number of Rows to View", 1)
        st.dataframe(df.head(number))

    # Show Columns
    if st.button("Column Names"):
        st.write(df.columns)

    # Show Shape Of Dataframe
    if st.checkbox("Shape Of Dataframe"):
        data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of Rows")
            st.write(df.shape[0])
        elif data_dim == 'Columns':
            st.text("Number of Columns")
            st.write(df.shape[1])
    else:
        st.write(df.shape)

    # Select Columns
    if st.checkbox("Select Columns To Show"):
        all_columns = df.columns.tolist()
        selected_columns = st.multiselect("Select", all_columns)
        new_df = df[selected_columns]
        st.dataframe(new_df)

    # Show Values
    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, -1].value_counts())

    # Show Data Types
    if st.button("Data Types"):
        st.write(df.dtypes)

    # Show Summary
    if st.checkbox("Describe"):
        st.write(df.describe().T)

    # Plot And Visualization
    st.subheader("Data Visualization")

    # Correlation
    # Seaborn Plot

    if st.checkbox("Correlation Plot[Seaborn]"):
        st.write(sns.heatmap(df.corr(), annot=True))
        st.pyplot()

    # Pie Chart
    if st.checkbox("Pie Plot"):
        all_columns_names = df.columns.tolist()
        if st.button("Generate Plot"):
            st.success("Generated Pie Plot:")
            st.write(df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
            st.pyplot()

    # Count Plot

    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox("Primary Columm to GroupBy",
                                   all_columns_names)
        selected_columns_names = st.multiselect("Select Columns",
                                                all_columns_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(
                    primary_col)[selected_columns_names].count()
            else:
                vc_plot = df.iloc[:, -1].value_counts()
            st.write(vc_plot.plot(kind="bar"))
            st.pyplot()

    # Customizable Plot

    st.subheader("Customizable Plot")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox("Select Type of Plot ",
                                ["area", "bar", "line", "hist", "box", "kde"])
    selected_column_names = st.multiselect("Select Column To Plot",
                                           all_columns_names)

    if st.button("Generate  Plot"):
        st.success("Generated Customizable {} Plot for {}".format(
            type_of_plot, selected_column_names))

        # Plot By Streamlit
        if type_of_plot == "area":
            cust_data = df[selected_column_names]
            st.area_chart(cust_data)

        elif type_of_plot == "bar":
            cust_data = df[selected_column_names]
            st.bar_chart(cust_data)

        elif type_of_plot == "line":
            cust_data = df[selected_column_names]
            st.line_chart(cust_data)

        # Custom Plot
        elif type_of_plot:
            cust_plot = df[selected_column_names].plot(kind=type_of_plot)
            st.write(cust_plot)
            st.pyplot()

    if st.button("Celebrate!!"):
        st.balloons()
Esempio n. 26
0
def main():
    st.sidebar.markdown(
        "<h1 style='text-align: center; color: black;'>🧭 Navigation Bar 🧭</h1>",
        unsafe_allow_html=True)
    nav = st.sidebar.radio(
        "",
        ["Home 🏡", "User defined Prediction📟", "Forecasting 📊", "Dashboard 📌"])
    if nav == "Home 🏡":
        st.markdown(
            "<h1 style ='color:black; text_align:center;font-family:times new roman;font-size:20pt; font-weight: bold;'>DEEP WINDS ⚒️</h1>",
            unsafe_allow_html=True)
        st.markdown(
            "<h1 style=' color:brown; text_align:center;font-weight: bold;font-size:19pt;'>Made by Quad Techies with ❤️</h1>",
            unsafe_allow_html=True)
        st.markdown(
            "<h1 style ='color:green; text_align:center;font-weight: bold;font-size:18pt;'>🌎 Wind Power Prediction DL Web-App 🌎</h1>",
            unsafe_allow_html=True)
        with st.beta_expander("Write a review 📝"):
            col1, col2 = st.beta_columns(2)
            with col1:
                username = st.text_input("Name")
            with col2:
                comments = st.text_input("Comments")
            if st.button("Post ✔️"):
                if username == '' or comments == '':
                    st.markdown(
                        "<h1 style='text-align: center; font-weight:bold;color:red;background-color:white;font-size:12pt;border-style: solid;border-color:red;border-radius:4px'>❌ Empty field ❌</h1>",
                        unsafe_allow_html=True)
                else:
                    create_usertable()
                    add_userdata(username, comments)
                    result = login_user(username, comments)
                    if result:
                        st.markdown(
                            "<h1 style='text-align: center; font-weight: normal;color:DeepPink;background-color:white;font-size:12pt;border-style: solid;border-color:Deeppink;border-radius:6px'> Thankyou for your comment {} 🎉 - with regards Team DeepWind❤️ </h1>"
                            .format(username),
                            unsafe_allow_html=True)
        with st.beta_expander("View reviews 📝"):
            result = select_all()
            data = pd.DataFrame(result, columns=['UserName', 'Comments'])
            st.table(data)
        with st.beta_expander("Like this page💰🏆!!"):
            if st.button("❤️"):
                st.markdown(
                    "<h1 style='text-align: center; font-weight: normal;color:DeepPink;background-color:white;font-size:12pt;border-style: solid;border-color:Deeppink;border-radius:6px'> Thanks for your like😀!</h1>",
                    unsafe_allow_html=True)
                create_likestable()
                add_likesdata('1')
                like = count_likes()
                like = pd.DataFrame(like, columns=['Total Likes 🎖️ : '])
                like = like.to_string(index=False)
                st.markdown(
                    "<h1 style='text-align: left; color: black;font-size:12pt'>{}</h1>"
                    .format(like),
                    unsafe_allow_html=True)

    if nav == "User defined Prediction📟":
        set_png_as_page_bg('gra (1).jpg')
        st.markdown(
            "<h1 style='text-align: center; color: green;'>User Input Parameters 💻️</h1>",
            unsafe_allow_html=True)
        with st.beta_expander("Preferences"):
            st.markdown(
                "<h1 style='text-align: left; font-weight:bold;color:black;background-color:white;font-size:11pt;'> Temperature ⛅🌞🌧️ (°C) </h1>",
                unsafe_allow_html=True)
            col1, col2 = st.beta_columns(2)
            with col1:
                min_temp = st.number_input('🌡️ Minimum Temperature (°C)',
                                           min_value=-89,
                                           max_value=55,
                                           value=-15,
                                           step=1)
            with col2:
                max_temp = st.number_input('🌡️ Maximum Temperature (°C)',
                                           min_value=-88,
                                           max_value=56,
                                           value=50,
                                           step=1)
            st.markdown(
                "<h1 style='text-align: left; font-weight:bold;color:black;background-color:white;font-size:11pt;'> Wind Speed 🌬️ (m/s) </h1>",
                unsafe_allow_html=True)
            col1, col2 = st.beta_columns(2)
            with col1:
                min_speed = st.number_input('🚀 Minimum Wind Speed (m/s)',
                                            min_value=0,
                                            max_value=99,
                                            value=1,
                                            step=1)
            with col2:
                max_speed = st.number_input('🚀 Maximum Wind Speed (m/s)',
                                            min_value=2,
                                            max_value=100,
                                            value=27,
                                            step=1)
        st.write("")
        temperature = st.slider('Temperature ⛅🌞🌧️ [°C]',
                                min_value=min_temp,
                                step=1,
                                max_value=max_temp,
                                value=max_temp)
        pressure = st.slider('Pressure  ⚡ [atm]️',
                             min_value=800,
                             step=1,
                             max_value=1050,
                             value=1050)
        wind_speed = st.slider('Wind Speed  🌬️ [m/s]',
                               min_value=min_speed,
                               step=1,
                               max_value=max_speed,
                               value=max_speed)
        wind_direction = st.slider('Wind Direction  🚩🌀 [deg]', 0, 360, 1)
        dew_point = st.slider('Dew Point  💦 [deg]', float(-360), float(360),
                              float(1))
        relative_humidity = st.slider('Relative Humidity  ☔ [%]', 0, 100, 1)
        result = ""
        profit = 0
        if st.button("Predict"):
            result = predict(temperature, pressure, wind_speed, wind_direction,
                             dew_point, relative_humidity)
            profit = result * 0.017 * 24 * 365 * 0.39
            profit = int(74.19 * profit)
            st.balloons()
        st.success('Predicted Power is {} kW'.format(result))
        st.warning('Annual Profit is {} Rupees'.format(round(profit, 2)))

    if nav == "Forecasting 📊":
        set_png_as_page_bg('04.gif')
        st.markdown(
            "<h1 style='text-align: center; color:black ;'>⚡FORECASTING⚡</h1>",
            unsafe_allow_html=True)
        with st.beta_expander("📁 Sample Dataset 📁"):
            st.markdown(get_binary_file_downloader_html('SampleData.csv'),
                        unsafe_allow_html=True)
    # Setup file upload
        st.markdown(
            "<h1 style='text-align:center; color:white;background-color:black;font-size:14pt'>📂 Upload your CSV or Excel file. (200MB max) 📂</h1>",
            unsafe_allow_html=True)
        uploaded_file = st.file_uploader(label="", type=['csv', 'xlsx'])

        global df
        if uploaded_file is not None:
            print(uploaded_file)
            st.markdown(
                "<h1 style='text-align:center; color:black;background-color:lightgreen;font-size:14pt'>📂 File upload successful 📂</h1>",
                unsafe_allow_html=True)
            print("hello")

            try:
                df = pd.read_csv(uploaded_file)
                st.write(df)

            except Exception as e:
                df = pd.read_excel(uploaded_file)
                st.write(df)

            st.markdown(
                "<h1 style='text-align: center; color:black ;background-color:powderblue;font-size:14pt'>📈 INPUT DATA IN TERMS OF DATE 📈</h1>",
                unsafe_allow_html=True)

            trace = go.Scatter(x=df['DateTime'],
                               y=df['Power generated by system | (kW)'],
                               mode='lines',
                               name='Data')
            layout = go.Layout(
                title="",
                xaxis={'title': "Date"},
                yaxis={'title': "Power generated by system | (kW)"})
            fig = go.Figure(data=[trace], layout=layout)
            #fig.show()
            st.write(fig)

            df1 = df.reset_index()['Power generated by system | (kW)']
            import matplotlib.pyplot as plt
            st.write("\n")
            st.markdown(
                "<h1 style='text-align: center; color:black ;background-color:powderblue;font-size:14pt'>📈 INPUT DATA IN TERMS OF NO. OF HOURS 📈 </h1>",
                unsafe_allow_html=True)
            trace = go.Scatter(x=df1.index,
                               y=df['Power generated by system | (kW)'],
                               mode='lines',
                               name='Data')
            layout = go.Layout(
                title="",
                xaxis={'title': "No. of hours"},
                yaxis={'title': "Power generated by system (kW)"})

            fig = go.Figure(data=[trace], layout=layout)
            #fig.show()
            st.write(fig)
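            # Scale the series to [0, 1] for the LSTM; the fitted scaler is
            # reused later to inverse-transform predictions back to kW.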
            from sklearn.preprocessing import MinMaxScaler
            scaler = MinMaxScaler(feature_range=(0, 1))
            df1 = scaler.fit_transform(np.array(df1).reshape(-1, 1))
            ##splitting dataset into train and test split
            training_size = int(len(df1) * 0.65)
            test_size = len(df1) - training_size
            train_data, test_data = df1[0:training_size, :], df1[
                training_size:len(df1), :1]

            import numpy

            # convert an array of values into a dataset matrix
            def create_dataset(dataset, time_step=1):
                dataX, dataY = [], []
                for i in range(len(dataset) - time_step - 1):
                    a = dataset[i:(i + time_step),
                                0]  ###i=0, 0,1,2,3-----99   100
                    dataX.append(a)
                    dataY.append(dataset[i + time_step, 0])
                return numpy.array(dataX), numpy.array(dataY)
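
            # e.g. with time_step=3, [10, 20, 30, 40, 50] yields
            # X=[[10, 20, 30]] and y=[40]; the loop stops one step early,
            # so the final possible window is unused.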
            # reshape into X=t, t+1, t+2, t+3 and Y=t+4
            time_step = 30
            X_train, y_train = create_dataset(train_data, time_step)
            X_test, ytest = create_dataset(test_data, time_step)
            # reshape input to be [samples, time steps, features] which is required for LSTM
            X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1)
            X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1)
            ### Create the BiLSTM model
            from tensorflow.keras.models import Sequential
            from tensorflow.keras.layers import Dense
            from tensorflow.keras.layers import LSTM
            from tensorflow.keras.layers import Bidirectional
            model = Sequential()
            model.add(Bidirectional(LSTM(300, input_shape=(30, 1))))
            model.add(Dense(1))
            model.compile(loss='mae', optimizer='adam')
            model.fit(X_train,
                      y_train,
                      validation_data=(X_test, ytest),
                      epochs=10,
                      batch_size=64,
                      verbose=1)
            import tensorflow as tf
            ### Lets Do the prediction and check performance metrics
            train_predict = model.predict(X_train)
            test_predict = model.predict(X_test)
            ##Transformback to original form
            train_predict = scaler.inverse_transform(train_predict)
            test_predict = scaler.inverse_transform(test_predict)
            ### Calculate RMSE performance metrics
            import math
            from sklearn.metrics import mean_squared_error
            math.sqrt(mean_squared_error(y_train, train_predict))
            ### Test Data RMSE
            math.sqrt(mean_squared_error(ytest, test_predict))
            ### Plotting
            # shift train predictions for plotting
            look_back = 30
            trainPredictPlot = numpy.empty_like(df1)
            trainPredictPlot[:, :] = np.nan
            trainPredictPlot[look_back:len(train_predict) +
                             look_back, :] = train_predict
            # shift test predictions for plotting
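            # (the test curve starts after the train predictions plus two
            # look-back gaps, one per create_dataset call, hence the
            # (look_back * 2) + 1 offset below)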
            testPredictPlot = numpy.empty_like(df1)
            testPredictPlot[:, :] = numpy.nan
            testPredictPlot[len(train_predict) + (look_back * 2) + 1:len(df1) -
                            1, :] = test_predict
            # plot baseline and predictions
            st.markdown(
                "<h1 style='text-align: center; color:black ;background-color:powderblue;font-size:14pt'>📈 TRAIN AND TEST DATA 📈 </h1>",
                unsafe_allow_html=True)

            #plt.plot(scaler.inverse_transform(df1))
            plt.plot(scaler.inverse_transform(df1),
                     color="blue",
                     linewidth=1,
                     linestyle="-")
            plt.xlabel('No. of hours')
            # Set the y axis label of the current axis.
            plt.ylabel('Power generated by system | (kW)')
            plt.plot(trainPredictPlot,
                     label='Train Data',
                     color="black",
                     linewidth=2,
                     linestyle="--")
            plt.plot(testPredictPlot,
                     label='Test Data',
                     color="orange",
                     linewidth=2,
                     linestyle="--")
            plt.legend(loc="upper left")
            #plt.show()
            st.pyplot(plt)

            x_input = test_data[len(test_data) - 30:].reshape(1, -1)
            temp_input = list(x_input)
            temp_input = temp_input[0].tolist()
            # demonstrate prediction for next 24 hours
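            # Recursive multi-step forecast: start from the last 30 scaled
            # values, predict one step, append the prediction to the window,
            # and repeat 24 times (one step per hour).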
            from numpy import array
            lst_output = []
            n_steps = 30
            i = 0
            while (i < 24):
                if (len(temp_input) > 30):
                    #print(temp_input)
                    x_input = np.array(temp_input[1:])
                    x_input = x_input.reshape(1, -1)
                    x_input = x_input.reshape((1, n_steps, 1))
                    yhat = model.predict(x_input, verbose=0)
                    temp_input.extend(yhat[0].tolist())
                    temp_input = temp_input[1:]
                    lst_output.extend(yhat.tolist())
                    i = i + 1
                else:
                    x_input = x_input.reshape((1, n_steps, 1))
                    yhat = model.predict(x_input, verbose=0)
                    print(yhat[0])
                    temp_input.extend(yhat[0].tolist())
                    print(len(temp_input))
                    lst_output.extend(yhat.tolist())
                    i = i + 1

            print(lst_output)
            day_new = np.arange(1, 31)
            day_pred = np.arange(len(df1), len(df1) + 24)
            import matplotlib.pyplot as plt
            print(len(df1))
            progress = st.progress(0)
            for i in range(100):
                time.sleep(0.1)
                progress.progress(i + 1)
            st.balloons()
            st.markdown(
                "<h1 style='text-align: center; color:black ;background-color:powderblue;font-size:14pt'>📈 PREDICTED RESULTS FOR NEXT 24 HOURS 📈</h1>",
                unsafe_allow_html=True)
            plt.plot(day_pred,
                     scaler.inverse_transform(lst_output),
                     color="green",
                     linewidth=1.5,
                     linestyle="--",
                     marker='*',
                     markerfacecolor='yellow',
                     markersize=7)
            plt.legend('GTtP', loc="upper left")

            plt.xlabel('No. of hours')
            # Set the y axis label of the current axis.
            plt.ylabel('Power generated by system | (kW)')

            st.pyplot(plt)
            st.markdown(
                "<h1 style='text-align: center; color:black ;background-color:yellow;font-size:14pt'>🏷️ G-Given Data, \n🏷️T-Train Data, \n🏷️t-Test Data, \n🏷️P-Predicted Results</h1>",
                unsafe_allow_html=True)
            power = pd.DataFrame(scaler.inverse_transform(lst_output),
                                 columns=['Predicted Power(kW)'])
            st.write(power)
            avg_power = power.sum()
            avg_power = int(avg_power / 24)
            profit1 = avg_power * 0.017 * 24 * 0.39
            profit1 = 74.19 * profit1
            st.balloons()
            value = f"<h1 style='text-align: center; color:black ;background-color:powderblue;font-size:14pt'> Day Profit is {profit1:.2f} Rupees</h1>"
            st.markdown(value, unsafe_allow_html=True)

    if nav == "Dashboard 📌":
        set_png_as_page_bg('white.jpg')
        city = st.text_input('Enter the city:')
        try:
            query = city
            w_data = weather_data(query)
            print_weather(w_data, city)
        except Exception:
            st.warning('City name not found...')
Esempio n. 27
0
def main():

    html_temp = """<div style="background-color:tansparent;"><div class="header-category__background" style="background-image: url('https://img.freepik.com/free-photo/pile-3d-twitter-logos_1379-879.jpg?size=620&ext=jpg');"><p style="color:white;font-size:50px;padding:50px">TWEET CLASSIFIER</p></div>"""
    st.markdown(html_temp, unsafe_allow_html=True)
    # Creates a main title and subheader on your page -
    # these are static across all pages
    image = Image.open('_110627626_trump_climate_quotesv7_976-nc.png').convert(
        'RGB')
    st.image(image, caption='TRUMP TWEET QUOTES', use_column_width=True)
    # Creating sidebar with selection box -
    # you can create multiple pages this way
    st.subheader("Climate change tweet classification")
    options = ["Prediction", "Information", "Models", "EDA"]
    selection = st.sidebar.selectbox("Choose Option", options)
    if selection == "Models":
        st.info('Information about the models')
        # You can read a markdown file from supporting resources folder
        html = markdown.markdown(open("Data/models.md").read())
        st.markdown(html, unsafe_allow_html=True)

    # Building out the "Information" page
    if selection == "Information":
        st.info("General Information")
        # You can read a markdown file from supporting resources folder
        html = markdown.markdown(open("Data/info.md").read())
        st.markdown(html, unsafe_allow_html=True)

        st.subheader("Raw Twitter data and label")
        if st.checkbox('Show raw data'):  # data is hidden if box is unchecked
            st.write(train[['sentiment',
                            'message']])  # will write the df to the page

    if selection == "EDA":
        st.subheader(
            "Visualizations used to explore the raw and processed Twitter data"
        )
        if st.checkbox('The popular words used in the Tweets message data'
                       ):  # data is hidden if box is unchecked
            image = Image.open('joint_cloud.png')
            st.image(image, caption='WORD CLOUD ', use_column_width=True)
        if st.checkbox(
                'Tweet message distribution over the sentiments pie chart'
        ):  # data is hidden if box is unchecked
            image = Image.open(
                'Tweet message distribution over the sentiments bar chart.png')
            st.image(
                image,
                caption=
                'Tweet message distribution over the sentiments bar chart',
                use_column_width=True)
        if st.checkbox(
                'Tweet message distribution over the sentiments bar chart'
        ):  # data is hidden if box is unchecked
            image = Image.open(
                'Tweet message distribution over the sentiments.png')
            st.image(image,
                     caption='Tweet message distribution over the sentiments ',
                     use_column_width=True)
        if st.checkbox('The count of word used in the Tweets message data'
                       ):  # data is hidden if box is unchecked
            image = Image.open('wordcount_bar.png')
            st.image(image, caption='WORD COUNT BAR', use_column_width=True)

    # Building out the predication page
    if selection == "Prediction":
        st.info("Prediction with ML Models")
        # Creating a text box for user input
        tweet_text = st.text_area("Enter Text/Tweet", "Type Here")
        st.subheader(
            "Select a check box of the model you wish to use to classify your tweet"
        )
        if st.checkbox("LinearSVC"):
            # Transforming user input with vectorizer
            vect_text = vectoriser.transform([tweet_text]).toarray()
            # Load your .pkl file with the model of your choice + make predictions
            # Try loading in multiple models to give the user a choice
            predictor = joblib.load(
                open(os.path.join("Data/LinearSVC.pkl"), "rb"))
            prediction = predictor.predict(vect_text)
            st.success("Text Categorized as: {}".format(prediction))
            st.balloons()
        if st.checkbox('Logistic'):
            # Transforming user input with vectorizer
            vect_text = vectoriser.transform([tweet_text]).toarray()
            # Load your .pkl file with the model of your choice + make predictions
            # Try loading in multiple models to give the user a choice
            predictor = joblib.load(
                open(os.path.join("Data/LogisticRegression.pkl"), "rb"))
            prediction = predictor.predict(vect_text)
            st.success("Text Categorized as: {}".format(prediction))
            st.balloons()

        if st.checkbox('SVC'):
            # Transforming user input with vectorizer
            vect_text = vectoriser.transform([tweet_text]).toarray()
            # Load your .pkl file with the model of your choice + make predictions
            # Try loading in multiple models to give the user a choice
            predictor = joblib.load(open(os.path.join("Data/SVC.pkl"), "rb"))
            prediction = predictor.predict(vect_text)
            st.success("Text Categorized as: {}".format(prediction))
            st.balloons()
        if st.checkbox('MultiNB'):
            # Transforming user input with vectorizer
            vect_text = vectoriser.transform([tweet_text]).toarray()
            # Load your .pkl file with the model of your choice + make predictions
            # Try loading in multiple models to give the user a choice
            predictor = joblib.load(
                open(os.path.join("Data/MultinomialNB.pkl"), "rb"))
            prediction = predictor.predict(vect_text)

            # When model has successfully run, will print prediction
            # You can use a dictionary or similar structure to make this output
            # more human interpretable.
            st.success("Text Categorized as: {}".format(prediction))
            st.balloons()
Esempio n. 28
0
def main():

    round_type = st.selectbox(
        'Choose the round',
        ('Test', 'Description', 'Charades', '1 Word', 'Review'))
    current_round = round_type
    if current_round == 'Test':
        r = [
            'This is a sample dash', 'cruise missile lana', 'gypsy Molly',
            'Mike is the greatest!'
        ]
        st.header("This is a test round")
        st.write(
            "1) If you are the first person in the round, click the plus sign for the next word"
        )
        st.write(
            "2) If you are not the first person, enter the clue count to start"
        )
        st.write("3) After 60 seconds, your turn is over")
        st.write(
            "4) Relay the clue count to the group and place your phone face down on the table"
        )
        ss.x = st.number_input('Clue Count',
                               min_value=-1,
                               max_value=dash_num + 1,
                               value=-1,
                               step=1)
        if ss.x == -1:
            st.markdown('**Start the round!**')
        elif ss.x == dash_num + 1:
            #st.markdown('**End of Round**!')
            """### Round Over!"""
            st.markdown(
                "![Alt Text](https://media.giphy.com/media/R3eONMIcGP8nr8ajSs/giphy.gif)"
            )
            st.balloons()
        else:
            st.markdown('**{}**'.format(r[ss.x]))
    elif current_round == 'Description':
        r = r1
        ss.x = -1
        st.header("Welcome to the 1st Round of The Dash")
        st.write(
            "1) If you are the first person in the round, click the plus sign for the next word"
        )
        st.write(
            "2) If you are not the first person, enter the clue count to start"
        )
        st.write("3) After 60 seconds, your turn is over")
        st.write(
            "4) Relay the clue count to the group and place your phone face down on the table"
        )
        ss.x = st.number_input('Clue Count',
                               min_value=-1,
                               max_value=dash_num,
                               value=-1,
                               step=1)
        if ss.x == -1:
            st.markdown('**Start the round!**')
        elif ss.x == dash_num:
            """### Round Over!"""
            st.markdown(
                "![Alt Text](https://media.giphy.com/media/R3eONMIcGP8nr8ajSs/giphy.gif)"
            )
            st.balloons()
        else:
            st.write(r[ss.x])
    elif current_round == 'Charades':
        r = r2
        st.header("Welcome to the 2nd Round of The Dash")
        st.write("Use normal charades to help your team guess the answer")
        st.write(
            "1) If you are the first person in the round, click the plus sign for the next word"
        )
        st.write(
            "2) If you are not the first person, enter the clue count to start"
        )
        st.write("3) After 60 seconds, your turn is over")
        st.write(
            "4) Relay the clue count to the group and place your phone face down on the table"
        )
        ss.x = st.number_input('Clue Count',
                               min_value=-1,
                               max_value=dash_num,
                               value=-1,
                               step=1)
        if ss.x == -1:
            st.markdown('**Start the round!**')
        elif ss.x == dash_num:
            """### Round Over!"""
            st.markdown(
                "![Alt Text](https://media.giphy.com/media/PhH6eIv19BsnDTrX0F/giphy.gif)"
            )
            st.balloons()
        else:
            st.write(r[ss.x])
    elif current_round == '1 Word':
        r = r3
        st.header("Welcome to the Last Round of The Dash!")
        st.write(
            "Use only one word,including proper nouns, to help your team guess the clue"
        )
        st.write(
            "1) If you are the first person in the round, click the plus sign for the next word"
        )
        st.write(
            "2) If you are not the first person, enter the clue count to start"
        )
        st.write("3) After 60 seconds, your turn is over")
        st.write(
            "4) Relay the clue count to the group and place your phone face down on the table"
        )
        ss.x = st.number_input('Clue Count',
                               min_value=-1,
                               max_value=dash_num,
                               value=-1,
                               step=1)
        if ss.x == -1:
            st.markdown('**Start the round!**')
        elif ss.x == dash_num:
            """### Round Over!"""
            st.markdown(
                "![Alt Text](https://media.giphy.com/media/oOEtax0fEqdoiNNfnP/giphy.gif)"
            )
            st.balloons()
        else:
            st.write(r[ss.x])
    else:
        st.dataframe(dashes['dashes'])
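Note that this example assumes several globals defined elsewhere in the app: a session holder `ss`, the round length `dash_num`, the clue lists `r1`/`r2`/`r3`, and the `dashes` dataframe. A minimal sketch of one way to provide `ss` and the clue data via `st.session_state`; every value here is a placeholder, not the original game data:

import streamlit as st

class SessionProxy:
    """Attribute-style wrapper over st.session_state so `ss.x` works."""
    def __getattr__(self, name):
        # -1 matches the game's "round not started" sentinel
        return st.session_state.get(name, -1)
    def __setattr__(self, name, value):
        st.session_state[name] = value

ss = SessionProxy()
dash_num = 10                                   # assumed number of clues per round
r1 = r2 = r3 = ["placeholder clue"] * dash_num  # assumed clue lists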
Esempio n. 29
0
def main(local=False):

    st.set_page_config(
        page_title="Simple NTU Course Viewer",
        page_icon="🧊",
        layout="wide",
        initial_sidebar_state="expanded",
    )
    with st.spinner('Loading data…'):
        course_df = read_df(local)
        course_df = pre_processing(course_df.copy())
    st.write("""
    # 台大 110 年課表查詢""")

    col1, col2 = st.beta_columns((7, 4))
    with col1:
        search_txt = st.text_input('Enter a course name / ID / instructor name', '')

        need_help = st.beta_expander('Need help? 👉')
        with need_help:
            st.markdown(
                """Enter a **course name**, a **course ID**, or an **instructor name**. A course name and an instructor name cannot be searched at the same time.""",
                unsafe_allow_html=True)

    with col2:
        valid_column = course_df.drop('raw_day', axis=1).columns
        view_options = st.multiselect('Columns to display', list(valid_column),
                                      list(valid_column))

    days = ['一', '二', '三', '四', '五', '六', '七']  # weekday labels (Mon-Sun) as stored in the data

    if 'days_select' not in st.session_state:
        st.session_state['days_select'] = [False for i in range(7)]

    # (Currently unused: the checkboxes below write directly into session state.)
    def update_day(d):
        st.session_state['days_select'][
            d] = not st.session_state['days_select'][d]

    with st.form("date_picker"):
        st.write("選擇上課日")
        cols = st.beta_columns(7)
        for i, col in enumerate(cols):
            st.session_state['days_select'][i] = col.checkbox(days[i])

        date_opt = st.radio("篩選條件", ('Subset', 'All Matched'))

        # Every form must have a submit button.
        submitted = st.form_submit_button("確認")
        if submitted:
            # st.write(st.session_state['days_select'])
            days_select = st.session_state['days_select']
            pass

    other_info = st.beta_expander('More info 🔗')
    with other_info:
        st.markdown("""一些常用連結:

+ [PTT NTUcourse 看板](https://www.ptt.cc/bbs/NTUcourse/index.html)
+ [Original Repo](https://github.com/hungchun0201/NTUclassCrawler)
+ [台大課程網](https://nol.ntu.edu.tw/nol/guest/index.php)

<span style="font-size: 10px">* 註:僅為小型試用版,故僅用 Streamlit 簡單製作而已。若有不週全的地方,請自行修正 🙌🏾</span>
                    """,
                    unsafe_allow_html=True)

    df = course_df

    def in_list(x, date_opt):
        # Days the user ticked, as a set of weekday labels
        selected = set(np.array(days)[st.session_state['days_select']])
        if date_opt == 'Subset':
            # Keep courses whose meeting days all fall within the selection
            return set(x).issubset(selected)
        # 'All Matched': meeting days must equal the selection exactly
        return set(x) == selected

    st.write("## 課表結果")
    with st.spinner("結果產生中⋯"):
        if search_txt == "" and np.sum(st.session_state['days_select']) == 0:
            display_df = df[view_options]
        else:
            if np.sum(st.session_state['days_select']) == 0:
                display_df = df[(
                    df['Title'].str.contains(search_txt)
                    | df['Instructor'].str.contains(search_txt)
                    | df['Id'].str.contains(search_txt))][view_options]
            else:
                display_df = df[(df['Title'].str.contains(search_txt)
                                 | df['Instructor'].str.contains(search_txt)
                                 | df['Id'].str.contains(search_txt))
                                & course_df['raw_day'].apply(
                                    in_list, args=(date_opt, ))][view_options]

    st.write("""<style>
    tr:hover {background-color:#50536b42;
    table {
        max-width: -moz-fit-content;
        max-width: fit-content;
        white-space: nowrap;
    }
</style>""",
             unsafe_allow_html=True)

    st.write(f"""<div style="overflow:scroll; justify-content: center;">
{display_df.to_html()}
</div>""",
             unsafe_allow_html=True)

    st.balloons()
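The core of the day filter above is `in_list`: 'Subset' keeps a course when all of its meeting days fall within the ticked days, while 'All Matched' requires an exact match. A standalone sketch of the same set logic on made-up data:

import numpy as np
import pandas as pd

days = ['一', '二', '三', '四', '五', '六', '七']               # weekday labels (Mon-Sun)
days_select = [True, False, True, False, False, False, False]  # user ticked Mon and Wed

df = pd.DataFrame({'Title': ['Calculus', 'Physics'],
                   'raw_day': [['一'], ['一', '二']]})

selected = set(np.array(days)[days_select])  # {'一', '三'}
subset_mask = df['raw_day'].apply(lambda x: set(x).issubset(selected))
exact_mask = df['raw_day'].apply(lambda x: set(x) == selected)

print(df[subset_mask])  # Calculus only: its single day 一 is within the selection
print(df[exact_mask])   # empty: no course meets exactly on 一 and 三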
Esempio n. 30
0
def app():
    st.balloons()
    
    st.markdown("# Visualizations :art:")

    menu = ["Number of Tweets per Day", "Number of Retweets per Day", 
    "Number of Likes per Day", "Most Common Tweets", "Sentiment Scores", "Common Entities"]
    choice = st.selectbox("View", menu)

    if choice == "Number of Retweets per Day":
        fig1 = px.histogram(covid_data, x="datetime", color="retweets", title="Number of Retweets Per Day")
        st.write(fig1)


    elif choice == "Number of Likes per Day":
        fig2 = px.histogram(covid_data, x="datetime", color="likes" ,title="Likes Per Day")
        st.write(fig2)


    elif choice == "Most Common Tweets":
        st.write("Word Cloud for Most Common Tweets")
        stop_words = get_stop_words('english')
        concat_quotes = ' '.join(
            [i for i in covid_data.text_without_stopwords.astype(str)])
        stylecloud.gen_stylecloud(text=concat_quotes,
                                  icon_name='fab fa-twitter',
                                  palette='cartocolors.qualitative.Bold_9',
                                  background_color='white',
                                  output_name='tweets.png',
                                  collocations=False,
                                  custom_stopwords=stop_words)

        # Display the generated word-cloud image in the app
        st.image("tweets.png", width=780)

        # Display the most common words after stemming
        table_col, input_col = st.beta_columns([3, 2])

        # Use tokenize or split; same results either way
        covid_data['text_stem'] = covid_data['text_stem'].apply(
            lambda x: str(x).split())
        # Count the frequency of each word across all tweets
        top = Counter(
            [item for sublist in covid_data['text_stem'] for item in sublist])

        with input_col:
            top_n = st.slider("How many of the common words do you want to see?", 1, 50, 10)
            temp = pd.DataFrame(top.most_common(top_n))
            temp.columns = ['common_words', 'count']

        with table_col:
            fig = px.pie(temp, values='count', names='common_words',
                         title='Top Common Words',
                         hover_data=['common_words'], color_discrete_sequence=px.colors.qualitative.G10)
            fig.update_layout(showlegend=False, width=450, height=450)
            st.write(fig)
        
    elif choice == "Sentiment Scores":
        pie_col, input_col = st.beta_columns([3, 2])
        # Convert the text_stem column to string type; NRCLex only accepts str input
        covid_data['text_stem'] = covid_data['text_stem'].astype(str)
        #Create a text object
        text_object = NRCLex(' '.join(covid_data['text_stem']))

        # Build a two-column dataframe of emotion counts from the text object
        sentiment_scores = pd.DataFrame(list(text_object.raw_emotion_scores.items()))
        sentiment_scores = sentiment_scores.rename(columns={0: "Sentiment", 1: "Count"})
        with input_col:
            num_n = st.slider("Change Pie Chart Values Here", 1, 10, 5)
            sentiment_scores = sentiment_scores.head(num_n)

            btn = st.button("Show Table")
            colorscale = [[0, '#272D31'], [.5, '#ffffff'], [1, '#ffffff']]
            font=['#FCFCFC', '#00EE00', '#008B00']
            if btn:
                fig =  ff.create_table(sentiment_scores, colorscale=colorscale,
                font_colors=font)
                st.write(fig)
        
        with pie_col:
            fig = px.pie(sentiment_scores, values='Count', names='Sentiment',
                         title='Top Emotional Affects',
                         hover_data=['Sentiment'],
                         color_discrete_sequence=px.colors.qualitative.Dark24)
            fig.update_traces(textposition='inside', textinfo='percent+label')
            fig.update_layout(showlegend=False, width=450, height=450,
                              font=dict(color='#383635', size=15))
            st.write(fig)

        # Table of each matched word and its sentiment labels
        st.title("Table Showing Words & Sentiments")

        sentiment_words = pd.DataFrame(list(text_object.affect_dict.items()),
                                       columns=['words', 'sentiments'])

        num_o = st.slider("Change table size", 1, 100, 50)
        sentiment_words = sentiment_words.head(num_o)
        
        fig = go.Figure(data=[go.Table(
            columnwidth=[1, 2],
            header=dict(values=list(sentiment_words[['words', 'sentiments']].columns),
                        fill_color='maroon',
                        align=['left', 'center'],
                        height=40,
                        font=dict(color='white', size=18)),
            cells=dict(values=[sentiment_words.words, sentiment_words.sentiments],
                       fill_color='lightseagreen',
                       align='left'))])

        fig.update_layout(margin=dict(l=5, r=5, b=10, t=10))
        st.write(fig)

    elif choice == "Common Entities":
        st.write("Word Cloud for Most Common Entities")

        # remove duplicate claims (Not really needed since dropped already)
        words = covid_data.text_stem.unique()
        # Named-entity recognition with spaCy's small English model
        nlp = en_core_web_sm.load()
        corpus = list(nlp.pipe(words[:700]))
        # Count how often each entity string appears across the corpus
        all_ents = defaultdict(int)
        for doc in corpus:
            for ent in doc.ents:
                all_ents[str(ent)] += 1
        sorted_ents = pd.DataFrame(sorted(all_ents.items(),
                                          key=operator.itemgetter(1),
                                          reverse=True),
                                   columns=['entities', 'count'])

        stop_words = get_stop_words('english')
        hashtags = sorted_ents['entities'].dropna().tolist()
        unique_entities = " ".join(hashtags)
        stylecloud.gen_stylecloud(text=unique_entities,
                                  icon_name='fas fa-comments',
                                  palette='cartocolors.qualitative.Prism_8',
                                  background_color='white',
                                  output_name='entities.png',
                                  collocations=False,
                                  custom_stopwords=stop_words)
        
        # Display the generated word-cloud image in the app
        st.image("entities.png", width=780)

    else:
        fig3 = px.histogram(covid_data, x="datetime", title="Number of Tweets Per Day")
        st.write(fig3)
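For the "Sentiment Scores" branch above, NRCLex does the heavy lifting: `raw_emotion_scores` tallies emotion categories over the whole text, and `affect_dict` maps each matched word to its emotion labels. A tiny standalone sketch (the sample sentence and the commented outputs are illustrative only):

from nrclex import NRCLex

text_object = NRCLex("winning the game was a joy, but the crowd grew angry")

# Emotion category -> raw count, e.g. {'joy': 1, 'positive': 1, 'anger': 1, ...}
print(text_object.raw_emotion_scores)

# Matched word -> emotion labels, e.g. {'joy': ['joy', 'positive'], ...}
print(text_object.affect_dict)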