def main():
    """Render the MNIST demo page and train the chosen learner on demand.

    Data is loaded and preprocessed, the user picks between the plain
    deep-learning model and the active learner, hyper-parameters are
    collected in the left column and, once "Train" is pressed, the model is
    trained and evaluated in the right column. The accuracy is stored in the
    module-level ``dl_score`` or ``al_score``.
    """
    global al_score, dl_score

    st.title("MNIST - An Ng.")
    X_train, X_test, y_train, y_test = prepare_data()
    st.markdown('**Shape**')
    st.write('\nTraining set :', X_train.shape, "\nTest set :", X_test.shape)

    # Labels are reshaped into a single column before preprocessing.
    X_train, y_train = preprocess_data(X_train, y_train.reshape(-1, 1))
    X_test, y_test = preprocess_data(X_test, y_test.reshape(-1, 1))

    approach = st.radio("Approach", ("Deep Learning", "Active Learning"))
    passive = approach == "Deep Learning"

    # Both approaches use the same two-column layout: params left, output right.
    params_col, display_col = st.beta_columns([1, 2])

    with params_col:
        with st.beta_expander("Params", expanded=True):
            lr, epochs, batch_size = general_config()
            if not passive:
                n_initial, n_queries, strategy_name = al_config()
                # Anything other than 'uncertainty_sampling' means entropy.
                if strategy_name == 'uncertainty_sampling':
                    query_strategy = uncertainty_sampling
                else:
                    query_strategy = entropy_sampling

    with display_col:
        if not st.button("Train"):
            return

        with st.beta_container():
            if passive:
                model = PassiveLearner(X_train, y_train, X_test, y_test,
                                       epochs, batch_size, lr)
            else:
                model = CustomAcitveLearner(X_train, y_train, X_test, y_test,
                                            epochs, batch_size, lr, n_initial,
                                            n_queries, query_strategy)

            with st.spinner('Training...'):
                model.train()
            st.balloons()
            st.success("Train Successfully")

            score = model.evaluate(X_test, y_test)
            if passive:
                dl_score = score
                st.write("Accuracy of Deep learning: ", dl_score)
            else:
                al_score = score
                st.write("Accuracy of Active learning: ", al_score)
def _passphrase_matches(members, selected_name, password):
    """Return True if *password* equals the passphrase stored for the member.

    The passphrase is read from the last column of *members*. A name with no
    matching row counts as a mismatch instead of raising ``IndexError``.
    """
    rows = members.loc[members.Members == str(selected_name)]
    stored = rows.iloc[:, -1].values
    return len(stored) > 0 and password == str(stored[0])


def _append_and_show_logs(input_data):
    """Append one record to both logs (CSV and SQL) and show the newest rows."""
    input_df = pd.DataFrame(input_data)
    targets = (
        ('record.csv', 'table1', 'Continous Log'),
        ('week_log.csv', 'table2', 'Weekly Log'),
    )
    for csv_path, table, heading in targets:
        input_df.to_csv(csv_path, mode='a', header=False, index=None)
        input_df.to_sql(table, if_exists='append', con=engine, index=False,
                        index_label=None)
        log = pd.read_sql(table, con=engine, index_col=None)
        st.subheader(heading)
        # Reverse so the most recent entries come first.
        st.write(log.iloc[::-1].head())


def _submit(members, selected_name, password, input_data):
    """Store *input_data* if the passphrase is right, otherwise warn."""
    if _passphrase_matches(members, selected_name, password):
        st.balloons()
        _append_and_show_logs(input_data)
    else:
        st.sidebar.warning('Wrong passphrase')


def input_values():
    """Collect a member's weekly availability from the sidebar and log it.

    Two mutually exclusive sidebar forms are offered: one for members who can
    work this week and one for members who cannot. On "Submit details" the
    entered passphrase (lower-cased) is compared with the value in the last
    column of ``data.csv`` for the selected member; on a match the record is
    appended to ``record.csv`` / ``table1`` and ``week_log.csv`` / ``table2``
    and the latest rows of both tables are displayed.
    """
    data2 = pd.read_csv('data.csv', header=[0])

    if st.sidebar.checkbox('Work for this week'):
        selected_name = st.sidebar.selectbox('Name', options=data['Members'])
        days_selected = st.sidebar.multiselect('Days free to work',
                                               options=days)
        # Keyword arguments on purpose: the positional order is
        # (min_value, max_value, value), so the former `1.0, 1.0, 8.0`
        # produced an invalid slider (default 8 outside the range 1..1).
        hours = st.sidebar.slider('No.of hours per week will be able to work',
                                  min_value=1.0, max_value=8.0, value=1.0)
        team_willing = st.sidebar.multiselect('Team willing to work in',
                                              options=teams)
        password = str(st.sidebar.text_input('enter the passphrase')).lower()
        if st.sidebar.button('Submit details'):
            _submit(data2, selected_name, password, {
                'Name': [str(selected_name)],
                'Time': [str(datetime.datetime.today())],
                'Days': [str(days_selected)],
                'Hours': [str(hours)],
                'Reason': ['None'],
                'Team': [str(team_willing)],
            })
    elif st.sidebar.checkbox('Cannot Work this week'):
        selected_name = st.sidebar.selectbox('Name', options=data['Members'])
        reason = st.sidebar.text_input('Reason')
        password = str(st.sidebar.text_input('enter the passphrase')).lower()
        if st.sidebar.button('Submit details'):
            _submit(data2, selected_name, password, {
                'Name': [str(selected_name)],
                'Time': [str(datetime.datetime.today())],
                'Days': ['None'],
                'Hours': 0,
                'Reason': [str(reason)],
                'Team': ['None'],
            })
#import libraries import matplotlib.pyplot as plt import pandas as pd import streamlit as st import numpy as np import matplotlib #matplotlib.use('Agg') import seaborn as sns #Remove Warnings st.balloons() st.set_option('deprecation.showPyplotGlobalUse', False) st.title("Tips_Dataset") #import dataset df = pd.read_csv('tips.csv') #First thirty rows tips = df.head(30) #Display the table st.table(tips) st.header("Visualisation Using Seaborn") #bar plot st.subheader("Bar Plot") tips.plot(kind='bar') st.pyplot() #Displot st.subheader("Displot") sns.displot(tips['total_bill']) st.pyplot() #joinplot
def run_process():
    """Render a form for the endpoint's input schema and run it on demand.

    The input and output schemas of ``ENDPOINTS`` are displayed, one input
    widget is created per input property (a file uploader for image
    references, a text box pre-filled from the default payload otherwise),
    and on "SEND PAYLOAD" the payload is validated, processed and the
    response is displayed together with the elapsed time.

    Validation problems are only reported; processing is attempted
    regardless, as before.
    """
    input_schema = ENDPOINTS.input_schema
    output_schema = ENDPOINTS.output_schema

    st.markdown("## Schemas")
    st.markdown("### Input Schema")
    st.json(input_schema['properties'])
    st.markdown("### Output Schema")
    st.json(output_schema['properties'])

    st.markdown("## Payload")
    default_payload = PayloadGenerator(
        images_as_base64=False).generate(DEFAULT_PAYLOAD_IN)

    # Build the payload from one widget per schema property.
    input_payload = {}
    for key in input_schema["properties"]:
        prop = input_schema['properties'][key]
        key_type = prop.get("type") or prop.get("$ref")
        is_image = key_type == "#/definitions/Image"
        label = "{}: {}".format(key, prop)

        if is_image:
            val = st.file_uploader(label=label,
                                   type=["png", "jpg", "tif", "jp2"])
            if val is not None:
                # Convert image as base64 for processing
                val = streamlit_utils.encode_image_from_upload(val)
        else:
            val = st.text_input(label=label, value=default_payload[key])

        if val is None:
            continue
        try:
            # Text that is valid JSON (numbers, lists, objects) is decoded.
            input_payload[key] = json.loads(val)
        except (TypeError, ValueError):
            # Not JSON (json.JSONDecodeError is a ValueError) or not a
            # str/bytes value: keep the raw value.
            input_payload[key] = val

    if st.button(label="SEND PAYLOAD"):
        st.json(truncate_dict_for_debug(input_payload))
        try:
            jsonschema.validate(input_payload, input_schema)
            st.text("Input payload valid !")
        except (jsonschema.ValidationError, jsonschema.SchemaError) as e:
            st.text(e)

        @st.cache(show_spinner=False)
        def _process(input_payload):
            return ENDPOINTS.process(input_payload)

        # perf_counter is monotonic, so the reported duration cannot be
        # skewed by wall-clock adjustments.
        t1 = time.perf_counter()
        with st.spinner("Processing"):
            response = _process(input_payload)
        t2 = time.perf_counter()
        st.balloons()

        st.markdown("## Display")
        image, result = streamlit_utils.parse_payloads(input_payload,
                                                       response)
        st.image(image, channels="RGB", use_column_width=True,
                 caption="RGB image clipped to 8 bits")

        st.markdown("## Response")
        st.success("Response computed in {:.02f}s".format(t2 - t1))
        st.json(response)
def write(state):
    """Render the prediction page for the model stored in *state*.

    Args:
        state: session object; this function reads ``task``,
            ``trained_model``, ``X_before_preprocess``,
            ``y_before_preprocess`` and ``ignore_columns`` from it.

    Depending on the sidebar choice the page offers a single online
    prediction, a batch prediction on an uploaded csv/xlsx file (with an
    optional download link), or saving and downloading the model itself.
    """
    if state.task == "Regression":
        from pycaret.regression import predict_model, finalize_model, save_model
    elif state.task == "Classification":
        from pycaret.classification import predict_model, finalize_model, save_model
    else:
        from pycaret.clustering import predict_model, save_model
        # No finalize_model is imported for clustering; the None marker lets
        # the save path below report that instead of raising NameError.
        finalize_model = None

    def online_predict(model, input_df, target_type):
        """Make a prediction on online data.

        Args:
            model (object): a trained model
            input_df (pd.DataFrame): the input dataframe for predictions
            target_type (str): the type of training target

        Returns:
            The first value of the 'Label' column for regression and
            classification, or of the 'Cluster' column otherwise.
        """
        prediction_df = predict_model(model, data=input_df)
        if target_type in ('Regression', 'Classification'):
            return prediction_df['Label'][0]
        return prediction_df['Cluster'][0]

    if state.trained_model is None:
        st.error("Please Train a Model first!")
        return

    st.header("Make a Prediction on Given Input or Upload a File.")
    add_selectbox = st.sidebar.selectbox(
        "How would you like to predict?", ("Online", "Batch", "SaveModel")
    )
    X_before_preprocess = state.X_before_preprocess
    target_name = state.y_before_preprocess
    ignore_columns = state.ignore_columns
    trained_model = state.trained_model

    if add_selectbox == "Online":
        with st.spinner("Predicting ..."):
            input_df = retrieve_train_element(X_before_preprocess, target_name,
                                              ignore_columns, state.task)
            if st.button("Predict"):
                output = str(online_predict(trained_model, input_df,
                                            state.task))
                st.success(f'The Prediction is **{output}**')

    if add_selectbox == 'Batch':
        file_upload = st.file_uploader('Upload csv file for prediciton',
                                       type=["csv", "xlsx"])
        if file_upload is not None:
            # Use the text after the LAST dot so names such as
            # "sales.2021.csv" are still recognised as csv.
            file_extension = file_upload.name.rsplit('.', 1)[-1].lower()
            if file_extension == "csv":
                data = pd.read_csv(file_upload)
            else:
                data = pd.read_excel(file_upload)
            predictions = predict_model(trained_model, data=data)
            st.write(predictions)

            is_download = st.checkbox(
                "Do You Want to Download the Prediction File?", value=False)
            if is_download:
                file_extension = st.selectbox(
                    "Choose Csv or Excel File to Download",
                    options=[".csv", ".xlsx"])
                file_name = st.text_input("File Name", value="prediction",
                                          key=1)
                if file_name:
                    href = download_button(predictions, file_name, "Download",
                                           file_extension)
                    st.markdown(href, unsafe_allow_html=True)
                else:
                    st.error("File Name cannot be empty!")

    if add_selectbox == "SaveModel":
        is_download = st.checkbox("Do You Want to Download the Model?",
                                  value=False)
        if is_download:
            file_name = st.text_input("File Name", value="", key=2)
            is_finalize = st.checkbox(
                "Do You Want to Finalize the Model (not for clustering)?",
                value=False)
            if file_name:
                model_to_save = trained_model
                if is_finalize:
                    if finalize_model is None:
                        st.warning("Finalize is not available for clustering; "
                                   "saving the trained model as is.")
                    else:
                        model_to_save = finalize_model(trained_model)
                _, name = save_model(model_to_save, file_name)

                # Re-load the file that save_model just wrote so it can be
                # offered as a pickled download.
                with open(name, "rb") as f:
                    e = joblib.load(f)
                href = download_button(e, file_name, "Download", ".pkl",
                                       pickle_it=True)
                st.markdown(href, unsafe_allow_html=True)

                remove_cache = st.checkbox("Remove the Cache?", value=False)
                if remove_cache:
                    for pkl_path in Path(".").glob("*.pkl"):
                        pkl_path.unlink()
                    # glob() is a one-shot iterator, so scan the directory
                    # again to confirm that nothing is left.
                    if not any(Path(".").glob("*.pkl")):
                        st.success(
                            "Delete the Cache File from Local Filesystem!")
                        st.balloons()
            else:
                st.error("Please Give a File Name first!")
def write():
    """Render an interactive explorer for a CSV file picked from a folder.

    The page lets the user choose a file, inspect its rows, column names,
    shape, dtypes and summary statistics, and draw several kinds of plots.
    """
    st.title("First test with data (csv)")
    st.header("I hope everything works out")
    banner_html = """ <div style = "background-color:orange"><p align="center" style ="color:white;font_size:30px">(´・ᴗ・ )</p></div>"""
    st.markdown(banner_html, unsafe_allow_html=True)

    def file_selector(
        folder_path='C:/Users/tred1/Desktop/infectious_disease_modelling-master/data'
    ):
        """Offer the entries of *folder_path* in a select box; return the chosen path."""
        chosen = st.selectbox("Select a file", os.listdir(folder_path))
        return os.path.join(folder_path, chosen)

    filename = file_selector()
    st.info(f"You Selected {filename}")

    # ---------------------- Read the file ----------------------
    df = pd.read_csv(filename)

    # ---------------------- Show the data ----------------------
    # Show a chosen number of table rows
    if st.checkbox("Show Dataset"):
        row_count = st.number_input("Number of Rows to View", 1, 200)
        st.dataframe(df.head(row_count))

    # Show the column names
    if st.button("Column Names"):
        st.write(df.columns)

    # General information about the number of rows/columns
    if st.checkbox("Shape of Dataset"):
        st.write(df.shape)
        data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
        dimension = {
            'Rows': ("Number of Rows", 0),
            'Columns': ("Number of Columns", 1),
        }.get(data_dim)
        if dimension is None:
            st.write(df.shape)
        else:
            caption, axis = dimension
            st.text(caption)
            st.write(df.shape[axis])

    # Choose which columns to display
    if st.checkbox("Select Columns to Show"):
        chosen_columns = st.multiselect("Select", df.columns.tolist())
        st.dataframe(df[chosen_columns])

    # Count how often each value occurs in the second column
    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        st.write(df.iloc[:, 1].value_counts())

    # Data type of every column
    if st.button("Data Types"):
        st.write(df.dtypes)

    if st.checkbox("Summary"):
        st.write(df.describe().T)

    st.subheader("Data Visualization")
    st.subheader("Customizable Plot")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox(
        "Select Type of Plot",
        ["area", "bar", "line", "hist", "box", "kde"])
    selected_columns_names = st.multiselect("Select Columns To Plot",
                                            all_columns_names)

    if st.button("Generate Plot"):
        st.success(
            f"Generating Customizable Plot of {type_of_plot} for {selected_columns_names}")
        # area/bar/line have native Streamlit charts; every other kind goes
        # through pandas' plotting API.
        native_chart = {
            'area': st.area_chart,
            'bar': st.bar_chart,
            'line': st.line_chart,
        }.get(type_of_plot)
        if native_chart is not None:
            native_chart(df[selected_columns_names])
        elif type_of_plot:
            cust_plot = df[selected_columns_names].plot(kind=type_of_plot)
            st.write(cust_plot)
            st.pyplot()

    # ---------------- Pie Plot -------------------
    if st.checkbox("Pie Plot"):
        all_columns_names = df.columns.tolist()
        if st.button("Generate Pie Plot", key=1):
            st.success("Generating A Pie Plot")
            second_column_counts = df.iloc[:, 1].value_counts()
            st.write(second_column_counts.plot.pie(autopct="%1.1f%%"))
            st.pyplot()

    # ---------------- Seaborn -------------------
    if st.checkbox("Correlation Plot[Seaborn]"):
        st.write(sns.heatmap(df.corr(), annot=True))
        st.pyplot()

    # --------------- Count Plot ------------------
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox("Primary Column to GroupBy",
                                   all_columns_names)
        selected_columns_names = st.multiselect("Select Columns",
                                                all_columns_names)
        if st.button("Plot", key=2):
            st.text("Generate Plot")
            if not selected_columns_names:
                # Nothing selected: fall back to the second column's counts.
                vc_plot = df.iloc[:, 1].value_counts()
            else:
                grouped = df.groupby(primary_col)
                vc_plot = grouped[selected_columns_names].count()
            st.write(vc_plot.plot(kind="bar"))
            st.pyplot()

    if st.button("Шарики"):
        st.balloons()
def main():
    """Render a tour of Streamlit's display elements and input widgets.

    The function only produces UI output: text and status elements, tables,
    charts, "magic" bare expressions, every basic input widget, an image
    fetched over HTTP, a local video file, a sidebar navigation box and a
    progress bar / spinner demo that sleeps for about 15 seconds in total.
    """
    # Title / headers
    st.title('StreamLit Concepts')
    st.header("To Create Header")
    st.subheader("To Create subheader")
    st.subheader("Do you want to build the GUI using web app")
    st.code('pip install streamlit')

    # Text, raw HTML and LaTeX
    st.text("hello Streamlit")
    html_temp = """ <div style="background-color:tomato;padding:10px"> <h2 style="color:white;text-align:center;">Streamlit ML App </h2> </div> """
    st.markdown(html_temp, unsafe_allow_html=True)
    st.markdown('<i class="material-icons">{}</i>'.format("people"),
                unsafe_allow_html=True)
    st.latex(r''' e^{i\pi} + 1 = 0 ''')
    # The doctest continuation prompts ("...") that had been pasted into this
    # formula were removed; they rendered as stray dots.
    st.latex(r'''
        a + ar + a r^2 + a r^3 + \cdots + a r^{n-1} =
        \sum_{k=0}^{n-1} ar^k =
        a \left(\frac{1-r^{n}}{1-r}\right)
        ''')
    st.write(['st', 'is <', 3])
    st.write("✔️ Time up!")
    st.code('s="Happy" for i in range(0,10): print(s)')

    # Tables and dataframes
    df1 = pd.DataFrame(np.random.randn(50, 5),
                       columns=('col %d' % i for i in range(5)))
    my_table = st.table(df1)
    df = st.cache(pd.read_csv)(
        "https://github.com/SurendraRedd/StreamlitProjects/raw/master/lang.csv")
    is_check = st.checkbox("Display Data")
    if is_check:
        st.write(df)
    st.write('Dataframe example')
    st.dataframe(df1)

    # Markdown
    st.markdown("### This is a Markdown")
    st.markdown("### 🎲 Demo on streamlit")
    # Trailing space added: the two adjacent literals are concatenated and
    # used to read "applicationswith".
    st.markdown("Streamlit python package is used to develop applications "
                "with out knowing much web application concepts.")
    st.markdown("**♟ Examples ♟**")
    st.markdown("* Happly learning!.")

    # Status messages in a colourful format
    st.success("Successful")
    st.info("Information!")
    st.warning('this is a warning')
    st.error("this is an error Danger")
    data = {'1': "True", '2': "True", '3': "False"}
    st.json(data)

    # Exception display: st.exception expects an exception instance.
    st.exception(IndexError('list out of index'))
    place_holder = st.empty()
    place_holder.text('Replaced!')

    # Help of the function
    st.help(range)
    st.write("Text with write")
    st.write(range(10))

    # Charts
    st.line_chart({"data": [1, 5, 2, 6, 2, 1]})
    st.area_chart({"data": [1, 5, 2, 6, 2, 1]})
    st.bar_chart({"data": [1, 5, 2, 6, 2, 1]})
    arr = np.random.normal(1, 1, size=100)
    fig, ax = plt.subplots()
    ax.hist(arr, bins=20)
    st.pyplot(fig)

    # The bare literal and bare names below are intentional: Streamlit
    # "magic" renders standalone expressions, so they must stay as-is.
    '''
    # Markdown magic
    This is some _**text**_.
    '''
    df = pd.DataFrame({'col1': [1, 2, 3]})
    df  # <-- Draw the dataframe
    x = 10
    'x', x  # <-- Draw the string 'x' and then the value of x

    # Select box
    exp = st.selectbox("Select your experience: ", np.arange(1, 40, 1))
    # Slider
    exp = st.slider("Select your experience: ", min_value=1, max_value=40,
                    value=1, step=1)
    # Multiselect
    movies = st.multiselect(
        "Select Balayya Favourite movies?",
        ["SamaraSimhaReddy", "Simha", "NarasimhaNaidu", "Legend"])

    # Will only run once if already cached
    df = load_data()
    st.write(df)

    # One of every basic widget
    st.button('Click')
    st.checkbox('Check the checkbox')
    st.radio('Radio Button', [1, 2, 3])
    st.selectbox('Select', [1, 2, 3])
    st.multiselect('Multiselect', [1, 2, 3])
    st.slider('slide', min_value=0, max_value=10)
    st.text_input('Enter Username')
    st.number_input('Enter a Number')
    st.text_area('Enter Text Here!')
    st.date_input('Date Input')
    st.time_input('Time entry')
    st.file_uploader('File Uploader')
    st.beta_color_picker('Select color')

    st.echo()
    with st.echo():
        text = 's="Happy Learning!" for i in range(0,10): print(s)'
        st.write(text)

    # Image opened straight from the URL
    img = Image.open(
        urllib.request.urlopen(
            "https://github.com/SurendraRedd/ChallengeDeploy/raw/main/singlefile/Solution.png"
        ))
    st.image(img, width=300, caption="Simple Image")

    # Video playing: read the bytes inside `with` so the handle is closed.
    with open("sample-mp4-file.mp4", "rb") as video_file:
        vid_file = video_file.read()
    st.video(vid_file)

    # Widgets
    if st.checkbox("Show/hide"):
        st.text("Showing or Hiding Widget")

    # Radio
    status = st.radio("What is your status", ("Married", "Single"))
    if status == 'Married':
        st.success("You are Married")

    # Add a selectbox to the sidebar
    add_selectbox = st.sidebar.selectbox('Navigation',
                                         ('Home', 'About', 'Help'))
    if add_selectbox == 'About':
        st.write('You have selected about page')
    elif add_selectbox == 'Home':
        st.write('you have selected Home page')
    else:
        st.write('you have selected help page')

    # Sample progress bar
    my_bar = st.progress(0)
    for percent_complete in range(100):
        time.sleep(0.1)
        my_bar.progress(percent_complete + 1)

    with st.spinner('Wait for it...'):
        time.sleep(5)
    st.success('Done!')
    st.balloons()
    st.write('Happy Stream Lite App Learning')
def Question_Answer(qna):
    """Show a short quiz, grade the typed answers and report the mistakes.

    Args:
        qna: sequence of mappings, each with a ``"question"`` and an
            ``"answer"`` entry.

    At most the first ten questions are asked. Each one gets a text area
    (widget keys 1..n); on "Submit" the typed answers are graded with
    ``get_similarity`` against the expected answers of the questions that
    were actually asked. The score is shown as a percentage, followed by the
    questions graded 0 together with their correct answers, or by balloons
    when nothing was graded 0. Nothing is rendered for an empty *qna*.
    """
    max_questions = 10
    questions = [item["question"] for item in qna][:max_questions]
    # Truncated the same way so answers and typed inputs always line up.
    answers = [item["answer"] for item in qna][:max_questions]
    if not questions:
        return

    user_answer = []
    for key, question in enumerate(questions, start=1):
        st.write(question)
        user_answer.append(
            st.text_area("Enter the Answer:", height=2, key=key))

    if not st.button("Submit", key=1):
        return

    correct_wrong = get_similarity(answers, user_answer)
    total_marks = (sum(correct_wrong) / len(correct_wrong)) * 100
    st.write("Awesome!!!", str(total_marks))

    if 0 in correct_wrong:
        st.write(
            "You have done some mistakes, here is where you went wrong..."
        )
        for index, mark in enumerate(correct_wrong):
            if mark == 0:
                st.write("Mistaken Question:", questions[index])
                st.write("Correct answer: ", answers[index])
    else:
        st.balloons()
def check_answer(questionNum, answer):
    """Submit *answer* for question *questionNum* and return the reply text.

    The answer is sent as a GET request to the module-level ``url`` together
    with ``my_key``. Balloons are shown when the word "correct" occurs
    (case-insensitively) within the first eight characters of the reply.
    """
    query = {'key': my_key, 'question': questionNum, 'answer': answer}
    reply = requests.get(url, params=query).text

    verdict_prefix = reply[:8].lower()
    if 'correct' in verdict_prefix:
        st.balloons()

    return reply
# Ordered (keyword, code) tables. The first keyword contained in the
# lower-cased job title wins, mirroring the original if/elif chains.
_SWE_JOB_CODES = (
    ('reg', 1),
    ('back-end', 2),
    ('full-stack', 3),
    ('web', 4),
    ('data', 5),
    ('mobile', 6),
    ('systems', 7),
)
_DATA_JOB_CODES = (
    ('data scientist', 3),
    ('data engineer', 2),
    ('analyst', 1),
    ('director', 4),
    ('manager', 5),
    ('machine learning engineer', 6),
    ('unspecified', 7),
    ('research', 8),
    ('software', 9),
)
_DESIGN_JOB_CODES = (
    ('product designer', 6),
    ('ui/ux designer', 5),
    ('graphic designer', 2),
    ('structural designer', 3),
    ('web designer', 4),
    ('unspecified', 1),
)


def _code_for_title(title, keyword_codes):
    """Return the code of the first keyword found in ``title`` (case-insensitive).

    Returns ``None`` when no keyword matches, exactly like the if/elif chains
    this replaces.
    """
    lowered = title.lower()
    for keyword, code in keyword_codes:
        if keyword in lowered:
            return code
    return None


def _swe_seniority_code(choice):
    """Map the seniority radio choice to the code used by the SWE feature vector.

    The previous check, ``'senior' in title.lower()``, was also true for the
    option "Not Senior", so both radio options produced '1'. A leading "not"
    now falls through to the default code.
    """
    lowered = choice.lower()
    if 'senior' in lowered and not lowered.startswith('not'):
        return '1'
    if 'junior' in lowered:
        return '2'
    return '3'


def _binary_seniority_code(choice):
    """Return 1 for the exact choice "Senior", otherwise 2."""
    return 1 if choice == "Senior" else 2


def _company_and_hq_inputs():
    """Render the company/job widgets shared by all job types.

    Returns:
        Tuple ``(rating, age, jobhq)`` in widget order.
    """
    st.subheader('Company Details: \n Check Glassdoor for exact values, if unsure')
    rating = st.slider('Glassdoor Rating of the Company',
                       min_value=0.0, max_value=5.0, step=0.1)
    age = st.number_input('Age of the Company', step=1.0, min_value=0.0)
    st.subheader('Details about the Job:')
    jobhq = st.radio("Is the Job at Headquarters? (0 for No, 1 for Yes)",
                     options=[0, 1])
    return rating, age, jobhq


def _skill_radio(skill):
    """Render a 0/1 radio for one skill and return the selected value."""
    return st.radio(f"{skill} (0 for No, 1 for Yes)", options=[0, 1])


def _predict_on_click(model, features):
    """Show the 'Predict' button and, when clicked, display the model's estimate."""
    final_features = np.array(features).reshape(1, -1)
    if st.button('Predict'):
        prediction = model.predict(final_features)
        st.balloons()
        st.success(f'Your predicted salary is US$ {round(prediction[0],3)*1000} ')


def write():
    """Render the Glassdoor salary predictor page.

    The user picks a job family (Software Engineering, Data Science or
    Product Design). For the chosen family the matching joblib model is
    loaded, the input widgets are rendered, and a feature vector is built in
    the order the original code used. Pressing 'Predict' shows the model
    output multiplied by 1000 as a US$ amount.

    Returns:
        None. All output goes through Streamlit calls.
    """
    st.title('Glassdoor Salary Predictor')
    st.header('Predict a potential salary')
    st.markdown('''Uses Sci-Kit Learn Random Forest Regressor based on the following specifiable traits:''')
    option = st.selectbox(
        'Select Job Type',
        ('Software Engineering', 'Data Science', 'Product Design'))

    if option == 'Software Engineering':
        # The CSV is needed here: it supplies the job-type choices.
        df = pd.read_csv('swe_cleaned.csv')
        swe_model = joblib.load('model.pkl')

        rating, age, jobhq = _company_and_hq_inputs()
        job_type_num = st.selectbox("Job Type", options=df["job_simp"].unique())
        job_type_num1 = _code_for_title(job_type_num, _SWE_JOB_CODES)

        seniority_num = st.radio("Senior role?", options=["Senior", "Not Senior"])
        seniority_code = _swe_seniority_code(seniority_num)

        st.subheader('Your skills:')
        python_yn = _skill_radio("Python")
        java_yn = _skill_radio("Java")
        javascript_yn = _skill_radio("Javascript")
        c_yn = _skill_radio("C")
        html_yn = _skill_radio("HTML/CSS")

        # The seniority code is deliberately repeated three times, as in the
        # original feature layout (12 features).
        # NOTE(review): the codes are strings, so np.array() coerces the whole
        # vector to a string dtype; confirm the model accepts that.
        features = [
            rating, jobhq, age, python_yn, java_yn, javascript_yn, c_yn,
            html_yn, job_type_num1, seniority_code, seniority_code,
            seniority_code,
        ]
        _predict_on_click(swe_model, features)

    elif option == 'Data Science':
        data_model = joblib.load('data_model.pkl')

        rating, age, jobhq = _company_and_hq_inputs()
        job_type_num = st.selectbox("Job Type", options=[
            'Data Scientist', 'Data Engineer', 'Analyst', 'Director',
            'Manager', 'Machine Learning Engineer', 'Research', 'Software'
        ])
        job_type_num1 = _code_for_title(job_type_num, _DATA_JOB_CODES)

        seniority_num = st.radio("Senior role?", options=["Senior", "Not Senior"])
        seniority_num1 = _binary_seniority_code(seniority_num)

        st.subheader('Your skills:')
        python_yn = _skill_radio("Python")
        R_yn = _skill_radio("R")
        aws = _skill_radio("AWS")
        spark = _skill_radio("Spark")
        excel = _skill_radio("Excel")

        features = [
            rating, jobhq, age, python_yn, R_yn, aws, spark, excel,
            job_type_num1, seniority_num1,
        ]
        _predict_on_click(data_model, features)

    elif option == 'Product Design':
        dsgn_model = joblib.load('dsgn_model.pkl')

        rating, age, jobhq = _company_and_hq_inputs()
        job_type_num = st.selectbox("Job Type", options=[
            'Product Designer', 'UI/UX Designer', 'Graphic Designer',
            'Structural Designer', 'Web Designer', 'Unspecified'
        ])
        job_type_num1 = _code_for_title(job_type_num, _DESIGN_JOB_CODES)

        seniority_num = st.radio("Senior role?", options=["Senior", "Not Senior"])
        seniority_num1 = _binary_seniority_code(seniority_num)

        st.subheader('Your skills:')
        figma_yn = _skill_radio("Figma")
        adobe_yn = _skill_radio("Adobe Creative Suite")
        cad_yn = _skill_radio("CAD Software")
        html_css_js_yn = _skill_radio("HTML/CSS/JavaScript")
        photo_yn = _skill_radio("Photography")
        graphic_yn = _skill_radio("Graphics")

        features = [
            rating, jobhq, age, figma_yn, adobe_yn, cad_yn, html_css_js_yn,
            photo_yn, graphic_yn, job_type_num1, seniority_num1,
        ]
        _predict_on_click(dsgn_model, features)
# main(): Streamlit entry point for the 'MACHINE LEARNING FOR YOU..' app.
#
# A sidebar selectbox chooses between two modes:
#   * 'WELCOME' - renders get_content("README.md") as markdown.
#   * 'EXPLORE' - runs the upload/clean/encode/train flow below.
#
# EXPLORE flow, in the order the statements appear:
#   1. A 'SEED' sidebar slider (1..50) seeds numpy's RNG; train and test files
#      are requested through st.file_uploader (types 'csv' and 'xlsx').
#   2. When both uploads exist they are read with pd.read_csv, column names are
#      lower-cased, each frame gets a "marker" column ("train"/"test"), the
#      frames are concatenated, passed through reduce_mem_usage, and duplicate
#      columns/rows are dropped.
#   3. User-selected date columns (defaults from date_catcher) are parsed with
#      pd.to_datetime; date_parser_v1 is called when at least one is selected.
#   4. The user picks an identifier column for the test submission and a
#      target column; a seaborn countplot of the target is shown.
#   5. A per-column "missing (%)" table is built; a slider sets the maximum
#      missing percentage a column may have to be kept, and NaNs are filled
#      using the MODE / MEDIAN / MEAN option.
#   6. Correlation with the target (more than 50 columns) or a heatmap is
#      shown, then check_relationship, optional remove_features, and
#      remove_mono_unique.
#   7. pd.get_dummies(drop_first=True) encodes the remaining columns,
#      feature_scaling scales train/test, and download_csv links are rendered.
#   8. For a two-valued target the class percentages are computed and
#      balance_out is called when either class reaches 70%.
#   9. A model name from MODELS is chosen in the sidebar; model_parameter,
#      build_model and initialize_model are called, and validation/test
#      accuracy plus F1 are reported, followed by st.balloons().
#   Separate messages are shown when only one (or neither) file is uploaded.
#
# NOTE(review): this function is flattened onto wrapped lines, so the extent of
#   trailing '#' comments and the pairing of some `else:` clauses cannot be
#   determined from this text; confirm against the original file.
# NOTE(review): uploads of type 'xlsx' are accepted but both files are read
#   with pd.read_csv; presumably an .xlsx upload fails. Confirm.
# NOTE(review): `.median().iloc[0]` and `.mean().iloc[0]` yield one scalar (the
#   first column's statistic) that fills every column, whereas
#   `.mode().iloc[0]` is a per-column row. Looks unintended; confirm.
# NOTE(review): `.corr()[target_col[0]].sort_values(by=..., ...)` is called on
#   what looks like a Series, and Series.sort_values has no `by` parameter.
#   Confirm whether the >50-column path raises.
# NOTE(review): `_train` / `_target` appear to be assigned only inside the
#   two-class branch but are passed to initialize_model afterwards; confirm
#   behaviour for non-binary targets.
def main(): st.title('MACHINE LEARNING FOR YOU..') options = ['WELCOME', 'EXPLORE'] option = st.sidebar.selectbox('Select option: ', options) if option == options[0]: welcome_text = st.markdown(get_content("README.md")) elif option == options[1]: #ensuring producibility seed = st.sidebar.slider('SEED', 1, 50, step=1) np.random.seed(seed=seed) # np.random.RandomState(seed=seed) # welcome_text.empty() try: train_df = st.file_uploader("Upload Train dataset: ", type=['csv', 'xlsx']) test_df = st.file_uploader("Upload Test dataset: ", type=['csv', 'xlsx']) except Exception as e: st.warning(e) if train_df is not None and test_df is not None: # ##st.code(""" # df.select_dtypes(include=[np.number]).shape # """, language='python') st.success('Upload complete. Status: SUCCESS') train = pd.read_csv(train_df) test = pd.read_csv(test_df) train.columns = map(str.lower, train.columns) test.columns = map(str.lower, test.columns) train["marker"] = "train" test["marker"] = "test" df = pd.concat([train, test], axis=0) df, mem_reduced = reduce_mem_usage(df) st.write("MEMORY SAVED: ", mem_reduced, "MB") df = df.loc[:, ~df.columns.duplicated()].drop_duplicates() keep_cols = df.columns datetime_ = st.multiselect('SELECT FEATURES OF TYPE DATE: ', df.columns.tolist(), date_catcher(df)) if datetime_: datetime_ = list(datetime_) for col_ in datetime_: try: df[col_] = pd.to_datetime(df[col_], infer_datetime_format=True, format="%y%m%d") except Exception as e: st.write("EXCEPTION (can be ignored): ", str(e)) # else: st.write("DATETIME COLUMNS PARSED SUCCESSFULLY.") else: st.write("NO DATE COLUMN FOUND.") full_df = None full_train = None full_test = None st.dataframe(df) #show code df_head() st.write("SHAPE: ", df.shape) #show code df_shape() id_ = st.multiselect( 'SELECT *ONE* FEATURE FOR FINAL TEST FILE (ex: ID): ', test.columns.tolist(), ["id" if "id" in test.columns else test.columns.tolist()[0]]) if not id_: st.warning( "YOU REALLY SHOULD PICK AN IDENTIFY FOR YOUR TEST SUBMISSION FILE." 
) test_id = test[id_] #store ID for test dataframe train_data = df[df["marker"] == "train"] # test_data = df[df["marker"] == "test"] target_col = st.multiselect( "Choose preferred target column: ", train.columns.tolist(), [ "target" if "target" in train.columns else train.columns.tolist()[-1] ]) # st.write(target_col) if target_col: target_col = list(target_col) target_cp, ax = plt.subplots() sns.countplot(data=train_data, x=target_col[0]) st.pyplot(target_cp) plot_target() else: st.warning("TARGET VARIABLE NOT YET DECLARED") if len(datetime_) < 1: st.write("NO DATETIME COLUMN FOUND. SKIPPING......") else: st.write( "INITIALIZING DATE FEATURE ENGINEERING VIA SANGO SHRINE...." ) date_parser_v1(df, datetime_) df = df.apply(lambda col: col.str.lower() if (col.dtype == 'object') else col) st.dataframe(df) st.write("DATE FEATURE ENGINEERING COMPLETE") num_df = df.select_dtypes(include=[np.number]).shape[1] obj_df = df.select_dtypes(include='object').shape[1] if num_df: st.write('Numerical column count: ', num_df) st.code('''df.select_dtypes(include=[np.number])''', language='python') if obj_df: cat_cols = [ col for col in df.columns if col not in list(df.select_dtypes(include=[np.number])) ] st.write('Categorical column count: ', obj_df) #show code st.code('''#see categorical columns df.select_dtypes(include=['object']) ''', language='python') st.write(cat_cols[:5]) st.subheader("Data Summary") st.write(df.describe().T) #show code st.code(''' df.describe() ''', language='python') train_data = df[df["marker"] == "train"] test_data = df[df["marker"] == "test"] train_data = train_data.dropna(subset=[target_col[0]]) test_data.loc[test_data["marker"] == "test", target_col[0]] = "N/A" # pre_miss_df = pd.concat([train_data, test_data], axis=0) target_var = train_data[target_col[0]] missing_df = pd.DataFrame(data=np.round( (pre_miss_df.isnull().sum() / pre_miss_df.shape[0]) * 100, 1), columns=["missing (%)"]) #show code st.code(''' 
pd.DataFrame(data=np.round((train.isnull().sum()/train.shape[0])*100,1), columns=["missing (%)"]) ''', language='python') st.dataframe(missing_df.T) if missing_df["missing (%)"].any(): #check for nans (True if any) keep = st.slider("KEEP COLUMNS WITH MISSING DATA (%)", 0, 100, 50, 10) keep_cols = missing_df[ missing_df["missing (%)"] <= int(keep)].index keep_cols = list(keep_cols) handle_nan = st.selectbox(label="HANDLE NANs", options=["MODE", "MEDIAN", "MEAN"]) """Read on SimpleImputer""" if handle_nan == "MODE": full_train = train_data[keep_cols].fillna( train_data[keep_cols].mode().iloc[0]) full_test = test_data[keep_cols].fillna( test_data[keep_cols].mode().iloc[0]) elif handle_nan == "MEDIAN": full_train = train_data[keep_cols].fillna( train_data[keep_cols].median().iloc[0]) full_test = test_data[keep_cols].fillna( test_data[keep_cols].median().iloc[0]) elif handle_nan == "MEAN": full_train = train_data[keep_cols].fillna( train_data[keep_cols].mean().iloc[0]) full_test = test_data[keep_cols].fillna( test_data[keep_cols].mean().iloc[0]) else: st.write("NO SELECTED WAY TO HANDLE NAN") #precaution st.write("MISSING DATA PADDED") else: st.write("NO MISSING DATA") #conserve memory df = None if full_train is not None and full_test is not None: new_df = pd.concat([full_train, full_test], axis=0) #use padded data else: new_df = pre_miss_df #use this since missing data wasn't present st.dataframe(new_df.head(50)) st.write("SHAPE: ", new_df.shape) if new_df.shape[1] > 50: st.write("ABSOLUTE CORRELATION WITH TARGET VARIABLE") st.write(new_df[new_df["marker"] == "train"].corr()[ target_col[0]].sort_values(by=target_col[0], ascending=False).T) st.write("[correlation is not causation]") #show code heatmap_code() else: heatmap_fig, ax = plt.subplots() sns.heatmap(new_df[new_df["marker"] == "train"].corr(), annot=True, linewidth=.5, fmt='.1f', ax=ax) st.pyplot(heatmap_fig) #show code heatmap_sns() new_df_cols = list(new_df.columns) if target_col[0] in list(new_df.columns): 
new_df_cols.remove(target_col[0]) if id_[0] in list(new_df.columns): new_df_cols.remove(id_[0]) st.subheader("PLOTTING POSSIBLE RELATIONSHIP WITH TARGET FEATURE") check_relationship(new_df_cols, target_col[0], new_df[new_df["marker"] == "train"]) #handle features excluded remove_feat = st.multiselect("SELECT FEATURE(S) TO DROP", new_df_cols) if remove_feat: new_df = remove_features(new_df, remove_feat) #show code st.code(''' df.dropna([list of columns to drop]), axis=1, inplace=True) ''', language='python') else: st.write("KEEPING ALL FEATURES") st.dataframe(new_df.head(50)) st.write(new_df.shape) #test_id = new_df[new_df["marker"] == "test"][id_] #store ID for test dataframe #remove monotonic or unique features new_df = remove_mono_unique(dataframe=new_df, cols=new_df.columns) st.dataframe(new_df.head(50)) st.write(new_df.shape) st.write("MONOTONIC AND UNIQUE FEATURES REMOVED") NOT_DUMMY = [ target_col[0], "target", "marker", "claim", "prediction", "response" ] #features we do not need the dummy for exclude_cols = [ col for col in new_df.columns if col not in NOT_DUMMY ] exclude_cols = list( set(exclude_cols).intersection(list(new_df.columns))) dum_df = pd.get_dummies(new_df[exclude_cols], drop_first=True) dum_df["marker"] = new_df["marker"].copy() dum_df[target_col[0]] = new_df[target_col[0]].copy() st.dataframe(dum_df.head(100)) st.write(dum_df.shape) st.write("CATEGORICAL FEATURES ENCODED") new_df = None dum_train = dum_df[dum_df["marker"] == "train"].drop( [target_col[0], "marker"], axis=1) dum_train_y = pd.DataFrame(list( dum_df.iloc[:dum_train.shape[0]][target_col[0]].astype('int')), columns=["target"]) dum_test = dum_df[dum_df["marker"] == "test"].drop( [target_col[0], "marker"], axis=1) #feature scaling train_scaled, test_scaled = feature_scaling(dum_train, dum_test) st.subheader("Train Data") st.dataframe(train_scaled.head(200)) st.write(train_scaled.shape) st.markdown(download_csv(train_scaled, "cpt_train.csv", info="DOWNLOAD TRAIN FILE"), 
unsafe_allow_html=True) st.subheader("Test Data") st.dataframe(test_scaled.head(200)) st.write(test_scaled.shape) st.markdown(download_csv(test_scaled, "cpt_test.csv", info="DOWNLOAD TEST FILE"), unsafe_allow_html=True) #downsample/upsample # _train = _target = None if len(set(dum_train_y["target"])) == 2: #binary classification classones = int( (dum_train_y[dum_train_y["target"] == 1].count() / dum_train_y.shape[0]) * 100) classzeroes = int( (dum_train_y[dum_train_y["target"] == 0].count() / dum_train_y.shape[0]) * 100) if classones >= 70 or classzeroes >= 70: st.warning("IMBALANCED TRAINING SET DETECTED!") st.write("CLASS 1(", classones, "%) to CLASS 0(", classzeroes, "%)") _train, _target, balance_type = balance_out( train_scaled, dum_train_y, seed) if balance_type != "DEFAULT": st.subheader("Train Data (BALANCED)") st.dataframe(_train.head(50)) st.write(_train.shape) st.markdown(download_csv( _train, "cpt_train_balanced.csv", info="DOWNLOAD BALANCED TRAIN FILE"), unsafe_allow_html=True) else: _train, _target = train_scaled.copy(), dum_train_y.copy() else: st.write("") st.header('TRAINING/TESTING SECTION') model = st.sidebar.selectbox('Select Algorithm: ', MODELS) #algorithm selection and hyperparameter tuning params = model_parameter(model) model_ = build_model(model, params, seed) train_, val_, test_, test_resp = initialize_model(model=model_, Xtrain_file=_train, ytrain_file=_target["target"], \ test_file=test_scaled, test_dataframe=test_id, target_var_=target_col[0], seed=seed) if test_resp is not None: # st.write("Train Accuracy (on train data: ", sklearn.metrics.accuracy_score(train_[0], train_[1])) st.write( "VALIDATION Accuracy (on train data): ", np.round( sklearn.metrics.accuracy_score(val_[0], val_[1]) * 100, 1), '(%)') st.write( "TEST Accuracy (on train data): ", np.round( sklearn.metrics.accuracy_score(test_[0], test_[1]) * 100, 1), '(%)') st.write( "TEST F1 SCORE (on train data): ", np.round( sklearn.metrics.f1_score(test_[0], test_[1]) * 100, 1), 
'(%)') st.write(test_resp.head(1000)) st.write(test_resp.shape) st.write("") st.markdown(download_csv(test_resp, "MLCPT_TEST_PRED.csv", info="DOWNLOAD TEST PREDICTION FILE"), unsafe_allow_html=True) st.write("MODEL ESTABLISHED. YAY!") st.balloons() train_scaled = test_scaled = None else: st.write("YOUR MODEL FAILED TO COMPLETE") elif train_df: st.write("YOU NEED TEST DATASET TOO") elif test_df: st.write("YOU NEED TRAIN DATASET AS WELL") else: st.write("ABEG UPLOAD TRAIN AND TEST DATASET") else: st.write('INVALID ARGUMENT! ') st.markdown( "<h5 style='text-align: center'>Made with <span style='color:red'>♥</span> By <a href='https://www.twitter.com/__oemmanuel__'>Emmanuel</a> </h5>", unsafe_allow_html=True)
# main(): Streamlit entry point for the DATAtourisme dashboard.
#
# A sidebar radio ("Menu") selects one of six pages:
#   * "Team"            - DATAtourisme logo, two presentation paragraphs, a
#                         "Team" button that triggers st.balloons(), five team
#                         member cards (photo, name, role, and the link
#                         variables link1, link4, link2, link, link5), the
#                         WCS logo and a CSS background injected via
#                         st.markdown.
#   * "Produit"         - uses data1 (optional view of the whole dataframe).
#   * "Evénement"       - uses data2 (optional view of the first 100 rows).
#   * "Lieu d'intérêt"  - uses data3 (optional view of the first 100 rows).
#   * "Itinéraire"      - uses data4 (optional view of the first 100 rows).
#       Each of these four pages shows summary f-strings (POI count out of
#       the total over data1..data4, distinct data creators, providers and
#       cities, share of POI with a photo), a px.density_mapbox of
#       latitude/longitude, and a px.choropleth_mapbox of value counts per
#       code_departement using france_regions_geo keyed on 'properties.code'.
#   * "Maintenance"     - asks for a password via st.text_input; shows
#                         data_erreur, a base64 data-URI CSV download link,
#                         lists derived from missing fournisseur /
#                         createur_donnée / region values, st.map of
#                         data_erreur_map, and bar charts of the 17 most
#                         frequent sous_categorie values for data3, data2,
#                         data1 and data4.
#
# The bare f-strings are expression statements; presumably they are rendered
# through Streamlit's "magic" feature. TODO confirm.
#
# NOTE(review): the maintenance password is a string literal compared in
#   source; consider moving it to a secret store.
# NOTE(review): `if choice == "Team"` is followed by a separate
#   `if choice == "Produit" ... elif ... else` chain, so the final `else`
#   (an empty subheader) looks like it also runs for "Team". Confirm.
# NOTE(review): this function is flattened onto wrapped lines; the extent of
#   the commented-out chart code and the nesting under the password check
#   cannot be determined from this text. Confirm against the original file.
# NOTE(review): the four data pages are near-identical and are candidates for
#   a shared helper once the original layout is confirmed.
def main(): menu = ["Team","Lieu d'intérêt", "Evénement", "Produit", "Itinéraire", "Maintenance"] choice = st.sidebar.radio("Menu", menu) if choice == "Team": image = Image.open('DATAtourisme.png') st.image(image, use_column_width = True, output_format = 'PNG') st.markdown("<h1 style='text-align: center; font-size:15px; color:#A11F40;'>Qu'est ce que DATAtourisme ?</h1>", unsafe_allow_html=True) st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True) col1, col2, col3 = st.beta_columns((1,13,1)) with col1: st.markdown("") with col2: st.markdown("DATAtourisme est un dispositif national visant à faciliter l’accès aux données publiques d’information touristique produites à travers les territoires par les offices de tourisme et les comités départements ou régionaux du tourisme. Il se matérialise par une plateforme de collecte, de normalisation et de diffusion de données en open data, directement reliée aux bases de données territoriales, et repose sur l’animation d’une communauté d’utilisateurs. Le dispositif est copiloté par la Direction générale des entreprises et la fédération ADN Tourisme. 
Les données collectées sont relatives au recensement de l’ensemble des événements et points d’intérêt touristiques de France (musées, monuments, sites naturels, activités, itinéraires, expos et concerts, etc)", unsafe_allow_html=True) with col3: st.markdown("") st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True) st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True) st.markdown("<h1 style='text-align: center; font-size:15px; color:#A11F40;'>Qui sommes-nous ?</h1>", unsafe_allow_html=True) st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True) col1, col2, col3 = st.beta_columns((1,13,1)) with col1: st.markdown("") with col2: st.markdown("A propos de ce projet : Dans le cadre de notre formation professionnelle de Data Analyst, notre équipe de 5 s'est alliée à ADN Tourisme pour proposer un état des lieux, à jour, du projet DATAtourisme, ce qui n'existait pas jusqu'alors.", unsafe_allow_html=True) with col3: st.markdown("") st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True) col1, col2, col3 = st.beta_columns((3,0.72,3)) with col1: st.markdown("") with col2: if st.button("Team"): st.balloons() with col3: st.markdown("") st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'></h1>", unsafe_allow_html=True) # st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'>Team</h1>", unsafe_allow_html=True) col1, col2, col3, col4, col5 = st.beta_columns(5) with col1: st.image("cm1.jpg", use_column_width=True) st.markdown("""**Carla Moreno**""") st.markdown("""*Scrum Master*""") st.markdown(link1, unsafe_allow_html=True) with col2: st.image("cc.jpg", use_column_width=True) st.markdown("""**Corentin Guillo**""") st.markdown("""*Product Owner*""") st.markdown(link4, unsafe_allow_html=True) with col3: st.image("Yvanne.jpg", 
use_column_width=True) st.markdown("""**Yvanne Euchin**""") st.markdown("""*Equipe Tech*""") st.markdown(link2, unsafe_allow_html=True) with col4: st.image("md.jpg", use_column_width=True) st.markdown("""**Michael Desforges**""") st.markdown("""*Equipe Tech*""") st.markdown(link, unsafe_allow_html=True) with col5: st.image("ab.jpg", use_column_width=True) st.markdown("""**Amar Barache**""") st.markdown("""*Equipe Tech*""") st.markdown(link5, unsafe_allow_html=True) image = Image.open('WCS.png') st.image(image, use_column_width = True, output_format = 'PNG') page_bg_img = ''' <style> body { background-image: url("https://i.ibb.co/cD9CndX/nuages2.jpg"); background-size: cover; } </style> ''' st.markdown(page_bg_img, unsafe_allow_html=True) if choice == "Produit": data = data1 image = Image.open('DATAtourisme.png') st.image(image, use_column_width = True, output_format = 'PNG') st.markdown("<h1 style='text-align:center; font-size:29px; color: #57565B;'>Produit</h1>", unsafe_allow_html=True) if st.checkbox('voir dataframe'): st.write(data) # st.write(data.iloc[0:100,:]) f" POI : **{len(data1.index)}** sur **{len(data1.index)+len(data2.index)+len(data3.index)+len(data4.index)}** au total" f" Créateurs de données : **{len(data1.createur_donnée.unique())}**" f" Fournisseurs : **{len(data1.fournisseur.unique())}**" f" Villes : **{len(data1.ville.unique())}**" f" POI avec photo : **{int(round(data1.photo.sum()/len(data1.photo.index)*100))}%**" st.markdown(""" # **Densité de POI** """) fig = px.density_mapbox(data, lat='latitude', lon='longitude', radius=4, center={"lat": 46.037763, "lon": 4.4}, zoom=4, color_continuous_midpoint = 5, mapbox_style='carto-positron', color_continuous_scale=['grey','darkgrey','grey','red','red']) fig.update_layout(coloraxis_showscale=False,margin=dict( l=0, r=0, b=0, t=0, pad = 4 )) fig.update_traces(hoverinfo='skip', hovertemplate=None) st.plotly_chart(fig) st.markdown("""# **Par départements**""") fig = px.choropleth_mapbox(data, 
geojson=france_regions_geo, color=data.code_departement.value_counts(), locations=data.code_departement.value_counts().index.tolist(), featureidkey='properties.code', opacity=1, center={"lat": 46.037763, "lon": 2.062783}, mapbox_style="carto-positron", zoom=4) fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0}) st.plotly_chart(fig) # st.markdown(""" # **Répartition des sous-categories** """) # x = list(data.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17])#.drop("HébergementProduit",axis=0).index[0:17]) # y=list(data.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().iloc[0:17])#.drop("HébergementProduit",axis=0).iloc[0:17]) # fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40']) # fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category")) # st.plotly_chart(fig) # x = list(data.sous_categorie.str.split(',',expand = True).stack().explode().value_counts().drop("HébergementProduit",axis=0).index) # y=list(data.sous_categorie.str.split(',',expand = True).stack().explode().value_counts().drop("HébergementProduit",axis=0)) # fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40']) # fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category")) # st.plotly_chart(fig) # image = Image.open('WCS.png') # st.image(image, use_column_width = True, output_format = 'PNG') elif choice == "Evénement": data = data2 image = Image.open('DATAtourisme.png') st.image(image, use_column_width = True, output_format = 'PNG') st.markdown("<h1 style='text-align:center; font-size:29px; color: #57565B;'>Evénement</h1>", unsafe_allow_html=True) if st.checkbox('voir dataframe'): st.write(data.iloc[0:100,:]) f" POI : **{len(data2.index)}** sur **{len(data1.index)+len(data2.index)+len(data3.index)+len(data4.index)}** au total" f" Créateurs de données : 
**{len(data2.createur_donnée.unique())}**" f" Fournisseurs : **{len(data2.fournisseur.unique())}**" f" Villes : **{len(data2.ville.unique())}**" f" POI avec photo : **{int(round(data2.photo.sum()/len(data2.photo.index)*100))}%**" st.markdown(""" # **Densité de POI** """) fig = px.density_mapbox(data, lat='latitude', lon='longitude', radius=4, center={"lat": 46.037763, "lon": 4.4}, zoom=4, color_continuous_midpoint = 5, mapbox_style='carto-positron', color_continuous_scale=['grey','darkgrey','grey','red','red']) fig.update_layout(coloraxis_showscale=False,margin=dict( l=0, r=0, b=0, t=0, pad = 4 )) fig.update_traces(hoverinfo='skip', hovertemplate=None) st.plotly_chart(fig) st.markdown("""# **Par départements**""") fig = px.choropleth_mapbox(data, geojson=france_regions_geo, color=data.code_departement.value_counts(), locations=data.code_departement.value_counts().index.tolist(), featureidkey='properties.code', opacity=1, center={"lat": 46.037763, "lon": 2.062783}, mapbox_style="carto-positron", zoom=4) fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0}) st.plotly_chart(fig) # st.markdown(""" # **Répartition des sous-categories** """) # x = list(data.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17]) # y=list(data.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().iloc[0:17]) # fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40']) # fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category")) # st.plotly_chart(fig) # x = list(data.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index) # y=list(data.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts()) # fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40']) # fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category")) # st.plotly_chart(fig) # 
image = Image.open('WCS.png') # st.image(image, use_column_width = True, output_format = 'PNG') elif choice == "Lieu d'intérêt": data = data3 image = Image.open('DATAtourisme.png') st.image(image, use_column_width = True, output_format = 'PNG') st.markdown("<h1 style='text-align:center; font-size:29px; color: #57565B;'>Lieux d'intérêt</h1>", unsafe_allow_html=True) if st.checkbox('voir dataframe'): st.write(data.iloc[0:100,:]) f" POI : **{len(data3.index)}** sur **{len(data1.index)+len(data2.index)+len(data3.index)+len(data4.index)}** au total" f" Créateurs de données : **{len(data3.createur_donnée.unique())}**" f" Fournisseurs : **{len(data3.fournisseur.unique())}**" f" Villes : **{len(data3.ville.unique())}**" f" POI avec photo : **{int(round(data3.photo.sum()/len(data3.photo.index)*100))}%**" st.markdown(""" # **Densité de POI** """) fig = px.density_mapbox(data, lat='latitude', lon='longitude', radius=4, center={"lat": 46.037763, "lon": 4.4}, zoom=4, color_continuous_midpoint = 5, mapbox_style='carto-positron', color_continuous_scale=['grey','darkgrey','grey','red','red']) fig.update_layout(coloraxis_showscale=False,margin=dict( l=0, r=0, b=0, t=0, pad = 4 )) fig.update_traces(hoverinfo='skip', hovertemplate=None) st.plotly_chart(fig) st.markdown("""# **Par départements**""") fig = px.choropleth_mapbox(data, geojson=france_regions_geo, color=data.code_departement.value_counts(), locations=data.code_departement.value_counts().index.tolist(), featureidkey='properties.code', opacity=1, center={"lat": 46.037763, "lon": 2.062783}, mapbox_style="carto-positron", zoom=4) fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0}) st.plotly_chart(fig) # st.markdown(""" # **Répartition des sous-categories** """) # x = list(data.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17]) # y=list(data.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().iloc[0:17]) # fig = px.bar(x=x,y=y,color_discrete_sequence 
=['#A11F40']) # fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category")) # st.plotly_chart(fig) # image = Image.open('WCS.png') # st.image(image, use_column_width = True, output_format = 'PNG') elif choice == "Itinéraire": data = data4 image = Image.open('DATAtourisme.png') st.image(image, use_column_width = True, output_format = 'PNG') st.markdown("<h1 style='text-align:center; font-size:29px; color: #57565B;'>Itinéraire</h1>", unsafe_allow_html=True) if st.checkbox('voir dataframe'): st.write(data.iloc[0:100,:]) f" **POI** : **{len(data4.index)}** sur **{len(data1.index)+len(data2.index)+len(data3.index)+len(data4.index)}** au total" f" Créateurs de données : **{len(data4.createur_donnée.unique())}**" f" Fournisseurs : **{len(data4.fournisseur.unique())}**" f" Villes : **{len(data4.ville.unique())}**" f" POI avec photo : **{int(round(data4.photo.sum()/len(data4.photo.index)*100))}%**" st.markdown(""" # **Densité de POI** """) fig = px.density_mapbox(data, lat='latitude', lon='longitude', radius=4, center={"lat": 46.037763, "lon": 4.4}, zoom=4, color_continuous_midpoint = 5, mapbox_style='carto-positron', color_continuous_scale=['grey','darkgrey','grey','red','red']) fig.update_layout(coloraxis_showscale=False,margin=dict( l=0, r=0, b=0, t=0, pad = 4 )) fig.update_traces(hoverinfo='skip', hovertemplate=None) st.plotly_chart(fig) st.markdown("""# **Par départements**""") fig = px.choropleth_mapbox(data, geojson=france_regions_geo, color=data.code_departement.value_counts(), locations=data.code_departement.value_counts().index.tolist(), featureidkey='properties.code', opacity=1, center={"lat": 46.037763, "lon": 2.062783}, mapbox_style="carto-positron", zoom=4) fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0}) st.plotly_chart(fig) # x = list(data.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17]) # y=list(data.sous_categorie.str.split(', ',expand = 
True).stack().explode().value_counts().iloc[0:17]) # fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40']) # fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category")) # st.plotly_chart(fig) # image = Image.open('WCS.png') # st.image(image, use_column_width = True, output_format = 'PNG') elif choice == "Maintenance": image = Image.open('DATAtourisme.png') st.image(image, use_column_width = True, output_format = 'PNG') mdp = st.text_input("Mot de passe ?") st.write() if mdp == "Les+tour1stes.": if st.checkbox('voir dataframe'): st.write(data_erreur) st.markdown("") download = st.button('télécharger') if download: csv = data_erreur.to_csv(index=False) b64 = base64.b64encode(csv.encode()).decode() linko= f'<a href="data:file/csv;base64,{b64}" download="data_erreur.csv">Download csv file</a>' st.markdown(linko, unsafe_allow_html=True) f" Départements sans fournisseurs : **{data_erreur[data_erreur.fournisseur.isna()].code_departement.unique()}**" f" Départements sans créateur : **{data_erreur[data_erreur.createur_donnée.isna()].code_departement.unique()}**" f" Fournisseurs sans région : **{data_erreur[data_erreur.region.isna()].fournisseur.unique()}**" st.markdown("") st.markdown(""" # **Carte des erreurs latitude & longitude** """) st.markdown("") st.map(data_erreur_map) st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'>Répartition des sous-categories de la categorie Lieu d'intérêt</h1>", unsafe_allow_html=True) x = list(data3.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17]) y=list(data3.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().iloc[0:17]) fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40']) fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category")) st.plotly_chart(fig) st.markdown("<h1 style='text-align: 
center; font-size:29px; color:#57565B;'>Répartition des sous-categories de la categorie Lieu Evénement</h1>", unsafe_allow_html=True) x = list(data2.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17]) y=list(data2.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().iloc[0:17]) fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40']) fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category")) st.plotly_chart(fig) st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'>Répartition des sous-categories de la categorie Produit</h1>", unsafe_allow_html=True) x = list(data1.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17])#.drop("HébergementProduit",axis=0).index[0:17]) y=list(data1.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().iloc[0:17])#.drop("HébergementProduit",axis=0).iloc[0:17]) fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40']) fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category")) st.plotly_chart(fig) st.markdown("<h1 style='text-align: center; font-size:29px; color:#57565B;'>Répartition des sous-categories de la categorie Itinéraire</h1>", unsafe_allow_html=True) x = list(data4.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().index[0:17]) y=list(data4.sous_categorie.str.split(', ',expand = True).stack().explode().value_counts().iloc[0:17]) fig = px.bar(x=x,y=y,color_discrete_sequence =['#A11F40']) fig.update_layout(margin={"r":0,"t":0,"l":0,"b":0},showlegend=False,yaxis=dict(title=None), xaxis=dict(title=None,type="category")) st.plotly_chart(fig) # image = Image.open('WCS.png') # st.image(image, use_column_width = True, output_format = 'PNG') else: st.subheader(""" """)
def showBalloons():
    """Play Streamlit's balloon animation.

    The original note on this function says it is meant to be displayed only
    once "by caching the function". No caching decorator is visible on this
    definition, so that is presumably arranged elsewhere -- TODO confirm.
    """
    st.balloons()
def main():
    """Render the Pylance tutorial (Japanese UI).

    The page has an intro, then a menu with two entries: an installation
    walkthrough, and an error catalogue where the reader picks a Pylance
    diagnostic and gets an explanation, a fix and a "solved it" button.

    Page content is described as data: each step is a ``(kind, text)`` pair
    where kind is ``'h'`` (subheader), ``'m'`` (markdown) or ``'i'`` (image
    path passed to ``im``). Steps are emitted strictly in order.
    """
    praise = '素晴らしい!!エラー解決の力がまた一つ身に付きましたね!!😆😆'
    praise_level_up = '素晴らしい!!エラー解決の力がまた一つ身に付きましたね!!一つレベルアップしました😆😆'

    def render(steps):
        # Emit each step with the widget that matches its kind.
        for kind, text in steps:
            if kind == 'h':
                st.subheader(text)
            elif kind == 'm':
                st.markdown(text)
            else:
                im(text)

    def celebrate(button_label, message, image):
        # Button that, once clicked, shows a message, a picture and balloons.
        if st.button(button_label):
            st.markdown(message)
            im(image)
            st.balloons()

    st.title('Pylanceを使ってエラーをスムーズに解決しよう')
    render([
        ('m', 'Pythonで自力でエラー解決が解決できるようになるためには、エラー文を読む、デバッグをするなどがあります。ただ、数百行のコードのエラー箇所の特定には時間がかかります。'),
        ('m', 'エラー箇所の特定を素早くできるように視覚化してくれるのがVSCodeの拡張機能の**Pylance**です。'),
        ('m', 'ここでは、そのPylanceを使ったエラーの解決方法について紹介していきます。'),
        ('i', 'img/head_img.png'),
        ('h', 'どのメニューを選びますか?'),
    ])
    install_label = 'Pylanceのインストール方法←まずはこちら!'
    errors_label = 'Pylanceによるエラー発見の方法'
    menu_choice = st.selectbox('', (install_label, errors_label))

    if menu_choice == install_label:
        render([
            ('h', 'Pylanceのインストール方法'),
            ('m', 'Pylanceをインストールするには、VSCodeの拡張機能でpylanceと入力して、'),
            ('i', 'img/inst1.png'),
            ('m', '出てきたものをインストールします。'),
            ('i', 'img/inst2.png'),
            ('m', 'その後に、VSCode右下の歯車ボタンの"管理"をクリック'),
            ('i', 'img/kanri.png'),
            ('m', '"設定"を開きます。'),
            ('i', 'img/inst4.png'),
            ('m', 'そこで、「**python.analysis.typeCheckingMode**」を検索して、設定を**basic**にします。'),
            ('i', 'img/inst5.png'),
            ('m', 'これで、Pylanceのインストールが完了です。'),
        ])
        celebrate('完了したらクリック!', 'おめでとうございます!!😆😆', 'img/happy.png')
    elif menu_choice == errors_label:
        render([
            ('h', 'Pylanceでのエラーの発見の仕方'),
            ('m', 'Pylanceを入れると下の画像のように、赤い波線が出る箇所があります。エラーを発見するには、この赤い波線にカーソルを合わせて見ていきます。'),
            ('i', 'img/error.png'),
            ('h', '解決したいエラー文を選択する'),
        ])

        # diagnostic label -> (body steps, praise message, praise image).
        # Every page starts with a subheader equal to its label and ends with
        # the same "did it work?" subheader and button; only the body differs.
        pages = {
            'Expected indented block': ([
                ('i', 'img/expindent.png'),
                ('m', 'この表記が出た場合は**インデントをできていない箇所**があります。Expectedは「期待されている」という意味です。Expected indented blockなので、**インデントすることを期待されている**という意味になります。'),
                ('m', '例えば次の例だと、if文の後のprint文がインデントされていません。'),
                ('i', 'img/erim6.png'),
                ('h', '対処法'),
                ('m', 'インデントを忘れている部分があるのでそこをインデントしてあげます。'),
                ('i', 'img/erim7.png'),
                ('h', 'このエラーがよく起こる場所'),
                ('m', '関数の定義、if文、for文、try-except文周りでよく起きます。'),
                ('i', 'img/er1.png'),
                ('m', 'このエラーが出た場合は、それらの文の周りを見直してみましょう。'),
            ], praise, 'img/happy.png'),
            'A is not defined': ([
                ('i', 'img/er2.png'),
                ('m', 'この表記が出た場合は**Aという変数を定義していない。**もしくは、**Aというモジュールをimportしていない**のどちらかです。Aがモジュール名の場合はモジュールのimportの記述を忘れている可能性があります。'),
                ('h', '対処法'),
                ('m', '次の3つを確認してみて下さい。'),
                ('m', '**①import文を忘れている場合はそれを書きます。**'),
                ('m', '**②変数が定義されていない場合は定義します。**'),
                ('m', '**③文字に間違いがないかチェックします。**'),
                ('h', 'このエラーがよく起こる場面'),
                ('h', '変数の未定義'),
                ('i', 'img/meri1.png'),
                ('m', '上の例ではtestという変数が定義されていません。'),
                ('m', '【対処法】→手前で変数を定義する。'),
                ('i', 'img/meri2.png'),
                ('h', '文字のタイポ'),
                ('i', 'img/m1.png'),
                ('m', '上の例ではprintと書くところをprntと書いてしまっています。'),
                ('m', '【対処法】→誤字の訂正'),
                ('h', '関数の中の変数'),
                ('i', 'img/m2.png'),
                ('m', '上の例ではinitialが関数の中で初めて出てきた形になっていて、エラーになっています。'),
                ('m', '【対処法】→引数として設定してあげる'),
                ('i', 'img/emm.png'),
            ], praise, 'img/beach.jpg'),
            'Unexpected indentation': ([
                ('i', 'img/er3.png'),
                ('m', 'この表記が出た場合はインデントをする必要がないところでインデントをしている可能性があります。'),
                ('i', 'img/er4.png'),
                ('h', '対処法'),
                ('m', 'インデントを解消します。例えば上の2つ目のprintだと、if文や、for文、関数の定義のdefの後などではなく、インデントの必要はないのでインデントを戻します。'),
                ('i', 'img/er5.png'),
            ], praise, 'img/happy.png'),
            'Expected ")"': ([
                ('i', 'img/mml1.png'),
                ('m', 'この表記が出た場合は、どこか近くの行で**)が抜けている**可能性があります。'),
                ('m', '例えば、こちら↓はfor文の下のprintの最後の)を忘れている例です。'),
                ('i', 'img/erim.png'),
                ('h', '対処法'),
                ('m', ')が抜けている箇所を見つけて)を付け足してあげる必要があります。'),
                ('i', 'img/elm1.png'),
            ], praise, 'img/smile.jpg'),
            'Expected expression': ([
                ('i', 'img/er8.png'),
                ('m', 'この表記の意味は**「式」が期待されています。**という意味です。'),
                ('m', 'Pythonで「式」とは、実行されることによって何かの結果になるものです。'),
                ('m', '例をあげると、'),
                ('m', '1 + 5'),
                ('m', '[3, 5, 7, 9]'),
                ('m', 'など、なんらかの結果やデータになるものが式です。'),
                ('m', 'エラーの例としては、例えばこちら'),
                ('i', 'img/erim2.png'),
                ('m', '1 + 2 + だと式として完成していないのでエラーとなります。'),
                ('h', '対処法'),
                ('m', '式として成立していない部分を見つけて直してあげよう'),
                ('i', 'img/erml.png'),
                ('m', '5を足すことで一つの式となりました。'),
            ], praise, 'img/banzai.png'),
            'String literal is unterminated': ([
                ('i', 'img/er6.png'),
                ('m', 'この表記が出た場合は文字列の**クオテーションの閉じ忘れ**の可能性があります。'),
                ('m', '例えば、こちらは文字列の最後の"を忘れている例です。'),
                ('i', 'img/erim3.png'),
                ('h', '対処法'),
                ('m', '足りていないクオテーションを付け足してあげましょう。'),
                ('i', 'img/errm.png'),
            ], praise, 'img/jumping.jpg'),
            'Invalid character in token ""': ([
                ('i', 'img/er9.png'),
                ('m', 'この表記が出た場合、**全角の空白**がどこかに入っている可能性があります。'),
                ('m', '例えば、下ではfor文の:の後ろに全角の空白が入っています。'),
                ('i', 'img/erim4.png'),
                ('h', '対処法'),
                ('m', ':の後に入っている全角の空白を消します。上のように、文の後に#を置くには「半角スペース2つ分」空けて#を置けば問題ないです。'),
                ('i', 'img/6em.png'),
            ], praise_level_up, 'img/jump.jpg'),
            'Expected ":"': ([
                ('i', 'img/ex1.png'),
                ('m', 'この表記が出た場合、どこかにコロン:を忘れています。'),
                ('m', '例えば、下では関数を定義する際のコロン:を忘れています。'),
                ('i', 'img/ex2.png'),
                ('h', '対処法'),
                ('m', 'コロン:を忘れている部分に付け足してあげます。'),
                ('i', 'img/ex3.png'),
                ('m', 'このエラーがよく起こる場面'),
                ('m', '関数の定義、if文、for文、try-except文周りでよく起きます。'),
                ('i', 'img/er1.png'),
            ], praise_level_up, 'img/happy.png'),
            'A is possibly unbound': ([
                ('i', 'img/unbound.png'),
                ('m', 'この表記が出た場合、変数Aが未定義になっている可能性があります。'),
                ('m', '例えば、下ではinfo_dictはtry文が実行された時だけ定義されて、exceptの時は定義されていません。ですのでexcept文が実行された時はinfo_dictが定義されていないとエラーが出ます。'),
                ('i', 'img/unbound2.png'),
                ('h', '対処法'),
                ('m', 'info_dictをtry、exceptのどちらの場合でも定義されるような場所で定義してあげる。'),
                ('i', 'img/unbound3.png'),
            ], praise_level_up, 'img/gutts.jpg'),
        }

        # Dict insertion order doubles as the order of the dropdown entries.
        error_choice = st.selectbox('', tuple(pages))
        body, message, image = pages[error_choice]
        st.subheader(error_choice)
        render(body)
        st.subheader('エラーは解消できましたか?')
        celebrate('赤い波線が消えたらクリック', message, image)
def main():
    """Render the speech-emotion-recognition Streamlit app.

    A sidebar menu selects one of five pages:

    * "Emotion Recognition" -- upload an audio file (or use ``test.wav``),
      show its waveform, MFCCs and mel spectrogram, then plot predictions.
    * "Project description" -- links, dataset notes and a violin plot.
    * "Our team" -- contact cards.
    * "Leave feedback" -- logs a message via ``log_file``.
    * anything else ("Relax") -- shows a random quote and picture.

    Relies on names defined elsewhere in the file (``model``, ``save_audio``,
    ``get_melspec``, ``get_mfccs``, ``get_title``, ``color_dict``,
    ``plot_colored_polar``, ``log_file``, ``CAT3``/``CAT6``/``CAT7``,
    ``COLOR_DICT``, ``components``).
    """
    side_img = Image.open("images/emotion3.jpg")
    with st.sidebar:
        st.image(side_img, width=300)
    st.sidebar.subheader("Menu")
    website_menu = st.sidebar.selectbox(
        "Menu",
        ("Emotion Recognition", "Project description", "Our team",
         "Leave feedback", "Relax"))
    st.set_option('deprecation.showfileUploaderEncoding', False)

    if website_menu == "Emotion Recognition":
        st.sidebar.subheader("Model")
        model_type = st.sidebar.selectbox("How would you like to predict?",
                                          ("mfccs", "mel-specs"))
        em3 = em6 = em7 = gender = False
        st.sidebar.subheader("Settings")

        st.markdown("## Upload the file")
        with st.beta_container():
            col1, col2 = st.beta_columns(2)
            with col1:
                audio_file = st.file_uploader("Upload audio file",
                                              type=['wav', 'mp3', 'ogg'])
                if audio_file is not None:
                    if not os.path.exists("audio"):
                        os.makedirs("audio")
                    path = os.path.join("audio", audio_file.name)
                    if_save_audio = save_audio(audio_file)
                    if if_save_audio == 1:
                        st.warning("File size is too large. Try another file.")
                        # Nothing was loaded: clear the handle so the plotting
                        # and prediction sections below are skipped instead of
                        # failing on the unbound wav/sr/mfccs/Xdb names.
                        audio_file = None
                    elif if_save_audio == 0:
                        # Show the player, then extract the features.
                        st.audio(audio_file, format='audio/wav', start_time=0)
                        try:
                            wav, sr = librosa.load(path, sr=44100)
                            Xdb = get_melspec(path)[1]
                            mfccs = librosa.feature.mfcc(wav, sr=sr)
                        except Exception as e:
                            # Deliberately broad: any decoding failure is
                            # reported to the user as a bad file.
                            audio_file = None
                            st.error(
                                f"Error {e} - wrong format of the file. Try another .wav file."
                            )
                    else:
                        st.error("Unknown error")
                        # Same reasoning as the "too large" branch above.
                        audio_file = None
                else:
                    if st.button("Try test file"):
                        wav, sr = librosa.load("test.wav", sr=44100)
                        Xdb = get_melspec("test.wav")[1]
                        mfccs = librosa.feature.mfcc(wav, sr=sr)
                        # display audio
                        st.audio("test.wav", format='audio/wav', start_time=0)
                        path = "test.wav"
                        # Sentinel string: marks the bundled sample, which has
                        # no .name/.size attributes to show in the sidebar.
                        audio_file = "test"
            with col2:
                if audio_file is not None:
                    fig = plt.figure(figsize=(10, 2))
                    fig.set_facecolor('#d1d1e0')
                    plt.title("Wave-form")
                    librosa.display.waveplot(wav, sr=44100)
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.get_xaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    plt.gca().axes.spines["bottom"].set_visible(False)
                    plt.gca().axes.set_facecolor('#d1d1e0')
                    st.write(fig)
                # Disabled recording stub, kept for reference:
                # st.write("Record audio file")
                # if st.button('Record'):
                #     with st.spinner(f'Recording for 5 seconds ....'):
                #         st.write("Recording...")
                #         time.sleep(3)
                #     st.success("Recording completed")
                #     st.write("Error while loading the file")

        if model_type == "mfccs":
            em3 = st.sidebar.checkbox("3 emotions", True)
            em6 = st.sidebar.checkbox("6 emotions", True)
            em7 = st.sidebar.checkbox("7 emotions")
            gender = st.sidebar.checkbox("gender")
        elif model_type == "mel-specs":
            st.sidebar.warning("This model is temporarily disabled")
        else:
            st.sidebar.warning("This model is temporarily disabled")

        # Disabled colour picker, kept for reference:
        # with st.sidebar.beta_expander("Change colors"):
        #     st.sidebar.write("Use this options after you got the plots")
        #     col1, col2, col3, col4, col5, col6, col7 = st.beta_columns(7)
        #
        #     with col1:
        #         a = st.color_picker("Angry", value="#FF0000")
        #     with col2:
        #         f = st.color_picker("Fear", value="#800080")
        #     with col3:
        #         d = st.color_picker("Disgust", value="#A52A2A")
        #     with col4:
        #         sd = st.color_picker("Sad", value="#ADD8E6")
        #     with col5:
        #         n = st.color_picker("Neutral", value="#808080")
        #     with col6:
        #         sp = st.color_picker("Surprise", value="#FFA500")
        #     with col7:
        #         h = st.color_picker("Happy", value="#008000")
        #     if st.button("Update colors"):
        #         global COLOR_DICT
        #         COLOR_DICT = {"neutral": n,
        #                       "positive": h,
        #                       "happy": h,
        #                       "surprise": sp,
        #                       "fear": f,
        #                       "negative": a,
        #                       "angry": a,
        #                       "sad": sd,
        #                       "disgust": d}
        #         st.success(COLOR_DICT)

        if audio_file is not None:
            st.markdown("## Analyzing...")
            if not audio_file == "test":
                st.sidebar.subheader("Audio file")
                file_details = {
                    "Filename": audio_file.name,
                    "FileSize": audio_file.size
                }
                st.sidebar.write(file_details)

            with st.beta_container():
                col1, col2 = st.beta_columns(2)
                with col1:
                    fig = plt.figure(figsize=(10, 2))
                    fig.set_facecolor('#d1d1e0')
                    plt.title("MFCCs")
                    librosa.display.specshow(mfccs, sr=sr, x_axis='time')
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    st.write(fig)
                with col2:
                    fig2 = plt.figure(figsize=(10, 2))
                    fig2.set_facecolor('#d1d1e0')
                    plt.title("Mel-log-spectrogram")
                    librosa.display.specshow(Xdb, sr=sr, x_axis='time',
                                             y_axis='hz')
                    plt.gca().axes.get_yaxis().set_visible(False)
                    plt.gca().axes.spines["right"].set_visible(False)
                    plt.gca().axes.spines["left"].set_visible(False)
                    plt.gca().axes.spines["top"].set_visible(False)
                    st.write(fig2)

            if model_type == "mfccs":
                st.markdown("## Predictions")
                with st.beta_container():
                    col1, col2, col3, col4 = st.beta_columns(4)
                    mfccs = get_mfccs(path, model.input_shape[-1])
                    mfccs = mfccs.reshape(1, *mfccs.shape)
                    pred = model.predict(mfccs)[0]

                    with col1:
                        if em3:
                            # Fold the model's scores into three buckets;
                            # classes 4 and 5 are split evenly between two
                            # buckets each. NOTE(review): the class order is
                            # presumably that of CAT6 -- confirm.
                            pos = pred[3] + pred[5] * .5
                            neu = pred[2] + pred[5] * .5 + pred[4] * .5
                            neg = pred[0] + pred[1] + pred[4] * .5
                            data3 = np.array([pos, neu, neg])
                            txt = "MFCCs\n" + get_title(data3, CAT3)
                            fig = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig, predictions=data3,
                                               categories=CAT3, title=txt,
                                               colors=COLORS)
                            st.write(fig)
                    with col2:
                        if em6:
                            txt = "MFCCs\n" + get_title(pred, CAT6)
                            fig2 = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig2, predictions=pred,
                                               categories=CAT6, title=txt,
                                               colors=COLORS)
                            st.write(fig2)
                    with col3:
                        if em7:
                            # The 7-class model is loaded on demand and takes
                            # the transposed feature matrix.
                            model_ = load_model("model/model4.h5")
                            mfccs_ = get_mfccs(path, model_.input_shape[-2])
                            mfccs_ = mfccs_.T.reshape(1, *mfccs_.T.shape)
                            pred_ = model_.predict(mfccs_)[0]
                            txt = "MFCCs\n" + get_title(pred_, CAT7)
                            fig3 = plt.figure(figsize=(5, 5))
                            COLORS = color_dict(COLOR_DICT)
                            plot_colored_polar(fig3, predictions=pred_,
                                               categories=CAT7, title=txt,
                                               colors=COLORS)
                            st.write(fig3)
                    with col4:
                        if gender:
                            with st.spinner('Wait for it...'):
                                gmodel = load_model("model/model_mw.h5")
                                gmfccs = get_mfccs(path,
                                                   gmodel.input_shape[-1])
                                gmfccs = gmfccs.reshape(1, *gmfccs.shape)
                                gpred = gmodel.predict(gmfccs)[0]
                                gdict = [["female", "woman.png"],
                                         ["male", "man.png"]]
                                ind = gpred.argmax()
                                txt = "Predicted gender: " + gdict[ind][0]
                                img = Image.open("images/" + gdict[ind][1])

                                fig4 = plt.figure(figsize=(3, 3))
                                fig4.set_facecolor('#d1d1e0')
                                plt.title(txt)
                                plt.imshow(img)
                                plt.axis("off")
                                st.write(fig4)

            # Disabled mel-spec model. The "FYI" text on the project page
            # tells users to uncomment this section to enable it.
            # if model_type == "mel-specs":
            #     st.markdown("## Predictions")
            #     st.warning("The model in test mode. It may not be working properly.")
            #     if st.checkbox("I'm OK with it"):
            #         try:
            #             with st.spinner("Wait... It can take some time"):
            #                 global tmodel
            #                 tmodel = load_model_cache("tmodel_all.h5")
            #                 fig, tpred = plot_melspec(path, tmodel)
            #             col1, col2, col3 = st.beta_columns(3)
            #             with col1:
            #                 st.markdown("### Emotional spectrum")
            #                 dimg = Image.open("images/spectrum.png")
            #                 st.image(dimg, use_column_width=True)
            #             with col2:
            #                 fig_, tpred_ = plot_melspec(path=path,
            #                                             tmodel=tmodel,
            #                                             three=True)
            #                 st.write(fig_, use_column_width=True)
            #             with col3:
            #                 st.write(fig, use_column_width=True)
            #         except Exception as e:
            #             st.error(f"Error {e}, model is not loaded")

    elif website_menu == "Project description":
        import pandas as pd
        import plotly.express as px

        st.title("Project description")
        st.subheader("GitHub")
        link = '[GitHub repository of the web-application]' \
               '(https://github.com/CyberMaryVer/speech-emotion-webapp)'
        st.markdown(link, unsafe_allow_html=True)

        st.subheader("Theory")
        link = '[Theory behind - Medium article]' \
               '(https://talbaram3192.medium.com/classifying-emotions-using-audio-recordings-and-python-434e748a95eb)'
        st.markdown(link + ":clap::clap::clap: Tal!", unsafe_allow_html=True)
        with st.beta_expander("See Wikipedia definition"):
            components.iframe(
                "https://en.wikipedia.org/wiki/Emotion_recognition",
                height=320, scrolling=True)

        st.subheader("Dataset")
        txt = """ This web-application is a part of the final **Data Mining** project for **ITC Fellow Program 2020**. Datasets used in this project * Crowd-sourced Emotional Mutimodal Actors Dataset (**Crema-D**) * Ryerson Audio-Visual Database of Emotional Speech and Song (**Ravdess**) * Surrey Audio-Visual Expressed Emotion (**Savee**) * Toronto emotional speech set (**Tess**) """
        st.markdown(txt, unsafe_allow_html=True)

        df = pd.read_csv("df_audio.csv")
        fig = px.violin(df, y="source", x="emotion4", color="actors",
                        box=True, points="all", hover_data=df.columns)
        st.plotly_chart(fig, use_container_width=True)

        st.subheader("FYI")
        st.write(
            "Since we are currently using a free tier instance of AWS, "
            "we disabled mel-spec and ensemble models.\n\n"
            "If you want to try them we recommend to clone our GitHub repo")
        st.code(
            "git clone https://github.com/CyberMaryVer/speech-emotion-webapp.git",
            language='bash')
        st.write(
            "After that, just uncomment the relevant sections in the app.py file "
            "to use these models:")

    elif website_menu == "Our team":
        st.subheader("Our team")
        st.balloons()
        col1, col2 = st.beta_columns([3, 2])
        with col1:
            st.info("*****@*****.**")
            st.info("*****@*****.**")
            st.info("*****@*****.**")
        with col2:
            liimg = Image.open("images/LI-Logo.png")
            st.image(liimg)
            st.markdown(
                ":speech_balloon: [Maria Startseva](https://www.linkedin.com/in/maria-startseva)",
                unsafe_allow_html=True)
            st.markdown(
                ":speech_balloon: [Tal Baram](https://www.linkedin.com/in/tal-baram-b00b66180)",
                unsafe_allow_html=True)
            st.markdown(
                ":speech_balloon: [Asher Holder](https://www.linkedin.com/in/asher-holder-526a05173)",
                unsafe_allow_html=True)

    elif website_menu == "Leave feedback":
        st.subheader("Leave feedback")
        user_input = st.text_area("Your feedback is greatly appreciated")
        user_name = st.selectbox(
            "Choose your personality",
            ["checker1", "checker2", "checker3", "checker4"])

        if st.button("Submit"):
            st.success(f"Message\n\"\"\"{user_input}\"\"\"\nwas sent")

            # NOTE(review): the two magic messages below act as hard-coded
            # passphrases that dump the log files to the page -- consider
            # moving them out of the source.
            if user_input == "log123456" and user_name == "checker4":
                with open("log0.txt", "r", encoding="utf8") as f:
                    st.text(f.read())
            elif user_input == "feedback123456" and user_name == "checker4":
                with open("log.txt", "r", encoding="utf8") as f:
                    st.text(f.read())
            else:
                log_file(user_name + " " + user_input)
                thankimg = Image.open("images/sticky.png")
                st.image(thankimg)

    else:
        import requests
        import json

        url = 'http://api.quotable.io/random'
        if st.button("get random mood"):
            with st.beta_container():
                col1, col2 = st.beta_columns(2)
                n = np.random.randint(1, 1000, 1)[0]
                with col1:
                    quotes = {
                        "Good job and almost done": "checker1",
                        "Great start!!": "checker2",
                        "Please make corrections base on the following observation": "checker3",
                        "DO NOT train with test data": "folk wisdom",
                        "good work, but no docstrings": "checker4",
                        "Well done!": "checker3",
                        "For the sake of reproducibility, I recommend setting the random seed": "checker1"
                    }
                    if n % 5 == 0:
                        # One draw in five uses a built-in quote on purpose.
                        a = np.random.choice(list(quotes.keys()), 1)[0]
                        quote, author = a, quotes[a]
                    else:
                        try:
                            # The timeout keeps a stalled quote service from
                            # hanging the page; failures use a local quote.
                            r = requests.get(url=url, timeout=5)
                            text = json.loads(r.text)
                            quote, author = text['content'], text['author']
                        except (requests.RequestException, ValueError,
                                KeyError, TypeError):
                            a = np.random.choice(list(quotes.keys()), 1)[0]
                            quote, author = a, quotes[a]
                    st.markdown(f"## *{quote}*")
                    st.markdown(f"### ***{author}***")
                with col2:
                    st.image(image=f"https://picsum.photos/800/600?random={n}")
def main():
    """Render the NLP demo page.

    The user types text (or a URL, resolved by ``fetch_text``), can preview a
    percentage of it, and picks one application from the sidebar:
    tokenisation, entity extraction, sentiment, summarisation, or a balloon
    "surprise". A link to the source repository is always shown at the end.
    """
    st.title("NLP App with Streamlit")
    intro_paragraphs = (
        "Welcome! This is a simple NLP application created using Streamlit and deployed on Heroku.",
        "In the box below, you can type custom text or paste an URL from which text is extracted. Once you have a the text, open the sidebar and choose any of the four applications. Currently, we have applications to tokenize text, extract entitiles, analyze sentiment, and summarize text (and a suprise! :wink:).",
        "You can preview a percentage of your text by selecting a value on the slider and clicking on \"Preview\"",
    )
    for paragraph in intro_paragraphs:
        st.markdown(paragraph)

    nlp = load_spacy()

    raw_input = st.text_area(
        "Enter Text (or URL) and select application from sidebar",
        "Here is some sample text. When inputing your custom text or URL make sure you delete this text!"
    )
    text = fetch_text(raw_input)

    # The slider is a percentage of the text length, rounded down.
    pct = st.slider("Preview length (%)", 0, 100)
    preview_text = text[:(len(text) * pct) // 100]
    if st.button("Preview"):
        st.write(preview_text)

    choice = st.sidebar.selectbox("Select Application", [
        'Show tokens & lemmas',
        'Extract Entities',
        'Show sentiment',
        'Summarize text',
        'Suprise',
    ])

    if choice == "Show tokens & lemmas":
        if st.button("Tokenize"):
            st.info("Using spaCy for tokenization and lemmatization")
            token_rows = []
            for token in analyze_text(nlp, text):
                token_rows.append(f"Token: {token.text}, Lemma: {token.lemma_}")
            st.json(token_rows)
    elif choice == 'Extract Entities':
        if st.button("Extract"):
            st.info("Using spaCy for NER")
            rendered = displacy.render(analyze_text(nlp, text), style='ent')
            st.write(rendered.replace('\n\n', '\n'), unsafe_allow_html=True)
    elif choice == "Show sentiment":
        if st.button("Analyze"):
            st.info("Using TextBlob for sentiment analysis")
            blob = TextBlob(text)
            polarity = np.round(blob.sentiment[0], 3)
            subjectivity = np.round(blob.sentiment[1], 3)
            st.write({'polarity': polarity, 'subjectivity': subjectivity})
            st.info(
                "Polarity is between -1 (negative) and 1 (positive) indicating the type of sentiment\nSubjectivity is between 0 (objective) and 1 (subjective) indicating the bias of the sentiment"
            )
    elif choice == "Summarize text":
        summarizer_type = st.sidebar.selectbox("Select Summarizer",
                                               ['Gensim', 'Sumy Lex Rank'])
        # The select box only offers these two values.
        if summarizer_type == 'Gensim':
            summarizer = gensim_summarizer
        else:
            summarizer = sumy_summarizer
        if st.button(f"Summarize using {summarizer_type}"):
            st.success(summarizer(text))
    elif choice == 'Suprise':
        st.balloons()

    st.markdown(
        "The code for this app can be found in [this](https://github.com/sudarshan85/streamlit_nlp) Github repository."
    )
def app():
    """Render the "find your student number in pi" page (Chinese UI).

    The user enters a number of up to eight digits; 0 means "nothing entered
    yet". For any other value the page plays a cosmetic progress bar, asks a
    remote pi-search service where the number first appears, and shows the
    position (also stamped onto ``picat.jpg``) with the digits around it.

    If the service cannot be reached, or its answer does not contain the
    expected markers, an error message is shown instead of a traceback. The
    explanatory text and references at the bottom are always rendered.
    """
    st.title('输入你的学号, 试试在圆周率中找到它🐈')
    number = st.number_input(label='',
                             min_value=0,
                             max_value=99999999,
                             format="%d")

    def lookup(n):
        # Query the search service; return (rank, before, after) or None.
        try:
            r = requests.get('http://subidiom.com/pi/piday.asp',
                             params={'s': n},
                             timeout=10)
            r.raise_for_status()
        except requests.RequestException:
            return None
        html_doc = r.text
        rank_hits = re.findall(
            pattern=r"appears at the ([0-9,]*?)(st|nd|rd|th| )",
            string=html_doc)
        before_hits = re.findall(
            pattern=r"<font size=4>(\d*?)<font color=0f00ff>",
            string=html_doc)
        after_hits = re.findall(pattern=r"</font>(\d*?)<br>",
                                string=html_doc)
        if not (rank_hits and before_hits and after_hits):
            # Not found, or the page layout changed.
            return None
        return rank_hits[0][0], before_hits[0], after_hits[0]

    if number != 0:
        # Bare expression on purpose: Streamlit "magic" writes it to the page.
        f'正在Pi中匹配{number}'

        # Cosmetic progress bar: the random sleeps only simulate work.
        latest_iteration = st.empty()
        bar = st.progress(0)
        for i in range(100):
            latest_iteration.text(f'{i+1}%')
            bar.progress(i + 1)
            nd = random.random()
            if nd > 0.3:
                time.sleep(0.01)
            elif nd < 0.03:
                time.sleep(0.9)
            else:
                time.sleep(0.1)

        n = number
        found = lookup(n)
        if found is None:
            st.error(f'未能找到 {n} 的匹配结果,请稍后再试。')
        else:
            rank, b, a = found
            st.success(f'{n} 在 $\\pi$ 的第 {rank} 位!')
            st.balloons()

            # Stamp the position onto the picture, centred on a 637x637 canvas.
            img_path = 'picat.jpg'
            W, H = (637, 637)
            msg = rank
            im = Image.open(img_path)
            draw = ImageDraw.Draw(im)
            if hasattr(draw, 'textsize'):
                w, h = draw.textsize(msg)
            else:
                # Newer Pillow releases dropped textsize; measure via bbox.
                left, top, right, bottom = draw.textbbox((0, 0), msg)
                w, h = right - left, bottom - top
            draw.text(((W - w) / 2, (H - h) / 2), msg, fill="black")
            st.image(im, caption='', use_column_width=True)

            with open('style.html', 'r', encoding='utf-8') as f:
                style_block = f.read()

            # With no preceding digits the match starts right after "3.".
            if b:
                lead = f"<font color=\"gray\" size=\"5\">...{b}</font>"
            else:
                lead = f"<font color=\"gray\" size=\"5\">3.{b}</font>"
            digit_spans = "".join(
                f"<span class=\"d{x}\"><font size=\"6\">{x}</font></span>"
                for x in str(number))
            pretty_number = "".join([
                style_block,
                "<br>",
                "<div align=\"center\" class=\"pi\">",
                lead,
                digit_spans,
                f"<font color=\"gray\" size=\"5\">{a}...</font>",
                "</div>",
            ])
            components.html(pretty_number)

    st.write(""" *** ## π 中一定包含你的学号么? ### 一定 如果你的学号是 8 位数,那么它一定会在 π 的前 ~18.168 亿位中出现。(认真脸.jpg) """)
    # Raw string: "\pi" must reach Streamlit with its backslash intact.
    st.write(r""" *** ## π 中包含了所有可能的数字组合吗? ### 不好说 $\pi$ 是一个无理数,此外 $\pi$ 还是一个超越数——它不是任何有理数系数多项式的根。$\pi$ 的数字序列被认为是随机分布的,但至今未能证明,同样 $\pi$ 的合取性(disjunctive)与正规性(normal)也未在十进制下得到证明。 """)
    st.write(''' *** ### Reference📄 1. [圆周率](https://zh.wikipedia.org/wiki/%E5%9C%93%E5%91%A8%E7%8E%87) 2. [Does 𝜋 contain all possible number combinations?](https://math.stackexchange.com/questions/216343/does-pi-contain-all-possible-number-combinations) 3. [Pi Does NOT Contain the Universe](http://justinparrtech.com/JustinParr-Tech/pi-does-not-contain-the-universe/) 4. [π里包含了所有可能的数字组合吗?](https://www.guokr.com/article/439682/) ''')
def main():
    """Render the economic-indicator explorer.

    Sidebar widgets choose a location, an indicator and a year. The main
    area optionally shows the filtered data, then always shows a bar chart of
    the chosen indicator over the years. Further sidebar toggles display the
    README, run the bundled unit tests and open the style configurator.

    Bare string and variable expressions in the body are intentional: they
    rely on Streamlit "magic" to be written to the page.
    """
    st.sidebar.header("Settings")

    # GET DATA
    data = load_data()
    data_unpivoted = clean_and_reshape_data(data)

    # >> DISPLAY WIDGETS <<

    # FILTER TO SELECTED LOCATION & YEAR
    locations_list = list(data_unpivoted['Location'].sort_values().unique())
    default_location = locations_list.index('United Kingdom')
    location = st.sidebar.selectbox('Select location',
                                    locations_list,
                                    index=default_location)
    # Reserve the slot now so the indicator selector appears above the year
    # slider even though its options are only computed further down.
    indicator_slot = st.sidebar.empty()
    year = st.sidebar.slider('Select year',
                             min_value=2015,
                             max_value=2022,
                             value=2020)

    at_location = data_unpivoted['Location'] == location
    in_year = data_unpivoted['Year'] == year
    data_view = data_unpivoted[at_location & in_year]

    if st.checkbox('Show data', False):
        ''' ### Data _The sample data used in this application is property of Oxford Economics and provided for personal use and_ _educational purposes only. A 5yr rolling mean transformation has been applied to the original data series values_ _and so is still representative of actual level values. Please do not redistribute this data without the express_ _permission of the owner, Oxford Economics._ '''

        # TABLE
        st.sidebar.subheader('Data view')
        if st.sidebar.checkbox('Show DataFrame', True):
            data_view
        if st.sidebar.checkbox('Show Table'):
            st.table(data_view)

    ''' ### Chart '''

    # Chart data is narrowed in two steps: first by location ...
    chart_data = data_unpivoted[at_location]
    indicators_list = list(chart_data['Indicator'].sort_values().unique())
    # ... then by an indicator chosen via the slot reserved above.
    indicator = indicator_slot.selectbox('Select indicator', indicators_list)
    chart_data = chart_data[chart_data['Indicator'] == indicator]

    x_axis = alt.X('Year:O', axis=alt.Axis(domain=False, tickSize=0))
    y_axis = alt.Y('Value',
                   axis=alt.Axis(domain=False, tickSize=0, title='Value'))
    bars = alt.Chart(chart_data, title=f'{location} | {indicator}').mark_bar()
    fig = bars.encode(x_axis,
                      y_axis,
                      color='Value',
                      tooltip=['Id', 'Year', 'Value'])
    fig = fig.properties(width=600).interactive()
    st.altair_chart(fig)

    # ABOUT
    st.sidebar.header('About')
    st.sidebar.info('Using Streamlit to build a Web App.\n\n'
                    '(c) 2020. Oxford Economics Ltd. All rights reserved.')
    st.sidebar.markdown('---')

    # Display Readme.md
    if st.sidebar.checkbox('Readme', False):
        st.markdown('---')
        ''' ### Readme '''
        with open('./README.md', 'r', encoding='utf-8') as readme_file:
            st.markdown(readme_file.read())

    # TESTS
    if st.sidebar.checkbox('Run Tests', False):
        st.markdown('---')
        st.title('Test Suite')
        ''' ### Data Load Test '''
        loader = unittest.TestLoader()
        suite = loader.loadTestsFromModule(TestFixtures)
        runner = unittest.TextTestRunner(verbosity=2)
        result = runner.run(suite)
        if result.wasSuccessful():
            st.info('Test PASSED :-)')
            st.balloons()
        else:
            st.error('Test FAILED :-(')

    # Style
    st.sidebar.markdown('---')
    if st.sidebar.checkbox('Configure Style'):
        BlockContainerStyler().block_container_styler()
def layout(page):
    '''Render the cheat-sheet page for Streamlit's layout commands.

    Shows, for columns, containers, the expander, the sidebar and
    ``st.set_page_config``, a code sample followed by the live result.

    The four "page configuration" buttons do not call ``set_page_config``
    here: each writes a line of configuration source code to ``format.txt``
    (via ``pkle.dump``) and triggers a rerun.
    NOTE(review): the saved line is presumably loaded and executed at app
    start-up; if so, the free-text page name flows into executed code and
    should be escaped or validated -- confirm against the loader.

    Args:
        page: Title displayed at the top of the page.
    '''

    def _save_config_and_rerun(config_code):
        # Persist the requested configuration line, then rerun the script.
        # The file is opened in a with-block so the handle is closed before
        # the rerun is requested.
        with open('format.txt', 'wb') as config_file:
            pkle.dump(config_code, config_file)
        st.experimental_rerun()

    st.title(page)
    st.write('''This page lists the layout commands (currently in beta) that are available in Streamlit. They are not yet integrated into the basic Streamlit functions and therefore may not always work in unique (edge) cases. If you believe you have encountered such a case please let us know on the [Streamlit Community Platform.](https://discuss.streamlit.io/)''')
    st.markdown("---")

    # ***************** COLUMNS SECTION ****************
    st.header('Columns')
    st.subheader('Columns of Equal Size:')
    st.code(''' col1,col2 = st.beta_columns(2) col1.image('img/brain.png', caption= "This ia a blue brain!") data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns = ['a', 'b', 'c']) col2.write(data)''')
    col1, col2 = st.beta_columns(2)
    col1.image('img/brain.png', caption="This ia a blue brain!", use_column_width=True)
    data = pd.DataFrame([[1, 2, 3], [4, 5, 6], [7, 8, 9]], columns=['a', 'b', 'c'])
    col2.write('__A Dataframe__')
    col2.write(data)

    st.subheader('Columns of Different Sizes:')
    st.code(''' col3,col4,col5 = st.beta_columns([1,2,3]) # 3 columns where first is the smallest, the second is 2x the size of the first and 3rd is 3x the first col3.image('img/MC.png',use_column_width = True, caption="A Streamlit Sharing App") with col4: st.image('img/MC.png',use_column_width = True, caption="A Streamlit Sharing App") with col5: st.image('img/MC.png',use_column_width = True, caption="A Streamlit Sharing App") ''')
    # Relative widths 1 : 2 : 3.
    col3, col4, col5 = st.beta_columns([1, 2, 3])
    col3.image('img/MC.png', use_column_width=True, caption="A Streamlit Sharing App")
    with col4:
        st.image('img/MC.png', use_column_width=True, caption="A Streamlit Sharing App")
    with col5:
        st.image('img/MC.png', use_column_width=True, caption="A Streamlit Sharing App")

    st.subheader('Columns to Make a Grid:')
    st.code(''' for i in range(1,3): # number of rows in your table! = 2 cols = st.beta_columns(2) # number of columns in each row! = 2 # first column if the ith row cols[0].image('img/row_%i_col_0.png' %i, use_column_width=True) cols[1].image('img/row_%i_col_1.jpg' %i, use_column_width=True) ''')
    for i in range(1, 3):  # two rows
        cols = st.beta_columns(2)  # two columns per row
        cols[0].image('img/row_%i_col_0.png' % i, use_column_width=True)
        cols[1].image('img/row_%i_col_1.jpg' % i, use_column_width=True)

    # ***************** CONTAINERS SECTION *************************
    st.markdown("---")
    st.header('Containers')
    st.write('''You may want to create a container for a couple of reasons. These include: - Creating invisible structure to your app that can help with coding and flow - They allow you to write objects 'out of order' - They serve as a conceptual block of code''')
    st.subheader('Container using `with`:')
    st.code(''' with st.beta_container(): st.write("This bar graph is inside the container") # You can call any Streamlit command, including custom components: st.bar_chart(np.random.randn(50, 3)) ''')
    with st.beta_container():
        st.write("This bar graph is inside the container")
        st.bar_chart(np.random.randn(50, 3))

    st.subheader('Container out of order:')
    st.code(''' container = st.beta_container() container.write("This button is inside a container") button = container.button('Press Me and see something to blow your mind!') if button: st.header("Voila!! The order is backwards!") container.write("This is _after_ the if button statement, but comes _before_ the 'Voila!!'") ''')
    container = st.beta_container()
    container.write("This button is inside a container")
    button = container.button('Press Me and see something to blow your mind!', key='container_button_run')
    if button:
        st.header("Voila!! The order is backwards!")
    container.write("This is _after_ the if button statement, but comes _before_ the 'Voila!!'")

    # ********************* EXPANDER SECTION *******************
    st.markdown("---")
    st.header("Expander")
    st.write('''The expander allows you to hide sections that you may not always want expanded. When the user clicks the expander, it *__does not__* rerun the script, so this can be useful for housing additional widgets.''')
    st.code(''' with st.beta_expander('Expand Me'): st.write('Well hello there!') st.balloons()''')
    with st.beta_expander('Expand Me'):
        st.write('Well hello there!')
        st.balloons()

    # ********************* SIDEBAR SECTION *******************
    st.markdown("---")
    st.header('Sidebar')
    st.write(''' You may have noticed the handy sidebar to your left :point_left: If you would like to create your own you simply need to add `sidebar` before you call a streamlit function. For example:''')
    st.code(''' # use st.sidebar.<widget> notation sidebar_button = st.sidebar.button("Click here to remove your button") if not sidebar_button: # if the button is NOT clicked display this message st.sidebar.markdown('You added a widget to the sidebar!') ''')
    add_sidebar = st.button('Run this code to add to the sidebar')
    if add_sidebar:
        a = st.sidebar.button("Click here to remove your button", key='sidebar_button_run')
        if not a:
            st.sidebar.markdown('You added a widget to the sidebar!')
    st.write(''' NOTE: The `st.sidebar.<function>`notation works for basically ALL the streamlit functions. However, there are a few that it _doesn't_ work with, those we have listed here (as it's shorter to list the few it doesn't work with): functions that will cause an error (and their workarounds): - :exclamation: `st.sidebar.echo()` - :white_check_mark: `st.sidebar.code()` - :exclamation: `st.sidebar.spinner()` - :white_check_mark: no current workarounds :disappointed: ''')
    #:heavy_multiplication_x: ideal: :X: (big red X from slack)

    # *************************** SET PAGE CONFIG ***********************
    st.markdown("---")
    st.header("Set Page Configuration")
    # The first, middle and last columns are spacers only.
    _, config1, _, config2, _ = st.beta_columns([1, 20, 0.5, 20, 1])
    with config1:
        st.subheader("Set the layout")
        st.write(''' You can change the layout of your app in two ways, the default is centered, with one `centered` column down the centre (surprise!) of the app. The other option is `wide`, which this app already is! If you would like to see what centered looks like click the 'Change the Layout' button. ''')
        st.code(''' st.set_page_config(page_title="Streamlit cheat sheet", layout="centered") ''')
        layout_change = st.button('Change the Layout')
        if layout_change:
            _save_config_and_rerun('st.set_page_config(page_title="Streamlit cheat sheet",layout="centered")')

        st.markdown("---")
        st.subheader("Set the App Name")
        st.write(''' This setting allows you to change the name that appers in your browser tab. If you would like to change the name of the app yourself then like click the 'Set the app name' button. ''')
        st.code(''' usr_name = st.text_input('Pick a page name') st.set_page_config(page_title="%s",layout="wide") %usr_name ''')
        usr_name = st.text_input('Pick a page name')
        if not usr_name:
            usr_name = "Pick a Name!"
        page_name = st.button('Set the app name')
        if page_name:
            _save_config_and_rerun('st.set_page_config(page_title="%s",layout="wide")' % usr_name)

    with config2:
        st.subheader("Set the Icon")
        st.write(''' In the tab what houses this app, there is a default icon of a black and white Streamlit logo. BUT we wanted you to be able to change this to any emoji you wanted to have! To do this, check out the code below. If you would like to see what the emoji looks like click the 'Change the Emoji' button. ''')
        st.code(''' st.set_page_config(page_title="Streamlit cheat sheet", layout="wide", page_icon=":monkey:") ''')
        #st.set_page_config() page_icon=None
        emoji_change = st.button("Change the Emoji")
        if emoji_change:
            _save_config_and_rerun('st.set_page_config(page_title="Streamlit cheat sheet",layout="wide", page_icon=":monkey:")')

        st.markdown("---")
        st.subheader("Set the Sidebar")
        # initial_sidebar_state='auto'
        st.write(''' With this setting, you can change the sidebar to be either `expanded`, `collapsed` or `auto` when a user first arrives to your app. If this is not specified then the default is `auto`, which collapses the sidebar on a mobile device and shows it on all other devices. If you would like to set the sifebar like click the 'Change the Sidebar State' button. ''')
        st.code(''' st.set_page_config(page_title="Streamlit cheat sheet", layout="wide", initial_sidebar_state="collapsed") ''')
        #initial_sidebar_state='collapsed'
        sidebar_change = st.button("Change the Sidebar State")
        if sidebar_change:
            _save_config_and_rerun('st.set_page_config(page_title="Streamlit cheat sheet",layout="wide", initial_sidebar_state="collapsed")')
def main():
    """Render the DataHacks 2021 page and show predictions for chosen countries.

    The page lets the user pick countries and one score name (a pillar or
    ``'prosperity'``).  On "Submit" the matching saved pipeline is loaded,
    the corresponding test CSV is scored, each country is ranked per year,
    and the filtered predictions plus the stored metrics table are written
    to the page.
    """
    data_url = 'https://raw.githubusercontent.com/Andrewl7127/UCSD-DataHacks-2021/main/Data/'
    metrics_url = 'https://raw.githubusercontent.com/Andrewl7127/UCSD-DataHacks-2021/main/Metrics/'
    fallback_countries = ['Chad', 'Togo', 'Zimbabwe', 'Ivory Coast', 'Georgia']

    st.title("DataHacks 2021")
    st.write("## Predict any country's pillar and prosperity scores!")
    st.write(
        "#### Using our machine learning models, predict any country's prosperity score"
    )

    countries_df = pd.read_csv(data_url + 'merged.csv')
    countries_df = list(countries_df['country'].unique())
    names = [
        'busi', 'econ', 'educ', 'envi', 'gove', 'heal', 'pers', 'safe', 'soci',
        'prosperity'
    ]

    def select_features(frame):
        """Apply the four feature-removal helpers in their original order."""
        frame = remove_low_information_features(frame)
        frame = remove_highly_null_features(frame)
        frame = remove_single_value_features(frame)
        return remove_highly_correlated_features(frame)

    def load_pipeline(path):
        """Load a saved pipeline from *path* through an AutoMLSearch object."""
        automl = evalml.automl.AutoMLSearch(problem_type='regression',
                                            objective='auto')
        return automl.load(path)

    def rank_and_filter(keys, predictions, column, countries):
        """Attach predictions to country/year keys, rank per year, filter.

        keys: frame with 'country' and 'year' columns, row-aligned with
            *predictions*.
        column: name of the score column; the rank goes in 'rank_<column>'.
        countries: list-like of country names to keep.
        """
        result = pd.DataFrame()
        result[column] = predictions
        # Merging on both indexes adds 'key_0'/'key_1' helper columns,
        # which are dropped right away.
        result = pd.merge(left=keys,
                          right=result,
                          how="left",
                          on=[keys.index, result.index])
        result = result.drop(['key_0', 'key_1'], axis=1)
        rank_column = 'rank_' + column
        result[rank_column] = result.groupby("year")[column].rank(
            method="dense", ascending=False)
        result[rank_column] = result[rank_column].astype('int')
        return result[result['country'].isin(countries)]

    def pillar(name='busi', countries=None):
        """Return (predictions, metrics) for one pillar score.

        *countries* defaults to ``['Chad']``; ``None`` is used as the
        sentinel so no list object is shared between calls.
        """
        if countries is None:
            countries = ['Chad']
        best_pipeline = load_pipeline(name + '_best_pipeline')

        # Read the test file once: the raw frame supplies the country/year
        # keys, a reduced copy supplies the model features.
        raw = pd.read_csv(data_url + name + '_test.csv')
        features = raw.drop(['Unnamed: 0'], axis=1)
        year_columns = [col for col in features.columns if 'year' in col]
        features = select_features(features.drop(year_columns, axis=1))

        predictions = best_pipeline.predict(features)
        result = rank_and_filter(raw[['country', 'year']], predictions, name,
                                 countries)
        metric = pd.read_csv(metrics_url + name + '_metrics.csv')
        return result, metric

    def prosperity(countries=None):
        """Return (predictions, metrics) for the overall prosperity score.

        *countries* defaults to the five fallback countries.
        """
        if countries is None:
            countries = list(fallback_countries)
        best_pipeline = load_pipeline('prosperity_best_pipeline')

        test = pd.read_csv(data_url + 'test.csv', index_col=0)
        features = select_features(test.drop(['year'], axis=1))
        predictions = best_pipeline.predict(features)

        # The keys are read separately without index_col, so they carry the
        # default index rather than the file's first column.
        keys = pd.read_csv(data_url + 'test.csv')[['country', 'year']]
        result = rank_and_filter(keys, predictions, 'prosperity', countries)
        metric = pd.read_csv(metrics_url + 'prosperity_metrics.csv')
        return result, metric

    country_sel = st.multiselect(
        "Select which countries you want to learn more about!", countries_df)
    name = st.selectbox('Score', names)

    if st.button("Submit"):
        st.balloons()
        if not country_sel:
            country_sel = list(fallback_countries)
        if name == 'prosperity':
            r, m = prosperity(list(country_sel))
        else:
            r, m = pillar(name, list(country_sel))
        st.write(r)
        st.write(m)
def eda_analysis():
    """Render the step-by-step exploratory-data-analysis page.

    Lets the user pick a dataset from the module-level ``files`` table (or
    upload one through ``read_file``), then offers buttons and selectors for
    duplicate checks, missing values, data types, a target variable, and
    categorical / date-time / numeric analyses.

    Reads and rebinds the module-level frames ``df`` and ``df_date``; reads
    ``df_categorical`` and ``df_numeric``.
    NOTE(review): ``read_file``, ``seperate_features`` and
    ``choose_data_types`` presumably populate those globals -- confirm.
    """
    global df
    global df_categorical
    global df_numeric
    global df_date

    all_columns = "All columns"
    target_reminder = (
        "**Make sure that you have defined the target variable from the checkbox above**"
    )
    # The target is only chosen when the user ticks the checkbox further
    # down; start from None so the bivariate sections can detect that it is
    # missing instead of failing with UnboundLocalError.
    target = None

    st.write("")
    st.write("")
    st.write(
        "This streamlined EDA shows a high-level analysis of your data, with just a few clicks!"
    )
    st.write(
        "The datasets below have their own unique attributes that touch on specific concepts that I wanted to highlight."
    )
    st.write("")

    # ------------------------------------------------------------ data input
    st.write('## Data Input')
    st.info(
        'NOTE: You can also upload your own CSV data to play around with through the <Experimental Reading Data> option below'
    )
    option = st.selectbox('Choose which type of data', files.name)
    st.write("You have chosen " + option)
    option_index = files.index[files['name'] == option]
    option_name = files.loc[option_index, 'file_name'].item()
    st.write(files.loc[option_index, 'description'].item())
    if option_name == '<Experimental Reading data>':
        read_file()
    else:
        df = read_data("", option_name, ",")

    # ------------------------------------------------------- overview steps
    if st.button('1. Initial features'):
        initial_features(df)
    if st.button('2. Check for duplicated values'):
        check_duplicated(df)
    if st.button('3a. In-depth analysis on missing values'):
        missing_values = missing_values_table(df)
        st.write("### Missing value rows:")
        st.write(missing_values)
    if st.button('3b. Visualize missing values'):
        # Shows the missing values as a matrix, to check whether they are
        # localized.
        visualize_missing_values(df)
    if st.button("4. Check the data type of each column with an example"):
        check_data_type(df)
    if st.button('5. Column-wise analysis'):
        column_analysis(df)

    seperate_features()
    if st.button("6. Get implied numeric, categorical and datetime features"):
        get_column_types(df_categorical, df_numeric, df_date)

    # ------------------------------------------------------ target variable
    st.write("### Define the target variable")
    st.write("")
    st.info(
        'Make sure you define the target variable for bivariate classification'
    )
    is_experimental = (
        files.loc[option_index, 'name'].item() == "<Experimental Reading data>")
    if st.checkbox('Find the target variable'):
        default_target = files.loc[option_index, 'target'].item()
        if is_experimental and default_target == "Find your target variable":
            st.info("Search for the target variable from your dataset")
            st.write(df.head())
        else:
            st.write("For this dataset, it is {0}".format(default_target))
        target_name = st.text_input("Enter the target name", default_target)
        target = find_target(target_name)
        st.write("Target: ", target_name)
        st.write("Target type: ", type(target))
        st.write("### Overview")
        st.write(target.head())
        st.write(target.value_counts())
        sns.countplot(x=target, data=df)
        st.pyplot()

    # --------------------------------------------------- data-type selection
    st.write("### Finding the data variables")
    st.write(
        "You can manually change the categorical, numeric and date-time variables"
    )
    if is_experimental:
        st.info(
            "You would need to manually extract the date-time variables yourself"
        )
    if option == "Cat Shelter information":
        st.info(
            "The variables: date_of_birth and datetime should be manually changed to date-time variables"
        )
    if st.checkbox("Choose data types"):
        choose_data_types()

    # --------------------------------------------------- categorical columns
    st.markdown("## Categorical columns")
    if st.button("Information on categorical columns"):
        st.write("### Categorical Column names")
        st.write(df_categorical.columns)
        st.write("### Categorical Info")
        # DataFrame.info() prints to a stream, so capture it in a buffer.
        buffer = io.StringIO()
        df_categorical.info(buf=buffer)
        st.text(buffer.getvalue())

    categorical_selector = st.radio(
        "Choose what type of categorical analysis to conduct:", [
            "Select one of the two",
            "Univariate analysis of categorical feature",
            "Bivariate analysis of categorical feature"
        ])
    categorical_names = df_categorical.columns.tolist()
    categorical_names.append(all_columns)

    if categorical_selector == "Univariate analysis of categorical feature":
        categorical_option = st.selectbox("Choose which column",
                                          categorical_names)
        if categorical_option == all_columns:
            for col in df_categorical.columns:
                categorical_summarized(df_categorical, y=col)
        else:
            categorical_summarized(df_categorical, y=categorical_option)

    if categorical_selector == "Bivariate analysis of categorical feature":
        st.info(target_reminder)
        categorical_option = st.selectbox("Choose which column",
                                          categorical_names)
        if target is None:
            st.warning(
                "No target variable is defined yet. Tick 'Find the target variable' above to run this analysis."
            )
        elif categorical_option == all_columns:
            for col in df_categorical.columns:
                categorical_summarized(df_categorical, y=col, hue=target)
        else:
            categorical_summarized(df_categorical,
                                   y=categorical_option,
                                   hue=target)

    if st.button(" View Finalized Categorical columns"):
        st.write(df_categorical.head(10))

    # ----------------------------------------------------- date-time columns
    st.markdown("## Date-time columns")
    date_selector = st.radio("Choose what type of Date analysis to conduct:",
                             ["Select one:", "Breakdown of date features"])
    df_date = df_date.apply(pd.to_datetime)
    date_names = df_date.columns.tolist()
    date_names.append(all_columns)
    if date_selector == 'Breakdown of date features':
        date_option = st.selectbox("Choose which column", date_names)
        if date_option == all_columns:
            for col in df_date.columns:
                time_summarized(df_date, x=col)
        else:
            time_summarized(df_date, x=date_option)

    # ------------------------------------------------------- numeric columns
    st.markdown("## Numeric columns")
    if st.button("Initial numeric features"):
        st.write("### Numeric Overviews")
        st.write(df_numeric.head())
        df_numeric.hist(figsize=(20, 20), bins=10, xlabelsize=8, ylabelsize=8)
        st.pyplot()
    if st.button("Correlation matrix"):
        plt.figure(figsize=(15, 15))
        sns.heatmap(df_numeric.corr(), annot=True)
        st.pyplot()

    numeric_selector = st.radio(
        "Choose what type of numeric analysis to conduct:", [
            "Select one of the two", "Univariate analysis of numeric feature",
            "Bivariate analysis of numeric feature"
        ])
    numeric_names = df_numeric.columns.tolist()
    numeric_names.append(all_columns)

    if numeric_selector == "Univariate analysis of numeric feature":
        numeric_option = st.selectbox("Choose which column", numeric_names)
        if numeric_option == all_columns:
            for col in df_numeric.columns:
                quantitative_summarized(df_numeric, y=col)
        else:
            quantitative_summarized(df_numeric, y=numeric_option)

    if numeric_selector == "Bivariate analysis of numeric feature":
        st.info(target_reminder)
        numeric_option = st.selectbox("Choose which column", numeric_names)
        if target is None:
            st.warning(
                "No target variable is defined yet. Tick 'Find the target variable' above to run this analysis."
            )
        elif numeric_option == all_columns:
            for col in df_numeric.columns:
                quantitative_summarized(dataframe=df_numeric,
                                        y=col,
                                        palette=c_palette,
                                        x=target,
                                        verbose=False)
        else:
            quantitative_summarized(dataframe=df_numeric,
                                    y=numeric_option,
                                    palette=c_palette,
                                    x=target,
                                    verbose=False)

    st.write("")
    st.write("")
    if st.button("You're done!! Click here to celebrate"):
        st.balloons()
def _show_suicide_statistics():
    """Render the WHO suicide-statistics page with its sidebar-driven charts."""
    st.markdown(
        '<p style="margin-top: 30px">Here, we are analyzing the <b> Horld Health Organization </b> (<i><b>WHO</b></i>) dataset on the suicide statistics in the world.</p><p style="margin-bottom: 30px">We can therefore visualize the data and identify the inequal repartition of this phenomene in countries and also, we can understand the differences depending on the age of the victims and through years</p><hr style="border:1px solid black">',
        unsafe_allow_html=True)
    wh = pd.read_csv('data/who_suicide_statistics.csv')
    # Bare expressions kept from the original; presumably rendered by
    # Streamlit "magic" when this file is the main app script -- confirm.
    "\n\n"
    wh

    st.sidebar.markdown(
        '<h2 style="text-align: center; color: #f0ad4e"><b>Check any option to visualize the related data</b></h2>',
        unsafe_allow_html=True)
    st.sidebar.markdown("<br>", unsafe_allow_html=True)

    if st.sidebar.checkbox('Show Description for suside dataset'):
        st.markdown(
            '<h2 style="text-align: center; color: #d9534f"><b>Here is a Description of the data Series.</b></h2>',
            unsafe_allow_html=True)
        st.write(wh.describe(include="all").T)

    if st.sidebar.checkbox('Show global sucides on years'):
        st.markdown(
            '<h2 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Global sucides through years.</b></h2>',
            unsafe_allow_html=True)
        st.set_option('deprecation.showPyplotGlobalUse', False)
        sns.set(style="darkgrid")
        sns.set(rc={'figure.figsize': (15, 10)})
        ax = sns.regplot(data=wh,
                         x='year',
                         y='suicides_no',
                         x_jitter=0.2,
                         order=4)
        ax.set_yscale('log')
        st.pyplot()

    if st.sidebar.checkbox('Show Suside number by age and country'):
        st.markdown(
            '<h2 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Suside number by age and country.</b></h2>',
            unsafe_allow_html=True)
        wh.groupby(['country',
                    'age']).suicides_no.sum().nlargest(10).plot(kind='barh')
        st.pyplot()

    if st.sidebar.checkbox('Show Suside number by age and sex'):
        st.markdown(
            '<h2 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Suside number by age and sex.</b></h2>',
            unsafe_allow_html=True)
        sns.catplot(x="sex",
                    y="suicides_no",
                    col='age',
                    data=wh,
                    estimator=median,
                    height=4,
                    aspect=.7,
                    kind='bar')
        st.pyplot()

    if st.sidebar.checkbox('Show Suside number by age interval and sex'):
        st.markdown(
            '<h2 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Suside number by age interval and sex.</b></h2>',
            unsafe_allow_html=True)
        # Give the age bands an explicit category order; the converted
        # column is also what the checkboxes below see in this run.
        wh['age'] = wh.age.astype(
            pd.api.types.CategoricalDtype(categories=[
                '5-14 years', '15-24 years', '25-34 years', '35-54 years',
                '55-74 years', '75+ years'
            ]))
        wh.pivot_table(index='age',
                       columns='sex',
                       values='suicides_no',
                       aggfunc='sum').plot(kind='barh')
        st.pyplot()

    if st.sidebar.checkbox(
            'Show Suside number by age, sex and for each year from 1979 to 2016'
    ):
        st.markdown(
            '<h4 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Suside number by age, sex and for each year from 1979 to 2016.</b></h4>',
            unsafe_allow_html=True)
        # x / y are passed by keyword: recent seaborn releases no longer
        # accept them positionally.
        sns.catplot(x='age',
                    y='suicides_no',
                    hue='sex',
                    col='year',
                    data=wh,
                    kind='bar',
                    col_wrap=3,
                    estimator=sum)
        st.pyplot()

    if st.sidebar.checkbox('Show Evolution of Suside number by sex '):
        st.markdown(
            '<h2 style="text-align: center; color: #d9534f; margin-top: 30px"><b>Evolution of Suside number by sex.</b></h2>',
            unsafe_allow_html=True)
        sns.set(style="darkgrid")
        g = sns.FacetGrid(wh, row="sex", col="age", margin_titles=True)
        g.map(plt.scatter, "suicides_no", "population", edgecolor="w")
        st.pyplot()


def _show_mental_health_survey():
    """Render the tech-workplace mental-health survey page and its charts."""
    st.sidebar.markdown(
        '<h5 style="margin-top: 30px; margin-bottom: 30px; text-align: center; color: #d9534f; font-family: cursive"><b>Pick an option to make the diagram display</b></h5>',
        unsafe_allow_html=True)
    st.markdown(
        '<h3 style="margin-top: 20px; color: #fbe25d"><b><u><i>Survey on Mental Health in the Tech Workplace</i></u></b></h3>',
        unsafe_allow_html=True)
    st.markdown(
        ' <p style="margin-top: 0px">Here, we are analyzing the <b> Horld Health Organization </b> (<u><b>WHO</b></u>) dataset mental health statistics in the world.</p><p style="margin-bottom: 30px">We will then be able to visualize data Comming from thousands of peple all around the world and we will classify them depending on some criterias.</p><ul><li>Their employment status</li><li>Their gender</li><li>Their Anonymity</li><li>Their care options</li><li>Their treatement, if they are ill</li></ul>',
        unsafe_allow_html=True)
    mtech = pd.read_csv('data/survey.csv')
    st.markdown(
        '<h3 style="margin-top: 20px; color: #d9534f; font-family: cursive"><b>DATASET</b></h3>',
        unsafe_allow_html=True)
    # Bare expressions kept from the original; presumably rendered by
    # Streamlit "magic" when this file is the main app script -- confirm.
    mtech
    "\n"
    st.markdown(
        '<h3 style="margin-top: 20px; color: #d9534f; font-family: cursive"><b>Description of the data Serie.</b></h3>',
        unsafe_allow_html=True)
    st.write(mtech.describe(include='all'))

    if st.sidebar.checkbox('Diagram depending on the occupation'):
        st.markdown(
            '<h3 style="margin-top: 30px; color: #d9534f; font-family: cursive"><b>Diagram depending on the occupation</b></h3>',
            unsafe_allow_html=True)
        st.markdown(
            '<p style="margin-top: 10px">Here, we cas visualize the mental health survey depending on if the people are employed or self employed, onsite werkers or remote workers, work for tech company or non tech company</p>',
            unsafe_allow_html=True)
        sns.catplot(x='self_employed',
                    hue='remote_work',
                    col='tech_company',
                    kind='count',
                    data=mtech)
        st.pyplot()

    # Label fixed: it used to be a truncated copy of the checkbox above even
    # though this section charts the anonymity questions.
    if st.sidebar.checkbox('Anonymity around mental illness'):
        st.markdown(
            '<h3 style="margin-top: 30px; color: #d9534f; font-family: cursive"><b>anonymity around mental illness</b></h3>',
            unsafe_allow_html=True)
        st.markdown(
            '<p style="margin-top: 10px">The folowing wisualization show the answers from the question </p><ul><li>if the worker\'s anonymity will be protected if they choose to take advantage of mental health or substance abuse treatment resources</li><li>If they Would be willing to discuss a mental health issue with your coworkers</li><li>If they Would be willing to discuss a mental health issue with your direct supervisor(s)</li></ul><br>The answers are not really positive',
            unsafe_allow_html=True)
        sns.catplot(x='anonymity',
                    hue='leave',
                    col='supervisor',
                    row='coworkers',
                    kind='count',
                    data=mtech)
        st.pyplot()

    if st.sidebar.checkbox('Consequences of mental illness subject'):
        # Heading fixed: it was a copy of the occupation section's title.
        st.markdown(
            '<h3 style="margin-top: 30px; color: #d9534f; font-family: cursive"><b>Consequences of mental illness subject</b></h3>',
            unsafe_allow_html=True)
        st.markdown(
            '<p style="margin-top: 10px">The folowing wisualization show the answers from the question: </p><ul><li> Does your employer provide resources to learn more about mental health issues and how to seek help?</li><li>Do you think that discussing a mental health issue with your employer would have negative consequences?</li><li>If they have a family history of mental illness?</li><li>If they Have sought treatment for a mental health condition?</li></ul><br>The answers are not really positive, It show that the subject of mental ilness is still a real taboo in the society',
            unsafe_allow_html=True)
        sns.catplot(x='seek_help',
                    hue='mental_health_consequence',
                    col='treatment',
                    row='family_history',
                    kind='count',
                    data=mtech)
        st.pyplot()

    if st.sidebar.checkbox('Care options and Consequences'):
        st.markdown(
            '<h3 style="margin-top: 30px; color: #d9534f; font-family: cursive"><b>Diagram showing the Care options and Consequences</b></h3>',
            unsafe_allow_html=True)
        sns.catplot(x='benefits',
                    hue='treatment',
                    col='wellness_program',
                    row='care_options',
                    kind='count',
                    data=mtech)
        st.pyplot()


def _show_suicide_explorer():
    """Render joint, histogram and scatter plots over user-chosen numeric columns."""
    data = pd.read_csv("data/who_suicide_statistics.csv")
    numeric_columns = data.select_dtypes(
        ['float64', 'float32', 'int32', 'int64']).columns

    if st.sidebar.checkbox("Reveal data."):
        st.dataframe(data=data)

    # Joint plot.  The colour attribute is now properly quoted; the original
    # markup had a stray curly quote and no closing quote.
    st.sidebar.markdown('<br>', unsafe_allow_html=True)
    st.sidebar.markdown('<h3><font color="#5cb85c">Joint plot</font></h3>',
                        unsafe_allow_html=True)
    joint_x = st.sidebar.selectbox(label='x', options=numeric_columns)
    joint_y = st.sidebar.selectbox(label="y", options=numeric_columns)
    sns.jointplot(x=joint_x, y=joint_y, data=data)
    st.pyplot()

    # Histogram.
    st.sidebar.markdown('<br>', unsafe_allow_html=True)
    st.sidebar.markdown('<h3><font color="#5cb85c">Histogram</font></h3>',
                        unsafe_allow_html=True)
    hist_feature = st.sidebar.selectbox(label="Feature",
                                        options=numeric_columns)
    histogram_slider = st.sidebar.slider(label="Number of Bins",
                                         min_value=5,
                                         max_value=100,
                                         value=30)
    sns.distplot(data[hist_feature], bins=histogram_slider)
    st.pyplot()

    # Scatter plot.
    st.sidebar.markdown('<br>', unsafe_allow_html=True)
    st.sidebar.markdown(
        '<h3><font color="#5cb85c">Scatter plot setup</font></h3>',
        unsafe_allow_html=True)
    scatter_x = st.sidebar.selectbox(label='X axis', options=numeric_columns)
    scatter_y = st.sidebar.selectbox(label="Y axis", options=numeric_columns)
    sns.relplot(x=scatter_x, y=scatter_y, data=data)
    st.pyplot()


def get_dataset(name):
    """Render the page that belongs to the dataset called *name*.

    Args:
        name: dataset label; one of 'Suside Statistics',
            'Survey on Mental Health in the Tech Workplace' or
            'Suicide Statistics over years'.  Any other value shows the
            fallback "about" branch.

    Returns:
        None.  All output goes to the Streamlit page and sidebar.
    """
    if name == 'Suside Statistics':
        _show_suicide_statistics()
    elif name == 'Survey on Mental Health in the Tech Workplace':
        _show_mental_health_survey()
    elif name == 'Suicide Statistics over years':
        _show_suicide_explorer()
    else:
        st.balloons()
        # Bare expression kept from the original (see the helpers above).
        "### About the application"
def start_ui():
    """Render the Sysmon Extractor page.

    Collects an input path, an output path, optional column names, the
    events to extract and a Spark master, then calls ``extract`` when the
    user presses "Extract!".  Below the form it always shows the event
    mapping table and per-event schema help.
    """
    st.title("Sysmon Extractor")
    st.subheader("Extract sysmon data based off the event type")
    st.write("Supported data types are csv, json and parquet.")
    st.write("Load your data by specifying the full path.")
    st.info("HDFS is supported. Specify hdfs://HOST:PORT/path/to/file for either input or outputh path.")

    up_file = st.text_input("Input File:")
    # The header checkbox is only offered for csv inputs; every other input
    # is read without headers.
    headers = False
    if up_file and ".csv" in up_file:
        headers = st.checkbox(
            "For csv files if the first row contains headers?", value=True)

    st.write("Specify the output path below. If writing to a local directory, the full path must be specified. If no path is provided, it will write the file to the current working directory.")
    out_file = st.text_input("Output file")

    st.write("If you have multiple log sources in one file, please enter the column that specifies the log source for each row.")
    log_col = st.text_input("Log name columns:")

    st.write("If your Sysmon data is nested in another column, please enter the column below.")
    event_col = st.text_input("Event column: ")

    selection = st.multiselect(
        "Select events to extract",
        EVENT_DICTIONARY,
        format_func=_format_rules
    )

    st.write("If you want to extract any other columns from the data, specify them below separated by a comma")
    additional_cols = st.text_area("Additional columns", "col1, col2.nested_col")

    st.write("If you want the resulting file as a single file, check the box below.")
    st.warning(
        "Make sure that you have enough memory to fit the data into memory, otherwise this will fail")
    single_file = st.checkbox("Output as a single file", value=False)

    st.write("If you have an existing spark cluster you would like to connect to, enter it below.")
    st.info("For a cluster, spark://HOST:PORT, for a mesos/yarn cluster mesos://HOST:PORT")
    master = st.text_input("Spark Instance", "local")

    if st.button("Extract!"):
        input_path = up_file.strip() if up_file else ""
        # Validate explicitly instead of with ``assert``: asserts disappear
        # under ``python -O`` and otherwise abort the page with a traceback.
        if not input_path:
            st.error("Must choose a file for upload.")
        elif not selection:
            st.error("Please select rules you would like to extract")
        else:
            # Fill in defaults for the optional fields.
            if additional_cols:
                additional_cols = re.sub(r"\s+", "", additional_cols).split(",")
            else:
                additional_cols = []
            if not out_file:
                out_file = f"{os.getcwd()}/sysmon-output.csv"

            with st.spinner(text="Extracting logs..."):
                # NOTE(review): the ``ouput_file`` spelling is kept because
                # ``extract`` is defined outside this block -- confirm it
                # matches that function's parameter name.
                extract(
                    input_path,
                    selection,
                    ouput_file=out_file,
                    header=headers,
                    log_column=log_col,
                    event_column=event_col,
                    additional_columns=additional_cols,
                    single_file=single_file,
                    master=master
                )
            st.success("Done!")
            st.balloons()

    st.header("Event Mapping")
    st.subheader("See what each event is.")
    sysmon_df = SYSMON_SCHEMA.set_index("Event")
    st.table(sysmon_df)

    st.header("Event Dictionary Help")
    st.subheader("Display the schema information for each Sysmon rule")
    event = st.selectbox("Select an event.", EVENT_DICTIONARY, format_func=_format_rules)
    df = ossem.getEventDf("windows", "sysmon", f"event-{event}")
    st.table(df[["event_code", "title", "name", "type", "description", "sample_value"]])
# Default AIS export used when no CSV is uploaded.
_AIS_DEFAULT_CSV = '../Data/main/Maritius_AOI_20200701_0731_full.csv'

# Columns of the AIS export that the dashboard drops during cleaning.
_AIS_UNUSED_COLUMNS = [
    "call_sign", "flag", "draught", "ship_and_cargo_type", "length", "width",
    "eta", "destination", "status", "maneuver", "accuracy", "collection_type",
    'mmsi_label', 'created_at', 'imo', 'name',
]

# Notebook-style version of the pipeline, shown verbatim by the "Code" option.
_AIS_PIPELINE_SNIPPET = '''
# data
time_series_df=pd.read_csv('../Data/main/Maritius_AOI_20200701_0731_full.csv')
time_series_df.sort_values(by=['timestamp'], inplace=True)
time_sorted_df = time_series_df.sort_values(by=['timestamp'], inplace=True)
time_series_df['timestamp'] = pd.to_datetime(time_series_df['timestamp'])
# cleaning and indexing
time_series_df.drop(["call_sign", "flag" ,"draught" , "ship_and_cargo_type", "length", "width","eta" , "destination", "status", "maneuver", "accuracy" , "collection_type" ,'mmsi_label'], axis=1, inplace=True)
time_series_df.drop(['created_at','imo', 'name'], axis=1, inplace=True)
time_series_df = time_series_df[time_series_df['speed'].notna()]
time_series_df = time_series_df.reset_index(drop=True)
time_series_df.drop(time_series_df[time_series_df['speed'] == 0].index, inplace = True)
# list mmsi
time_series_df.mmsi.unique()
# paramameters -> speed, course, rot, heading
mv_value = 477269900
param = 'speed'
fraction = 0.05
# Data after parameters and everything
len(time_series_df[time_series_df["mmsi"]==mv_value])
mv_data = time_series_df[time_series_df['mmsi']==mv_value]
# param ploting after selecting vessel
plt.plot( mv_data['timestamp'], mv_data['speed'])
plt.gcf().autofmt_xdate()
plt.show()
plt.plot( mv_data['timestamp'], mv_data['course'])
plt.gcf().autofmt_xdate()
plt.show()
plt.plot( mv_data['timestamp'], mv_data['rot'])
plt.gcf().autofmt_xdate()
plt.show()
plt.plot( mv_data['timestamp'], mv_data['heading'])
plt.gcf().autofmt_xdate()
plt.show()
# data cleaning again
mv_data = mv_data.drop(['mmsi','msg_type','latitude', 'longitude'], axis=1)
mv_data = mv_data[mv_data['speed'].notna()]
mv_data = mv_data.set_index(['timestamp'])
mv_data.index = pd.to_datetime(mv_data.index, unit='s')
names=mv_data.columns
rollmean = mv_data.resample(rule='D').mean()
rollstd = mv_data.resample(rule='D').std()
# Method 1 (checkpoint)
df2 = mv_data
names=df2.columns
x = mv_data[names]
scaler = StandardScaler()
pca = PCA()
pipeline = make_pipeline(scaler, pca)
pipeline.fit(x)
# PCA feature graph (Not done)
features = range(pca.n_components_)
plt.figure(figsize=(15, 5))
plt.bar(features, pca.explained_variance_)
plt.xlabel('PCA feature')
plt.ylabel('Variance')
plt.xticks(features)
plt.title("Importance of the Principal Components based on inertia")
plt.show()
pca = PCA(n_components=2)
principalComponents = pca.fit_transform(x)
principalDf = pd.DataFrame(data = principalComponents, columns = ['pc1', 'pc2'])
mv_data['pc1']=pd.Series(principalDf['pc1'].values, index=mv_data.index)
mv_data['pc2']=pd.Series(principalDf['pc2'].values, index=mv_data.index)
# p value and pc1 and pc2 autocorrelation
result = adfuller(principalDf['pc1'])
print(result[1])
pca1 = principalDf['pc1'].pct_change()
autocorrelation = pca1.dropna().autocorr()
print('Autocorrelation is: ', autocorrelation)
plot_acf(pca1.dropna(), lags=20, alpha=0.05)
pca2 = principalDf['pc2'].pct_change()
autocorrelation = pca2.autocorr()
print('Autocorrelation is: ', autocorrelation)
plot_acf(pca2.dropna(), lags=20, alpha=0.05)
# model calculations method 1
q1_pc1, q3_pc1 = mv_data['pc1'].quantile([0.25, 0.75])
iqr_pc1 = q3_pc1 - q1_pc1
lower_pc1 = q1_pc1 - (1.5*iqr_pc1)
upper_pc1 = q3_pc1 + (1.5*iqr_pc1)
q1_pc2, q3_pc2 = mv_data['pc2'].quantile([0.25, 0.75])
iqr_pc2 = q3_pc2 - q1_pc2
lower_pc2 = q1_pc2 - (1.5*iqr_pc2)
upper_pc2 = q3_pc2 + (1.5*iqr_pc2)
mv_data['anomaly_pc1'] = ((mv_data['pc1']>upper_pc1) | (mv_data['pc1']<lower_pc1)).astype('int')
mv_data['anomaly_pc2'] = ((mv_data['pc2']>upper_pc2) | (mv_data['pc2']<lower_pc2)).astype('int')
total_anomaly = mv_data['anomaly_pc1'].value_counts() + mv_data['anomaly_pc2'].value_counts()
outliers_pc1 = mv_data.loc[(mv_data['pc1']>upper_pc1) | (mv_data['pc1']<lower_pc1), 'pc1']
outliers_pc2 = mv_data.loc[(mv_data['pc2']>upper_pc2) | (mv_data['pc2']<lower_pc2), 'pc2']
len(outliers_pc1)/len(mv_data)
len(outliers_pc2)/len(mv_data)
# ploting anomaly method 1
a = mv_data[mv_data['anomaly_pc1'] == 1] #anomaly
b = mv_data[mv_data['anomaly_pc2'] == 1] #anomaly
plt.figure(figsize=(18,6))
plt.plot(mv_data[param], color='blue', label='Normal')
plt.plot(a[param], linestyle='none', marker='X', color='red', markersize=12, label='Anomaly1')
plt.plot(b[param], linestyle='none', marker='X', color='green', markersize=12, label='Anomaly2')
plt.xlabel('Date and Time')
plt.ylabel(param)
plt.title(param +' Anomalies with MMSI: ' mv_value)
plt.legend(loc='best')
plt.show();
data1 = a
data2 = b
# Method 2 K means (checkpoint)
kmeans = KMeans(n_clusters=2, random_state=42)
kmeans.fit(principalDf.values)
labels = kmeans.predict(principalDf.values)
unique_elements, counts_elements = np.unique(labels, return_counts=True)
clusters = np.asarray((unique_elements, counts_elements))
# no of points in each clusters
plt.figure(figsize = (9, 7))
plt.bar(clusters[0], clusters[1], tick_label=clusters[0])
plt.xlabel('Clusters')
plt.ylabel('Number of points')
plt.title('Number of points in each cluster')
plt.show()
# cluster graph
plt.figure(figsize=(9,7))
plt.scatter(principalDf['pc1'], principalDf['pc2'], c=labels)
plt.xlabel('pc1')
plt.ylabel('pc2')
plt.title('K-means of clustering')
plt.show()
# function to be used
def getDistanceByPoint(data, model):
    distance = []
    for i in range(0,len(data)):
        Xa = np.array(data.loc[i])
        Xb = model.cluster_centers_[model.labels_[i]-1]
        distance.append(np.linalg.norm(Xa-Xb))
    return pd.Series(distance, index=data.index)
# method 2 calulations
outliers_fraction = fraction
distance = getDistanceByPoint(principalDf, kmeans)
number_of_outliers = int(outliers_fraction*len(distance))
threshold = distance.nlargest(number_of_outliers).min()
principalDf['anomaly1'] = (distance >= threshold).astype(int)
# Anomaly count
principalDf['anomaly1'].value_counts()
# K means anomaly plots
mv_data['anomaly1'] = pd.Series(principalDf['anomaly1'].values, index=mv_data.index)
a = mv_data[mv_data['anomaly1'] == 1] #anomaly
plt.figure(figsize=(18,6))
plt.plot(mv_data[param], color='blue', label='Normal')
plt.plot(a[param], linestyle='none', marker='X', color='red', markersize=12, label='Anomaly')
plt.xlabel('Date and Time')
plt.ylabel(param)
plt.title(param +' Anomalies with MMSI: ' mv_value)
plt.legend(loc='best')
plt.show();
data3 = a
# IsolationForest method 3 (checkpoint)
outliers_fraction = fraction
model = IsolationForest(contamination=outliers_fraction)
model.fit(principalDf.values)
principalDf['anomaly2'] = pd.Series(model.predict(principalDf.values))
# visualization
mv_data['anomaly2'] = pd.Series(principalDf['anomaly2'].values, index=mv_data.index)
a = mv_data.loc[mv_data['anomaly2'] == -1] #anomaly
plt.figure(figsize=(18,6))
plt.plot(mv_data[param], color='blue', label='Normal')
plt.plot(a[param], linestyle='none', marker='X', color='red', markersize=12, label='Anomaly')
plt.xlabel('Date and Time')
plt.ylabel(param +'Reading')
plt.title(param +' Anomalies with MMSI: ' mv_value)
plt.legend(loc='best')
plt.show();
data4 = a
# anomaly count method 3
mv_data['anomaly2'].value_counts()
# Method 4
def intersection(lst1, lst2,lst3,lst4):
    lst5 = [value for value in lst2 if value in lst1]
    lst6 = [value for value in lst3 if value in lst5]
    lst7 = [value for value in lst4 if value in lst6]
    return lst7
time_common = intersection(data1.index.unique() , data2.index.unique() , data3.index.unique() , data4.index.unique() )
time_df = pd.DataFrame(columns = mv_data.columns, index = time_common)
for time in time_common:
    time_df.loc[time] = mv_data.loc[time]
# visualization
plt.figure(figsize=(18,6))
plt.plot(mv_data[param], color='blue', label='Normal')
plt.plot(time_df[param], linestyle='none', marker='X', color='red', markersize=12, label='Anomaly')
plt.xlabel('Date and Time')
plt.ylabel(param +'Reading')
plt.title('Anomalies')
plt.legend(loc='best')
plt.show();
'''


def _load_ais_raw(source):
    """Read an AIS CSV (path or file-like), sort it by timestamp and parse the timestamps."""
    frame = pd.read_csv(source)
    # One stable sort is enough; the original sorted twice and bound the
    # (None) result of the in-place call to an unused name.
    frame.sort_values(by=['timestamp'], inplace=True, kind="mergesort")
    frame['timestamp'] = pd.to_datetime(frame['timestamp'])
    return frame


def _clean_ais(raw_df):
    """Return a cleaned copy of *raw_df*: unused columns, missing speeds and zero speeds removed."""
    cleaned = raw_df.drop(columns=_AIS_UNUSED_COLUMNS)
    cleaned = cleaned[cleaned['speed'].notna()].reset_index(drop=True)
    return cleaned[cleaned['speed'] != 0].copy()


def _show_figure(fig):
    """Render a matplotlib figure in Streamlit and close it so figures do not pile up across reruns."""
    st.pyplot(fig)
    plt.close(fig)


def _plot_anomalies(mv_data, param, marked, ylabel, title, figsize=None):
    """Plot ``mv_data[param]`` as a line and overlay each ``(frame, color, label)`` in *marked* as X markers."""
    fig, ax = plt.subplots(figsize=figsize)
    ax.plot(mv_data[param], color='blue', label='Normal')
    for frame, color, label in marked:
        ax.plot(frame[param], linestyle='none', marker='X', color=color,
                markersize=12, label=label)
    ax.set_xlabel('Date and Time')
    ax.set_ylabel(ylabel)
    ax.set_title(title)
    ax.legend(loc='best')
    fig.autofmt_xdate()
    _show_figure(fig)


def _add_principal_components(mv_data):
    """Project all columns of *mv_data* onto two principal components.

    Adds ``pc1``/``pc2`` columns to *mv_data* in place and returns a
    positionally indexed frame holding just those two components.
    """
    components = PCA(n_components=2).fit_transform(mv_data[mv_data.columns])
    principal_df = pd.DataFrame(data=components, columns=['pc1', 'pc2'])
    mv_data['pc1'] = principal_df['pc1'].values
    mv_data['pc2'] = principal_df['pc2'].values
    return principal_df


def _run_iqr_benchmark(mv_data, principal_df, show_acf):
    """Report stationarity/autocorrelation and flag IQR outliers on pc1 and pc2.

    Adds ``anomaly_pc1``/``anomaly_pc2`` (0/1) to *mv_data* and returns the
    two frames of rows flagged on pc1 and on pc2 respectively.
    """
    st.write("p value", adfuller(principal_df['pc1'])[1])
    for name in ('pc1', 'pc2'):
        changes = principal_df[name].pct_change().dropna()
        st.write(f'Autocorrelation({name}) is: ', changes.autocorr())
        if show_acf:
            # plot_acf returns the figure; without handing it to Streamlit
            # the plot was built but never displayed.
            _show_figure(plot_acf(changes, lags=20, alpha=0.05))

    outside = {}
    for name in ('pc1', 'pc2'):
        q1, q3 = mv_data[name].quantile([0.25, 0.75])
        iqr = q3 - q1
        lower = q1 - (1.5 * iqr)
        upper = q3 + (1.5 * iqr)
        outside[name] = (mv_data[name] > upper) | (mv_data[name] < lower)
        mv_data[f'anomaly_{name}'] = outside[name].astype('int')

    for name in ('pc1', 'pc2'):
        st.write(f"Outlier Propotion({name}): ",
                 int(outside[name].sum()) / len(mv_data))

    return (mv_data[mv_data['anomaly_pc1'] == 1],
            mv_data[mv_data['anomaly_pc2'] == 1])


def _ask_fraction(min_value=0.00, max_value=1.00):
    """Ask for the expected outlier fraction; defaults to 0.05 as in the displayed snippet."""
    return st.number_input("Fraction", min_value, max_value, value=0.05,
                           step=0.01, key='fraction')


def _fit_kmeans(principal_df):
    """Fit a 2-cluster K-Means on the two principal components only."""
    model = KMeans(n_clusters=2, random_state=42)
    model.fit(principal_df[['pc1', 'pc2']].values)
    return model


def _plot_kmeans_clusters(principal_df, model):
    """Show the cluster sizes as a bar chart and the clusters as a pc1/pc2 scatter plot."""
    cluster_ids, cluster_sizes = np.unique(model.labels_, return_counts=True)

    fig, ax = plt.subplots()
    ax.bar(cluster_ids, cluster_sizes, tick_label=cluster_ids)
    ax.set_xlabel('Clusters')
    ax.set_ylabel('Number of points')
    ax.set_title('Number of points in each cluster')
    _show_figure(fig)

    fig, ax = plt.subplots()
    ax.scatter(principal_df['pc1'], principal_df['pc2'], c=model.labels_)
    ax.set_xlabel('pc1')
    ax.set_ylabel('pc2')
    ax.set_title('K-means of clustering')
    _show_figure(fig)


def _kmeans_anomalies(mv_data, principal_df, model, fraction):
    """Flag the *fraction* of points farthest from their own centroid.

    Adds ``anomaly1`` (0/1) to *mv_data* and returns the flagged rows.
    """
    points = principal_df[['pc1', 'pc2']].values
    # Index the centroids with the label itself: ``label - 1`` picked the
    # other cluster's centroid (label 0 wrapped around to the last one).
    own_centroids = model.cluster_centers_[model.labels_]
    distance = pd.Series(np.linalg.norm(points - own_centroids, axis=1),
                         index=principal_df.index)
    number_of_outliers = int(fraction * len(distance))
    # With zero requested outliers the threshold is NaN and nothing is flagged.
    threshold = distance.nlargest(number_of_outliers).min()
    flags = (distance >= threshold).astype(int).rename('anomaly1')
    st.write("Anomaly Count by Kmeans", flags.value_counts())
    mv_data['anomaly1'] = flags.values
    return mv_data[mv_data['anomaly1'] == 1]


def _isolation_forest_anomalies(mv_data, principal_df, fraction):
    """Flag anomalies with an Isolation Forest fitted on pc1/pc2.

    Adds ``anomaly2`` (-1 anomaly, 1 normal) to *mv_data* and returns the
    anomalous rows.  Only the two components are used as features, so the
    result does not depend on flags other detectors produced earlier.
    """
    points = principal_df[['pc1', 'pc2']].values
    forest = IsolationForest(contamination=fraction)
    forest.fit(points)
    mv_data['anomaly2'] = forest.predict(points)
    st.write("Anomaly count isolated forest: ", mv_data['anomaly2'].value_counts())
    return mv_data.loc[mv_data['anomaly2'] == -1]


def main():
    """Run the Oil Spill Dashboard: load AIS data, pick a vessel and detect anomalies.

    The sidebar selects the data source, the vessel (``mmsi``), the plotted
    parameter and the anomaly detector (IQR benchmark, K-Means, Isolation
    Forest, the intersection of all of them, or a listing of the pipeline
    code).
    """
    st.set_page_config(
        page_title="Oil Spill Dashboard",
        page_icon=":ship:",
        layout="centered",
        initial_sidebar_state="expanded",
    )

    st.title("Oil spill prediction Dashboard :rocket:")
    st.sidebar.title("Enter Parameters :paperclip:")
    st.sidebar.markdown("Powered by AIS Data set")

    load_csv_data = st.sidebar.checkbox("Upload Csv")
    uploaded_file = st.file_uploader("Choose Csv file") if load_csv_data else None

    # Read the source once and derive the cleaned frame from it; re-reading
    # an uploaded buffer without rewinding it yields an empty file.
    if uploaded_file is not None:
        uploaded_file.seek(0)
        time_series_df1 = _load_ais_raw(uploaded_file)
    else:
        time_series_df1 = _load_ais_raw(_AIS_DEFAULT_CSV)
    time_series_df = _clean_ais(time_series_df1)

    raw = st.sidebar.checkbox("Show Raw Dataset")
    not_raw = st.sidebar.checkbox("Show cleaned Dataset")
    if raw:
        st.subheader("AIS Dataset (Raw)")
        st.dataframe(time_series_df1[:500].style.highlight_max(axis=0))
    if not_raw:
        st.subheader("AIS Dataset (Cleaned)")
        st.dataframe(time_series_df[:500].style.highlight_max(axis=0))

    vessels = time_series_df.mmsi.unique()
    # st.write accepts several values; st.markdown would take the count as
    # its ``unsafe_allow_html`` flag and never display it.
    st.write("Anomaly detection with time series data of: ", len(vessels))

    classifier = st.sidebar.selectbox(
        "Classifier",
        ("Select one model", "Code", "Benchmark model(IQR)",
         "K-Means clustering", "Isolation Forest", "All of the above(Best)"))
    mv_value = st.sidebar.selectbox("Select vessel", vessels)
    st.write("Selected Vessel: ", mv_value)
    param = st.sidebar.radio("Vessel Parameter",
                             ("speed", "course", "heading", "rot"),
                             key='param')
    mv_data = time_series_df[time_series_df['mmsi'] == mv_value]

    if st.button("Plot all basic graphs"):
        for title, column in (("Speed", "speed"), ("Course", "course"),
                              ("Heading", "heading"), ("Rot", "rot")):
            chart = figure(title=f'{title} Vs Time',
                           x_axis_label='Timestamp',
                           y_axis_label=title)
            chart.line(mv_data['timestamp'], mv_data[column],
                       legend_label=f'{title} Trend', line_width=2)
            st.bokeh_chart(chart, use_container_width=True)

    # Filter on the vessel's own rows rather than a mask built from the full frame.
    map_df = mv_data.dropna(subset=['latitude', 'longitude'])
    if st.button("Plot Map"):
        st.map(map_df[['latitude', 'longitude']])

    mv_data = mv_data.drop(['mmsi', 'msg_type', 'latitude', 'longitude'], axis=1)
    mv_data = mv_data[mv_data['speed'].notna()]
    mv_data = mv_data.set_index(['timestamp'])
    mv_data.index = pd.to_datetime(mv_data.index, unit='s')

    # The label must match the selectbox option exactly, otherwise the
    # benchmark branch can never run.
    if classifier == "Benchmark model(IQR)":
        principal_df = _add_principal_components(mv_data)
        pc1_rows, pc2_rows = _run_iqr_benchmark(mv_data, principal_df, show_acf=True)
        _plot_anomalies(
            mv_data, param,
            [(pc1_rows, 'red', 'Anomaly1'), (pc2_rows, 'green', 'Anomaly2')],
            ylabel=param, title='Anomalies with given MMSI')

    elif classifier == "K-Means clustering":
        principal_df = _add_principal_components(mv_data)
        fraction = _ask_fraction()
        kmeans = _fit_kmeans(principal_df)
        _plot_kmeans_clusters(principal_df, kmeans)
        flagged = _kmeans_anomalies(mv_data, principal_df, kmeans, fraction)
        _plot_anomalies(mv_data, param, [(flagged, 'red', 'Anomaly')],
                        ylabel=param, title='Anomalies with given MMSI',
                        figsize=(18, 6))

    elif classifier == "Isolation Forest":
        principal_df = _add_principal_components(mv_data)
        # IsolationForest only accepts a contamination in (0, 0.5].
        fraction = _ask_fraction(0.01, 0.50)
        flagged = _isolation_forest_anomalies(mv_data, principal_df, fraction)
        _plot_anomalies(mv_data, param, [(flagged, 'red', 'Anomaly')],
                        ylabel='Reading', title='Anomalies with given MMSI')

    elif classifier == "All of the above(Best)":
        principal_df = _add_principal_components(mv_data)
        data1, data2 = _run_iqr_benchmark(mv_data, principal_df, show_acf=False)
        fraction = _ask_fraction(0.01, 0.50)
        data3 = _kmeans_anomalies(mv_data, principal_df,
                                  _fit_kmeans(principal_df), fraction)
        data4 = _isolation_forest_anomalies(mv_data, principal_df, fraction)

        # Keep only the timestamps every detector agrees on.
        time_common = data1.index.unique()
        for flagged in (data2, data3, data4):
            time_common = time_common.intersection(flagged.index.unique())
        time_df = mv_data[mv_data.index.isin(time_common)]

        _plot_anomalies(mv_data, param, [(time_df, 'red', 'Anomaly')],
                        ylabel='Reading', title='Anomalies')
        st.dataframe(time_df)

    elif classifier == "Code":
        st.code(_AIS_PIPELINE_SNIPPET, language='python')
        st.balloons()
def main():
    """Common ML Dataset Explorer.

    Lets the user pick a CSV file from ``./datasets``, inspect it (rows,
    columns, shape, dtypes, summary, value counts of the last column) and
    draw a few plots.  The last column is treated as the target/class.
    """
    # Local import so the explicit-figure plotting below does not depend on
    # a module-level alias being present.
    import matplotlib.pyplot as plt

    def show_figure(fig):
        # Hand the figure to Streamlit explicitly (the global-figure form of
        # st.pyplot is deprecated) and release it afterwards.
        st.pyplot(fig)
        plt.close(fig)

    st.title("Common Machine Learning Dataset Explorer")
    st.subheader("Simple Data Science Explorer with streamlit")
    html_temp = """ <div style="background-color:tomato;"><p style="color:white; font-weight:bold;">By IMRAN S M</p></div> """
    st.markdown(html_temp, unsafe_allow_html=True)

    def file_selector(folder_path="./datasets"):
        filenames = sorted(os.listdir(folder_path))
        if not filenames:
            return None
        selected_filename = st.selectbox("Select a File", filenames)
        return os.path.join(folder_path, str(selected_filename))

    filename = file_selector()
    if filename is None:
        # Nothing to explore; avoid trying to read a file called "None".
        st.warning("No dataset files found in ./datasets")
        return
    st.info("You Selected {}".format(filename))

    # Read Data
    df = pd.read_csv(filename)
    all_columns_names = df.columns.tolist()

    # Show Dataset
    if st.checkbox("Show Dataset"):
        number = st.number_input("Number of Rows to View", 1)
        st.dataframe(df.head(number))

    # Show Columns
    if st.button("Column Names"):
        st.write(df.columns)

    # Show Shape Of Dataframe
    if st.checkbox("Shape Of Dataframe"):
        data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
        if data_dim == 'Rows':
            st.text("Number of Rows")
            st.write(df.shape[0])
        elif data_dim == 'Columns':
            st.text("Number of Columns")
            st.write(df.shape[1])
        else:
            st.write(df.shape)

    # Select Columns
    if st.checkbox("Select Columns To Show"):
        selected_columns = st.multiselect("Select", all_columns_names)
        st.dataframe(df[selected_columns])

    # Show Values
    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        # The target is the last *column*; ``df.iloc[:-1]`` would instead
        # select every row but the last.
        st.write(df.iloc[:, -1].value_counts())

    # Show Data Types
    if st.button("Data Types"):
        st.write(df.dtypes)

    # Show Summary
    if st.checkbox("Describe"):
        st.write(df.describe().T)

    # Plot And Visualization
    st.subheader("Data Visualization")

    # Correlation (Seaborn)
    if st.checkbox("Correlation Plot[Seaborn]"):
        fig, ax = plt.subplots()
        # Restrict to numeric columns: text columns have no correlation and
        # make DataFrame.corr fail on recent pandas versions.
        sns.heatmap(df.select_dtypes(include="number").corr(), annot=True, ax=ax)
        show_figure(fig)

    # Pie Chart
    if st.checkbox("Pie Plot"):
        # Explicit key: the page has a second "Generate Plot" button and
        # Streamlit rejects two identical widgets without distinct keys.
        if st.button("Generate Plot", key="generate_pie_plot"):
            st.success("Generated Pie Plot:")
            fig, ax = plt.subplots()
            df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%", ax=ax)
            show_figure(fig)

    # Count Plot
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        primary_col = st.selectbox("Primary Columm to GroupBy", all_columns_names)
        selected_columns_names = st.multiselect("Select Columns", all_columns_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(primary_col)[selected_columns_names].count()
            else:
                vc_plot = df.iloc[:, -1].value_counts()
            fig, ax = plt.subplots()
            vc_plot.plot(kind="bar", ax=ax)
            show_figure(fig)

    # Customizable Plot
    st.subheader("Customizable Plot")
    type_of_plot = st.selectbox("Select Type of Plot ",
                                ["area", "bar", "line", "hist", "box", "kde"])
    selected_column_names = st.multiselect("Select Column To Plot",
                                           all_columns_names)
    if st.button("Generate Plot", key="generate_custom_plot"):
        if not selected_column_names:
            # Plotting an empty column selection raises inside pandas.
            st.warning("Select at least one column to plot.")
        else:
            st.success("Generated Customizable {} Plot for {}".format(
                type_of_plot, selected_column_names))
            cust_data = df[selected_column_names]
            # Chart types Streamlit can draw natively.
            native_charts = {
                "area": st.area_chart,
                "bar": st.bar_chart,
                "line": st.line_chart,
            }
            if type_of_plot in native_charts:
                native_charts[type_of_plot](cust_data)
            elif type_of_plot:
                # Everything else goes through pandas/matplotlib.
                fig, ax = plt.subplots()
                cust_data.plot(kind=type_of_plot, ax=ax)
                show_figure(fig)

    if st.button("Celebrate!!"):
        st.balloons()
def main(): st.sidebar.markdown( "<h1 style='text-align: center; color: black;'>🧭 Navigation Bar 🧭</h1>", unsafe_allow_html=True) nav = st.sidebar.radio( "", ["Home 🏡", "User defined Prediction📟", "Forecasting 📊", "Dashboard 📌"]) if nav == "Home 🏡": st.markdown( "<h1 style ='color:black; text_align:center;font-family:times new roman;font-size:20pt; font-weight: bold;'>DEEP WINDS ⚒️</h1>", unsafe_allow_html=True) st.markdown( "<h1 style=' color:brown; text_align:center;font-weight: bold;font-size:19pt;'>Made by Quad Techies with ❤️</h1>", unsafe_allow_html=True) st.markdown( "<h1 style ='color:green; text_align:center;font-weight: bold;font-size:18pt;'>🌎 Wind Power Prediction DL Web-App 🌎</h1>", unsafe_allow_html=True) with st.beta_expander("Write a review 📝"): col1, col2 = st.beta_columns(2) with col1: username = st.text_input("Name") with col2: comments = st.text_input("Comments") if st.button("Post ✔️"): if ((username == '' and comments == '') or username == '' or comments == ''): st.markdown( "<h1 style='text-align: center; font-weight:bold;color:red;background-color:white;font-size:12pt;border-style: solid;border-color:red;border-radius:4px'>❌ Empty field ❌ </h1>" .format(username), unsafe_allow_html=True) else: create_usertable() add_userdata(username, comments) result = login_user(username, comments) if result: st.markdown( "<h1 style='text-align: center; font-weight: normal;color:DeepPink;background-color:white;font-size:12pt;border-style: solid;border-color:Deeppink;border-radius:6px'> Thankyou for your comment {} 🎉 - with regards Team DeepWind❤️ </h1>" .format(username), unsafe_allow_html=True) with st.beta_expander("View reviews 📝"): result = select_all() data = pd.DataFrame(result, columns=['UserName', 'Comments']) st.table(data) with st.beta_expander("Like this page💰🏆!!"): if st.button("❤️"): st.markdown( "<h1 style='text-align: center; font-weight: normal;color:DeepPink;background-color:white;font-size:12pt;border-style: 
solid;border-color:Deeppink;border-radius:6px'> Thanks for your like😀!</h1>", unsafe_allow_html=True) create_likestable() add_likesdata('1') like = count_likes() like = pd.DataFrame(like, columns=['Total Likes 🎖️ : ']) like = like.to_string(index=False) st.markdown( "<h1 style='text-align: left; color: black;font-size:12pt'>{}</h1>" .format(like), unsafe_allow_html=True) if nav == "User defined Prediction📟": set_png_as_page_bg('gra (1).jpg') st.markdown( "<h1 style='text-align: center; color: green;'>User Input Parameters 💻️</h1>", unsafe_allow_html=True) with st.beta_expander("Preferences"): st.markdown( "<h1 style='text-align: left; font-weight:bold;color:black;background-color:white;font-size:11pt;'> Temperature ⛅🌞🌧️ (°C) </h1>", unsafe_allow_html=True) col1, col2 = st.beta_columns(2) with col1: min_temp = st.number_input('🌡️ Minimum Temperature (°C)', min_value=-89, max_value=55, value=-15, step=1) with col2: max_temp = st.number_input('🌡️ Maximum Temperature (°C)', min_value=-88, max_value=56, value=50, step=1) st.markdown( "<h1 style='text-align: left; font-weight:bold;color:black;background-color:white;font-size:11pt;'> Wind Speed 🌬️ (m/s) </h1>", unsafe_allow_html=True) col1, col2 = st.beta_columns(2) with col1: min_speed = st.number_input('🚀 Minimum Wind Speed (m/s)', min_value=0, max_value=99, value=1, step=1) with col2: max_speed = st.number_input('🚀 Maximum Wind Speed (m/s)', min_value=2, max_value=100, value=27, step=1) st.write("") temperature = st.slider('Temperature ⛅🌞🌧️ [°C]', min_value=min_temp, step=1, max_value=max_temp, value=max_temp) pressure = st.slider('Pressure ⚡ [atm]️', min_value=800, step=1, max_value=1050, value=1050) wind_speed = st.slider('Wind Speed 🌬️ [m/s]', min_value=min_speed, step=1, max_value=max_speed, value=max_speed) wind_direction = st.slider('Wind Direction 🚩🌀 [deg]', 0, 1, 360) dew_point = st.slider('Dew Point 💦 [deg]', float(-360), float(1), float(360)) relative_humidity = st.slider('Relative Humidity ☔ [%]', 0, 1, 100) 
result = "" profit = 0 if st.button("Predict"): result = predict(temperature, pressure, wind_speed, wind_direction, dew_point, relative_humidity) profit = result * 0.017 * 24 * 365 * 0.39 profit = int(74.19 * profit) st.balloons() st.success('Predicted Power is {} kW'.format(result)) st.warning('Annual Profit is {} Rupees'.format(round(profit, 2))) if nav == "Forecasting 📊": set_png_as_page_bg('04.gif') st.markdown( "<h1 style='text-align: center; color:black ;'>⚡FORECASTING⚡</h1>", unsafe_allow_html=True) with st.beta_expander("📁 Sample Dataset 📁"): st.markdown(get_binary_file_downloader_html('SampleData.csv'), unsafe_allow_html=True) # Setup file upload st.markdown( "<h1 style='text-align:center; color:white;background-color:black;font-size:14pt'>📂 Upload your CSV or Excel file. (200MB max) 📂</h1>", unsafe_allow_html=True) uploaded_file = st.file_uploader(label="", type=['csv', 'xlsx']) global df if uploaded_file is not None: print(uploaded_file) st.markdown( "<h1 style='text-align:center; color:black;background-color:lightgreen;font-size:14pt'>📂 File upload successful 📂</h1>", unsafe_allow_html=True) print("hello") try: df = pd.read_csv(uploaded_file) st.write(df) except Exception as e: df = pd.read_excel(uploaded_file) st.write(df) st.markdown( "<h1 style='text-align: center; color:black ;background-color:powderblue;font-size:14pt'>📈 INPUT DATA IN TERMS OF DATE 📈</h1>", unsafe_allow_html=True) trace = go.Scatter(x=df['DateTime'], y=df['Power generated by system | (kW)'], mode='lines', name='Data') layout = go.Layout( title="", xaxis={'title': "Date"}, yaxis={'title': "Power generated by system | (kW)"}) fig = go.Figure(data=[trace], layout=layout) #fig.show() st.write(fig) df1 = df.reset_index()['Power generated by system | (kW)'] import matplotlib.pyplot as plt st.write("\n") st.markdown( "<h1 style='text-align: center; color:black ;background-color:powderblue;font-size:14pt'>📈 INPUT DATA IN TERMS OF NO. 
OF HOURS 📈 </h1>", unsafe_allow_html=True) trace = go.Scatter(x=df1.index, y=df['Power generated by system | (kW)'], mode='lines', name='Data') layout = go.Layout( title="", xaxis={'title': "No. of hours"}, yaxis={'title': "Power generated by system (kW)"}) fig = go.Figure(data=[trace], layout=layout) #fig.show() st.write(fig) from sklearn.preprocessing import MinMaxScaler scaler = MinMaxScaler(feature_range=(0, 1)) df1 = scaler.fit_transform(np.array(df1).reshape(-1, 1)) ##splitting dataset into train and test split training_size = int(len(df1) * 0.65) test_size = len(df1) - training_size train_data, test_data = df1[0:training_size, :], df1[ training_size:len(df1), :1] import numpy # convert an array of values into a dataset matrix # convert an array of values into a dataset matrix def create_dataset(dataset, time_step=1): dataX, dataY = [], [] for i in range(len(dataset) - time_step - 1): a = dataset[i:(i + time_step), 0] ###i=0, 0,1,2,3-----99 100 dataX.append(a) dataY.append(dataset[i + time_step, 0]) return numpy.array(dataX), numpy.array(dataY) # reshape into X=t,t+1,t+2,t+3 and Y=t+4 time_step = 30 X_train, y_train = create_dataset(train_data, time_step) X_test, ytest = create_dataset(test_data, time_step) # reshape input to be [samples, time steps, features] which is required for LSTM X_train = X_train.reshape(X_train.shape[0], X_train.shape[1], 1) X_test = X_test.reshape(X_test.shape[0], X_test.shape[1], 1) ### Create the BILSTM model from tensorflow.keras.models import Sequential from tensorflow.keras.layers import Dense from tensorflow.keras.layers import LSTM from tensorflow.keras.layers import Bidirectional model = Sequential() model.add(Bidirectional(LSTM(300, input_shape=(1, 30)))) model.add(Dense(1)) model.compile(loss='mae', optimizer='adam') model.fit(X_train, y_train, validation_data=(X_test, ytest), epochs=10, batch_size=64, verbose=1) import tensorflow as tf ### Lets Do the prediction and check performance metrics train_predict = 
model.predict(X_train) test_predict = model.predict(X_test) ##Transformback to original form train_predict = scaler.inverse_transform(train_predict) test_predict = scaler.inverse_transform(test_predict) ### Calculate RMSE performance metrics import math from sklearn.metrics import mean_squared_error math.sqrt(mean_squared_error(y_train, train_predict)) ### Test Data RMSEmath.sqrt(mean_squared_error(ytest,test_predict)) ### Plotting # shift train predictions for plotting look_back = 30 trainPredictPlot = numpy.empty_like(df1) trainPredictPlot[:, :] = np.nan trainPredictPlot[look_back:len(train_predict) + look_back, :] = train_predict # shift test predictions for plotting testPredictPlot = numpy.empty_like(df1) testPredictPlot[:, :] = numpy.nan testPredictPlot[len(train_predict) + (look_back * 2) + 1:len(df1) - 1, :] = test_predict # plot baseline and predictions st.markdown( "<h1 style='text-align: center; color:black ;background-color:powderblue;font-size:14pt'>📈 TRAIN AND TEST DATA 📈 </h1>", unsafe_allow_html=True) #plt.plot(scaler.inverse_transform(df1)) plt.plot(scaler.inverse_transform(df1), color="blue", linewidth=1, linestyle="-") plt.xlabel('No. of hours') # Set the y axis label of the current axis. 
plt.ylabel('Power generated by system | (kW)') plt.plot(trainPredictPlot, label='Train Data', color="black", linewidth=2, linestyle="--") plt.plot(testPredictPlot, label='Test Data', color="orange", linewidth=2, linestyle="--") plt.legend(loc="upper left") #plt.show() st.pyplot(plt) x_input = test_data[len(test_data) - 30:].reshape(1, -1) temp_input = list(x_input) temp_input = temp_input[0].tolist() # demonstrate prediction for next 24 hours from numpy import array lst_output = [] n_steps = 30 i = 0 while (i < 24): if (len(temp_input) > 30): #print(temp_input) x_input = np.array(temp_input[1:]) x_input = x_input.reshape(1, -1) x_input = x_input.reshape((1, n_steps, 1)) yhat = model.predict(x_input, verbose=0) temp_input.extend(yhat[0].tolist()) temp_input = temp_input[1:] lst_output.extend(yhat.tolist()) i = i + 1 else: x_input = x_input.reshape((1, n_steps, 1)) yhat = model.predict(x_input, verbose=0) print(yhat[0]) temp_input.extend(yhat[0].tolist()) print(len(temp_input)) lst_output.extend(yhat.tolist()) i = i + 1 print(lst_output) day_new = np.arange(1, 31) day_pred = np.arange(len(df1), len(df1) + 24) import matplotlib.pyplot as plt print(len(df1)) progress = st.progress(0) for i in range(100): time.sleep(0.1) progress.progress(i + 1) st.balloons() st.markdown( "<h1 style='text-align: center; color:black ;background-color:powderblue;font-size:14pt'>📈 PREDICTED RESULTS FOR NEXT 24 HOURS 📈</h1>", unsafe_allow_html=True) plt.plot(day_pred, scaler.inverse_transform(lst_output), color="green", linewidth=1.5, linestyle="--", marker='*', markerfacecolor='yellow', markersize=7) plt.legend('GTtP', loc="upper left") plt.xlabel('No. of hours') # Set the y axis label of the current axis. 
plt.ylabel('Power generated by system | (kW)') st.pyplot(plt) st.markdown( "<h1 style='text-align: center; color:black ;background-color:yellow;font-size:14pt'>🏷️ G-Given Data, \n🏷️T-Train Data, \n🏷️t-Test Data, \n🏷️P-Predicted Results</h1>", unsafe_allow_html=True) power = pd.DataFrame(scaler.inverse_transform(lst_output), columns=['Predicted Power(kW)']) st.write(power) avg_power = power.sum() avg_power = int(avg_power / 24) profit1 = avg_power * 0.017 * 24 * 0.39 profit1 = 74.19 * profit1 st.balloons() value = f"<h1 style='text-align: center; color:black ;background-color:powderblue;font-size:14pt'> Day Profit is {profit1:.2f} Rupees</h1>" st.markdown(value, unsafe_allow_html=True) if nav == "Dashboard 📌": set_png_as_page_bg('white.jpg') city = st.text_input('Enter the city:') print() try: query = city w_data = weather_data(query) print_weather(w_data, city) except: pass st.warning('City name not found...')
def main():
    """Render the tweet-classifier Streamlit app.

    Draws the page banner and header image, then shows one of four pages
    chosen in the sidebar select box: "Prediction", "Information",
    "Models" or "EDA".
    """

    def render_markdown_file(path):
        """Convert the markdown file at *path* to HTML and display it."""
        # The context manager closes the handle as soon as the text is read.
        with open(path) as md_file:
            html = markdown.markdown(md_file.read())
        st.markdown(html, unsafe_allow_html=True)

    html_temp = """<div style="background-color:tansparent;"><div class="header-category__background" style="background-image: url('https://img.freepik.com/free-photo/pile-3d-twitter-logos_1379-879.jpg?size=620&ext=jpg');"><p style="color:white;font-size:50px;padding:50px">TWEET CLASSIFIER</p></div>"""
    st.markdown(html_temp, unsafe_allow_html=True)

    # Title image; drawn on every page because it precedes the page switch.
    image = Image.open('_110627626_trump_climate_quotesv7_976-nc.png').convert(
        'RGB')
    st.image(image, caption='TRUMP TWEET QOUTES', use_column_width=True)

    # The sidebar select box decides which page is rendered below.
    st.subheader("Climate change tweet classification")
    options = ["Prediction", "Information", "Models", "EDA"]
    selection = st.sidebar.selectbox("Choose Option", options)

    if selection == "Models":
        st.info('The infomation about the models')
        render_markdown_file("Data/models.md")

    if selection == "Information":
        st.info("General Information")
        render_markdown_file("Data/info.md")

        st.subheader("Raw Twitter data and label")
        if st.checkbox('Show raw data'):  # data is hidden if box is unchecked
            st.write(train[['sentiment', 'message']])

    if selection == "EDA":
        st.subheader(
            "The Visualizations used to explore the raw and processed tweeter data"
        )
        # (checkbox label, image file, caption), in display order.
        # NOTE(review): the "pie chart" checkbox loads the "... bar chart.png"
        # file and the "bar chart" checkbox loads the other one -- confirm
        # that the file names match their contents.
        charts = (
            ('The popular words used in the Tweets message data',
             'joint_cloud.png',
             'WORD CLOUD '),
            ('Tweet message distribution over the sentiments pie chart',
             'Tweet message distribution over the sentiments bar chart.png',
             'Tweet message distribution over the sentiments bar chart'),
            ('Tweet message distribution over the sentiments bar chart',
             'Tweet message distribution over the sentiments.png',
             'Tweet message distribution over the sentiments '),
            ('The count of word used in the Tweets message data',
             'wordcount_bar.png',
             'WORD COUNT BAR'),
        )
        for label, image_path, caption in charts:
            if st.checkbox(label):  # image is hidden if box is unchecked
                st.image(Image.open(image_path),
                         caption=caption,
                         use_column_width=True)

    if selection == "Prediction":
        st.info("Prediction with ML Models")
        tweet_text = st.text_area("Enter Text/Tweet", "Type Here")

        st.subheader(
            "Select a check box of the model you wish to use to classify your tweet"
        )
        # (checkbox label, pickled model file), in display order.
        models = (
            ("LinearSVC", "Data/LinearSVC.pkl"),
            ('Logistic', "Data/LogisticRegression.pkl"),
            ('SVC', "Data/SVC.pkl"),
            ('MultiNB', "Data/MultinomialNB.pkl"),
        )
        for label, model_path in models:
            if not st.checkbox(label):
                continue
            # Transform the user input with the shared vectoriser.
            vect_text = vectoriser.transform([tweet_text]).toarray()
            # Load the model from its .pkl file; the handle is closed once
            # joblib has finished reading it.
            with open(model_path, "rb") as model_file:
                predictor = joblib.load(model_file)
            prediction = predictor.predict(vect_text)
            st.success("Text Categorized as: {}".format(prediction))
            st.balloons()
def main():
    """Show the selected round of "The Dash" and the clue for the current count.

    The round is picked from a select box. Every playable round stores the
    'Clue Count' number input in ``ss.x``: -1 prompts the players to start,
    the round's maximum count ends the round, and any other value displays
    the clue at that index. Any other choice ('Review') shows
    ``dashes['dashes']`` as a table.
    """
    turn_rules = (
        "1) If you are the first person in the round, click the plus sign for the next word",
        "2) If you are not the first person, enter the clue count to start",
        "3) After 60 seconds, your turn is over",
        "4) Relay the clue count to the group and place your phone face down on the table",
    )

    def show_turn_rules():
        """Write the four turn instructions shared by the playable rounds."""
        for rule in turn_rules:
            st.write(rule)

    def play(clues, final_count, finish_gif, show_clue):
        """Read the clue count into ``ss.x`` and render the matching state."""
        ss.x = st.number_input('Clue Count',
                               min_value=-1,
                               max_value=final_count,
                               value=-1,
                               step=1)
        if ss.x == -1:
            st.markdown('**Start the round!**')
        elif ss.x == final_count:
            # Bare string on purpose: kept as an expression statement so it
            # is handled exactly as before.
            """### Round Over!"""
            st.markdown(finish_gif)
            st.balloons()
        else:
            show_clue(clues[ss.x])

    current_round = st.selectbox(
        'Choose the round',
        ('Test', 'Description', 'Charades', '1 Word', 'Review'))

    if current_round == 'Test':
        sample_clues = [
            'This is a sample dash', 'cruise missile lana', 'gypsy Molly',
            'Mike is the greatest!'
        ]
        st.header("This is a test round")
        show_turn_rules()
        # The test round ends one count later than the real rounds and shows
        # its clues in bold.
        play(
            sample_clues,
            dash_num + 1,
            "![Alt Text](https://media.giphy.com/media/R3eONMIcGP8nr8ajSs/giphy.gif)",
            lambda clue: st.markdown('**{}**'.format(clue)),
        )
    elif current_round == 'Description':
        clues = r1
        ss.x = -1
        st.header("Welcome to the 1st Round of The Dash")
        show_turn_rules()
        play(
            clues,
            dash_num,
            "![Alt Text](https://media.giphy.com/media/R3eONMIcGP8nr8ajSs/giphy.gif)",
            st.write,
        )
    elif current_round == 'Charades':
        clues = r2
        st.header("Welcome to the 2nd Round of The Dash")
        st.write("Use normal charades to help your team guess the answer")
        show_turn_rules()
        play(
            clues,
            dash_num,
            "![Alt Text](https://media.giphy.com/media/PhH6eIv19BsnDTrX0F/giphy.gif)",
            st.write,
        )
    elif current_round == '1 Word':
        clues = r3
        st.header("Welcome to the Last Round of The Dash!")
        st.write(
            "Use only one word,including proper nouns, to help your team guess the clue"
        )
        show_turn_rules()
        play(
            clues,
            dash_num,
            "![Alt Text](https://media.giphy.com/media/oOEtax0fEqdoiNNfnP/giphy.gif)",
            st.write,
        )
    else:
        st.dataframe(dashes['dashes'])
def main(local=False):
    """Render the NTU course-viewer page.

    Loads the course table, collects a free-text query, the columns to
    display and the selected class days, then writes the matching rows as
    an HTML table.

    Args:
        local: Passed through unchanged to ``read_df`` when loading the
            course table.
    """
    st.set_page_config(
        page_title="Simple NTU Course Viewer",
        page_icon="🧊",
        layout="wide",
        initial_sidebar_state="expanded",
    )

    with st.spinner('讀取資料中⋯'):
        course_df = read_df(local)
        course_df = pre_processing(course_df.copy())

    st.write(""" # 台大 110 年課表查詢""")

    col1, col2 = st.beta_columns((7, 4))
    with col1:
        search_txt = st.text_input('輸入課程名稱/ID/老師名稱', '')
        need_help = st.beta_expander('需要幫忙嗎 👉')
        with need_help:
            st.markdown(
                """輸入**課程名稱**或是**課程 ID** 或是**老師名稱**。不能夠同時輸入課程名稱和老師名稱。""",
                unsafe_allow_html=True)
    with col2:
        # 'raw_day' is only used for filtering, so it is not offered for display.
        valid_column = course_df.drop('raw_day', axis=1).columns
        view_options = st.multiselect('選擇檢視欄位', list(valid_column),
                                      list(valid_column))

    days = ['一', '二', '三', '四', '五', '六', '七']
    if 'days_select' not in st.session_state:
        st.session_state['days_select'] = [False for _ in range(7)]

    with st.form("date_picker"):
        st.write("選擇上課日")
        cols = st.beta_columns(7)
        for i, col in enumerate(cols):
            st.session_state['days_select'][i] = col.checkbox(days[i])
        date_opt = st.radio("篩選條件", ('Subset', 'All Matched'))
        # Every form must have a submit button.
        st.form_submit_button("確認")

    other_info = st.beta_expander('其他資訊 🔗')
    with other_info:
        st.markdown("""一些常用連結: + [PTT NTUcourse 看板](https://www.ptt.cc/bbs/NTUcourse/index.html) + [Original Repo](https://github.com/hungchun0201/NTUclassCrawler) + [台大課程網](https://nol.ntu.edu.tw/nol/guest/index.php) <span style="font-size: 10px">* 註:僅為小型試用版,故僅用 Streamlit 簡單製作而已。若有不週全的地方,請自行修正 🙌🏾</span> """,
                    unsafe_allow_html=True)

    df = course_df
    selected_days = {
        day
        for day, picked in zip(days, st.session_state['days_select'])
        if picked
    }

    def matches_days(raw_day):
        """Return True when a course's days satisfy the chosen day filter."""
        if date_opt == 'Subset':
            return set(raw_day).issubset(selected_days)
        return set(raw_day) == selected_days

    st.write("## 課表結果")
    with st.spinner("結果產生中⋯"):
        rows = None  # None means "no filter active": show every course.
        if search_txt:
            # regex=False: the query is user text, so characters such as
            # "+" or "(" must match literally instead of raising.
            # na=False: a missing field simply does not match.
            rows = (
                df['Title'].str.contains(search_txt, regex=False, na=False)
                | df['Instructor'].str.contains(
                    search_txt, regex=False, na=False)
                | df['Id'].str.contains(search_txt, regex=False, na=False))
        if selected_days:
            day_rows = df['raw_day'].apply(matches_days)
            rows = day_rows if rows is None else rows & day_rows

        if rows is None:
            display_df = df[view_options]
        else:
            display_df = df[rows][view_options]

    st.write("""<style> tr:hover {background-color:#50536b42;} table { max-width: -moz-fit-content; max-width: fit-content; white-space: nowrap; } </style>""",
             unsafe_allow_html=True)
    st.write(f"""<div style="overflow:scroll; justify-content: center;"> {display_df.to_html()} </div>""",
             unsafe_allow_html=True)
    st.balloons()
def app():
    """Render the "Visualizations" page for the tweet data in ``covid_data``.

    A select box picks one view: histograms of tweets, retweets or likes
    per day, a word cloud plus pie chart of the most common words, emotion
    scores from NRCLex, or a word cloud of named entities. Any choice
    without its own branch falls back to the tweets-per-day histogram.
    """
    st.balloons()
    st.markdown("# Visualizations :art:")
    menu = ["Number of Tweets per Day", "Number of Retweets per Day",
            "Number of Likes per Day", "Most Common Tweets",
            "Sentiment Scores", "Common Entities"]
    choice = st.selectbox("View", menu)

    if choice == "Number of Retweets per Day":
        fig1 = px.histogram(covid_data, x="datetime", color="retweets",
                            title="Number of Retweets Per Day")
        st.write(fig1)

    elif choice == "Number of Likes per Day":
        fig2 = px.histogram(covid_data, x="datetime", color="likes",
                            title="Likes Per Day")
        st.write(fig2)

    elif choice == "Most Common Tweets":
        st.write("Word Cloud for Most Common Tweets")
        stop_words = get_stop_words('english')
        concat_quotes = ' '.join(
            covid_data.text_without_stopwords.astype(str))
        stylecloud.gen_stylecloud(
            text=concat_quotes,
            icon_name='fab fa-twitter',
            palette='cartocolors.qualitative.Bold_9',
            background_color='white',
            output_name='tweets.png',
            collocations=False,
            custom_stopwords=stop_words,
        )
        # Show the image file that stylecloud just wrote.
        st.image("tweets.png")

        # Most common words after stemming, in separate columns.
        table_col, input_col = st.beta_columns([3, 2])
        # NOTE(review): this overwrites the shared covid_data['text_stem']
        # column in place -- confirm nothing else relies on the original
        # string values.
        covid_data['text_stem'] = covid_data['text_stem'].apply(
            lambda x: str(x).split())
        top = Counter(
            item for sublist in covid_data['text_stem'] for item in sublist)

        with input_col:
            # Keyword arguments keep the bounds unambiguous: positionally
            # st.slider reads (min_value, max_value, value).
            top_n = st.slider(
                "How many of the common words do you want to see?",
                min_value=0, max_value=10, value=10)
        # Passing the column names to the constructor keeps the frame valid
        # when the slider is at 0 and most_common() returns nothing.
        temp = pd.DataFrame(top.most_common(top_n),
                            columns=['common_words', 'count'])

        with table_col:
            fig = px.pie(temp, values='count', names='common_words',
                         title='Top Common Words',
                         hover_data=['common_words'],
                         color_discrete_sequence=px.colors.qualitative.G10)
            fig.update_layout(showlegend=False, width=450, height=450)
            st.write(fig)

    elif choice == "Sentiment Scores":
        pie_col, input_col = st.beta_columns([3, 2])
        # The column is cast to str before being joined for NRCLex.
        # NOTE(review): this also overwrites the shared column in place.
        covid_data['text_stem'] = covid_data['text_stem'].astype(str)
        text_object = NRCLex(' '.join(covid_data['text_stem']))

        # One row per emotion with its raw count.
        sentiment_scores = pd.DataFrame(
            list(text_object.raw_emotion_scores.items()),
            columns=["Sentiment", "Count"])

        with input_col:
            num_n = st.slider("Change Pie Chart Values Here",
                              min_value=0, max_value=10, value=10)
            sentiment_scores = sentiment_scores.head(num_n)

            btn = st.button("Show Table")
            colorscale = [[0, '#272D31'], [.5, '#ffffff'], [1, '#ffffff']]
            font = ['#FCFCFC', '#00EE00', '#008B00']
            if btn:
                fig = ff.create_table(sentiment_scores,
                                      colorscale=colorscale,
                                      font_colors=font)
                st.write(fig)

        with pie_col:
            fig = px.pie(sentiment_scores, values='Count', names='Sentiment',
                         title='Top Emotional Affects',
                         hover_data=['Sentiment'],
                         color_discrete_sequence=px.colors.qualitative.Dark24)
            fig.update_traces(textposition='inside',
                              textinfo='percent+label')
            fig.update_layout(showlegend=False, width=450, height=450,
                              font=dict(color='#383635', size=15))
            st.write(fig)

        # Table built from NRCLex's affect dictionary.
        st.title("Table Showing Words & Sentiments")
        sentiment_words = pd.DataFrame(
            list(text_object.affect_dict.items()),
            columns=['words', 'sentiments'])
        num_o = st.slider("Change table size",
                          min_value=0, max_value=100, value=100)
        sentiment_words = sentiment_words.head(num_o)

        fig = go.Figure(data=[go.Table(
            columnwidth=[1, 2],
            header=dict(
                values=list(
                    sentiment_words[['words', 'sentiments']].columns),
                fill_color='maroon',
                align=['left', 'center'],
                height=40,
                font=dict(color='white', size=18)),
            cells=dict(
                values=[sentiment_words.words, sentiment_words.sentiments],
                fill_color='lightseagreen',
                align='left'))
        ])
        fig.update_layout(margin=dict(l=5, r=5, b=10, t=10))
        st.write(fig)

    elif choice == "Common Entities":
        st.write("Word Cloud for Most Common Entities")
        words = covid_data.text_stem.unique()
        nlp = en_core_web_sm.load()
        # Only the first 700 unique texts are run through the NER pipeline.
        corpus = list(nlp.pipe(words[:700]))

        # Count entity strings; most_common() orders by descending count and
        # keeps first-seen order among ties.
        entity_counts = Counter(
            str(ent) for doc in corpus for ent in doc.ents)
        sorted_ents = pd.DataFrame(entity_counts.most_common(),
                                   columns=['entities', 'count'])

        stop_words = get_stop_words('english')
        hashtags = sorted_ents['entities'].dropna().tolist()
        unique_entities = " ".join(hashtags)
        stylecloud.gen_stylecloud(
            text=unique_entities,
            icon_name='fas fa-comments',
            palette='cartocolors.qualitative.Prism_8',
            background_color='white',
            output_name='entities.png',
            collocations=False,
            custom_stopwords=stop_words,
        )
        # Show the image file that stylecloud just wrote.
        st.image("entities.png")

    else:
        fig3 = px.histogram(covid_data, x="datetime",
                            title="Number of Tweets Per Day")
        st.write(fig3)