import streamlit as st
from PIL import Image
import numpy as np

img_file_buffer = st.file_uploader("Upload an image", type=["png", "jpg", "jpeg"])

if img_file_buffer is not None:
    # Only open the image once a file has actually been uploaded
    # (the original opened it unconditionally, which crashes on first run).
    image = Image.open(img_file_buffer)
    img_array = np.array(image)
    st.image(
        image,
        caption=f"Your amazing image has shape {img_array.shape[0:2]}",
        use_column_width=True,
    )
import cv2
import numpy as np
import streamlit as st

# ImgMaze and Maze are project-local helpers (see the sketch after this snippet).

def main():
    st.title('Maze Path Planner')
    uploaded_file = st.file_uploader("Choose an image", ["jpg", "jpeg", "png"])
    st.write('Or')
    use_default_image = st.checkbox('Use default maze')
    maze_image = None
    maze_aug_image = None
    marked_image = None

    if use_default_image:
        maze_img_obj = ImgMaze('./img/maze.png', is_filebytes=False)
        maze_image = maze_img_obj.get_bgr_maze()
        maze_aug_image = maze_img_obj.get_augmented_bgr_maze()
    elif uploaded_file is not None:
        file_bytes = np.asarray(bytearray(uploaded_file.read()), dtype=np.uint8)
        maze_img_obj = ImgMaze(file_bytes, is_filebytes=True)
        maze_image = maze_img_obj.get_bgr_maze()
        maze_aug_image = maze_img_obj.get_augmented_bgr_maze()

    if maze_image is not None:
        st.subheader('Use the sliders on the left to position the start and end points')
        start_x = st.sidebar.slider("Start X", value=8 if use_default_image else 50,
                                    min_value=0, max_value=maze_image.shape[1], key='sx')
        start_y = st.sidebar.slider("Start Y", value=9 if use_default_image else 100,
                                    min_value=0, max_value=maze_image.shape[0], key='sy')
        finish_x = st.sidebar.slider("Finish X", value=216 if use_default_image else 100,
                                     min_value=0, max_value=maze_image.shape[1], key='fx')
        finish_y = st.sidebar.slider("Finish Y", value=216 if use_default_image else 100,
                                     min_value=0, max_value=maze_image.shape[0], key='fy')
        marked_image = maze_image.copy()
        # UI circle thickness scaled to the average image dimension
        # (the original summed shape[0] twice, which looks like a typo).
        circle_thickness = (marked_image.shape[0] + marked_image.shape[1]) // 2 // 100
        cv2.circle(marked_image, (start_x, start_y), circle_thickness, (0, 255, 0), -1)
        cv2.circle(marked_image, (finish_x, finish_y), circle_thickness, (255, 0, 0), -1)
        st.image(marked_image, channels="RGB", width=500)

    if marked_image is not None:
        if st.button('Get Path'):
            with st.spinner('Searching for path...'):
                maze_solver_obj = Maze(maze_aug_image, is_augmented=True)
                maze_solver_obj.get_shortest_path(start=(start_x, start_y),
                                                  end=(finish_x, finish_y))
            path_thickness = (maze_image.shape[0] + maze_image.shape[1]) // 2 // 100
            maze_sol_image = maze_solver_obj.get_solution_image(
                alt_img=maze_image, line_width=path_thickness)
            st.image(maze_sol_image, channels="RGB", width=500)
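# The snippet above relies on two project-local classes. A minimal sketch of
# what ImgMaze might look like, assuming OpenCV conventions; the class name,
# method names, and the is_filebytes flag come from the usage above, but the
# bodies are guesses, not the project's actual implementation:

import cv2
import numpy as np

class ImgMaze:
    """Hypothetical loader that yields a BGR maze image from a path or raw bytes."""

    def __init__(self, src, is_filebytes=False):
        if is_filebytes:
            # Decode an in-memory byte buffer (e.g. from st.file_uploader).
            self._bgr = cv2.imdecode(src, cv2.IMREAD_COLOR)
        else:
            self._bgr = cv2.imread(src, cv2.IMREAD_COLOR)

    def get_bgr_maze(self):
        return self._bgr

    def get_augmented_bgr_maze(self):
        # Placeholder: a real implementation would likely threshold or
        # dilate the walls here before path-finding.
        return self._bgr.copy()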
def page(state):  # Streamlit session state
    print(state.mlflow_res.shape)

    # Delete any existing mlflow experiment named '0'
    try:
        mlflow.delete_experiment('0')
    except Exception:
        pass

    # Create a new experiment with a unique ID
    exp_uniq_name = create_mlflow_exp()

    # Title and description
    st.title('Probabilistic Programming')
    st.markdown("""
    This page guides you through the selection of algorithms and hyperparameters
    for Multi-Logit models using Bayesian inference.
    **Start by loading the data**.
    """)

    data_file = st.file_uploader("Upload data...", type="csv", key='train_data')
    try:
        text_io = io.TextIOWrapper(data_file)
    except Exception:
        pass
    if data_file is not None:
        W = pd.read_csv(data_file, index_col=False)
        y = W.iloc[:, 0]
        X = W.iloc[:, 1:]
        st.write(W)

        # Options to select the model
        st.subheader('Select Model')
        model_types = ['Multi-Logit']
        sel_model = st.radio('Which model to use for training?', model_types)

        params = {}
        if sel_model == 'Multi-Logit':
            # Get parameters for Multi-Logit
            params['num-chains'] = st.sidebar.number_input(
                label="Number of chains for sampling",
                min_value=1, max_value=4, value=4, step=1)
            params['num-iters'] = st.sidebar.number_input(
                label="Number of iterations for sampling",
                min_value=100, max_value=1000, value=1000, step=100)
            params['num-warmup-iters'] = st.sidebar.number_input(
                label="Number of iterations for warmup",
                min_value=100, max_value=1000, value=500, step=100)
            params['max-tree-depth'] = st.sidebar.number_input(
                label="Maximum tree depth for the NUTS sampler",
                min_value=10, max_value=20, value=10, step=1)
            sel_sampler = st.radio('Which sampler to use?', ['NUTS', 'HMC'])

        # SageMaker training options
        instance_types = [
            'local', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.4xlarge',
            'ml.m5.24xlarge', 'ml.g4dn.xlarge', 'ml.g4dn.4xlarge',
            'ml.g4dn.16xlarge'
        ]
        sagemaker_instance = st.sidebar.selectbox(
            'Instance type for SageMaker training', instance_types)

        if len(params) > 0:
            samp_submit = st.sidebar.button('Run Sampling')
            if samp_submit:
                if sel_model == 'Multi-Logit':
                    model_title = 'Multi-Logit'
                    image_name = IMAGE_NAME
                    model_name_suffix = 'penguin_xgb_model.json'

                if sagemaker_instance == 'local':
                    # Data prep
                    W = W.iloc[:, 1:]
                    dummy_X = dummify_X(W.iloc[:, 1:], cat_columns=['island', 'sex'])
                    y = W.iloc[:, 0]
                    encoder, encoder_dict = encode_y(y)

                    # Remove NaN rows
                    nan_rows = np.where(dummy_X.isnull().any(axis=1))
                    X = dummy_X.drop(dummy_X.index[nan_rows], inplace=False)
                    y = y.drop(y.index[nan_rows], inplace=False)

                    # Split train and test data with the same random state
                    X_train, X_test, y_train, y_test = train_test_split(
                        X, y, stratify=y, test_size=0.25, random_state=42)
                    data = {
                        'N': X_train.shape[0],
                        'N2': X_test.shape[0],
                        'D': X_train.shape[1],
                        'K': len(np.unique(y_train)),
                        'y': encoder.transform(y_train) + 1,
                        'x': X_train,
                        'x_new': X_test,
                    }

                    # Model specification
                    model = pystan.StanModel(file='penguin/prob_prog/multinomial.stan')
                    if sel_sampler == 'NUTS':
                        fit = model.sampling(
                            data=data,
                            iter=params['num-iters'],
                            chains=params['num-chains'],
                            algorithm=sel_sampler,
                            control=dict(max_treedepth=params['max-tree-depth']))
                        fit_samp = fit.extract(permuted=True)
                        np.save(
                            'data/pystan_results/beta_posterior_NUTS_max_tree_depth_15.npy',
                            fit_samp['beta'])

                        # Plots
                        tmp = fit.stansummary().replace('\n', '\n\t')  # For Streamlit to render the table better
                        st.write(tmp)
                        arviz.plot_trace(fit)
                        st.pyplot()

                        # Model predictions for the training and test sets
                        X_np_train = X_train.to_numpy()
                        X_np_test = X_test.to_numpy()
                        preds_train = np.empty([X_np_train.shape[0], fit_samp['beta'].shape[0]])
                        preds_test = np.empty([X_np_test.shape[0], fit_samp['beta'].shape[0]])
                        for i in range(fit_samp['beta'].shape[0]):
                            # Use the i-th posterior draw for both sets; the
                            # original indexed draw 0 inside the loop (and
                            # computed an extra, discarded softmax), which
                            # looks like a bug.
                            preds_train[:, i] = np.argmax(scipy.special.softmax(
                                X_np_train.dot(fit_samp['beta'][i, :]), axis=1), axis=1)
                            preds_test[:, i] = np.argmax(scipy.special.softmax(
                                X_np_test.dot(fit_samp['beta'][i, :]), axis=1), axis=1)

                        # Get consensus predictions from all samples
                        cons_preds_train = mode(preds_train, axis=1)[0]
                        cons_preds_test = mode(preds_test, axis=1)[0]
                        # The original saved an undefined name `preds`; the
                        # test-set draws are saved here.
                        np.savetxt(
                            'data/pystan_results/preds_posterior_NUTS_max_tree_depth_10.csv',
                            preds_test, delimiter=',')
                        plot_confusion_matrix(
                            encoder.transform(y_train), cons_preds_train[:, 0],
                            classes=np.asarray(list(encoder_dict.keys())),
                            title='Confusion matrix, without normalization')
                        plot_confusion_matrix(
                            encoder.transform(y_test), cons_preds_test[:, 0],
                            classes=np.asarray(list(encoder_dict.keys())),
                            title='Confusion matrix, without normalization')
model, model_version, summary, metrics = load_model_gloves()
cls_model, cls_model_version, cls_summary, cls_metrics = load_model_gloves(
    'gloves-classifier')

with st.expander("1. Distances and Classification"):
    # TODO: dynamically pull this copy from a Medium article
    st.write("""
    ## How to use
    Input an anchor image of the animal type you want to classify (works best on dog/cat breeds). \n
    Input one or more other images to see the predicted distances and the predicted matching percentage.
    """)
    anchor_file = st.file_uploader(
        "Input an image to use as the anchor image",
        type="jpg",
    )
    if anchor_file is not None:
        st.image(anchor_file, caption="Uploaded Anchor Image.",
                 use_column_width=True)
    other_files = st.file_uploader("Input images to compare to the anchor image",
                                   accept_multiple_files=True,
                                   type="jpg")
    if other_files is not None:
        cols_1 = st.columns(4)
        for idx, file in enumerate(other_files):
            cols_1[idx % 4].image(file, caption=file.name, use_column_width=True)
def main(): """Web App""" st.title("Diabetes Risk Prediction 💉") st.text("-- By Mrinal Gosain") activites = ["EDA","Plot","Model Building","About"] choice = st.sidebar.selectbox("Select Activity",activites) # Exploratory data analysis! if choice == 'EDA': st.subheader("Exploratory Data Analysis 🔍") data = st.file_uploader("Upload Dataset",type=["csv","txt"]) if data is not None: # If data isn't empty! df = pd.read_csv(data) st.dataframe(df.head()) if st.checkbox("Show shape"): st.write(df.shape) if st.checkbox("Show Columns"): all_columns = df.columns.to_list() st.write(all_columns) if st.checkbox("Select Columns To Show"): selected_columns = st.multiselect("Select Columns",all_columns) new_df = df[selected_columns] st.dataframe(new_df) if st.checkbox("Show Summary"): st.write(df.describe()) if st.checkbox("Show Value Counts"): st.write(df.iloc[:,-1].value_counts()) if st.checkbox("Correlation with Seaborn"): st.write(sns.heatmap(df.corr(),annot=True)) st.pyplot() if st.checkbox("Pie Chart"): all_columns = df.columns.to_list() columns_to_plot = st.selectbox("Select 1 Column ",all_columns) pie_plot = df[columns_to_plot].value_counts().plot.pie(autopct="%1.1f%%") st.write(pie_plot) st.pyplot() # Plotting ! elif choice == 'Plot': st.subheader("Data Visualization 📈") data = st.file_uploader("Upload Dataset",type=["csv","txt"]) if data is not None: df = pd.read_csv(data) st.dataframe(df.head()) all_columns_names = df.columns.tolist() type_of_plot = st.selectbox("Select Type of Plot",["area","bar","line","hist","box","kde"]) selected_columns_names = st.multiselect("Select Columns To Plot",all_columns_names) if st.button("Generate Plot"): st.success("Generating Customizable Plot of {} for {}".format(type_of_plot,selected_columns_names)) # Plot By Streamlit if type_of_plot == 'area': cust_data = df[selected_columns_names] st.area_chart(cust_data) elif type_of_plot == 'bar': cust_data = df[selected_columns_names] st.bar_chart(cust_data) elif type_of_plot == 'line': cust_data = df[selected_columns_names] st.line_chart(cust_data) # Custom Plot elif type_of_plot: cust_plot= df[selected_columns_names].plot(kind=type_of_plot) st.write(cust_plot) st.pyplot() # Model building! elif choice == 'Model Building': st.subheader("Building ML Model 👨💻") data = st.file_uploader("Upload Dataset",type=["csv","txt"]) if data is not None: df = pd.read_csv(data) st.dataframe(df.head()) # Model Building X = df.iloc[:,0:-1] Y = df.iloc[:,-1] #Make sure that the predicted column is the last one! 
seed = 42 # Model models = [] models.append(("LR",LogisticRegression())) models.append(("LDA",LinearDiscriminantAnalysis())) models.append(("KNN",KNeighborsClassifier())) models.append(('CART', DecisionTreeClassifier())) models.append(('NB', GaussianNB())) models.append(('SVM', SVC())) # evaluate each model in turn # List model_names = [] model_mean = [] model_std = [] all_models = [] scoring = 'accuracy' for name,model in models: kfold = model_selection.KFold(n_splits=10, random_state=seed) cv_results = model_selection.cross_val_score(model,X,Y,cv=kfold,scoring=scoring) model_names.append(name) model_mean.append(cv_results.mean()) model_std.append(cv_results.std()) accuracy_results = {"model_name":name,"model_accuracy":cv_results.mean(),"standard_deviation":cv_results.std()} all_models.append(accuracy_results) if st.checkbox("Metrics as Table"): st.dataframe(pd.DataFrame(zip(model_names,model_mean,model_std),columns=["Model Name","Model Accuracy","Standard Deviation"])) if st.checkbox("Metrics as JSON"): st.json(all_models) elif choice == 'About': st.subheader("About Me 👨🎓") st.text("Hi, I am Mrinal. I am a data science enthusiast who loves to build machine learning application end to end.") st.text("I take avid interests in breaking down complex problems and leverage data to drive business") if st.button("Github 🔗"): webbrowser.open_new_tab(github_url) if st.button("Linkedin 🔗"): webbrowser.open_new_tab(linkedin_url)
import pandas as pd
import streamlit as st

st.info(
    "## Instructions:\n"
    "1. Upload a simple csv (like `data.csv` from this repo)\n"
    "2. Check the box to choose column names\n"
    "3. Change the column name selection and see the dataframe update in response\n"
    "4. Uncheck the box to exit column name selection and see the dataframe go back to its previous state\n"
)

csv_file = st.file_uploader("File", type="csv")
if csv_file is not None:
    dataframe = pd.read_csv(csv_file)
    all_columns = list(dataframe.columns)
    if st.checkbox("Select Columns", False):
        columns = st.multiselect("Columns", all_columns, all_columns)
    else:
        columns = all_columns
    st.write(dataframe.filter(columns))
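# A small aside on the last line above: dataframe.filter(columns) selects by
# column label, so a plain-indexing spelling gives the same result here, since
# the multiselect guarantees every name exists:
subset = dataframe[columns]  # equivalent to dataframe.filter(columns)
assert list(subset.columns) == columns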
def main():
    st.set_option('deprecation.showfileUploaderEncoding', False)
    st.title("HATI.AI")
    image = Image.open('macroview.jpg')
    # st.image(image, use_column_width=False)
    st.sidebar.image(image)
    st.sidebar.title("Hati.Ai Web App")

    menu = ["Login", "SignUp"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Login":
        st.subheader("Login Section")
        username = st.sidebar.text_input("User Name")
        password = st.sidebar.text_input("Password", type='password')
        if st.sidebar.checkbox("Login"):
            # if password == '12345':
            create_usertable()
            hashed_pswd = make_hashes(password)
            result = login_user(username, check_hashes(password, hashed_pswd))
            if result:
                st.success("Logged In as {}".format(username))

                def process_text(text):
                    processed_data = []
                    # Make all the strings lowercase and remove non-alphabetic characters
                    # text = re.sub('[^A-Za-z]', ' ', text.lower())
                    # Tokenize the text, i.e. split every sentence into a list of words.
                    # Since the text is already split into sentences, there is no need to call sent_tokenize.
                    tokenized_text = word_tokenize(text)
                    # Append the result into a new list called processed_data
                    processed_data.append(tokenized_text)
                    # Remember, this final output is a list of words
                    return processed_data

                @st.cache(suppress_st_warning=True)
                def load_data(uploaded_file):
                    df = pd.read_csv(uploaded_file)
                    return df

                st.sidebar.subheader("Choose what you want to do")
                classifier = st.sidebar.selectbox(
                    " ", ("Find new topics automatically",
                          "POWER BI Dashboard",
                          "Interact with our chatbot"))

                if classifier == 'POWER BI Dashboard':
                    import streamlit.components.v1 as components
                    from urllib.request import urlopen
                    html = urlopen("https://app.powerbi.com/view?r=eyJrIjoiZTA4NWU4MjYtOTk3Yi00N2ZhLTgwZWQtZWFhMzNkNDk1Zjk3IiwidCI6Ijk5NmQwYTI3LWUwOGQtNDU1Ny05OWJlLTY3ZmQ2Yjk3OTA0NCIsImMiOjEwfQ%3D%3D&pageName=ReportSection06db5928b6af61b2868f").read()
                    # components.html(html, width=None, height=600, scrolling=True)
                    st.markdown("""
                        <iframe width="900" height="606" src="https://app.powerbi.com/view?r=eyJrIjoiZTA4NWU4MjYtOTk3Yi00N2ZhLTgwZWQtZWFhMzNkNDk1Zjk3IiwidCI6Ijk5NmQwYTI3LWUwOGQtNDU1Ny05OWJlLTY3ZmQ2Yjk3OTA0NCIsImMiOjEwfQ%3D%3D&pageName=ReportSection06db5928b6af61b2868f" frameborder="0" style="border:0" allowfullscreen></iframe>
                        """, unsafe_allow_html=True)

                if classifier == 'Interact with our chatbot':
                    import pickle
                    with open('tnb_topic_classifier_svm', 'rb') as training_model:
                        topic_model = pickle.load(training_model)
                    import malaya
                    model = malaya.sentiment.transformer(model='albert', size='base')
                    # from src import model
                    # malay_bert = model.BertModel()
                    # eng_flair = model.Flair()
                    # eng_vader = model.Vader()
                    test = pd.DataFrame()
                    test['Positive'] = ''
                    test['Neutral'] = ''
                    test['Negative'] = ''

                    st.title("Sentiment Analyzer")
                    message = st.text_area("Enter Text", "Type Here ..")
                    if st.button("Analyze"):
                        with st.spinner("Analyzing the text …"):
                            result = model.predict_proba([message])
                            # result = malay_bert.predict(message)
                            message = [message]
                            topic = topic_model.predict(message)
                            # output = "Result is: Positive:" + str(result[0]) + "Neutral:" + str(result[1]) + "Negative:" + str(result[2]) + "topic is: " + str(topic)
                            output = "result is: " + str(result) + " topic is: " + str(topic)
                            st.write(output)
                    else:
                        st.warning("Not sure! Try to add some more words")

                from stop_words import get_stop_words
                if classifier == 'Find new topics automatically':
                    uploaded_file = st.file_uploader('Upload CSV file to begin', type='csv')
                    # If a file was uploaded, show the left bar
                    if uploaded_file is not None:
                        df = load_data(uploaded_file)
                        if st.sidebar.checkbox("Show raw data", False):
                            st.subheader("Uploaded Data Set")
                            st.write(df)
                        st.sidebar.subheader("Text column to analyse")
                        st_ms = st.sidebar.selectbox("Select Text Columns To Analyse",
                                                     (df.columns.tolist()))
                        df_list = list(df)
                        import top2vec
                        from top2vec import Top2Vec

                        # Initialize an empty dataframe, convert the text into
                        # strings, and append into the new column
                        d1 = pd.DataFrame()
                        d1['text'] = ""
                        d1['text'] = df[st_ms]
                        d1['text'] = d1['text'].astype(str)

                        # Initialize the Top2Vec model and fit the text
                        # model.build_vocab(df_list, update=False)
                        model = Top2Vec(documents=d1['text'], speed="learn", workers=10)
                        topic_sizes, topic_nums = model.get_topic_sizes()
                        for topic in topic_nums:
                            # Display the generated word-cloud image
                            st.pyplot(model.generate_topic_wordcloud(topic))
            else:
                st.warning("Incorrect Username/Password")

    elif choice == "SignUp":
        st.subheader("Create New Account")
        new_user = st.text_input("Username")
        new_password = st.text_input("Password", type='password')
        if st.button("Signup"):
            create_usertable()
            add_userdata(new_user, make_hashes(new_password))
            st.success("You have successfully created a valid Account")
            st.info("Go to Login Menu to login")
import streamlit as st
import pdfplumber

def main(file):
    with pdfplumber.open(file) as pdf:
        page = pdf.pages[0]
        text = page.extract_text(x_tolerance=2)
        return text

st.title("Invoice processing")
file = st.file_uploader("Choose a file to extract")
element = st.text_input("Enter the parameter to extract")
if st.button('Classify'):
    if file is not None:  # Guard against clicking before a file is uploaded
        c = main(file)
        st.write(c)
        for line in c.split('\n'):
            if line.startswith(element):
                word = line.split()[-1]
                st.write("element:", word)
st.write('- *sdmt/bvmt/cvlt*: raw score on the test (integer)')
st.write('**Note 1**: please use exactly these column names, in this order')
st.write(
    '**Note 2**: only the first 3 columns are an absolute requirement. '
    'For the cognitive scores, please prepare your dataframe to only contain columns for which you have data. '
    'Hence, this can be a subset of the latter 3 columns, but it should include at least one of them.'
)
st.header(
    'Step 2: Define the z-score at which you want to declare cognitive impairment'
)
z_cutoff = st.selectbox(label='Choose the z cutoff score',
                        options=[-1.5, -1, -0.5, 0])
st.header('Step 3: Upload your excel file')
input_object = st.file_uploader("Browse for a file or drag and drop here:",
                                type=("xlsx"))
if input_object:
    input_data = pd.read_excel(input_object)
    # region Perform checks that the data was entered correctly
    error_dict = {
        'columns': 'Please be sure to use the correct column names and that they are lower case',
        'age': 'Please use age values between 0 and 125 years, and only use integer values',
        'sex': 'Please assure the following encoding: Male = 1, Female = 2',
        'education': 'Please use education levels that are encoded as 6, 12, 13, 15, 17 or 21 years',
        'sdmt': 'Please use sdmt values between 0 and 110',
        'bvmt': 'Please use bvmt values between 0 and 36',
        'cvlt': 'Please use cvlt values between 0 and 80'
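# The snippet above is cut off mid-definition, but a validation pass over the
# uploaded dataframe using an error_dict like this one might look as follows.
# This is a sketch only: the validate helper is hypothetical, and the column
# names and ranges are taken from the messages above.

def validate(input_data, error_dict):
    errors = []
    expected = ['age', 'sex', 'education', 'sdmt', 'bvmt', 'cvlt']
    if not set(input_data.columns).issubset(expected):
        errors.append(error_dict['columns'])
        return errors
    if 'age' in input_data and not input_data['age'].between(0, 125).all():
        errors.append(error_dict['age'])
    if 'sdmt' in input_data and not input_data['sdmt'].between(0, 110).all():
        errors.append(error_dict['sdmt'])
    if 'bvmt' in input_data and not input_data['bvmt'].between(0, 36).all():
        errors.append(error_dict['bvmt'])
    if 'cvlt' in input_data and not input_data['cvlt'].between(0, 80).all():
        errors.append(error_dict['cvlt'])
    return errors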
        par_caps = [word_2_indices[i] for i in start_word]
        par_caps = sequence.pad_sequences([par_caps], maxlen=max_len, padding='post')
        model = im_model()
        preds = model.predict([np.array([image]), np.array(par_caps)])
        word_pred = indices_2_word[np.argmax(preds[0])]
        start_word.append(word_pred)
        if word_pred == "<end>" or len(start_word) > max_len:
            break
    return ' '.join(start_word[1:-1])

# set_option must run before the uploader is created to take effect
st.set_option('deprecation.showfileUploaderEncoding', False)
uploaded_file = st.file_uploader('Upload the image', type=['jpg', 'png'])
if uploaded_file is not None:
    img = Image.open(uploaded_file)
    st.image(img)
    s = st.success('Generating Caption')
    test_img = get_encoding(resnet(), uploaded_file)
    Argmax_Search = predict_captions(test_img)
    s.empty()
    st.markdown(f'''<html> <p style="color:white; background-color:black; font-size:140%; display:inline-block; padding:10px;
from nltk import word_tokenize
from sklearn.cluster import KMeans
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import classification_report, pairwise_distances
from sklearn.metrics.pairwise import pairwise_kernels

from cleaning import (apply_cleaning, build_idf_matrix, build_lexicon, freq,
                      fulldataset, idf, l2_normalizer, numDocsContaining)

st.write("""
# Simple Traceability SRS Document
The following algorithm is used to measure traceability in the document.
""")

# File upload
index0 = st.file_uploader("Choose a file")
if index0 is not None:
    st.sidebar.header('Dataset Parameter')
    x1 = pd.ExcelFile(index0)
    index1 = st.sidebar.selectbox('Which dataset do you choose?', x1.sheet_names)

    # Load the example data (from either the functional or non-functional sheet)
    st.header('Dataset parameters')
    statement = fulldataset(index0, index1)

    # Get the text to clean (from the desired rows)
    text_to_clean = list(statement['Requirement Statement'])

    # Clean text
    print("Loading Original & Cleaned Text...")
    cleaned_text = apply_cleaning(text_to_clean)
from PIL import ImageFont

st.title('Face detection')
st.write('You can load jpg or JPG files.')
st.write('Faces are recognized, and gender and age are estimated.')

with open('secret.json') as f:
    secret_json = json.load(f)
subscription_key = secret_json['subscription_key']
assert subscription_key

face_api_url = 'https://cntr2020.cognitiveservices.azure.com/face/v1.0/detect'

uploaded_file = st.file_uploader("Choose an image...", type='jpg')
if uploaded_file is not None:
    img = Image.open(uploaded_file)
    with io.BytesIO() as output:
        img.save(output, format="JPEG")
        binary_img = output.getvalue()

    headers = {
        'Content-type': 'application/octet-stream',
        'Ocp-Apim-Subscription-Key': subscription_key
    }
    params = {
        'returnFaceId': 'true',
        'returnFaceAttributes': 'age,gender'
    }
import io
import streamlit as st
import pandas as pd

st.set_option('deprecation.showfileUploaderEncoding', False)

# Title
GROUP_NAME = 'RandomGroup'

file_buffer = st.file_uploader("Upload WhatsApp chat text file", type='txt')
if file_buffer is not None:  # Only read once a file has been uploaded
    chat_text = file_buffer.readlines()
    st.write(chat_text)
    st.title(f"{GROUP_NAME} Chat Analysis")
    # Chat data at a glance
    st.write(
        pd.DataFrame({
            'first column': [1, 2, 3, 4],
            'second column': [10, 20, 30, 40]
        }))
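# st.file_uploader returns a binary buffer, so readlines() above yields bytes
# objects (b'...'). A sketch of an alternative that decodes the chat to text
# lines first, assuming the UTF-8 encoding that WhatsApp exports use:
if file_buffer is not None:
    lines = file_buffer.read().decode('utf-8').splitlines()
    st.write(lines[:5])  # preview the first few messages as plain strings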
    csv = df.to_csv().encode()
    b64 = base64.b64encode(csv).decode()
    href = f'<a href="data:file/csv;base64,{b64}" download="captura.csv" target="_blank">Download file data</a>'
    return href

# Press the green button in the gutter to run the script.
if __name__ == "__main__":
    """ """
    st.sidebar.info("Data Generating!")
    st.sidebar.info("Please upload a file with rules for generating data!")
    n_try = st.sidebar.number_input('Attempts at generating', min_value=1,
                                    max_value=10, value=5)
    file_uploaded = st.file_uploader("Upload File")
    if file_uploaded is not None:
        listData = prepared_file_uploaded(file_uploaded)
        dataCombination = data_generating(listData, n_try=n_try)
        data2export = export2file(dataCombination)
        st.info("Data Overview!")
        st.info(f"The generated data has {data2export.shape[0]} rows")
        st.dataframe(data2export.head(10))
        st.markdown(get_table_download_link_csv(data2export),
                    unsafe_allow_html=True)
        pr = ProfileReport(data2export, explorative=True)
        st.title("Data Statistic")
        st_profile_report(pr)
import re
from datetime import date

import streamlit as st
from streamlit.script_runner import RerunException

import dbutils
import image_process

name_regex = '[A-Za-z]{2,25}( [A-Za-z]{2,25})?'
name_regex = re.compile(name_regex)
pan_regex = r'^[A-Za-z]{5}[0-9]{4}[A-Za-z]$'
pan_regex = re.compile(pan_regex)

st.title("PAN Card Reader")
st.markdown("Submit and verify your PAN card")

pan_card_image = st.file_uploader("Upload image", type=['png', 'jpg'],
                                  accept_multiple_files=False)
cols = st.beta_columns(2)
with cols[0]:
    pan = st.text_input("PAN number")
with cols[1]:
    min_value = date(1921, 1, 1)
    dob = st.date_input("Date of Birth", min_value=min_value)
name = st.text_input("Name")
fathers_name = st.text_input("Father's Name")
placeholder = st.empty()

def standardize_name(name: str) -> str:
    name = " ".join(name.lower().split())
    return name
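# A sketch of how the two compiled patterns above might be used to validate the
# form inputs before comparing against the OCR result. The validate_inputs
# helper is hypothetical, not part of the original app:

def validate_inputs(pan: str, name: str) -> bool:
    if not pan_regex.match(pan.strip()):
        return False  # PAN must be 5 letters, 4 digits, then 1 letter
    if not name_regex.fullmatch(standardize_name(name)):
        return False  # one or two alphabetic words, 2-25 chars each
    return True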
def main():
    st.image('img/gui_logo.jpeg', use_column_width=True)
    st.header('Welcome!')
    st.subheader('**You are in the client recommendation system**')
    st.markdown(
        'The system will recommend new clients based on comparisons with your current clients, customized to the characteristics you want.'
    )
    st.markdown(
        '### We need you to provide us with your **client portfolio!**'
    )
    st.markdown(
        ' *Note: if you do not have a portfolio to use, pick one of [these](https://github.com/guireis1/Codenation-Final-Project/tree/master/data).*'
    )
    file3 = st.file_uploader('Upload clientes.csv', type='csv')
    if file3 is not None:
        market_pre = pd.read_csv('data/data_preprocess.csv')
        market = pd.read_csv('data/market.csv')
        # market = pd.DataFrame(readcsv(file2))
        # market = pd.read_csv(file2)
        # market_pre = pd.DataFrame(readcsv(file1))
        # market_pre = pd.read_csv(file1)
        port = pd.DataFrame(readcsv(file3))
        st.text('Loading data...done!')

        # Start the processing
        # market = pd.read_csv('market.csv')
        # market_pre = pd.read_csv('data_preprocess.csv')
        # port = pd.read_csv('data/estaticos_portfolio1.csv')
        market_pre.set_index('id', inplace=True)
        market.set_index(market_pre.index, inplace=True)
        market.drop('Unnamed: 0', axis=1, inplace=True)
        port = port.set_index('id')
        port.drop(port.columns, axis=1, inplace=True)
        port_market = market.merge(port, how='right',
                                   left_index=True, right_index=True)
        port_market_pre = market_pre.merge(port, how='right',
                                           left_index=True, right_index=True)
        st.markdown('Portfolio DataFrame:')
        head(port_market)
        # All datasets ready
        # st.sidebar.image(st.image('img/logo.png', use_column_width=True))

        st.sidebar.header('Portfolio analysis options:')
        sidemulti = st.sidebar.multiselect(
            'Choose: ', ('Visualization', 'Descriptive', 'Geolocation'))
        if 'Visualization' in sidemulti:
            st.markdown('## **Portfolio Visualization**')
            st.markdown('Client profile considering the important features')
            vis(port_market)
            st.markdown('*Click on the image for a better view*')
        if 'Descriptive' in sidemulti:
            st.markdown('## **Portfolio Descriptive Analysis**')
            st.dataframe(descritiva(port_market))
            missing(port_market, 'Missing values in the portfolio')
            missing_dendo(port_market, 'Dendrogram of the portfolio missing values')
            st.markdown('*Click on the image for a better view*')
        if 'Geolocation' in sidemulti:
            coordenadas = pd.read_csv(
                'https://raw.githubusercontent.com/guireis1/Codenation-Final-Project/master/data/coordenadas'
            )
            coordenadas.drop('Unnamed: 0', axis=1, inplace=True)
            st.markdown('## **Portfolio Geolocation**')
            st.markdown('Location of the companies in the portfolio')
            cord_port = geoloc(port_market, coordenadas)
            cord_port_df = pd.DataFrame(cord_port, columns=('lat', 'lon'))
            st.map(cord_port_df)

        st.sidebar.header('Market analysis options:')
        sidemulti_market = st.sidebar.multiselect(
            'Choose: ', ('Visualization', 'Descriptive', 'Correlation',
                         'Missing-value analysis', 'Excluded columns'))
        if 'Visualization' in sidemulti_market:
            st.markdown('## **Market Visualization**')
            vis(market)
            st.markdown('*Click on the image for a better view*')
        if 'Descriptive' in sidemulti_market:
            st.markdown('## **Market Descriptive Analysis**')
            st.dataframe(descritiva(market))
            # missing(market, 'Missing values')
            # missing_dendo(market, 'Missing-values dendrogram')
        if 'Correlation' in sidemulti_market:
            st.markdown('## **Market Correlations**')
            st.markdown('Standard correlation')
            st.image('img/corr_matrix.png', use_column_width=True)
            st.markdown('Correlation using PPS')
            st.image('img/corr_pps.png', use_column_width=True)
        if 'Missing-value analysis' in sidemulti_market:
            st.markdown('## **Missing-value analysis**')
            st.markdown('### **Numeric columns:**')
            st.image('img/valores20.png', use_column_width=True)
            st.image('img/valores60.png', use_column_width=True)
            st.image('img/valores80.png', use_column_width=True)
            st.image('img/dendo_90.png', use_column_width=True)
            st.image('img/dendo100.png', use_column_width=True)
            st.markdown('### **Categorical columns:**')
            st.image('img/valores_nulos.png', use_column_width=True)
            st.image('img/dendo_cat.png', use_column_width=True)
        if 'Excluded columns' in sidemulti_market:
            col_excluidas = [
                'sg_uf', 'idade_emp_cat', 'fl_me', 'fl_sa', 'fl_epp',
                'fl_ltda', 'dt_situacao', 'fl_st_especial', 'nm_divisao',
                'nm_segmento', 'fl_spa', 'vl_total_tancagem',
                'vl_total_veiculos_antt', 'fl_optante_simples', 'qt_art',
                'vl_total_veiculos_pesados_grupo', 'vl_total_veiculos_leves_grupo',
                'vl_total_tancagem_grupo', 'vl_total_veiculos_antt_grupo',
                'vl_potenc_cons_oleo_gas', 'fl_optante_simei', 'sg_uf_matriz',
                'de_saude_rescencia', 'nu_meses_rescencia', 'de_indicador_telefone',
                'fl_simples_irregular', 'vl_frota', 'qt_socios_pf', 'qt_socios_pj',
                'idade_maxima_socios', 'idade_minima_socios', 'qt_socios_st_regular',
                'qt_socios_st_suspensa', 'qt_socios_masculino', 'qt_socios_feminino',
                'qt_socios_pep', 'qt_alteracao_socio_total', 'qt_alteracao_socio_90d',
                'qt_alteracao_socio_180d', 'qt_alteracao_socio_365d',
                'qt_socios_pj_ativos', 'qt_socios_pj_nulos', 'qt_socios_pj_baixados',
                'qt_socios_pj_suspensos', 'qt_socios_pj_inaptos',
                'vl_idade_media_socios_pj', 'vl_idade_maxima_socios_pj',
                'vl_idade_minima_socios_pj', 'qt_coligados', 'qt_socios_coligados',
                'qt_coligados_matriz', 'qt_coligados_ativo', 'qt_coligados_baixada',
                'qt_coligados_inapta', 'qt_coligados_suspensa', 'qt_coligados_nula',
                'idade_media_coligadas', 'idade_maxima_coligadas',
                'idade_minima_coligadas', 'coligada_mais_nova_ativa',
                'coligada_mais_antiga_ativa', 'idade_media_coligadas_ativas',
                'coligada_mais_nova_baixada', 'coligada_mais_antiga_baixada',
                'idade_media_coligadas_baixadas', 'qt_coligados_sa',
                'qt_coligados_me', 'qt_coligados_mei', 'qt_coligados_ltda',
                'qt_coligados_epp', 'qt_coligados_norte', 'qt_coligados_sul',
                'qt_coligados_nordeste', 'qt_coligados_centro',
                'qt_coligados_sudeste', 'qt_coligados_exterior', 'qt_ufs_coligados',
                'qt_regioes_coligados', 'qt_ramos_coligados',
                'qt_coligados_industria', 'qt_coligados_agropecuaria',
                'qt_coligados_comercio', 'qt_coligados_serviço',
                'qt_coligados_ccivil', 'qt_funcionarios_coligados',
                'qt_funcionarios_coligados_gp', 'media_funcionarios_coligados_gp',
                'max_funcionarios_coligados_gp', 'min_funcionarios_coligados_gp',
                'vl_folha_coligados', 'media_vl_folha_coligados',
                'max_vl_folha_coligados', 'min_vl_folha_coligados',
                'vl_folha_coligados_gp', 'media_vl_folha_coligados_gp',
                'max_vl_folha_coligados_gp', 'min_vl_folha_coligados_gp',
                'faturamento_est_coligados', 'media_faturamento_est_coligados',
                'max_faturamento_est_coligados', 'min_faturamento_est_coligados',
                'faturamento_est_coligados_gp',
                'media_faturamento_est_coligados_gp',
                'max_faturamento_est_coligados_gp',
                'min_faturamento_est_coligados_gp', 'total_filiais_coligados',
                'media_filiais_coligados', 'max_filiais_coligados',
                'min_filiais_coligados', 'qt_coligados_atividade_alto',
                'qt_coligados_atividade_medio', 'qt_coligados_atividade_baixo',
                'qt_coligados_atividade_mt_baixo', 'qt_coligados_atividade_inativo',
                'qt_coligadas', 'sum_faturamento_estimado_coligadas',
                'de_faixa_faturamento_estimado', 'vl_faturamento_estimado_aux',
                'vl_faturamento_estimado_grupo_aux', 'qt_ex_funcionarios',
                'qt_funcionarios_grupo', 'percent_func_genero_masc',
                'percent_func_genero_fem', 'idade_ate_18', 'idade_de_19_a_23',
                'idade_de_24_a_28', 'idade_de_29_a_33', 'idade_de_34_a_38',
                'idade_de_39_a_43', 'idade_de_44_a_48', 'idade_de_49_a_53',
                'idade_de_54_a_58', 'idade_acima_de_58',
                'grau_instrucao_macro_analfabeto',
                'grau_instrucao_macro_escolaridade_fundamental',
                'grau_instrucao_macro_escolaridade_media',
                'grau_instrucao_macro_escolaridade_superior',
                'grau_instrucao_macro_desconhecido', 'total',
                'meses_ultima_contratacaco', 'qt_admitidos_12meses',
                'qt_desligados_12meses', 'qt_desligados', 'qt_admitidos',
                'media_meses_servicos_all', 'max_meses_servicos_all',
                'min_meses_servicos_all', 'media_meses_servicos',
                'max_meses_servicos', 'min_meses_servicos',
                'qt_funcionarios_12meses', 'qt_funcionarios_24meses',
                'tx_crescimento_12meses', 'tx_crescimento_24meses'
            ]
            st.markdown('## **Excluded columns**')
            st.markdown(
                'We decided not to use these columns because of the number of unfilled rows, strong correlations with other variables, low importance for the model, or redundancy!'
            )
            st.markdown('**They are:**')
            st.write(col_excluidas)

        st.sidebar.header('Recommendation system')
        start_model = st.sidebar.checkbox(
            'Check here to start modeling the system!')
        st.sidebar.markdown('**Developed by**')
        st.sidebar.markdown('*Guilherme Reis Mendes*')
        st.sidebar.markdown(
            '[LinkedIn](https://www.linkedin.com/in/guilherme-reis-2862ab153/)'
        )
        st.sidebar.markdown('[GitHub](https://github.com/guireis1/)')

        if start_model:
            st.header('**Modeling**')
            st.subheader('**First, select the features you would like to use**')
            st.markdown(
                '*These are the columns the recommendation system will use!*'
            )
            st.markdown('**Columns we recommend:**')
            col_select = []
            ramo = st.checkbox('de_ramo')
            idade = st.checkbox('idade_emp_cat')
            meso = st.checkbox('nm_meso_regiao')
            juridica = st.checkbox('natureza_juridica_macro')
            faturamento = st.checkbox('de_faixa_faturamento_estimado_grupo')
            filiais = st.checkbox('qt_filiais')
            mei = st.checkbox('fl_mei')
            rm = st.checkbox('fl_rm')
            st.markdown('**Optional columns:**')
            setor = st.checkbox('setor')
            rotatividade = st.checkbox('tx_rotatividade')
            idade_socios = st.checkbox('idade_media_socios')
            socios = st.checkbox('qt_socios')
            renda = st.checkbox('empsetorcensitariofaixarendapopulacao')
            leve = st.checkbox('vl_total_veiculos_leves_grupo')
            pesado = st.checkbox('vl_total_veiculos_pesados_grupo')
            iss = st.checkbox('fl_passivel_iss')
            atividade = st.checkbox('de_nivel_atividade')
            saude = st.checkbox('de_saude_tributaria')
            veiculo = st.checkbox('fl_veiculo')
            antt = st.checkbox('fl_antt')
            telefone = st.checkbox('fl_telefone')
            email = st.checkbox('fl_email')
            matriz = st.checkbox('fl_matriz')
            if ramo:
                col_select.append('de_ramo')
            if idade:
                col_select.append('idade_emp_cat')
            if meso:
                col_select.append('nm_meso_regiao')
                meso_ohe = pd.get_dummies(market_pre['nm_meso_regiao'],
                                          drop_first=True)
            if faturamento:
                col_select.append('de_faixa_faturamento_estimado_grupo')
            if juridica:
                col_select.append('natureza_juridica_macro')
                juridico_ohe = pd.get_dummies(
                    market_pre['natureza_juridica_macro'], drop_first=True)
            if filiais:
                col_select.append('qt_filiais')
            if mei:
                col_select.append('fl_mei')
            if rm:
                col_select.append('fl_rm')
            if setor:
                col_select.append('setor')
                setor_ohe = pd.get_dummies(market_pre['setor'], drop_first=True)
            if rotatividade:
                col_select.append('tx_rotatividade')
            if idade_socios:
                col_select.append('idade_media_socios')
            if socios:
                col_select.append('qt_socios')
            if renda:
                col_select.append('empsetorcensitariofaixarendapopulacao')
            if leve:
                col_select.append('vl_total_veiculos_leves_grupo')
            if pesado:
                col_select.append('vl_total_veiculos_pesados_grupo')
            if iss:
                col_select.append('fl_passivel_iss')
            if atividade:
                col_select.append('de_nivel_atividade')
            if saude:
                col_select.append('de_saude_tributaria')
            if veiculo:
                col_select.append('fl_veiculo')
            if antt:
                col_select.append('fl_antt')
            if telefone:
                col_select.append('fl_telefone')
            if email:
                col_select.append('fl_email')
            if matriz:
                col_select.append('fl_matriz')

            st.markdown('## **Can we continue?**')
            features_select = st.checkbox('Yes')
            if features_select:
                st.text('*Columns selected successfully!*')
                st.write('Selected columns:', col_select)
                st.subheader('Now choose the number of recommendations you want!')
                st.markdown(
                    '**We are working with k-nearest neighbors. The selected value will be proportional to the number of samples in the portfolio!**'
                )
                st.markdown(
                    '*Remember: the larger K, the more recommendations, but the less precise they are*'
                )
                slider_nn = st.slider('Number of neighbors:', 2, 10)
                market_col_select = market_pre[col_select]
                if 'setor' in market_col_select:
                    market_col_select.drop('setor', axis=1, inplace=True)
                    market_col_select = pd.concat(
                        [market_col_select, setor_ohe], axis=1)
                if 'nm_meso_regiao' in market_col_select:
                    market_col_select.drop('nm_meso_regiao', axis=1, inplace=True)
                    market_col_select = pd.concat(
                        [market_col_select, meso_ohe], axis=1)
                # The original re-checked 'setor' here, which looks like a
                # copy-paste slip; the dropped column shows the intent.
                if 'natureza_juridica_macro' in market_col_select:
                    market_col_select.drop('natureza_juridica_macro',
                                           axis=1, inplace=True)
                    market_col_select = pd.concat(
                        [market_col_select, juridico_ohe], axis=1)
                market_col_select_scaled = StandardScaler().fit_transform(
                    market_col_select)
                market_col_select_scaled = pd.DataFrame(
                    market_col_select_scaled,
                    columns=market_col_select.columns,
                    index=market_col_select.index)
                head(market_col_select_scaled)
                st.markdown('## **Recommendation**')
                button_model = st.checkbox('Check here to start the system')
                if button_model:
                    st.text('Loading model...wait!')
                    port_model = market_col_select_scaled.merge(
                        port, how='right', left_index=True, right_index=True)
                    port_model.dropna(inplace=True)
                    suggestion = recommend(port_model, slider_nn,
                                           market_col_select_scaled, market)
                    suggestion['id'] = suggestion.index
                    st.text('Loading model...done!')
                    st.markdown('**Recommendation system complete!**')
                    size_sug = suggestion.shape[0]
                    st.write('Generated', size_sug, 'recommendations!')
                    st.markdown('Download them here:')
                    st.markdown(get_table_download_link(suggestion),
                                unsafe_allow_html=True)
                    coordenadas_market = pd.read_csv(
                        'https://raw.githubusercontent.com/guireis1/Codenation-Final-Project/master/data/coordenadas'
                    )
                    coordenadas_market.drop('Unnamed: 0', axis=1, inplace=True)
                    cord_reco = geoloc(suggestion, coordenadas_market)
                    cord_reco_df = pd.DataFrame(cord_reco, columns=('lat', 'lon'))
                    st.markdown('**Geolocation of the recommended companies**')
                    st.map(cord_reco_df)
                    st.markdown('**Visualization of the recommended companies**')
                    vis(suggestion)
def main():
    st.set_option('deprecation.showPyplotGlobalUse', False)
    image = Image.open('data/Reconn.png')
    st.image(image, use_column_width=False)

    def load_data(uploaded_file):
        df = pd.read_csv(uploaded_file)
        return df

    uploaded_file = st.file_uploader('Upload file to begin', type=("csv"))
    if uploaded_file is not None:
        df = load_data(uploaded_file)
        target_column = st.selectbox('Select Target Column', list(df.columns),
                                     key='target_column')

        st.sidebar.title('Know your dataset')
        if st.sidebar.checkbox("Preview Dataset"):
            st.markdown('## Dataset preview')
            if st.button("Head"):
                st.write(df.head(10))
            elif st.button("Tail"):
                st.write(df.tail(10))
            else:
                number = st.slider("Select No of Rows to show", 10, df.shape[0])
                st.write(df.head(number))

        if st.sidebar.checkbox("Show Column Names"):
            st.markdown('## Column names')
            st.write(df.columns)

        if st.sidebar.checkbox("Show Dimensions"):
            st.write(df.shape)

        if st.sidebar.checkbox('Describe', value=False):
            st.markdown('## Data Description')
            st.write(df.describe())
            st.markdown('### Columns that are potential binary features')
            bin_cols = []
            for col in df.columns:
                if len(df[col].value_counts()) == 2:
                    bin_cols.append(col)
            st.write(bin_cols)
            st.markdown('### Column Types')
            st.write(df.dtypes)

        if st.sidebar.checkbox('Missing Data', value=False):
            st.markdown('## Missing Data')
            total = df.isnull().sum().sort_values(ascending=False)
            percent = (df.isnull().sum() / df.isnull().count()).sort_values(ascending=False)
            missing_data = pd.concat([total, percent], axis=1, keys=['Total', 'Percent'])
            st.write(missing_data)
            try:
                sns.heatmap(df.isnull())
                st.pyplot()
            except Exception:
                st.warning('Error when showing plots')

        if st.sidebar.checkbox('Value Counts', value=False):
            st.markdown('## Value Counts')
            col = st.selectbox('Select Column', list(df.columns), key='val_col')
            st.write(df[col].value_counts())

        if st.sidebar.checkbox('Unique elements', value=False):
            st.markdown('## Unique elements')
            if st.checkbox('Show all unique elements', value=False):
                st.write(df.nunique())
            col = st.selectbox('Show columnwise unique elements',
                               list(df.columns), key='unique_col')
            st.write(df[col].unique())

        if st.sidebar.checkbox('Show Distribution', False):
            st.subheader(f'Distribution of {target_column}')
            try:
                sns.distplot(df[target_column])
                st.write("Skewness: %.3f" % df[target_column].skew())
                st.write("Kurtosis: %.3f" % df[target_column].kurt())
                st.pyplot()
            except Exception:
                st.error('Invalid Column')

        st.sidebar.title('Explore the Dataset')
        if target_column is not None:
            if st.sidebar.checkbox('Scatter Plot', value=False):
                scatter_cols = st.sidebar.multiselect('Select Column',
                                                      list(df.columns),
                                                      key='scatter_cols')
                st.markdown('## Scatter Plots')
                for col in scatter_cols:
                    try:
                        data = pd.concat([df[target_column], df[col]], axis=1)
                        data.plot.scatter(x=col, y=target_column, ylim=(0, 800000))
                        st.pyplot()
                    except Exception:
                        st.error('Invalid column')

            if st.sidebar.checkbox('Box Plot', value=False):
                box_cols = st.sidebar.multiselect('Select Column',
                                                  list(df.columns),
                                                  key='box_cols')
                st.markdown('## Box Plots')
                for col in box_cols:
                    try:
                        data = pd.concat([df[target_column], df[col]], axis=1)
                        f, ax = plt.subplots(figsize=(8, 6))
                        fig = sns.boxplot(x=col, y=target_column, data=data)
                        fig.axis(ymin=np.min(df[target_column]),
                                 ymax=np.max(df[target_column]))
                        st.pyplot()
                    except Exception:
                        st.error('Invalid column')

            if st.sidebar.checkbox('Pair Plot', value=False):
                pair_cols = st.sidebar.multiselect('Select Column',
                                                   list(df.columns),
                                                   key='pair_plot')
                plot_size = st.sidebar.number_input('Select Plot size', 1.0, 5.0,
                                                    step=0.5, key='plot_size',
                                                    value=2.5)
                st.markdown('## Pair Plots')
                cols = [target_column]
                for col in pair_cols:
                    cols.append(col)
                try:
                    sns.set()
                    sns.pairplot(df[cols], height=plot_size)
                    st.pyplot()
                except Exception:
                    st.error('Invalid column')

            if st.sidebar.checkbox('Correlation matrix', value=False):
                st.markdown('## Correlation matrix (heatmap style)')
                corrmat = df.corr()
                f, ax = plt.subplots(figsize=(12, 9))
                sns.heatmap(corrmat, vmax=.8, square=True)
                st.pyplot()
                if st.checkbox('With Target Column', value=False):
                    # Number of variables for the heatmap
                    k = st.number_input('# of Cols for heatmap', 3,
                                        len(df.columns), step=1, key='k')
                    cols = corrmat.nlargest(k, target_column)[target_column].index
                    cm = np.corrcoef(df[cols].values.T)
                    sns.set(font_scale=1.25)
                    hm = sns.heatmap(cm, cbar=True, annot=True, square=True,
                                     fmt='.2f', annot_kws={'size': 10},
                                     yticklabels=cols.values,
                                     xticklabels=cols.values)
                    st.pyplot()

        st.sidebar.title('Data processing')
        if st.sidebar.checkbox('Treat missing values'):
            st.markdown('## Treat missing values')
            # Select a column to treat missing values in
            col_option = st.selectbox("Select Column to treat missing values",
                                      df.columns)
            # Specify the treatment method
            missing_values_clear = st.selectbox(
                "Select Missing values treatment method",
                ("Replace with Mean", "Replace with Median", "Replace with Mode"))
            if missing_values_clear == "Replace with Mean":
                replaced_value = df[col_option].mean()
                st.write("Mean value of column is :", replaced_value)
            elif missing_values_clear == "Replace with Median":
                replaced_value = df[col_option].median()
                st.write("Median value of column is :", replaced_value)
            elif missing_values_clear == "Replace with Mode":
                replaced_value = df[col_option].mode()
                st.write("Mode value of column is :", replaced_value)
            Replace = st.selectbox("Replace values of column?", ("No", "Yes"))
            if Replace == "Yes":
                df[col_option] = df[col_option].fillna(replaced_value)
                st.write("Null values replaced")
            elif Replace == "No":
                st.write("No changes made")

        if st.sidebar.checkbox('Encode categorical column'):
            st.markdown("## Encode categorical column")
            # Select a column to encode
            col_selected = st.selectbox("Select Column to treat categorical values",
                                        df.columns)
            # Specify the encoding method
            encoder_type = st.selectbox("Select Encoding method",
                                        ("Label Encoder", ""))
            if encoder_type == "Label Encoder":
                encoded_value = helper.labelEncoder.fit_transform(df[col_selected])
                st.write("Label Encoded value of column is :", encoded_value)
            # elif encoder_type == "Ordinal Encoder":
            #     encoded_value = helper.ordinalEncoder.fit_transform(df[col_selected])
            #     st.write("Ordinal Encoded value of column is :", encoded_value)
            Replace = st.selectbox("Replace values of column?", ("No", "Yes"),
                                   key='encoder')
            if Replace == "Yes":
                df[col_selected] = encoded_value
                st.write("Added encoded column in dataframe")
                st.write(df.head())
            elif Replace == "No":
                st.write('No values replaced yet')

        if st.sidebar.checkbox('Scale column'):
            st.markdown("## Scaling column")
            col_scaled = st.selectbox("Select Column for feature scaling",
                                      df.columns)
            scaler_type = st.selectbox("Select Scaling method",
                                       ("Standard Scaler", "Min Max Scaler"))
            if scaler_type == "Standard Scaler":
                scaled_value = helper.standartScaler.fit_transform(
                    df[col_scaled].values.reshape(-1, 1))
                st.write("Standard scaled value of column is :", scaled_value)
            elif scaler_type == "Min Max Scaler":
                scaled_value = helper.minMaxScaler.fit_transform(
                    df[col_scaled].values.reshape(-1, 1))
                st.write("Min-Max scaled value of column is :", scaled_value)
            Replace = st.selectbox("Replace values of column?", ("No", "Yes"),
                                   key='scaler')
            if Replace == "Yes":
                df[col_scaled] = scaled_value
                st.write("Added scaled column in dataframe")
                st.write(df.head())
            elif Replace == "No":
                st.write('No values replaced yet')

        st.sidebar.title('Download processed dataset')
        if st.sidebar.checkbox("download file"):
            st.sidebar.markdown(helper.get_table_download_link(df),
                                unsafe_allow_html=True)

    if st.sidebar.button('Credits'):
        st.sidebar.markdown('''
            **Md.Sadab Wasim**
            Get in touch: [Twitter](https://twitter.com/@sadab_wasim)
            Source Code: [Github](https://github.com/mdsadabwasim/reconn)
            ''')
# Copyright 2018-2020 Streamlit Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import streamlit as st

single_file = st.file_uploader("Drop a file:", type=["txt"])
if single_file is None:
    st.text("No upload")
else:
    st.text(single_file.read())

multiple_files = st.file_uploader("Drop multiple files:", type=["txt"],
                                  accept_multiple_files=True)
if multiple_files is None:
    st.text("No upload")
else:
    files = [file.read().decode() for file in multiple_files]
    st.text("\n".join(files))
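# Note that single_file.read() returns raw bytes, so the single-file branch
# above renders a bytes repr. A sketch of decoding it the same way as the
# multi-file branch does (assumes UTF-8 text files; the extra key argument is
# only there to keep the widgets distinct):
decoded_file = st.file_uploader("Drop a text file:", type=["txt"], key="decoded")
if decoded_file is not None:
    st.text(decoded_file.read().decode("utf-8"))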
# Title the "advanced options" section of the sidebar. streamlit.sidebar.markdown("### Advanced Options") # Add a checkbox to add a watermark. add_watermark = streamlit.sidebar.checkbox( label="Add watermark to verify grade", value=False # default ) # Allow the user to upload a file. FILE_TYPES = [".png", ".jpg", ".jpeg"] uploader_title = """ ## Use AI to grade the condition of a trading card! """ streamlit.markdown(uploader_title) file = streamlit.file_uploader(label="Option 1: Upload a Picture of the Card") ## Get an image from ebay. ebay_md = """ Option 2: enter an ebay auction URL, e.g. https://ebay.to/32conjA """ #streamlit.markdown(ebay_md) ebay_url = streamlit.text_input(ebay_md) if ebay_url not in [None, ""]: try: # output loading message to user. #streamlit.text("Loading image from ebay...please wait")
| Date       | Ticker   | Order      | Price  | Quantity | Fee |
|------------|----------|------------|--------|----------|-----|
| 2019-10-01 | CASH.USD | deposit    | 1      | 100000   | 0   |
| 2019-10-11 | AAPL     | purchase   | 234.52 | 88       | 35  |
| 2019-11-25 | MSFT     | purchase   | 148.3  | 250      | 25  |
| 2019-12-04 | AAPL     | sale       | 262.08 | 50       | 20  |
| 2020-01-06 | FB       | purchase   | 208    | 100      | 10  |
| 2020-01-25 | CASH.USD | withdrawal | 1      | 30000    | 0   |

[Download example](https://github.com/simprecicchiani/PyPortfolioAnalytics/raw/master/assets/portfolios/generic.csv)

### Input file rules:
- File format is `.csv`
- First row contains these columns: `Date`, `Ticker`, `Order`, `Price`, `Quantity`, `Fee`
- Date format is `%Y-%m-%d`
- Order types are `deposit`, `withdrawal`, `purchase`, `sale`
- Only supports [Yahoo Finance](https://finance.yahoo.com/) tickers

### Caveats
- Works with single-currency accounts only
- Requires a deposit to calculate return on investment
- Only accepts transactions within business days
'''

uploaded_file = st.file_uploader('Upload your transactions', type='csv')
if uploaded_file is not None:
    st.portfolio = Portfolio(uploaded_file)
    st.portfolio.run()
    dashboard()
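# A sketch of validating an uploaded transactions file against the rules above
# before handing it to Portfolio. The check_transactions helper is illustrative
# and not part of the app; the column list and order types come from the rules:
import pandas as pd

REQUIRED_COLUMNS = ['Date', 'Ticker', 'Order', 'Price', 'Quantity', 'Fee']
VALID_ORDERS = {'deposit', 'withdrawal', 'purchase', 'sale'}

def check_transactions(csv_file):
    df = pd.read_csv(csv_file)
    problems = []
    missing = [c for c in REQUIRED_COLUMNS if c not in df.columns]
    if missing:
        problems.append(f"missing columns: {missing}")
    elif not df['Order'].isin(VALID_ORDERS).all():
        problems.append("unknown order types found")
    return problems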
def main(): """Streamlit demo web app""" st.sidebar.title('Menu') choose_model = st.sidebar.selectbox("Choose the page or model", [ "Home", "Logistic Regression", "XGB", "Stock Backtesting", "Predict Volatility", "Technical Indicators", "Bollinger Band", "Stock Market", "Dashbord" ]) # Load data df, rows, columns, filename = load_data() data, drop_list = data_preprocessing(df) # Provide checkbox for uploading different training dataset if choose_model == "Home": if st.checkbox('Want to use other training set?'): uploaded_file = st.file_uploader("Choose a CSV file", type="csv") st.text( "Note: Don't easily change training set which may bring big influence on prediction" ) if uploaded_file: df, data, drop_list, filename, rows, columns = upload_different_data( uploaded_file) # Home page building if choose_model == "Home": home_page_builder(df, data, rows, columns) # Page for Logistic Regression if choose_model == "Logistic Regression": model_reg = logistic_page_builder(data) if (st.checkbox("Want to Use this model to predict on a new dataset?") ): logistic_predictor(model_reg, rows, columns, df, drop_list) # Page for XGB if choose_model == "XGB": model_xgb = xgb_page_builder(data) if (st.checkbox("Want to Use this model to predict on a new dataset?") ): xgb_predictor(model_xgb, rows, columns, df, drop_list) if choose_model == "Stock Backtesting": from Stock_Backtesting import ticker st.sidebar.header('Hyper Parameters') st.sidebar.markdown('Enter a new ticker') tickertxt = st.sidebar.text_input("Paste Aktie here", value='AMZN') model_xgb = ticker(tickertxt) if choose_model == "Predict Volatility": from PredictVolatility import tickerpv st.sidebar.header('Predickt') st.sidebar.markdown('Enter a new ticker') tickertxt = st.sidebar.text_input("Paste Aktie here", value='AMZN') model_xgb = tickerpv(tickertxt) if choose_model == "Technical Indicators": from Technical_Indicators import tickerti st.sidebar.header('Technical ') st.sidebar.markdown('Enter a new ticker') tickertxt = st.sidebar.text_input("Paste Aktie here", value='AMZN') model_xgb = tickerti(tickertxt) if choose_model == "Bollinger Band": from bollinger import tickerbol st.sidebar.header('Bollinger Nabd ') st.sidebar.markdown('Enter a new ticker') tickertxt = st.sidebar.text_input("Paste Aktie here", value='AMZN') model_xgb = tickerbol(tickertxt) if choose_model == "Stock Market": from Stockmarketscreening import stockmarket st.sidebar.header('Stock Market') st.sidebar.markdown('Enter a new ticker') tickertxt = st.sidebar.text_input("Paste Aktie here", value='AMZN') model_xgb = stockmarket(tickertxt) if choose_model == "Dashbord": st.sidebar.header('Dashboard') cwd = os.getcwd() data1 = pd.read_excel(cwd + "/Tabelle Mustermann-2.xls") # print(data1) st.table(data1)
def main(): st.title("IST Automated EDA Test") st.info("JP EDA app test") """https://github.com/joeperrotta/streamlit""" activities = [ "Pandas Profile", "SweetViz", "General EDA", "EDA For Linear Models", "Model Building for Classification Problem" ] choice = st.sidebar.selectbox("Select Activities", activities) if choice == "Pandas Profile": st.subheader("Automated EDA with Pandas Profile") data_file = st.file_uploader("Upload CSV", type=['csv']) if data_file is not None: df = pd.read_csv(data_file) st.dataframe(df.head()) profile = ProfileReport(df) st_profile_report(profile) # elif choice == "Sweetviz": # st.subheader("Automated EDA with Sweetviz") # data = st.file_uploader("Upload a Dataset", type=["csv"]) # if data is not None: # df = pd.read_csv(data) # st.dataframe(df.head()) # if st.button("Generate Sweetviz Report"): # # Normal Workflow # report = sv.analyze(df) # report.show_html() # st_display_sweetviz("SWEETVIZ_REPORT.html") elif choice == 'General EDA': st.subheader("Exploratory Data Analysis") data = st.file_uploader("Upload a Dataset", type=["csv", "txt"]) if data is not None: df = load.read_csv(data) st.dataframe(df.head()) st.success("Data Frame Loaded successfully") if st.checkbox("Show dtypes"): st.write(dataframe.show_dtypes(df)) if st.checkbox("Show Columns"): st.write(dataframe.show_columns(df)) if st.checkbox("Show Missing"): st.write(dataframe.Show_Missing1(df)) if st.checkbox("column information"): st.write(info.Column_information(df)) if st.checkbox("Aggregation Tabulation"): st.write(dataframe.Tabulation(df)) if st.checkbox("Num Count Summary"): st.write(info.num_count_summary(df)) if st.checkbox("Statistical Summary"): st.write(info.statistical_summary(df)) # if st.checkbox("Show Selected Columns"): # selected_columns = st.multiselect("Select Columns",all_columns) # new_df = df[selected_columns] # st.dataframe(new_df) if st.checkbox("Show Selected Columns"): selected_columns = st.multiselect("Select Columns", dataframe.show_columns(df)) new_df = df[selected_columns] st.dataframe(new_df) if st.checkbox("Numerical Variables"): num_df = dataframe.Numerical_variables(df) numer_df = pd.DataFrame(num_df) st.dataframe(numer_df) if st.checkbox("Categorical Variables"): new_df = dataframe.categorical_variables(df) catego_df = pd.DataFrame(new_df) st.dataframe(catego_df) if st.checkbox("DropNA"): imp_df = dataframe.impute(num_df) st.dataframe(imp_df) if st.checkbox("Missing after DropNA"): st.write(dataframe.Show_Missing(imp_df)) all_columns_names = dataframe.show_columns(df) all_columns_names1 = dataframe.show_columns(df) selected_columns_names = st.selectbox( "Select Column 1 For Cross Tabultion", all_columns_names) selected_columns_names1 = st.selectbox( "Select Column 2 For Cross Tabultion", all_columns_names1) if st.button("Generate Cross Tab"): st.dataframe( pd.crosstab(df[selected_columns_names], df[selected_columns_names1])) all_columns_names3 = dataframe.show_columns(df) all_columns_names4 = dataframe.show_columns(df) selected_columns_name3 = st.selectbox( "Select Column 1 For Pearsonr Correlation (Numerical Columns)", all_columns_names3) selected_columns_names4 = st.selectbox( "Select Column 2 For Pearsonr Correlation (Numerical Columns)", all_columns_names4) if st.button("Generate Pearsonr Correlation"): df = pd.DataFrame(dataframe.Show_pearsonr( imp_df[selected_columns_name3], imp_df[selected_columns_names4]), index=['Pvalue', '0']) st.dataframe(df) spearmanr3 = dataframe.show_columns(df) spearmanr4 = dataframe.show_columns(df) spearmanr13 = st.selectbox( 
"Select Column 1 For spearmanr Correlation (Categorical Columns)", spearmanr4) spearmanr14 = st.selectbox( "Select Column 2 For spearmanr Correlation (Categorical Columns)", spearmanr4) if st.button("Generate spearmanr Correlation"): df = pd.DataFrame(dataframe.Show_spearmanr( catego_df[spearmanr13], catego_df[spearmanr14]), index=['Pvalue', '0']) st.dataframe(df) st.subheader("UNIVARIATE ANALYSIS") all_columns_names = dataframe.show_columns(df) selected_columns_names = st.selectbox( "Select Column for Histogram ", all_columns_names) if st.checkbox("Show Histogram for Selected variable"): st.write(dataframe.show_hist(df[selected_columns_names])) st.pyplot() all_columns_names = dataframe.show_columns(df) selected_columns_names = st.selectbox("Select Columns Distplot ", all_columns_names) if st.checkbox("Show DisPlot for Selected variable"): st.write(dataframe.Show_DisPlot(df[selected_columns_names])) st.pyplot() all_columns_names = dataframe.show_columns(df) selected_columns_names = st.selectbox("Select Columns CountPlot ", all_columns_names) if st.checkbox("Show CountPlot for Selected variable"): st.write(dataframe.Show_CountPlot(df[selected_columns_names])) st.pyplot() st.subheader("BIVARIATE ANALYSIS") Scatter1 = dataframe.show_columns(df) Scatter2 = dataframe.show_columns(df) Scatter11 = st.selectbox( "Select Column 1 For Scatter Plot (Numerical Columns)", Scatter1) Scatter22 = st.selectbox( "Select Column 2 For Scatter Plot (Numerical Columns)", Scatter2) if st.button("Generate PLOTLY Scatter PLOT"): st.pyplot(dataframe.plotly(df, df[Scatter11], df[Scatter22])) bar1 = dataframe.show_columns(df) bar2 = dataframe.show_columns(df) bar11 = st.selectbox("Select Column 1 For Bar Plot ", bar1) bar22 = st.selectbox("Select Column 2 For Bar Plot ", bar2) if st.button("Generate PLOTLY histogram PLOT"): st.pyplot(dataframe.plotly_histogram(df, df[bar11], df[bar22])) violin1 = dataframe.show_columns(df) violin2 = dataframe.show_columns(df) violin11 = st.selectbox("Select Column 1 For violin Plot", violin1) violin22 = st.selectbox("Select Column 2 For violin Plot", violin2) if st.button("Generate PLOTLY violin PLOT"): st.pyplot( dataframe.plotly_violin(df, df[violin11], df[violin22])) st.subheader("MULTIVARIATE ANALYSIS") if st.checkbox("Show Histogram"): st.write(dataframe.show_hist(df)) st.pyplot() if st.checkbox("Show HeatMap"): st.write(dataframe.Show_HeatMap(df)) st.pyplot() if st.checkbox("Show PairPlot"): st.write(dataframe.Show_PairPlot(df)) st.pyplot() if st.button("Generate Word Cloud"): st.write(dataframe.wordcloud(df)) st.pyplot() elif choice == 'EDA For Linear Models': st.subheader("EDA For Linear Models") data = st.file_uploader("Upload a Dataset", type=["csv", "txt", "xlsx", "tsv"]) if data is not None: df = load.read_csv(data) st.dataframe(df.head()) st.success("Data Frame Loaded successfully") all_columns_names = dataframe.show_columns(df) selected_columns_names = st.selectbox("Select Columns qqplot ", all_columns_names) if st.checkbox("Show qqplot for variable"): st.write(dataframe.qqplot(df[selected_columns_names])) st.pyplot() all_columns_names = dataframe.show_columns(df) selected_columns_names = st.selectbox("Select Columns outlier ", all_columns_names) if st.checkbox("Show outliers in variable"): st.write(dataframe.outlier(df[selected_columns_names])) # all_columns_names = show_columns(df) # selected_columns_names = st.selectbox("Select target ",all_columns_names) # if st.checkbox("Anderson Normality Test"): # st.write(Anderson_test(df[selected_columns_names])) if 
st.checkbox("Show Distplot Selected Columns"): selected_columns_names = st.selectbox( "Select Columns for Distplot ", all_columns_names) st.dataframe(dataframe.show_displot( df[selected_columns_names])) st.pyplot() con1 = dataframe.show_columns(df) con2 = dataframe.show_columns(df) conn1 = st.selectbox("Select 1st Columns for chi square test", con1) conn2 = st.selectbox("Select 2st Columns for chi square test", con2) if st.button("Generate chi square test"): st.write( dataframe.check_cat_relation(df[conn1], df[conn2], 0.5)) elif choice == 'Model Building for Classification Problem': st.subheader("Model Building for Classification Problem") data = st.file_uploader("Upload a Dataset", type=["csv", "txt", "xlsx", "tsv"]) if data is not None: df = load.read_csv(data) st.dataframe(df.head()) st.success("Data Frame Loaded successfully") if st.checkbox( "Select your Variables (Target Variable should be at last)" ): selected_columns_ = st.multiselect( "Select Columns for seperation ", dataframe.show_columns(df)) sep_df = df[selected_columns_] st.dataframe(sep_df) if st.checkbox("Show Indpendent Data"): x = sep_df.iloc[:, :-1] st.dataframe(x) if st.checkbox("Show Dependent Data"): y = sep_df.iloc[:, -1] st.dataframe(y) if st.checkbox("Dummay Variable"): x = dataframe.dummy(x) st.dataframe(x) if st.checkbox("IMpupter "): x = model.IMpupter(x) st.dataframe(x) if st.checkbox("Compute Principle Component Analysis"): x = dataframe.PCA(x) st.dataframe(x) st.subheader("TRAIN TEST SPLIT") if st.checkbox("Select X Train"): from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test = train_test_split( x, y, random_state=0) st.dataframe(x_train) if st.checkbox("Select x_test"): from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test = train_test_split( x, y, random_state=0) st.dataframe(x_test) if st.checkbox("Select y_train"): from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test = train_test_split( x, y, random_state=0) st.dataframe(y_train) if st.checkbox("Select y_test"): from sklearn.model_selection import train_test_split x_train, x_test, y_train, y_test = train_test_split( x, y, random_state=0) st.dataframe(y_test) st.subheader("MODEL BUILDING") st.write("Build youe BaseLine Model") if st.checkbox("Logistic Regression "): x = model.Logistic_Regression(x_train, y_train, x_test, y_test) st.write(x) if st.checkbox("Decision Tree "): x = model.Decision_Tree(x_train, y_train, x_test, y_test) st.write(x) if st.checkbox("Random Forest "): x = model.RandomForest(x_train, y_train, x_test, y_test) st.write(x) if st.checkbox("naive_bayes "): x = model.naive_bayes(x_train, y_train, x_test, y_test) st.write(x) if st.checkbox("XGB Classifier "): x = model.XGb_classifier(x_train, y_train, x_test, y_test) st.write(x) st.markdown('Automation is **_really_ _cool_**.') st.markdown('<style>h1{color: red;}</style>', unsafe_allow_html=True) st.title("Credits and Inspiration") """https://pycaret.org/"""
import streamlit as st
from PIL import Image
import style
import io
import os

st.title('Pytorch Style Transfer')

input_image = st.file_uploader('Upload Image:')
input_image_path = ''
if input_image is not None:
    input_image_path = 'neural_style/images/content-images/' + input_image.name
    st.write('### Source Image:')
    st.write(input_image.name)
    # Save the upload to disk so the stylizer can read it from a path
    with open(
            os.path.join("neural_style/images/content-images/",
                         input_image.name), "wb") as f:
        f.write(input_image.getbuffer())
    img = Image.open(input_image)
    st.image(img, width=250)

style_name = st.selectbox('Select Style',
                          ('candy', 'mosaic', 'rain_princess', 'udnie'))
model = 'neural_style/saved_models/' + style_name + '.pth'
if input_image is not None:
    output_image = ('neural_style/images/output-images/' + style_name + '-' +
                    input_image.name)
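# The snippet stops after computing the model and output paths; presumably
# the local `style` module does the actual stylization. A hypothetical
# continuation, where style.stylize(model_path, content_path, output_path)
# is an assumed signature, not a confirmed API:
clicked = st.button('Stylize')
if clicked and input_image is not None:
    style.stylize(model, input_image_path, output_image)
    st.write('### Output Image:')
    st.image(Image.open(output_image), width=250)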
import altair as alt
import numpy as np
import pandas as pd
import streamlit as st


def find_outliers(df, variable, window, sigma):
    """Flag points that deviate from a rolling mean by more than
    sigma rolling standard deviations. (The original read `variable`
    from the enclosing scope; it is now an explicit parameter.)"""
    avg = df[variable].rolling(window=window).mean()
    residual = df[variable] - avg
    std = residual.rolling(window=window).std()
    return np.abs(residual) > std * sigma


st.header("Outlier detection in Occupancy Detection Data Set")
uploaded_file = st.file_uploader("Choose data/occupancy.csv", type="csv")
if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)
    if len(data.columns) != 7:
        st.error("This doesn't look like the correct dataset...")
    else:
        data["date"] = data.date.astype("datetime64[ns]")
        variable = "Temperature"
        window = st.slider("Window : ", 1, 60, 30)
        sigma = st.slider("Sigma : ", 1, 20, 10)
        data["is_outlier"] = find_outliers(data, variable, window, sigma)
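        # The app imports altair but stops after flagging outliers. A minimal
        # sketch of a chart (continuing the else-branch above) that draws the
        # temperature series and highlights the flagged points in red:
        base = alt.Chart(data).encode(x="date:T")
        line = base.mark_line().encode(y="Temperature:Q")
        points = (base.transform_filter(alt.datum.is_outlier)
                  .mark_circle(color="red", size=60)
                  .encode(y="Temperature:Q"))
        st.altair_chart(line + points, use_container_width=True)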
# Module-level imports assumed by this snippet (`train` and `predict` are
# project-local helpers).
import json

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import streamlit as st
import torch
from bokeh.plotting import figure
from PIL import Image
from torchvision import transforms

import predict
import train


def main():
    with open('cat_to_name.json', 'r') as f:
        cat_to_name = json.load(f)

    checkpoint = torch.load('my_model.pt')
    arch = checkpoint['arch']
    num_labels = len(checkpoint['class_to_idx'])
    hidden_units = checkpoint['hidden_units']
    model = train.load_model(arch=arch, num_labels=num_labels,
                             hidden_units=hidden_units)
    model.load_state_dict(checkpoint['state_dict'])
    model.class_to_idx = checkpoint['class_to_idx']

    st.set_option('deprecation.showfileUploaderEncoding', False)
    st.title("Testing Class Prediction")
    # st.file_uploader has no `encoding` or `default` parameters as the
    # original assumed; guard for None instead of relying on a default file.
    filename = st.file_uploader('Select the flower',
                                type=['jpg', 'jpeg', 'png'])
    if filename is None:
        return
    img = Image.open(filename)
    st.image(img, width=300, caption="Flower Selected")

    if st.button("Predict"):
        st.title("Predicted Results")
        # Display the image along with the top 5 classes
        probs, classes = predict.predict(image=filename,
                                         checkpoint='my_model.pt',
                                         labels='cat_to_name.json',
                                         gpu=True)
        st.image(img)
        st.header("Table results of Possible Class")
        df = pd.DataFrame({'Classes': classes, 'Probability': probs})
        st.table(df)

        p = figure(x_range=classes, plot_height=250,
                   title="Possible Prediction", toolbar_location=None,
                   tools="")
        p.vbar(x=classes, top=probs, width=0.6)
        p.xgrid.grid_line_color = None
        p.y_range.start = 0
        st.bokeh_chart(p)


def process_image(image):
    '''Scales, crops, and normalizes a PIL image for a PyTorch model;
    returns a NumPy array.'''
    img_loader = transforms.Compose([
        transforms.Resize(256),
        transforms.CenterCrop(224),
        transforms.ToTensor()])
    pil_image = Image.open(image)
    pil_image = img_loader(pil_image).float()
    np_image = np.array(pil_image)
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    np_image = (np.transpose(np_image, (1, 2, 0)) - mean) / std
    np_image = np.transpose(np_image, (2, 0, 1))
    return np_image


def imshow(image, ax=None, title=None):
    """Imshow for a (C, H, W) tensor-style array."""
    if ax is None:
        fig, ax = plt.subplots()
    # PyTorch tensors put the color channel first, matplotlib expects it last
    image = np.transpose(image, (1, 2, 0))
    # Undo the ImageNet normalization
    mean = np.array([0.485, 0.456, 0.406])
    std = np.array([0.229, 0.224, 0.225])
    image = std * image + mean
    # Clip to [0, 1] or the image looks like noise when displayed
    image = np.clip(image, 0, 1)
    ax.imshow(image)
    return ax
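# Hypothetical usage of process_image: its (3, 224, 224) array must become a
# float tensor with a leading batch dimension before a forward pass. A sketch
# assuming `model` as loaded in main() and a local file 'ftest.jpg':
np_image = process_image('ftest.jpg')
tensor = torch.from_numpy(np_image).unsqueeze(0).float()
with torch.no_grad():
    logits = model(tensor)
    probs = torch.softmax(logits, dim=1)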
def main_elements():
    """Builds the page header."""
    # === Top part of the header ===
    row1_1, row1_2 = st.columns([2, 1])
    with row1_1:
        logo_css("АО РОСГЕОЛОГИЯ", align="left", clr="#07689F", size=33)
        # logo_css(
        #     "<i>Pipe products restoration and recycling department</i>",
        #     align="left",
        #     clr="#52616B",
        #     size=20,
        # )
    with row1_2:
        pass
    header_css(
        f"<i>{title_app}</i>",
        align="left",
        clr="#07689F",
        size=26,
    )
    row1, row2 = st.columns([2, 1])
    with row1:
        pass
    with row2:
        pass
    # === Bottom part of the header ===
    row2_1, row2_2 = st.columns([3, 1])
    with row2_1:
        uploaded_file = st.file_uploader(
            "Upload data...",
            type=["xls", "xlsx", "csv", "tsv"],
            accept_multiple_files=False,
        )
        if uploaded_file is not None:
            st.info(
                "In the demo version of the app you can only work with the "
                "test data set, which is generated on the first page load "
                "and on subsequent page refreshes"
            )
    row3_1, row3_2 = st.columns([1, 3])
    with row3_1:
        pass
    with row3_2:
        annotation_css(
            "IMPORTANT: the demo version of the app uses an automatically "
            "generated data set that is refreshed on interaction with any "
            "dynamic widget of the app",
            clr="#769FCD",
        )
    equipment_list = [
        ("УРБ-2А2", "УРБ-4Т", "ПБУ-74", "УШ-2Т4В", "HD2500RC"),
        ("МБШ-303", "УБН-Т", "МБШ-812", "МБШ-509", "БКМ-307", "БКМ-303"),
        ("СБУ-115", "СБУ-125", "БМ-302", "УРБ-4Т", "БКМ-307"),
    ]
    full_data_for_plot: List[Dict[str, Dict[str, Tuple]]] = []
    for idx, fleet_name in enumerate(
        ["Серпуховской ПТСН", "Челябинский ПТСН", "Екатеринбургский ПТСН"]
    ):
        dict_fleet: Dict[str, Dict[str, Tuple]] = (
            prepare_duration_downtime_for_plot(fleet_name,
                                               equipment_list[idx])
        )
        full_data_for_plot.append(dict_fleet)
    fleet_names_for_selectbox = [
        list(fleet.keys())[0] for fleet in full_data_for_plot
    ]
    selected_fleet_equipment = st.selectbox(
        "Select a special equipment fleet", fleet_names_for_selectbox
    )
    selected_data_for_plot: Dict[str, Tuple] = [
        fleet[selected_fleet_equipment]
        for fleet in full_data_for_plot
        if selected_fleet_equipment in fleet.keys()
    ][0]
    # Rendering
    summary_table(selected_fleet_equipment, selected_data_for_plot)
    annotation_css(
        "Summary details", size=18, text_align="center", clr="#07689F"
    )
    duration_downtime_plot(selected_fleet_equipment, selected_data_for_plot)
    duration_downtime_boxplot(selected_fleet_equipment, selected_data_for_plot)
    duration_downtime_econ_costs_scatter_plot(
        selected_fleet_equipment, selected_data_for_plot
    )
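# prepare_duration_downtime_for_plot is not shown above; judging only by the
# type hints, it maps a fleet name to {fleet_name: {unit_name: (values, ...)}}.
# A hypothetical stub with synthetic downtime data, just to make the page
# runnable for demonstration:
from typing import Dict, List, Tuple

import numpy as np


def prepare_duration_downtime_for_plot(
    fleet_name: str, units: Tuple[str, ...]
) -> Dict[str, Dict[str, Tuple]]:
    rng = np.random.default_rng(0)
    return {
        fleet_name: {
            # Twelve synthetic monthly downtime durations (hours) per unit
            unit: tuple(rng.integers(1, 48, size=12))
            for unit in units
        }
    }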
import streamlit as st
import pandas as pd
import numpy as np
import time
import seaborn as sns
import matplotlib.pyplot as plt

# st.dataframe(df.style.highlight_max(axis=0))
# df.head(5)

st.title('Case Portocred Financeira')
st.file_uploader("Upload files", accept_multiple_files=True)
st.subheader("Developed by: Renato Dias")

st.sidebar.title("Data exploration (EDA)")
st.sidebar.subheader("Overviews")
# st.sidebar.beta_columns was renamed to st.sidebar.columns in Streamlit 0.86+
col1, col2, col3 = st.sidebar.columns(3)
pr = col1.checkbox('Stats', False)
bx = col2.checkbox('Boxplot', False)
cor = col3.checkbox("Correlation", False)

st.sidebar.subheader("Outlier treatment")
chosen = st.sidebar.radio('', ("Yes", "No"))

st.sidebar.subheader("Class balancing")
c1 = st.sidebar.checkbox("Show percentage per class")
st.sidebar.text("Balance classes?")
col11, col21 = st.sidebar.columns(2)
chosen1 = col11.radio('', (" Yes", " No"))

st.sidebar.subheader("Data preprocessing")
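# The snippet wires up the sidebar toggles but the rendering side is not
# shown. A minimal sketch of what each toggle might drive, assuming a
# DataFrame `df` has been loaded from the uploaded files:
if pr:
    st.write(df.describe())  # summary statistics
if bx:
    fig, ax = plt.subplots()
    sns.boxplot(data=df.select_dtypes('number'), ax=ax)
    st.pyplot(fig)
if cor:
    fig, ax = plt.subplots()
    sns.heatmap(df.select_dtypes('number').corr(), annot=True, ax=ax)
    st.pyplot(fig)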
    # (continues from an earlier `if app_mode == ...:` branch; `s` holds the
    # bytes of ./age1.csv read just above this excerpt)
    download_button_str = download_button(s, './age1.csv', 'Age CSV file')
    st.sidebar.markdown(download_button_str, unsafe_allow_html=True)

    with open('./count.csv', 'rb') as f:
        s = f.read()
    download_button_str1 = download_button(s, './count.csv', 'count')
    st.sidebar.markdown(download_button_str1, unsafe_allow_html=True)

    with open('./output.avi', 'rb') as f:
        s = f.read()
    download_button_str1 = download_button(s, 'output.avi', 'output file')
    st.sidebar.markdown(download_button_str1, unsafe_allow_html=True)

elif app_mode == "Device":
    uploaded_file = st.file_uploader("Choose a video...", type=["mp4", "avi"])
    temporary_location = False
    if uploaded_file is not None:
        g = io.BytesIO(uploaded_file.read())
        temporary_location = "testout_simple.mp4"
        with open(temporary_location, 'wb') as out:
            out.write(g.read())
        vs = get_cap(temporary_location)
        rate = vs.get(cv2.CAP_PROP_FPS)
        dur = 49
        up1, down1, gend1, agen1 = age_gender(vs, dur)
        st.success('done!')
        st.title("plots")
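# download_button is a project-local helper, not a Streamlit API (the native
# st.download_button arrived in later Streamlit releases). A sketch of the
# common recipe it presumably follows: base64-encode the payload into an
# HTML data-URI link that st.markdown can render.
import base64


def download_button(data, filename, label):
    if isinstance(data, str):
        data = data.encode()
    b64 = base64.b64encode(data).decode()
    return (f'<a href="data:application/octet-stream;base64,{b64}" '
            f'download="{filename}">{label}</a>')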
# Imports assumed by this snippet; the metric uses TF1-style graph APIs
# (to_int32, tf.metrics, K.get_session), so it needs TF 1.x or tf.compat.v1.
import numpy as np
import streamlit as st
import tensorflow
from skimage.transform import resize
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import img_to_array


def mean_iou(y_true, y_pred):
    """Mean IoU of the binarized prediction, averaged over thresholds
    0.5, 0.55, ..., 0.95."""
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        y_pred_ = tensorflow.to_int32(y_pred > t)
        score, up_opt = tensorflow.metrics.mean_iou(y_true, y_pred_, 2)
        K.get_session().run(tensorflow.local_variables_initializer())
        with tensorflow.control_dependencies([up_opt]):
            score = tensorflow.identity(score)
        prec.append(score)
    return K.mean(K.stack(prec), axis=0)


dependencies = {'mean_iou': mean_iou}
model = tensorflow.keras.models.load_model('first_model.h5',
                                           custom_objects=dependencies)

file = st.file_uploader("Please upload an image file", type=["jpg", "png"])


def import_and_predict(image_data, model):
    X_test = np.zeros((1, 128, 128, 1), dtype=np.uint8)
    sizes_test = []
    # img = load_img(image_data)
    x = img_to_array(image_data)[:, :, 1]  # keep the green channel only
    sizes_test.append([x.shape[0], x.shape[1]])
    x = resize(x, (128, 128, 1), mode='constant', preserve_range=True)
    X_test[0] = x
    preds_test = model.predict(X_test, verbose=1)
    preds_test_t = (preds_test > 0.5).astype(np.uint8)
    tmp = np.squeeze(preds_test_t[0]).astype(np.float32)
    # Stack the binary mask into 3 channels so st.image can display it
    return np.dstack((tmp, tmp, tmp))
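# For reference, the quantity mean_iou approximates at a single threshold is
# plain intersection-over-union of the binarized mask. A NumPy illustration:
def iou(y_true, y_pred, threshold=0.5):
    pred = (y_pred > threshold).astype(bool)
    true = y_true.astype(bool)
    intersection = np.logical_and(true, pred).sum()
    union = np.logical_or(true, pred).sum()
    # Empty union means both masks are empty: count that as a perfect match
    return intersection / union if union else 1.0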
def main(): """Invasive Ductal Carcinoma Detection Using CNN""" st.title("Invasive Ductal Carcinoma Detection Using CNN") menu = ["Home", "Login", "Signup"] submenu = ["Plot", "Visualisasi IDC", "Feature Maps", "Prediction"] choice = st.sidebar.selectbox("Menu", menu) if choice == "Home": st.subheader("What is Invasive Ductal Carcinoma (IDC)?") st.markdown("#### Context") """ Invasive Ductal Carcinoma (IDC) is the most common subtype of all breast cancers. To assign an aggressiveness grade to a whole mount sample, pathologists typically focus on the regions which contain the IDC. As a result, one of the common pre-processing steps for automatic aggressiveness grading is to delineate the exact regions of IDC inside of a whole mount slide. """ st.markdown("#### Content") """ The original dataset consisted of 162 whole mount slide images of Breast Cancer (BCa) specimens scanned at 40x. From that, 277,524 patches of size 50 x 50 were extracted (198,738 IDC negative and 78,786 IDC positive). Each patch’s file name is of the format: uxXyYclassC.png — > example 10253idx5x1351y1101class0.png . Where u is the patient ID (10253idx5), X is the x-coordinate of where this patch was cropped from, Y is the y-coordinate of where this patch was cropped from, and C indicates the class where 0 is non-IDC and 1 is IDC. """ st.markdown("#### Acknowledgements") """ The original files are located here: http://gleason.case.edu/webdata/jpi-dl-tutorial/IDC_regular_ps50_idx5.zip Citation: https://www.ncbi.nlm.nih.gov/pubmed/27563488 and http://spie.org/Publications/Proceedings/Paper/10.1117/12.2043872 """ st.markdown("#### Inspiration") """ Breast cancer is the most common form of cancer in women, and invasive ductal carcinoma (IDC) is the most common form of breast cancer. Accurately identifying and categorizing breast cancer subtypes is an important clinical task, and automated methods can be used to save time and reduce error. 
""" elif choice == "Login": username = st.sidebar.text_input("Username") password = st.sidebar.text_input("Password", type='password') if st.sidebar.checkbox("Login"): create_usertable() hashed_pwsd = generate_hashes(password) result = login_user(username, verify_hashes(password, hashed_pwsd)) if result: st.success("Welcome {}".format(username)) activity = st.selectbox("Activity", submenu) if activity == "Plot": st.subheader("Data Plot") status = st.radio("Data Distribution", ("Data raw", "Data preprocessed")) if status == 'Data raw': img = Image.open( os.path.join("data/sns.countplot(y_train).jpeg")) st.image(img, width=300, caption="Data Train") img = Image.open( os.path.join("data/sns.countplot(y_test).jpeg")) st.image(img, width=300, caption="Data Test") else: img = Image.open( os.path.join("data/sns.countplot(y_train2).jpeg")) st.image(img, width=300, caption="Data Train") img = Image.open( os.path.join("data/sns.countplot(y_test2).jpeg")) st.image(img, width=300, caption="Data Test") elif activity == "Visualisasi IDC": st.subheader("Visualisasi IDC(-/+)") sample_gambar = st.radio( "Few example of IDC with its coordinate", ("IDC (-)", "IDC (+)")) if sample_gambar == 'IDC (-)': figure_path = glob.glob("gambar visual/0/*.png", recursive=True) figure = show_image(figure_path) st.pyplot(figure) else: figure_path = glob.glob("gambar visual/1/*.png", recursive=True) figure = show_image(figure_path) st.pyplot(figure) elif activity == "Feature Maps": st.subheader("Feature Maps") feature_maps = st.radio( "Visualization Feature Maps from hidden layer", ("VGG16", "5 Layers Conv2d")) if feature_maps == 'VGG16': model_ = load_model( os.path.join( "models/vgg-model-weights-improvement-the-best.h5" )) model_baru = model_.layers[0] # Khusus vgg model_baru = Model(inputs=model_baru.inputs, outputs=model_baru.layers[1].output) model_baru.summary() img = Image.open( os.path.join( "gambar visual/0/9178_idx5_x2651_y1251_class0.png" )) img = preprocessed_image(img) img = preprocess_input(img) feature_maps = model_baru.predict(img) figure = feature_of(feature_maps, 8) st.pyplot(figure) else: model_ = load_model( os.path.join( "models/weights-improvement-the-best.h5")) model_baru = model_ model_baru = Model(inputs=model_baru.inputs, outputs=model_baru.layers[1].output) model_baru.summary() img = Image.open( os.path.join( "gambar visual/0/9178_idx5_x2651_y1251_class0.png" )) img = preprocessed_image(img) img = preprocess_input(img) feature_maps = model_baru.predict(img) figure = feature_of(feature_maps, 5) st.pyplot(figure) elif activity == "Prediction": st.subheader("Predictive Analytics") # Upload Image image_file = st.file_uploader("Upload Image", type=['jpg', 'png', 'jpeg']) if image_file is not None: our_image = Image.open(image_file) st.text("Image Uploaded!") st.image(our_image) # Processed Image image_test = preprocessed_image(our_image) else: st.warning("Please upload the image!") # ML / Predict Image model_choice = st.selectbox("Select Model", ["VGG16", "5 Layers Conv2d"]) if st.button("Predict"): if model_choice == "VGG16": model_ = load_model( os.path.join( "models/vgg-model-weights-improvement-the-best.h5" )) opt = SGD(lr=0.001, momentum=0.9) model_.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy']) prediction = model_.predict(image_test) prediction_result = np.argmax(prediction[0]) elif model_choice == "5 Layers Conv2d": model_ = load_model( os.path.join( "models/weights-improvement-the-best.h5")) opt = SGD(lr=0.001, momentum=0.9) model_.compile(optimizer=opt, 
                                       loss='categorical_crossentropy',
                                       metrics=['accuracy'])
                        prediction = model_.predict(image_test)
                        prediction_result = np.argmax(prediction[0])
                        # st.write(prediction_result)
                        if prediction_result == 1:
                            st.warning("Patient is IDC positive!")
                            st.error("Please seek treatment and keep a "
                                     "healthy lifestyle!")
                        else:
                            st.success("It's negative!")
            else:
                st.warning("Incorrect Username/Password")

    elif choice == "Signup":
        new_username = st.text_input("Username")
        new_password = st.text_input("Password", type='password')
        confirm_password = st.text_input("Confirm Password", type='password')
        if new_password == confirm_password:
            st.success("Password Confirmed")
        else:
            st.warning("Passwords do not match")
        if st.button("Submit"):
            create_usertable()
            hashed_new_password = generate_hashes(new_password)
            add_userdata(new_username, hashed_new_password)
            st.success("You have successfully created a new account")
            st.info("Login to Get Started")
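# generate_hashes / verify_hashes are project-local helpers not shown here.
# A minimal sketch under the assumption that they wrap SHA-256, a common
# pattern in Streamlit login demos:
import hashlib


def generate_hashes(password):
    return hashlib.sha256(str.encode(password)).hexdigest()


def verify_hashes(password, hashed_text):
    # Return the stored hash on a match (so it can be passed to login_user),
    # else False
    return hashed_text if generate_hashes(password) == hashed_text else False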