Code Example #1
import streamlit as st
from PIL import Image
import numpy as np

img_file_buffer = st.file_uploader("Upload an image",
                                   type=["png", "jpg", "jpeg"])

if img_file_buffer is not None:
    # Only open the buffer once a file has actually been uploaded,
    # otherwise Image.open(None) raises on the first run
    image = Image.open(img_file_buffer)
    img_array = np.array(image)
    st.image(
        image,
        caption=f"Your amazing image has shape {img_array.shape[0:2]}",
        use_column_width=True,
    )
Code Example #2
import cv2
import numpy as np
import streamlit as st

# ImgMaze and Maze are this project's own helper classes
# (see the stand-in sketch after this example)


def main():

    st.title('Maze Path Planner')
    uploaded_file = st.file_uploader("Choose an image", ["jpg", "jpeg", "png"])
    st.write('Or')
    use_default_image = st.checkbox('Use default maze')
    maze_image = None
    maze_aug_image = None
    marked_image = None

    if use_default_image:
        maze_img_obj = ImgMaze('./img/maze.png', is_filebytes=False)
        maze_image = maze_img_obj.get_bgr_maze()
        maze_aug_image = maze_img_obj.get_augmented_bgr_maze()

    elif uploaded_file is not None:
        file_bytes = np.asarray(bytearray(uploaded_file.read()),
                                dtype=np.uint8)
        maze_img_obj = ImgMaze(file_bytes, is_filebytes=True)
        maze_image = maze_img_obj.get_bgr_maze()
        maze_aug_image = maze_img_obj.get_augmented_bgr_maze()

    if maze_image is not None:
        st.subheader(
            'Use the sliders on the left to position the start and end points')
        start_x = st.sidebar.slider("Start X",
                                    value=8 if use_default_image else 50,
                                    min_value=0,
                                    max_value=maze_image.shape[1],
                                    key='sx')
        start_y = st.sidebar.slider("Start Y",
                                    value=9 if use_default_image else 100,
                                    min_value=0,
                                    max_value=maze_image.shape[0],
                                    key='sy')
        finish_x = st.sidebar.slider("Finish X",
                                     value=216 if use_default_image else 100,
                                     min_value=0,
                                     max_value=maze_image.shape[1],
                                     key='fx')
        finish_y = st.sidebar.slider("Finish Y",
                                     value=216 if use_default_image else 100,
                                     min_value=0,
                                     max_value=maze_image.shape[0],
                                     key='fy')
        marked_image = maze_image.copy()
        # UI circle thickness scaled to image size: mean of height and width
        # (the original summed shape[0] twice, presumably a typo)
        circle_thickness = (
            marked_image.shape[0] + marked_image.shape[1]
        ) // 2 // 100
        cv2.circle(marked_image, (start_x, start_y), circle_thickness,
                   (0, 255, 0), -1)
        cv2.circle(marked_image, (finish_x, finish_y), circle_thickness,
                   (255, 0, 0), -1)
        st.image(marked_image, channels="RGB", width=500)

    if marked_image is not None:
        if st.button('Get Path'):
            with st.spinner('Searching for path...'):
                maze_solver_obj = Maze(maze_aug_image, is_augmented=True)
                maze_solver_obj.get_shortest_path(start=(start_x, start_y),
                                                  end=(finish_x, finish_y))

            # path thickness scaled like the circle thickness above
            path_thickness = (maze_image.shape[0] +
                              maze_image.shape[1]) // 2 // 100
            maze_sol_image = maze_solver_obj.get_solution_image(
                alt_img=maze_image, line_width=path_thickness)

            st.image(maze_sol_image, channels="RGB", width=500)
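ImgMaze and Maze come from the project's own modules, not a library. A minimal stand-in that satisfies exactly the calls made above — every method name and argument is inferred from the usage, not taken from the real repository — might look like:

import cv2
from collections import deque


class ImgMaze:
    """Hypothetical stand-in: loads a maze image from a path or raw bytes."""

    def __init__(self, src, is_filebytes=False):
        if is_filebytes:
            self._img = cv2.imdecode(src, cv2.IMREAD_COLOR)
        else:
            self._img = cv2.imread(src, cv2.IMREAD_COLOR)

    def get_bgr_maze(self):
        return self._img

    def get_augmented_bgr_maze(self):
        # The real project presumably preprocesses the maze here
        # (e.g. binarization); identity keeps the sketch short
        return self._img.copy()


class Maze:
    """Hypothetical stand-in: BFS shortest path over walkable pixels."""

    def __init__(self, bgr_img, is_augmented=True):
        gray = cv2.cvtColor(bgr_img, cv2.COLOR_BGR2GRAY)
        self._free = gray > 127  # True where the maze is walkable
        self._path = []

    def get_shortest_path(self, start, end):
        h, w = self._free.shape
        prev = {start: None}
        queue = deque([start])
        while queue:
            x, y = queue.popleft()
            if (x, y) == end:
                break
            for nx, ny in ((x + 1, y), (x - 1, y), (x, y + 1), (x, y - 1)):
                if 0 <= nx < w and 0 <= ny < h and self._free[ny, nx] \
                        and (nx, ny) not in prev:
                    prev[(nx, ny)] = (x, y)
                    queue.append((nx, ny))
        self._path = []
        node = end if end in prev else None
        while node is not None:  # walk back from end to start
            self._path.append(node)
            node = prev[node]

    def get_solution_image(self, alt_img, line_width=2):
        out = alt_img.copy()
        for a, b in zip(self._path, self._path[1:]):
            cv2.line(out, a, b, (0, 0, 255), line_width)
        return out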
Code Example #3
File: ppl_ui.py  Project: kimoyerr/penguin-sagemaker
def page(state):

    # Streamlit session state
    print(state.mlflow_res.shape)

    # Delete any existing mlflow experiment named '0'
    try:
        mlflow.delete_experiment('0')
    except Exception:
        pass

    # Create a new experiment with unique ID
    exp_uniq_name = create_mlflow_exp()

    # Title and Description
    st.title('Probabilistic Programming')
    st.markdown("""
        This page guides you through the selection of algorithms and hyperparameters for Multi Logit models using Bayesian Inference

        **Start by loading the data**.
        """)

    data_file = st.file_uploader("Upload data...",
                                 type="csv",
                                 key='train_data')
    try:
        text_io = io.TextIOWrapper(data_file)
    except Exception:
        pass
    if data_file is not None:
        W = pd.read_csv(data_file, index_col=False)
        y = W.iloc[:, 0]
        X = W.iloc[:, 1:]
        st.write(W)

    # Options to select Model
    st.subheader('Select Model')
    model_types = ['Multi-Logit']
    sel_model = st.radio('Which model to use for training?', model_types)

    params = {}
    if sel_model == 'Multi-Logit':
        # Get parameters for Multi-Logit
        params['num-chains'] = st.sidebar.number_input(
            label="Number of chains for sampling",
            min_value=1,
            max_value=4,
            value=4,
            step=1)
        params['num-iters'] = st.sidebar.number_input(
            label="Number of iterations for sampling",
            min_value=100,
            max_value=1000,
            value=1000,
            step=100)
        params['num-warmup-iters'] = st.sidebar.number_input(
            label="Number of iterations for warmup",
            min_value=100,
            max_value=1000,
            value=500,
            step=100)
        params['max-tree-depth'] = st.sidebar.number_input(
            label="Maximum tree depth for the NUTS sampler",
            min_value=10,
            max_value=20,
            value=10,
            step=1)
        sel_sampler = st.radio('Which sampler to use?', ['NUTS', 'HMC'])

    # Sagemaker Training options
    instance_types = [
        'local', 'ml.m5.large', 'ml.m5.xlarge', 'ml.m5.4xlarge',
        'ml.m5.24xlarge', 'ml.g4dn.xlarge', 'ml.g4dn.4xlarge',
        'ml.g4dn.16xlarge'
    ]
    sagemaker_instance = st.sidebar.selectbox(
        'Instance type for Sagemaker training', instance_types)

    if len(params) > 0:
        samp_submit = st.sidebar.button('Run Sampling')
        if samp_submit:
            if sel_model == 'Multi-Logit':
                model_title = 'Multi-Logit'
                image_name = IMAGE_NAME
                model_name_suffix = 'penguin_xgb_model.json'

            if sagemaker_instance == 'local':

                # Data prep
                W = W.iloc[:, 1:]
                dummy_X = dummify_X(W.iloc[:, 1:],
                                    cat_columns=['island', 'sex'])
                y = W.iloc[:, 0]
                encoder, encoder_dict = encode_y(y)

                # Remove nan rows
                nan_rows = np.where(dummy_X.isnull().any(axis=1))
                X = dummy_X.drop(dummy_X.index[nan_rows], inplace=False)
                y = y.drop(y.index[nan_rows], inplace=False)

                # Split train and test data with the same random state
                X_train, X_test, y_train, y_test = train_test_split(
                    X, y, stratify=y, test_size=0.25, random_state=42)

                data = {
                    'N': X_train.shape[0],
                    'N2': X_test.shape[0],
                    'D': X_train.shape[1],
                    'K': len(np.unique(y_train)),
                    'y': encoder.transform(y_train) + 1,
                    'x': X_train,
                    'x_new': X_test,
                }

                # Model specifications
                model = pystan.StanModel(
                    file='penguin/prob_prog/multinomial.stan')
                if sel_sampler == 'NUTS':
                    fit = model.sampling(
                        data=data,
                        iter=params['num-iters'],
                        chains=params['num-chains'],
                        algorithm=sel_sampler,
                        control=dict(max_treedepth=params['max-tree-depth']))
                else:  # HMC takes no max_treedepth control; without this branch `fit` is undefined
                    fit = model.sampling(
                        data=data,
                        iter=params['num-iters'],
                        chains=params['num-chains'],
                        algorithm=sel_sampler)
                fit_samp = fit.extract(permuted=True)
                np.save(
                    'data/pystan_results/beta_posterior_NUTS_max_tree_depth_15.npy',
                    fit_samp['beta'])

                # Plots
                tmp = fit.stansummary().replace(
                    '\n', '\n\t')  # For streamlit to render the table better
                st.write(tmp)
                arviz.plot_trace(fit)
                st.pyplot()

                # Model predictions for Training
                X_np_train = X_train.to_numpy()
                X_np_test = X_test.to_numpy()
                preds_train = np.empty(
                    [X_np_train.shape[0], fit_samp['beta'].shape[0]])
                preds_test = np.empty(
                    [X_np_test.shape[0], fit_samp['beta'].shape[0]])
                for i in range(fit_samp['beta'].shape[0]):
                    # Train: pick the class with the highest softmax score per
                    # sample (the original indexed beta with 0 instead of i and
                    # computed an extra, discarded softmax)
                    preds_train[:, i] = np.argmax(scipy.special.softmax(
                        X_np_train.dot(fit_samp['beta'][i, :]), axis=1),
                                                  axis=1)
                    # Test
                    preds_test[:, i] = np.argmax(scipy.special.softmax(
                        X_np_test.dot(fit_samp['beta'][i, :]), axis=1),
                                                 axis=1)

                # Get consensus predictions from all samples
                cons_preds_train = mode(preds_train, axis=1)[0]
                cons_preds_test = mode(preds_test, axis=1)[0]

                np.savetxt(
                    'data/pystan_results/preds_posterior_NUTS_max_tree_depth_10.csv',
                    preds_test,  # the original saved an undefined `preds`
                    delimiter=',')
                plot_confusion_matrix(
                    encoder.transform(y_train),
                    cons_preds_train[:, 0],
                    classes=np.asarray(list(encoder_dict.keys())),
                    title='Confusion matrix, without normalization')
                st.pyplot()
                plot_confusion_matrix(
                    encoder.transform(y_test),
                    cons_preds_test[:, 0],
                    classes=np.asarray(list(encoder_dict.keys())),
                    title='Confusion matrix, without normalization')
                st.pyplot()
Code Example #4

model, model_version, summary, metrics = load_model_gloves()
cls_model, cls_model_version, cls_summary, cls_metrics = load_model_gloves(
    'gloves-classifier')
with st.expander("1. Distances and Classification"):
    # TODO dynamically pull this copy from a Medium article
    st.write("""
    ## How to use
    Input an anchor image of the animal type you want to classify (works best on dog/cat breeds).
    \n
    Input 1 or more other images to see predicted distances and predicted matching percentage.
    """)

    anchor_file = st.file_uploader(
        "Input an image to use as the anchor image",
        type="jpg",
    )
    if anchor_file is not None:
        st.image(anchor_file,
                 caption="Uploaded Anchor Image.",
                 use_column_width=True)

    other_files = st.file_uploader("Input Images to compare to Anchor Image",
                                   accept_multiple_files=True,
                                   type="jpg")
    # accept_multiple_files=True returns a (possibly empty) list, never None,
    # so truthiness is the meaningful check here
    if other_files:
        cols_1 = st.columns(4)
        for idx, file in enumerate(other_files):
            cols_1[idx % 4].image(file,
                                  caption=file.name,
                                  use_column_width=True)
Code Example #5
def main():
	"""Web App"""

	st.title("Diabetes Risk Prediction 💉")
	st.text("-- By Mrinal Gosain")

	activities = ["EDA","Plot","Model Building","About"]

	choice = st.sidebar.selectbox("Select Activity",activities)

	# Exploratory data analysis!
	if choice == 'EDA':
		st.subheader("Exploratory Data Analysis 🔍")

		data = st.file_uploader("Upload Dataset",type=["csv","txt"])
		if data is not None:  # If data isn't empty!
			df = pd.read_csv(data)
			st.dataframe(df.head())

			if st.checkbox("Show shape"):
				st.write(df.shape)

			if st.checkbox("Show Columns"):
				all_columns = df.columns.to_list()
				st.write(all_columns)

			if st.checkbox("Select Columns To Show"):
				selected_columns = st.multiselect("Select Columns",all_columns)
				new_df =  df[selected_columns]
				st.dataframe(new_df)


			if st.checkbox("Show Summary"):
				st.write(df.describe())

			if st.checkbox("Show Value Counts"):
				st.write(df.iloc[:,-1].value_counts())

			if st.checkbox("Correlation with Seaborn"):
				st.write(sns.heatmap(df.corr(),annot=True))
				st.pyplot()

			if st.checkbox("Pie Chart"):
				all_columns = df.columns.to_list()
				columns_to_plot = st.selectbox("Select 1 Column ",all_columns)
				pie_plot = df[columns_to_plot].value_counts().plot.pie(autopct="%1.1f%%")
				st.write(pie_plot)
				st.pyplot()


	# Plotting !
	elif choice == 'Plot':
		st.subheader("Data Visualization 📈")

		data = st.file_uploader("Upload Dataset",type=["csv","txt"])
		if data is not None:
			df = pd.read_csv(data)
			st.dataframe(df.head())

	

		all_columns_names = df.columns.tolist()
		type_of_plot = st.selectbox("Select Type of Plot",["area","bar","line","hist","box","kde"])
		selected_columns_names = st.multiselect("Select Columns To Plot",all_columns_names)

		if st.button("Generate Plot"):
			st.success("Generating Customizable Plot of {} for {}".format(type_of_plot,selected_columns_names))

			# Plot By Streamlit
			if type_of_plot == 'area':
				cust_data = df[selected_columns_names]
				st.area_chart(cust_data)

			elif type_of_plot == 'bar':
				cust_data = df[selected_columns_names]
				st.bar_chart(cust_data)

			elif type_of_plot == 'line':
				cust_data = df[selected_columns_names]
				st.line_chart(cust_data)

			# Custom Plot 
			elif type_of_plot:
				cust_plot= df[selected_columns_names].plot(kind=type_of_plot)
				st.write(cust_plot)
				st.pyplot()




	# Model building!
	elif choice == 'Model Building':
		st.subheader("Building ML Model 👨‍💻")

		data = st.file_uploader("Upload Dataset",type=["csv","txt"])
		if data is not None:
			df = pd.read_csv(data)
			st.dataframe(df.head())

			# Model Building
			X = df.iloc[:,0:-1]
			Y = df.iloc[:,-1]  #Make sure that the predicted column is the last one!
			seed = 42

			# Model 
			models = []
			models.append(("LR",LogisticRegression()))
			models.append(("LDA",LinearDiscriminantAnalysis()))
			models.append(("KNN",KNeighborsClassifier()))
			models.append(('CART', DecisionTreeClassifier()))
			models.append(('NB', GaussianNB()))
			models.append(('SVM', SVC()))
			# evaluate each model in turn

			# List
			model_names = []
			model_mean = []
			model_std = []
			all_models = []
			scoring = 'accuracy'

			for name,model in models:
				kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=seed)  # shuffle=True is required when passing random_state
				cv_results = model_selection.cross_val_score(model,X,Y,cv=kfold,scoring=scoring)
				model_names.append(name)
				model_mean.append(cv_results.mean())
				model_std.append(cv_results.std())

				accuracy_results = {"model_name":name,"model_accuracy":cv_results.mean(),"standard_deviation":cv_results.std()}
				all_models.append(accuracy_results)

			if st.checkbox("Metrics as Table"):
				st.dataframe(pd.DataFrame(zip(model_names,model_mean,model_std),columns=["Model Name","Model Accuracy","Standard Deviation"]))

			if st.checkbox("Metrics as JSON"):
				st.json(all_models)



	elif choice == 'About':

		st.subheader("About Me 👨‍🎓")
		st.text("Hi, I am Mrinal. I am a data science enthusiast who loves to build machine learning applications end to end.")
		st.text("I take an avid interest in breaking down complex problems and leveraging data to drive business decisions.")
		if st.button("Github 🔗"):
			webbrowser.open_new_tab(github_url)
		if st.button("Linkedin 🔗"):
			webbrowser.open_new_tab(linkedin_url)
Code Example #6
import pandas as pd
import streamlit as st

st.info(
    "## Instructions:\n" +
    "1. Upload simple csv (like `data.csv` from this repo)\n" +
    "2. Check the box to choose column names\n" +
    "3. Change the column names selection and see the dataframe update in response\n"
    +
    "4. Uncheck the box to exit column name selection and see the dataframe go back to its previous state\n"
)

csv_file = st.file_uploader("File", type="csv")

if csv_file is not None:
    dataframe = pd.read_csv(csv_file)
    all_columns = list(dataframe.columns)
    if st.checkbox("Select Columns", False):
        columns = st.multiselect("Columns", all_columns, all_columns)
    else:
        columns = all_columns
    st.write(dataframe.filter(columns))
Code Example #7
def main():
    st.set_option('deprecation.showfileUploaderEncoding', False)

    st.title("HATI.AI")
    image = Image.open('macroview.jpg')
    #st.image(image, use_column_width=False)
    st.sidebar.image(image)
    st.sidebar.title("Hati.Ai Web App")
    
    menu = ["Login","SignUp"]
    choice = st.sidebar.selectbox("Menu",menu)


    if choice == "Login":
        st.subheader("Login Section")

        username = st.sidebar.text_input("User Name")
        password = st.sidebar.text_input("Password",type='password')
        if st.sidebar.checkbox("Login"):
			# if password == '12345':
            create_usertable()
            hashed_pswd = make_hashes(password)

            result = login_user(username,check_hashes(password,hashed_pswd))
            if result:

                st.success("Logged In as {}".format(username))
                def process_text(text):
                    processed_data = []
                    # Make all the strings lowercase and remove non-alphabetic characters
                    #text = re.sub('[^A-Za-z]', ' ', text.lower())

                    # Tokenize the text, i.e. split every sentence into a list of words.
                    # The text is already split into sentences, so there is no need to call sent_tokenize
                    tokenized_text = word_tokenize(text)

                    # Append the result into a new list called processed_data
                    processed_data.append(tokenized_text)

                    # Remember, this final output is a list of words
                    return processed_data
            
                @st.cache(suppress_st_warning=True)
                def load_data(uploaded_file):
                    df = pd.read_csv(uploaded_file)
                    return df
                
                st.sidebar.subheader("Choose What Do You Want To Do")
                classifier = st.sidebar.selectbox(" ", ("Find new topics automatically", "POWER BI Dashboard", "Interact with our chatbot"))
                if classifier == 'POWER BI Dashboard':
                    import streamlit.components.v1 as components
                    from urllib.request import urlopen
                    html = urlopen("https://app.powerbi.com/view?r=eyJrIjoiZTA4NWU4MjYtOTk3Yi00N2ZhLTgwZWQtZWFhMzNkNDk1Zjk3IiwidCI6Ijk5NmQwYTI3LWUwOGQtNDU1Ny05OWJlLTY3ZmQ2Yjk3OTA0NCIsImMiOjEwfQ%3D%3D&pageName=ReportSection06db5928b6af61b2868f").read()
                    #components.html(html, width=None, height=600, scrolling=True)
                    st.markdown("""
                        <iframe width="900" height="606" src="https://app.powerbi.com/view?r=eyJrIjoiZTA4NWU4MjYtOTk3Yi00N2ZhLTgwZWQtZWFhMzNkNDk1Zjk3IiwidCI6Ijk5NmQwYTI3LWUwOGQtNDU1Ny05OWJlLTY3ZmQ2Yjk3OTA0NCIsImMiOjEwfQ%3D%3D&pageName=ReportSection06db5928b6af61b2868f" frameborder="0" style="border:0" allowfullscreen></iframe>
                        """, unsafe_allow_html=True)

              
                if classifier == 'Interact with our chatbot':    
                    import pickle
                    with open('tnb_topic_classifier_svm', 'rb') as training_model:
                        topic_model = pickle.load(training_model)
                    import malaya
                    model = malaya.sentiment.transformer(model = 'albert', size = 'base')
                    #from src import model          
                    #malay_bert = model.BertModel()
                    # eng_flair = model.Flair()
                    # eng_vader = model.Vader()
                    test = pd.DataFrame()
                    test['Positive'] = ''
                    test['Neutral'] = ''
                    test['Negative'] = ''
                    
                    st.title("Sentiment Analyzer")
                    message = st.text_area("Enter Text","Type Here ..")
                    if st.button("Analyze"):
                     with st.spinner("Analyzing the text …"):
                         result = model.predict_proba([message])
                         #result = malay_bert.predict(message)
                         message = [message]
                         topic = topic_model.predict(message)
                         #output = "Result is: Positive:" + str(result[0]) + "Neutral:" + str(result[1]) + "Negative:" + str(result[2]) + "topic is: " + str(topic)
                         output = "result is:" + str(result) + "topic is: " + str(topic)
                         st.write(output)
            
                    else:
                     st.warning("Not sure! Try to add some more words")
    
                from stop_words import get_stop_words
                if classifier == 'Find new topics automatically':
            
                    
                    uploaded_file = st.file_uploader('Upload CSV file to begin', type='csv')

                    # if a file was uploaded, show the left bar
                    if uploaded_file is not None:
                        df = load_data(uploaded_file)

                        if st.sidebar.checkbox("Show raw data", False):
                            st.subheader("Uploaded Data Set")
                            st.write(df)

                        st.sidebar.subheader("Text column to analyse")
                        st_ms = st.sidebar.selectbox("Select Text Columns To Analyse", (df.columns.tolist()))

                        from top2vec import Top2Vec

                        # initialize an empty dataframe and put the selected
                        # column into it as strings
                        d1 = pd.DataFrame()
                        d1['text'] = df[st_ms].astype(str)

                        # initialize the Top2Vec model and fit the text
                        # (a plain list of strings is what Top2Vec expects)
                        model = Top2Vec(documents=d1['text'].tolist(), speed="learn", workers=10)

                        topic_sizes, topic_nums = model.get_topic_sizes()
                        for topic in topic_nums:
                            # display the generated word-cloud image for each topic
                            st.pyplot(model.generate_topic_wordcloud(topic))

        


            else:
                st.warning("Incorrect Username/Password")


    elif choice == "SignUp":
        st.subheader("Create New Account")
        new_user = st.text_input("Username")
        new_password = st.text_input("Password",type='password')

        if st.button("Signup"):
            create_usertable()
            add_userdata(new_user,make_hashes(new_password))
            st.success("You have successfully created a valid Account")
            st.info("Go to Login Menu to login")
Code Example #8
import streamlit as st
import pdfplumber



def main(file):
    with pdfplumber.open(file) as pdf:
        page = pdf.pages[0]
        text = page.extract_text(x_tolerance=2)
        return text




st.title("invoice processing")
file = st.file_uploader("choose a file to extract")
element = st.text_input("enter the parameter to extract")
if st.button('classify'):
    c = main(file)
    st.write(c)
    # c only exists after the button is pressed, so the extraction must live
    # inside this block; word gets a default in case nothing matches
    word = None
    for line in c.split('\n'):
        if line.startswith(element):
            word = line.split()[-1]
    st.write("element:", word)
Code Example #9
st.write('- *sdmt/bvmt/cvlt*: raw score on the test (integer)')
st.write('**Note 1**: please use exactly these column names in this order')
st.write(
    '**Note 2**: only the first 3 columns are an absolute requirement. '
    'For the cognitive scores, please prepare your dataframe to contain only the columns for which you have data. '
    'Hence, this can be a subset of the latter 3 columns, but it should include at least one of them'
)

st.header(
    'Step 2: Define the z-score on which you want to declare cognitive impairment'
)
z_cutoff = st.selectbox(label='Choose the z cutoff score',
                        options=[-1.5, -1, -0.5, 0])

st.header('Step 3: Upload your excel file')
input_object = st.file_uploader("Browse for a file or drag and drop here:",
                                type=("xlsx"))
if input_object:
    input_data = pd.read_excel(input_object)

    # region Perform checks if the data was correctly entered
    error_dict = {
        'columns':
        'Please be sure to use the correct column names and that they are lower case',
        'age':
        'Please use age values between 0 and 125 years, and only use integer values',
        'sex': 'Please assure the following encoding: Male = 1, Female = 2',
        'education':
        'Please use education levels that are encoded as 6, 12, 13, 15, 17 or 21 years',
        'sdmt': 'Please use sdmt values between 0 and 110',
        'bvmt': 'Please use bvmt values between 0 and 36',
        'cvlt': 'Please use cvlt values between 0 and 80'
    }
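The snippet breaks off before the checks themselves. A hypothetical continuation, with illustrative check functions invented here (the real app's validation logic is not shown), could walk error_dict like this:

    # Hypothetical checks keyed like error_dict; these lambdas are
    # illustrative assumptions, not the original project's logic
    checks = {
        'columns': lambda df: {'age', 'sex', 'education'} <= set(df.columns),
        'age': lambda df: df['age'].between(0, 125).all(),
        'sex': lambda df: df['sex'].isin([1, 2]).all(),
    }
    for key, check in checks.items():
        if not check(input_data):
            st.error(error_dict[key])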
Code Example #10
        par_caps = [word_2_indices[i] for i in start_word]
        par_caps = sequence.pad_sequences([par_caps],
                                          maxlen=max_len,
                                          padding='post')
        model = im_model()
        preds = model.predict([np.array([image]), np.array(par_caps)])
        word_pred = indices_2_word[np.argmax(preds[0])]
        start_word.append(word_pred)

        if word_pred == "<end>" or len(start_word) > max_len:
            break

    return ' '.join(start_word[1:-1])


st.set_option('deprecation.showfileUploaderEncoding', False)

uploaded_file = st.file_uploader('Upload the image', type=['jpg', 'png'])

if uploaded_file is not None:
    img = Image.open(uploaded_file)
    st.image(img)
    s = st.success('Generating Caption')
    test_img = get_encoding(resnet(), uploaded_file)
    Argmax_Search = predict_captions(test_img)
    s.empty()
    st.markdown(f'''<html>
    <p style="color:white;
              background-color:black;
              font-size:140%;
              display:inline-block;
              padding:10px;
Code Example #11
import pandas as pd
import streamlit as st
from nltk import word_tokenize
from sklearn.cluster import KMeans
from sklearn.decomposition import TruncatedSVD
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
from sklearn.metrics import classification_report, pairwise_distances
from sklearn.metrics.pairwise import pairwise_kernels
from cleaning import (apply_cleaning, build_idf_matrix, build_lexicon, freq,
                      fulldataset, idf, l2_normalizer, numDocsContaining)

st.write("""
# Simple Traceability SRS Document
Berikut ini algoritma yang digunakan untuk pengukuran keterlacakan pada dokumen
""")

#file upload
index0 = st.file_uploader("Choose a file")
if index0 is not None:
    st.sidebar.header('Dataset Parameter')
    x1 = pd.ExcelFile(index0)
    index1 = st.sidebar.selectbox('What Dataset you choose?', x1.sheet_names)

    # Load data example (dari functional maupun nonfunctional)
    st.header('Dataset parameters')
    statement = fulldataset(index0, index1)

    # Get text to clean (dari row yang diinginkan)
    text_to_clean = list(statement['Requirement Statement'])

    # Clean text
    print("Loading Original & Cleaned Text...")
    cleaned_text = apply_cleaning(text_to_clean)
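The fragment stops after cleaning. Given the imports at the top of this example, a plausible next step (an assumption, not the original code) is to vectorize the cleaned requirements and compare them pairwise:

    # Hypothetical continuation using imports already present above:
    # TF-IDF vectors plus cosine similarity as a simple traceability score
    vectorizer = TfidfVectorizer()
    tfidf = vectorizer.fit_transform(cleaned_text)
    similarity = pairwise_kernels(tfidf, metric='cosine')
    st.write(pd.DataFrame(similarity))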
Code Example #12
import io
import json

import streamlit as st
from PIL import Image, ImageFont


st.title('顔を検出します')
st.write('jpg or JPGのファイルを読み込めます。')
st.write('顔を認識し、性別と年齢を推測します。')

with open('secret.json') as f:
    secret_json = json.load(f)   
subscription_key = secret_json['subscription_key']
assert subscription_key

face_api_url = 'https://cntr2020.cognitiveservices.azure.com/face/v1.0/detect'


uploaded_file = st.file_uploader("Choose an image...", type='jpg')
if uploaded_file is not None:
    img = Image.open(uploaded_file)
    with io.BytesIO() as output:
        img.save(output, format="JPEG")
        binary_img = output.getvalue()
        
    headers = {
        'Content-type': 'application/octet-stream',
        'Ocp-Apim-Subscription-Key': subscription_key
    }

    params = {
        'returnFaceId': 'true',
        'returnFaceAttributes': 'age,gender'
    }
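The snippet is cut off after building the request; assuming the standard requests library (not visible in this fragment's imports), the Face API call would typically continue like this:

    import requests

    # Hypothetical continuation: POST the JPEG bytes built above and
    # render whatever faces the API reports
    res = requests.post(face_api_url,
                        params=params,
                        headers=headers,
                        data=binary_img)
    for face in res.json():
        st.write(face.get('faceAttributes'))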
Code Example #13
import io
import streamlit as st
import pandas as pd

st.set_option('deprecation.showfileUploaderEncoding', False)

# title
GROUP_NAME = 'RandomGroup'
file_buffer = st.file_uploader("Upload WhatsApp chat text file", type='txt')
chat_text = file_buffer.readlines()

if chat_text is not None:
    st.write(chat_text)

st.title(f"{GROUP_NAME} Chat Analysis")

# chat data at a galance
st.dataframe()

st.write(
    pd.DataFrame({
        'first column': [1, 2, 3, 4],
        'second column': [10, 20, 30, 40]
    }))
Code Example #14
import base64


# The fragment starts mid-function; judging from the call to
# get_table_download_link_csv further down, the enclosing definition
# presumably looked like this:
def get_table_download_link_csv(df):
    csv = df.to_csv().encode()
    b64 = base64.b64encode(csv).decode()
    href = f'<a href="data:file/csv;base64,{b64}" download="captura.csv" target="_blank">Download file data</a>'
    return href


# Press the green button in the gutter to run the script.
if __name__ == "__main__":
    """
    """
    st.sidebar.info("Data Generating!")
    st.sidebar.info("Please upload a file with rules for generating data!")
    n_try = st.sidebar.number_input('Attempts to generating',
                                    min_value=1,
                                    max_value=10,
                                    value=5)
    file_uploaded = st.file_uploader("Upload File")
    if file_uploaded is not None:
        listData = prepared_file_uploaded(file_uploaded)
        dataCombination = data_generating(listData, n_try=n_try)
        data2export = export2file(dataCombination)
        st.info("Data Overview!")
        st.info(f"Data after generated had {data2export.shape[0]} rows")
        st.dataframe(data2export.head(10))
        st.markdown(get_table_download_link_csv(data2export),
                    unsafe_allow_html=True)

        pr = ProfileReport(data2export, explorative=True)
        st.title("Data Statistic")
        st_profile_report(pr)
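As a side note on get_table_download_link_csv above: the base64 <a href> trick predates Streamlit's native download widget, so on Streamlit 0.88 or newer the same download could presumably be offered directly:

        # Native alternative to the base64 link (assumes Streamlit >= 0.88):
        st.download_button("Download file data",
                           data=data2export.to_csv().encode(),
                           file_name="captura.csv",
                           mime="text/csv")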
Code Example #15
import re
from datetime import date

import streamlit as st
from streamlit.script_runner import RerunException

import dbutils
import image_process

name_regex = '[A-Za-z]{2,25}( [A-Za-z]{2,25})?'
name_regex = re.compile(name_regex)

pan_regex = r'^[A-Za-z]{5}[0-9]{4}[A-Za-z]$'
pan_regex = re.compile(pan_regex)

st.title("PAN Card Reader")
st.markdown("submit and verify your pan card")

pan_card_image = st.file_uploader("Upload image", type=['png', 'jpg'], accept_multiple_files=False)

cols = st.beta_columns(2)
with cols[0]:
    pan = st.text_input("PAN number")
with cols[1]:
    min_value = date(1921, 1, 1)
    dob = st.date_input("Date of Birth", min_value=min_value)
name = st.text_input("Name")
fathers_name = st.text_input("Father's Name")

placeholder = st.empty()

def standardize_name(name: str) -> str:
    name = " ".join(name.lower().split())
    return name
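The example ends after standardize_name. A hypothetical verification step applying the two compiled patterns to the form fields (the real project's logic is not shown) might read:

if st.button("Verify"):
    # Format-only checks using the regexes compiled above
    if not pan_regex.match(pan):
        placeholder.error("PAN must look like ABCDE1234F")
    elif not name_regex.fullmatch(standardize_name(name)):
        placeholder.error("Please enter a valid name")
    else:
        placeholder.success("Format checks passed")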
Code Example #16
def main():

    st.image('img/gui_logo.jpeg', use_column_width=True)
    st.header('Bem vindo!')
    st.subheader('**Você está no sistema de recomendação de clientes**')
    st.markdown(
        'O sistema recomendará novos clientes baseado em comparações com os seus atuais clientes de forma customizada a partir das características desejadas.'
    )

    st.markdown(
        '###  Precisamos que você nos forneça o **portifólio de seus clientes!**'
    )
    st.markdown(
        ' *Obs.: Caso você não tenha um portifólio para usar, escolha um [desses](https://github.com/guireis1/Codenation-Final-Project/tree/master/data). *'
    )
    file3 = st.file_uploader('Upload clientes.csv', type='csv')

    if file3 is not None:

        market_pre = pd.read_csv('data/data_preprocess.csv')
        market = pd.read_csv('data/market.csv')
        #market = pd.DataFrame(readcsv(file2))
        #market= pd.read_csv(file2)
        #market_pre = pd.DataFrame(readcsv(file1))
        #market_pre = pd.read_csv(file1)
        port = pd.DataFrame(readcsv(file3))
        st.text('Loading data...done!')

        # Starting the processing
        #market = pd.read_csv('market.csv')
        #market_pre = pd.read_csv('data_preprocess.csv')
        #port = pd.read_csv('data/estaticos_portfolio1.csv')

        market_pre.set_index('id', inplace=True)
        market.set_index(market_pre.index, inplace=True)
        market.drop('Unnamed: 0', axis=1, inplace=True)

        port = port.set_index('id')
        port.drop(port.columns, axis=1, inplace=True)

        port_market = market.merge(port,
                                   how='right',
                                   left_index=True,
                                   right_index=True)
        port_market_pre = market_pre.merge(port,
                                           how='right',
                                           left_index=True,
                                           right_index=True)

        st.markdown('DataFrame do Portofólio:')
        head(port_market)
        # All datasets ready
        #st.sidebar.image(st.image('img/logo.png', use_column_width=True))
        st.sidebar.header('Opções de análise do Portifólio:')
        sidemulti = st.sidebar.multiselect(
            'Escolha: ', ('Visualização', 'Descritiva', 'Geolocalização'))

        if ('Visualização' in sidemulti):
            st.markdown('## **Visualização do Portifólio**')
            st.markdown('Perfil de clientes considerando features importantes')
            vis(port_market)
            st.markdown('*Para melhor visualização clique na imagem*')
        if ('Descritiva' in sidemulti):
            st.markdown('## **Análise Descritiva do Portifólio**')
            st.dataframe(descritiva(port_market))
            missing(port_market, 'Visualização dos nulos do Portifólio')
            missing_dendo(port_market, 'Dendograma dos nulos do Portifólio')
            st.markdown('*Para melhor visualização clique na imagem*')
        if ('Geolocalização' in sidemulti):
            coordenadas = pd.read_csv(
                'https://raw.githubusercontent.com/guireis1/Codenation-Final-Project/master/data/coordenadas'
            )
            coordenadas.drop('Unnamed: 0', axis=1, inplace=True)
            st.markdown('## **Geolocalização do Portifólio**')
            st.markdown('Localização das empresas contidas no portifólio')
            cord_port = geoloc(port_market, coordenadas)
            cord_port_df = pd.DataFrame(cord_port, columns=('lat', 'lon'))
            st.map(cord_port_df)

        st.sidebar.header('Opções de análise do mercado:')
        sidemulti_market = st.sidebar.multiselect(
            'Escolha: ', ('Visualização', 'Descritiva', 'Correlação',
                          'Análise dos Nulos', 'Colunas excluídas'))

        if ('Visualização' in sidemulti_market):
            st.markdown('## **Visualização do Mercado**')
            vis(market)
            st.markdown('*Para melhor visualização clique na imagem*')
        if ('Descritiva' in sidemulti_market):
            st.markdown('## **Análise Descritiva do Mercado**')
            st.dataframe(descritiva(market))
            #missing(market,'Visualização dos nulos')
            #missing_dendo(market,'Dendograma nulos')

        if ('Correlação' in sidemulti_market):
            st.markdown('## **Correlações do Mercado**')
            st.markdown('Correlação padrão')
            st.image('img/corr_matrix.png', use_column_width=True)
            st.markdown('Correlação usando PPS')
            st.image('img/corr_pps.png', use_column_width=True)

        if ('Análise dos Nulos' in sidemulti_market):
            st.markdown('## **Análise dos nulos **')

            st.markdown('### **Colunas Numéricas:**')
            st.image('img/valores20.png', use_column_width=True)
            st.image('img/valores60.png', use_column_width=True)
            st.image('img/valores80.png', use_column_width=True)
            st.image('img/dendo_90.png', use_column_width=True)
            st.image('img/dendo100.png', use_column_width=True)

            st.markdown('### **Colunas Categoricas:**')
            st.image('img/valores_nulos.png', use_column_width=True)
            st.image('img/dendo_cat.png', use_column_width=True)
        if ('Colunas excluídas' in sidemulti_market):
            col_excluidas = [
                'sg_uf', 'idade_emp_cat', 'fl_me', 'fl_sa', 'fl_epp',
                'fl_ltda', 'dt_situacao', 'fl_st_especial', 'nm_divisao',
                'nm_segmento', 'fl_spa', 'vl_total_tancagem',
                'vl_total_veiculos_antt', 'fl_optante_simples', 'qt_art',
                'vl_total_veiculos_pesados_grupo',
                'vl_total_veiculos_leves_grupo', 'vl_total_tancagem_grupo',
                'vl_total_veiculos_antt_grupo', 'vl_potenc_cons_oleo_gas',
                'fl_optante_simei', 'sg_uf_matriz', 'de_saude_rescencia',
                'nu_meses_rescencia', 'de_indicador_telefone',
                'fl_simples_irregular', 'vl_frota', 'qt_socios_pf',
                'qt_socios_pj', 'idade_maxima_socios', 'idade_minima_socios',
                'qt_socios_st_regular', 'qt_socios_st_suspensa',
                'qt_socios_masculino', 'qt_socios_feminino', 'qt_socios_pep',
                'qt_alteracao_socio_total', 'qt_alteracao_socio_90d',
                'qt_alteracao_socio_180d', 'qt_alteracao_socio_365d',
                'qt_socios_pj_ativos', 'qt_socios_pj_nulos',
                'qt_socios_pj_baixados', 'qt_socios_pj_suspensos',
                'qt_socios_pj_inaptos', 'vl_idade_media_socios_pj',
                'vl_idade_maxima_socios_pj', 'vl_idade_minima_socios_pj',
                'qt_coligados', 'qt_socios_coligados', 'qt_coligados_matriz',
                'qt_coligados_ativo', 'qt_coligados_baixada',
                'qt_coligados_inapta', 'qt_coligados_suspensa',
                'qt_coligados_nula', 'idade_media_coligadas',
                'idade_maxima_coligadas', 'idade_minima_coligadas',
                'coligada_mais_nova_ativa', 'coligada_mais_antiga_ativa',
                'idade_media_coligadas_ativas', 'coligada_mais_nova_baixada',
                'coligada_mais_antiga_baixada',
                'idade_media_coligadas_baixadas', 'qt_coligados_sa',
                'qt_coligados_me', 'qt_coligados_mei', 'qt_coligados_ltda',
                'qt_coligados_epp', 'qt_coligados_norte', 'qt_coligados_sul',
                'qt_coligados_nordeste', 'qt_coligados_centro',
                'qt_coligados_sudeste', 'qt_coligados_exterior',
                'qt_ufs_coligados', 'qt_regioes_coligados',
                'qt_ramos_coligados', 'qt_coligados_industria',
                'qt_coligados_agropecuaria', 'qt_coligados_comercio',
                'qt_coligados_serviço', 'qt_coligados_ccivil',
                'qt_funcionarios_coligados', 'qt_funcionarios_coligados_gp',
                'media_funcionarios_coligados_gp',
                'max_funcionarios_coligados_gp',
                'min_funcionarios_coligados_gp', 'vl_folha_coligados',
                'media_vl_folha_coligados', 'max_vl_folha_coligados',
                'min_vl_folha_coligados', 'vl_folha_coligados_gp',
                'media_vl_folha_coligados_gp', 'max_vl_folha_coligados_gp',
                'min_vl_folha_coligados_gp', 'faturamento_est_coligados',
                'media_faturamento_est_coligados',
                'max_faturamento_est_coligados',
                'min_faturamento_est_coligados',
                'faturamento_est_coligados_gp',
                'media_faturamento_est_coligados_gp',
                'max_faturamento_est_coligados_gp',
                'min_faturamento_est_coligados_gp', 'total_filiais_coligados',
                'media_filiais_coligados', 'max_filiais_coligados',
                'min_filiais_coligados', 'qt_coligados_atividade_alto',
                'qt_coligados_atividade_medio', 'qt_coligados_atividade_baixo',
                'qt_coligados_atividade_mt_baixo',
                'qt_coligados_atividade_inativo', 'qt_coligadas',
                'sum_faturamento_estimado_coligadas',
                'de_faixa_faturamento_estimado', 'vl_faturamento_estimado_aux',
                'vl_faturamento_estimado_grupo_aux', 'qt_ex_funcionarios',
                'qt_funcionarios_grupo', 'percent_func_genero_masc',
                'percent_func_genero_fem', 'idade_ate_18', 'idade_de_19_a_23',
                'idade_de_24_a_28', 'idade_de_29_a_33', 'idade_de_34_a_38',
                'idade_de_39_a_43', 'idade_de_44_a_48', 'idade_de_49_a_53',
                'idade_de_54_a_58', 'idade_acima_de_58',
                'grau_instrucao_macro_analfabeto',
                'grau_instrucao_macro_escolaridade_fundamental',
                'grau_instrucao_macro_escolaridade_media',
                'grau_instrucao_macro_escolaridade_superior',
                'grau_instrucao_macro_desconhecido', 'total',
                'meses_ultima_contratacaco', 'qt_admitidos_12meses',
                'qt_desligados_12meses', 'qt_desligados', 'qt_admitidos',
                'media_meses_servicos_all', 'max_meses_servicos_all',
                'min_meses_servicos_all', 'media_meses_servicos',
                'max_meses_servicos', 'min_meses_servicos',
                'qt_funcionarios_12meses', 'qt_funcionarios_24meses',
                'tx_crescimento_12meses', 'tx_crescimento_24meses'
            ]

            st.markdown('## **Colunas excluídas**')
            st.markdown(
                'Decidimos não utilizá-las pela quantidade de linhas não preenchidas, grandes correlações com outras variáveis, pouca importância para o modelo ou redundância!'
            )
            st.markdown('**São elas:**')
            st.write(col_excluidas)

        st.sidebar.header('Sistema de recomendação')
        start_model = st.sidebar.checkbox(
            'Aperte para começarmos a modelagem do sistema!')

        st.sidebar.markdown('**Desenvolvido por,**')
        st.sidebar.markdown('*Guilherme Reis Mendes*')
        st.sidebar.markdown(
            '[LinkedIn](https://www.linkedin.com/in/guilherme-reis-2862ab153/)'
        )
        st.sidebar.markdown('[GitHub](https://github.com/guireis1/)')

        if start_model:
            st.header('**Modelagem**')
            st.subheader(
                '**Primeiro selecione as features que gostaria de usar**')
            st.markdown(
                '*Essas serão as colunas que serão utilizadas no sistema de recomendação!*'
            )
            st.markdown('**Colunas que recomendamos:**')

            col_select = []

            ramo = st.checkbox('de_ramo')
            idade = st.checkbox('idade_emp_cat')
            meso = st.checkbox('nm_meso_regiao')
            juridica = st.checkbox('natureza_juridica_macro')
            faturamento = st.checkbox('de_faixa_faturamento_estimado_grupo')
            filiais = st.checkbox('qt_filiais')
            mei = st.checkbox('fl_mei')
            rm = st.checkbox('fl_rm')

            st.markdown('**Colunas opcionais:**')

            setor = st.checkbox('setor')
            rotatividade = st.checkbox('tx_rotatividade')
            idade_socios = st.checkbox('idade_media_socios')
            socios = st.checkbox('qt_socios')
            renda = st.checkbox('empsetorcensitariofaixarendapopulacao')
            leve = st.checkbox('vl_total_veiculos_leves_grupo')
            pesado = st.checkbox('vl_total_veiculos_pesados_grupo')
            iss = st.checkbox('fl_passivel_iss')
            atividade = st.checkbox('de_nivel_atividade')
            saude = st.checkbox('de_saude_tributaria')
            veiculo = st.checkbox('fl_veiculo')
            antt = st.checkbox('fl_antt')
            telefone = st.checkbox('fl_telefone')
            email = st.checkbox('fl_email')
            matriz = st.checkbox('fl_matriz')
            if ramo:
                col_select.append('de_ramo')
            if idade:
                col_select.append('idade_emp_cat')
            if meso:
                col_select.append('nm_meso_regiao')
                meso_ohe = pd.get_dummies(market_pre['nm_meso_regiao'],
                                          drop_first=True)
            if faturamento:
                col_select.append('de_faixa_faturamento_estimado_grupo')
            if juridica:
                col_select.append('natureza_juridica_macro')
                juridico_ohe = pd.get_dummies(
                    market_pre['natureza_juridica_macro'], drop_first=True)
            if filiais:
                col_select.append('qt_filiais')
            if mei:
                col_select.append('fl_mei')
            if rm:
                col_select.append('fl_rm')
            if setor:
                col_select.append('setor')
                setor_ohe = pd.get_dummies(market_pre['setor'],
                                           drop_first=True)
            if rotatividade:
                col_select.append('tx_rotatividade')
            if idade_socios:
                col_select.append('idade_media_socios')
            if socios:
                col_select.append('qt_socios')
            if renda:
                col_select.append('empsetorcensitariofaixarendapopulacao')
            if leve:
                col_select.append('vl_total_veiculos_leves_grupo')
            if pesado:
                col_select.append('vl_total_veiculos_pesados_grupo')
            if iss:
                col_select.append('fl_passivel_iss')
            if atividade:
                col_select.append('de_nivel_atividade')
            if saude:
                col_select.append('de_saude_tributaria')
            if veiculo:
                col_select.append('fl_veiculo')
            if antt:
                col_select.append('fl_antt')
            if telefone:
                col_select.append('fl_telefone')
            if email:
                col_select.append('fl_email')
            if matriz:
                col_select.append('fl_matriz')

            st.markdown('## **Podemos continuar?**')
            features_select = st.checkbox('Sim')

            if features_select:
                st.text('*Colunas selecionadas com sucesso!*')

                st.write('Colunas Selecionadas:', col_select)

                st.subheader(
                    'Agora escolha a quantidade de recomendações que deseja!')
                st.markdown(
                    '**Estamos trabalhando com k-nearest neighbors. O valor selecionado será proporcional ao número de samples do portifólio!**'
                )
                st.markdown(
                    '*Lembrando que quanto maior o valor de K, mais recomendações, porém, menos preciso*'
                )
                slider_nn = st.slider('Número de vizinhos:', 2, 10)

                market_col_select = market_pre[col_select]

                if 'setor' in market_col_select:
                    market_col_select.drop('setor', axis=1, inplace=True)
                    market_col_select = pd.concat(
                        [market_col_select, setor_ohe], axis=1)

                if 'nm_meso_regiao' in market_col_select:
                    market_col_select.drop('nm_meso_regiao',
                                           axis=1,
                                           inplace=True)
                    market_col_select = pd.concat(
                        [market_col_select, meso_ohe], axis=1)

                if 'natureza_juridica_macro' in market_col_select:
                    market_col_select.drop('natureza_juridica_macro',
                                           axis=1,
                                           inplace=True)
                    market_col_select = pd.concat(
                        [market_col_select, juridico_ohe], axis=1)

                market_col_select_scaled = StandardScaler().fit_transform(
                    market_col_select)
                market_col_select_scaled = pd.DataFrame(
                    market_col_select_scaled,
                    columns=market_col_select.columns,
                    index=market_col_select.index)

                head(market_col_select_scaled)

                st.markdown('## **Recomendação**')
                button_model = st.checkbox('Aperte para iniciar o sistema')

                if button_model:
                    st.text('Loading model...wait!')
                    port_model = market_col_select_scaled.merge(
                        port, how='right', left_index=True, right_index=True)
                    port_model.dropna(inplace=True)
                    suggestion = recommend(port_model, slider_nn,
                                           market_col_select_scaled, market)
                    suggestion['id'] = suggestion.index
                    st.text('Loading model...done!')
                    st.markdown('**Sistema de recomendação completo!**')
                    size_sug = suggestion.shape[0]
                    st.write('Foram geradas ', size_sug, ' recomendações!')
                    st.markdown('Baixe aqui:')
                    st.markdown(get_table_download_link(suggestion),
                                unsafe_allow_html=True)
                    coordenadas_market = pd.read_csv(
                        'https://raw.githubusercontent.com/guireis1/Codenation-Final-Project/master/data/coordenadas'
                    )
                    coordenadas_market.drop('Unnamed: 0', axis=1, inplace=True)
                    cord_reco = geoloc(suggestion, coordenadas_market)
                    cord_reco_df = pd.DataFrame(cord_reco,
                                                columns=('lat', 'lon'))
                    st.markdown('**Geolocalização das empresas recomendadas**')
                    st.map(cord_reco_df)
                    st.markdown('**Visualização das empresas recomendadas**')
                    vis(suggestion)
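recommend() here is one of the project's own helpers. A minimal sketch of what a k-nearest-neighbors recommender over the scaled market frame could look like — the signature is taken from the call above, the body is an assumption — follows:

import pandas as pd
from sklearn.neighbors import NearestNeighbors


def recommend(port_model, n_neighbors, market_scaled, market):
    """Sketch: for each portfolio client, fetch its nearest market rows."""
    nn = NearestNeighbors(n_neighbors=n_neighbors)
    nn.fit(market_scaled)
    _, idx = nn.kneighbors(port_model[market_scaled.columns])
    # collect neighbor ids, dropping clients already in the portfolio
    ids = pd.unique(market_scaled.index[idx.ravel()])
    ids = [i for i in ids if i not in port_model.index]
    return market.loc[ids]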
Code Example #17
def main():
    st.set_option('deprecation.showPyplotGlobalUse', False)
    image = Image.open('data/Reconn.png')
    st.image(image, use_column_width=False)

    def load_data(uploaded_file):
        df = pd.read_csv(uploaded_file)
        return df

    uploaded_file = st.file_uploader('Upload file to begin', type=("csv"))

    if uploaded_file is not None:
        df = load_data(uploaded_file)
        target_column = st.selectbox('Select Target Column',
                                     list(df.columns),
                                     key='target_column')

        st.sidebar.title('Know your dataset')

        if st.sidebar.checkbox("Preview Dataset"):
            st.markdown('## Dataset preview')
            if st.button("Head"):
                st.write(df.head(10))
            elif st.button("Tail"):
                st.write(df.tail(10))
            else:
                number = st.slider("Select No of Rows to show", 10,
                                   df.shape[0])
                st.write(df.head(number))

        if st.sidebar.checkbox("Show Column Names"):
            st.markdown('## Column names')
            st.write(df.columns)

        if st.sidebar.checkbox("Show Dimensions"):
            st.write(df.shape)

        if st.sidebar.checkbox('Describe', value=False):
            st.markdown('## Data Description')
            st.write(df.describe())
            st.markdown('### Columns that are potential binary features')
            bin_cols = []
            for col in df.columns:
                if len(df[col].value_counts()) == 2:
                    bin_cols.append(col)
            st.write(bin_cols)
            st.markdown('### Columns Types')
            st.write(df.dtypes)

        if st.sidebar.checkbox('Missing Data', value=False):
            st.markdown('## Missing Data')
            total = df.isnull().sum().sort_values(ascending=False)
            percent = (df.isnull().sum() /
                       df.isnull().count()).sort_values(ascending=False)
            missing_data = pd.concat([total, percent],
                                     axis=1,
                                     keys=['Total', 'Percent'])
            st.write(missing_data)
            try:
                sns.heatmap(df.isnull())
                st.pyplot()
            except Exception:
                st.warning('Error when showing plots')

        if st.sidebar.checkbox('Value Counts', value=False):
            st.markdown('## Value Counts')
            col = st.selectbox('Select Column',
                               list(df.columns),
                               key='val_col')
            st.write(df[col].value_counts())

        if st.sidebar.checkbox('Unique elements', value=False):
            st.markdown('## Unique elements')
            if st.checkbox('Show all unique elements', value=False):
                st.write(df.nunique())
            col = st.selectbox('Show columnwise unique elements',
                               list(df.columns),
                               key='unique_col')
            st.write(df[col].unique())

        if st.sidebar.checkbox('Show Distribution', False):
            st.subheader(f'Distribution of {target_column}')
            try:
                sns.distplot(df[target_column])
                st.write("Skewness: %.3f" % df[target_column].skew())
                st.write("Kurtosis: %.3f" % df[target_column].kurt())
                st.pyplot()
            except Exception:
                st.error('Invalid Column')

        st.sidebar.title('Explore the Dataset')

        if target_column is not None:
            if st.sidebar.checkbox('Scatter Plot', value=False):
                scatter_cols = st.sidebar.multiselect('Select Column',
                                                      list(df.columns),
                                                      key='scatter_cols')
                st.markdown('## Scatter Plots')
                for col in scatter_cols:
                    try:
                        data = pd.concat([df[target_column], df[col]], axis=1)
                        data.plot.scatter(x=col,
                                          y=target_column,
                                          ylim=(0, 800000))
                        st.pyplot()
                    except Exception:
                        st.error('Invalid column')

            if st.sidebar.checkbox('Box Plot', value=False):
                box_cols = st.sidebar.multiselect('Select Column',
                                                  list(df.columns),
                                                  key='box_cols')
                st.markdown('## Box Plots')
                for col in box_cols:
                    try:
                        data = pd.concat([df[target_column], df[col]], axis=1)
                        f, ax = plt.subplots(figsize=(8, 6))
                        fig = sns.boxplot(x=col, y=target_column, data=data)
                        fig.axis(ymin=np.min(df[target_column]),
                                 ymax=np.max(df[target_column]))
                        st.pyplot()
                    except Exception:
                        st.error('Invalid column')

            if st.sidebar.checkbox('Pair Plot', value=False):
                pair_cols = st.sidebar.multiselect('Select Column',
                                                   list(df.columns),
                                                   key='pair_plot')
                plot_size = st.sidebar.number_input('Select Plot size',
                                                    1.0,
                                                    5.0,
                                                    step=0.5,
                                                    key='plot_size',
                                                    value=2.5)
                st.markdown('## Pair Plots')
                cols = [target_column]
                for col in pair_cols:
                    cols.append(col)
                try:
                    sns.set()
                    sns.pairplot(df[cols], height=plot_size)
                    st.pyplot()
                except Exception:
                    st.error('Invalid column')

            if st.sidebar.checkbox('Correlation matrix', value=False):
                st.markdown('## Correlation matrix (heatmap style)')
                corrmat = df.corr()
                f, ax = plt.subplots(figsize=(12, 9))
                sns.heatmap(corrmat, vmax=.8, square=True)
                st.pyplot()

                if st.checkbox('With Target Column', value=False):
                    k = st.number_input(
                        '# of Cols for heatmap',
                        3,
                        len(df.columns),
                        step=1,
                        key='k')  #number of variables for heatmap
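                    # pick the k columns most strongly correlated with the target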
                    cols = corrmat.nlargest(k,
                                            target_column)[target_column].index
                    cm = np.corrcoef(df[cols].values.T)
                    sns.set(font_scale=1.25)
                    hm = sns.heatmap(cm,
                                     cbar=True,
                                     annot=True,
                                     square=True,
                                     fmt='.2f',
                                     annot_kws={'size': 10},
                                     yticklabels=cols.values,
                                     xticklabels=cols.values)
                    st.pyplot()

        st.sidebar.title('Data processing')

        if st.sidebar.checkbox('Treat missing values'):
            st.markdown('## Treat missing values')
            # Select a column to treat missing values
            col_option = st.selectbox("Select Column to treat missing values",
                                      df.columns)

            # Specify options to treat missing values
            missing_values_clear = st.selectbox(
                "Select Missing values treatment method",
                ("Replace with Mean", "Replace with Median",
                 "Replace with Mode"))

            if missing_values_clear == "Replace with Mean":
                replaced_value = df[col_option].mean()
                st.write("Mean value of the column:", replaced_value)
            elif missing_values_clear == "Replace with Median":
                replaced_value = df[col_option].median()
                st.write("Median value of the column:", replaced_value)
            elif missing_values_clear == "Replace with Mode":
                # mode() returns a Series; take the first (most frequent) value
                replaced_value = df[col_option].mode()[0]
                st.write("Mode value of the column:", replaced_value)

            Replace = st.selectbox("Replace values of column?", ("No", "Yes"))
            if Replace == "Yes":
                df[col_option] = df[col_option].fillna(replaced_value)
                st.write("Null values replaced")
            elif Replace == "No":
                st.write("No changes made")

        if st.sidebar.checkbox('Encode categorical column'):
            st.markdown("## Encode categorical column")
            # Select a column to do encoding
            col_selected = st.selectbox(
                "Select Column to treat categorical values", df.columns)

            # Specify options to do encoding
            encoder_type = st.selectbox("Select Encoding method",
                                        ("Label Encoder", ))

            if encoder_type == "Label Encoder":
                encoded_value = helper.labelEncoder.fit_transform(
                    df[col_selected])
                st.write("Label Encoded value of column is :", encoded_value)
            # elif encoder_type == "Ordinal Encoder":
            #     encoded_value = helper.ordinalEncoder.fit_transform(df[col_selected])
            #     st.write("Ordinal Encoded value of column is :", encoded_value)

            Replace = st.selectbox("Replace values of column?", ("No", "Yes"),
                                   key='encoder')
            if Replace == "Yes":
                df[col_selected] = encoded_value
                st.write("Added encoded column in dataframe")
                st.write(df.head())
            elif Replace == "No":
                st.write('No values replaced yet')

        if st.sidebar.checkbox('Scale column'):
            st.markdown("## Scaling column")
            col_scaled = st.selectbox("Select Column for feature scaling",
                                      df.columns)

            scaler_type = st.selectbox("Select Scaling method",
                                       ("Standard Scaler", "Min Max Scaler"))

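            # note: scalers (presumably sklearn's) expect 2-D input, hence
            # the reshape(-1, 1) on the selected column below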
            if scaler_type == "Standard Scaler":
                scaled_value = helper.standartScaler.fit_transform(
                    df[col_scaled].values.reshape(-1, 1))
                st.write("Standard-scaled values of the column:", scaled_value)
            elif scaler_type == "Min Max Scaler":
                scaled_value = helper.minMaxScaler.fit_transform(
                    df[col_scaled].values.reshape(-1, 1))
                st.write("Min-Max scaled values of the column:", scaled_value)

            Replace = st.selectbox("Replace values of column?", ("No", "Yes"),
                                   key='scaler')
            if Replace == "Yes":
                df[col_scaled] = scaled_value
                st.write("Added scaled column in dataframe")
                st.write(df.head())
            elif Replace == "No":
                st.write('No values replaced yet')

        st.sidebar.title('Download processed dataset')
        if st.sidebar.checkbox("download file"):
            st.sidebar.markdown(helper.get_table_download_link(df),
                                unsafe_allow_html=True)

        if st.sidebar.button('Credits'):
            st.sidebar.markdown('''

            **Md.Sadab Wasim**

            Get in touch: [Twitter](https://twitter.com/@sadab_wasim)

            Source Code: [Github](https://github.com/mdsadabwasim/reconn)
            ''')
Code example #18
0
# Copyright 2018-2020 Streamlit Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import streamlit as st

single_file = st.file_uploader("Drop a file:", type=["txt"])
if single_file is None:
    st.text("No upload")
else:
    st.text(single_file.read())

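# With accept_multiple_files=True the uploader returns a list of uploaded
# files; depending on the Streamlit version it may be None (or an empty
# list) before anything is uploaded, hence the check below.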
multiple_files = st.file_uploader("Drop multiple files:",
                                  type=["txt"],
                                  accept_multiple_files=True)
if multiple_files is None:
    st.text("No upload")
else:
    files = [file.read().decode() for file in multiple_files]
    st.text("\n".join(files))
Code example #19
0
import streamlit

# Title the "advanced options" section of the sidebar.
streamlit.sidebar.markdown("### Advanced Options")

# Add a checkbox to add a watermark.
add_watermark = streamlit.sidebar.checkbox(
    label="Add watermark to verify grade",
    value=False  # default
)
# Allow the user to upload a file.
FILE_TYPES = [".png", ".jpg", ".jpeg"]
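# NOTE: FILE_TYPES is declared but never passed to the uploader below;
# st.file_uploader's `type` parameter is normally given without leading
# dots, e.g. type=["png", "jpg", "jpeg"].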
uploader_title = """
## Use AI to grade the condition of a trading card!
"""
streamlit.markdown(uploader_title)
file = streamlit.file_uploader(label="Option 1: Upload a Picture of the Card")

## Get an image from ebay.
ebay_md = """
Option 2: enter an ebay auction URL, e.g. https://ebay.to/32conjA
"""
#streamlit.markdown(ebay_md)
ebay_url = streamlit.text_input(ebay_md)

if ebay_url not in [None, ""]:

    try:

        # output loading message to user.
        #streamlit.text("Loading image from ebay...please wait")
Code example #20
0
    | Date       | Ticker   | Order      | Price  | Quantity | Fee |
    |------------|----------|------------|--------|----------|-----|
    | 2019-10-01 | CASH.USD | deposit    | 1      | 100000   | 0   |
    | 2019-10-11 | AAPL     | purchase   | 234.52 | 88       | 35  |
    | 2019-11-25 | MSFT     | purchase   | 148.3  | 250      | 25  |
    | 2019-12-04 | AAPL     | sale       | 262.08 | 50       | 20  |
    | 2020-01-06 | FB       | purchase   | 208    | 100      | 10  |
    | 2020-01-25 | CASH.USD | withdrawal | 1      | 30000    | 0   |
    
    [Download example](https://github.com/simprecicchiani/PyPortfolioAnalytics/raw/master/assets/portfolios/generic.csv)
    
    ### Input file rules:

    - File format is `.csv`
    - First row contains the columns `Date`, `Ticker`, `Order`, `Price`, `Quantity`, `Fee`
    - Date format is `%Y-%m-%d`
    - Valid order types are `deposit`, `withdrawal`, `purchase`, `sale`
    - Only supports [Yahoo Finance](https://finance.yahoo.com/) tickers

    ### Caveats

    - Works with single currency account only
    - Requires a deposit to calculate return on investment
    - Only accepts transactions within business days
    '''
uploaded_file = st.file_uploader('Upload your transactions', type='csv')

if uploaded_file is not None:
    st.portfolio = Portfolio(uploaded_file)
    st.portfolio.run()
    dashboard()
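
A minimal sketch of how the uploaded file could be validated against the
input file rules above before being handed to Portfolio (a hypothetical
helper, not part of the original app; assumes pandas):

import pandas as pd

REQUIRED_COLUMNS = ["Date", "Ticker", "Order", "Price", "Quantity", "Fee"]
VALID_ORDERS = {"deposit", "withdrawal", "purchase", "sale"}

def validate_transactions(csv_file):
    """Check an uploaded transactions CSV against the rules listed above."""
    df = pd.read_csv(csv_file, parse_dates=["Date"])
    missing = [c for c in REQUIRED_COLUMNS if c not in df.columns]
    if missing:
        raise ValueError(f"Missing columns: {missing}")
    bad_orders = set(df["Order"]) - VALID_ORDERS
    if bad_orders:
        raise ValueError(f"Unknown order types: {bad_orders}")
    return df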
Code example #21
0
File: app.py Project: estkae/StreamlitDemoCyrus
def main():
    """Streamlit demo web app"""

    st.sidebar.title('Menu')
    choose_model = st.sidebar.selectbox("Choose the page or model", [
        "Home", "Logistic Regression", "XGB", "Stock Backtesting",
        "Predict Volatility", "Technical Indicators", "Bollinger Band",
        "Stock Market", "Dashboard"
    ])

    # Load data
    df, rows, columns, filename = load_data()
    data, drop_list = data_preprocessing(df)

    # Provide checkbox for uploading different training dataset
    if choose_model == "Home":
        if st.checkbox('Want to use other training set?'):
            uploaded_file = st.file_uploader("Choose a CSV file", type="csv")
            st.text(
                "Note: changing the training set may strongly affect predictions"
            )

            if uploaded_file:
                df, data, drop_list, filename, rows, columns = upload_different_data(
                    uploaded_file)

    # Home page building
    if choose_model == "Home":
        home_page_builder(df, data, rows, columns)

    # Page for Logistic Regression
    if choose_model == "Logistic Regression":
        model_reg = logistic_page_builder(data)

        if (st.checkbox("Want to Use this model to predict on a new dataset?")
            ):
            logistic_predictor(model_reg, rows, columns, df, drop_list)

    # Page for XGB
    if choose_model == "XGB":
        model_xgb = xgb_page_builder(data)

        if (st.checkbox("Want to Use this model to predict on a new dataset?")
            ):
            xgb_predictor(model_xgb, rows, columns, df, drop_list)

    if choose_model == "Stock Backtesting":
        from Stock_Backtesting import ticker
        st.sidebar.header('Hyper Parameters')
        st.sidebar.markdown('Enter a new ticker')
        tickertxt = st.sidebar.text_input("Paste stock ticker here", value='AMZN')
        model_xgb = ticker(tickertxt)

    if choose_model == "Predict Volatility":
        from PredictVolatility import tickerpv
        st.sidebar.header('Predict')
        st.sidebar.markdown('Enter a new ticker')
        tickertxt = st.sidebar.text_input("Paste stock ticker here", value='AMZN')
        model_xgb = tickerpv(tickertxt)

    if choose_model == "Technical Indicators":
        from Technical_Indicators import tickerti
        st.sidebar.header('Technical Indicators')
        st.sidebar.markdown('Enter a new ticker')
        tickertxt = st.sidebar.text_input("Paste stock ticker here", value='AMZN')
        model_xgb = tickerti(tickertxt)

    if choose_model == "Bollinger Band":
        from bollinger import tickerbol
        st.sidebar.header('Bollinger Band')
        st.sidebar.markdown('Enter a new ticker')
        tickertxt = st.sidebar.text_input("Paste stock ticker here", value='AMZN')
        model_xgb = tickerbol(tickertxt)

    if choose_model == "Stock Market":
        from Stockmarketscreening import stockmarket
        st.sidebar.header('Stock Market')
        st.sidebar.markdown('Enter a new ticker')
        tickertxt = st.sidebar.text_input("Paste stock ticker here", value='AMZN')
        model_xgb = stockmarket(tickertxt)

    if choose_model == "Dashbord":
        st.sidebar.header('Dashboard')
        cwd = os.getcwd()
        data1 = pd.read_excel(cwd + "/Tabelle Mustermann-2.xls")
        # print(data1)
        st.table(data1)
Code example #22
0
def main():

    st.title("IST Automated EDA Test")

    st.info("JP EDA app test")
    """https://github.com/joeperrotta/streamlit"""
    activities = [
        "Pandas Profile", "SweetViz", "General EDA", "EDA For Linear Models",
        "Model Building for Classification Problem"
    ]
    choice = st.sidebar.selectbox("Select Activities", activities)

    if choice == "Pandas Profile":
        st.subheader("Automated EDA with Pandas Profile")
        data_file = st.file_uploader("Upload CSV", type=['csv'])
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())
            profile = ProfileReport(df)
            st_profile_report(profile)

    # elif choice == "Sweetviz":
    #     st.subheader("Automated EDA with Sweetviz")
    #     data = st.file_uploader("Upload a Dataset", type=["csv"])
    #     if data is not None:
    #         df = pd.read_csv(data)
    #         st.dataframe(df.head())
    #         if st.button("Generate Sweetviz Report"):
    #             # Normal Workflow
    #             report = sv.analyze(df)
    #             report.show_html()
    #             st_display_sweetviz("SWEETVIZ_REPORT.html")

    elif choice == 'General EDA':
        st.subheader("Exploratory Data Analysis")

        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
        if data is not None:
            df = load.read_csv(data)
            st.dataframe(df.head())
            st.success("Data Frame Loaded successfully")

            if st.checkbox("Show dtypes"):
                st.write(dataframe.show_dtypes(df))

            if st.checkbox("Show Columns"):
                st.write(dataframe.show_columns(df))

            if st.checkbox("Show Missing"):
                st.write(dataframe.Show_Missing1(df))

            if st.checkbox("column information"):
                st.write(info.Column_information(df))

            if st.checkbox("Aggregation Tabulation"):
                st.write(dataframe.Tabulation(df))

            if st.checkbox("Num Count Summary"):
                st.write(info.num_count_summary(df))

            if st.checkbox("Statistical Summary"):
                st.write(info.statistical_summary(df))

            if st.checkbox("Show Selected Columns"):
                selected_columns = st.multiselect("Select Columns",
                                                  dataframe.show_columns(df))
                new_df = df[selected_columns]
                st.dataframe(new_df)

            if st.checkbox("Numerical Variables"):
                num_df = dataframe.Numerical_variables(df)
                numer_df = pd.DataFrame(num_df)
                st.dataframe(numer_df)

            if st.checkbox("Categorical Variables"):
                new_df = dataframe.categorical_variables(df)
                catego_df = pd.DataFrame(new_df)
                st.dataframe(catego_df)

            if st.checkbox("DropNA"):
                imp_df = dataframe.impute(num_df)
                st.dataframe(imp_df)

            if st.checkbox("Missing after DropNA"):
                st.write(dataframe.Show_Missing(imp_df))

            all_columns_names = dataframe.show_columns(df)
            all_columns_names1 = dataframe.show_columns(df)
            selected_columns_names = st.selectbox(
                "Select Column 1 For Cross Tabulation", all_columns_names)
            selected_columns_names1 = st.selectbox(
                "Select Column 2 For Cross Tabulation", all_columns_names1)
            if st.button("Generate Cross Tab"):
                st.dataframe(
                    pd.crosstab(df[selected_columns_names],
                                df[selected_columns_names1]))

            all_columns_names3 = dataframe.show_columns(df)
            all_columns_names4 = dataframe.show_columns(df)
            selected_columns_name3 = st.selectbox(
                "Select Column 1 For Pearsonr Correlation (Numerical Columns)",
                all_columns_names3)
            selected_columns_names4 = st.selectbox(
                "Select Column 2 For Pearsonr Correlation (Numerical Columns)",
                all_columns_names4)
            if st.button("Generate Pearsonr Correlation"):
                df = pd.DataFrame(dataframe.Show_pearsonr(
                    imp_df[selected_columns_name3],
                    imp_df[selected_columns_names4]),
                                  index=['Pvalue', '0'])
                st.dataframe(df)

            spearmanr3 = dataframe.show_columns(df)
            spearmanr4 = dataframe.show_columns(df)
            spearmanr13 = st.selectbox(
                "Select Column 1 For spearmanr Correlation (Categorical Columns)",
                spearmanr3)
            spearmanr14 = st.selectbox(
                "Select Column 2 For spearmanr Correlation (Categorical Columns)",
                spearmanr4)
            if st.button("Generate spearmanr Correlation"):
                df = pd.DataFrame(dataframe.Show_spearmanr(
                    catego_df[spearmanr13], catego_df[spearmanr14]),
                                  index=['Pvalue', '0'])
                st.dataframe(df)

            st.subheader("UNIVARIATE ANALYSIS")

            all_columns_names = dataframe.show_columns(df)
            selected_columns_names = st.selectbox(
                "Select Column for Histogram ", all_columns_names)
            if st.checkbox("Show Histogram for Selected variable"):
                st.write(dataframe.show_hist(df[selected_columns_names]))
                st.pyplot()

            all_columns_names = dataframe.show_columns(df)
            selected_columns_names = st.selectbox("Select Columns Distplot ",
                                                  all_columns_names)
            if st.checkbox("Show DisPlot for Selected variable"):
                st.write(dataframe.Show_DisPlot(df[selected_columns_names]))
                st.pyplot()

            all_columns_names = dataframe.show_columns(df)
            selected_columns_names = st.selectbox("Select Columns CountPlot ",
                                                  all_columns_names)
            if st.checkbox("Show CountPlot for Selected variable"):
                st.write(dataframe.Show_CountPlot(df[selected_columns_names]))
                st.pyplot()

            st.subheader("BIVARIATE ANALYSIS")

            Scatter1 = dataframe.show_columns(df)
            Scatter2 = dataframe.show_columns(df)
            Scatter11 = st.selectbox(
                "Select Column 1 For Scatter Plot (Numerical Columns)",
                Scatter1)
            Scatter22 = st.selectbox(
                "Select Column 2 For Scatter Plot (Numerical Columns)",
                Scatter2)
            if st.button("Generate PLOTLY Scatter PLOT"):
                st.pyplot(dataframe.plotly(df, df[Scatter11], df[Scatter22]))

            bar1 = dataframe.show_columns(df)
            bar2 = dataframe.show_columns(df)
            bar11 = st.selectbox("Select Column 1 For Bar Plot ", bar1)
            bar22 = st.selectbox("Select Column 2 For Bar Plot ", bar2)
            if st.button("Generate PLOTLY histogram PLOT"):
                st.pyplot(dataframe.plotly_histogram(df, df[bar11], df[bar22]))

            violin1 = dataframe.show_columns(df)
            violin2 = dataframe.show_columns(df)
            violin11 = st.selectbox("Select Column 1 For violin Plot", violin1)
            violin22 = st.selectbox("Select Column 2 For violin Plot", violin2)
            if st.button("Generate PLOTLY violin PLOT"):
                st.pyplot(
                    dataframe.plotly_violin(df, df[violin11], df[violin22]))

            st.subheader("MULTIVARIATE ANALYSIS")

            if st.checkbox("Show Histogram"):
                st.write(dataframe.show_hist(df))
                st.pyplot()

            if st.checkbox("Show HeatMap"):
                st.write(dataframe.Show_HeatMap(df))
                st.pyplot()

            if st.checkbox("Show PairPlot"):
                st.write(dataframe.Show_PairPlot(df))
                st.pyplot()

            if st.button("Generate Word Cloud"):
                st.write(dataframe.wordcloud(df))
                st.pyplot()

    elif choice == 'EDA For Linear Models':
        st.subheader("EDA For Linear Models")
        data = st.file_uploader("Upload a Dataset",
                                type=["csv", "txt", "xlsx", "tsv"])
        if data is not None:
            df = load.read_csv(data)
            st.dataframe(df.head())
            st.success("Data Frame Loaded successfully")

            all_columns_names = dataframe.show_columns(df)
            selected_columns_names = st.selectbox("Select Columns qqplot ",
                                                  all_columns_names)
            if st.checkbox("Show qqplot for variable"):
                st.write(dataframe.qqplot(df[selected_columns_names]))
                st.pyplot()

            all_columns_names = dataframe.show_columns(df)
            selected_columns_names = st.selectbox("Select Columns outlier ",
                                                  all_columns_names)
            if st.checkbox("Show outliers in variable"):
                st.write(dataframe.outlier(df[selected_columns_names]))

            # all_columns_names = show_columns(df)
            # selected_columns_names = st.selectbox("Select target ",all_columns_names)
            # if st.checkbox("Anderson Normality Test"):
            # 	st.write(Anderson_test(df[selected_columns_names]))

            if st.checkbox("Show Distplot Selected Columns"):
                selected_columns_names = st.selectbox(
                    "Select Columns for Distplot ", all_columns_names)
                st.dataframe(dataframe.show_displot(
                    df[selected_columns_names]))
                st.pyplot()

            con1 = dataframe.show_columns(df)
            con2 = dataframe.show_columns(df)
            conn1 = st.selectbox("Select 1st column for chi-square test",
                                 con1)
            conn2 = st.selectbox("Select 2nd column for chi-square test",
                                 con2)
            if st.button("Generate chi-square test"):
                st.write(
                    dataframe.check_cat_relation(df[conn1], df[conn2], 0.5))

    elif choice == 'Model Building for Classification Problem':
        st.subheader("Model Building for Classification Problem")
        data = st.file_uploader("Upload a Dataset",
                                type=["csv", "txt", "xlsx", "tsv"])
        if data is not None:
            df = load.read_csv(data)
            st.dataframe(df.head())
            st.success("Data Frame Loaded successfully")

            if st.checkbox(
                    "Select your variables (target variable should be last)"
            ):
                selected_columns_ = st.multiselect(
                    "Select columns for separation",
                    dataframe.show_columns(df))
                sep_df = df[selected_columns_]
                st.dataframe(sep_df)

            if st.checkbox("Show Indpendent Data"):
                x = sep_df.iloc[:, :-1]
                st.dataframe(x)

            if st.checkbox("Show Dependent Data"):
                y = sep_df.iloc[:, -1]
                st.dataframe(y)

            if st.checkbox("Dummay Variable"):
                x = dataframe.dummy(x)
                st.dataframe(x)

            if st.checkbox("IMpupter "):
                x = model.IMpupter(x)
                st.dataframe(x)

            if st.checkbox("Compute Principle Component Analysis"):
                x = dataframe.PCA(x)
                st.dataframe(x)

            st.subheader("TRAIN TEST SPLIT")

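            # note: the four checkboxes below each re-run train_test_split
            # with the same random_state=0, so they always yield identical splits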
            if st.checkbox("Select X Train"):
                from sklearn.model_selection import train_test_split
                x_train, x_test, y_train, y_test = train_test_split(
                    x, y, random_state=0)
                st.dataframe(x_train)

            if st.checkbox("Select x_test"):
                from sklearn.model_selection import train_test_split
                x_train, x_test, y_train, y_test = train_test_split(
                    x, y, random_state=0)
                st.dataframe(x_test)

            if st.checkbox("Select y_train"):
                from sklearn.model_selection import train_test_split
                x_train, x_test, y_train, y_test = train_test_split(
                    x, y, random_state=0)
                st.dataframe(y_train)

            if st.checkbox("Select y_test"):
                from sklearn.model_selection import train_test_split
                x_train, x_test, y_train, y_test = train_test_split(
                    x, y, random_state=0)
                st.dataframe(y_test)

            st.subheader("MODEL BUILDING")
            st.write("Build youe BaseLine Model")

            if st.checkbox("Logistic Regression "):
                x = model.Logistic_Regression(x_train, y_train, x_test, y_test)
                st.write(x)

            if st.checkbox("Decision Tree "):
                x = model.Decision_Tree(x_train, y_train, x_test, y_test)
                st.write(x)

            if st.checkbox("Random Forest "):
                x = model.RandomForest(x_train, y_train, x_test, y_test)
                st.write(x)

            if st.checkbox("naive_bayes "):
                x = model.naive_bayes(x_train, y_train, x_test, y_test)
                st.write(x)

            if st.checkbox("XGB Classifier "):
                x = model.XGb_classifier(x_train, y_train, x_test, y_test)
                st.write(x)

    st.markdown('Automation is **_really_ _cool_**.')
    st.markdown('<style>h1{color: red;}</style>', unsafe_allow_html=True)
    st.title("Credits and Inspiration")
    """https://pycaret.org/"""
Code example #23
0
import streamlit as st
from PIL import Image
import style
import io
import os

st.title('Pytorch Style Transfer')

input_image = st.file_uploader('Upload Image:')
input_image_path = ''
if input_image is not None:
    input_image_path = 'neural_style/images/content-images/' + input_image.name

st.write('### Source Image:')
if input_image is not None:
    with open(
            os.path.join("neural_style/images/content-images/",
                         input_image.name), "wb") as f:
        f.write(input_image.getbuffer())
    img = Image.open(input_image)
    st.image(img, width=250)

if input_image:
    st.write(input_image.name)

style_name = st.selectbox('Select Style',
                          ('candy', 'mosaic', 'rain_princess', 'udnie'))

model = 'neural_style/saved_models/' + style_name + '.pth'
if input_image is not None:
    output_image = 'neural_style/images/output-images/' + style_name + '-' + input_image.name
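
# Hypothetical continuation (the excerpt ends above; names are assumptions
# based on the project layout, not confirmed by this snippet):
#     if st.button('Stylize'):
#         style.stylize(style_name, input_image_path, output_image)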
Code example #24
0
import altair as alt
import numpy as np
import pandas as pd
import streamlit as st


def find_outliers(df, variable, window, sigma):
    # flag points whose residual from the rolling mean exceeds sigma rolling stds
    avg = df[variable].rolling(window=window).mean()
    residual = df[variable] - avg
    std = residual.rolling(window=window).std()
    return np.abs(residual) > std * sigma


st.header("Outlier detection in Occupancy Detection Data Set")

uploaded_file = st.file_uploader("Choose data/occupancy.csv", type="csv")

if uploaded_file is not None:
    data = pd.read_csv(uploaded_file)

    if len(data.columns) != 7:
        st.error("This doesn't look like the correct dataset...")

    else:
        data["date"] = data.date.astype("datetime64[ns]")

        variable = "Temperature"
        window = st.slider("Window : ", 1, 60, 30)
        sigma = st.slider("Sigma : ", 1, 20, 10)

        data["is_outlier"] = find_outliers(data, window, sigma)
Code example #25
0
def main():
    with open('cat_to_name.json', 'r') as f:
        cat_to_name = json.load(f)

    checkpoint = torch.load('my_model.pt')

    arch = checkpoint['arch']
    num_labels = len(checkpoint['class_to_idx'])
    hidden_units = checkpoint['hidden_units']

    model = train.load_model(arch=arch, num_labels=num_labels, hidden_units=hidden_units)
    model.load_state_dict(checkpoint['state_dict'])

    model.class_to_idx = checkpoint['class_to_idx']
    st.set_option('deprecation.showfileUploaderEncoding', False)

    st.title("Testing Class Prediction")
    # st.file_uploader has no `default` parameter; wait for an upload instead
    filename = st.file_uploader('Select the flower')
    if filename is None:
        st.stop()
    img = Image.open(filename)
    st.image(img, width=300, caption="Flower Selected")

    if st.button("Predict"):

        st.title("Predicted Results")
    # Process a PIL image for use in a PyTorch model
        def process_image(image):
            ''' Scales, crops, and normalizes a PIL image for a PyTorch model,
                returns an Numpy array
            '''    
            img_loader = transforms.Compose([
                transforms.Resize(256), 
                transforms.CenterCrop(224), 
                transforms.ToTensor()])
            
            pil_image = Image.open(image)
            pil_image = img_loader(pil_image).float()
            
            np_image = np.array(pil_image)    
            
            mean = np.array([0.485, 0.456, 0.406])
            std = np.array([0.229, 0.224, 0.225])
            np_image = (np.transpose(np_image, (1, 2, 0)) - mean)/std    
            np_image = np.transpose(np_image, (2, 0, 1))
                    
            return np_image


        def imshow(image, ax=None, title=None):
            """Imshow for a Tensor-style array."""
            if ax is None:
                fig, ax = plt.subplots()

            # PyTorch tensors assume the color channel is the first dimension
            # but matplotlib assumes it is the third dimension
            image = np.transpose(image, (1, 2, 0))

            # Undo preprocessing
            mean = np.array([0.485, 0.456, 0.406])
            std = np.array([0.229, 0.224, 0.225])
            image = std * image + mean

            # Image needs to be clipped between 0 and 1 or it looks like noise when displayed
            image = np.clip(image, 0, 1)

            ax.imshow(image)

            return ax
        
        # Display the image along with the top 5 predicted classes
        img = filename
        probs, classes = predict.predict(image=filename, checkpoint='my_model.pt', labels='cat_to_name.json', gpu=True)
        st.image(img)
        st.header("Table results of Possible Class")
        df = pd.DataFrame({'Classes': classes, 'Probability': probs})
        st.table(df)
        Classes = classes
        Probability = probs

        p = figure(x_range=Classes, plot_height=250, title="Possible Prediction",
           toolbar_location=None, tools="")

        p.vbar(x=Classes, top=Probability, width=0.6)

        p.xgrid.grid_line_color = None
        p.y_range.start = 0

        st.bokeh_chart(p)
Code example #26
0
def main_elements():
    """
    Создает шапку страницы
    """
    # === Верхняя часть шапки ===
    row1_1, row1_2 = st.columns([2, 1])

    with row1_1:
        logo_css("АО РОСГЕОЛОГИЯ", align="left", clr="#07689F", size=33)
        # logo_css(
        #     "<i>Департамент по восстановлению и утилизации<br>трубной продукции</i>",
        #     align="left",
        #     clr="#52616B",
        #     size=20,
        # )

    with row1_2:
        pass

    header_css(
        f"<i>{title_app}</i>",
        align="left",
        clr="#07689F",
        size=26,
    )

    row1, row2 = st.columns([2, 1])
    with row1:
        pass

    with row2:
        pass

    # === Lower part of the header ===
    row2_1, row2_2 = st.columns([3, 1])

    with row2_1:
        uploaded_file = st.file_uploader(
            "Загрузить данные...",
            type=["xls", "xlsx", "csv", "tsv"],
            accept_multiple_files=False,
        )
        if uploaded_file is not None:
            st.info(
                "В демонстрационной версии приложения оперировать "
                "можно только тестовым набором данных, который генерируется "
                "при первой загрузке страницы или её последующих обновлениях"
            )

    row3_1, row3_2 = st.columns([1, 3])

    with row3_1:
        pass
    with row3_2:
        annotation_css(
            "ВАЖНО: в демонстрационной версии приложении используется "
            "автоматически генерируемый набор данных, обновляемый при "
            "взаимодействии с любым динамическим виджетом приложения",
            clr="#769FCD",
        )

    equipment_list = [
        ("УРБ-2А2", "УРБ-4Т", "ПБУ-74", "УШ-2Т4В", "HD2500RC"),
        ("МБШ-303", "УБН-Т", "МБШ-812", "МБШ-509", "БКМ-307", "БКМ-303"),
        ("СБУ-115", "СБУ-125", "БМ-302", "УРБ-4Т", "БКМ-307"),
    ]

    full_data_for_plot: List[Dict[str, Dict[str, Tuple]]] = []
    for idx, fleet_name in enumerate(
        ["Серпуховской ПТСН", "Челябинский ПТСН", "Екатеринбургский ПТСН"]
    ):
        dict_fleet: Dict[
            str, Dict[str, Tuple]
        ] = prepare_duration_downtime_for_plot(fleet_name, equipment_list[idx])
        full_data_for_plot.append(dict_fleet)

    fleet_names_for_selectbox = [
        list(fleet.keys())[0] for fleet in full_data_for_plot
    ]
    selected_fleet_equipment = st.selectbox(
        "Выберите парк спец. техники", fleet_names_for_selectbox
    )

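    # pick the data of the fleet chosen in the selectbox (exactly one matches)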
    selected_data_for_plot: Dict[str, Tuple] = [
        fleet[selected_fleet_equipment]
        for fleet in full_data_for_plot
        if selected_fleet_equipment in fleet.keys()
    ][0]

    # display
    summary_table(selected_fleet_equipment, selected_data_for_plot)

    annotation_css(
        "Детали сводки", size=18, text_align="center", clr="#07689F"
    )
    duration_downtime_plot(selected_fleet_equipment, selected_data_for_plot)
    duration_downtime_boxplot(selected_fleet_equipment, selected_data_for_plot)
    duration_downtime_econ_costs_scatter_plot(
        selected_fleet_equipment, selected_data_for_plot
    )
Code example #27
0
import streamlit as st
import pandas as pd
import numpy as np
import time
import seaborn as sns
import matplotlib.pyplot as plt

#st.dataframe(df.style.highlight_max(axis=0))
#df.head(5)
st.title('Case Portocred Financeira')
st.file_uploader("Subir arquivos", accept_multiple_files=True)

st.subheader("Desenvolvido por: Renato Dias")
st.sidebar.title("Exploração de dados (EAD)")

st.sidebar.subheader("Visões gerais")
col1, col2, col3 = st.sidebar.beta_columns(3)
pr = col1.checkbox('Stats', False)
bx = col2.checkbox('Boxplot', False)
cor = col3.checkbox("Correlação", False)

st.sidebar.subheader("Tratamento de outliers")
chosen = st.sidebar.radio('', ("Sim", "Não"))

st.sidebar.subheader("Balanceamento das classes")
c1 = st.sidebar.checkbox("Verificar percentual por classe")
st.sidebar.text("Balancear classes?")
col11, col21 = st.sidebar.beta_columns(2)
chosen1 = col11.radio('', (" Sim", " Não"))

st.sidebar.subheader("Tratamento dos dados")
            download_button_str = download_button(s, './age1.csv', 'Age CSV file')
            st.sidebar.markdown(download_button_str, unsafe_allow_html=True)

            with open('./count.csv', 'rb') as f:
                s = f.read()
            download_button_str1 = download_button(s,'./count.csv', 'count')
            st.sidebar.markdown(download_button_str1, unsafe_allow_html=True)

            with open('./output.avi', 'rb') as f:
                s = f.read()
            download_button_str1 = download_button(s,'output.avi', 'output file')
            st.sidebar.markdown(download_button_str1, unsafe_allow_html=True)


    elif app_mode == "Device":
        uploaded_file = st.file_uploader("Choose a video...", type=["mp4","avi"])
        temporary_location = False

        if uploaded_file is not None:
            g = io.BytesIO(uploaded_file.read())  
            temporary_location = "testout_simple.mp4"

            with open(temporary_location, 'wb') as out: 
                out.write(g.read())

            vs = get_cap(temporary_location)
            rate = vs.get(cv2.CAP_PROP_FPS)
            dur = 49
            up1, down1, gend1, agen1= age_gender(vs,dur)
            st.success('done!')
            st.title("plots")
Code example #29
0
File: main.py Project: monk-boop/modeldeploy-TGSsalt
# Imports reconstructed for this snippet (an assumption: the original file's
# imports were not captured; TF1.x-era APIs such as tensorflow.to_int32 and
# tensorflow.metrics.mean_iou are used below):
import numpy as np
import streamlit as st
import tensorflow
from keras import backend as K
from keras.preprocessing.image import img_to_array
from skimage.transform import resize


def mean_iou(y_true, y_pred):
    prec = []
    for t in np.arange(0.5, 1.0, 0.05):
        y_pred_ = tensorflow.to_int32(y_pred > t)
        score, up_opt = tensorflow.metrics.mean_iou(y_true, y_pred_, 2)
        K.get_session().run(tensorflow.local_variables_initializer())
        with tensorflow.control_dependencies([up_opt]):
            score = tensorflow.identity(score)
        prec.append(score)
    return K.mean(K.stack(prec), axis=0)


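# Keras must be able to resolve the custom metric when deserializing the
# model, hence the custom_objects mapping passed to load_model.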
dependencies = {'mean_iou': mean_iou}
model = tensorflow.keras.models.load_model('first_model.h5',
                                           custom_objects=dependencies)
file = st.file_uploader("Please upload an image file", type=["jpg", "png"])


def import_and_predict(image_data, model):

    X_test = np.zeros((1, 128, 128, 1), dtype=np.uint8)
    sizes_test = []
    #img = load_img(image_data)
    x = img_to_array(image_data)[:, :, 1]
    sizes_test.append([x.shape[0], x.shape[1]])
    x = resize(x, (128, 128, 1), mode='constant', preserve_range=True)
    X_test[0] = x
    preds_test = model.predict(X_test, verbose=1)
    preds_test_t = (preds_test > 0.5).astype(np.uint8)
    tmp = np.squeeze(preds_test_t[0]).astype(np.float32)
    return np.dstack((tmp, tmp, tmp))
Code example #30
0
File: app.py Project: radith-arch/streamlit
def main():
    """Invasive Ductal Carcinoma Detection Using CNN"""
    st.title("Invasive Ductal Carcinoma Detection Using CNN")

    menu = ["Home", "Login", "Signup"]
    submenu = ["Plot", "Visualisasi IDC", "Feature Maps", "Prediction"]

    choice = st.sidebar.selectbox("Menu", menu)
    if choice == "Home":
        st.subheader("What is Invasive Ductal Carcinoma (IDC)?")
        st.markdown("#### Context")
        """
        Invasive Ductal Carcinoma (IDC) is the most common subtype of all breast cancers. To assign 
        an aggressiveness grade to a whole mount sample, pathologists typically focus on the regions 
        which contain the IDC. As a result, one of the common pre-processing steps for automatic 
        aggressiveness grading is to delineate the exact regions of IDC inside of a whole mount slide.
        """
        st.markdown("#### Content")
        """
        The original dataset consisted of 162 whole mount slide images of Breast Cancer (BCa) specimens 
        scanned at 40x. From that, 277,524 patches of size 50 x 50 were extracted (198,738 IDC negative 
        and 78,786 IDC positive). Each patch’s file name follows the format uxXyYclassC.png, e.g. 
        10253idx5x1351y1101class0.png, where u is the patient ID (10253idx5), X is the x-coordinate of 
        where this patch was cropped from, Y is the y-coordinate of where this patch was cropped from, 
        and C indicates the class where 0 is non-IDC and 1 is IDC.
        """
        st.markdown("#### Acknowledgements")
        """
        The original files are located here: http://gleason.case.edu/webdata/jpi-dl-tutorial/IDC_regular_ps50_idx5.zip
        Citation: https://www.ncbi.nlm.nih.gov/pubmed/27563488 and http://spie.org/Publications/Proceedings/Paper/10.1117/12.2043872
        """
        st.markdown("#### Inspiration")
        """
        Breast cancer is the most common form of cancer in women, and invasive ductal carcinoma (IDC) is 
        the most common form of breast cancer. Accurately identifying and categorizing breast cancer 
        subtypes is an important clinical task, and automated methods can be used to save time and reduce error.
        """
    elif choice == "Login":
        username = st.sidebar.text_input("Username")
        password = st.sidebar.text_input("Password", type='password')
        if st.sidebar.checkbox("Login"):
            create_usertable()
            hashed_pwsd = generate_hashes(password)
            result = login_user(username, verify_hashes(password, hashed_pwsd))
            if result:
                st.success("Welcome {}".format(username))

                activity = st.selectbox("Activity", submenu)
                if activity == "Plot":
                    st.subheader("Data Plot")

                    status = st.radio("Data Distribution",
                                      ("Data raw", "Data preprocessed"))

                    if status == 'Data raw':
                        img = Image.open(
                            os.path.join("data/sns.countplot(y_train).jpeg"))
                        st.image(img, width=300, caption="Data Train")

                        img = Image.open(
                            os.path.join("data/sns.countplot(y_test).jpeg"))
                        st.image(img, width=300, caption="Data Test")
                    else:
                        img = Image.open(
                            os.path.join("data/sns.countplot(y_train2).jpeg"))
                        st.image(img, width=300, caption="Data Train")

                        img = Image.open(
                            os.path.join("data/sns.countplot(y_test2).jpeg"))
                        st.image(img, width=300, caption="Data Test")

                elif activity == "Visualisasi IDC":
                    st.subheader("Visualisasi IDC(-/+)")
                    sample_gambar = st.radio(
                        "Few example of IDC with its coordinate",
                        ("IDC (-)", "IDC (+)"))
                    if sample_gambar == 'IDC (-)':
                        figure_path = glob.glob("gambar visual/0/*.png",
                                                recursive=True)
                        figure = show_image(figure_path)
                        st.pyplot(figure)
                    else:
                        figure_path = glob.glob("gambar visual/1/*.png",
                                                recursive=True)
                        figure = show_image(figure_path)
                        st.pyplot(figure)

                elif activity == "Feature Maps":
                    st.subheader("Feature Maps")
                    feature_maps = st.radio(
                        "Visualization Feature Maps from hidden layer",
                        ("VGG16", "5 Layers Conv2d"))
                    if feature_maps == 'VGG16':
                        model_ = load_model(
                            os.path.join(
                                "models/vgg-model-weights-improvement-the-best.h5"
                            ))
                        model_baru = model_.layers[0]  # VGG-specific: first layer is the VGG16 base
                        model_baru = Model(inputs=model_baru.inputs,
                                           outputs=model_baru.layers[1].output)
                        model_baru.summary()

                        img = Image.open(
                            os.path.join(
                                "gambar visual/0/9178_idx5_x2651_y1251_class0.png"
                            ))
                        img = preprocessed_image(img)
                        img = preprocess_input(img)
                        feature_maps = model_baru.predict(img)

                        figure = feature_of(feature_maps, 8)
                        st.pyplot(figure)
                    else:
                        model_ = load_model(
                            os.path.join(
                                "models/weights-improvement-the-best.h5"))
                        model_baru = model_
                        model_baru = Model(inputs=model_baru.inputs,
                                           outputs=model_baru.layers[1].output)
                        model_baru.summary()

                        img = Image.open(
                            os.path.join(
                                "gambar visual/0/9178_idx5_x2651_y1251_class0.png"
                            ))
                        img = preprocessed_image(img)
                        img = preprocess_input(img)
                        feature_maps = model_baru.predict(img)

                        figure = feature_of(feature_maps, 5)
                        st.pyplot(figure)

                elif activity == "Prediction":
                    st.subheader("Predictive Analytics")

                    # Upload Image
                    image_file = st.file_uploader("Upload Image",
                                                  type=['jpg', 'png', 'jpeg'])

                    if image_file is not None:
                        our_image = Image.open(image_file)
                        st.text("Image Uploaded!")
                        st.image(our_image)

                        # Processed Image
                        image_test = preprocessed_image(our_image)
                    else:
                        st.warning("Please upload the image!")

                    # ML / Predict Image
                    model_choice = st.selectbox("Select Model",
                                                ["VGG16", "5 Layers Conv2d"])
                    if st.button("Predict"):
                        if model_choice == "VGG16":
                            model_ = load_model(
                                os.path.join(
                                    "models/vgg-model-weights-improvement-the-best.h5"
                                ))
                            opt = SGD(lr=0.001, momentum=0.9)
                            model_.compile(optimizer=opt,
                                           loss='categorical_crossentropy',
                                           metrics=['accuracy'])
                            prediction = model_.predict(image_test)
                            prediction_result = np.argmax(prediction[0])

                        elif model_choice == "5 Layers Conv2d":
                            model_ = load_model(
                                os.path.join(
                                    "models/weights-improvement-the-best.h5"))
                            opt = SGD(lr=0.001, momentum=0.9)
                            model_.compile(optimizer=opt,
                                           loss='categorical_crossentropy',
                                           metrics=['accuracy'])
                            prediction = model_.predict(image_test)
                            prediction_result = np.argmax(prediction[0])

                        # st.write(prediction_result)
                        if prediction_result == 1:
                            st.warning("Patient is IDC-positive!")
                            st.error(
                                "Please seek treatment and keep a healthy lifestyle!"
                            )
                        else:
                            st.success("It's negative!")
            else:
                st.warning("Incorrect Username/Password")

    elif choice == "Signup":
        new_username = st.text_input("user name")
        new_password = st.text_input("Password", type='password')

        confirm_password = st.text_input("Confirm Password", type='password')
        if new_password == confirm_password:
            st.success("Password Confirmed")
        else:
            st.warning("Passwords not the same")

        if st.button("Submit"):
            create_usertable()
            hashed_new_password = generate_hashes(new_password)
            add_userdata(new_username, hashed_new_password)
            st.success("You have successfully created a new account")
            st.info("Login to Get Started ")