コード例 #1
0
ファイル: app.py プロジェクト: saitej123/StreamlitApps
def main():
    """Render the upload page and run the task chosen in the sidebar.

    Shows the header image, a sidebar task selector and a CSV/TXT uploader.
    Once a file has been uploaded it is read with ``pd.read_csv`` and, on
    button press, either a pandas-profiling report is rendered or
    ``pycaret_automl`` is called with the selected target column.
    """
    header_image = Image.open('sai_app_header.png')
    st.image(header_image, use_column_width=True)
    choice = st.sidebar.selectbox(
        "Select Task To do",
        ["EDA Analysis", "ML Model Building with Pycaret"])
    st.subheader("Exploratory Data Analysis")
    st.set_option('deprecation.showfileUploaderEncoding', False)

    data = st.file_uploader("Upload a Dataset (CSV or TXT)",
                            type=["csv", "txt"])
    if data is None:
        # Nothing uploaded yet: there is nothing further to draw.
        return

    df = pd.read_csv(data)
    st.success("Your Data Frame Loaded successfully")
    all_columns = show_columns(df)

    if choice == 'EDA Analysis':
        if st.button("Generate Pandas Profiling Report"):
            pr = ProfileReport(df, explorative=True)
            st.subheader(
                "Pandas Profiling : Quick Exploratory data analysis")
            # Short description of what the report contains.
            for line in ('We Get :',
                         'DataFrame overview',
                         'Each attribute on which DataFrame is defined',
                         'Correlations between attributes',
                         'A sample of DataFrame'):
                st.text(line)
            st_profile_report(pr)
    elif choice == 'ML Model Building with Pycaret':
        target_selection = st.selectbox("Select Target Variable Column",
                                        all_columns)
        if st.button("Run Pycaret AutoML"):
            pycaret_automl(df, target_selection)
コード例 #2
0
def main():
    """Show a dataset picked from ``./datasets`` together with its profile.

    Lists the files in the ``./datasets`` folder, reads the selected one with
    ``pd.read_csv``, optionally previews its first rows, and then renders the
    full frame followed by a pandas-profiling report.
    """
    st.title("The EDA App")
    st.subheader("Intelligent EDA App Generator using Streamlit")

    banner_html = """
	<div style="background-color:blue;"><p style="color:white;font-size:20px;padding:10px">Data has a better idea</p></div>
	"""
    st.markdown(banner_html, unsafe_allow_html=True)

    # Let the user choose one of the files found in the datasets folder.
    folder_path = './datasets'
    available = os.listdir(folder_path)
    picked = st.selectbox("Select A file", available)
    filename = os.path.join(folder_path, picked)
    st.info("Selected Datasets {}".format(filename))

    df = pd.read_csv(filename)

    # Optional preview of the first few rows.
    if st.checkbox("Show Dataset"):
        row_count = st.number_input("Number of Rows to View", 5, 10)
        st.dataframe(df.head(row_count))

    # Full frame followed by the profiling report.
    report = ProfileReport(df, explorative=True)
    st.header('**Input DataFrame**')
    st.write(df)
    st.write('---')
    st.header('**Pandas Profiling Report**')
    st_profile_report(report)
コード例 #3
0
ファイル: home.py プロジェクト: streamlit-badge-bot/Stocks
def Profit():
    """Render the "Profiling Report" page for a ticker chosen in the sidebar.

    Reads the symbol list from a CSV hosted on GitHub, looks up the company
    name through the Yahoo autocomplete endpoint, downloads the weekly price
    history with ``yf.Ticker(...).history`` and renders a pandas-profiling
    report of that history.
    """
    page_bg_img = '''
    <style>
    body {
    background-image: url("https://img.freepik.com/free-photo/3d-geometric-abstract-cuboid-wallpaper-background_1048-9891.jpg?size=626&ext=jpg&ga=GA1.2.635976572.1603931911");
    background-size: cover;
    }
    </style>
    '''
    st.markdown(page_bg_img, unsafe_allow_html=True)
    st.markdown(
        "<h1 style='text-align: center; color: #002966;'>Profiling Report</h1>",
        unsafe_allow_html=True)
    # st.title("This is my second page")
    symbols = 'https://raw.githubusercontent.com/Moly-malibu/Stocks/main/bxo_lmmS1.csv'
    df = pd.read_csv(symbols)
    tickerSymbol = st.sidebar.selectbox('Company List', (df['Symbol']))

    def get_symbol(symbol):
        """Return the company name for *symbol*, or None when it is unknown.

        Network failures, non-2xx responses and unexpected payloads are
        treated as "unknown" so that a broken lookup service cannot take the
        whole page down.
        """
        url = "http://d.yimg.com/autoc.finance.yahoo.com/autoc?query={}&region=1&lang=en".format(
            symbol)
        try:
            # Without a timeout a stalled connection would block the script
            # run indefinitely.
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            matches = response.json()['ResultSet']['Result']
        except (requests.RequestException, ValueError, KeyError, TypeError):
            return None
        for x in matches:
            if x['symbol'] == symbol:
                return x['name']
        return None

    # Fall back to the raw ticker so the heading never reads "None".
    company_name = get_symbol(tickerSymbol.upper()) or tickerSymbol
    st.write("""# Analysis of """, company_name)

    company = yf.Ticker(tickerSymbol)
    analysis = company.history(period='max', interval='1wk')
    profile = ProfileReport(analysis, explorative=True)
    st_profile_report(profile)
コード例 #4
0
def main():
    """Profile an uploaded CSV, or a random example frame on request.

    The sidebar offers a CSV uploader.  An uploaded file is parsed as
    ISO 8859-1 text with ``;`` as separator and ``,`` as decimal mark; when
    nothing is uploaded the user can press a button to profile a random
    100x5 example frame instead.
    """

    def _show_report(frame):
        # Shared tail of both branches: the frame, a rule, then its profile.
        report = ProfileReport(frame, explorative=True)
        st.header('**Input DataFrame**')
        st.write(frame)
        st.write('---')
        st.header('**Pandas Profiling Report**')
        st_profile_report(report)

    st.markdown('''
    # ** EDA APP**
    ''')

    # Page title and short description.
    st.markdown('''
    # **The EDA App**
    This is the **EDA App** created in Streamlit using the **pandas-profiling** library.
    ''')

    # Sidebar: upload widget plus a link to a sample file.
    with st.sidebar.header('1. Upload your CSV data'):
        uploaded_file = st.sidebar.file_uploader("Upload your input CSV file",
                                                 type=["csv"])
        st.sidebar.markdown("""
    [Example CSV input file](https://raw.githubusercontent.com/dataprofessor/data/master/delaney_solubility_with_descriptors.csv)
    """)

    if uploaded_file is None:
        st.info('Awaiting for CSV file to be uploaded.')
        if st.button('Press to use Example Dataset'):

            @st.cache
            def load_data():
                return pd.DataFrame(np.random.rand(100, 5),
                                    columns=['a', 'b', 'c', 'd', 'e'])

            df = load_data()
            st.write('The whole datasets:', df.shape)
            _show_report(df)
        return

    @st.cache
    def load_csv():
        return pd.read_csv(uploaded_file,
                           encoding='ISO 8859-1',
                           sep=";",
                           decimal=",")

    df = load_csv()
    st.write('The whole datasets:', df.shape)
    st.write('Variables in the dataset', df.columns.values)
    _show_report(df)
コード例 #5
0
def profilling_analysis(df):
    """Render *df* and its pandas-profiling report, reporting failures in the UI.

    Args:
        df: The DataFrame to display and profile.

    Any ordinary exception raised while building or rendering the report is
    caught and shown on the page instead of propagating.
    """
    try:
        pr = ProfileReport(df, explorative=True)
        st.title("Pandas Profiling in Streamlit")
        st.write(df)
        st_profile_report(pr)
    except Exception as exc:
        # ``except Exception`` rather than a bare ``except`` so that
        # KeyboardInterrupt, SystemExit and other BaseException-derived
        # signals are not swallowed.
        st.title("Error - Pandas profiling was not generated")
        # Surface the cause; the title alone gives nothing to debug with.
        st.exception(exc)
コード例 #6
0
ファイル: app.py プロジェクト: Mjboothaus/dbqed
def app_mainscreen(df, pr, APP_NAME):
    """Draw the main screen: title, data table, then the profile report.

    Args:
        df: Object passed to ``st.write`` as the data table.
        pr: Report object passed to ``st_profile_report``.
        APP_NAME: Text used as the page title.

    Returns:
        *df*, unchanged.
    """
    st.title(APP_NAME)
    st.write(df)
    st.header("Data Profile:")
    st_profile_report(pr)
    return df
コード例 #7
0
ファイル: myapp.py プロジェクト: mehmetsuci/DL
def main():
    """Let the user pick an EDA tool and run it on an uploaded CSV.

    The select box offers pandas-profiling, SweetViz and D-Tale, plus an
    "About" page.  Each tool branch shows a CSV uploader and builds its
    report only after a file has been uploaded and "Generate Report" is
    pressed.
    """
    menu = ['Pandas-Profiling', 'SweetViz', 'D-tale', 'About']
    option = st.selectbox("Select Tool for Viz", menu)

    if option == 'Pandas-Profiling':
        st.header("Pandas-Profiling")
        data_file = st.file_uploader("Upload_csv", type=['csv'])
        if data_file is not None:
            load_csv = pd.read_csv(data_file)
            st.write(load_csv.head())
            st.success("Successfully uploaded!")
            if st.button('Generate Report'):
                report = ProfileReport(load_csv,
                                       title="CSV Profiling Report",
                                       explorative=True)
                st.write('---')
                st.header('**Pandas Profiling Report**')
                st_profile_report(report)

    elif option == 'SweetViz':
        st.header("SweetViz")
        data_file = st.file_uploader("Upload_csv", type=['csv'])
        if data_file is not None:
            load_csv = pd.read_csv(data_file)
            st.write(load_csv)
            # Confirm the upload only once a file has actually been read.
            st.success("Successfully uploaded!")
            st.write('---')
            st.header('**SweetViz Profiling Report**')
            if st.button('Generate Report'):
                report = sv.analyze(load_csv)
                report.show_html()
                display_sweetviz("SWEETVIZ_REPORT.html")

    elif option == 'D-tale':
        st.header('D-tale')
        data_file = st.file_uploader("Upload_csv", type=['csv'])
        if data_file is not None:
            load_csv = pd.read_csv(data_file)
            st.write(load_csv)
            # Confirm the upload only once a file has actually been read.
            st.success("Successfully uploaded!")
            st.write('---')
            st.header('**D-Tale Profiling Report**')
            if st.button('Generate Report'):
                dtale.show(load_csv)
                # NOTE(review): host and port are hard-coded, so the iframe
                # only works where D-Tale is reachable at this address.
                components.iframe('http://dell-virlgti:40000/dtale/main/1',
                                  width=1500,
                                  height=800,
                                  scrolling=True)
                # st.markdown(html, unsafe_allow_html=True)

    # Compare the *selected option*; comparing the ``menu`` list itself to a
    # string is always False and made this page unreachable.
    elif option == 'About':
        st.subheader(
            "Simple tool for better and quick visualization and EDA!!")
        st.write()
        st.write(
            "check out this [link](https://share.streamlit.io/mesmith027/streamlit_webapps/main/MC_pi/streamlit_app.py)"
        )
コード例 #8
0
def write(state_df):
    """Render a minimal pandas-profiling report for *state_df*.

    Shows an error message instead when *state_df* is ``None``.  A spinner
    is displayed while the report is being produced.
    """
    st.header("Data Explotary Analysis")

    with st.spinner("Loading Data Info ..."):
        if state_df is None:
            st.error("Please upload dataset first!")
        else:
            report = ProfileReport(state_df, explorative=True, minimal=True)
            st_profile_report(report)
コード例 #9
0
def app3(uploaded_file):
    """Profile a random sample of the uploaded CSV.

    Reads *uploaded_file* with ``pd.read_csv``, asks for a sample size and
    renders a minimal pandas-profiling report of that many randomly sampled
    rows.  A warning is shown while the size is still 0 or when it exceeds
    the number of rows.
    """
    warnings.filterwarnings("ignore")
    df = pd.read_csv(uploaded_file)
    total_rows = len(df)
    st.write(f"Total data sample numbers : {total_rows}")
    num = st.number_input("Enter The sample size", 0)

    if num == 0:
        # Nothing entered yet.
        st.warning("Enter the sample size")
    elif num > total_rows:
        st.warning("Enter proper sample size")
    else:
        report = ProfileReport(df.sample(n=num), minimal=True)
        st.header('**Exploratory data Analysis**')
        st_profile_report(report)
コード例 #10
0
def main():
    """Render either the home page or the data-exploration page.

    The sidebar menu selects the page.  "Data Exploration" loads
    ``Assignment2.csv`` through ``load_data`` and renders its
    pandas-profiling report; any other choice shows the static home page.
    """
    st.sidebar.title('Options')
    choice = st.sidebar.selectbox("Menu", ['Home', 'Data Exploration'])

    if choice != 'Data Exploration':
        # Home page: course title followed by the submission details.
        st.markdown('''
        <h1 style="text-align: center">
        PMASDS18: Data Mining
        </h1>''',
                    unsafe_allow_html=True)
        st.markdown('''
        <div style="text-align: center">

        -----
        ### Submitted By

        Shanto Jourder - 201900101012 \n
        Md. Meftaul Haque Mishu - 201900101040

        ------
        ### Submitted To
        Dr. Md Rezaul Karim

        -----
        #### 15th December 2020

        </div>
        ''',
                    unsafe_allow_html=True)
        return

    st.markdown('''
        <h1 style="text-align: center">
        Exploration
        </h1>''',
                unsafe_allow_html=True)
    # A file-uploader variant of this page existed but is disabled; the
    # dataset is loaded from a fixed file instead.
    df = load_data('Assignment2.csv')
    profile = ProfileReport(df)
    st_profile_report(profile)
コード例 #11
0
def app():
    """Profile an uploaded CSV, or a random example frame on request.

    The sidebar offers a CSV uploader.  An uploaded file is read with
    ``pd.read_csv``; when nothing is uploaded the user can press a button to
    profile a random 100x5 example frame instead.
    """

    def _show_report(frame):
        # Shared tail of both branches: the frame, a rule, then its profile.
        report = ProfileReport(frame, explorative=True)
        st.header('**Input DataFrame**')
        st.write(frame)
        st.write('---')
        st.header('**Pandas Profiling Report**')
        st_profile_report(report)

    st.image('eda.png', width=280)
    st.title('''
    Exploratory Data Analysis App
    ''')

    # Sidebar: upload widget plus a link to a sample file.
    st.set_option('deprecation.showfileUploaderEncoding', False)
    st.sidebar.header('1. Upload your CSV data')
    uploaded_file = st.sidebar.file_uploader("Upload your input CSV file",
                                             type=["csv"])
    st.sidebar.markdown("""
    [Example CSV input file](https://raw.githubusercontent.com/dataprofessor/data/master/delaney_solubility_with_descriptors.csv)
    """)

    if uploaded_file is None:
        st.info('Awaiting for CSV file to be uploaded.')
        if st.button('Press to use Example Dataset'):

            @st.cache
            def load_data():
                return pd.DataFrame(np.random.rand(100, 5),
                                    columns=['a', 'b', 'c', 'd', 'e'])

            _show_report(load_data())
        return

    @st.cache
    def load_csv():
        return pd.read_csv(uploaded_file)

    _show_report(load_csv())
コード例 #12
0
def run_eda(df, dep_var="", chosen_val="Pandas Profiling"):
    """Run the EDA tool named by *chosen_val* on *df*.

    Args:
        df: The DataFrame to explore.
        dep_var: Name of the dependent (target) column; ``""`` means none.
        chosen_val: One of "Pandas Profiling", "Sweetviz", "Autoviz",
            "DataPrep" or "Summary Table".  Any other value does nothing.
    """
    if chosen_val == "Pandas Profiling":
        pr = ProfileReport(df, explorative=True)
        st_profile_report(pr)
    elif chosen_val == "Sweetviz":
        st.write("opening new tab")
        rep = sv.analyze(
            df.select_dtypes(exclude="datetime64[ns]"), target_feat=dep_var
        )
        rep.show_html()
    elif chosen_val == "Autoviz":
        AV = AutoViz_Class()
        chart_format = "jpg"

        dft = AV.AutoViz(
            filename="",
            sep=",",
            depVar=dep_var,
            dfte=df,
            header=0,
            verbose=2,
            lowess=False,
            chart_format=chart_format,
            max_rows_analyzed=len(df),  # 150000,
            max_cols_analyzed=df.shape[1],
        )  # 30
        st.write(dft.head())
        st.write("Autoviz")
        # The charts are read back from a folder named after the target
        # column, or "empty_string" when no target was given.
        stored_folder = dep_var if dep_var != "" else "empty_string"
        # ``cwd`` is defined outside this function.
        for i in glob(cwd + f"/AutoViz_Plots/{stored_folder}/*.{chart_format}"):
            st.image(Image.open(i))
    elif chosen_val == "DataPrep":
        try:
            # ``xy`` is defined outside this function.
            dpplot(df, *xy).show_browser()
        except Exception:
            # Fall back to embedding the overview report in the page.
            # ``except Exception`` keeps KeyboardInterrupt / SystemExit and
            # other BaseException-derived signals from being swallowed.
            stc.html(display_html(dpplot(df).report))  # .show_browser()
        # create_report(df).show_browser()
    elif chosen_val == "Summary Table":
        get_df(df)
コード例 #13
0
def render_eda():
    """Upload a dataset, show its profiling report and save it to disk.

    After a CSV/TXT file is uploaded it is read with ``pd.read_csv``, shown
    together with its pandas-profiling report, and the report is written to
    ``Output.html``.
    """
    st.title("Create a Complete Report of your data.")
    st.subheader("Exploratory Data Analysis using pandas profiling.")
    st.write("""All you need to do is upload a dataset and get a quick
            sense of your data.""")
    data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
    if data is None:
        # Nothing uploaded yet.
        return

    df = pd.read_csv(data)
    # Report options are described at
    # https://pandas-profiling.github.io/pandas-profiling/docs/master/rtd/index.html
    # (``minimal=True`` is the setting to use for large datasets).
    report = ProfileReport(df, explorative=True)
    st.title("Pandas Profiling Report in Streamlit")
    st.write(df)
    st_profile_report(report)

    # Persist the report next to the app.
    report.to_file("Output.html")
    st.write("Your report has been saved!")
コード例 #14
0
def create_mainscreen(df, pr, APP_NAME):
    """Draw the main screen: title, a demo Folium map, data and profile.

    Args:
        df: Object passed to ``st.write`` as the data table.
        pr: Report object passed to ``st_profile_report``.
        APP_NAME: Text used as the page title.
    """
    # Coordinates and label of the demo marker.
    opera_house = (-33.85719805, 151.21512338473752)
    label = "Sydney Opera House"

    st.title(APP_NAME)

    # Folium example: map centred on the marker, rendered in Streamlit.
    m = folium.Map(location=list(opera_house), zoom_start=15)
    marker = folium.Marker(list(opera_house), popup=label, tooltip=label)
    marker.add_to(m)
    folium_static(m)

    # Dataframe / profiling example.
    st.write(df)
    st.header("Data Profile:")
    st_profile_report(pr)
コード例 #15
0
def profiling_page():
    """Render the page that profiles the result of a user-written SQL query.

    Opens the database named in ``st.session_state.db_name``; stops the
    script with a warning when it has no tables.  Otherwise a form collects
    an SQL statement, and on submit the query result is rendered as a
    dark-mode pandas-profiling report.
    """
    logger.info({"message": "Loading profiling page."})
    st.title("Profiling Tables")

    db = Database(file_name=st.session_state.db_name)
    db_tables = db.show_tables()

    if len(db_tables) == 0:
        st.warning("The database has no tables available.")
        logger.warning({"message": "The database has no tables available."})
        st.stop()

    st.write(
        "You can select an entire table or create your custom SQL-statement.")

    # Stays None until the form has been submitted.
    df_query = None
    with st.form(key="profiling_form"):
        query = st.text_area("SQL-statement",
                             value="SELECT * FROM table",
                             height=300,
                             help="SQL-statement based on SQLite syntax.")

        st.write(' ')

        submitted = st.form_submit_button(label='Profiling')
        if submitted:
            logger.info({"message": "Profiling Table."})
            df_query = db.query(query)

    if df_query is not None:
        report = ProfileReport(df_query, explorative=True, dark_mode=True)
        st_profile_report(report)

    logger.info({"message": "Profiling page loaded."})
コード例 #16
0
ファイル: analytics.py プロジェクト: Avighan/Analytics_Heroku
def run(st, data):
    """Render the analytics menu and the analysis picked by the user.

    Everything is drawn inside a single "Menu" expander.  Optional column
    filters are applied to *data* first; the filtered frame then feeds one of
    four views: "Data", "Visualization", "Statistics" or "Data Profiling".

    Args:
        st: Object providing the Streamlit API used below (``radio``,
            ``selectbox``, ``beta_columns``, ...).
        data: The pandas DataFrame to analyse.
    """
    expander = st.beta_expander("Menu", expanded=True)
    with expander:
        ana_choice = st.radio(
            "Analysis",
            ["Data", "Visualization", "Statistics", "Data Profiling"])
        filters = st.checkbox('Add Filters')
        if filters:
            st.info("Select column and values from below")
            filtered_cols = st.multiselect("Select columns to filter",
                                           data.columns.tolist())
            filtered_sets = []
            # Lay the value pickers out three per row; the last row gets only
            # as many layout columns as there are pickers left.
            for start in range(0, len(filtered_cols), 3):
                row_cols = filtered_cols[start:start + 3]
                slots = st.beta_columns(len(row_cols))
                for slot, col_name in zip(slots, row_cols):
                    filtered_sets.append(
                        slot.multiselect(col_name,
                                         data[col_name].unique().tolist()))

            # Apply every non-empty selection; an empty picker means
            # "no filter on this column".
            for col_name, filter_value in zip(filtered_cols, filtered_sets):
                if len(filter_value) > 0:
                    data = data[data[col_name].isin(filter_value)]

        if ana_choice == 'Data':
            data_options = st.selectbox(
                "", ["View Records", "Data Correlation", "Pivot"])
            if data_options == "View Records":
                c1, c2 = st.beta_columns(2)
                top_bottom_options = c1.radio("Records", ["Top", "Bottom"])
                num_rec = c2.number_input("No. of Records:",
                                          min_value=0,
                                          max_value=100,
                                          step=1,
                                          value=10)
                if top_bottom_options == 'Top':
                    st.dataframe(data.head(num_rec))
                else:
                    st.dataframe(data.tail(num_rec))
            elif data_options == "Data Correlation":
                select_columns = st.multiselect("Select Columns",
                                                data.columns.tolist())
                corr_view = st.radio("Correlation View", ["Table", "Chart"])
                # No selection means "use every column".
                subset = data[select_columns] if select_columns else data
                if corr_view == 'Table':
                    st.dataframe(subset.corr())
                else:
                    st.write(sns.heatmap(subset.corr(), annot=True))
                    st.pyplot()
            elif data_options == 'Pivot':
                dimensions = st.multiselect("Select X axis columns",
                                            data.columns.tolist())
                measures = st.multiselect("Select Y axis columns",
                                          data.columns.tolist())
                numeric_cols = st.multiselect("Aggregation columns",
                                              data.columns.tolist())
                aggregation_operations = st.selectbox(
                    "Aggregation Operation",
                    ['sum', 'average', 'median', 'count'])
                button = st.button("Execute!!!")
                if button and numeric_cols:
                    operation = {
                        'sum': np.sum,
                        'average': np.mean,
                        'median': np.median,
                        'count': np.count_nonzero,
                    }[aggregation_operations]
                    pivot_table = pd.pivot_table(data,
                                                 values=numeric_cols,
                                                 index=measures,
                                                 columns=dimensions,
                                                 aggfunc=operation)
                    st.dataframe(pivot_table)
        elif ana_choice == "Visualization":
            chart_options = st.selectbox(
                'Charts', ['Bar', 'Line', 'Heatmap', 'Distplot', 'Customized'])
            if chart_options == 'Bar':
                x_col = st.selectbox('X', data.columns.tolist())
                y_col = st.selectbox('Y', data.columns.tolist())
                hue_color = st.checkbox("Add color column")
                direction = st.radio('chart direction',
                                     ['vertical', 'horizontal'])
                hue_col = None
                if hue_color:
                    hue_col = st.selectbox('hue', data.columns.tolist())
                button = st.button("Execute!!!")
                if button:
                    chart_direction = 'v' if direction == 'vertical' else 'h'
                    # ``hue=None`` is seaborn's default, i.e. no colouring.
                    st.write(
                        sns.barplot(x=x_col,
                                    y=y_col,
                                    hue=hue_col or None,
                                    data=data,
                                    orient=chart_direction))
                    st.pyplot()
            elif chart_options == 'Line':
                x_col = st.selectbox('X', data.columns.tolist())
                y_col = st.selectbox('Y', data.columns.tolist())
                hue_color = st.checkbox("Add color column")
                hue_col = None
                if hue_color:
                    hue_col = st.selectbox('hue', data.columns.tolist())
                button = st.button("Execute!!!")
                if button:
                    st.write(
                        sns.lineplot(x=x_col,
                                     y=y_col,
                                     hue=hue_col or None,
                                     data=data))
                    st.pyplot()
            elif chart_options == 'Heatmap':
                select_columns = st.multiselect("Select Columns",
                                                data.columns.tolist())
                button = st.button("Execute!!!")
                if button:
                    # No selection means "use every column".
                    subset = data[select_columns] if select_columns else data
                    st.write(sns.heatmap(subset, annot=True))
                    st.pyplot()
            elif chart_options == 'Distplot':
                x_col = st.selectbox('X', data.columns.tolist())
                col = st.selectbox('column', data.columns.tolist())
                row = st.selectbox('row', data.columns.tolist())
                button = st.button("Execute!!!")
                if button:
                    st.write(
                        sns.displot(
                            data,
                            x=x_col,
                            col=col,
                            row=row,
                            binwidth=3,
                            height=3,
                            facet_kws=dict(margin_titles=True),
                        ))
                    st.pyplot()
            elif chart_options == 'Customized':
                code_area = st.text_area(
                    """Enter your chart script, Return result to value.
                e.g. 
                a = 3
                b = 4
                value = a + b!!!, Don't enter data parameter !!!""")

                button = st.button("Execute!!!")
                if button:
                    loc = {}
                    # SECURITY: this runs arbitrary user-supplied Python in
                    # the server process.  Only acceptable when every user of
                    # the app is trusted; do not expose publicly as is.
                    exec(code_area, {'data': data}, loc)
                    # The script hands its result back through ``value``.
                    return_workaround = loc['value']
                    st.write(return_workaround)
                    st.pyplot()
        elif ana_choice == 'Statistics':
            test_selection = st.selectbox('Category', [
                'Value Count', 'Normality Test', 'Correlation Test',
                'Stationary Test', 'Parametric Test', 'Non Parametric Test'
            ])
            statistics = stats.Statistics(data)
            # Only the first two categories are implemented below.
            if test_selection == 'Value Count':
                select_columns = st.selectbox("Select Columns",
                                              data.columns.tolist())
                mode = st.radio('Value Counts', ['Table', 'Chart'])
                value_counts = statistics.__get__stats__(select_columns)
                if mode == 'Table':
                    st.dataframe(value_counts)
                else:
                    st.write(value_counts[:20].plot(kind='barh'))
                    st.pyplot()
            elif test_selection == 'Normality Test':
                st.write("""
                        Tests whether a data sample has a Gaussian distribution. \n
                        H0: the sample has a Gaussian distribution. \n
                        H1: the sample does not have a Gaussian distribution"""
                         )

                select_test = st.selectbox(
                    'Tests', ['ShapiroWilk', 'DAgostino', 'AndersonDarling'])
                col = st.selectbox('Select Column', data.columns.tolist())
                text_option = st.checkbox('Text')
                chart_option = st.checkbox('Chart')
                if text_option:
                    t, p = statistics.normality_tests(data[col],
                                                      test_type=select_test)
                    st.write('#### ' + t + " (" + str(p) + ")")
                if chart_option:
                    st.write(sns.kdeplot(x=col, data=data))
                    st.pyplot()

        elif ana_choice == 'Data Profiling':
            st.markdown("""
            ##### The Data Profiling is done automatically using Pandas Profiling tool.\n \n \n \n
            """)
            limited_records = st.checkbox("Execute on Limited Records!!!")
            select_columns = st.multiselect("Select Columns",
                                            data.columns.tolist())
            # No selection means "profile every column".
            cols = select_columns if select_columns else data.columns.tolist()
            if limited_records:
                num_rec = st.number_input("No. of Records:",
                                          min_value=0,
                                          max_value=1000000,
                                          step=1,
                                          value=100)
            else:
                num_rec = len(data)
            execute_profiling = st.button('Execute!!!')
            if execute_profiling:
                st.title(f"Pandas Profiling on {num_rec} records")

                # Positional slice: takes the first ``num_rec`` rows.  The
                # previous label-based ``.loc[:num_rec]`` included its end
                # label (one row too many) and selected by index label, which
                # is wrong once filtering has removed rows.
                report = ProfileReport(data[cols].iloc[:num_rec],
                                       explorative=True)
                st.write(data)
                st_profile_report(report)
コード例 #17
0
def main():
	"""Render the Echidna AI Streamlit application.

	Draws the header banner and logo, then shows one page depending on the
	sidebar "Menu" selection: Home, Login (with the Plot / Prediction
	activities once the credentials are accepted), SignUp, Book An
	Appointment, Profile Report, About or Privacy Policy.

	The function takes no arguments and returns nothing; everything it does
	is rendered through Streamlit calls.
	"""
	html_temp = """
		<div style="background-color:navy;padding:10px;border-radius:10px">
		<h1 style="color:white;text-align:center;">Echidna AI</h1>
		<h5 style="color:white;text-align:center;">Diabetes Prediction WEB APP</h5>
		</div>
		"""

	components.html(html_temp)
	# NOTE(review): absolute path on a developer machine; this only works where
	# that exact file exists.
	image = Image.open('C:/Users/Adila/Documents/hep_app/LOGO.png')
	st.image(image, use_column_width=True)

	menu = ["Home", "Login", "SignUp", "Book An Appointment", "Profile Report", "About", "Privacy Policy"]
	submenu = ["Plot", "Prediction",]

	choice = st.sidebar.selectbox("Menu", menu)
	if choice == "Home":
		st.subheader("---------------WELCOME TO THE DIABETES PREDICTION APP-----------------")

		html_temp2 = """
		<div style="background-color:navy;padding:3px;border-radius:10px">
		<h1 style="color:white;text-align:center;">How to Login?</h1>
		<h5 style="color:white;text-align:center;">press the arrow on the top left corner and choose the LOGIN from menu to get started</h5>
		</div>
		"""
		components.html(html_temp2)

		html_temp3 = """
		<div style="background-color:navy;padding:3px;border-radius:10px">
		<h1 style="color:white;text-align:center;">How to Sign Up?</h1>
		<h5 style="color:white;text-align:center;">press the arrow on the top left corner and choose the SIGN UP from menu to get started</h5>
		</div>
		"""
		components.html(html_temp3)
		st.title("Brief explanation on Diabetes Mellitus")
		st.subheader("------Diabetes mellitus (DM), commonly known as diabetes, is a group of metabolic disorders characterized by a high blood sugar level over a prolonged period. Symptoms often include frequent urination, increased thirst, and increased appetite. If left untreated, diabetes can cause many complications. Acute complications can include diabetic ketoacidosis, hyperosmolar hyperglycemic state, or death. Serious long-term complications include cardiovascular disease, stroke, chronic kidney disease, foot ulcers, damage to the nerves, damage to the eyes and cognitive impairment.")
		st.subheader("------Diabetes is due to either the pancreas not producing enough insulin, or the cells of the body not responding properly to the insulin produced. There are three main types of diabetes mellitus:")
		st.subheader("------Type 1 diabetes results from the pancreas's failure to produce enough insulin due to loss of beta cells. This form was previously referred to as insulin-dependent diabetes mellitus (IDDM) or  juvenile diabetes . The loss of beta cells is caused by an autoimmune response. The cause of this autoimmune response is unknown.")
		st.subheader("------Type 2 diabetes begins with insulin resistance, a condition in which cells fail to respond to insulin properly. As the disease progresses, a lack of insulin may also develop. This form was previously referred to as non-insulin-dependent diabetes mellitus  (NIDDM) or  adult-onset diabetes .  The most common cause is a combination of excessive body weight and insufficient exercise.")
		st.subheader("------Gestational diabetes is the third main form and occurs when pregnant women without a previous history of diabetes develop high blood sugar levels.")
		st.subheader("------Type 1 diabetes must be managed with insulin injections. Prevention and treatment of type 2 diabetes involves maintaining a healthy diet, regular physical exercise, a normal body weight, and avoiding use of tobacco. Type 2 diabetes may be treated with medications such as insulin sensitizers with or without insulin. Control of blood pressure and maintaining proper foot and eye care are important for people with the disease. Insulin and some oral medications can cause low blood sugar.  Weight loss surgery in those with obesity is sometimes an effective measure in those with type 2 diabetes. Gestational diabetes usually resolves after the birth of the baby.")
		st.subheader("------As of 2019, an estimated 463 million people had diabetes worldwide (8.8% of the adult population), with type 2 diabetes making up about 90% of the cases.  Rates are similar in women and men. Trends suggest that rates will continue to rise. Diabetes at least doubles a person's risk of early death. In 2019, diabetes resulted in approximately 4.2 million deaths. It is the 7th leading cause of death globally. The global economic cost of diabetes related health expenditure in 2017 was estimated at US$727 billion. In the United States, diabetes cost nearly US$327 billion in 2017. Average medical expenditures among people with diabetes are about 2.3 times higher.")

	elif choice == "Login":
		username = st.sidebar.text_input("Username")
		password = st.sidebar.text_input("Password",type='password')
		if st.sidebar.checkbox("Login"):
			create_usertable()
			# NOTE(review): the hash checked by verify_hashes is derived from the
			# same password that was just typed; whether the login is sound
			# depends on how login_user compares it with the stored value --
			# confirm against those helpers.
			hashed_pswd = generate_hashes(password)
			result = login_user(username,verify_hashes(password, hashed_pswd))
			if result:
				st.success("Welcome {}".format(username))

				activity = st.selectbox("Activity", submenu)
				if activity == "Plot":
					st.subheader("Data Vis Plot")
					df = pd.read_csv("data/clean_hepatitis_dataset.csv")
					st.dataframe(df)

					df['class'].value_counts().plot(kind='bar')
					st.pyplot()

					#Freq Dist Plot
					freq_df = pd.read_csv("data/freq_df_hepatitis_dataset.csv")
					st.bar_chart(freq_df['count'])

					if st.checkbox("Area Chart"):
						all_columns = df.columns.to_list()
						feat_choices = st.multiselect("Choose a Feature",all_columns)
						new_df = df[feat_choices]
						st.area_chart(new_df)

				elif activity == "Prediction":
					st.subheader("Predictive Analytics")
					# NOTE(review): the inputs, dataset and model files below are
					# hepatitis ones while every message talks about diabetes --
					# confirm which is intended.

					age = st.number_input("Age",7,80)
					sex = st.radio("Sex",tuple(gender_dict.keys()))
					steroid = st.radio("Do You Take Steroids?",tuple(feature_dict.keys()))
					antivirals = st.radio("Do You Take Antivirals?",tuple(feature_dict.keys()))
					fatigue = st.radio("Do You Have Fatigue",tuple(feature_dict.keys()))
					spiders = st.radio("Presence of Spider Naeve",tuple(feature_dict.keys()))
					ascites = st.selectbox("Ascities",tuple(feature_dict.keys()))
					varices = st.selectbox("Presence of Varices",tuple(feature_dict.keys()))
					bilirubin = st.number_input("bilirubin Content",0.0,8.0)
					alk_phosphate = st.number_input("Alkaline Phosphate Content",0.0,296.0)
					sgot = st.number_input("Sgot",0.0,648.0)
					albumin = st.number_input("Albumin",0.0,6.4)
					protime = st.number_input("Prothrombin Time",0.0,100.0)
					histology = st.selectbox("Histology",tuple(feature_dict.keys()))
					feature_list = [age,get_value(sex,gender_dict),get_fvalue(steroid),get_fvalue(antivirals),get_fvalue(fatigue),get_fvalue(spiders),get_fvalue(ascites),get_fvalue(varices),bilirubin,alk_phosphate,sgot,albumin,int(protime),get_fvalue(histology)]

					st.write(feature_list)
					pretty_result = {"age":age,"sex":sex,"steroid":steroid,"antivirals":antivirals,"fatigue":fatigue,"spiders":spiders,"ascites":ascites,"varices":varices,"bilirubin":bilirubin,"alk_phosphate":alk_phosphate,"sgot":sgot,"albumin":albumin,"protime":protime,"histolog":histology}
					st.json(pretty_result)
					single_sample = np.array(feature_list).reshape(1,-1)

					#ML
					model_choice = st.selectbox("Select Model", ["KNN", "DecisionTree", "LR"])
					# One lookup shared by "Predict" and "Interpret"; any choice without
					# an explicit entry falls back to logistic regression.
					model_paths = {
						"KNN": "models/knn_hepB_model.pkl",
						"DecisionTree": "models/decision_tree_clf_hepB_model.pkl",
					}
					model_path = model_paths.get(model_choice, "models/logistic_regression_hepB_model.pkl")

					if st.button("Predict"):
						loaded_model = load_model(model_path)
						prediction = loaded_model.predict(single_sample)
						pred_prob = loaded_model.predict_proba(single_sample)

						# NOTE(review): pred_prob[0][0] is shown under the "positive" label
						# in both branches; which column belongs to which class depends on
						# the model's class order -- confirm the labelling.
						if prediction == 1:
							st.warning("Patient has a risk to have Diabetes")
							pred_probability_score = {"Dibetes":pred_prob[0][0]*100,"No Diabetes":pred_prob[0][1]*100}
							st.subheader("Prediction Probability Score using Neural network with {}".format(model_choice))
							st.json(pred_probability_score)
							st.subheader("Prescriptive Analytics")
							st.markdown(prescriptive_message_temp,unsafe_allow_html=True)

						else:
							st.success("Patient don't have a risk of having Diabetes")
							pred_probability_score = {"Has a risk":pred_prob[0][0]*100,"No Risk":pred_prob[0][1]*100}
							st.subheader("Prediction Probability Score using Neural network with {}".format(model_choice))
							st.json(pred_probability_score)

					if st.checkbox("Interpret"):
						loaded_model = load_model(model_path)

						# 1 Die and 2 Live
						df = pd.read_csv("data/clean_hepatitis_dataset.csv")
						feature_names = ['age', 'sex', 'steroid', 'antivirals','fatigue','spiders', 'ascites','varices', 'bilirubin', 'alk_phosphate', 'sgot', 'albumin', 'protime','histology']
						x = df[feature_names]
						class_names = ['Die(1)','Live(2)']
						explainer = lime.lime_tabular.LimeTabularExplainer(x.values,feature_names=feature_names, class_names=class_names,discretize_continuous=True)
						# The Explainer Instance
						exp = explainer.explain_instance(np.array(feature_list), loaded_model.predict_proba,num_features=13, top_labels=13)
						exp.show_in_notebook(show_table=True, show_all=True)
						# Compute the explanation list once and reuse it for the table and the chart
						new_exp = exp.as_list()
						st.write(new_exp)
						label_limits = [i[0] for i in new_exp]
						label_scores = [i[1] for i in new_exp]
						plt.barh(label_limits,label_scores)
						st.pyplot()
						plt.figure(figsize=(20,10))
						exp.as_pyplot_figure()
						st.pyplot()

			else:
				st.warning("Incorrect Username or Password")
	elif choice == "SignUp":
		new_username = st.text_input("User name")
		new_password = st.text_input("Password", type='password')

		confirm_password = st.text_input("Confirm Password",type='password')
		passwords_match = new_password == confirm_password
		if passwords_match:
			st.success("Password Confirmed")
		else:
			st.warning("Passwords not the same")

		if st.button("Submit"):
			# Only store the account when the confirmation matches and both
			# fields are filled in; previously a mismatch was merely warned about
			# and the account was created anyway.
			if not passwords_match:
				st.error("Passwords do not match. Account was not created")
			elif not new_username or not new_password:
				st.error("Username and password must not be empty")
			else:
				create_usertable()
				hashed_new_password = generate_hashes(new_password)
				add_userdata(new_username,hashed_new_password)
				st.success("You have created a new account")
				st.info("Login to get started")

	elif choice == "Profile Report":
		st.title("What is Profile Report?")
		st.subheader("------This technology can be used to upload datasets that you may have as analyst or if you are a doctor who wants to get a clear idea about percentage of patients who has this or that problem. And then the technology itself will analyze it.")
		st.title("How to start to analyze the data?")
		st.subheader("------To analyze the data you just simply need to upload a CSV file")
		st.title("What kind of files this technology accepts?")
		st.subheader("------Basically a CSV files")
		st.title("How is it analyzing the data?")
		st.subheader("------Mainly, technology itself uses a neural networks to analyze the data, and then it will represent the data as a charts")

		data_file = st.file_uploader("UPLOAD CSV",type=['csv'])
		st.set_option('deprecation.showfileUploaderEncoding', False)
		if data_file is not None:
			df = pd.read_csv(data_file)
			st.dataframe(df.head())
			profile = ProfileReport(df)
			st_profile_report(profile)

	elif choice == "Book An Appointment":
		st.title("Book An Appointments")
		st.title("Integration with Nilai medical center website")
		st.subheader("------Developer integrated this WebApp with existed website to make sure that patients can book an appointment to a real medical")
		components.iframe('https://nmc.encoremed.io/',width=700,height=2000)

	elif choice == "About":
		st.title("About App")
		st.title("F.A.Q.")
		st.title("What is Echidna AI?")
		st.subheader("------Basically, its an a WEB APP that can help people to predict Diabetes")
		st.title("What kind of functions do Echidna Have?")
		st.subheader("------The main purpose of the Echidna AI is to provide a solution for people to predict diabetes and to help analysts to analyze the data in a better way. And the data itself can be stored inside this WEB APP because it has a neural network that can store data inside nodes")
		st.title("Is it Open source Alghorithm?")
		st.subheader("------The Echidna AI® algorithm")
		st.subheader("------This web app was released as open source software under the GNU Affero General Public Licence, version 3. This ensures that academics and others interested in the core of the algorithms at least start with a working implementation. The terms of the licence ensure that implementations are transparent and open, and are, in turn, open for others to use and/or modify under the same licence.")
		st.title("Is Echidna AI can be recommended for clinical use?")
		st.subheader("------It can be recommended for clinical use, software developers can use this professionally supported software development kits.")
		st.title("Would Echidna AI be supported in future?")
		st.subheader("------Echidna AI®-2020 will be released to licencees of our Echidna AI® software development kit in the new year, for deployment from August. Which means that it will be suported")
		st.title("Do Echidna AI patented or is it has a copyright?")
		st.subheader("Yes, Echidna AI has  a copyright, but it is an open source software that can be modified")
		st.subheader("------Copyright ©Echidna 2020. ALL RIGHTS RESERVED.")
		st.subheader("------Materials on this web site are protected by copyright law. Access to the materials on this web site for the sole purpose of personal educational and research use only. Where appropriate a single print out of a reasonable proportion of these materials may be made for personal education, research and private study. Materials should not be further copied, photocopied or reproduced, or distributed in electronic form. Any unauthorised use or distribution for commercial purposes is expressly forbidden. Any other unauthorised use or distribution of the materials may constitute an infringement of ClinRisk's copyright and may lead to legal action.")
		st.subheader("------For avoidance of doubt, any use of this site as a web service to obtain a Echidna AI® for any purpose is expressly forbidden. Similarly, use of this website for developing or testing software of any sort is forbidden unless permission has been explicitly granted.")
		st.subheader("------BMI predictor algorithm © 2020 Echidna Inc.")
		st.subheader("------WebApp and risk engine built by Adilan Akhramovich WebApp design ©Echidna 2020.")
		components.html(footer_html,height=500)

	elif choice == "Privacy Policy":
		st.title("Privacy Policy of Echidna Inc.")
		st.subheader("------At ECHIDNA AI, one of our main priorities is the privacy of our visitors. This Privacy Policy document contains types of information that is collected and recorded by ECHIDNA AI and how we use it.")

		st.subheader("------If you have additional questions or require more information about our Privacy Policy, do not hesitate to contact us.")

		st.subheader("------This Privacy Policy applies only to our online activities and is valid for visitors to our webapp with regards to the information that they shared and/or collect in ECHIDNA AI. This policy is not applicable to any information collected offline or via channels other than this webapp.")

		st.title("Consent")
		st.subheader("------By using our webapp, you hereby consent to our Privacy Policy and agree to its terms.")

		st.title("Information we collect")
		st.subheader("------The personal information that you are asked to provide, and the reasons why you are asked to provide it, will be made clear to you at the point we ask you to provide your personal information.")

		st.subheader("------If you contact us directly, we may receive additional information about you such as your name, email address, phone number, the contents of the message and/or attachments you may send us, and any other information you may choose to provide.")

		st.title("How we use your information?")
		st.subheader("We use the information we collect in various ways, including to:")

		st.subheader("------Provide, operate, and maintain our webapp")
		st.subheader("------Improve, personalize, and expand our webapp")
		st.subheader("------Understand and analyze how you use our webapp")
		st.subheader("------Develop new products, services, features, and functionality")
		st.subheader("------Communicate with you, either directly or through one of our partners, including for customer service, to provide you with updates and other information relating to the webapp, and for marketing and promotional purposes")
		st.subheader("------Send you emails")
		st.subheader("------Find and prevent fraud")

		st.title("Log Files")
		st.subheader("------ECHIDNA AI follows a standard procedure of using log files. These files log visitors when they visit websites. All hosting companies do this and a part of hosting services' analytics. The information collected by log files include internet protocol (IP) addresses, browser type, Internet Service Provider (ISP), date and time stamp, referring/exit pages, and possibly the number of clicks. These are not linked to any information that is personally identifiable. The purpose of the information is for analyzing trends, administering the site, tracking users' movement on the website, and gathering demographic information.")

		st.title("Advertising Partners Privacy Policies")
		st.subheader("------You may consult this list to find the Privacy Policy for each of the advertising partners of ECHIDNA AI.")

		st.subheader("------Third-party ad servers or ad networks uses technologies like cookies, JavaScript, or Web Beacons that are used in their respective advertisements and links that appear on ECHIDNA AI, which are sent directly to users' browser. They automatically receive your IP address when this occurs. These technologies are used to measure the effectiveness of their advertising campaigns and/or to personalize the advertising content that you see on websites that you visit.")

		st.subheader("------Note that ECHIDNA AI has no access to or control over these cookies that are used by third-party advertisers.")

		st.title("Third Party Privacy Policies")
		st.subheader("------ECHIDNA AI's Privacy Policy does not apply to other advertisers or websites. Thus, we are advising you to consult the respective Privacy Policies of these third-party ad servers for more detailed information. It may include their practices and instructions about how to opt-out of certain options.")

		st.subheader("------You can choose to disable cookies through your individual browser options. To know more detailed information about cookie management with specific web browsers, it can be found at the browsers' respective websites.")

		st.title("MCPA Privacy Rights (Do Not Sell My Personal Information)")
		st.subheader("Under the MCPA, among other rights, consumers have the right to:")

		st.subheader("------Request that a business that collects a consumer's personal data disclose the categories and specific pieces of personal data that a business has collected about consumers.")

		st.subheader("------Request that a business delete any personal data about the consumer that a business has collected.")

		st.subheader("------Request that a business that sells a consumer's personal data, not sell the consumer's personal data.")

		st.subheader("------If you make a request, we have one month to respond to you. If you would like to exercise any of these rights, please contact us.")

		st.title("GDPR Data Protection Rights")
		st.subheader("We would like to make sure you are fully aware of all of your data protection rights. Every user is entitled to the following:")

		st.subheader("------The right to access – You have the right to request copies of your personal data. We may charge you a small fee for this service.")

		st.subheader("------The right to rectification – You have the right to request that we correct any information you believe is inaccurate. You also have the right to request that we complete the information you believe is incomplete.")

		st.subheader("------The right to erasure – You have the right to request that we erase your personal data, under certain conditions.")

		st.subheader("------The right to restrict processing – You have the right to request that we restrict the processing of your personal data, under certain conditions.")

		st.subheader("------The right to object to processing – You have the right to object to our processing of your personal data, under certain conditions.")

		st.subheader("------The right to data portability – You have the right to request that we transfer the data that we have collected to another organization, or directly to you, under certain conditions.")

		st.subheader("------If you make a request, we have one month to respond to you. If you would like to exercise any of these rights, please contact us.")

		st.title("Children's Information")
		st.subheader("------Another part of our priority is adding protection for children while using the internet. We encourage parents and guardians to observe, participate in, and/or monitor and guide their online activity.")

		st.subheader("------ECHIDNA AI does not knowingly collect any Personal Identifiable Information from children under the age of 13. If you think that your child provided this kind of information on our website, we strongly encourage you to contact us immediately and we will do our best efforts to promptly remove such information from our records.")
コード例 #18
0
def profilling_analysis(df, **kwargs):
    """Render an explorative profiling report for ``df`` in the Streamlit page.

    Args:
        df: Data passed to ``ProfileReport``.
        **kwargs: Extra keyword arguments forwarded unchanged to
            ``ProfileReport``.

    Any ``Exception`` raised while building or rendering the report is shown
    as a Streamlit error message instead of propagating.
    """
    try:
        pr = ProfileReport(df, explorative=True, **kwargs)
        st_profile_report(pr)
    # A bare ``except:`` would also trap BaseException subclasses such as
    # KeyboardInterrupt and SystemExit; only ordinary errors are reported here.
    except Exception as exc:
        st.error(f"Error - Pandas profiling was not generated: {exc}")
コード例 #19
0
def profilling(df):
    """Build an explorative ``ProfileReport`` for ``df`` and render it with ``st_profile_report``."""
    st_profile_report(ProfileReport(df, explorative=True))
コード例 #20
0
def profile_report(df):
    """Build a ``ProfileReport`` of ``df`` with ``minimal=True`` and render it with ``st_profile_report``."""
    st_profile_report(ProfileReport(df, minimal=True))
コード例 #21
0
# Sidebar switch that decides which mini-app is rendered below.
app_name = st.sidebar.selectbox('a', ['doc', '自动数据报告'])

# NOTE(review): '语义' is not one of the selectbox options above, so this
# branch cannot currently be reached -- confirm whether the option list or
# this comparison is out of date.
if app_name == '语义':
    text = st.sidebar.text_input(
        '字段', value="东北证券")  # st.text_area('xx', value="小米\n苹果")
    topn = st.sidebar.slider('召回数', value=20, min_value=1, max_value=100)

elif app_name == '自动数据报告':
    # import pandas_profiling

    from sklearn.datasets import load_iris
    from streamlit_pandas_profiling import st_profile_report

    st.markdown(f"""
        # 一键生成数据报告
        """)

    # SECURITY: eval() executes whatever expression is typed into the text
    # box, i.e. arbitrary code with the privileges of the server process.
    # Acceptable only when every user of the app is trusted; otherwise replace
    # it with a fixed whitelist of loader functions.
    process_func = eval(
        st.text_input('数据预处理,支持 lambda',
                      value="""pd.read_csv"""))  # pd.read_excel
    uploaded_file = st.file_uploader('File uploader')

    if uploaded_file is not None:
        df = process_func(uploaded_file)
        st_profile_report(df.profile_report())  # relies on the profile_report accessor

    if st.sidebar.checkbox('Demo', value=False):
        # return_X_y / as_frame and the concat axis are passed by keyword:
        # recent scikit-learn and pandas releases reject them positionally.
        df = pd.concat(load_iris(return_X_y=True, as_frame=True), axis=1)
        st_profile_report(df.profile_report())
コード例 #22
0
def main():
    """Render the EDA explorer page.

    Lets the user pick an entry of ``./datasets``, loads it with
    ``pd.read_csv`` and then shows, top to bottom: optional previews and
    summaries of the frame, a pandas-profiling report, and several plotting
    widgets. Nothing is returned.
    """
    st.title("The EDA App")
    st.subheader("Intelligent EDA App Generator using Streamlit")

    banner_html = """
	<div style="background-color:blue;"><p style="color:white;font-size:20px;padding:10px">Data has a better idea</p></div>
	"""
    st.markdown(banner_html, unsafe_allow_html=True)

    def file_selector(folder_path='./datasets'):
        # Offer every directory entry of folder_path; return the chosen path.
        picked = st.selectbox("Select A file", os.listdir(folder_path))
        return os.path.join(folder_path, picked)

    filename = file_selector()
    st.info(f"Selected Datasets {filename}")

    # Load the selected file
    df = pd.read_csv(filename)

    # Preview of the first rows
    if st.checkbox("Show Dataset"):
        row_count = st.number_input("Number of Rows to View", 5, 10)
        st.dataframe(df.head(row_count))

    # Column labels
    if st.button("Columns names"):
        st.write(df.columns)

    # Shape, one dimension at a time
    if st.checkbox("Shape of Dataset"):
        data_dim = st.radio("Show Dimension By ", ("Rows", "Columns"))
        dimension_views = {
            'Rows': ("Number of Rows", 0),
            'Columns': ("Number of Columns", 1),
        }
        view = dimension_views.get(data_dim)
        if view is None:
            st.write(df.shape)
        else:
            caption, axis = view
            st.text(caption)
            st.write(df.shape[axis])

    # Column subset
    if st.checkbox("Select Columns To Show"):
        chosen = st.multiselect("Select", df.columns.tolist())
        st.dataframe(df[chosen])

    # Frequencies of the last column
    if st.button("Value Counts"):
        st.text("Value Counts By Target/Class")
        last_column = df.iloc[:, -1]
        st.write(last_column.value_counts())

    # Dtypes
    if st.button("Data Types"):
        st.write(df.dtypes)

    # Descriptive statistics
    if st.checkbox("Summary"):
        st.write(df.describe())

    # Null mask
    if st.checkbox("Check null values"):
        st.write(df.isnull())

    # pandas-profiling report (built on every run of this function)
    report = ProfileReport(df, explorative=True)
    st.header('**Input DataFrame**')
    st.write(df)
    st.write('---')
    st.header('**Pandas Profiling Report**')
    st_profile_report(report)

    # Plots and visualisation
    st.subheader("Data Visualization")

    # Pie chart of the last column; the button only appears once the box is ticked
    if st.checkbox("Pie Plot") and st.button("Generate Pie Plot"):
        st.success("Generating A Pie Plot")
        pie_axes = df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%")
        st.write(pie_axes)
        st.pyplot()

    # Bar chart of counts
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        column_names = df.columns.tolist()
        primary_col = st.selectbox("Primary Columm to GroupBy", column_names)
        group_columns = st.multiselect("Select Columns", column_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            vc_plot = (df.groupby(primary_col)[group_columns].count()
                       if group_columns else df.iloc[:, -1].value_counts())
            st.write(vc_plot.plot(kind="bar"))
            st.pyplot()

    # User-configurable plot
    st.subheader("Customizable Plot")
    plot_kinds = ["hist", "box", "scatter", "area", "bar", "line", "kde"]
    type_of_plot = st.selectbox("Select Type of Plot", plot_kinds)
    columns_to_plot = st.multiselect("Select Columns To Plot",
                                     df.columns.tolist())

    if st.button("Generate Plot"):
        st.success(
            f"Generating Customizable Plot of {type_of_plot} for {columns_to_plot}")

        # These three kinds are drawn by Streamlit's own chart functions
        native_charts = {
            'area': st.area_chart,
            'bar': st.bar_chart,
            'line': st.line_chart,
        }
        if type_of_plot in native_charts:
            native_charts[type_of_plot](df[columns_to_plot])
        elif type_of_plot:
            # Every other kind goes through DataFrame.plot
            st.write(df[columns_to_plot].plot(kind=type_of_plot))
            st.pyplot()
コード例 #23
0
ファイル: pandas_page.py プロジェクト: mesmith027/sap_2021
def run_pandas(train_set):
    """Show a header, then render an explorative ``ProfileReport`` of ``train_set``.

    Returns ``None``.
    """
    st.header(":scales: Pandas Profiling Report")
    st.write()
    st_profile_report(ProfileReport(train_set, explorative=True))
コード例 #24
0
def _plot_metric(scores, metric, slug, heading, lower=None, upper=None,
                 unit_axis=False):
    """Draw the tall and the wide bar chart for one column of ``scores``.

    The tall chart (models on the y axis) is only offered as a PDF download;
    the wide chart (models on the x axis) is shown on the page and offered as
    a download too.

    Args:
        scores: Table with one row per model; its index labels the bars.
        metric: Name of the column of ``scores`` to plot.
        slug: Middle part of the download file names
            (``plot-<slug>-tall.pdf`` / ``plot-<slug>-wide.pdf``).
        heading: Markdown heading written above the charts.
        lower: If given, values below it are raised to it (in place).
        upper: If given, values above it are lowered to it (in place).
        unit_axis: When true, the value axis is fixed to the range 0..1.
    """
    st.markdown(heading)
    if lower is not None or upper is not None:
        # Clamp once, in place, so both orientations plot the same values.
        scores[metric] = scores[metric].clip(lower=lower, upper=upper)

    # Tall: always starts on a fresh figure so nothing is drawn on top of
    # whatever chart happened to be current.
    plt.figure(figsize=(3, 9))
    sns.set_theme(style="whitegrid")
    ax = sns.barplot(y=scores.index, x=metric, data=scores)
    if unit_axis:
        ax.set(xlim=(0, 1))
    st.markdown(imagedownload(plt, 'plot-{}-tall.pdf'.format(slug)),
                unsafe_allow_html=True)
    plt.close()  # release the figure; pyplot otherwise keeps it alive

    # Wide
    plt.figure(figsize=(9, 3))
    sns.set_theme(style="whitegrid")
    ax = sns.barplot(x=scores.index, y=metric, data=scores)
    if unit_axis:
        ax.set(ylim=(0, 1))
    plt.xticks(rotation=90)
    st.pyplot(plt)
    st.markdown(imagedownload(plt, 'plot-{}-wide.pdf'.format(slug)),
                unsafe_allow_html=True)
    plt.close()


def build_model(df, c):
    """Benchmark many models on ``df`` and render the results in Streamlit.

    The page shows the dataset dimensions, the variable names, histograms, a
    pandas-profiling report, the model performance and prediction tables for
    the training and the test split (each with a CSV download link) and bar
    charts of the test-set metrics.

    Besides its arguments the function reads the module-level names
    ``split_size``, ``seed_number`` and ``model`` and calls the helpers
    ``filedownload`` and ``imagedownload``; none of them is defined in this
    block.

    Args:
        df: DataFrame holding both the features and the target.
        c: Name of the target column in ``df``.

    Raises:
        ValueError: If ``model`` is neither ``'Regression'`` nor
            ``'Classification'``.
    """
    Y = df[c]  # target column
    X = df.loc[:, df.columns != c]  # every other column is a feature
    st.markdown('**1.2. Dataset dimension**')
    st.write('X')
    st.info(X.shape)
    st.write('Y')
    st.info(Y.shape)

    st.markdown('**1.3. Variable details**:')
    st.write('X variable (first 20 are shown)')
    st.info(list(X.columns[:20]))
    st.write('Y variable')
    st.info(Y.name)
    st.markdown('**1.4. Histograms**:')
    df.hist(alpha=0.5, figsize=(20, 10))
    st.pyplot()

    # Build lazy model
    X_train, X_test, Y_train, Y_test = train_test_split(
        X, Y, test_size=split_size, random_state=seed_number)
    if model == 'Regression':
        reg = LazyRegressor(verbose=0,
                            ignore_warnings=False,
                            custom_metric=None,
                            predictions=True)
    elif model == 'Classification':
        reg = LazyClassifier(verbose=0,
                             ignore_warnings=True,
                             custom_metric=None,
                             predictions=True)
    else:
        raise ValueError(
            "model must be 'Regression' or 'Classification', got {!r}".format(
                model))

    # First fit scores the models on the data they were trained on, the
    # second one scores them on the held-out split.
    models_train, predictions_train = reg.fit(X_train, X_train, Y_train,
                                              Y_train)
    models_test, predictions_test = reg.fit(X_train, X_test, Y_train, Y_test)

    pr = ProfileReport(df, explorative=True)
    st.header('**2.Pandas Profiling Report(Exploratory data Analysis)**')
    st_profile_report(pr)

    st.subheader('3. Table of Model Performance')
    st.write('Training set')
    st.write(models_train)
    st.markdown(filedownload(models_train, 'modeltraining.csv'),
                unsafe_allow_html=True)

    st.write('Test set')
    st.write(models_test)
    st.markdown(filedownload(models_test, 'modeltest.csv'),
                unsafe_allow_html=True)

    st.subheader('4. Predictions By the models')
    st.write('Training set')
    st.write(predictions_train)
    st.markdown(filedownload(predictions_train, 'predicttraining.csv'),
                unsafe_allow_html=True)

    st.write('Test set')
    st.write(predictions_test)
    st.markdown(filedownload(predictions_test, 'predicttest.csv'),
                unsafe_allow_html=True)

    st.subheader('5. Plot of Model Performance (Test set)')
    if model == 'Regression':
        _plot_metric(models_test, "R-Squared", 'r2', '**R-squared**',
                     lower=0, unit_axis=True)
        # The heading promises a cap, so apply it before plotting.
        _plot_metric(models_test, "RMSE", 'rmse', '**RMSE (capped at 50)**',
                     upper=50)
        _plot_metric(models_test, "Time Taken", 'calculation-time',
                     '**Calculation time**', lower=0)
    else:
        _plot_metric(models_test, "Accuracy", 'accuracy', '**Accuracy**',
                     lower=0, unit_axis=True)
        _plot_metric(models_test, "Balanced Accuracy", 'balanced-accuracy',
                     '**Balanced Accuracy**', lower=0, unit_axis=True)
        _plot_metric(models_test, "F1 Score", 'F1-Score', '**F1 Score**',
                     lower=0, unit_axis=True)
        _plot_metric(models_test, "Time Taken", 'calculation-time',
                     '**Calculation time**', lower=0)
コード例 #25
0
def _upload_dataset():
    """Show the dataset uploader and return the uploaded data.

    When a file has been uploaded it is parsed with ``pd.read_csv`` and its
    first rows are displayed.

    Returns:
        The parsed DataFrame, or ``None`` while no file has been uploaded.
    """
    data = st.file_uploader("Upload Dataset", type=["csv", "txt"])
    if data is None:
        return None
    df = pd.read_csv(data)
    st.dataframe(df.head())
    return df


def main():
    """AutoML Web App Tool with Streamlit.

    Renders the page chosen in the sidebar menu: a home page, two automated
    EDA reports (pandas-profiling and Sweetviz), a manual EDA page, a
    plotting page, a model-comparison page and an about page. Every data
    page only shows its controls once a dataset has been uploaded.
    """
    st.title("AutoML WebApp")
    st.text("Version(Beta): 0.2")

    menu = [
        "Home", "Pandas Profile", "Sweetviz", "EDA", "Plot", "Model Building",
        "About"
    ]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == 'Home':
        st.markdown(
            '**Data Analysis, Visualization** and Machine Learning **Model Building** in an interactive **WebApp** for Data Scientist/Data Engineer/Business Analyst.  \n\nThe purpose of this app is to create a **quick Business Insights**.  \n\nAutoML WebApp built with **Streamlit framework** using **Pandas** and **Numpy** for Data Analysis, **Matplotlib** and **Seaborn** for Data Visualization, **SciKit-Learn** for Machine Learning Model.'
        )
        st.header("Silent Features")
        st.markdown(
            '* User can browse or upload file(Dataset) in .csv or .txt format.  \n* User can get the details of dataset like No. of rows & Columns, Can View Column list, Select Columns with rows to show, Dataset Summary like count, mean, std, min and max values.  \n* Several Data Visualizations like Correlation with HeatMap, PieChart and Plots like Area, Bar, Line, Box, KDE.  \n* User can built Models like LogisticRegression, LinearDiscriminantAnalysis, KNeighborsClassifier, DecisionTreeClassifier, GaussianNB, SVC.  \n* Model Evaluation with Accuracy, Mean and Standard Deviation.'
        )

    elif choice == 'Pandas Profile':
        st.subheader("Automated EDA with Pandas Profile")

        df = _upload_dataset()
        if df is not None:
            profile = ProfileReport(df)
            st_profile_report(profile)

    elif choice == 'Sweetviz':
        st.subheader("Automated EDA with Sweetviz")

        df = _upload_dataset()
        if df is not None and st.button("Generate Sweetviz Report"):
            # show_html() is called without a path; the file name read back
            # below is the one this page has always expected it to produce.
            report = sv.analyze(df)
            report.show_html()
            st_display_sweetviz("SWEETVIZ_REPORT.html")

    elif choice == 'EDA':
        st.subheader("Exploratory Data Analysis")

        df = _upload_dataset()
        if df is not None:
            # Bound up front: several of the optional sections below need it,
            # whichever checkboxes happen to be ticked.
            all_columns = df.columns.to_list()

            if st.checkbox("Show Shape"):
                st.write(df.shape)

            if st.checkbox("Show Columns"):
                st.write(all_columns)

            if st.checkbox("Select Columns To Show"):
                selected_columns = st.multiselect("Select Columns",
                                                  all_columns)
                new_df = df[selected_columns]
                st.dataframe(new_df)

            if st.checkbox("Show Summary"):
                st.write(df.describe())

            if st.checkbox("Show Value Counts"):
                # Counts the values of the last column.
                st.write(df.iloc[:, -1].value_counts())

            if st.checkbox("Correlation with Seaborn"):
                st.write(sns.heatmap(df.corr(), annot=True))
                st.pyplot()

            if st.checkbox("Pie Chart"):
                columns_to_plot = st.selectbox("Select 1 Column", all_columns)
                pie_plot = df[columns_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pie_plot)
                st.pyplot()

    elif choice == 'Plot':
        st.subheader("Data Visualization")

        df = _upload_dataset()
        # Everything below reads ``df``, so it is only offered after upload.
        if df is not None:
            all_columns_names = df.columns.tolist()

            if st.checkbox("Correlation with Seaborn"):
                st.write(sns.heatmap(df.corr(), annot=True))
                st.pyplot()

            if st.checkbox("Pie Chart"):
                columns_to_plot = st.selectbox("Select 1 Column",
                                               all_columns_names)
                pie_plot = df[columns_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pie_plot)
                st.pyplot()

            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                st.success("Generating Customizable Plot of {} for {}".format(
                    type_of_plot, selected_columns_names))

                cust_data = df[selected_columns_names]
                # Chart kinds Streamlit draws itself; the rest go through
                # the DataFrame's own ``plot``.
                native_charts = {
                    'area': st.area_chart,
                    'bar': st.bar_chart,
                    'line': st.line_chart,
                }
                if type_of_plot in native_charts:
                    native_charts[type_of_plot](cust_data)
                elif type_of_plot:
                    cust_plot = cust_data.plot(kind=type_of_plot)
                    st.write(cust_plot)
                    st.pyplot()

    elif choice == 'Model Building':
        st.subheader("Building Ml Model")

        df = _upload_dataset()
        if df is not None:
            # The last column is the target, all others are features.
            X = df.iloc[:, 0:-1]
            Y = df.iloc[:, -1]
            seed = 7

            models = [
                ("LR", LogisticRegression()),
                ("LDA", LinearDiscriminantAnalysis()),
                ("KNN", KNeighborsClassifier()),
                ("CART", DecisionTreeClassifier()),
                ("NB", GaussianNB()),
                ("SVM", SVC()),
            ]
            scoring = 'accuracy'

            # random_state is only accepted together with shuffle=True;
            # current scikit-learn raises ValueError otherwise.
            kfold = model_selection.KFold(n_splits=10,
                                          shuffle=True,
                                          random_state=seed)

            # Evaluate each model in turn
            all_models = []
            for name, model in models:
                cv_results = model_selection.cross_val_score(model,
                                                             X,
                                                             Y,
                                                             cv=kfold,
                                                             scoring=scoring)
                all_models.append({
                    "model_name": name,
                    "model_accuracy": cv_results.mean(),
                    "standard_deviation": cv_results.std()
                })

            if st.checkbox("Metrics as Table"):
                rows = [(m["model_name"], m["model_accuracy"],
                         m["standard_deviation"]) for m in all_models]
                st.dataframe(
                    pd.DataFrame(rows,
                                 columns=[
                                     "Model Name", "Model Accuracy",
                                     "Standard Deviation"
                                 ]))

            if st.checkbox("Metrics as JSON"):
                st.json(all_models)

    elif choice == "About":
        st.header("About Author")
        st.markdown(
            "Hi, there! I'm **Ravi Varma**. I'm passionate about using data to extract decision making insight and help machines learn to make the world a better place. If you liked what you saw, want to have a chat with me about the **Data Science** or **Machine Learning Projects**, **Work Opportunities**, or collaboration, shoot an **email** at **[email protected]**"
        )
        st.markdown(
            '**Portfolio**: https://ravivarmathotakura.github.io/portfolio/')
        st.markdown(
            '**Follow Me**: [@LinkedIn](https://www.linkedin.com/in/ravivarmathotakura/ "LinkedIn"), [@GitHub](https://github.com/ravivarmathotakura "GitHub")'
        )
        st.subheader("Note")
        st.text(
            "The author is not responsible for any misuse the program. \nAny contribution or suggestions are most welcome."
        )
コード例 #26
0
import pandas as pd  # needed by load_csv() below
import streamlit as st
from pandas_profiling import ProfileReport
from streamlit_pandas_profiling import st_profile_report

# Page title
st.markdown('''
# **The Streamlit X Pandas Crossover**
 **EDA Web App** 
''')

# Upload CSV data
st.sidebar.header('Upload your CSV file')
uploaded_file = st.sidebar.file_uploader("Upload CSV file", type=["csv"])

# Pandas Profiling Report
if uploaded_file is not None:

    @st.cache  # make use of web cache
    def load_csv():
        """Parse the uploaded file into a DataFrame."""
        csv = pd.read_csv(uploaded_file)
        return csv

    df = load_csv()
    report = ProfileReport(df, explorative=True)
    st.header('** DataFrame**')
    st.write(df)
    st.header('**Pandas Profiling Results**')
    st_profile_report(report)

else:
    # Nothing uploaded yet: tell the user how to start.
    st.info('Upload CSV file to get started.')
コード例 #27
0
ファイル: dataexpo.py プロジェクト: aritra1311/AutoML
def app3(df):
    """Show an explorative pandas-profiling report for ``df``.

    Installs an "ignore" filter for all Python warnings first, then builds
    the report, writes the section header and embeds the report in the page.

    Args:
        df: Data handed to ``ProfileReport`` (presumably a pandas
            DataFrame -- confirm against the caller).
    """
    warnings.filterwarnings("ignore")
    report = ProfileReport(df, explorative=True)
    st.header('**Pandas Profiling Report(Exploratory data Analysis)**')
    st_profile_report(report)
コード例 #28
0
def main():
    """Render the EDA app page selected in the sidebar menu.

    Pages: "Pandas Profiling" and "Sweetviz" (automated reports for an
    uploaded CSV), "Graphs" (seaborn pair plot of an uploaded CSV), "About",
    and the home page, which is shown for any other choice and embeds a
    small HTML/JS slideshow. Each page injects a ``<style>`` block that sets
    the body colours.
    """
    st.sidebar.info("Click on the X to Hide the Side bar")

    menu = ['Home', "Pandas Profiling", 'Sweetviz', 'Graphs', 'About']

    choice = st.sidebar.selectbox("Menu", menu)

    st.sidebar.info("Please refresh the page when uploading New Dataset")
    st.sidebar.markdown(' Created by  **_Prathap_**. :sunglasses:')

    if choice == "Pandas Profiling":

        st.markdown("""
                    <style>
                   body {
                         color: #034F84;
                  background-color: #F3F3F3;
                               }
                          </style>
                  """, unsafe_allow_html=True)

        st.subheader("EDA Using Pandas Profiling")

        data_file = st.file_uploader("Upload CSV File", type='CSV')

        if data_file is not None:
            df = pd.read_csv(data_file)
            st.subheader(" Head of Youre Dataset")
            st.write(df.head())
            if st.button(" Click Here To Generate The Detailed Report"):
                st.info("Please wait it may take some time if dataset is large")

                st.subheader("Scroll Down To See The Full Report")
                # Built only on request, next to the place it is rendered.
                profile = ProfileReport(df)
                st_profile_report(profile)

    elif choice == 'Sweetviz':

        st.markdown("""
                  <style>
                   body {
                     color: #034F84;
                     background-color: #F3F3F3;
                               }
                          </style>
                  """, unsafe_allow_html=True)

        st.subheader('EDA Using Sweetviz')

        data_file = st.file_uploader("Upload CSV File", type='CSV')

        if data_file is not None:
            df = pd.read_csv(data_file)
            st.subheader(" Head of Youre Dataset")
            st.write(df.head())
            if st.button(" Click Here to Generate the Sweetviz Report"):
                st.info("Please wait it may take some time if dataset is large")
                # Analyse only after the click: the script reruns on every
                # interaction and the analysis is the expensive step.
                report = sv.analyze(df)
                report.show_html('sweet_report.html')

    elif choice == "Graphs":
        st.subheader('Plotting Graphs')

        st.markdown("""
                    <style>
                   body {
                         color: #034F84;
                  background-color: #F3F3F3;
                               }
                          </style>
                  """, unsafe_allow_html=True)

        data_file = st.file_uploader("Upload CSV File", type='csv')

        if data_file is not None:

            df = pd.read_csv(data_file)
            st.subheader(" Head of Youre Dataset")
            st.write(df.head())
            if st.button(' Click Here to Generate Pair Plot'):
                st.info("Please wait it may take some time if dataset is large")
                graph = sns.pairplot(df)
                # NOTE(review): this runs after the pair plot has been
                # created -- confirm whether it was meant to come first.
                sns.set(rc={'figure.figsize': (11.7, 8.27)})
                st.pyplot(graph)

    elif choice == 'About':

        st.markdown("""
                  <style>
                   body {
                     color: #034F84;
                     background-color: #F3F3F3;
                               }
                          </style>
                  """, unsafe_allow_html=True)
        st.subheader('About Section')
        st.write("Hello. Prathap Here, a Motivated data scientist with 3+ years of experience as a Data Analyst. Passionate about building models that fix problems. Relevant skills include Machine learning, Deep learning, Computer vision, problem solving, programming, and creative thinking")
        st.write("Connect me through @ [LinkedIn](https://www.linkedin.com/in/pratap-reddy-2794b91b7/)")
        st.write("Check out The Code @ [Github](https://github.com/Pratap517)")
        st.subheader("Check out my Other tiny Projects below ")
        st.write(" Handy tool to Analyze Csv Files [Click Here](https://share.streamlit.io/pratap517/streamlitapp_dataanalysis/main/main_app.py)")
        st.write(" Simple loan Prediction App [Click Here](https://share.streamlit.io/pratap517/ml_deploy_using_streamlit/main/app.py)")
        st.write(" Simple Mask Detection App [Click Here](https://mask-detection-5a800.firebaseapp.com/)")

    else:
        # Home page (also the fallback for any unrecognised choice).
        st.subheader("Home")

        html_temp = """
		<div style="background-color:#F3F3F3;padding:10px;border-radius:10px">
		<h1 style="color:#85144b;text-align:center;">Simple EDA App with Python Streamlit</h1>
		</div>
		"""
        stc.html(html_temp)
        st.markdown("""
                  <style>
                   body {
                     color: #034F84;
                     background-color: #F3F3F3;
                               }
                          </style>
                  """, unsafe_allow_html=True)

        st.subheader("Please Select the Type of Report in the Side bar")

        # Self-contained slideshow: CSS, three slides and the JS driving the
        # prev/next links and the dot indicators.
        stc.html("""
			<style>
			* {box-sizing: border-box}
			body {font-family: Verdana, sans-serif; margin:0}
			.mySlides {display: none}
			img {vertical-align: middle;}
			/* Slideshow container */
			.slideshow-container {
			  max-width: 1000px;
			  position: relative;
			  margin: auto;
			}
			/* Next & previous buttons */
			.prev, .next {
			  cursor: pointer;
			  position: absolute;
			  top: 50%;
			  width: auto;
			  padding: 16px;
			  margin-top: -22px;
			  color: white;
			  font-weight: bold;
			  font-size: 18px;
			  transition: 0.6s ease;
			  border-radius: 0 3px 3px 0;
			  user-select: none;
			}
			/* Position the "next button" to the right */
			.next {
			  right: 0;
			  border-radius: 3px 0 0 3px;
			}
			/* On hover, add a black background color with a little bit see-through */
			.prev:hover, .next:hover {
			  background-color: rgba(0,0,0,0.8);
			}
			/* Caption text */
			.text {
			  color: #f2f2f2;
			  font-size: 15px;
			  padding: 8px 12px;
			  position: absolute;
			  bottom: 8px;
			  width: 100%;
			  text-align: center;
			}
			/* Number text (1/3 etc) */
			.numbertext {
			  color: #f2f2f2;
			  font-size: 12px;
			  padding: 8px 12px;
			  position: absolute;
			  top: 0;
			}
			/* The dots/bullets/indicators */
			.dot {
			  cursor: pointer;
			  height: 15px;
			  width: 15px;
			  margin: 0 2px;
			  background-color: #bbb;
			  border-radius: 50%;
			  display: inline-block;
			  transition: background-color 0.6s ease;
			}
			.active, .dot:hover {
			  background-color: #717171;
			}
			/* Fading animation */
			.fade {
			  -webkit-animation-name: fade;
			  -webkit-animation-duration: 1.5s;
			  animation-name: fade;
			  animation-duration: 1.5s;
			}
			@-webkit-keyframes fade {
			  from {opacity: .4} 
			  to {opacity: 1}
			}
			@keyframes fade {
			  from {opacity: .4} 
			  to {opacity: 1}
			}
			/* On smaller screens, decrease text size */
			@media only screen and (max-width: 300px) {
			  .prev, .next,.text {font-size: 11px}
			}
			</style>
			</head>
			<body>
			<div class="slideshow-container">
			<div class="mySlides fade">
			  <div class="numbertext">1 / 3</div>
			  <img src="https://www.w3schools.com/howto/img_nature_wide.jpg" style="width:100%">
			  <div class="text">Caption Text</div>
			</div>
			<div class="mySlides fade">
			  <div class="numbertext">2 / 3</div>
			  <img src="https://www.w3schools.com/howto/img_snow_wide.jpg" style="width:100%">
			  <div class="text">Caption Two</div>
			</div>
			<div class="mySlides fade">
			  <div class="numbertext">3 / 3</div>
			  <img src="https://www.w3schools.com/howto/img_mountains_wide.jpg" style="width:100%">
			  <div class="text">Caption Three</div>
			</div>
			<a class="prev" onclick="plusSlides(-1)">&#10094;</a>
			<a class="next" onclick="plusSlides(1)">&#10095;</a>
			</div>
			<br>
			<div style="text-align:center">
			  <span class="dot" onclick="currentSlide(1)"></span> 
			  <span class="dot" onclick="currentSlide(2)"></span> 
			  <span class="dot" onclick="currentSlide(3)"></span> 
			</div>
			<script>
			var slideIndex = 1;
			showSlides(slideIndex);
			function plusSlides(n) {
			  showSlides(slideIndex += n);
			}
			function currentSlide(n) {
			  showSlides(slideIndex = n);
			}
			function showSlides(n) {
			  var i;
			  var slides = document.getElementsByClassName("mySlides");
			  var dots = document.getElementsByClassName("dot");
			  if (n > slides.length) {slideIndex = 1}    
			  if (n < 1) {slideIndex = slides.length}
			  for (i = 0; i < slides.length; i++) {
			      slides[i].style.display = "none";  
			  }
			  for (i = 0; i < dots.length; i++) {
			      dots[i].className = dots[i].className.replace(" active", "");
			  }
			  slides[slideIndex-1].style.display = "block";  
			  dots[slideIndex-1].className += " active";
			}
			</script>
			""")
コード例 #29
0
# Pandas Profiling Report
if uploaded_file is not None:

    @st.cache
    def load_csv():
        """Parse the uploaded file into a DataFrame."""
        csv = pd.read_csv(uploaded_file)
        return csv

    df = load_csv()
    pr = ProfileReport(df, explorative=True)
    st.header('**Input DataFrame**')
    st.write(df)
    st.write('--')
    st.header('**Pandas Profiling Report**')
    st_profile_report(pr)
else:
    st.info('Waiting for CSV file to be uploaded.')
    if st.button('Click to use Example Dataset'):
        # Example data
        @st.cache
        def load_data():
            """Load the bundled example CSV into a DataFrame."""
            csv = pd.read_csv("percent_bachelors_degrees_women_usa.csv")
            return csv

        df = load_data()
        pr = ProfileReport(df, explorative=True)
        st.header('**Input DataFrame**')
        st.write("**Percentage Of Bachelors Degrees For Women In The USA **")
        st.write("Pandas example dataset")
        st.write(df)
        # Render the report that was just built, mirroring the upload branch.
        st.write('--')
        st.header('**Pandas Profiling Report**')
        st_profile_report(pr)
コード例 #30
0
def main():
    "Simple EDA App with Streamlist Components"
    menu = ["Home", 'EDA', "Sweetviz", "Custom Analysis", "ML", "About"]
    choice = st.sidebar.selectbox("Menu", menu)

    if choice == "Home":
        image = Image.open('Data_science.jpg')
        #st.image(image, caption='commons.wikimedia.org' ,use_column_width=True)
        st.image(image, caption='commons.wikimedia.org')
        st.markdown(
            "Download here [data set](https://drive.google.com/file/d/1MAjahv92AkpGQ6-fPFrJbSXM8PkY6_00/view?usp=sharing) for checking stuff"
        )

    if choice == "EDA":
        st.title("Automated EDA with Pandas")
        st.markdown("You can upload your data in 'csv' format")
        data_file = st.file_uploader("Uplod CSV",
                                     type=['csv'],
                                     encoding=None,
                                     key='a')
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())
            profile = ProfileReport(df)
            st_profile_report(profile)

    elif choice == "Sweetviz":
        st.subheader("Automated EDA with Sweetviz")
        st.markdown("You can upload your data in 'csv' format")
        data_file = st.file_uploader("Uplod CSV",
                                     type=['csv'],
                                     encoding=None,
                                     key='a')
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())
            st.subheader("Analysis data with plots")
            if st.button("Sweetviz Report"):
                report = SV.analyze(df)
                report.show_html()
                st_display_sweetviz("SWEETVIZ_REPORT.html")
            # st.subheader("Compare data with plots")
            # if st.button("Compare"):
            #     report = SV.compare(df[100:], df[:100])
            #     report.show_html()
            #     st_display_sweetviz("Compare.html")

    elif choice == 'Custom Analysis':

        st.subheader("Data Visualization")
        data_file = st.file_uploader("Uplod CSV",
                                     type=['csv'],
                                     encoding=None,
                                     key='a')
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())

        if st.checkbox("Correlation Matrix"):
            st.write(sns.heatmap(df.corr(), annot=True))
            st.pyplot()

        if st.checkbox("Pie Chart"):
            all_columns = df.columns.to_list()
            columns_to_plot = st.selectbox("Select one Column", all_columns)
            pie_plot = df[columns_to_plot].value_counts().plot.pie(
                autopct="%1.1f%%")
            st.write(pie_plot)
            st.pyplot()

        all_columns = df.columns.to_list()
        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ['Area', 'Line', 'Bar', 'hist', 'box', 'kde'])
        selected_col_names = st.multiselect('Select Columns To plot Data',
                                            all_columns)

        if st.button("Produce Plot"):
            st.success(
                f"Creating Customizable Plot of {type_of_plot} for {selected_col_names}"
            )

            # Streamlit plots
            if type_of_plot == 'Area':
                custom_data = df[selected_col_names]
                st.area_chart(custom_data)

            elif type_of_plot == 'Line':
                custom_data = df[selected_col_names]
                st.line_chart(custom_data)

            elif type_of_plot == 'Bar':
                custom_data = df[selected_col_names]
                st.bar_chart(custom_data)

            # Custom Plots
            elif type_of_plot:
                custom_plt = df[selected_col_names].plot(kind=type_of_plot)
                st.write(custom_plt)
                st.pyplot()

    elif choice == "ML":
        st.title("Binary Classification")
        st.markdown("The is an basic idea about ML")
        st.sidebar.title("Binary Classification Web App")
        st.markdown("Are Mushrooms edible or poisonous? 🍄")
        #st.sidebar.markdown("Are your mushrooms edible or poisonous? 🍄")

        @st.cache(persist=True)
        def load_data():
            """Load ``mushrooms.csv`` and label-encode every column.

            Each column is replaced by the output of
            ``LabelEncoder.fit_transform`` on that column; the same encoder
            object is re-fitted for each column in turn.
            """
            frame = pd.read_csv("mushrooms.csv")
            encoder = LabelEncoder()
            for column in frame.columns:
                encoded = encoder.fit_transform(frame[column])
                frame[column] = encoded
            return frame

        @st.cache(persist=True)
        def split(df):
            """Split ``df`` into train/test features and targets.

            The ``type`` column is the target; every other column is a
            feature. 20% of the rows are held out for testing and the split
            uses ``random_state=0``.

            Returns:
                Tuple ``(X_train, X_test, y_train, y_test)``.
            """
            target = df.type
            features = df.drop("type", axis=1)
            partitions = train_test_split(
                features, target, test_size=0.2, random_state=0)
            train_features, test_features, train_target, test_target = partitions
            return train_features, test_features, train_target, test_target

        def plot_metrics(metrics_list):
            """Draw each requested evaluation plot for the current model.

            ``model``, ``X_test`` and ``y_test`` are not parameters; they are
            looked up in the enclosing scope when a plot is drawn.

            Args:
                metrics_list: Iterable of metric names. Matching ignores case
                    and treats ``-`` like a space, so the sidebar spellings
                    "Roc-Curve", "ROC-Curve" and "precision-Recall Curve" are
                    recognised as well as "ROC Curve" and
                    "Precision-Recall Curve". Unknown names are ignored.
            """
            # The sidebar multiselects offer differently spelled labels than
            # the ones this function used to compare against verbatim, so the
            # ROC and precision-recall plots were never drawn. Normalise first.
            requested = {
                str(name).casefold().replace("-", " ")
                for name in metrics_list
            }

            if "confusion matrix" in requested:
                st.subheader("Confusion Matrix")
                plot_confusion_matrix(model, X_test, y_test)
                st.pyplot()

            if "roc curve" in requested:
                st.subheader("ROC Curve")
                plot_roc_curve(model, X_test, y_test)
                st.pyplot()

            if "precision recall curve" in requested:
                st.subheader("Precision-Recall Curve")
                plot_precision_recall_curve(model, X_test, y_test)
                st.pyplot()

        df = load_data()
        class_names = df['type']

        if st.sidebar.checkbox("Show row data", False):
            st.subheader("Mushroom Data Set (Classification)")
            st.write(df)
            st.write("The shape of data", df.shape)
            st.markdown(
                "This [data set](https://archive.ics.uci.edu/ml/datasets/Mushroom) includes descriptions of hypothetical samples corresponding to 23 species of gilled mushrooms "
                "in the Agaricus and Lepiota Family (pp. 500-525). Each species is identified as definitely edible, definitely poisonous, "
                "or of unknown edibility and not recommended. This latter class was combined with the poisonous one."
            )
            if st.checkbox("Show Summary"):
                st.write(df.describe().T)

            if st.checkbox("Show Columns"):
                all_columns = df.columns.to_list()
                st.write(all_columns)

            if st.checkbox("Select Columns To See Values"):
                all_columns = df.columns.to_list()
                selected_col = st.multiselect("Select Columns", all_columns)
                new_df = df[selected_col]
                st.dataframe(new_df)

            if st.checkbox("Show value counts"):
                st.write(df.iloc[:, 0].value_counts())

        X_train, X_test, y_train, y_test = split(df)

        st.sidebar.subheader("Choose a Classifier")
        Classifier = st.sidebar.selectbox(
            "Classifier", ("Support Vector Machine (SVM)",
                           "Logistic Regession", "Random Forest"))

        if Classifier == 'Support Vector Machine (SVM)':
            st.sidebar.subheader('Model Hyperparameters')
            ##Choose Parameters\
            C = st.sidebar.number_input("C (Regularization parameter)",
                                        0.01,
                                        10.0,
                                        step=0.01,
                                        key='C_SVM')
            kernel = st.sidebar.radio("Kernel", ("rbf", "linear"),
                                      key='kernel')
            gamma = st.sidebar.radio("Gamma (Kernel Coefficient)",
                                     ("scale", "auto"),
                                     key='gamma')

            metrics = st.sidebar.multiselect(
                "Whitch metrics to plot?",
                ("Confusion Matrix", "Roc-Curve", "Precision-Recall Curve"))

            if st.sidebar.button("Classify", key="classify"):
                st.subheader("Support Vector Machine (SVM) Results")
                model = SVC(C=C, kernel=kernel, gamma=gamma)
                model.fit(X_train, y_train)
                accuracy = model.score(X_test, y_test)
                y_pred = model.predict(X_test)
                st.write("Accuracy: ", accuracy.round(2))
                st.write(
                    "Precision: ",
                    precision_score(y_test, y_pred,
                                    labels=class_names).round(2))
                st.write(
                    "Recall: ",
                    recall_score(y_test, y_pred, labels=class_names).round(2))
                plot_metrics(metrics)

        if Classifier == 'Logistic Regession':
            st.sidebar.subheader("Model Hyperparameters")
            C = st.sidebar.number_input("C (Regularization parameter)",
                                        0.01,
                                        10.0,
                                        step=0.01,
                                        key='C_LR')
            max_iter = st.sidebar.slider("Maximum Number of iterations",
                                         100,
                                         500,
                                         key='max_iter')

            metrics = st.sidebar.multiselect(
                "Which metrics to plot?",
                ("Confusion Matrix", 'ROC-Curve', 'precision-Recall Curve'))

            if st.sidebar.button("Classify", key='Classify'):
                st.subheader("Logistc Regression Results")
                #model = LogisticRegression(C=C, penalty='12', max_iter=max_iter)
                model = LogisticRegression(C=C,
                                           penalty='l2',
                                           max_iter=max_iter)
                #model.fit(X_train, y_train)
                model.fit(X_train, y_train)
                accuracy = model.score(X_test, y_test)
                y_pred = model.predict(X_test)
                st.write("Accuracy: ", accuracy.round(2))
                st.write(
                    "Precision: ",
                    precision_score(y_test, y_pred,
                                    labels=class_names).round(2))
                st.write(
                    'Recall: ',
                    recall_score(y_test, y_pred, labels=class_names).round(2))
                plot_metrics(metrics)

        if Classifier == 'Random Forest':
            st.sidebar.subheader("Model Hyperparameters")
            n_estimators = st.sidebar.number_input(
                "The number of trees in the forest",
                10,
                5000,
                key='n_estimators')
            max_depth = st.sidebar.number_input(
                "The maximum depth of the tree",
                1,
                20,
                step=1,
                key='n_estimators')
            bootstrap = st.sidebar.radio(
                "Bootstrap samples when builidng tees", ("True", 'False'),
                key='bootstrap')

            metrics = st.sidebar.multiselect(
                "Which metrics to plot?",
                ("Confusion Matrix", 'ROC-Curve', 'precision-Recall Curve'))

            if st.sidebar.button("Classify", key='classify'):
                st.subheader("Random Forest Classifer Results")
                model = RandomForestClassifier(n_estimators=n_estimators,
                                               max_depth=max_depth,
                                               bootstrap=bootstrap,
                                               n_jobs=-1)
                model.fit(X_train, y_train)
                accuracy = model.score(X_test, y_test)
                y_pred = model.predict(X_test)
                st.write("Accuracy: ", accuracy.round(2))
                st.write(
                    "Precision: ",
                    precision_score(y_test, y_pred,
                                    labels=class_names).round(2))
                st.write(
                    "Recall: ",
                    recall_score(y_test, y_pred, labels=class_names).round(2))
                plot_metrics(metrics)