Ejemplo n.º 1
0
    "Country/Territory": "Country",
    "Population(1 July 2019)": "Population"
})
# Keep only the columns needed for the join. Copy so that the column
# assignment below writes to an independent frame, not a slice of the original.
df_pop = df_pop[['Country', 'Population']].copy()
# Remove bracketed suffixes such as "[a]" from the country names.
# regex=True must be explicit: since pandas 2.0 Series.str.replace treats the
# pattern as a literal string by default, which would leave the names untouched.
df_pop['Country'] = df_pop['Country'].str.replace(r"\[.*\]", "", regex=True)

# Left-join the social progress data onto the democracy data and keep the
# columns used further down.
df_combined = df_demo.merge(df_soc, on='Country', how='left')[[
    'Country', 'Democracy_Score', 'SocialProgress_Score', 'Regimetype',
    'Region'
]]
# Drop countries without a social progress score (reassign rather than
# mutate the column selection in place).
df_combined = df_combined.dropna(subset=['SocialProgress_Score'])
# Attach the population figures; countries without a match keep NaN.
df_combined = df_combined.merge(df_pop, on='Country', how='left')

# Regions offered in the filter widget.
_REGION_OPTIONS = (
    "Asia & Australasia",
    "Eastern Europe",
    "Latin America",
    "Middle East and North Africa",
    "North America",
    "Sub-Saharan Africa",
    "Western Europe",
)
choice = st.multiselect('Select region', _REGION_OPTIONS, key='1')

# Keep only the rows whose region is currently selected.
_in_selected_region = df_combined['Region'].isin(choice)
df_combined = df_combined[_in_selected_region]

# Configure the options common to all layers
brush = alt.selection(type='interval')
base = alt.Chart(df_combined).add_selection(brush)

points = alt.Chart(df_combined).mark_circle().encode(
    alt.X('Democracy_Score', title='Democracy Index'),
    alt.Y('SocialProgress_Score',
          title='Social Progress Index',
          scale=alt.Scale(domain=(30, 100))),
    color=('Region:N'),
Ejemplo n.º 2
0
def main():
    """Render the task-automation Streamlit app.

    A sidebar select box picks one activity; the matching section below
    renders that activity's input widgets and calls the corresponding helper
    once the required inputs are present. Streamlit re-runs this function
    from the top on every widget interaction, so widgets whose value is
    needed later must not be created only inside an ``if st.button(...)``
    branch.
    """
    activities = [
        'HOME',
        'FILE PROCESS',
        'PDF TOOLS',
        'INFO ABOUT FILES/FOLDERS/DIRECTORIES',
        'DOWNLOAD IMAGE',
        'BATCH FILE CREATION',
        'CONVERT PHOTOS TO PDF',
        'SECURE FILES (STORE SECRETLY)',
        'SET AN EMAIL REMINDER ALARM',
        'URL LINK EXTRACTOR',
        'WEB PAGE DATA EXTRACTOR',
        'Topics identifier in text'.upper(),
        'Schedule application'.upper(),
        'Clean temporary files'.upper(),
        'Excel to PDF'.upper(),
        'Word to PDF'.upper(),
    ]
    choice = st.sidebar.selectbox("Select Activity", activities)
    if choice == 'HOME':
        st.image('task.jpg', use_column_width=True)
        st.info("This project use diffrent python script to do task,")
        st.warning('''
        select an option from the sidebar to use a particular module
        ''')
    if choice == 'FILE PROCESS':
        st.header("PROCESSING OF FILE RELATED DATA")
        st.subheader("Choose the option")
        fileactivity = [
            'Find all files which are older than x days',
            'Sorting files in the Folder',
            'Copy the content of one file into another',
            'find all files of required extension'
        ]
        filechoice = st.selectbox("Select Activity to perform", fileactivity)
        if filechoice == 'Find all files which are older than x days':
            req_path = st.text_input("Enter your path")
            if req_path:
                # Validate before listing: os.listdir raises on a missing
                # path or on a regular file.
                if not os.path.exists(req_path):
                    st.write("not exist")
                elif os.path.isfile(req_path):
                    st.write("please provide directory path")
                else:
                    st.write(os.listdir(req_path))
                    days = st.slider(
                        "Slide how much old data you wanna see, slide to hide newer files",
                        1, 500)
                    today_date = datetime.datetime.now()
                    for each_file in os.listdir(req_path):
                        each_file_path = os.path.join(req_path, each_file)
                        if os.path.isfile(each_file_path):
                            # getctime is creation time on Windows and the
                            # last metadata change on Unix.
                            file_creat_date = datetime.datetime.fromtimestamp(
                                os.path.getctime(each_file_path))
                            difference_in_days = (today_date -
                                                  file_creat_date).days
                            if difference_in_days > days:
                                st.write(each_file_path, difference_in_days)
            else:
                st.warning("Enter your path first")

        if filechoice == 'Sorting files in the Folder':
            path = st.text_input(
                "Enter path to the folder whose files u want to sort ")
            if path:
                if not os.path.isdir(path):
                    st.error("path invalid")
                else:
                    files = os.listdir(path)
                    st.write(files)
                    folder = st.multiselect(
                        "Folders name u want to make, in which u want to kept files?",
                        ("pdf", "txt", "image", "python"))
                    st.write("You selected ", len(folder), folder)

                    # Create the selected folders inside the given directory.
                    for folder_name in folder:
                        os.makedirs(os.path.join(path, folder_name),
                                    exist_ok=True)

                    # File extension -> name of the folder it is sorted into.
                    extension_folders = {
                        ".png": "image",
                        ".jpg": "image",
                        ".pdf": "pdf",
                        ".txt": "txt",
                    }
                    for f in files:
                        source = os.path.join(path, f)
                        extension = os.path.splitext(f)[1].lower()
                        target = extension_folders.get(extension)
                        # Only move regular files, and only into folders the
                        # user selected (those are the ones that exist).
                        if target not in folder or not os.path.isfile(source):
                            continue
                        destination = os.path.join(path, target, f)
                        if not os.path.exists(destination):
                            shutil.move(source, destination)
            else:
                st.warning("Write the path first")

        if filechoice == 'Copy the content of one file into another':
            st.write("Enter following details ")
            sfile = st.text_input("Enter your source file location")
            dfile = st.text_input("Enter your destination file location")
            if sfile and dfile:
                # Context managers close both files even if an error occurs.
                with open(sfile, 'r') as sfo:
                    content = sfo.read()
                with open(dfile, 'w') as dfo:
                    dfo.write(content)
                if st.checkbox("Show Copied Content"):
                    st.write(content)
            else:
                st.warning("First enter the above details")

        if filechoice == 'find all files of required extension':
            st.write("Enter following details ")
            rpath = st.text_input("Enter your directory path")
            if rpath:
                if os.path.isfile(rpath):
                    st.write(
                        f"The given path {rpath} is a file. Please! pass only directory path"
                    )
                elif not os.path.isdir(rpath):
                    st.error("path invalid")
                else:
                    st.write("logic starts")
                    all_files = os.listdir(rpath)

                    if len(all_files) == 0:
                        st.write(f"The given path {rpath} is an empty")
                    else:
                        req_ex = st.text_input(
                            "Enter the required files extension such as .py/.txt/.log/.sh/.zip"
                        )
                        req_files = [
                            eachfile for eachfile in all_files
                            if eachfile.endswith(req_ex)
                        ]
                        if len(req_files) == 0:
                            # This message needs the f prefix, otherwise the
                            # placeholders are shown literally.
                            st.write(
                                f"There are no {req_ex} files in the location of {rpath}"
                            )
                        else:
                            st.info(
                                f"There are {len(req_files)} files in the location of {rpath} with an extension of {req_ex}"
                            )
                            for i in req_files:
                                st.success(i)
            else:
                st.warning("Enter the above details first")

    if choice == 'PDF TOOLS':
        st.header("PROCESSING OF PDF RELATED STUFF")
        st.subheader("Choose the option")
        pdfact = [
            'Extract data of pdf', 'Merge pages of pdf', 'Get document info',
            'Convert Photos to pdf'
        ]
        pdfchoice = st.selectbox("Select Activity to perform", pdfact)
        data = st.file_uploader("Upload a file", type=['pdf'])
        if data:
            pdf_path = 'uploads/file.pdf'
            save_pdf(data, pdf_path)
            if pdfchoice == 'Extract data of pdf':
                result = extract_text_from_pdf(pdf_path)
                if result:
                    st.subheader("extracted data from PDF file")
                    # The name box has to exist before the button is pressed:
                    # a button is only True for the single re-run triggered by
                    # the click, so an input created inside the button branch
                    # disappears as soon as the user types into it.
                    name = st.text_input('file name (no extensions)')
                    if st.button('save') and name:
                        savepath = f'extracted/{name}.txt'
                        try:
                            with open(savepath, 'w') as f:
                                f.write(result)
                        except (OSError, UnicodeError):
                            st.error('error occured during saving the file')
                        else:
                            st.success(f"saved file to {savepath}")
                    st.write(result)
                else:
                    st.error("could not extract data from file")

            elif pdfchoice == 'Merge pages of pdf':
                with open(pdf_path, "rb") as PDFfile:
                    pdfread = p2.PdfFileReader(PDFfile)
                    st.info(f'Uploaded file has {pdfread.numPages} pages')
                    # Restrict the choice to pages that exist (0-based); a
                    # single-page file has nothing to choose from.
                    last_page = pdfread.numPages - 1
                    if last_page > 0:
                        pageno = st.slider(
                            "Which page you want to merge to another pdf", 0,
                            last_page)
                    else:
                        pageno = 0
                    st.write("Upload file to be merge")
                    mergedata = st.file_uploader("Upload another file",
                                                 type=['pdf'])
                    if mergedata:
                        # Store the second upload separately so it does not
                        # overwrite the first PDF, which is still being read.
                        merge_path = 'uploads/merge.pdf'
                        save_pdf(mergedata, merge_path)
                        with open(merge_path, "rb") as PDFfile2:
                            pdfread2 = p2.PdfFileReader(PDFfile2)
                            st.info(
                                f'Uploaded file has {pdfread2.numPages} pages')

                            # Output = every page of the second PDF followed
                            # by the selected page of the first one.
                            pdfwriter = p2.PdfFileWriter()
                            for page_index in range(pdfread2.numPages):
                                pdfwriter.addPage(pdfread2.getPage(page_index))
                            pdfwriter.addPage(pdfread.getPage(pageno))
                            # Write while both source files are still open;
                            # the page objects read from them lazily.
                            with open('new_pdf1.pdf', 'wb') as output_file:
                                pdfwriter.write(output_file)
                        st.write("page is added successfully to another pdf")
                    else:
                        st.warning("upload another file")

            elif pdfchoice == 'Get document info':
                st.write(getinfo(pdf_path))

            else:
                st.error("select an option")
        else:
            st.error("upload a file to continue.....")

    if choice == 'INFO ABOUT FILES/FOLDERS/DIRECTORIES':
        path = st.text_input("Enter path to which you want information ")
        if path:
            st.write(list(os.walk(path)))

            st.subheader("All Directories")
            for _root, dirs, _files in os.walk(path, topdown=False):
                for each_dir in dirs:
                    st.write(each_dir)

            st.subheader("All Files")
            for _root, _dirs, file_names in os.walk(path, topdown=False):
                for each_file in file_names:
                    st.write(each_file)

            st.success("Only this much u have")

        else:
            st.info("Enter the path first")

    # Must match the entry in `activities`, otherwise this section can never
    # be reached.
    if choice == 'DOWNLOAD IMAGE':
        st.header("Enter following details to download image from web")
        url = st.text_input("Enter img url to download from web")
        if url:
            file_name = st.text_input("Enter filename to save as:")

            def dl_jpg(url, file_path, file_name):
                """Download `url` to `file_path + file_name + '.jpg'` and return that path."""
                full_path = file_path + file_name + '.jpg'
                urllib.request.urlretrieve(url, full_path)
                return full_path

            # Wait for a file name instead of downloading to "images/.jpg".
            if file_name:
                with st.spinner('wait...'):
                    path = dl_jpg(url, 'images/', file_name)
                    st.image(path, use_column_width=True)
        else:
            st.warning("Enter the url first")

    if choice == 'BATCH FILE CREATION':
        st.header("CREATE MULTIPLE FOLDERS AT ANY LOCATION")
        path = st.text_input("Write path where u want to create folder ")
        if path:
            n = st.slider("Enter how many folders you want inside given path",
                          1, 100)
            folname = st.text_input(
                "Write the name u want to give to the folder")
            yn = st.text_input("if u want subfolders inside each folder y/n ")
            # Compare by value; `is` on a string literal tests identity.
            if yn == 'y':
                n1 = st.slider(
                    "Enter how many sub folders you want inside each folder",
                    1, 100)
                infolname = st.text_input(
                    "Write the name u want to give to the subfolders")
            else:
                n1 = 0
                infolname = None
            if n and folname and yn:
                createFol(path, n, folname, yn, n1, infolname)
        else:
            st.warning("First write the path")

    if choice == 'CONVERT PHOTOS TO PDF':
        st.header("Image to pdf converter")
        n = st.slider("Enter how many photos you want to upload")
        raw_paths = st.text_input("Write each photo path separated by comma")
        # "".split(",") is [''], which is truthy; drop blank entries so an
        # empty input is really treated as "nothing entered".
        result = [p.strip() for p in raw_paths.split(",") if p.strip()]
        if n and result:
            st.write(result)
            for i in result:
                st.write(i)
                # splitext keeps dots in directory names intact.
                pdf_name = f"{os.path.splitext(i)[0]}.pdf"
                with Image.open(i) as img:
                    img.save(pdf_name, 'PDF', resolution=100, save_all=True)
                st.write(f"store as {pdf_name}")
            st.success("Done")
        else:
            st.warning("Fill the above details")

    # NOTE(original author): "not moving any problem in s.move" - presumably
    # a known issue in the move step of secretStore; verify.
    if choice == 'SECURE FILES (STORE SECRETLY)':
        st.header('SECURE FILES (STORE SECRETLY)')
        from_dir = st.text_input("Enter the dir of folder to protect")
        fname = st.text_input("Enter folder name to create")
        pas = st.text_input("Enter the password")
        if from_dir and fname and pas:
            secretStore(from_dir, fname, pas)
        else:
            st.warning("Fill the above details")

    if choice == 'SET AN EMAIL REMINDER ALARM':
        st.warning('slow on windows, works well on linux')
        st.header('SET AN ALARM FOR SENDING EMAIL')
        subject = st.text_input("enter the subject for reminder")
        alarmH = st.number_input("At what hour do you want alarm to ring",
                                 value=1)
        alarmM = st.slider("At what min do you want alarm to ring")
        periods = ['am', 'pm']
        ap = st.selectbox("Select am/pm", periods)
        st.info(alarmH)
        st.info(ap)
        if st.button('start'):
            # No truthiness check on the inputs: hour 0 and minute 0 are
            # valid times.
            # Convert the 12-hour input to a 24-hour hour value.
            if ap == 'pm' and alarmH < 12:
                alarmH = alarmH + 12
                st.write(alarmH)
            elif ap == 'am' and alarmH == 12:
                alarmH = 0
            st.info(f" At {alarmH} : {alarmM} {ap} alarm will ring")
            activate_alarm(alarmH, alarmM)

        else:
            st.warning("Fill the above details")

    if choice == 'URL LINK EXTRACTOR':
        st.header("EXTRACT & SAVE LINKS FROM A URL")
        st.info("enter a url below and a file name if you want to save links")
        url = st.text_input("enter a valid a URL")
        save_path = st.text_input(
            "enter the path where you want to save links as txt file")
        if st.button('start'):
            with st.spinner('please wait...'):
                if url:
                    links = link_extractor(url)
                    st.write(links)
                    if save_path:
                        # Test the suffix, not a substring anywhere in the path.
                        if not save_path.endswith('.txt'):
                            save_path += '.txt'
                        # NOTE(review): writelines adds no separators; this
                        # assumes link_extractor returns newline-terminated
                        # strings - confirm.
                        with open(save_path, 'a') as f:
                            f.writelines(links)
                        st.success('saved to ' + save_path)

    if choice == 'WEB PAGE DATA EXTRACTOR':
        st.header("EXTRACT & SAVE DATA FROM A URL")
        st.info("enter a url below and a file name if you want to save data")
        url = st.text_input("enter a valid a URL")
        save_path = st.text_input(
            "enter the path where you want to save data as txt file")

        if st.button('extract'):
            with st.spinner('please wait...'):
                if url:
                    links = data_extractor(url)
                    st.write(links)
                    if save_path:
                        if not save_path.endswith('.txt'):
                            save_path += '.txt'
                        with open(save_path, 'a', errors='ignore') as f:
                            f.write(links)
                        st.success('saved to ' + save_path)

    if choice == 'Topics identifier in text'.upper():
        st.header("Topics identifier in multisentenced data")
        st.info("enter/ copy paste some text paragraph")
        data = st.text_area('enter sentences with 1 sentence on 1 line')
        topics = st.slider('number of topics ', 2, 4)
        if st.button("extract topics"):
            if data:
                result = extract_topic(data, num_topics=topics)
                st.write(result)

    if choice == 'Schedule application'.upper():
        st.header('Schedule application'.upper())
        with st.spinner('loading softwares'):
            softwares = foo(
                winreg.HKEY_LOCAL_MACHINE, winreg.KEY_WOW64_32KEY) + foo(
                    winreg.HKEY_LOCAL_MACHINE, winreg.KEY_WOW64_64KEY) + foo(
                        winreg.HKEY_CURRENT_USER, 0)
            # (index, name) pairs so the selection can be mapped back to its
            # entry in `softwares`.
            names = [(k, v.get('name')) for k, v in enumerate(softwares)]
            software = st.selectbox("select a software", names)
            if software:
                sw_path = softwares[software[0]].get('path')
                st.write(sw_path)
                exefiles = []
                # Compare by value, and require a directory so listdir
                # cannot fail on a missing path or a plain file.
                if sw_path and sw_path != 'undefined' and os.path.isdir(
                        sw_path):
                    st.write('found software location')
                    exefiles = [
                        f for f in os.listdir(sw_path)
                        if f.lower().endswith('.exe')
                    ]
                if exefiles:
                    exename = st.selectbox('select an executable', exefiles)
                    exepath = os.path.join(sw_path, exename)
                    st.write(exepath)

                    run_date = st.date_input("select a date to schedule")
                    run_time = st.time_input("select a time")
                    run_at = datetime.datetime.combine(run_date, run_time)
                    st.subheader('selected date time schedule')
                    st.write(run_at)
                    if st.button('confirm'):
                        with st.spinner("scheduling, please wait"):
                            schedule(exepath, run_at)
                            st.success("schedule task")
                else:
                    st.error("could not find exe file, try another")

    if choice == 'Clean temporary files'.upper():
        st.error("Warning delete files permanently")
        st.warning('be careful')
        st.header('Clean temporary files'.upper())
        target_dir = st.text_input('enter a directory path')
        if os.path.exists(target_dir):
            o = st.radio('select an option',
                         ('delete file by search', 'delete by extension',
                          'delete temporary'))
            # The comparison must use the exact radio label, otherwise the
            # branch is unreachable.
            if o == 'delete file by search':
                name = st.text_input('filename(fullname with extension)')
                if name and st.button('confirm'):
                    with st.spinner("deleting"):
                        delete_file(target_dir, name)
                        st.success("done")
            if o == 'delete by extension':
                ext = st.text_input('enter extension like *.pyc')
                if ext and st.button('confirm'):
                    with st.spinner("deleting"):
                        delete_file_by_extension(target_dir, ext)
                        st.success("done")
            if o == 'delete temporary':
                if st.button('confirm'):
                    with st.spinner("deleting"):
                        temp_file_removal(target_dir)
                        st.success("done")
        else:
            st.error("path invalid")

    if choice == 'Excel to PDF'.upper():
        st.header('Convert Excel file to pdf file'.upper())
        excelpath = st.text_input('enter path of excel file')
        pdfpath = st.text_input('enter path for new pdf file')
        if excelpath and pdfpath and st.button('convert'):
            with st.spinner('converting'):
                convertpdf(excelpath, pdfpath)
                st.success('done')
        else:
            st.error('invalid details')

    if choice == 'Word to PDF'.upper():
        st.header('Convert Word file to pdf file'.upper())
        # Prompt corrected: this section takes a Word file, not Excel.
        wordpath = st.text_input('enter path of word file')
        pdfpath = st.text_input('enter path for new pdf file')
        if wordpath and pdfpath and st.button('convert'):
            with st.spinner('converting'):
                convert(wordpath, pdfpath)
                st.success('done')
Ejemplo n.º 3
0
# Title
st.title('Data App -  Prevendo Valores de Imóveis')

# Subtitle
st.markdown(
    'Esse é um Data App utilizado para exibir a solução de Machine Learning para o problema de predição'
)

# Inspect the dataset
st.subheader('Selecionado apenas um pequeno conjunto de atributos')

# Attributes shown by default
defaultcols = ['RM', 'PTRATIO', 'LSTAT', 'MEDV']

# Let the user pick the attributes through a multiselect
cols = st.multiselect('Atributos', data.columns.tolist(), default=defaultcols)

# Show the first 10 rows of the dataframe for the chosen attributes
st.dataframe(data[cols].head(10))

st.subheader('Distribuição de Imóveis por Preço')

# Define the price range: a two-handle slider from the minimum MEDV up to 150,
# initially set to (10, 100)
faixa_valores = st.slider('Faixa de Preço', float(data.MEDV.min()), 150.,
                          (10.0, 100.0))

# Filter the data to rows whose MEDV lies inside the selected range
dados = data[data['MEDV'].between(left=faixa_valores[0],
                                  right=faixa_valores[1])]

# Plot the distribution of the data
Ejemplo n.º 4
0
def get_UN_data():
    """Read ``agri.csv.gz`` from the demo S3 bucket and index it by "Region"."""
    bucket = "https://streamlit-demo-data.s3-us-west-2.amazonaws.com"
    source = bucket + "/agri.csv.gz"
    return pd.read_csv(source).set_index("Region")


# Load the data; without a connection there is nothing to show, so the rest of
# the page is rendered only when loading succeeded (otherwise `df` would be
# unbound and the next statement would raise NameError).
try:
    df = get_UN_data()
except urllib.error.URLError as e:
    st.error("""
        **This demo requires internet access.**

        Connection error: %s
        """ % e.reason)
else:
    countries = st.multiselect("Choose countries", list(df.index), ["Armenia"])
    if not countries:
        st.error("Please select at least one country.")
    else:
        # Scale by 1e6 without mutating a selection of `df` in place.
        data = df.loc[countries] / 1000000.0
        st.write("### Gross Agricultural Production ($B)", data.sort_index())

        # Reshape to long format: the former column labels end up in "x"
        # (cast to float) and the cell values in "y".
        data = data.T.reset_index()
        data = pd.melt(data, id_vars=["index"]).rename(columns={
            "index": "x",
            "value": "y"
        })
        data["x"] = data["x"].astype(float)

        chart_component(key="gap", data=data.to_json(orient="records"))
Ejemplo n.º 5
0
# Open the workbook named after the current selection (presumably the chosen
# expert - confirm against the code that sets zhuanjiaXZ).
zhuangjiaXZ_file = str(zhuanjiaXZ) + '.xlsx'
from openpyxl import load_workbook
wb = load_workbook(zhuangjiaXZ_file)
Sheet1 = wb["Sheet1"]
zhenzhuang = set()
st.write(Sheet1.cell(1, 1).value)
qzyz = Sheet1.cell(1, 12).value  # read the weight factor from the sheet
# Fall back to 0.2 when the factor is outside [0, 1]. An empty or non-numeric
# cell gets the same default: comparing None or a string with a number would
# raise TypeError.
if not isinstance(qzyz, (int, float)) or qzyz < 0 or qzyz > 1:
    qzyz = 0.2
# Collect the distinct values found in columns 1-10 from row 3 downwards.
for i in range(3, Sheet1.max_row + 1):
    for j in range(1, 11):
        zhenzhuang.add(Sheet1.cell(i, j).value)
zhenzhuang.discard(None)  # empty cells are not selectable options
zhenzhuanglist = sorted(zhenzhuang)
# Multiselect label reads "Select your symptoms (multiple choice)".
zhenzhuangXZ = st.multiselect('选择你的症状(多选):', zhenzhuanglist)
zhenzhuangXZ_set = set(zhenzhuangXZ)

#开始症状分析
zhenzhuangGL = set()
yaofang = list()
zhenduan = set()
quanzhong = [0, 0, 0]  #症状对应每个方剂的权重
for i in range(3, Sheet1.max_row + 1):
    for j in range(1, 11):
        zhenzhuangGL.add(Sheet1.cell(i, j).value)
    zhenzhuangGL.discard(None)
    z = zhenzhuangGL.intersection(zhenzhuangXZ_set)  #两个集合的交集
    if len(zhenzhuangGL) == 0:
        quanzhong.append(len(z))
    else:
Ejemplo n.º 6
0
# Sidebar: title, affiliation, metrics introduction and contact details.
for _sidebar_text in (
        '# COVID-19 Data and Reporting',
        '## **EpiCenter for Disease Dynamics**',
        '**School of Veterinary Medicine   UC Davis**',
        "## Key COVID-19 Metrics",
        "COVID-Local provides basic key metrics against which to assess pandemic response and progress toward reopening. See more at https://www.covidlocal.org/metrics/",
        'For additional information  please contact *[email protected]*  https://ohi.vetmed.ucdavis.edu/centers/epicenter-disease-dynamics',
):
    st.sidebar.markdown(_sidebar_text)

# Main area: county selector limited to the California rows of `confirmed`.
st.markdown('## Select counties of interest')
_california_rows = confirmed[confirmed.Province_State == 'California']
CA_counties = _california_rows.Admin2.unique().tolist()

COUNTIES_SELECTED = st.multiselect('Select counties',
                                   CA_counties,
                                   default=['Yolo'])

# Sidebar: description of the graphics that follow.
for _sidebar_text in (
        "One of the key metrics for which data are widely available is the estimate of **daily new cases per 100,000 population**. Here, in following graphics, we will track",
        "(A) Estimates of daily new cases per 100,000 population (averaged over the last seven days)",
        "(B) Daily incidence (new cases)",
        "(C) Cumulative cases and deaths",
        "(D) Daily new tests",
):
    st.sidebar.markdown(_sidebar_text)

st.sidebar.markdown(
    "Data source: Data for cases are procured automatically from **COVID-19 Data Repository by the Center for Systems Science and Engineering (CSSE) at Johns Hopkins University**."
Ejemplo n.º 7
0
def main():
    """Render the CoV-AbDab CSV-to-fasta converter page.

    Reads the CSV from an uploaded file if one is given, otherwise from the
    URL in the text box, lets the user pick header/sequence columns and
    formatting options, and on "Convert to fasta" shows a preview plus a
    download button for the generated fasta text. Failures while reading the
    CSV or converting it are reported on the page instead of propagating.
    """
    st.set_page_config(
        page_title="CoV-AbDab converter",
        page_icon=":rocket:",
        layout="centered",
    )

    st.title("CoV-AbDab converter")
    st.header("About")
    # The DOI link carries an explicit https:// scheme; without it the
    # markdown renderer treats the target as a relative link.
    st.markdown("""
        This script converts the CSV file from the
        [Coronavirus Antibody Database (CoV-AbDab)](http://opig.stats.ox.ac.uk/webapps/covabdab/)
        to a fasta sequence format.

        You can enter a URL to the CSV file, or upload another file below.

        _Disclaimer: This script was written independently from the authors and the team
        that developed the Coronavirus Antibody Database. If you use this database,
        please cite the original article:_

        >Matthew I. J. Raybould, Aleksandr Kovaltsuk, Claire Marks, Charlotte M. Deane (2020) CoV-AbDab: the Coronavirus Antibody Database. _Bioinformatics._ doi:[10.1093/bioinformatics/btaa739](https://doi.org/10.1093/bioinformatics/btaa739).

        More Streamlit scripts:
        [github.com/RalfG/streamlit-scripts](https://github.com/RalfG/streamlit-scripts)

        """)

    st.header("Convert CSV to fasta")
    st.subheader("Input")
    csv_url = st.text_input(
        "Enter the URL to the CoV-AbDab CSV file:",
        value=
        "http://opig.stats.ox.ac.uk/webapps/covabdab/static/downloads/CoV-AbDab_230321.csv"
    )
    csv_uploaded = st.file_uploader(label="Or upload the CoV-AbDab CSV here:",
                                    type=[".csv", ".CSV"])

    # An uploaded file takes precedence over the URL. Reading can fail (bad
    # URL, no network, malformed file); report it the same way conversion
    # errors are reported below and stop, since every widget that follows
    # needs the column names.
    try:
        if csv_uploaded:
            csv_data = pd.read_csv(csv_uploaded)
            output_name = parse_output_name(csv_uploaded.name)
        else:
            csv_data = pd.read_csv(csv_url)
            output_name = parse_output_name(csv_url)
    except Exception as e:
        st.error(":x: Could not read the CSV file.")
        st.exception(e)
        return

    available_columns = csv_data.columns.to_list()
    header_columns = st.multiselect(
        label="Header columns",
        options=available_columns,
        # Pre-select the usual columns, but only those present in this file.
        default=[
            n for n in ["Name", "Ab or Nb", "Origin"] if n in csv_data.columns
        ],
        help="Select columns to use in the fasta entry headers.")
    sequence_columns = st.multiselect(
        label="Sequence columns",
        options=available_columns,
        default=[
            n for n in ["CDRH3", "CDRL3", "VH or VHH", "VL"]
            if n in csv_data.columns
        ],
        help=
        ("Select columns with peptide sequences. Each column will be written as as "
         "separate entry in the fasta file."))
    max_header_length = st.number_input(
        label="Maximum header length (set to zero for unlimited length)",
        help=
        "Setting multiple header columns can result in lengthy headers. This option truncates headers that are too long.",
        min_value=0,
        value=0)
    max_line_length = st.number_input(
        label="Maximum sequence line length",
        help=
        "If the sequence is longer than this value, it will be wrapped over multiple lines.",
        min_value=10,
        value=58,
    )
    add_row_number = st.checkbox(
        label="Prefix CSV row number to fasta headers",
        help=
        "Recommended to avoid duplicate headers and to be able to trace back sequences to their original source.",
        value=True)
    cleanup_header = st.checkbox(
        label="Replace non-word characters with underscores in headers",
        help=
        "Some software does not accept special characters in fasta headers",
        value=True)

    if st.button("Convert to fasta"):
        status_placeholder = st.empty()
        status_placeholder.info(":hourglass_flowing_sand: Converting...")

        try:
            # Zero in the widget means "unlimited", passed on as None.
            header_limit = None if max_header_length == 0 else max_header_length
            entries = list(
                dataframe_to_fasta_entry(csv_data,
                                         header_columns,
                                         sequence_columns,
                                         add_row_number=add_row_number,
                                         cleanup_header=cleanup_header,
                                         max_header_length=header_limit,
                                         max_line_length=max_line_length))
            entries_top = "".join(entries[:10])
            entries_b64 = encode_object_for_url("".join(entries))

        except Exception as e:
            status_placeholder.error(":x: Something went wrong.")
            st.exception(e)

        else:
            status_placeholder.success(":heavy_check_mark: Finished!")

            st.subheader("Original CSV")
            st.markdown("Only the first 500 entries are shown.")
            st.write(csv_data.head(500))

            st.subheader("Fasta entries")
            st.markdown("Only the first ten entries are shown.")
            st.code(entries_top, language=None)

            styled_download_button(f'data:file/fasta;base64,{entries_b64}',
                                   "Download fasta",
                                   download_filename=output_name)
Ejemplo n.º 8
0
def user_input():
    "# In this section we will try to explore ways to interarate with users"

    # Add interactivity with widgets
    "Checkbox with `st.checkbox` command"
    s = st.checkbox("Show dataframe")

    if s:
        chart_data = pd.DataFrame(np.random.randn(20, 3), columns=["a", "b", "c"])
        chart_data

    # Use a selectbox with options
    "Display a select widget with `st.selectbox`"

    # chart_data = pd.DataFrame(np.random.randn(20, 3), columns=["a", "b", "c"])
        
    option = st.selectbox("Which number do you like best?", ["DS-610","DS-620","DS-640"])

    "You selected: ", option

    # second_option = st.selectbox("Which number do you like best?", chart_data["a"])

    # "You selected: ", second_option

    "Display a multiselect widget with `st.multiselect`"

    options = st.multiselect('What are your favorite colors',['Green', 'Yellow', 'Red', 'Blue'],['Yellow', 'Red'])
    st.write('You selected:', options)

    "Display a radio button widget with `st.radio`"

    genre = st.radio("What's your favorite movie genre",('Comedy', 'Drama', 'Documentary'))

    if genre == 'Comedy':
        st.write('You selected comedy.')
    else:
        st.write("You didn't select comedy.")


    "Display a slider widget with `st.slider`. This supports int, float, date, time, and datetime types."
    age = st.slider('How old are you?', 0, 130, 10,10,format="%d days")
    st.write("I'm ", age, 'years old')

    "Time slider example"
    appointment = st.slider("Schedule your appointment:",value=(time(11, 30), time(12, 45)))
    st.write("You're scheduled for:", appointment)

    "Date slider example"
    start_time = st.slider("When do you start?",datetime(2010, 1, 1, 0, 0),datetime(2020, 1, 1, 9, 30),value=datetime(2020, 1, 1, 9, 30),format="MM/DD/YY - hh:mm")
    st.write("Start time:", start_time)

    "Display a slider widget to select items from a list using `st.select_slider`"
    color = st.select_slider('Select a color of the rainbow',options=['red', 'orange', 'yellow', 'green', 'blue', 'indigo', 'violet'])
    st.write('My favorite color is', color)

    "Display a single-line text input widget using `st.text_input`"
    title = st.text_input('Movie title', 'Godzilla vs Kong')
    st.write('The current movie title is', title)

    "Display a multi-line text input widget using `st.text_area`"
    txt = st.text_area('Text to analyze',"")
    st.write('Sentiment:', txt)

    "Display a button widget with `st.button`, returns bool"
    if st.button('Say hello'):
        st.write('Why hello there')
    else:
        st.write('Goodbye')

    "Display a date input widget with `st.date_input`"
    d = st.date_input("When's your birthday", date(2019, 7, 6))
    st.write("Your birthday is:", d)

    """
Ejemplo n.º 9
0
def main():
    """Render the CSV pre-processing page: inspect, fill NULLs and plot.

    Flow, top to bottom: upload a CSV, show its shape / head / per-column
    NULL statistics, let the user drop or fill NULLs, offer a download link
    for the resulting frame, and draw one of several charts. Nothing below
    the uploader is rendered until a file has been provided.

    Uses helpers defined outside this function: ``cat_input``,
    ``download_link``, ``histogram``, ``bar``, ``boxplot``, ``scatter`` and
    ``corre``.
    """
    # Sentinel entry offered next to the real column names in the multiselect.
    drop_option = 'Dropar linhas com NULL'

    st.image('media/eu.jpeg', width=300)
    st.header('Pré-processador do Tito')
    st.image('media/neo.gif')

    file = st.file_uploader('Manda o arquivo CSV', type='csv')
    if file is None:
        return

    df = pd.read_csv(file)
    st.markdown(f'**Shape do arquivo = {df.shape}**')

    slider = st.slider('Número de linhas', 1, 100)
    st.dataframe(df.head(slider))

    # Per-column overview: dtype, NULL count and NULL rate.
    st.markdown('**Colunas**')
    df_cols = pd.DataFrame({
        'colunas': df.columns,
        'tipos': df.dtypes
    }).reset_index(drop=True)
    df_null_bolean = df.isnull()
    nulls_sum_cols = df_null_bolean.sum().values
    df_cols['Número de NULLS na coluna'] = nulls_sum_cols
    df_cols['Taxa de NULLS na coluna'] = nulls_sum_cols / df.shape[0]
    null_cols = df.columns[df_cols['Número de NULLS na coluna'] > 0]
    st.write(pd.DataFrame(df_cols))

    st.markdown('**Estatísticas Descritivas sobre os dados numéricos**')
    st.dataframe(df.describe())

    # Rows with at least one NULL, restricted to the columns that contain NULLs.
    st.markdown('Amostras com nulos')
    df_nulos = df[df_null_bolean.any(axis=1)][null_cols]
    slider_1 = st.slider('Número de linhas ', 1, 100)
    st.dataframe(df_nulos.head(slider_1))

    st.markdown('**Preencher colunas que têm NULL**')
    cols_select = st.multiselect(
        'Selecione as colunas a serem preenchidas',
        tuple([drop_option] + list(null_cols)))
    st.markdown(f'Escolheu = {cols_select}')

    # The drop sentinel wins over any real column picked alongside it; it is
    # not a column name, so letting it reach df[cols_select] raises KeyError.
    if drop_option in cols_select:
        df = df.dropna(axis=0, how='any')
        slider_7 = st.slider('Número de linhas  ', 1, 100)
        cols_select = None
        st.markdown('**Novo DataFrame**')
        st.markdown(f'**Shape do novo = {df.shape}**')
        st.dataframe(df.head(slider_7))

    if cols_select:
        df_nulls = pd.DataFrame({
            'colunas': cols_select,
            'tipos': df[cols_select].dtypes
        })
        # Object-dtype columns are handed to cat_input; the numeric strategy
        # chosen below only applies to the remaining columns.
        cat_cols = list(df_nulls[df_nulls['tipos'] == 'object'].index)

        cols_select = list(set(cols_select) - set(cat_cols))
        st.markdown('Preencher os nulos')
        opt = st.radio(
            'Escolha um método de preenchimento(colunas categoricas serão preenchidas com a moda)',
            ('Dropar linhas com NULL', 'Zero', 'Media', 'Moda', 'Mediana',
             'Interpolacao Linear'))

        # Apply the chosen strategy; fill_msg stays None when nothing was
        # applied (the first radio entry has no handler and is a no-op).
        fill_msg = None
        if opt == 'Zero':
            df[cols_select] = df[cols_select].fillna(0)
            fill_msg = 'Colunas numéricas escolhidas foram preenchidas com 0'
        elif opt == 'Media':
            df[cols_select] = df[cols_select].fillna(df[cols_select].mean())
            fill_msg = 'Colunas escolhidas foram preenchidas com a média de cada coluna'
        elif opt == 'Moda':
            # mode() returns one row per tied mode; the first row is a Series
            # keyed by column name, which fillna applies column by column.
            # This works for one or many columns alike.
            modes = df[cols_select].mode()
            if not modes.empty:
                df[cols_select] = df[cols_select].fillna(modes.iloc[0])
            fill_msg = 'Colunas escolhidas foram preenchidas com a moda de cada coluna'
        elif opt == 'Mediana':
            df[cols_select] = df[cols_select].fillna(df[cols_select].median())
            fill_msg = 'Colunas escolhidas foram preenchidas com a mediana de cada coluna'
        elif opt == 'Interpolacao Linear':
            # NOTE(review): the int64 cast truncates fractional values and
            # raises if a NULL survives interpolation (e.g. a leading NULL);
            # kept as in the original, confirm this is intended.
            df[cols_select] = df[cols_select].interpolate(
                method='linear').astype('int64')
            fill_msg = 'Colunas escolhidas foram foram interpoladas'

        if fill_msg is not None:
            st.markdown(fill_msg)
            st.markdown('Novas colunas')
            df = cat_input(df, cat_cols)
            n_rows = st.slider('Número de linhas  ', 1, 100)
            # Show only the rows that originally contained NULLs.
            st.dataframe(df.loc[df_nulos.index,
                                list(cols_select) +
                                list(cat_cols)].head(n_rows))

    if st.button('Gerar link de download do dataframe'):
        tmp_download_link = download_link(df, 'data.csv', 'Download')
        st.markdown(tmp_download_link, unsafe_allow_html=True)

    st.subheader('Visualização de Dados')
    st.image('media/Data.gif', width=300)

    # Split columns into object-dtype ("categorical") and everything else.
    num_cols_df = pd.DataFrame({'colunas': df.columns, 'tipos': df.dtypes})
    num_cols = list(num_cols_df[num_cols_df['tipos'] != object].index)
    cat_cols = list(num_cols_df[num_cols_df['tipos'] == object].index)
    any_col_or_none = tuple([None] + list(num_cols_df['colunas'].values))

    opt = st.radio(
        'Selecione a visualização desejada',
        ('Histograma', 'Barras', 'Boxplot', 'Scatterplot', 'Correlação'))
    if opt == 'Histograma':
        col = st.selectbox('Selecione a coluna numérica desejada',
                           tuple(num_cols))
        split = st.selectbox('Selecione o Hue desejado', any_col_or_none)
        if col:
            if split:
                st.write(histogram(df, col, split))
            else:
                st.write(histogram(df, col))
    if opt == 'Barras':
        y_axis = st.selectbox('Selecione a coluna numérica', tuple(num_cols))
        x_axis = st.selectbox('Selecione a coluna categórica',
                              tuple(cat_cols))
        split = st.selectbox('Selecione o Hue desejado', any_col_or_none)
        if x_axis and y_axis:
            if split:
                st.write(bar(df, x_axis, y_axis, split))
            else:
                st.write(bar(df, x_axis, y_axis))
    if opt == 'Boxplot':
        y_axis = st.selectbox('Selecione a coluna numérica', tuple(num_cols))
        x_axis = st.selectbox('Selecione a coluna categórica',
                              tuple([None] + list(cat_cols)))
        if y_axis:
            if x_axis:
                st.write(boxplot(df, y_axis, x_axis))
            else:
                st.write(boxplot(df, y_axis))
    if opt == 'Scatterplot':
        y_axis = st.selectbox('Selecione a coluna numérica do eixo y',
                              tuple(num_cols))
        x_axis = st.selectbox('Selecione a coluna numérica do eixo x',
                              tuple(num_cols))
        split = st.selectbox('Selecione o Hue desejado',
                             tuple([None] + list(cat_cols)))

        if y_axis and x_axis:
            if split:
                st.write(scatter(df, x_axis, y_axis, split))
            else:
                st.write(scatter(df, x_axis, y_axis))

    if opt == 'Correlação':
        st.write(corre(df))
Ejemplo n.º 10
0
def main():
    """Semi Automated ML App with Streamlit.

    The sidebar picks one of four activities:

    * ``EDA`` - upload a dataset and toggle summary views and simple plots.
    * ``Plots`` - upload a dataset and build a chart from chosen columns.
    * ``Model Building`` - upload a dataset, treat the last column as the
      target, and cross-validate a fixed set of classifiers on accuracy.
    * ``About`` - no handler; nothing is rendered.
    """
    activities = ["EDA", "Plots", "Model Building", "About"]
    choice = st.sidebar.selectbox("Select Activities", activities)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis")

        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])

        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Computed up front: several checkboxes below need it, and it
            # used to exist only after "Show Columns" had been ticked.
            all_columns = df.columns.to_list()

            if st.checkbox("Show Shape"):
                st.write(df.shape)

            if st.checkbox("Show Columns"):
                st.write(all_columns)

            if st.checkbox("Summary"):
                st.write(df.describe())

            if st.checkbox("Show Selected Columns"):
                selected_columns = st.multiselect("Select Columns",
                                                  all_columns)
                new_df = df[selected_columns]
                st.dataframe(new_df)

            if st.checkbox("Show Value Counts"):
                # Value counts of the last column.
                st.write(df.iloc[:, -1].value_counts())

            if st.checkbox("Correlation Plot(Matplotlib)"):
                plt.matshow(df.corr())
                st.pyplot()

            if st.checkbox("Correlation Plot(Seaborn)"):
                st.write(sns.heatmap(df.corr(), annot=True))
                st.pyplot()

            if st.checkbox("Pie Plot"):
                column_to_plot = st.selectbox("Select 1 Column", all_columns)
                pie_plot = df[column_to_plot].value_counts().plot.pie(
                    autopct="%1.1f%%")
                st.write(pie_plot)
                st.pyplot()

    elif choice == 'Plots':
        st.subheader("Data Visualization")
        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            if st.checkbox("Show Value Counts"):
                st.write(df.iloc[:, -1].value_counts().plot(kind='bar'))
                st.pyplot()

            # Customizable plot
            all_columns_names = df.columns.tolist()
            type_of_plot = st.selectbox(
                "Select Type of Plot",
                ["area", "bar", "line", "hist", "box", "kde"])
            selected_columns_names = st.multiselect("Select Columns To Plot",
                                                    all_columns_names)

            if st.button("Generate Plot"):
                st.success("Generating Customizable Plot of {} for {}".format(
                    type_of_plot, selected_columns_names))

                cust_data = df[selected_columns_names]
                # Chart kinds Streamlit renders natively.
                native_charts = {
                    'area': st.area_chart,
                    'bar': st.bar_chart,
                    'line': st.line_chart,
                }
                if type_of_plot in native_charts:
                    native_charts[type_of_plot](cust_data)
                elif type_of_plot:
                    # Remaining kinds go through the pandas plotting API.
                    cust_plot = cust_data.plot(kind=type_of_plot)
                    st.write(cust_plot)
                    st.pyplot()

    elif choice == 'Model Building':
        st.subheader("Building ML Models")
        data = st.file_uploader("Upload a Dataset", type=["csv", "txt"])
        if data is not None:
            df = pd.read_csv(data)
            st.dataframe(df.head())

            # Features are every column but the last; the last is the target.
            X = df.iloc[:, 0:-1]
            Y = df.iloc[:, -1]
            seed = 7

            models = [
                ('LR', LogisticRegression()),
                ('LDA', LinearDiscriminantAnalysis()),
                ('KNN', KNeighborsClassifier()),
                ('CART', DecisionTreeClassifier()),
                ('NB', GaussianNB()),
                ('SVM', SVC()),
            ]

            # One splitter shared by all models. random_state only has an
            # effect with shuffle=True, and recent scikit-learn raises
            # ValueError when it is passed without shuffling.
            kfold = model_selection.KFold(n_splits=10,
                                          shuffle=True,
                                          random_state=seed)
            scoring = 'accuracy'

            model_names = []
            model_mean = []
            model_std = []
            all_models = []
            for name, model in models:
                cv_results = model_selection.cross_val_score(model,
                                                             X,
                                                             Y,
                                                             cv=kfold,
                                                             scoring=scoring)
                mean_score = cv_results.mean()
                std_score = cv_results.std()
                model_names.append(name)
                model_mean.append(mean_score)
                model_std.append(std_score)
                all_models.append({
                    "model name": name,
                    "model_accuracy": mean_score,
                    "standard deviation": std_score
                })

            if st.checkbox("Metrics As Table"):
                st.dataframe(
                    pd.DataFrame(zip(model_names, model_mean, model_std),
                                 columns=["Algo", "Mean of Accuracy", "Std"]))

            if st.checkbox("Metrics As JSON"):
                st.json(all_models)
Ejemplo n.º 11
0
#indices_on_page, images_on_page = map(list, zip(*image_iterator))
#st.image(images_on_page,width=250, use_column_width=False, caption=capt)

import urllib.request
count = 0

# Disclaimer shown above the product selection (user-facing copy, in French).
st.markdown(
    'Cette sélection de produit est constituée par des algorithmes d\'intelligence artificielle, prenant en compte de multiples variables.'
)
st.markdown(
    'Par conséquent, il est important de considérer à la fois l\'image et la description produit pour déterminer la nature réelle du produit. '
)

if type_cat == 'Famille Produit':
    CATEGORIES_SELECTED = st.multiselect(
        'Dans quelle(e) catégorie(s) souhaites-tu connaître les best-seller du moment?',
        CATEGORIES)
    nb_item = st.multiselect('Nombre de Produit par Page', [10, 20, 30])
    # multiselect returns a list: use the first pick, or 10 when nothing is
    # selected (this replaces a bare `except:` around the indexing).
    nb_item = int(nb_item[0]) if nb_item else 10
    mask_categories = df["Category"].isin(CATEGORIES_SELECTED)
    # reset_index() returns a new frame, so the filtered slice of df is not
    # mutated in place; the old index becomes the first column.
    data = df[mask_categories].reset_index()

    # Keep the first nb_item rows and drop the old-index column. iloc slices
    # are clamped to the frame's bounds, so no exception handling is needed.
    data = data.iloc[:nb_item, 1:]
Ejemplo n.º 12
0
def _show_prediction(x_test, model):
    """Predict on ``x_test``, print the result and optionally chart it."""
    y_new, _ = predict(x_test, model)
    result = display_prediction(y_new)
    st.write(result)
    if st.checkbox('Display Probability Graph'):
        fig = px.bar(result, x="Classes", y="Probability", color='Classes')
        st.plotly_chart(fig, use_container_width=True)


def main():
    """Render the Skin Lesion Analyzer app.

    The sidebar switches between two pages: one that runs the model on a
    bundled sample image and one that runs it on an image uploaded by the
    user. Each page reveals its steps through nested checkboxes.

    Uses helpers defined outside this function: ``load_mekd``,
    ``load_models``, ``data_gen``, ``predict``, ``display_prediction`` and
    the ``DATAPATH`` constant.
    """
    st.sidebar.header('Skin Lesion Analyzer - App')
    st.sidebar.subheader('Choose option to proceed:')
    page = st.sidebar.selectbox("", ["Sample Image", "Upload Your Image"])

    if page == "Sample Image":
        st.header("Sample Image Prediction for Skin Lesion")
        st.markdown("""
        **Let's do it. Let's get you some Predictions**

        You need to choose Sample Image
        """)

        mov_base = ['Sample Data I']
        movies_chosen = st.multiselect('Choose Sample Data', mov_base)
        n_chosen = len(movies_chosen)

        # Exactly one status message per state.
        if n_chosen > 1:
            st.error('Please select Sample Data')
        elif n_chosen == 1:
            st.success("You have selected Sample Data")
        else:
            st.info('Please select Sample Data')

        if n_chosen == 1 and st.checkbox('Show Sample Data'):
            st.info("Showing Sample data---->>>")
            image = load_mekd()
            st.image(image, caption='Sample Data', use_column_width=True)
            st.subheader("Choose Training Algorithm!")
            if st.checkbox('Keras'):
                model = load_models()
                st.success("Keras Model Loaded!")
                if st.checkbox('Show Prediction Probablity on Sample Data'):
                    x_test = data_gen(DATAPATH + '/ISIC_0024312.jpg')
                    _show_prediction(x_test, model)

    elif page == "Upload Your Image":

        st.header("Upload Your Image")

        file_path = st.file_uploader('Upload an image', type=['png', 'jpg'])
        has_upload = file_path is not None

        if has_upload:
            x_test = data_gen(file_path)
            image = Image.open(file_path)
            img_array = np.array(image)

            st.success('File Upload Success!!')
        else:
            st.info('Please upload Image file')

        # The checkbox is always rendered, but its body needs img_array and
        # x_test, which only exist once a file has been uploaded.
        if st.checkbox('Show Uploaded Image') and has_upload:
            st.info("Showing Uploaded Image ---->>>")
            st.image(img_array,
                     caption='Uploaded Image',
                     use_column_width=True)
            st.subheader("Choose Training Algorithm!")
            if st.checkbox('Keras'):
                model = load_models()
                st.success("Hooray !! Keras Model Loaded!")
                if st.checkbox(
                        'Show Prediction Probablity for Uploaded Image'):
                    _show_prediction(x_test, model)
Ejemplo n.º 13
0
    #------------

    st.sidebar.image(tresh, use_column_width=True)  # width=700)
    st.sidebar.image(gray, use_column_width=True)  # width=700)

# Intro copy for the audio player (user-facing text, kept in Italian).
st.markdown('''

## Ascoltami
Mentre guardi i dati rilassati ascoltando questi suoni della natura.
''')
st.audio(
    'https://www.videomaker.com/sites/videomaker.com/files/downloads/free-sound-effects/Free_Thunder_6036_01.wav',
    format='audio/wav',
    start_time=0)

st.header("Covid Analisi")
# National trend data, fetched from the pcm-dpc COVID-19 GitHub repository.
df = pd.read_csv(
    "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-andamento-nazionale/dpc-covid19-ita-andamento-nazionale.csv"
)
# Keep only the first 10 characters of each entry in the "data" column
# (presumably the YYYY-MM-DD part of a timestamp; verify against the CSV).
df["data"] = [stamp[:10] for stamp in df["data"].tolist()]
st.dataframe(df, height=300, width=700)

# Series plotted by default; the user can pick any column of the frame.
select = ["deceduti", "totale_casi", "dimessi_guariti"]
select_options = st.multiselect(
    'Seleziona cosa vuoi plottare',
    df.columns.tolist(),
    default=select,
)

fig = plot_plotly(df, x="data", y=select_options, title="Andamento Nazionale")
st.plotly_chart(fig, use_container_width=True)
Ejemplo n.º 14
0
def main():
    """Render the Boston house-price prediction data app.

    The main area shows a column-filtered preview of the dataset and a
    histogram of ``MEDV`` restricted to a user-chosen range. The sidebar
    collects the feature values for one property and, when the button is
    pressed, displays the model's prediction.

    Uses ``get_data`` and ``train_model``, which are defined outside this
    function.
    """
    # Load the dataset.
    data = get_data()

    # Train the model.
    model = train_model()

    # Title.
    st.title("Data App - Prevendo Valores de Imóveis")

    # Subtitle.
    st.markdown(
        "Este é um Data App utilizado para exibir a solução de Machine Learning para o problema de predição de valores de imóveis de Boston."
    )

    # Dataset preview.
    st.subheader("Selecionando apenas um pequeno conjunto de atributos")

    # Columns shown by default.
    defaultcols = ["RM", "PTRATIO", "LSTAT", "MEDV"]

    # Let the user choose which columns to display.
    cols = st.multiselect("Atributos",
                          data.columns.tolist(),
                          default=defaultcols)

    # Show the first 10 rows of the selected columns.
    st.dataframe(data[cols].head(10))

    st.subheader("Distribuição de imóveis por preço")

    # Range slider; returns a (low, high) tuple.
    faixa_valores = st.slider("Faixa de preço", float(data.MEDV.min()), 150.,
                              (10.0, 100.0))

    # Keep only rows whose MEDV lies inside the chosen range.
    dados = data[data['MEDV'].between(left=faixa_valores[0],
                                      right=faixa_valores[1])]

    # Plot the distribution of the filtered data.
    f = px.histogram(dados,
                     x="MEDV",
                     nbins=100,
                     title="Distribuição de Preços")
    f.update_xaxes(title="MEDV")
    f.update_yaxes(title="Total Imóveis")
    st.plotly_chart(f)

    st.sidebar.subheader("Defina os atributos do imóvel para predição")

    # Map user input to each model feature; defaults are the column means.
    crim = st.sidebar.number_input("Taxa de Criminalidade",
                                   value=data.CRIM.mean())
    # Default comes from the INDUS column; it previously reused the CRIM
    # mean, a copy-paste slip from the input above.
    indus = st.sidebar.number_input("Proporção de Hectares de Negócio",
                                    value=data.INDUS.mean())
    chas = st.sidebar.selectbox("Faz limite com o rio?", ("Sim", "Não"))

    # Turn the yes/no answer into a binary value.
    chas = 1 if chas == "Sim" else 0

    nox = st.sidebar.number_input("Concentração de óxido nítrico",
                                  value=data.NOX.mean())

    rm = st.sidebar.number_input("Número de Quartos", value=1)

    ptratio = st.sidebar.number_input("Índice de alunos para professores",
                                      value=data.PTRATIO.mean())

    b = st.sidebar.number_input(
        "Proporção de pessoas com descendencia afro-americana",
        value=data.B.mean())

    lstat = st.sidebar.number_input("Porcentagem de status baixo",
                                    value=data.LSTAT.mean())

    # Prediction button.
    btn_predict = st.sidebar.button("Realizar Predição")

    # Run the prediction only when the button was pressed.
    if btn_predict:
        result = model.predict(
            [[crim, indus, chas, nox, rm, ptratio, b, lstat]])
        st.subheader("O valor previsto para o imóvel é:")
        # The prediction is scaled by 10 before display.
        result = "US $ " + str(round(result[0] * 10, 2))
        st.write(result)
Ejemplo n.º 15
0
def main():
    """Machine Learning Dataset Explorer.

    Lets the user pick a file from the current directory, loads it with
    ``pd.read_csv`` and exposes a series of checkbox- and button-driven
    views: preview, column names, shape, column subset, value counts,
    dtypes, summary statistics and several plots.
    """
    st.title("Machine Learning Dataset Explorer")
    st.subheader("Simple Data Science Explorer with Streamlit")

    # Banner; the outer <div> is now closed properly (it used to end with a
    # second opening tag).
    html_temp = """ 
    <div style="background-color:tomato;">
    <p style="color:white; font-size: 50px">Frase aleatória</p>
    </div>
    """
    st.markdown(html_temp, unsafe_allow_html=True)

    def file_selector(folder_path='.'):
        """Return the path of the file chosen among ``folder_path`` entries."""
        filenames = os.listdir(folder_path)
        selected_filename = st.selectbox("Escolhar um arquivo", filenames)
        return os.path.join(folder_path, selected_filename)

    filename = file_selector()
    st.info("Você escolheu {}".format(filename))

    # Read the data.
    df = pd.read_csv(filename)

    # Dataset preview.
    if st.checkbox("Mostrar DataSet"):
        number = st.number_input("Número de linhas para visualizar", 5, 10)
        st.dataframe(df.head(number))

    # Column names.
    if st.button("Nomes das Colunas"):
        st.write(df.columns)

    # Shape.
    if st.checkbox("Formato do Dataset"):
        st.write(df.shape)
        data_dim = st.radio("Show Dimension By", ("Rows", "Columns"))
        if data_dim == 'Columns':
            st.text("Número de Colunas")
            st.write(df.shape[1])
        elif data_dim == "Rows":
            st.text("Número de linhas")
            st.write(df.shape[0])
        else:
            st.write(df.shape)

    # Column subset.
    if st.checkbox("Selecione as colunas desejadas"):
        all_columns = df.columns.tolist()
        selected_columns = st.multiselect("Escolha", all_columns)
        new_df = df[selected_columns]
        st.dataframe(new_df)

    # Value counts of the first two and last two columns. The trailing notes
    # are the original author's labels for those columns.
    if st.button("Valores"):
        st.text("Valores em classes")
        st.write(df.iloc[:, 0].value_counts())  # residents
        st.write(df.iloc[:, 1].value_counts())  # elderly
        st.write(df.iloc[:, -1].value_counts())  # children
        st.write(df.iloc[:, -2].value_counts())  # families

    # Data types.
    if st.button("DataTypes"):
        st.write(df.dtypes)

    # Summary statistics.
    if st.checkbox("Sumário"):
        st.write(df.describe().T)

    # Visualisation.
    st.subheader("Visualização dos dados")
    # Correlation heatmap (Seaborn).
    if st.checkbox("Seaborn Plot"):
        st.write(sns.heatmap(df.corr(), annot=True))
        # Must be called: a bare `st.pyplot` reference renders nothing.
        st.pyplot()
    # Count plot.
    if st.checkbox("Plot of Value Counts"):
        st.text("Value Counts By Target")
        all_columns_names = df.columns.tolist()
        primary_col = st.selectbox("Primary Columm to GroupBy",
                                   all_columns_names)
        selected_columns_names = st.multiselect("Select Columns",
                                                all_columns_names)
        if st.button("Plot"):
            st.text("Generate Plot")
            if selected_columns_names:
                vc_plot = df.groupby(
                    primary_col)[selected_columns_names].count()
            else:
                # Nothing selected: fall back to the last column's counts.
                vc_plot = df.iloc[:, -1].value_counts()
            st.write(vc_plot.plot(kind="bar"))
            st.pyplot()
    # Pie chart.
    if st.checkbox("Pie Plot"):
        all_columns_names = df.columns.tolist()
        selected_column = st.selectbox("Selecione a coluna desejada",
                                       all_columns_names)
        if st.button("Gerar Pie Plot"):
            st.success("Gerando um Pie Plot")
            st.write(
                df[selected_column].value_counts().plot.pie(autopct="%1.1f%%"))
            st.pyplot()

    # Custom plot.
    st.subheader("Plot Customizado")
    all_columns_names = df.columns.tolist()
    type_of_plot = st.selectbox("Selecione o tipo de plot",
                                ['area', 'bar', 'line', 'hist', 'box', 'kde'])
    selected_columns_names = st.multiselect("Selecione as colunas",
                                            all_columns_names)

    if st.button("Gerar Plot"):
        st.success("Gerando plot de {} para {}".format(type_of_plot,
                                                       selected_columns_names))

        # area/bar/line use Streamlit's native charts; the other kinds go
        # through the pandas plotting API.
        if type_of_plot == 'area':
            cust_data = df[selected_columns_names]
            st.area_chart(cust_data)

        elif type_of_plot == 'bar':
            cust_data = df[selected_columns_names]
            st.bar_chart(cust_data)

        elif type_of_plot == 'line':
            cust_data = df[selected_columns_names]
            st.line_chart(cust_data)

        elif type_of_plot:
            cust_plot = df[selected_columns_names].plot(kind=type_of_plot)
            st.write(cust_plot)
            st.pyplot()
Ejemplo n.º 16
0
    st.dataframe(df)

    st.sidebar.subheader("Show details about data")

    if st.sidebar.checkbox("Show Shape"):
        st.write(df.shape)

    if st.sidebar.checkbox("Show Columns"):
        all_columns = df.columns.to_list()
        st.write(all_columns)

    if st.sidebar.checkbox("Summary"):
        st.write(df.describe())

    if st.sidebar.checkbox("Show Selected Columns"):
        selected_columns = st.multiselect("Select Columns", all_columns)
        new_df = df[selected_columns]
        st.dataframe(new_df)

    if st.sidebar.checkbox("Show Value Counts"):
        st.write(df.iloc[:, -1].value_counts())

    if st.sidebar.checkbox("Show Null Values"):
        st.write(df.isna().sum())

    if st.sidebar.checkbox("Correlation Plot"):
        st.write(sns.heatmap(df.corr(), annot=True))
        st.pyplot()

    # Pie Chart
    if st.sidebar.checkbox("Pie Plot"):
Ejemplo n.º 17
0
    df = df[mask_cats]

# Items: distinct values of the 'item' column feed the selectbox. For the
# 'Totals' and 'All' categories the preselected entry is
# "Total Income Assessed"; otherwise it is the first item.
ITEMS = list(df['item'].unique())
DEFIND = (ITEMS.index("Total Income Assessed")
          if CAT_SELECTED in ('Totals', 'All') else 0)
ITEM_SELECTED = st.selectbox('Select Item', ITEMS, index=DEFIND)
mask_items = df['item'].isin([ITEM_SELECTED])
data = df[mask_items]

# Provinces: options come from the rows left after the item filter.
PROVS = list(data['provname'].unique())
PROVABBS = list(data['provabb'].unique())
PROVS_SELECTED = st.multiselect(
    'Select Provinces',
    PROVS,
    default=["All provinces"],
)
mask_provs = data['provname'].isin(PROVS_SELECTED)
data = data[mask_provs]

# Unit: which measure the user wants to look at.
UNIT = st.radio(
    'Variable of Interest',
    (
        'Share of Item',
        'Total Real 2019 Dollars',
        'Average Real 2019 Dollars Per Taxfiler',
    ),
)

# Map the chosen label to its column name. Only the first two labels are
# handled in this span.
if UNIT == 'Share of Item':
    unitvar = 'shr'
    keepvar = [unitvar]
if UNIT == 'Total Real 2019 Dollars':
    unitvar = 'realdol'
    keepvar = [unitvar]
Ejemplo n.º 18
0
def _load_encoded_heart_data():
    """Read heart_failure.csv, preview its raw head, and label-encode it.

    Returns:
        The DataFrame with every column replaced by its integer label codes.
    """
    df = pd.read_csv('heart_failure.csv')
    # The preview is rendered before encoding, so it shows the raw values.
    st.dataframe(df.head())
    lable = preprocessing.LabelEncoder()
    for col in df.columns:
        df[col] = lable.fit_transform(df[col])
    return df


def _select_classifier_params(clf_name):
    """Render the sidebar sliders for `clf_name` and return their values."""
    params = {}
    if clf_name in ("SVM Linear", "SVM Radial"):
        params["C"] = st.sidebar.slider("C", 0.01, 15.0)
    elif clf_name == "Decision Tree":
        params["max_depth"] = st.sidebar.slider("max_depth", 2, 15)
        params["max_leaf_nodes"] = st.sidebar.slider("max_leaf_nodes", 2, 20)
    elif clf_name == "Random Forest":
        params["max_depth"] = st.sidebar.slider("max_depth", 2, 15)
        params["n_estimators"] = st.sidebar.slider("n_estimators", 1, 200)
    elif clf_name == "MLPClassifier":
        params["max_iter"] = st.sidebar.slider("max_iter", 2, 30)
    return params


def _build_classifier(clf_name, params):
    """Instantiate the estimator named `clf_name` from the chosen `params`.

    Raises:
        ValueError: if `clf_name` is not one of the supported classifiers.
    """
    if clf_name == "SVM Linear":
        return SVC(C=params["C"], kernel='linear')
    if clf_name == "SVM Radial":
        return SVC(C=params["C"], kernel='rbf')
    if clf_name == "Decision Tree":
        return DecisionTreeClassifier(
            max_depth=params["max_depth"],
            max_leaf_nodes=params["max_leaf_nodes"],
            random_state=100)
    if clf_name == "Random Forest":
        return RandomForestClassifier(
            n_estimators=params["n_estimators"],
            max_depth=params["max_depth"],
            random_state=100)
    if clf_name == "MLPClassifier":
        return MLPClassifier(max_iter=params["max_iter"])
    raise ValueError("Unsupported classifier: %s" % clf_name)


def _train_and_report(clf, classifer_name, X, y):
    """Fit `clf` on an 80/20 split of (X, y) and write the results to the page."""
    X_train, X_test, y_train, y_test = train_test_split(X,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=100)
    clf.fit(X_train, y_train)
    y_pred = clf.predict(X_test)

    st.write(
        f'<div style="color: #1C2331; font-size: medium; font-style: italic; padding: 15px; background-color:#b2dfdb;border-radius:5px;">Classifier = {classifer_name}</div></br>',
        unsafe_allow_html=True)
    clf_report = classification_report(y_test, y_pred)
    st.success(f"Classification Report:\n\n {clf_report}")
    acc = accuracy_score(y_test, y_pred)
    st.warning(f"accuracy = {acc}")
    # Show the first (up to) ten test samples. Slicing starts at sample 0 and
    # cannot run past the end of a short test set, unlike a fixed range(1, 10).
    for actual, predicted in zip(y_test[:10], y_pred[:10]):
        st.write("Actual=%s, Predicted=%s" % (actual, predicted))


def main():
    """Automated ML App

    Renders a banner and a sidebar menu, then dispatches to one of the pages:
    "EDA" (profiling report), "Plots" (value counts and a custom plot),
    "ML_Algorithms" (SVM / tree / forest classifiers) or "Neural Network"
    (MLPClassifier). "Home" shows only the banner. The dataset is loaded only
    by the pages that use it.
    """

    #st.title('Machine Learning Application')
    activities = ["Home", "EDA", "Plots", "ML_Algorithms", "Neural Network"]
    choice = st.sidebar.selectbox("Menu", activities)

    html_temp = """
        <div 
        style="background-color:royalblue;padding:10px;border-radius:10px">
        <h1 style="color:white;text-align:center;font-style: italic;">Classifying the survival of patients with heart failure using Various Machine Learning Algorithms</h1>
        </div>
        """
    components.html(html_temp)

    if choice == 'EDA':
        st.subheader("Exploratory Data Analysis using Pandas Profiling")
        df = _load_encoded_heart_data()
        #pandas profiling
        profile = ProfileReport(df)
        st_profile_report(profile)

    elif choice == 'Plots':
        st.subheader("Data Visualization")
        df = _load_encoded_heart_data()

        if st.checkbox("Show Value Counts"):
            st.write(df.iloc[:, -1].value_counts().plot(kind='bar'))
            st.pyplot()

        #Customized Plot
        all_columns_names = df.columns.tolist()
        type_of_plot = st.selectbox(
            "Select Type of Plot",
            ["area", "bar", "line", "hist", "box", "kde"])
        selected_columns_names = st.multiselect("Select Columns To Plot",
                                                all_columns_names)
        if st.button("Generate Plot"):
            if not selected_columns_names:
                # Plotting an empty column selection raises inside pandas.
                st.warning("Select at least one column to plot")
            else:
                st.success("Generating Customizable Plot of {} for {}".format(
                    type_of_plot, selected_columns_names))
                cust_data = df[selected_columns_names]
                # These three kinds have native Streamlit charts; the rest go
                # through pandas/matplotlib.
                native_charts = {
                    'area': st.area_chart,
                    'bar': st.bar_chart,
                    'line': st.line_chart,
                }
                if type_of_plot in native_charts:
                    native_charts[type_of_plot](cust_data)
                else:
                    cust_plot = cust_data.plot(kind=type_of_plot)
                    st.write(cust_plot)
                    st.pyplot()

    elif choice == 'ML_Algorithms':
        st.subheader("Machine Learning Algorithms")
        df = _load_encoded_heart_data()

        if st.checkbox("Summary"):
            st.write(df.describe())

        # Every column but the last is a feature; the last one is the target.
        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values

        st.write("Number of classes", len(np.unique(y)))
        classifer_name = st.sidebar.selectbox(
            "Select Classifer",
            ("SVM Linear", "SVM Radial", "Decision Tree", "Random Forest"))

        params = _select_classifier_params(classifer_name)
        clf = _build_classifier(classifer_name, params)
        _train_and_report(clf, classifer_name, X, y)

    elif choice == 'Neural Network':
        st.subheader("Neural Networks (MLPClassifier)")
        df = _load_encoded_heart_data()

        X = df.iloc[:, :-1].values
        y = df.iloc[:, -1].values

        classifer_name = "MLPClassifier"
        params = _select_classifier_params(classifer_name)
        clf = _build_classifier(classifer_name, params)
        _train_and_report(clf, classifer_name, X, y)
Ejemplo n.º 19
0
		# Turn this result into a frame indexed by its report date.
		df_new = pd.DataFrame(res)
		df_new.index = pd.DatetimeIndex(df_new['Report_Date'])
		# Use the `columns=` keyword: the positional axis argument
		# (`drop('Report_Date', 1)`) was removed in pandas 2.0.
		df_new = df_new.drop(columns='Report_Date')
		# 'Report_Date' now names the index level, which sort_values accepts.
		df_new = df_new.sort_values(by=['Report_Date'])
		if df is None:
			# First result: it becomes the accumulator.
			df = df_new
		else:
			# Later results are joined column-wise on the shared date index.
			df = df.merge(df_new, left_index=True, right_index=True)

	return df


df = build_covid19_data()
st.title("COVID-19 Visualization")
# The text before the first underscore of each column name is used as the
# country label offered in the sidebar.
countries_list = sorted({column.split('_')[0] for column in df.columns})
select = st.sidebar.selectbox('Country', countries_list)
st.write(select)

cases_col = select + '_cases'
recovered_col = select + '_recovered'
deaths_col = select + '_deaths'

# Work on an explicit copy: adding columns to a slice of `df` triggers
# SettingWithCopyWarning and is not guaranteed to take effect.
df_n = df[[cases_col, recovered_col, deaths_col]].copy()
df_n['active'] = df_n[cases_col] - df_n[recovered_col] - df_n[deaths_col]
# Row-to-row differences of the cumulative series give the per-report change.
df_n['daily'] = df_n[cases_col].diff()
df_n['death'] = df_n[deaths_col].diff()
st.dataframe(df_n)


option = st.multiselect(
    'What you want to be plot?',
    [cases_col, recovered_col, deaths_col, 'daily', 'death', 'active'])
st.line_chart(df_n[option])
Ejemplo n.º 20
0
 # Bar-chart view: one country is selected and its per-status numbers are
 # plotted.
 if visual == 'Гистограмма':
     co_select = st.selectbox('Выберите страну',
                              df['Country/Region'].unique())
     selected_c = df[df['Country/Region'] == co_select]
     # NOTE(review): get_new is defined elsewhere; the chart below expects it
     # to return 'Status' and 'Number' columns -- confirm.
     country = get_new(selected_c)
     fig = pxe.bar(
         country,
         x='Status',
         y='Number',
         labels={'Number': 'Количество случаев в %s' % (co_select)},
         color='Status')
     st.plotly_chart(fig)
 # Pie-chart view: several countries are compared on one chosen status.
 elif visual == 'Круговая диаграмма':
     status = st.radio(
         'Статус', ('Подтверждено', 'Сейчас', 'Выздоровели', 'Умерли'))
     countries = st.multiselect('Выберите страны',
                                df['Country/Region'].unique())
     # Keep only the rows of the selected countries.
     new_df = df[df['Country/Region'].isin(countries)]
     if status == 'Подтверждено':
         st.title("Все подтвержденные случаи")
         labels = new_df['Country/Region']
         values = new_df['Confirmed']
         fig, ax1 = plt.subplots()
         ax1.pie(values, autopct='%1.1f%%')
         # Equal aspect ratio so the pie is drawn as a circle.
         ax1.axis('equal')
         # Country names go in a legend anchored to the right of the axes
         # rather than on the wedges themselves.
         ax1.legend(labels,
                    title="Страны",
                    loc="center left",
                    bbox_to_anchor=(1, 0, 0.5, 1))
         plt.tight_layout()
         st.pyplot(fig)
     elif status == 'Сейчас':
Ejemplo n.º 21
0
def app():
    """Render the visualisation page.

    The top section shows a bar chart of the ten countries with the highest
    mean `PSS10_avg`. Below it, the user picks countries and, after pressing
    "Apply", gets a radar chart of mean personality-trait scores plus KDE
    plots of perceived stress and loneliness for the selection.
    """

    # st.title('Visualisation Selector')
    data_load = st.text('Loading data...')
    df = get_cached_data()
    data_load.text(' ')

    # STRESS LEVELS BY COUNTRY
    # Only the first (widest) column is used; the others act as spacing.
    col1, _, _, _ = st.beta_columns((2, 0.5, 0.5, 1))
    with col1:
        st.markdown('## Mean **Stress Levels** by country')
        # PREPARE DF
        # Average only the column that is plotted: a blanket
        # `groupby(...).mean()` fails on non-numeric columns in pandas >= 2.0.
        df_stress_country = (df.groupby('Country', as_index=False)
                             ['PSS10_avg'].mean()
                             [['PSS10_avg', 'Country']]
                             .sort_values(by='PSS10_avg', ascending=True))

        # PLOTLY BAR CHART
        # Sorted ascending, so the last ten rows are the highest means.
        fig = px.bar(df_stress_country[-10:],
                     y='Country',
                     x='PSS10_avg',
                     text='Country',
                     height=400,
                     title='Top 10 Countries with higher degrees of stress',
                     labels={'PSS10_avg': 'Stress Level'})
        fig.update_traces(marker_color='rgb(255, 56, 116)',
                          marker_line_color='rgb(255, 0, 116)',
                          marker_line_width=1.5,
                          opacity=0.6,
                          textposition='outside')
        fig.update_layout(uniformtext_minsize=1, uniformtext_mode='hide')

        fig.update_xaxes(range=[1.5, 3.5])
        st.plotly_chart(fig, use_container_width=True)

    # PLOTLY RADAR CHART

    def country_radar(countries):
        """Return a radar chart with one trace of mean traits per country."""
        trait_columns = ['neu', 'ope', 'ext', 'agr', 'con']
        # The first category is repeated at the end to close the polygon.
        categories = [
            'Neuroticism', 'Openness', 'Extraversion', 'Agreeableness',
            'Conscientiousness', 'Neuroticism'
        ]

        fig = go.Figure()

        for country in countries:
            # One filtering pass per country, then a mean per trait column.
            trait_means = df.loc[df.Country == country,
                                 trait_columns].mean().to_list()
            fig.add_trace(
                go.Scatterpolar(r=trait_means + trait_means[:1],
                                theta=categories,
                                fill='toself',
                                name=country))

        fig.update_layout(
            polar=dict(radialaxis=dict(visible=True, range=[2, 5])),
            showlegend=True)

        return fig

    def stress(countries):
        """Return a figure with one perceived-stress KDE curve per country."""
        fig = plt.figure()

        for country in countries:
            sns.kdeplot(data=df[df['Country'] == country],
                        x="PSS10_avg",
                        bw_adjust=1,
                        common_norm=False,
                        label=country)
        plt.title('STRESS')
        plt.xlabel('Perceived Stress')
        plt.ylabel('Distribution')
        plt.yticks([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9], [
            '10 %', '20 %', '30 %', '40 %', '50 %', '60 %', '70 %', '80 %',
            '90 %'
        ])
        plt.legend()

        return fig

    def loneliness(countries):
        """Return a figure with one perceived-loneliness KDE curve per country."""
        fig = plt.figure()

        for country in countries:
            sns.kdeplot(data=df[df['Country'] == country],
                        x="SLON3_avg",
                        bw_adjust=1.5,
                        common_norm=False,
                        label=country)
        # Axes decoration is applied once after all curves are drawn, the same
        # way stress() does it.
        plt.title('LONLINESS')
        plt.xlabel('Percived Loneliness')
        plt.ylabel('Distribution')
        plt.yticks(
            [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7],
            ['10 %', '20 %', '30 %', '40 %', '50 %', '60 %', '70 %'])
        plt.legend()

        return fig

    st.markdown(
        '## Personality traits accross countries - Interactive Comparison')
    user_select = st.multiselect('Select countries', df['Country'].unique())

    sns.set_style('white')
    if st.button('Apply'):
        # print is visible in server output, not in the page
        print('button clicked!')
        col1, col2, col3 = st.beta_columns((1.5, 1, 1))
        # The selection is passed explicitly to each plot helper, so none of
        # them needs to mutate shared state.
        col1.plotly_chart(country_radar(user_select), use_container_width=True)
        col2.pyplot(stress(user_select))
        col2.pyplot(loneliness(user_select))
    else:
        st.write('Press to apply')
Ejemplo n.º 22
0

@st.cache
def load_data(DATA_URL):
    """Read the CSV located at `DATA_URL` and return it as a DataFrame.

    The function is wrapped in `st.cache`.
    """
    return pd.read_csv(DATA_URL)


df = load_data(
    "https://beyondparasite.s3-us-west-1.amazonaws.com/data/imdb_mat_f.csv")

# Inclusive year range chosen by the user (defaults to 2010-2019).
year_min, year_max = st.slider('Year', 1960, 2019, (2010, 2019))

# Genres to keep; an empty selection means "do not filter by genre".
genre_options = st.multiselect('Genre', df['genre1'].unique())
# st.write('You selected:',genre_options)

# Restrict to the year window first, then narrow by genre when any is chosen.
within_years = (df['year'] >= year_min) & (df['year'] <= year_max)
new_df = df[within_years]
if genre_options:
    new_df = new_df[new_df['genre1'].isin(genre_options)]

# if len(new_df)>0:
#     st.write(new_df)
# else:
#     st.write('Oops! Your selections return no result.')

# recommender
Ejemplo n.º 23
0
import plotly.express as px


@st.cache
def load_data():
    """Load the survey workbook and classify its columns.

    Returns:
        A 3-tuple `(df, num_col, text_col)`: the frame with incomplete rows
        dropped, the labels of its int/float columns, and the labels of its
        object (text) columns.
    """
    # NOTE(review): absolute path to a local workbook; the app only runs on
    # a machine where this file exists.
    workbook_path = "C:/Users/MATH-BUKER-ANGE/Desktop/Cours et livres/-1/PDS/Analyse des tableaux/DATABASE.xlsx"
    df = pd.read_excel(workbook_path).dropna()
    num_col = df.select_dtypes(["int", "float"]).columns
    text_col = df.select_dtypes(['object']).columns
    return df, num_col, text_col


df, num_col, text_col = load_data()
st.title("Données d'enquête académique PDS 2020")
# Random demo frame; not referenced again in the visible part of the script.
chart_data = pd.DataFrame(np.random.randn(20, 3), columns=list('abc'))
st.line_chart(df)

st.sidebar.title("Opérations")
# Optionally show the raw table.
c_b = st.sidebar.checkbox(label="Afficher la base des données")
if c_b:
    st.write(df)

# Selection of the variables to display from the database
var_select = st.multiselect(label='variables à afficher', options=num_col)
val_num = st.sidebar.checkbox("Numériques")
if val_num:
    # `num_col` holds exactly the int/float column labels of `df`.
    st.write(df[num_col])
Ejemplo n.º 24
0
	        df_auxiliar["Region"]=[df_region.loc[country].values[0] for i in range(len(df_auxiliar))]
	        df_auxiliar=df_auxiliar.sort_values(by="date")
	        df = pd.concat([df, df_auxiliar], ignore_index=True)
	df["date"] = df["date"].astype('int64')
	df = pd.concat([df[df["country"]=="Austria"],df[df["country"]!="Austria"]], ignore_index=True)
	df = pd.concat([df[df["country"]=="Algeria"],df[df["country"]!="Algeria"]], ignore_index=True)
	df = pd.concat([df[df["country"]=="Botswana"],df[df["country"]!="Botswana"]], ignore_index=True)
	df = pd.concat([df[df["country"]=="Australia"],df[df["country"]!="Australia"]], ignore_index=True)
	df = pd.concat([df[df["country"]=="India"],df[df["country"]!="India"]], ignore_index=True)
	df = pd.concat([df[df["country"]=="United States"],df[df["country"]!="United States"]], ignore_index=True)
	df = pd.concat([df[df["country"]=="Chile"],df[df["country"]!="Chile"]], ignore_index=True)
	return df

df = get_data()
df_wb = df.copy()
all_regions = df_wb["Region"].unique().tolist()
all_countries = df_wb["country"].unique().tolist()
# Every region is pre-selected; countries start empty, with 'Todos' offered as
# a shortcut for "all countries".
option = st.multiselect('Elige regiones de interes', all_regions, all_regions)
selected = st.multiselect('Elige países de interés', ['Todos'] + all_countries)
# Stack the rows region by region, in the order the regions were picked.
# A single concat replaces the repeated concat onto an initially empty frame.
region_frames = [df_wb[df_wb["Region"] == region] for region in option]
if region_frames:
	df_aux = pd.concat(region_frames, ignore_index=True)
else:
	df_aux = pd.DataFrame()
if len(df_aux) == 0:
	st.error("Por favor, ingrese una región")
else:
	# Expand the 'Todos' shortcut with a membership test. The old code rebound
	# `selected` while still indexing it by positions of the previous list,
	# which could raise IndexError.
	if 'Todos' in selected:
		selected = all_countries
	# Highlight column: the country name for selected countries, '' otherwise.
	# NOTE(review): assumes "country" holds plain strings -- confirm.
	is_selected = df_aux["country"].isin(selected)
	df_aux["selected country"] = df_aux["country"].where(is_selected, '')
	fig = px.scatter(df_aux,
Ejemplo n.º 25
0
IRIS_CSV_URL = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"
df = pd.read_csv(IRIS_CSV_URL)
if st.checkbox('Show dataframe'):
    st.write(df)
    st.write(df.shape)


def info_dataset():
    """Write the shape and the per-column dtypes of the global `df`."""
    st.write('Shape :', df.shape)
    st.write('Types :', df.dtypes)


info_dataset()

# The first four columns are offered as plottable features.
feature_columns = df.columns[0:4]

species = st.multiselect('Show iris per variety?', df['variety'].unique())
col1 = st.selectbox('Which feature on x?', feature_columns)
col2 = st.selectbox('Which feature on y?', feature_columns)
# Rows belonging to the selected varieties (empty when nothing is selected).
new_df = df[df['variety'].isin(species)]
st.write(new_df)
fig = px.scatter(new_df, x=col1, y=col2, color='variety')
st.plotly_chart(fig)

feature = st.selectbox('Which feature?', feature_columns)
# Single-feature view of the same filtered rows.
new_df2 = new_df[feature]
fig2 = px.histogram(new_df, x=feature, color="variety", marginal="rug")
st.plotly_chart(fig2)

from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
Ejemplo n.º 26
0
def main():
    """CMC Predictor with Streamlit

    Offers two pages from the sidebar: "Descriptive" (dataset preview, column
    selection, summaries and plots of `data_cmc/cmc_dataset.csv`) and
    "Predictive" (collects nine inputs and runs one of four saved models).
    """

    st.title('CMC Predictor ML App')
    html_templ = """<div style="background-color:magenta;padding:10px;">
    <h2 style="color:white">ML App with Streamlit </h2> 
    </div>"""

    st.markdown(html_templ, unsafe_allow_html=True)

    activity = ['Descriptive', 'Predictive']
    choice = st.sidebar.selectbox("Choose Analytics Type", activity)

    if choice == 'Descriptive':
        st.subheader("EDA Aspect")

        df = pd.read_csv('data_cmc/cmc_dataset.csv')

        if st.checkbox("Preview Dataset"):
            number = int(st.number_input("Select number of rows to view "))
            st.dataframe(df.head(number))

        if st.checkbox("Select Columns"):
            all_columns = df.columns.tolist()
            selected_columns = st.multiselect("Select Columns", all_columns)
            new_df = df[selected_columns]
            st.dataframe(new_df)

        if st.button("Summary of Dataset"):
            st.write(df.describe())

        if st.button("Value Counts"):
            st.text("Value counts by target")
            st.write(df.iloc[:, -1].value_counts())

        st.subheader("Data Visualization")

        if st.checkbox("Correlation Plot with Matplotlib"):
            plt.matshow(df.corr())
            st.pyplot()

        if st.checkbox("Pie Chart"):
            if st.button("Generate pie chart"):
                st.write(
                    df.iloc[:, -1].value_counts().plot.pie(autopct="%1.1f%%"))
                st.pyplot()

        if st.checkbox("Plot value counts by columns"):
            st.text("Value counts by target/class")

            all_columns_names = df.columns.to_list()
            primary_col = st.multiselect("Select primary columns to group by",
                                         all_columns_names)
            selected_column_names = st.multiselect("Select Columns",
                                                   all_columns_names)
            if st.button("Plot"):
                st.text("Generating Plot for: {} and {}".format(
                    primary_col, selected_column_names))
                if selected_column_names and not primary_col:
                    # groupby() with an empty key list raises, so ask for a
                    # grouping column instead of crashing the page.
                    st.warning(
                        "Select at least one primary column to group by")
                else:
                    if selected_column_names:
                        vc_plot = df.groupby(
                            primary_col)[selected_column_names].count()
                    else:
                        # No columns chosen: fall back to the distribution of
                        # the last column.
                        vc_plot = df.iloc[:, -1].value_counts()
                    st.write((vc_plot.plot(kind="bar")))
                    st.pyplot()

    elif choice == "Predictive":
        st.subheader("Prediction Aspect")

        age = st.slider("Select Age", 16, 60)
        wife_education = st.number_input(
            "Select wide education level (low to high)", 1, 4)
        husband_education = st.number_input(
            "Select husband education level (low to high)", 1, 4)
        num_of_children_ever_born = st.number_input(
            "Number of Children Ever Born", 1, 20)

        # Each radio shows the human-readable key; get_value (defined
        # elsewhere) is used to obtain the numeric code fed to the model.
        wife_reg = {"Non_Religious": 0, "Religious": 1}
        choice_wife_reg = st.radio("Wife's Religion", tuple(wife_reg.keys()))
        result_wife_reg = get_value(choice_wife_reg, wife_reg)

        wife_working = {"Yes": 0, "No": 1}
        choice_wife_working = st.radio("Is the wife working",
                                       tuple(wife_working.keys()))
        result_wife_working = get_value(choice_wife_working, wife_working)

        husband_occupation = st.number_input("Husbands Occupation Level", 1, 4)

        standard_of_living = st.slider("Standard of Living", 1, 4)

        media_exposure = {"Good": 1, "Not-Good": 0}
        choice_media_exposure = st.radio("Media Exposure",
                                         tuple(media_exposure.keys()))
        result_media_exposure = get_value(choice_media_exposure,
                                          media_exposure)

        # Numeric feature vector, in the order the inputs are collected above.
        results = [
            age, wife_education, husband_education, num_of_children_ever_born,
            result_wife_reg, result_wife_working, husband_occupation,
            standard_of_living, result_media_exposure
        ]
        # Same inputs with the readable radio labels, for display only.
        prettified_result = {
            "age": age,
            "wife_education": wife_education,
            "husband_education": husband_education,
            "num_of_children_ever_born": num_of_children_ever_born,
            "result_wife_reg": choice_wife_reg,
            "result_wife_working": choice_wife_working,
            "husband_occupation": husband_occupation,
            "standard_of_living": standard_of_living,
            "media_exposure": choice_media_exposure
        }
        # A single sample as a (1, n_features) array.
        sample_data = np.array(results).reshape(1, -1)

        st.info(results)
        st.json(prettified_result)

        st.subheader("Prediction Aspects")
        if st.checkbox("Make Prediction"):
            # Menu label -> saved model file; one lookup replaces four
            # copy-pasted load-and-predict branches.
            model_paths = {
                "LR": "models/contraceptives_logit_model.pkl",
                "Decision Tree": "models/contraceptives_dcTree_model.pkl",
                "Naive Bayes": "models/contraceptives_nv_model.pkl",
                "RFOREST": "models/contraceptives_rf_model.pkl",
            }
            all_ml_dict = list(model_paths)
            model_choice = st.selectbox("Model Choice", all_ml_dict)

            if st.button("Predict"):
                prediction_label = {
                    "No_use": 1,
                    "Long_term": 2,
                    "Short_term": 3
                }
                predictor = load_prediction_model(model_paths[model_choice])
                prediction = predictor.predict(sample_data)

                final_result = get_key(prediction, prediction_label)
                st.success(final_result)
Ejemplo n.º 27
0
    temp4=pd.DataFrame(temp3['rating'].head(n))
    #temp4=temp4.style.background_gradient(cmap = 'copper')
    return temp4
# Single-movie lookup: show the n titles related to the chosen movie.
if Name_of_the_movie != 'Select a movie':
    st.write("Movie name:", Name_of_the_movie)
    st.write(str(n) + " related movies with ratings are: \n")
    rec_m = rec_engine(Name_of_the_movie, n).style.background_gradient(cmap='copper')
    st.dataframe(rec_m)
    # Handling of an empty recommendation list is not implemented here.

st.markdown("<h2 style='text-align: center; color: red;'><b> Netflix Style Movie Recommendation</b></h2>", unsafe_allow_html=True)
st.write("**Please select some movies for recommendations.**")
nfl = st.multiselect('Add movies here:', movie_list, ["Star Wars (1977)", "12 Angry Men (1957)"])
if nfl:
    # Collect the top three recommendations for every chosen movie, then show
    # them together, best rating first.
    movie_names = []
    ratings = []
    for m in nfl:
        tempdf = rec_engine(m, n=3)
        movie_names.extend(tempdf.index)
        ratings.extend(tempdf['rating'])
    findf = pd.DataFrame({"Movie Name": movie_names, "Rating": ratings})
    findf.set_index("Movie Name", inplace=True)
    st.write("**Recommended movies for you**")
    ranked = findf.sort_values(by="Rating", ascending=False)
    st.dataframe(ranked.style.background_gradient(cmap='ocean_r'))
else:
Ejemplo n.º 28
0
df_AllLetters = pd.read_csv(f'../Data_files/{lan}_allwords.csv', index_col=0)
with open(f'../Files/{lan}_dictionary.json', 'r') as f:
    dic = json.load(f)

# Show dataset
# TODO: show the percentage of times each column appears and let the user
# choose which dataset to view.
if st.checkbox("Show Dataset"):
    st.dataframe(df)

# Letters already present in the word are picked from the dataset's columns.
all_letters = list(df.columns)

question1 = st.radio("What do you have?", ('Word with some letters', 'Empty panel', 'Empty word but with some letters'))

if question1 == 'Word with some letters':
    letters = st.multiselect("Select letters you have", all_letters)
    question2 = st.radio("What do you need?", ('Consonant', 'Vowel'))
    if question2 == "Consonant":
        st.subheader(next_letter(df, letters))
        st.dataframe(clean_data_letter(df, letters))
    elif question2 == "Vowel":
        st.subheader(next_vowel(df, letters))
        st.dataframe(clean_data_vowel(df, letters))
elif question1 == 'Empty panel':
    st.subheader(empty_panel(df_AllLetters))
    st.dataframe(df_AllLetters)


if question1 == 'Empty word but with some letters':
Ejemplo n.º 29
0
# Pickled classifier behind each entry of the "Select Model" box.
_MODEL_PATHS = {
    "LR": "models/logistic_regression_hepB_model.pkl",
    "KNN": "models/knn_hepB_model.pkl",
    "DecisionTree": "models/decision_tree_clf_hepB_model.pkl",
}

# Model input columns, in the same order as the feature vector built from the
# patient form.
_FEATURE_NAMES = [
    'age', 'sex', 'steroid', 'antivirals', 'fatigue', 'spiders', 'ascites',
    'varices', 'bilirubin', 'alk_phosphate', 'sgot', 'albumin', 'protime',
    'histology'
]


def _load_selected_model(model_choice):
    """Return the pickled model for ``model_choice``.

    Any name other than "KNN" or "DecisionTree" resolves to the logistic
    regression model.
    """
    return load_model(_MODEL_PATHS.get(model_choice, _MODEL_PATHS["LR"]))


def _show_plots():
    """Render the "Plot" activity: raw data, class counts and frequency charts."""
    st.subheader("Data Vis Plot")
    df = pd.read_csv("data/clean_hepatitis_dataset.csv")
    st.dataframe(df)

    # Draw on an explicit figure and hand it to Streamlit rather than relying
    # on pyplot's global "current figure".
    class_fig, class_ax = plt.subplots()
    df['class'].value_counts().plot(kind='bar', ax=class_ax)
    st.pyplot(class_fig)
    plt.close(class_fig)

    # Freq Dist Plot
    freq_df = pd.read_csv("data/freq_df_hepatitis_dataset.csv")
    st.bar_chart(freq_df['count'])

    if st.checkbox("Area Chart"):
        all_columns = df.columns.to_list()
        feat_choices = st.multiselect("Choose a Feature", all_columns)
        new_df = df[feat_choices]
        st.area_chart(new_df)


def _collect_patient_inputs():
    """Render the patient form.

    Returns:
        A ``(feature_list, pretty_result)`` pair: the encoded values in
        ``_FEATURE_NAMES`` order, and a dict of the raw answers for display.
    """
    feature_options = tuple(feature_dict.keys())

    age = st.number_input("Age", 7, 80)
    sex = st.radio("Sex", tuple(gender_dict.keys()))
    steroid = st.radio("Do You Take Steroids?", feature_options)
    antivirals = st.radio("Do You Take Antivirals?", feature_options)
    fatigue = st.radio("Do You Have Fatigue", feature_options)
    spiders = st.radio("Presence of Spider Naeve", feature_options)
    ascites = st.selectbox("Ascities", feature_options)
    varices = st.selectbox("Presence of Varices", feature_options)
    bilirubin = st.number_input("bilirubin Content", 0.0, 8.0)
    alk_phosphate = st.number_input("Alkaline Phosphate Content", 0.0, 296.0)
    sgot = st.number_input("Sgot", 0.0, 648.0)
    albumin = st.number_input("Albumin", 0.0, 6.4)
    protime = st.number_input("Prothrombin Time", 0.0, 100.0)
    histology = st.selectbox("Histology", feature_options)

    feature_list = [
        age,
        get_value(sex, gender_dict),
        get_fvalue(steroid),
        get_fvalue(antivirals),
        get_fvalue(fatigue),
        get_fvalue(spiders),
        get_fvalue(ascites),
        get_fvalue(varices),
        bilirubin,
        alk_phosphate,
        sgot,
        albumin,
        int(protime),
        get_fvalue(histology),
    ]
    pretty_result = {
        "age": age,
        "sex": sex,
        "steroid": steroid,
        "antivirals": antivirals,
        "fatigue": fatigue,
        "spiders": spiders,
        "ascites": ascites,
        "varices": varices,
        "bilirubin": bilirubin,
        "alk_phosphate": alk_phosphate,
        "sgot": sgot,
        "albumin": albumin,
        "protime": protime,
        # Key now matches the 'histology' feature name (was "histolog").
        "histology": histology,
    }
    return feature_list, pretty_result


def _show_prediction_result(model_choice, single_sample):
    """Run the chosen model on ``single_sample`` and display the outcome."""
    loaded_model = _load_selected_model(model_choice)
    prediction = loaded_model.predict(single_sample)
    pred_prob = loaded_model.predict_proba(single_sample)

    # Class 1 means "Die", class 2 means "Live".
    patient_dies = prediction[0] == 1
    if patient_dies:
        st.warning("Patient Dies")
    else:
        st.success("Patient Lives")

    pred_probability_score = {
        "Die": pred_prob[0][0] * 100,
        "Live": pred_prob[0][1] * 100,
    }
    st.subheader(
        "Prediction Probability Score using {}".format(model_choice))
    st.json(pred_probability_score)

    if patient_dies:
        st.subheader("Prescriptive Analytics")
        st.markdown(prescriptive_message_temp, unsafe_allow_html=True)


def _show_interpretation(model_choice, feature_list):
    """Explain the chosen model's output for ``feature_list`` with LIME.

    Runs for every model choice; the explanation used to be reachable only
    for logistic regression because it sat inside that ``else`` branch.
    """
    loaded_model = _load_selected_model(model_choice)

    # 1 Die and 2 Live
    df = pd.read_csv("data/clean_hepatitis_dataset.csv")
    x = df[_FEATURE_NAMES]
    class_names = ['Die(1)', 'Live(2)']
    explainer = lime.lime_tabular.LimeTabularExplainer(
        x.values,
        feature_names=_FEATURE_NAMES,
        class_names=class_names,
        discretize_continuous=True)
    # The Explainer Instance
    exp = explainer.explain_instance(
        np.array(feature_list),
        loaded_model.predict_proba,
        num_features=13,
        top_labels=1)
    # NOTE(review): notebook-oriented call kept as-is; it presumably has no
    # visible effect inside a Streamlit page - confirm and drop if so.
    exp.show_in_notebook(show_table=True, show_all=False)

    # With top_labels=1 only the winning label is explained, so ask which one
    # it is instead of relying on the accessors' default of label 1.
    top_label = exp.available_labels()[0]
    new_exp = exp.as_list(label=top_label)
    st.write(new_exp)

    label_limits = [item[0] for item in new_exp]
    label_scores = [item[1] for item in new_exp]
    bar_fig, bar_ax = plt.subplots()
    bar_ax.barh(label_limits, label_scores)
    st.pyplot(bar_fig)
    plt.close(bar_fig)

    # as_pyplot_figure returns its own figure, so no figure is pre-created
    # here; the returned one is rendered explicitly.
    lime_fig = exp.as_pyplot_figure(label=top_label)
    st.pyplot(lime_fig)
    plt.close(lime_fig)


def _show_prediction():
    """Render the "Prediction" activity: form, prediction and interpretation."""
    st.subheader("Predictive Analytics")

    feature_list, pretty_result = _collect_patient_inputs()
    st.write(len(feature_list))
    st.write(feature_list)
    st.json(pretty_result)
    single_sample = np.array(feature_list).reshape(1, -1)

    # ML
    model_choice = st.selectbox("Select Model",
                                ["LR", "KNN", "DecisionTree"])
    if st.button("Predict"):
        _show_prediction_result(model_choice, single_sample)

    if st.checkbox("Interpret"):
        _show_interpretation(model_choice, feature_list)


def _show_signup():
    """Render the sign-up form and create the account on submit."""
    new_username = st.text_input("User name")
    new_password = st.text_input("Password", type='password')

    confirm_password = st.text_input("Confirm Password", type='password')
    passwords_match = new_password == confirm_password
    if passwords_match:
        st.success("Password Confirmed")
    else:
        st.warning("Passwords not the same")

    # The button is always rendered, but an account is only created when the
    # two password fields agree.
    if st.button("Submit") and passwords_match:
        create_usertable()
        hashed_new_password = generate_hashes(new_password)
        add_userdata(new_username, hashed_new_password)
        st.success("You have successfully created a new account")
        st.info("Login to Get Started")


def main():
    """Hep Mortality Prediction App.

    Draws the page banner and dispatches on the sidebar menu: "Home" shows
    the description and image, "Login" authenticates and then offers the
    Plot / Prediction activities, "Signup" registers a new account.
    """
    st.markdown(html_temp.format('royalblue'), unsafe_allow_html=True)

    menu = ["Home", "Login", "Signup"]
    submenu = ["Plot", "Prediction", "Metrics"]

    choice = st.sidebar.selectbox("Menu", menu)
    if choice == "Home":
        st.subheader("Home")
        st.markdown(descriptive_message_temp, unsafe_allow_html=True)
        st.image(load_image('images/hepimage.jpeg'))

    elif choice == "Login":
        username = st.sidebar.text_input("Username")
        password = st.sidebar.text_input("Password", type='password')
        if st.sidebar.checkbox("Login"):
            create_usertable()
            hashed_pswd = generate_hashes(password)
            result = login_user(username, verify_hashes(password, hashed_pswd))
            if result:
                st.success("Welcome {}".format(username))

                # "Metrics" is offered in the box but has no handler.
                activity = st.selectbox("Activity", submenu)
                if activity == "Plot":
                    _show_plots()
                elif activity == "Prediction":
                    _show_prediction()

            else:
                st.warning("Incorrect Username/Password")

    # Must match the menu entry exactly; the former "SignUp" spelling made
    # this branch unreachable.
    elif choice == "Signup":
        _show_signup()
Ejemplo n.º 30
0
def compute_centrality_codex(codexkg):
    """
    Purpose:
        Page for centrality action. Lets the user pick a centrality
        algorithm ("degree" or "k-core") and its scope, shows the resulting
        query string, and on "Query" runs it and draws the cluster graph.
    Args:
        codexkg: Codexkg Object (its entity_map / rel_map keys are the
            selectable concepts)
    Returns:
        N/A
    """
    actions = ["degree", "k-core"]
    action = st.selectbox("Select Actions", actions)

    ents = list(codexkg.entity_map.keys())
    rels = list(codexkg.rel_map.keys())
    ents_rels = ents + rels

    # Stays None unless one of the branches below builds a query, so an
    # unhandled selection can be detected instead of raising NameError.
    query_string = None
    cluster_obj = {}

    if action == "degree":

        # TODO look into "All Concepts" not working
        choices = ["All Concepts", "Subgraph", "Given type"]
        choice = st.selectbox("Select Chocie",
                              choices,
                              key="centrality select actions")

        if choice == "All Concepts":
            query_string = "compute centrality using degree;"
            cluster_obj = {
                "query_string": query_string,
                "query_type": "centrality",
                "choice": "All Concepts",
                "concepts": ents_rels,
            }

        elif choice == "Subgraph":
            concepts = st.multiselect("Select Concepts",
                                      ents_rels,
                                      key=f"{action} concept select")
            query_string = (
                f"compute centrality in {concept_string(concepts)}, using degree;"
            )
            cluster_obj = {
                "query_type": "centrality",
                "choice": "subgraph",
                "concepts": concepts,
                "query_string": query_string,
            }

        elif choice == "Given type":
            given_type = st.selectbox("Select Concept",
                                      ents_rels,
                                      key=f"{action} concept select given")
            concepts = st.multiselect("Select Concepts",
                                      ents_rels,
                                      key=f"{action} concept select")
            query_string = f"compute centrality of {given_type}, in {concept_string(concepts)}, using degree;"
            # NOTE(review): "choice" is recorded as "subgraph" here as well,
            # exactly as before - confirm that is what the consumer expects.
            cluster_obj = {
                "query_type": "centrality",
                "choice": "subgraph",
                "concepts": concepts,
                "given_type": given_type,
                "query_string": query_string,
            }

        else:
            st.error("Unknown type")

    elif action == "k-core":
        query_string = "compute centrality using k-core;"

        if st.checkbox("specify k?"):
            k_num = st.number_input("Select K", min_value=2, value=2, step=1)
            query_string = f"compute centrality using k-core, where min-k={k_num};"

        cluster_obj = {
            "query_type": "centrality",
            "choice": "k-core",
            "concepts": ents_rels,
            "query_string": query_string,
        }

    if query_string is None:
        # Nothing valid was selected; there is no query to show or run.
        return

    st.header(query_string)
    curr_query = CodexQueryCluster(query=cluster_obj)

    if st.button("Query"):
        with st.spinner("Doing query..."):
            answers = codexkg.query(curr_query)

        viz.cluster_graph(answers, ents, rels, codexkg)