Example #1
def line_plots(data: pd.DataFrame, lang: NullTranslations) -> None:
    """Renders line plots, both general and regional, of data argument. Usually it is the resulting DataFrame from the website of the Protezione civile."""
    _ = lang.gettext

    # Group data by date
    general = calculate_positive_tests_ratio(
        data.groupby("data", as_index=False).sum(), lang)
    st.title(_("COVID-19 in Italy - Temporal trend"))

    st.markdown("### " + _("14-day cases per 100.000:"))

    # Get today
    today = general["data"].sort_values(ascending=False).iloc[0]

    # Filter for most recent 14 days and write calculation
    fourteen_day_new_positives = general[today - datetime.timedelta(
        days=14) < general["data"]][_("new_positive")]
    st.write(
        float(
            f"{fourteen_day_new_positives.sum() * 100000 / ITALIAN_POPULATION:.2f}"
        ))

    # Indicator chooser
    st.markdown(_("What indicator would you like to visualise?"))
    features = get_features(general)
    feature = st.selectbox(label=_("Choose..."),
                           options=features,
                           format_func=formatter,
                           index=6)

    # Add checkbox for diff with most recent data for an indicator
    diff = st.checkbox(label=_("Difference with previous datapoint"))
    st.markdown(
        _("By checking the above box, the indicator will be replaced by the difference of its value between two consecutive days. This helps in untangling cumulative data such as deaths and total tests."
          ))
    if diff:
        general = diff_over_previous_datapoint(general, "data", feature)

    # Choose log scale or linear, defines what feature to use
    general_choice = st.radio(label=_("Scale"),
                              options=[_("linear"),
                                       _("logarithmic")])
    if general_choice == _("logarithmic"):
        general = general[general[feature] > 0]
        general_scale = alt.Scale(type="log")
    else:
        general_scale = alt.Scale(type="linear")

    st.markdown(("## " + _("General data")))

    # Average calculation if needed
    is_general_average = st.checkbox(label=_("Average over days"),
                                     key="avg1",
                                     value=True)
    if is_general_average:
        avg_days = st.slider(
            label=_("Days to average over"),
            min_value=1,
            max_value=21,
            value=7,
            key="slider1",
        )
        general_average = average_over_days(general[[feature, "data"]],
                                            categorical_columns=["data"],
                                            avg_days=avg_days)

        general = general_average

    general_chart = generate_global_chart(general, feature, general_scale,
                                          _("Month and day"))
    st.altair_chart(general_chart)

    todays_latest = general[general["data"] == today][feature].iloc[0]

    st.markdown(
        _("Latest data on ") + f"**{formatter(feature).lower()}**: " +
        f"{todays_latest:.2f}")

    st.markdown(("## " + _("Situation in different regions")))

    # Get list of regions and select the ones of interest
    region_options = data[_(
        "denominazione_regione")].sort_values().unique().tolist()
    regions = st.multiselect(
        label=_("Regions"),
        options=region_options,
        default=["Lombardia", "Veneto", "Campania", "Lazio"],
    )
    # Filter regions in selection
    selected_regions = data[data[_("denominazione_regione")].isin(regions)]

    if selected_regions.empty:
        st.warning(_("No region selected!"))
    else:

        # Need to handle positive test percentage in if
        if feature == _("positivi_per_tampone_%"):
            selected_regions = (selected_regions.groupby([
                _("denominazione_regione")
            ]).apply(lambda group: calculate_positive_tests_ratio(group, lang)
                     ).sort_values(by="data", ascending=True).reset_index(
                         level=0, drop=True).reset_index(drop=True))

        if diff:
            selected_regions = (selected_regions.groupby([
                _("denominazione_regione")
            ]).apply(lambda group: diff_over_previous_datapoint(
                group, "data", feature)).sort_values(
                    by="data", ascending=True).reset_index(
                        level=0, drop=True).reset_index(drop=True))

        regional_choice = st.radio(label=_("Regional Scale"),
                                   options=[_("linear"),
                                            _("logarithmic")])
        if regional_choice == _("logarithmic"):
            selected_regions = selected_regions[selected_regions[feature] > 0]
            regional_scale = alt.Scale(type="log")
        else:
            regional_scale = alt.Scale(type="linear")

        is_regional_average = st.checkbox(label=_("Average over days"),
                                          key="avg2",
                                          value=True)
        if is_regional_average:
            avg_days = st.slider(
                label=_("Days to average over"),
                min_value=1,
                max_value=21,
                value=7,
                key="slider2",
            )
            regional_average = (selected_regions.groupby(
                [_("denominazione_regione")],
                as_index=False).apply(lambda group: average_over_days(
                    group[[feature, "data",
                           _("denominazione_regione")]],
                    ["data", _("denominazione_regione")],
                    avg_days,
                )).reset_index(level=0, drop=True).reset_index(drop=True))

            regional_average_chart = generate_regional_chart(
                regional_average,
                feature,
                regional_scale,
                x_title=_("Month and day"),
                color_title=_("Region"),
            )
            st.altair_chart(regional_average_chart)
        else:
            regional_chart = generate_regional_chart(
                selected_regions,
                feature,
                regional_scale,
                x_title=_("Month and day"),
                color_title=_("Region"),
            )
            st.altair_chart(regional_chart)
Example #2
def load_data(nrows):
    data = pd.read_csv(DATA_URL,
                       nrows=nrows,
                       parse_dates=[['CRASH DATE', 'CRASH TIME']])
    data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True)
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis='columns', inplace=True)
    data.rename(columns={'crash date_crash time': 'date/time'}, inplace=True)
    data.columns = [column.replace(" ", "_") for column in data.columns]
    return data


data = load_data(100000)
original_data = data

st.header("Where are the most numebr of persons injured in NYC?")
injured_people = st.slider("Number of persons injured in vehicle collisions",
                           0, 19)
st.map(
    data.query('number_of_persons_injured >= @injured_people')[[
        'latitude', 'longitude'
    ]].dropna(how="any"))

st.header("How many collisions occur during a given time of day?")
# hour = st.selectbox("Hour to look at", range(1,24), 1)
hour = st.slider("Hour to look at", 0, 23)
data = data[data['date/time'].dt.hour == hour]
st.markdown("Collisions between %i:00 and %i:00" % (hour, (hour + 1) % 24))

# Initialize the map centered on the data points
midpoint = (np.average(data['latitude']), np.average(data['longitude']))
st.write(
    pdk.Deck(
Example #3
    # Parse dates and add year, month, and day columns
    df['last_review'] = pd.to_datetime(df['last_review'], format="%Y-%m-%d")
    df['year'] = df['last_review'].dt.year.astype("Int64")
    df['month'] = df['last_review'].dt.month.astype("Int64")
    df['day'] = df['last_review'].dt.day.astype("Int64")
    return df.dropna().sample(frac=fraction)

df = load_data(fraction=0.1)

st.header('Data')

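# Filter widgets: neighbourhood groups (multiselect) and a price range (slider)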
n_groups = list(df.neighbourhood_group.unique())
neighbourhood_group = st.multiselect(label="Select Neighbourhood Groups",
                                     options=n_groups, default=n_groups)

(min_price, max_price) = st.slider("Price Range", min_value=0,
                                   max_value=int(df.price.max()),
                                   value=(0, 300))
st.write(f"Min Price: {min_price}, Max Price: {max_price}")

df = df.query("neighbourhood_group == @neighbourhood_group and @min_price < price < @max_price")

st.text(f'Looking at {df.shape[0]:,} rows selected using above options')
st.dataframe(df.head(5))

st.header("Price ")

fig = px.scatter(df.query("price < 2000"), x="year", y="price", color="room_type",  
                title="Price over years with price outliers removed")

#fig.update_layout(legend_orientation='h')

st.plotly_chart(fig)
Example #4
img_placeholder = st.empty()

col1, col2 = st.beta_columns(2)

with col1:
    st.write("**Pixel spacing**")
    st.write(f"x: {scan.PixelSpacing:.2f} mm")
    st.write(f"y: {scan.PixelSpacing:.2f} mm")
    st.write(f"z: {scan.SliceSpacing:.2f} mm")
    st.write("**Device**")
    st.write(f"{scan.ManufacturerModelName} (by {scan.Manufacturer})")

with col2:
    overlay_nodules = st.checkbox("Show nodule overlay", value=True)
    z = st.slider("Slice:",
                  min_value=1,
                  max_value=img_arr.shape[2],
                  value=int(img_arr.shape[2] / 2))
    level = st.number_input("Window level:", value=-600)
    width = st.number_input("Window width:", value=1500)

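# Take the selected slice (the slider is 1-based) and apply the chosen window level/width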
img = get_img_slice(img_arr, z - 1, window=(level, width))

if overlay_nodules:
    mask = get_mask_slice(mask_arr, z - 1)
    overlay = get_overlay()
    ct = Image.composite(overlay, img, mask)
    img_placeholder.image(ct, use_column_width=True)
else:
    img_placeholder.image(img, use_column_width=True)

st.subheader("Detected nodules")
Example #5
                                          index=(160))

countries_for_comparison = []
countries_for_comparison.append(country_1_selected)
countries_for_comparison.append(country_2_selected)
countries_for_comparison.append(country_3_selected)

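# Keep only the selected countries and pivot to one GDP-per-capita column per country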
subselect_of_gdp = gdp_df[gdp_df['country'].isin(countries_for_comparison)]
countries_for_plotting = subselect_of_gdp.pivot(index='year',
                                                columns='country',
                                                values='gdppc')
countries_for_plotting = countries_for_plotting.reset_index()

value = st.slider('test',
                  min_value=1000,
                  max_value=2018,
                  value=(1900, 2018),
                  step=100)
for_plotting = countries_for_plotting[
    (countries_for_plotting['year'] >= value[0])
    & (countries_for_plotting['year'] <= value[1])]

# Chart title and legends
x_axis_title = 'Date'
y_axis_title = 'GDP per capita'

# State 1 Chart
fig1 = px.line(
    for_plotting,
    x="year",
    y=[country_1_selected, country_2_selected, country_3_selected],
Example #6
def load_data(nrows):
    data = pd.read_csv(DATA_URL,
                       nrows=nrows,
                       parse_dates=[['CRASH_DATE', 'CRASH_TIME']])
    data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True)
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis='columns', inplace=True)
    data.rename(columns={'crash_date_crash_time': 'date/time'}, inplace=True)
    return data


data = load_data(100000)
original_data = data

st.header('Where are the most people injured in NYC?')
injured_people = st.slider('Number of persons injured in vehicle collisions',
                           0, 19)
st.map(
    data.query('injured_persons >= @injured_people')[['latitude', 'longitude'
                                                      ]].dropna(how='any'))

st.header('How many collisions occur during a given time of day?')
hour = st.slider('Hour to look at', 0, 23)
data = data[data['date/time'].dt.hour == hour]

st.markdown('Vehicle collisions between %i:00 and %i:00' % (hour,
                                                            (hour + 1) % 24))
midpoint = (np.average(data['latitude']), np.average(data['longitude']))

st.write(
    pdk.Deck(
        map_style='mapbox://styles/mapbox/light-v9',
Example #7
def main():

    # Title and Subheader
    st.title("Iris Dataset EDA App")
    st.subheader("EDA Web App with Streamlit ")

    DATA_URL = (
        'https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv'
    )

    @st.cache(persist=True, show_spinner=True)
    def load_data():
        data = pd.read_csv(DATA_URL)
        data.columns = ('sepal_length', 'sepal_width', 'petal_length',
                        'petal_width', 'species')
        # lowercase = lambda x: str(x).lower()
        # data.rename(lowercase, axis='columns', inplace=True)
        # data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
        return data

    # Create a text element and let the reader know the data is loading.
    data_load_state = st.text('Loading data...')
    # Load the data into the dataframe.
    data = load_data()
    # Notify the reader that the data was successfully loaded.
    data_load_state.text('Loading data...Completed!')

    # Show Dataset
    # if st.checkbox("Preview DataFrame: Head or Tail"):

    #	if st.button("Head"):
    #		st.write(data.head())
    #	if st.button("Tail"):
    #		st.write(data.tail())

    # Show Entire Dataframe
    if st.checkbox("View DataFrame"):
        st.dataframe(data)

    # Show Column Names
    if st.checkbox("View All Column Names"):
        st.text("Columns:")
        st.write(data.columns)

    # Dimensions - Radio Buttons
    # data_dim = st.radio('Check the dimensions of the dataframe',('Rows','Columns'))
    # if data_dim == 'Rows':
    #	st.write("There are", len(data), "Rows in the dataset")
    # if data_dim == 'Columns':
    #	st.write("There are", data.shape[1], "Columns in the dataset")

    if st.checkbox("Show Summary of Dataset"):
        st.write(data.describe())
        st.write("There are", len(data), "rows and", data.shape[1],
                 "columns in the dataset")

    # Selection

    if st.checkbox("View Single Column's Data"):

        species_option = st.selectbox(
            'Select Columns', ('sepal_length', 'sepal_width', 'petal_length',
                               'petal_width', 'species'))
        if species_option == 'sepal_length':
            st.write(data['sepal_length'])
        elif species_option == 'sepal_width':
            st.write(data['sepal_width'])
        elif species_option == 'petal_length':
            st.write(data['petal_length'])
        elif species_option == 'petal_width':
            st.write(data['petal_width'])
        elif species_option == 'species':
            st.write(data['species'])
        else:
            st.write("Select A Column")

    # Show Plots
    if st.checkbox("Show Plots"):
        st.write("_" * 10)
        data.plot(kind='scatter', x='sepal_length', y='sepal_width')
        st.pyplot()
        st.write(
            "---------------- 2D Scatter Plot of Sepal_length vs Sepal_width for all the Species ---------------- "
        )
        st.write("_" * 10)
        st.write(sns.pairplot(data, hue="species", size=3))
        # Use Matplotlib to render seaborn
        st.pyplot()
        st.write(
            "---------------- Pairplot of different species ----------------")
        st.write("_" * 10)

        v_counts = data['species'].value_counts()
        st.bar_chart(v_counts)
        st.write(
            "---------------- Bar Plot of Groups or Counts ----------------")
        st.write("_" * 10)

    # Iris Image Manipulation
    @st.cache
    def load_image(img):
        im = Image.open(os.path.join(img))
        return im

    # Image Type
    if st.checkbox("Show/Hide Images"):

        species_type = st.radio(
            'Have a look at the images of different Iris Species!',
            ('Setosa', 'Versicolor', 'Virginica'))

        if species_type == 'Setosa':
            st.text("Showing Setosa Species")
            my_image = load_image('images/setosa.png')
        elif species_type == 'Versicolor':
            st.text("Showing Versicolor Species")
            my_image = load_image('images/versicolor.png')
        elif species_type == 'Virginica':
            st.text("Showing Virginica Species")
            my_image = load_image('images/virginica.png')

        if st.checkbox("Enhance Image"):

            enh = ImageEnhance.Contrast(my_image)
            num = st.slider("Contrast", 1.0, 2.0)
            img_width = st.slider(
                "Zoom in the Image (Set Image Width in Pixels)", 300, 700)
            st.image(enh.enhance(num), width=img_width)
        else:
            img_width = 300
            num = 1.2
            enh = ImageEnhance.Contrast(my_image)
            st.image(enh.enhance(num), width=img_width)

    # About

    if st.button("About App"):
        st.subheader("Iris Dataset EDA App - Developed by Deepankar Kotnala")
        st.text("Built with Streamlit")
left_column, right_column = st.beta_columns(2)
button = left_column.button("å³å“ć«ć‚°ćƒ©ćƒ•ć‚’č”Øē¤ŗ")
with right_column:
    if button:
        st.line_chart(dg)

# st.area_chart(dg)
# st.bar_chart(dg)

# Map
dm = pd.DataFrame({"lat":[35.702202], "lon":[139.414096]})
# map.token("pk.eyJ1IjoiaGlyb3Rha2VrMDYiLCJhIjoiY2tpbzI2NGgwMTh5dTJyanpxczBtNGZsdyJ9.8WcugSD91Zq4M5KFqPzwvg")
st.map(dm)

# Interactive widgets
# input box

st.title("Interactive widgets")

text = st.text_input("恂ćŖćŸć®č¶£å‘³ć‚’ę•™ćˆć¦äø‹ć•ć„怂")
# text = st.sidebar.text_input("恂ćŖćŸć®č¶£å‘³ć‚’ę•™ćˆć¦äø‹ć•ć„怂") # sidbarč”Øē¤ŗć®å “åˆ
"恂ćŖćŸć®č¶£å‘³:",text
condition = st.slider("恂ćŖćŸć®čŖæ子ćÆļ¼Ÿ", 0, 100, 50)
# condition = st..sidebar.slider("恂ćŖćŸć®čŖæ子ćÆļ¼Ÿ", 0, 100, 50) # sidbarč”Øē¤ŗć®å “åˆ
"恂ćŖćŸć®čŖæ子:", condition

# expander
expander1 = st.beta_expander("č³Ŗ問")
expander1.write("回ē­”")

Example #9
import streamlit as st
x = st.slider('Select a value')
st.write(x, 'squared is', x**2)
Example #10
def main():

    # Newspaper
    def paperHeadlines(paper, number):
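        # Scrape the chosen newspaper's COVID-19 page and return the first 'number' headlines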

        top_news = []

        if paper == 'Dainik Bhaskar':
            web_content = requests.get("https://www.bhaskar.com/coronavirus/")
            soup = BeautifulSoup(web_content.text, "html.parser")
            for a in soup.findAll('a', attrs={'class': "list_thumb"}):
                x = a.get('title')
                top_news.append(x)
        elif paper == 'Patrika':
            web_content = requests.get(
                'https://www.patrika.com/topic/coronavirus/')
            soup = BeautifulSoup(web_content.text, "html.parser")

            top = soup.find_all('div', attrs={'class': 'ctbl-text'})
            for i in top:
                top_news.append(i.text.strip())
        elif paper == 'Navbharat':
            web_content = requests.get(
                'https://navbharattimes.indiatimes.com/coronavirus/trending/74460387.cms'
            )
            soup = BeautifulSoup(web_content.text, "html.parser")

            top = soup.find_all('a', attrs={'class': 'cor_rest_art'})
            for i in top:
                top_news.append(i.text.strip())
        elif paper == 'Amarujala':
            web_content = requests.get(
                'https://www.amarujala.com/tags/corona-special-news?page=1')
            soup = BeautifulSoup(web_content.text, "html.parser")

            top = soup.find_all('h3')
            for i in top:
                top_news.append(i.text.strip())

        elif paper == 'India Today':
            web_content = requests.get('https://www.indiatoday.in/coronavirus')
            soup = BeautifulSoup(web_content.text, "html.parser")
            top = soup.find_all('h3')
            for i in top:
                top_news.append(i.text.strip())
        return top_news[:number]

    #   return top_news[:number]

# Front Page -----------------------------------------------------------------------
    st.markdown(
        "<body style='background-color:white;'><h1 style='text-align: center; color: blue;'>REAL TIME  COVID-19 ANALYSIS</h1></body>",
        unsafe_allow_html=True)
    img = Image.open('covid1.PNG')
    st.image(img, width=700)
    st.markdown(
        "<body style='background-color:CornflowerBlue;'><h3 style='text-align: center; color: green;'>Helpline Number for Corona Virus : +91-11-23978046 or 1075</h3></body>",
        unsafe_allow_html=True)
    st.markdown(
        "<a href='https://www.mohfw.gov.in//'><marquee>Click here for Guidelines by Health Ministry of India</marquee></a>",
        unsafe_allow_html=True)
    st.markdown(
        "<body style='background-color:CornflowerBlue;'><h3 style='text-align: center; color: red;'>#INDIAFIGHTSCORONA</h3></body>",
        unsafe_allow_html=True)
    st.markdown(
        "<body style='background-color:DarkTurquoise;'><h3 style='text-align: center; color: black;'>#StayHome_StaySafe</h3></body>",
        unsafe_allow_html=True)
    day = ['Select', 'Today', 'Yesterday', '2 Days Ago']

    st.markdown(
        "<body style='background-color:white;'><h1 style='text-align: center; color: green;'>SELECT ACTIVITIES FROM THE SIDEBAR Ć°Åøā€˜Ė†</h1></body>",
        unsafe_allow_html=True)
    activities = ["Select", "Indian News Paper Headlines"]
    st.sidebar.markdown(
        "<body style='background-color:CornflowerBlue;'><h3 style='text-align: center; color: black;'>Please Select the Activities</h3></body>",
        unsafe_allow_html=True)

    # task 1  Newspaper
    #*********************
    activity = st.sidebar.selectbox("", activities)
    if activity == activities[1]:
        st.markdown(
            "<body style='background-color:white;'><h1 style='text-align: center; color: #a84c32;'>Covid19 Newspaper Headlines</h1></body>",
            unsafe_allow_html=True)
        news = [
            'Select', 'Dainik Bhaskar', 'Patrika', 'Navbharat', 'India Today',
            'Amarujala'
        ]
        paper = st.selectbox('', news)
        if paper == 'Select':
            pass
        else:
            st.markdown(
                "<body style='background-color:white;'><h3 style='text-align: center; color: green;'>Slide through the slider to see the COVID19 news</h3></body>",
                unsafe_allow_html=True)
            number = st.slider(" ", 1, 15)
            headlines = paperHeadlines(paper, number)
            for i in headlines:
                st.info(i)
    else:
        pass
    st.markdown(
        "<body style='background-color:white;'><h3 style='text-align: center; color: RED;'>*********By Ayush kumar*******<h3></body>",
        unsafe_allow_html=True)
Example #11
import streamlit as st
from scraping_news import pull_articles
import pandas as pd


def make_clickable(link, text):
    return f'<a target="_blank" href="{link}">{text}</a>'


st.title("Planet Mountain News Roundup")

user_input = st.text_input(
    "What would you like to search for? (Default: '9b')", '9b')
num_pages = st.slider("Number of pages to scan", min_value=1, max_value=20)
scan_button = st.button("Scan")

# c1, c2 = st.beta_columns(2)

# if scan_button:
#     articles, thumbs, links = pull_articles(num_pages, user_input)
#     for article in range(len(articles)):
#         c1.image(thumbs[article])
#         link = make_clickable(links[article], articles[article])
#         df = pd.DataFrame(link, columns=['Link'])
#         c2.write(df['Link'].to_html())
# else:
#     pass

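# When the user clicks Scan, pull article titles, thumbnails and links for the search term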
if scan_button:
    articles, thumbs, links = pull_articles(num_pages, user_input)
    data = list(zip(thumbs, articles, links))
Example #12
def main():
    session_state = session.SessionState.get(
        key=session.get_user_id(),
        update=False,
        state_name="Acre",
        state_id="AC",
        city_name="Todos",
        administrative_level="Todos",
        refresh=False,
        reset=False,
        already_generated_user_id=None,
        pages_open=None,
        amplitude_events=None,
        button_styles=dict(),
        continuation_selection=None,
        button_simule=0,
        section1_organize=False,
        section2_manage=False,
    )
    utils.localCSS("style.css")
    config = yaml.load(open("config/config.yaml", "r"), Loader=yaml.FullLoader)
    df = get_data(config)
    genSelectBox(df, session_state)

    params = dict()
    main_icon = utils.load_image("imgs/simulation_main_icon.png")
    st.write(
        f"""
            <div class="text-title-section minor-padding">
                 Quantos <span class="bold main-orange-span">estudantes e professores(as)</span> retornam Ć s salas de aula em diferentes modelos?
            </div>
            <div class="container main-padding" style="padding-left:0px;">
                <div class="container minor-padding main-orange-span" style="font-size: 20px; color:#FF934A; font-weight: bold;"> 
                    <img class="minor-icon" src="data:image/png;base64,{main_icon}" alt="Fonte: Flaticon">
                    Simule o retorno
                </div>
                <div class="minor-padding">
                    O retorno às atividades presenciais deve ser pensado em etapas para definir não só <b>quem pode retornar</b>, mas também <b>como</b>. Trazemos abaixo um passo a passo para construir a simulação da sua rede - experimente!
                </div>
                 <div class="minor-padding" style="font-size: 20px; color:#FF934A; font-weight: bold;">
                    <br>Para qual etapa de ensino você está planejando?
            </div>
            """,
        unsafe_allow_html=True,
    )

    # TODO: also add a state-only option
    # if city_name:
    data = df[
        (df["city_name"] == session_state.city_name)
        & (df["administrative_level"] == session_state.administrative_level)]
    col1, col2 = st.beta_columns([0.9, 0.2])
    with col1:
        education_phase = st.selectbox(
            "", data["education_phase"].sort_values().unique())
        data = data[data["education_phase"] == education_phase]
    with col2:
        st.write(
            f"""<div class="container">
                <br>
                </div>
                <br>
            """,
            unsafe_allow_html=True,
        )

    st.write(
        f"""<br>
            <div class="container" style="padding-left:0px;">
                <div class="minor-padding" style="font-size: 20px; color:#FF934A;"><b>1. Escolha o modelo de retorno Ć s atividades</b></div>
                <div class="minor-padding">
                    Existem diversos modelos possƭveis de retorno avaliadas de acordo com as etapas de aprendizado. Separamos abaixo 5 opƧƵes possƭveis indicadas pela UNESCO.
                </div>
            </div>
        """,
        unsafe_allow_html=True,
    )

    UNESCO_models = {
        'Totalmente Presencial': {
            "description":
            """Neste modelo, todos os estudantes <b>retornam às aulas
            presenciais padrão</b>, isto é, os mesmos horários em sala de
            aula, porém seguindo os novos protocolos de distanciamento e segurança
            sanitária.
            <br><br><b>Por que este modelo?</b><br>
            Modelo tradicional, onde os estudantes e docentes estão habituados.""",
            "hours_per_day": 5,
            "priority": False
        },
        'Aulas presenciais + Tarefas remotas': {
            "description": """Neste modelo professores(as) <b>transmitem
            conceitos para os estudantes presencialmente</b>, e, em seguida,
            <b>estudantes completam exercícios e tarefas em casa</b>.
            <br><br><b>Por que este modelo?</b><br>
            Alunos e professores mantêm um contato próximo, e estudantes podem tirar dúvidas durante a exposição da matéria.""",
            "hours_per_day": 3,
            "priority": False
        },
        'Aulas por vídeo + Tarefas presenciais': {
            "description": """Neste modelo estudantes <b>aprendem
            novos conceitos de forma remota</b> e, em seguida, <b>concluem exercícios e
            tarefas presencialmente</b> com o(a) professor(a).
            <br><br><b>Por que este modelo?</b><br>
            Alunos e professores mantêm o convívio, e os estudantes podem tirar dúvidas
            durante a realização dos exercícios e se beneficiarem com as dúvidas dos colegas.""",
            "hours_per_day": 2,
            "priority": False
        },
        'Grupo prioritário presencial': {
            "description":
            """Neste modelo, os professores têm uma <b>aula normal completa com um grupo
            de estudantes presencial, enquanto outro grupo acompanha remotamente
            por meio de videoconferência (VC)</b>.
            <br><br><b>Por que este modelo?</b>
            Turma mantém o convívio, mesmo que virtual, e os professores atendem todos da turma no mesmo momento.""",
            "hours_per_day": 5,
            "priority": True
        }
    }

    col1_1, col1_2, col1_3, col1_4 = st.beta_columns([0.35, 0.05, 0.85, 0.3])
    with col1_1:
        params["education_model"] = st.selectbox("",
                                                 list(UNESCO_models.keys()))
        params["priority"] = UNESCO_models[
            params["education_model"]]["priority"]
    with col1_2:
        st.write(
            f"""
            <div class="container main-padding">
                <br>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with col1_3:
        # About the model
        st.write(
            f"""
                <div class="col light-green-simulator-bg card-simulator" style="border-radius:30px;">
                    <div style="font-family: 'Roboto Condensed', sans-serif; padding:10px; margin-bottom:0px; margin-top: 16px;margin-left: 16px; margin-right: 16px;">
                        <b>{params["education_model"]}</b>
                        <br><br>{UNESCO_models[params["education_model"]]["description"]}
                        <br><br><b><a href="https://en.unesco.org/sites/default/files/unesco-covid-19-response-toolkit-hybrid-learning.pdf">FONTE: UNESCO</a></b>
                    </div>
                    <div class="button-position" style="margin-bottom: 0px;padding: 10px;margin-top: 16px;margin-right: 16px;margin-left: 16px;">
                        <a href="#entenda-modelo">
                            <button class="button-protocolos" style="border-radius: .25rem; font-size:16px; margin-right: 10px;margin-left: 10px;">
                                leia sobre todos os modelos >
                            </button>
                        </a>
                    </div>
                    <div class="button-position" style="margin-bottom: 0px;padding: 10px;margin-top: 16px;margin-right: 16px;margin-left: 16px;">
                        <a href="#entenda-etapa">
                            <button class="button-protocolos" style="border-radius: .25rem; font-size:16px; margin-right: 10px;margin-left: 10px;">
                                veja considerações por etapa de ensino >
                            </button>
                        </a>
                    </div>
                </div>
                <div id="entenda-modelo" class="info-modal-window" style="width: 80%; height: 70%;">
                    <div>
                        <a href="#" title="Close" class="info-btn-close" style="color: white;">&times</a>
                        <h1 class="main-orange-span bold" style="padding: 0px 50px 0px 50px;">Modelos</h1>
                        <div style="font-size: 16px; padding: 0px 50px 0px 50px;">
                            Abaixo há o quadro completo. Caso não consiga ver a imagem, clique na imagem para baixá-la ou <a href="https://drive.google.com/u/1/uc?id=1tqBItM8XkLdY9u2wk0ZcPrVcHccgdp1f&export=download">[AQUI]</a>.
                        </div>
                        <a href="https://drive.google.com/u/1/uc?id=1tqBItM8XkLdY9u2wk0ZcPrVcHccgdp1f&export=download"><img style="padding: 50px 50px 50px 50px;" class="images" src="https://i.imgur.com/ZByy47a.jpg"></a>
                    </div>
                </div>
                <div id="entenda-etapa" class="info-modal-window" style="width: 80%; height: 70%;">
                    <div>
                        <a href="#" title="Close" class="info-btn-close" style="color: white;">&times</a>
                        <h1 class="main-orange-span bold" style="padding: 0px 50px 0px 50px;">Etapas de Ensino</h1>
                        <div style="font-size: 16px; padding: 0px 50px 0px 50px;">
                            <br>
                            <b>4 - 8 anos</b><br>
                            Pontos principais para consideração:<br>
                            <li>Crianças desta faixa etária possuem menor risco de apresentar sintomas graves.</li>
                            <li>Pais e responsáveis necessitam de creches e suporte para manter demais atividades do dia a dia</li>
                            <li>Eficácia muito baixa do ensino remoto</li><br>
                            <b>8 - 12 anos</b><br>
                            Pontos principais para consideração:<br>
                            <li>Crianças desta faixa etária possuem menor risco de apresentar sintomas graves, mas há maior dificuldade em adotar medidas sanitárias.</li>
                            <li>Já possuem maior autonomia no cotidiano e pode</li><br>
                            <b>12 - 17 anos</b><br>
                            Pontos principais para consideração:<br>
                            <li>Crianças desta faixa etária possuem maior risco intrínseco de contrair e desenvolver sintomas, mas apresentam maior aderência aos protocolos sanitários</li>
                            <li>Logística de agendamento presencial pode ser mais complexa, pois os anos possuem matérias e professores diversos.</li><br>
                            <b>17 - 18 anos</b><br>
                            Pontos principais para consideração:<br>
                            <li>Crianças desta faixa etária possuem maior risco intrínseco de contrair e desenvolver sintomas, mas apresentam maior aderência aos protocolos sanitários.</li>
                            <li>Alta eficácia e adesão ao método remoto</li>
                            <br>Abaixo há o quadro completo. Caso não consiga ver a imagem, clique na imagem para baixá-la ou <a href="https://drive.google.com/u/1/uc?id=1Sj65MXPkRcw6VxojYBLsJ8otIuvpLfq_&export=download">[AQUI]</a>.
                        </div>
                        <a href="https://drive.google.com/u/1/uc?id=1Sj65MXPkRcw6VxojYBLsJ8otIuvpLfq_&export=download"><img style="padding: 50px 50px 50px 50px;" class="images" src="https://i.imgur.com/FyoIFe9.jpg"></a>
                    </div>
                </div>
                """,
            unsafe_allow_html=True,
        )
    with col1_4:
        st.write(
            f"""<div class="container">
                <br>
                </div>
                <br>
            """,
            unsafe_allow_html=True,
        )

    st.write(
        f"""<br>
            <div class="container" style="padding-left:0px;">
                <div class="minor-padding" style="font-size: 20px; color:#FF934A;"><b>2. Escolha quem pode retornar</b></div>
            </div>
        """,
        unsafe_allow_html=True,
    )

    col2a_1, col2a_2, col2a_3, col2a_4 = st.beta_columns(
        [0.35, 0.05, 0.85, 0.3])
    with col2a_1:
        params["number_students"] = st.number_input(
            "Quantos estudantes retornam Ć s aulas presenciais?",
            format="%d",
            value=data["number_students"].values[0],
            step=1,
        )
        if params["priority"]:
            params["number_remote_students"] = st.number_input(
                "Quantos estudantes acompanham Ć s aulas somente de forma remota?",
                format="%d",
                value=data["number_students"].values[0],
                step=1,
            )

    with col2a_2:
        st.write(
            f"""
            <div class="container main-padding">
                <br>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with col2a_3:
        st.write(
            f"""
            <div class="col light-green-simulator-bg card-simulator" style="border-radius:30px;">
                <div class="row" style="font-family: 'Roboto Condensed', sans-serif; margin-bottom:0px; padding:10px;">
                    <b>Iniciamos com total de estudantes reportados no Censo Escolar 2019 (INEP).</b>
                    <br>Você pode alterar esse valor ao lado. Leve em consideração quais grupos de estudantes podem ser vulneráveis ou ter prioridade.
                </div>
                <div class="button-position" style="padding-bottom: 15px;">
                    <a href="#entenda-estudantes">
                        <button class="button-protocolos" style="border-radius: .25rem; font-size:16px; margin-right: 10px;margin-left: 10px;">
                            grupos que requerem atenção especial >
                        </button>
                    </a>
                </div>
            </div>
            <div id="entenda-estudantes" class="info-modal-window" style="width: 80%; height: 70%;">
                <div>
                    <a href="#" title="Close" class="info-btn-close" style="color: white;">&times</a>
                    <h1 class="main-orange-span bold" style="padding: 0px 50px 0px 50px;">Estudantes</h1>
                    <div style="font-size: 20px; padding: 0px 50px 0px 50px;">
                        <b>Grupos que requerem atenção especial</b>
                    </div>
                    <br>
                    <div style="font-size: 16px; padding: 0px 50px 0px 50px;">
                        <b>Exemplos de grupos vulneráveis ou/e marginalizados</b>
                        <li>Minorias</li>
                        <li>Meninas adolescentes</li>
                        <li>Crianças com deficiência de aprendizagem</li>
                        <li>Crianças que vivem em instituições de abrigo</li>
                        <li>Crianças vivendo em condição de pobreza, em residências com alta ocupância ou improvisadas</li>
                        <li>Órfãos</li>
                        <li>Crianças separadas de seus responsáveis</li>
                        <li>Crianças e adolescentes em risco de abandono escolar</li>
                    </div>
                </div>
            </div>
            """,
            unsafe_allow_html=True,
        )
    with col2a_4:
        st.write(
            f"""<div class="container">
                <br>
                </div>
                <br>
            """,
            unsafe_allow_html=True,
        )
    st.write(
        f"""
        <div class="container main-padding">
            <br>
        </div>
        """,
        unsafe_allow_html=True,
    )

    col2b_1, col2b_2, col2b_3, col2b_4 = st.beta_columns(
        [0.35, 0.05, 0.85, 0.3])
    with col2b_1:
        params["number_teachers"] = st.number_input(
            "Quantos professores(as) retornam?",
            format="%d",
            value=data["number_teachers"].values[0],
            step=1,
        )
    col2b_2 = col2a_2
    with col2b_3:
        st.write(
            f"""
            <div class="col light-green-simulator-bg card-simulator" style="border-radius:30px;">
                <div class="row" style="font-family: 'Roboto Condensed', sans-serif; margin-bottom:0px; padding:10px;">
                    <b>Iniciamos com total de professores reportados no Censo Escolar 2019 (INEP).</b> 
                    <br>Você pode alterar esse valor ao lado. Leve em consideração quais grupos de professores podem ser de risco, confortáveis para retorno e outros.
                </div>
                <div class="button-position" style="padding-bottom: 15px;">
                    <a href="#entenda-professores">
                        <button class="button-protocolos" style="border-radius: .25rem; font-size:16px; margin-right: 10px;margin-left: 10px;">
                            como retornar professores(as) >
                        </button>
                    </a>
                </div>
                <div id="entenda-professores" class="info-modal-window" style="width: 80%; height: 70%;">
                    <div>
                        <a href="#" title="Close" class="info-btn-close" style="color: white;">&times</a>
                        <h1 class="main-orange-span bold" style="padding: 0px 50px 0px 50px;">Professores</h1>
                        <div style="font-size: 16px; padding: 0px 50px 0px 50px;">
                            <b>Fatores a serem considerados:</b> grupos vulneráveis, número de casos suspeitos, desconforto da rede com o retorno presencial, dificuldade logística e a disponibilidade de retorno presencial.
                            <br><br>O quadro explicativo traz para cada fator um desafio e uma ação sugerida.
                            <br><br>Caso não consiga ver a imagem, clique na imagem para baixá-la ou <a href="https://drive.google.com/u/1/uc?id=1lLtbEMau4nIj8tZ5rQF51ThV2Q8K1DzE&export=download">[AQUI]</a>.
                        </div>
                        <a href="https://drive.google.com/u/1/uc?id=1lLtbEMau4nIj8tZ5rQF51ThV2Q8K1DzE&export=download"><img style="padding: 50px 50px 50px 50px;" class="images" src="https://i.imgur.com/4ai7xDK.jpg"></a>
                    </div>
                </div>
            </div>
            """,
            unsafe_allow_html=True,
        )
    col2b_4 = col2a_4
    st.write(
        f"""
        <br>
        <div class="container" style="padding-left:0px;">
            <div class="minor-padding" style="font-size: 20px; color:#FF934A;"><b>3. Defina as restriƧƵes de retorno</b></div><br>
                </div>
            </div>
        </div>
        """,
        unsafe_allow_html=True,
    )

    col3_1, col3_2, col3_3, col3_4, col3_5, col3_6 = st.beta_columns(
        [0.35, 0.05, 0.4, 0.05, 0.4, 0.3])
    with col3_1:
        params["number_classrooms"] = st.number_input(
            "Quantas salas de aula disponĆ­veis?",
            format="%d",
            value=data["number_classroms"].values[0],
            step=1,
        )
        st.write(
            f"""
            <div class="row" style="margin:0px; padding:10px; background:#DDFBF0; border-radius: 1rem 1rem 1rem 1rem;">
                O número de salas restringe o número de turmas que podem voltar de forma simultânea.
            </div>
        """,
            unsafe_allow_html=True,
        )
    col3_2 = col2a_2
    with col3_3:
        params["max_students_per_class"] = st.slider(
            "Selecione o mƔximo de estudantes por turma:", 0, 20, 20, 1)
        st.write(
            f"""
            <div class="row" style="margin:0px; padding:10px; background:#DDFBF0; border-radius: 1rem 1rem 1rem 1rem;">
                Limitamos em 20 estudantes por sala para diminuir o risco de transmissão seguindo critérios sanitários.
            </div>
            """,
            unsafe_allow_html=True,
        )
    col3_4 = col2a_2
    with col3_5:
        params["hours_per_day"] = int(
            st.slider(
                "Selecione o nĆŗmero de horas presenciais diĆ”rias na escola por turma:",
                min_value=1,
                max_value=5,
                value=UNESCO_models[params["education_model"]]
                ["hours_per_day"],
                step=1,
            ))

        st.write(
            f"""
            <div class="row" style="margin:0px; padding:10px; background:#DDFBF0; border-radius: 1rem 1rem 1rem 1rem;">
                As restrições sanitárias limitam a quantidade de tempo e estudantes que conseguem retornar à sala de aula.
            </div>

            <div class="container">
            <br>
            </div>
            <br>
            """,
            unsafe_allow_html=True,
        )
    col3_6 = col2a_4

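    # Log the expander click via the amplitude helper and render the simulation result for the chosen parameters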
    with st.beta_expander("simular retorno"):
        user_analytics = amplitude.gen_user(utils.get_server_session())
        opening_response = user_analytics.safe_log_event(
            "clicked simule retorno", session_state, is_new_page=True)
        print(params)
        genSimulationResult(params, config)
    '''if st.button("Simular retorno"):
        if st.button("Esconder"):
            pass
        genSimulationResult()
    utils.stylizeButton(
        name="SIMULAR RETORNO",
        style_string="""
        box-sizing: border-box;
        border-radius: 15px; 
        width: 150px;padding: 0.5em;
        text-transform: uppercase;
        font-family: 'Oswald', sans-serif;
        background-color: #0097A7;
        font-weight: bold;
        text-align: center;
        text-decoration: none;font-size: 18px;
        animation-name: fadein;
        animation-duration: 3s;
        margin-top: 1.5em;""",
        session_state=session_state,
    )'''

    # TODO: write methodology v1.2
    with st.beta_expander("ler metodologia"):
        user_analytics = amplitude.gen_user(utils.get_server_session())
        opening_response = user_analytics.safe_log_event(
            "clicked simule metodologia", session_state, is_new_page=True)
        methodology_text = load_markdown_content("methodology_short.md")
        st.write(methodology_text)
Example #13
def app():
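    # Recursively walk the parsed PDF JSON and collect the "text" values of nodes on the requested page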
    def text_on_page(dict_var, id_json, list_res, page):
        if type(dict_var) is dict:
            for k, v in dict_var.items():
                if k == id_json and v == page:
                    if v > page: return list_res
                    list_res.append(dict_var["text"])
                elif isinstance(v, dict):
                    text_on_page(v, id_json, list_res, page)
                elif isinstance(v, list):
                    for item in v:
                        text_on_page(item, id_json, list_res, page)
        return list_res

    def get_page(data, page):
        lines = []
        for chunk in data["elements"]:
            lines.extend(text_on_page(chunk, "page", [], page))
        return lines

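    # Count whitespace-separated tokens across all documents and return the top-N words with their counts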
    def get_histogram(docs, top=20):
        tokens = []
        for s in docs.values():
            tokens += s.split()
        uniques, counts = np.unique(tokens, return_counts=True)
        sorted_inds = np.argsort(counts)
        uniques_sorted = uniques[sorted_inds[-top:]][::-1]
        counts_sorted = counts[sorted_inds[-top:]][::-1]
        return (uniques_sorted, counts_sorted)

    file = st.file_uploader("test", type="pdf", key=2)
    start = 1
    max_val = 1000
    end = 5
    slider_val = st.slider('Page range:',
                           min_value=start,
                           max_value=max_val,
                           value=(1, end),
                           step=1)

    if file is not None:
        file_details = {
            "FileName": file.name,
            "FileType": file.type,
            "FileSize": str(file.size / 1000000) + 'mb'
        }
        data_load_state = st.text('Loading data... Thank you for waiting 😊')

        parser = HierarchyParser()
        source = FileSource(file, page_numbers=list(range(start - 1, end)))

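        # Cache the parsed document so reruns with the same source skip the expensive PDF parse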
        @st.cache(suppress_st_warning=True)
        def fetch_doc(source):
            return parser.parse_pdf(source)

        document = fetch_doc(source)
        printer = JsonFilePrinter()
        file_path = pathlib.Path('pdf.json')
        printer.print(document, file_path=str(file_path.absolute()))

        with open('pdf.json') as json_file:
            data = json.load(json_file)
        json_file.close()
        pages = {
            i: get_page(data, i)
            for i in range(slider_val[0], slider_val[1])
        }

        (formatted_docs,
         paragraph_page_idx) = preprocessing2.get_formatted_docs(
             pages, max_paragraphs=5)
        preprocessed_docs = preprocessing2.get_preprocessed_docs(
            formatted_docs)
        data_load_state.text("Done!")
        st.write(file_details)
        with st.beta_expander("PDF Extraction details"):
            st.subheader('First paragraphs on page ' + str(slider_val[0]))
            for i in range(min(5, len(pages[slider_val[0]]))):
                st.markdown("<u>¶ " + str(i + 1) + "</u>: " +
                            pages[slider_val[0]][i],
                            unsafe_allow_html=True)

            st.subheader('PDF word distribution')
            (uniques, counts) = get_histogram(preprocessed_docs)
            fig = px.bar(x=uniques, y=counts)
            fig.update_xaxes(title_text='words')
            fig.update_yaxes(title_text='occurrences')
            st.plotly_chart(fig)

            st.subheader('Paragraph similarity heatmap')

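        # Vectorize the preprocessed paragraphs with TF-IDF and rank them by cosine similarity against the query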
        tfidf_vectorizer = cosine2.get_tfidf_vectorizer()
        tfidf_matrix = tfidf_vectorizer.fit_transform(
            list(preprocessed_docs.values())).toarray()
        query1 = st.text_input("Cosine-SVD Search")
        if query1:
            q = cosine2.get_query_vector(query1, tfidf_vectorizer)
            cos_sims = cosine2.get_cosine_sim(q, tfidf_matrix)
            (rankings, scores) = cosine2.get_rankings(cos_sims)

            idx = rankings[0]
            score = scores[0]
            page_num = paragraph_page_idx[idx] + 1
            doc = formatted_docs[idx]
            if score > 0.0:
                st.subheader("Similarity: " + str(score))
                st.markdown("<u>Match</u>: " + str(doc),
                            unsafe_allow_html=True)
                st.markdown("<u>Page Number</u>: " + str(page_num),
                            unsafe_allow_html=True)

                #write match and query to the db
                doc_ref = db.collection("queries").document()
                doc_ref.set({
                    "query": query1,
                    "topMatch": str(doc),
                    "timeStamp": firestore.SERVER_TIMESTAMP,
                    "upvote": 0
                })

            else:
                st.subheader("No matches found.")
        st.write("Following methods are under construction šŸ˜Š Stay tuned!")
        query2 = st.text_input("Synonymized Query Search")
        query3 = st.text_input("Verbatim Search")

    st.subheader("Recent search results:")
    q_ref = db.collection("queries").order_by(
        u'timeStamp', direction=firestore.Query.DESCENDING)
    counter = 0
    yesButtons = []
    noButtons = []
    for doc in q_ref.stream():
        counter += 1
        doc_dict = doc.to_dict()

        st.markdown("<strong>Query " + str(counter) + "</strong>: \n",
                    unsafe_allow_html=True)
        st.markdown("<u>Query</u>: " + doc_dict["query"] + "\n",
                    unsafe_allow_html=True)
        st.markdown("<u>Top Match</u>: " + doc_dict["topMatch"] + "\n",
                    unsafe_allow_html=True)
        st.markdown("&nbsp")
        if doc_dict["upvote"] < 0:
            st.markdown("<small>So far " + str(abs(doc_dict["upvote"])) +
                        "people don't think it's a good match.</small>",
                        unsafe_allow_html=True)
        else:
            st.markdown("<small>So far " + str(doc_dict["upvote"]) +
                        " people think it's a good match.</small>",
                        unsafe_allow_html=True)

        st.markdown("<i><small>Do you think this is a good match?</small></i>",
                    unsafe_allow_html=True)
        yesButtons.append(st.button("šŸ‘", key="YesButton" + str(counter)))
        noButtons.append(st.button("šŸ‘Ž", key="NoButton" + str(counter)))

        st.markdown("<hr>", unsafe_allow_html=True)

        if counter == 5:
            break

    st.subheader('made with ❤️ by:')
    st.markdown(
        '[Vince Bartle](https://bartle.io) (vb344) | [Dubem Ogwulumba](https://www.linkedin.com/in/dubem-ogwulumba/) (dao52) | [Erik Ossner](https://erikossner.com/) (eco9) | [Qiyu Yang](https://github.com/qiyuyang16/) (qy35) | [Youhan Yuan](https://github.com/nukenukenukelol) (yy435)'
    )
Example #14
        mn = df[col].min().item()
        mx = df[col].max().item()
        defv = df[col].mean()

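        # Numeric column: slider over the observed range, step ~1% of the range rounded to one significant figure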
        if mx > mn:
            sp = (mx - mn) / 100
            sp = round(sp, 1 - int(floor(log10(abs(sp)))) - 1)

            if df[col].dtype == np.int64:
                defv = np.ceil(defv).astype(int).item()
                sp = np.ceil(sp).astype(int).item()

            val = st.slider(
                "{} ({})".format(col, df[col].dtype),
                min_value=mn,
                max_value=mx,
                step=sp,
                value=defv,
            )
        else:
            val = mx

    else:
        defv = df[col].mode().item()
        uniq = pd.unique(df[col]).tolist()
        val = st.selectbox(
            "{} ({})".format(col, df[col].dtype), options=uniq, index=uniq.index(defv)
        )
    list_form += [val]
    default_form += [defv]
Example #15
import numpy as np
import pandas as pd
import streamlit as st
from scipy import optimize
from matplotlib import pyplot as pl

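# Dynes density of states: N(E) = N0 * |Re[(E - i*gamma) / sqrt((E - i*gamma)^2 - delta^2)]|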
def dynes(E, gamma, delta, N0):
    numerator = E - gamma*1j
    denominator = np.sqrt( (E-gamma*1j)**2 - delta**2 )
    return N0*np.abs(np.real(numerator/denominator))

f = 'stm.csv'
stm = pd.read_csv(f, names=['bias', 'didv'])
# try:
#     f = 'C:/Users/Kuri y Rizu/Documents/Synced Folders/UW-Madison/Oxide Lab/UW Papers/RF Q0 paper/stm.csv'
#     stm = pd.read_csv(f, names=['bias', 'didv'])
# except:
#     f = '/home/chris/Documents/Synced Folders/UW-Madison/Oxide Lab/UW Papers/RF Q0 paper/stm.csv'
#     stm = pd.read_csv(f, names=['bias', 'didv'])

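# Fit the Dynes model to the measured dI/dV curve; the fitted parameters seed the sliders below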
p, cov = optimize.curve_fit(dynes, stm.bias, stm.didv, p0=[0.2, 3, 1])

gamma = st.slider('gamma', min_value=0.0, max_value=1.0, step=0.01, value=float(p[0]))
delta = st.slider('delta', min_value=0.0, max_value=4.0, step=0.1, value=float(p[1]))
N0 = st.slider('N0', min_value=0.0, max_value=1.2, step=0.01, value=float(p[2]))

fit = dynes(stm['bias'], gamma, delta, N0)

fig = pl.figure()
pl.plot(stm['bias'], stm['didv'], '-o', color='blue')
pl.plot(stm['bias'], fit, '-', color='red')

st.pyplot(fig)
Example #16
def write():
    """Used to write the page in the app.py file"""
    with st.spinner("Loading Map ..."):
        #ast.shared.components.title_awesome("")    #Title Awesome Streamlit ausgeblendet

        # read CSV
        # CSV for Choropleth Map
        df = pd.read_csv(
            "https://raw.githubusercontent.com/hannahkruck/awesome-test/master/Map.csv",
            encoding="utf8",
            sep=";")
        # CSV for Line Map
        df2 = pd.read_csv(
            "https://raw.githubusercontent.com/hannahkruck/awesome-test/master/Map.csv",
            encoding="utf8",
            sep=";")

        # Title
        st.title("Map view")

        #----------------- Side bar (filter options) -------------------

        # Select map (Choropleth or Line Map)
        selectedMapType = st.sidebar.radio("Map",
                                           ('Choropleth Map', 'Line Map'))
        if selectedMapType == 'Choropleth Map':
            showChoropleth = True
            showLine = False
        else:
            showLine = True
            showChoropleth = False

        # General filter (Age, Gender)
        st.sidebar.header("Filters")
        selectedAge = st.sidebar.multiselect(
            "Select Age", ("under 18", "18 - 34", "35 - 64", "over 65"))
        selectedGender = st.sidebar.selectbox("Select Gender",
                                              ("All", "Male", "Female"))

        # Special filter for Choropleth Map
        st.sidebar.header("Filter for Choropleth Map")
        # Drop down menu for Choropleth Map Information
        selectedMapChoropleth = st.sidebar.selectbox(
            "Select Map Information", ('Applications to target countries',
                                       'Applicants by country of origin'))
        # Information for Choropleth Map based on the chosen map information
        if 'target' in selectedMapChoropleth:
            selectedMapChoropleth = 'destinationCountry'
            selectedCode = 'geoCodeDC'
            mapColor = 'Blues'
        else:
            selectedMapChoropleth = 'homeCountry'
            selectedCode = 'geoCodeHC'
            mapColor = 'Reds'

        # Special filter for Line Map
        st.sidebar.header("Filter for Line Map")
        # Select type (show routes of asylum seeker from a particular origin country or to a particular target country)
        selectedType = st.sidebar.radio("Select type",
                                        ('Target country', 'Origin country'))
        if selectedType == 'Target country':
            selectedType = df.destinationCountry.unique()
            countryCategory = 'destinationCountry'
            namesToShow = 'homeCountry'
            selectedLon = 'lonDC'
            selectedLat = 'latDC'
        else:
            selectedType = df.homeCountry.unique()
            countryCategory = 'homeCountry'
            namesToShow = 'destinationCountry'
            selectedLon = 'lonHC'
            selectedLat = 'latHC'
        # Drop down menu for selected country
        selectedCountryMapLine = st.sidebar.selectbox("Select country",
                                                      (selectedType))

        #----------------- Website content (Year slider, i-Button) -------------------

        # Markdown for i-Button
        # CSS and HTML Code
        st.markdown('''
        <!-- https://www.w3schools.com/css/tryit.asp?filename=trycss_tooltip_transition & https://www.w3schools.com/css/tryit.asp?filename=trycss_tooltip_right-->
        <style>
            .tooltip {
              position: relative;
              display: inline-block;
              font-size:1.6rem;
              
            }
            
            .tooltip .tooltiptext {
              visibility: hidden;
              width: 50vw;
              background-color: #f1f3f7;
              color: #262730;
              text-align: justify;
              border-radius: 6px;
              padding: 5px;
              font-size:0.9rem;
              
              /* Position the tooltip */
              position: absolute;
              z-index: 1;
              top: -5px;
              left: 105%;
              
              opacity: 0;
              transition: opacity 0.8s;
            }
            
            .tooltip:hover .tooltiptext {
              visibility: visible;
              opacity: 1;
            }
        </style>
        ''',
                    unsafe_allow_html=True)

        # Text for tooltip
        st.markdown('''
        <div class="tooltip">&#x24D8
        <span class="tooltiptext">
        <b>Choropleth Map</b><br>The Choropleth Map shows the number of asylum applications per country in Europe and the number of refugees per country worldwide for the selected year (see filter 'Select Map Information' for Choropleth Map).
        <br><br>
        <b>Line Map</b><br>The Line Map presents the routes of the refugees depending on the selected type. The type 'target country' shows from which countries the asylum seekers originate based on a specific target country. The type 'origin country' indicates where the asylum seekers are fleeing to from a specific country of origin.
        
        </span></div>
        ''',
                    unsafe_allow_html=True)

        # Slider to choose the year
        selected_year = st.slider("", (int(df["year"].min())),
                                  (int(df["year"].max())))

        # Title for map regarding the chosen year
        st.subheader('Asylum seekers in the year %s' % selected_year)

        #----------------- Data preparation (general) -------------------

        # Remove 'Overall' and 'Überseeische Länder und Hoheitsgebiete' from both CSVs
        indexNames = df[df['destinationCountry'] == 'Overall'].index
        df.drop(indexNames, inplace=True)
        indexNames = df[df['homeCountry'] == 'Overall'].index
        df.drop(indexNames, inplace=True)

        indexNames = df[df['destinationCountry'] ==
                        'Überseeische Länder und Hoheitsgebiete'].index
        df.drop(indexNames, inplace=True)
        indexNames = df[df['homeCountry'] ==
                        'Überseeische Länder und Hoheitsgebiete'].index
        df.drop(indexNames, inplace=True)

        indexNames = df2[df2['destinationCountry'] == 'Overall'].index
        df2.drop(indexNames, inplace=True)
        indexNames = df2[df2['homeCountry'] == 'Overall'].index
        df2.drop(indexNames, inplace=True)

        indexNames = df2[df2['destinationCountry'] ==
                         'Überseeische Länder und Hoheitsgebiete'].index
        df2.drop(indexNames, inplace=True)
        indexNames = df2[df2['homeCountry'] ==
                         'Überseeische Länder und Hoheitsgebiete'].index
        df2.drop(indexNames, inplace=True)

        # Delete all cells, except one year (both maps)
        indexNames = df[df['year'] != selected_year].index
        df.drop(indexNames, inplace=True)

        indexNames = df2[df2['year'] != selected_year].index
        df2.drop(indexNames, inplace=True)

        #----------------- Data preparation (Choropleth Map) -------------------

        # Information for Choropleth Map (df) based on the chosen gender and age
        df['subtotal'] = 0
        # Check selected gender
        if selectedGender == 'Female':
            # if an age is selected
            if selectedAge:
                # selectedAge is a list of strings
                # Therefore, we have to check every entry in the list and sum up partial results in new column subtotal
                for i in selectedAge:
                    if i == 'under 18':
                        df['subtotal'] = df['subtotal'] + df['fu18']
                    elif i == '18 - 34':
                        df['subtotal'] = df['subtotal'] + df['f18']
                    elif i == '35 - 64':
                        df['subtotal'] = df['subtotal'] + df['f35']
                    elif i == 'over 65':
                        df['subtotal'] = df['subtotal'] + df['fo65']
            else:  # no age is selected, that means the user wants to see all women
                df['subtotal'] = df['subtotal'] + df['womenTotal']
            a = 'subtotal'
        elif selectedGender == 'Male':
            if selectedAge:
                for i in selectedAge:
                    if i == 'under 18':
                        df['subtotal'] = df['subtotal'] + df['mu18']
                    elif i == '18 - 34':
                        df['subtotal'] = df['subtotal'] + df['m18']
                    elif i == '35 - 64':
                        df['subtotal'] = df['subtotal'] + df['m35']
                    elif i == 'over 65':
                        df['subtotal'] = df['subtotal'] + df['mo65']
            else:
                df['subtotal'] = df['subtotal'] + df['menTotal']
            a = 'subtotal'
        else:  # if no gender is selected, that means the user wants to see all
            if selectedAge:
                for i in selectedAge:
                    if i == 'under 18':
                        df['subtotal'] = df['subtotal'] + df['mu18'] + df[
                            'fu18']
                    elif i == '18 - 34':
                        df['subtotal'] = df['subtotal'] + df['m18'] + df['f18']
                    elif i == '35 - 64':
                        df['subtotal'] = df['subtotal'] + df['m35'] + df['f35']
                    elif i == 'over 65':
                        df['subtotal'] = df['subtotal'] + df['fo65'] + df[
                            'mo65']
                a = 'subtotal'
            else:
                a = 'total'

        # Group the countries by year and sum the selected column into a new column df['sum']
        df['sum'] = df.groupby([selectedMapChoropleth,
                                'year'])[a].transform('sum')

        #----------------- Data preparation (Line Map) -------------------

        # countryCategory = homeCountry or destinationCountry
        # selectedCountryMapLine is the selected country for the map line (for example Syria (homeCountry))
        indexNames = df2[df2[countryCategory] != selectedCountryMapLine].index
        df2.drop(indexNames, inplace=True)

        df2['subtotal'] = 0

        if selectedGender == 'Female':
            # if an age is selected
            if selectedAge:
                # selectedAge is a list of strings
                # Therefore, we have to check every entry in the list and delete the row if the value in the column for the age is null
                for i in selectedAge:
                    if i == 'under 18':
                        indexNames = df2[df2['fu18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['fu18']
                    elif i == '18 - 34':
                        indexNames = df2[df2['f18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['f18']
                    elif i == '35 - 64':
                        indexNames = df2[df2['f35'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['f35']
                    elif i == 'over 65':
                        indexNames = df2[df2['fo65'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['fo65']
            else:
                indexNames = df2[df2['womenTotal'] == 0].index
                df2.drop(indexNames, inplace=True)
                df2['subtotal'] = df2['subtotal'] + df2['womenTotal']
        elif selectedGender == 'Male':
            if selectedAge:
                # selectedAge is a list of strings
                # Therefore, we have to check every entry in the list and delete the row if the value in the column for the age is null
                for i in selectedAge:
                    if i == 'under 18':
                        indexNames = df2[df2['mu18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['mu18']
                    elif i == '18 - 34':
                        indexNames = df2[df2['m18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['m18']
                    elif i == '35 - 64':
                        indexNames = df2[df2['m35'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['m35']
                    elif i == 'over 65':
                        indexNames = df2[df2['mo65'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['mo65']
            else:
                indexNames = df2[df2['menTotal'] == 0].index
                df2.drop(indexNames, inplace=True)
                df2['subtotal'] = df2['subtotal'] + df2['menTotal']
        else:  # if no gender is selected, that means the user wants to see all
            if selectedAge:
                for i in selectedAge:
                    if i == 'under 18':
                        indexNames = df2[df2['mu18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        indexNames = df2[df2['fu18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['mu18'] + df2[
                            'fu18']
                    elif i == '18 - 34':
                        indexNames = df2[df2['m18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        indexNames = df2[df2['f18'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['m18'] + df2[
                            'f18']
                    elif i == '35 - 64':
                        indexNames = df2[df2['m35'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        indexNames = df2[df2['f35'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['m35'] + df2[
                            'f35']
                    elif i == 'over 65':
                        indexNames = df2[df2['mo65'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        indexNames = df2[df2['fo65'] == 0].index
                        df2.drop(indexNames, inplace=True)
                        df2['subtotal'] = df2['subtotal'] + df2['mo65'] + df2[
                            'fo65']
            else:  # all people are considered
                indexNames = df2[df2['total'] == 0].index
                df2.drop(indexNames, inplace=True)

        # Create list of origin or target countries to display them in hover text
        # Every second index must contain the country name, so a placeholder is necessary in front of it
        # Structure: [placeholder, name+number, placeholder, name+number, ...]
        # name = listPlaceholderNames
        # number = listPlaceholderNumber

        listPlaceholderNames = df2[namesToShow].values.tolist()
        listPlaceholderNumber = df2[a].values.tolist()

        nameList = []
        i = 0
        if namesToShow == 'homeCountry':
            for x in listPlaceholderNames:
                nameList.append(i)
                x = x + ': ' + str(listPlaceholderNumber[i])
                nameList.append(x)
                i = i + 1
            if len(nameList) != 0:
                nameList[-2] = None
        else:
            for x in listPlaceholderNames:
                x = x + ': ' + str(listPlaceholderNumber[i])
                nameList.append(x)
                nameList.append(i)
                i = i + 1
            if len(nameList) != 0:
                nameList[-1] = None

        st.write(
            '<style>div.Widget.row-widget.stRadio > div{flex-direction:row;}</style>',
            unsafe_allow_html=True)

        #----------------Create Maps with Plotly (Choropleth and Line Map)---------------------------

        #Link Toggle Map https://plotly.com/python/custom-buttons/

        fig = go.Figure()

        # Choropleth Map
        fig.add_trace(
            go.Choropleth(
                locations=df[selectedCode],
                visible=showChoropleth,
                z=df['sum'],
                text=df[selectedMapChoropleth],
                colorscale=mapColor,
                autocolorscale=False,
                reversescale=False,
                name="",
                marker_line_color='darkgray',
                marker_line_width=0.5,
                colorbar_tickprefix='',
                colorbar_title='Number of<br>asylum<br>applications<br>',
            ))

        # Line Map
        fig.add_trace(
            go.Scattergeo(locationmode='country names',
                          lon=df2[selectedLon],
                          lat=df2[selectedLat],
                          hoverinfo='text',
                          name=selectedCountryMapLine,
                          text=df2[countryCategory],
                          line=dict(width=1, color='red'),
                          opacity=0.510,
                          visible=showLine,
                          mode='markers',
                          marker=dict(size=3,
                                      color='rgb(255, 0, 0)',
                                      line=dict(
                                          width=3,
                                          color='rgba(68, 68, 68, 0)',
                                      ))))

        lons = []
        lats = []
        lons = np.empty(2 * len(df2))
        lons[::2] = df2['lonDC']
        lons[1::2] = df2['lonHC']
        lats = np.empty(2 * len(df2))
        lats[::2] = df2['latDC']
        lats[1::2] = df2['latHC']


        fig.add_trace(
            go.Scattergeo(locationmode='country names',
                          visible=showLine,
                          name='route and number <br>of asylum seekers',
                          text=nameList,
                          hovertemplate=nameList,
                          lon=lons,
                          lat=lats,
                          mode='markers+lines',
                          line=dict(width=1, color='red'),
                          opacity=0.5))

        fig.update_layout(
            showlegend=True,
            geo=go.layout.Geo(
                scope='world',
                #projection_type = 'azimuthal equal area',
                showland=True,
                showcountries=True,
                landcolor='rgb(243, 243, 243)',
                countrycolor='rgb(105,105,105)',
            ),
        )

        fig.update_layout(
            geo=dict(showframe=False,
                     showcoastlines=False,
                     projection_type='equirectangular'),
            autosize=True,
            margin=dict(
                l=0,
                r=0,
                b=0,
                t=20,
            ),
        )

        # Display figure
        st.plotly_chart(
            fig,
            use_container_width=True,
            config={
                'modeBarButtonsToRemove':
                ['lasso2d', 'select2d', 'pan2d', 'hoverClosestGeo']
            })
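
A standalone illustration of the coordinate-interleaving trick used for the line trace above (synthetic values, not the Map.csv columns): destination and origin longitudes are woven into one array so that each consecutive pair of points forms one route segment.

import numpy as np

dest_lon = np.array([1.0, 2.0, 3.0])     # e.g. destination-country longitudes
home_lon = np.array([10.0, 20.0, 30.0])  # origin-country longitudes

lons = np.empty(2 * len(dest_lon))
lons[::2] = dest_lon                     # even positions: destination
lons[1::2] = home_lon                    # odd positions: origin
# lons -> [ 1., 10.,  2., 20.,  3., 30.]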
Example #17
0
def get_data(nrows):
    data = pd.read_csv(DATA_URL,
                       nrows=nrows,
                       parse_dates=[["CRASH_DATE", "CRASH_TIME"]])
    data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True)
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis="columns", inplace=True)
    data.rename(columns={"crash_date_crash_time": "date/time"}, inplace=True)
    return data


data = get_data(100000)
original_data = data

st.header("Where are the most people injured in NYC?")
injured_people = st.slider("No of people injured", 0, 19)
st.map(
    data.query("injured_persons >= @injured_people")[["latitude", "longitude"
                                                      ]].dropna(how="any"))

st.header("How many collisions occur during a givn time of a day?")
#hour = st.sidebar.slider("Hour to look at", 0, 23)
hour = st.slider("Hour to look at", 0, 23)
data = data[data['date/time'].dt.hour == hour]

midpoint = (np.average(data["latitude"]), np.average(data["longitude"]))

st.markdown("Vehicle collision between %i:00 and and %i:00" %
            (hour, (hour + 1) % 24))
st.write(
    pdk.Deck(
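
The snippet above is cut off inside the pdk.Deck(...) call. One plausible way such a deck is typically finished is sketched below; the layer type and view settings are hypothetical, not the author's original arguments.

layer = pdk.Layer(
    "HexagonLayer",                       # aggregate collision points into hexagonal bins
    data=data[["date/time", "latitude", "longitude"]],
    get_position=["longitude", "latitude"],
    radius=100,
    extruded=True,
    pickable=True,
)
view = pdk.ViewState(latitude=midpoint[0], longitude=midpoint[1], zoom=11, pitch=50)
st.write(pdk.Deck(layers=[layer], initial_view_state=view))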
Example #18
0
with row0_1:
    hist_x = st.selectbox("Select a feature",
                          options=df.columns,
                          index=df.columns.get_loc("age"))

with row0_2:
    bar_mode = st.selectbox("Select barmode", ["relative", "group"], 0)

with set_hist_color:
    hist_color = st.selectbox(
        'Select categorical color option',
        ["sex", 'cp', 'fbs', 'restecg', 'exang', 'slope', 'thal', 'target'], 0)

hist_bins = st.slider(label="Histogram bins",
                      min_value=5,
                      max_value=50,
                      value=25,
                      step=1,
                      key='h1')
# hist_cats = df['Outcome'].sort_values().unique()
hist_cats = df[hist_x].sort_values().unique()
hist_fig1 = px.histogram(df,
                         x=hist_x,
                         nbins=hist_bins,
                         title="Histogram of " + hist_x,
                         template="plotly_white",
                         color=hist_color,
                         barmode=bar_mode,
                         color_discrete_map=dict(noDM='green', DM='red'),
                         category_orders={hist_x: hist_cats})
st.write(hist_fig1)
Example #19
0
# main.py
import streamlit as st
from sklearn import datasets
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt

iris = datasets.load_iris()  # data

# begin our app with a markdown text
st.markdown("""# Iris Cluster App
It's so easy to build Machine Learning applications!
""")

# add a slider
n = st.slider("n clusters: ", min_value=2, max_value=10, value=3)

# run model
k_means = KMeans(n_clusters=n)
cluster_labels = k_means.fit_predict(iris.data)

# visualize results
pca = PCA(n_components=2).fit_transform(iris.data)  # transform to 2D

plt.scatter(pca[:, 0], pca[:, 1], c=cluster_labels)
st.pyplot()  # display graph in app
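
Recent Streamlit releases warn when st.pyplot() is called without a figure; a minimal variant of the plotting step above that passes the figure explicitly (same names as in the example):

fig, ax = plt.subplots()
ax.scatter(pca[:, 0], pca[:, 1], c=cluster_labels)
st.pyplot(fig)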
Example #20
0
        for value in np.arange(0, 1, .05):
            if pt >= value and pt < value + .05:
                p_dict[round(value, 2)] += 1
    for key in p_dict.keys():
        p_dict[key] /= len(x)
    return p_dict


if page == "Gambler's Ruin II":
    st.title("Gambler's Ruin II")
    starting = st.number_input("Enter a dollar amount", min_value=0, value=10)
    win = st.number_input("Enter the number you would like to stop at",
                          min_value=0,
                          value=20)
    p = st.slider("Enter the probability of winning (p)",
                  min_value=.01,
                  max_value=1.0,
                  value=.5)
    n = st.number_input("Enter the number of matches to play",
                        min_value=1,
                        value=100)

    initial = np.zeros(win + 1)
    initial[starting] = 1

    matrix = np.zeros((win + 1, win + 1))
    for loc in range(win + 1):
        if loc == 0 or loc == win:
            matrix[loc, loc] = 1
        else:
            matrix[loc, loc + 1] = p
            matrix[loc, loc - 1] = 1 - p
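
The snippet is truncated after the transition matrix is built; a plausible continuation inside the same if-block (not part of the original) evolves the starting distribution through n matches:

    distribution = initial @ np.linalg.matrix_power(matrix, n)
    st.bar_chart(distribution)  # probability of holding each dollar amount after n matches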
Example #21
0
import numpy as np
import pandas as pd
import streamlit as st

DATE_COLUMN = 'date/time'
DATA_URL = ('https://s3-us-west-2.amazonaws.com/'
            'streamlit-demo-data/uber-raw-data-sep14.csv.gz')


@st.cache
def load_data(nrows):
    data = pd.read_csv(DATA_URL, nrows=nrows)
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis='columns', inplace=True)
    data[DATE_COLUMN] = pd.to_datetime(data[DATE_COLUMN])
    return data


data_load_state = st.text('loading data...')
data = load_data(1000)
data_load_state.text('loading data... done!')
st.write('Done! with cache')

if st.checkbox('show raw data'):
    st.subheader('Raw data')
    data

st.subheader('number of pickups per hour')
hist_values = np.histogram(data[DATE_COLUMN].dt.hour, bins=24,
                           range=(0, 24))[0]
st.bar_chart(hist_values)

hour_to_filter = st.slider('hour', 0, 23, 17)
st.subheader('all pickup data at {}:00'.format(hour_to_filter))
data[data[DATE_COLUMN].dt.hour == hour_to_filter]
Example #22
0
def main():

    # st.title('Data Viz')
    # st.subheader('Análise Exploratória de Dados')

    # Load the title image and set the header text
    st.image(
        'https://sc.movimentoods.org.br/wp-content/uploads/2019/10/qualirede.png',
        width=400)
    st.header('Um jeito simples de visualizar e analisar seus dados')
    st.sidebar.title('Data Viz')

    # Read the file uploaded by the user
    file = st.file_uploader('Escolha a base de dados CSV', type='csv')

    # Check that the uploaded file is not empty
    if file is not None:

        # Add the sidebar menus
        st.sidebar.image(
            'https://media.giphy.com/media/1klslCB8tbUmN4QoD4/giphy.gif',
            width=250)
        st.sidebar.header("Selecione:")
        # Ask how many rows of the dataset the user wants to see
        columns = st.slider('Quantas linhas deseja ver?',
                            min_value=1,
                            max_value=50)

        # Read and display the dataset
        st.markdown('**Seu arquivo**:')
        data = pd.read_csv(file)

        st.dataframe(data.head(columns))

        # Sidebar menu to check the dataset shape
        if st.sidebar.checkbox('Quero ver o shape dos meus dados'):
            st.markdown('**Quantidade de linhas:** ')
            st.markdown(data.shape[0])
            st.markdown('**Quantidade de colunas:**')
            st.markdown(data.shape[1])

        # Sidebar menu to inspect the dataset columns
        if st.sidebar.checkbox('Quero analisar as colunas'):
            all_columns = data.columns.tolist()
            selected_columns = st.multiselect('Selecione', all_columns)
            new_df = data[selected_columns].drop_duplicates(subset=None,
                                                            keep='first')
            st.dataframe(new_df)

        # Sidebar menu to count the number of targets/classes
        if st.sidebar.checkbox('Quero contar a quantidade de target/classes'):
            selected_plot = st.selectbox('Selecione o tipo de visualização',
                                         list(data.columns))
            st.markdown('**Contagem de Alvos/Classes**')
            st.write(
                data.iloc[:,
                          data.columns.get_loc(selected_plot)].value_counts())

        # Sidebar menu to view the data types
        if st.sidebar.checkbox('Quero ver os tipos dos dados'):
            st.markdown('**Tipos de dados**')
            st.write(data.dtypes)

        # Sidebar menu to view the data description
        if st.sidebar.checkbox('Quero a descrição dos meus dados'):
            st.markdown('**Descrição**')
            st.write(data.describe())

        # Sidebar menu to select the chart type
        if st.sidebar.checkbox('Quero visualizar meus dados'):
            columns_names = data.columns.tolist()
            viz = ('line', 'bar', 'pie', 'hist', 'correlation', 'box')
            selected_plot = st.sidebar.selectbox(
                'Selecione o tipo de visualização', viz)
            selected_columns_names = st.multiselect('Selecione as colunas',
                                                    columns_names)

            # Line chart
            if selected_plot == 'line':
                custom_data = data[selected_columns_names]
                st.line_chart(custom_data)

            # Bar chart
            elif selected_plot == 'bar':
                custom_data = data[selected_columns_names]
                st.bar_chart(custom_data)

            # Pie chart
            elif selected_plot == 'pie':
                st.write(
                    data.iloc[:,
                              -1].value_counts().plot.pie(autopct="%1.1f%%"))
                st.pyplot()

            # Correlation heatmap
            elif selected_plot == 'correlation':
                corr = data.corr()
                st.write(
                    sns.heatmap(corr,
                                xticklabels=corr.columns,
                                yticklabels=corr.columns,
                                annot=True))
                st.pyplot()

            # Other chart types
            else:
                custom_plot = data[selected_columns_names].plot(
                    kind=selected_plot)
                st.pyplot()

        # About section
        #if st.sidebar.checkbox('Sobre'):
        html = """
        <br><br><br><br><br>
        <div>
        Desenvolvido pela equipe <a href="mailto:[email protected]">NGI</a>, 
        na <a href="https://www.qualirede.com.br/">Qualirede</a>.
        </div>
        """

        st.markdown(html, unsafe_allow_html=True)
Example #23
0
def presentation():  
    st.markdown("## 2.2 Mode Choice Model")
    # Show Probability Matrix
    if st.checkbox("Show Probability Matrixes"):
        st.markdown("Probability of choosing mode given origin and destination")
        probabilities = mp.get_probability()
        st.markdown("Probability for **CAR**")
        st.write(pd.DataFrame(probabilities[0], index=["Zone 1", "Zone 2"], columns=["Zone 1", "Zone 2"]))
        st.markdown("Probability for **PUBLIC TRANSPORT (PT)**")
        st.write(pd.DataFrame(probabilities[1], index=["Zone 1", "Zone 2"], columns=["Zone 1", "Zone 2"]))
        st.markdown("Probability for **SLOW**")
        st.write(pd.DataFrame(probabilities[2], index=["Zone 1", "Zone 2"], columns=["Zone 1", "Zone 2"]))

    # ------------------------- Write Up for the section ------------------------- #
    # ---------------------------------------------------------------------------- #
    st.markdown("""
    ## Mode Choice Model
    The model calculates the probability of each alternative mode of transport given the origin and destination. 
    Hence, we calculate **$Pr(m|i,j)$**, where $m$ is mode and $i$ and $j$ are the origin and destination zones.
    """)    
    st.write("""
    Assuming the utility of choosing the alternative is given by, 

    $$
    U_{j,m}^i = V_{j,m}^i + \epsilon_{j,m}^i  
    $$

    where $\epsilon_{j,m}^i$ is an i.i.d. Gumbel-distributed error term representing uncertainty. 
    """)
    st.write("""
    The probability that alternative $m$ yields the highest utility, given $i$ and $j$, is 

    $$
    Pr(m|i,j) = Pr(U_{j,m}^i > U_{j,m'}^i) = \dfrac{\exp^{V_{j,m}^i}}{ \sum_{m'={1,2,3}} \exp^{V_{j,m'}^i} }
    $$
    """)

    # ------------------------------------ EXERCISES ------------------------------#
    # ---------------------------------------------------------------------------- #
    st.markdown("## EXERCISES")

    # ------------------------------------ Ques2 ------------------------------#
    if st.checkbox("(ii) Probabilites of each zone given mode"):
        orig_zone = st.radio("Select Origin Zone", list(ZONE.keys()))
        dest_zone = st.radio("Select Destination Zone", list(ZONE.keys()))
        mode      = st.radio("Select Mode", list(MODE.keys()), key=1)
        probability = mp.get_probability(ZONE[orig_zone], ZONE[dest_zone], MODE[mode])
        st.markdown("The probability of travelling from **{}** to **{}** using a **{}** is,".format(orig_zone, dest_zone, mode))
        st.markdown("$P(m|i,j)$ = **{}**".format(round(probability, 4)))

    # ------------------------------------ Ques3 ------------------------------#
    if st.checkbox("(iii) Probability of travelling using choosen mode from zone 1 to any destination"):
        # Explanation
        st.write("""
        Using the law of total probability, 

        $$
        Pr(m|i) = \sum_{j={1,2}} Pr(m|i,j)*P(j)
        $$

        where, $P(j)$ is 0.5 for each zone and $i = 1$
        """)
        
        # Answer
        mode      = st.radio("Select Mode", list(MODE.keys()), key=2)
        zone11    = mp.get_probability(ZONE["Zone 1"], ZONE["Zone 1"], MODE[mode])
        zone12    = mp.get_probability(ZONE["Zone 1"], ZONE["Zone 2"], MODE[mode])
        
        st.markdown("The probability of travelling from **Zone 1** using **{}** is,".format(mode))
        st.markdown("$P(m|i=1)$ = **{}**".format(round(zone11*0.5 + zone12*0.5, 4)))

    # ------------------------------------ Ques4 ------------------------------#
    if st.checkbox("(iv) Effect on probabilites choosing u_mode"):
        # Explanation
        st.markdown("""
        Let us assume that the utility function is given by,
        $$
        U_{j,m}^i = V_{j,m}^i + \mu_{mode}*\epsilon_{j,m}^i  
        $$
        where $\mu_{mode}$ is a scaling factor for the error term
        """)
        st.markdown("""
        As the value of $\mu_{mode}$ increases, uncertainty in the model increases. Hence the probabilities for each mode become more and more uncertain. 
        As the value of $\mu_{mode}$ decreases, we are more sure of the deterministic values. Hence the probabilities for each mode become more and more certain. 
        """)

        # Answer
        u_mode = st.slider("Select u_mode value", min_value=float(0.1), max_value=float(10), value=float(1), step=float(0.1))

        # Plotting
        umode_prob = mp.get_probability(u_mode=u_mode)
        index = ["Car", "Pt", "Slow"]
        zone11= [umode_prob[i][0][0] for i in range(3)]
        zone12= [umode_prob[i][0][1] for i in range(3)]
        zone21= [umode_prob[i][1][0] for i in range(3)]
        zone22= [umode_prob[i][1][1] for i in range(3)]
        fig = go.Figure(data=[
            go.Bar(name="Zone 11", x=index, y=zone11),
            go.Bar(name="Zone 12", x=index, y=zone12),
            go.Bar(name="Zone 21", x=index, y=zone21),
            go.Bar(name="Zone 22", x=index, y=zone22)])
        fig.update_layout(barmode='group',
                          title="Mode Choice probabilities for u_mode: {}".format(u_mode),
                          title_font_size=20)
        fig.update_yaxes(range=[0, 1], title_text='Probability')
        st.plotly_chart(fig)
        
    # ------------------------------------ Ques5 ------------------------------#
    if st.checkbox("(v) Effect on probabilites changing number of employees"):
        # Explanation
        st.markdown("""
        Due to the **Equivalent Difference Property**, any change in the number of employees does not affect the probabilities.
        """)
        
        # Answer
        zone1_emp = st.slider("Change for Zone 1 employee", min_value=int(-5000), max_value=int(5000), value=int(0), step=int(1000))
        zone2_emp = st.slider("Change for Zone 2 employee", min_value=int(-5000), max_value=int(5000), value=int(0), step=int(1000))

        # Plotting
        emp_prob = mp.get_probability(emp_change_z1=zone1_emp, emp_change_z2=zone2_emp)
        index = ["Car", "Pt", "Slow"]
        zone11= [emp_prob[i][0][0] for i in range(3)]
        zone12= [emp_prob[i][0][1] for i in range(3)]
        zone21= [emp_prob[i][1][0] for i in range(3)]
        zone22= [emp_prob[i][1][1] for i in range(3)]
        fig = go.Figure(data=[
            go.Bar(name="Zone 11", x=index, y=zone11),
            go.Bar(name="Zone 12", x=index, y=zone12),
            go.Bar(name="Zone 21", x=index, y=zone21),
            go.Bar(name="Zone 22", x=index, y=zone22)])
        fig.update_layout(barmode='group',
                          title="Number of Employees - Zone 1: {} Zone 2: {}".format(10_000+zone1_emp, 15_000+zone2_emp),
                          title_font_size=20)
        fig.update_yaxes(range=[0, 1], title_text='Probability')
        st.plotly_chart(fig)
Example #24
0
@st.cache(persist=True)
def load_data(nrows):
    data = pd.read_csv(DATA_URL,
                       nrows=nrows,
                       parse_dates=[['CRASH_DATE', 'CRASH_TIME']])
    data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True)
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis="columns", inplace=True)
    data.rename(columns={"crash_date_crash_time": "date/time"}, inplace=True)
    return data


data = load_data(100000)

st.header("Where are the most people injured in NYC?")
injured_people = st.slider("Number of persons injured in vehicle collisions",
                           0, 19)
st.map(
    data.query("injured_persons >= @injured_people")[["latitude", "longitude"
                                                      ]].dropna(how="any"))

st.header("How many collisions occur during a given time of day?")
hour = st.slider("Hour to look at", 0, 23)
original_data = data
data = data[data['date/time'].dt.hour == hour]

st.markdown("Vehicle collisions between %i:00 and %i:00" % (hour,
                                                            (hour + 1) % 24))
midpoint = (np.average(data["latitude"]), np.average(data["longitude"]))

st.write(
    pdk.Deck(
Example #25
0
st.pyplot()

#Drop the columns selected from the multiselect
#Select multiple columns
cols_drop = st.multiselect("Select columns to drop", data.columns)
#Drop columns
data = data.drop(cols_drop, axis=1)

#Show the new dataframe
#Text to show it's the new dataframe
st.text("Once features have been engineered, it looks like this:")
#Show the new dataframe
st.dataframe(data)

#Change the number of clusters you want the data to be broken up into
k = st.slider("Select the number of clusters", 2, 10)

#A button that clusters when you press it
if st.button("Cluster Results"):
    #Dropping the diagnosis column so that the features can be used to predict it
    X = data.drop('diagnosis', axis=1).values
    #Scaling the values of the dataframe
    X = StandardScaler().fit_transform(X)

    #Creating a KMeans clusterer called km
    km = KMeans(n_clusters=k, init="k-means++", n_init=10)

    #Passing our transformed dataframe into our KMeans clusterer
    km_pred = km.fit_predict(X)

    #Plotting the data
Example #26
0
###########################
#### Cluster 1 - Model ####
###########################
st.markdown('---')
st.markdown('## **Regional Cluster 1 - Forecast:**')
st.markdown("""**Cluster Characteristics:**
            - Lowest average regional temperature: ($49^oF$)
            - Low Avg. Consumer Price Index (CPI): $133$
            - Highest Sales Cluster
            - Highest Regional Population
            """)

clust1_model_steps = st.slider('Forecast Range (Weeks):',
                               1,
                               51,
                               12,
                               key='slider1')
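
# Note: the np.exp() wrapper below suggests the model was fit on log-scaled sales,
# so predictions are back-transformed to the original scale.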

clust1_model_pred = np.exp(
    clust1_model.predict(start=cluster_1_test.index[0],
                         end=cluster_1_test.index[0] + clust1_model_steps,
                         exog=test_plus_future_holidays[:clust1_model_steps +
                                                        1],
                         dynamic=True,
                         plot_insample=False))

clust_1_model_ci = np.exp(clust1_model.get_forecast(steps=clust1_model_steps+1,
                                                    exog=test_plus_future_holidays[:clust1_model_steps+1])\
                                .conf_int(alpha=0.05))
Example #27
0
def main():
    # -----------------------------
    # Initial description
    st.image('./dataset-cover.png', width=900)
    st.title('Analysis of e-commerce dataset')
    st.markdown("""Here, I will introduce some basic descriptive analysis of
                the Olist Brazilian e-commerce dataset. This dataset contains 
                more than 110K orders from 2016 to 2018 with detailed customer 
                transactions. The dataset consists of **9 files**, which 
                describe orders, products and their categories, user reviews, 
                information about delivery estimate date, payment method, 
                geolocation, and much, much more.    \n """)

    st.markdown("""All datasets are available 
                on [Kaggle](https://www.kaggle.com/olistbr/brazilian-ecommerce)
                .""")

    st.markdown("""Initially, I will start using **4 datasets**: 
                *Items, Orders, Products description and Payment.*""")

    st.markdown("Here, I will cover some basic info about the datasets "
                "(number of orders, how many variables are available, possible"
                " payment methods, etc.) and explore some data visualization"
                " (histogram, bar plot, and boxplot).  \n So, let's start!")

    # -----------------------------
    # Import dataset
    st.header("**Dataset investigation**")
    st.markdown("Let's take a look on data...")

    items = pd.read_csv("./olist_dataset/olist_order_items_dataset.csv")
    orders = pd.read_csv("./olist_dataset/olist_orders_dataset.csv")
    products = pd.read_csv("./olist_dataset/olist_products_dataset.csv")
    payment = pd.read_csv("./olist_dataset/olist_order_payments_dataset.csv")

    # Slider for dataframe.head
    slider_bar = st.slider(label='Select the number of rows to preview from the datasets',
                           min_value=1, max_value=10)

    st.markdown('**Items**')
    st.markdown("""This dataset describes the relationship among orders, sellers,
                order  price, shipping cost and date.""")
    st.dataframe(items.head(slider_bar))

    st.markdown('**Orders**')
    st.markdown("""Here, We have info about order status: purchased time,
                order status, if was already shipped, for example...""")
    st.dataframe(orders.head(slider_bar))

    st.markdown('**Products**')
    st.markdown("""Products dataset describes products by categories, weight, 
                and dimensions""")
    st.dataframe(products.head(slider_bar))

    st.markdown('**Payment**')
    st.markdown("""Last but not least, this dataset shows payment value and 
                method.""")
    st.dataframe(payment.head(slider_bar))

    # Columns description
    add_info = st.checkbox('Want additional info about variables?')
    if add_info:
        st.markdown("""Basically, keep in mind that with these variables bellow,
                    we can track orders along the datasets: \n"""
                    "* order_id: identify products that are in the same basket. \n"
                    "* product_id: identify unique products within the dataset. \n"
                    "* customerid:  identify unique customers within the dataset. \n"
                    "* seller_id: identify unique sellers within the dataset. \n ")

    # -----------------------------
    # Dataset shape
    st.header("**Common questions**")
    cols_box = st.checkbox("How many columns and rows are in datasets?")
    if cols_box:
        st.markdown(f"Items: {items.shape}")
        st.markdown(f"Orders: {orders.shape}")
        st.markdown(f"Products: {products.shape}")
        st.markdown(f"Payments: {payment.shape}")

    order_box = st.checkbox("How many orders are in Order dataset?")
    if order_box:
        st.markdown(f"""Total number of orders: 
                    {orders['order_id'].nunique()}""")

    order_customers = st.checkbox("How many customers are?")
    if order_customers:
        st.markdown(f"""Total number of customers:
                    {orders['order_id'].nunique()}""")

    products_box = st.checkbox("What are the product categories?")
    if products_box:
        products_categories = products['product_category_name'].unique().tolist()
        st.write(f'There are {len(products_categories)} categories.')
        st.dataframe(products_categories)

    payment_type = st.checkbox("""What are the different payment
                              methods?""")
    if payment_type:
        st.markdown(f"""There are {payment['payment_type'].nunique()}
                     payment options:""")
        st.dataframe(payment['payment_type'].unique())

    order_customers = st.checkbox("""What are the possible delievery status
                                   on sales orders?""")
    if order_customers:
        order_status = orders['order_status'].unique().tolist()
        st.dataframe(order_status)

    # -----------------------------
    # Merging dataset ---- add code to streamlit
    st.header("**Descriptive analysis**")
    st.markdown("""First, let's merge our datasets:""")

    orders_items = pd.merge(orders, items, on='order_id')
    products_slice = products.drop(['product_name_lenght',
                                    'product_description_lenght'],
                                    axis='columns')

    merge_df = pd.merge(orders_items, products_slice, on='product_id')
    merge_df = pd.merge(merge_df, payment, on='order_id')

    # -----------------------------
    # Select columns to .describe()
    st.markdown("""A common task is to extract basic information about the 
                dataset, as the maximum and minimum value per variable, 
                find the mean, quantiles, etc. So, you can choose some
                columns to investigate it.""")

    cols = ['price', 'freight_value', 'product_weight_g', 'product_length_cm',
            'product_photos_qty', 'product_length_cm', 'product_height_cm',
            'product_width_cm', 'payment_installments', 'payment_value']

    columns_box = st.multiselect("""Select columns to calculate max, min, 
                                 mean, median and quantiles""", cols)
    if columns_box:
        df_columns_box = merge_df[columns_box]
        st.dataframe(df_columns_box.describe().T)

    # -----------------------------
    # Missing data
    st.markdown("""Another important task is to check types present on the 
                dataset and if exist any missing values. Keep in mind that 
                it is important to handle this effectively, because missing 
                values can impact our interpretation.""")

    missing = pd.DataFrame({'missing count': merge_df.isnull().sum(),
                            'dtype': merge_df.dtypes,
                            'missing %': (merge_df.isnull().sum()/merge_df.shape[0])*100})
    st.dataframe(missing.head(25))

    # Filling missing data
    missing_box = st.checkbox("Do you want to fill missing data?")

    if missing_box:
        st.markdown("""Great, since we have variables with different 
                    types, let's focus focus on numeric types""")

        # Filling in numeric columns
        missing_op = st.selectbox('How do you want to fill missing values',
                                  ('Mean', '0'))

        if missing_op == '0':
            st.markdown('')
            numeric_cols = ['product_photos_qty', 'product_weight_g',
                            'product_length_cm', 'product_height_cm',
                            'product_width_cm']

            for col in numeric_cols:
                merge_df[col] = merge_df[col].fillna(value=0)

        if missing_op == 'Mean':
            st.markdown('')
            numeric_cols = ['product_photos_qty', 'product_weight_g',
                            'product_length_cm', 'product_height_cm',
                            'product_width_cm']

            for col in numeric_cols:
                merge_df[col] = merge_df[col].fillna(value=merge_df[col].mean())

        # Filling in object columns
        st.markdown("""Also, let's handle columns which have 
                    'object' types:""")

        missing_obj = st.selectbox("""Do you want to drop rows with missing 
                                   data or ignore them?""", ('Drop', 'Ignore'))

        if missing_obj == 'Drop':
            st.markdown('Sorry, this feature is under construction :(')
            merge_df['product_category_name'] = merge_df['product_category_name'].fillna(value='no_info')
            # object_cols = ['order_approved_at', 'order_delivered_carrier_date', 'order_delivered_customer_date']
            # merge_df = merge_df[merge_df[object_cols].notna()]

        if missing_obj == 'Ignore':
            st.markdown('')

        # Recheck missing data
        missing_box2 = st.checkbox("Want to check the result?")
        if missing_box2:
            missing2 = pd.DataFrame({'missing count': merge_df.isnull().sum(),
                                    'dtype': merge_df.dtypes,
                                    'missing %': (merge_df.isnull().sum()/merge_df.shape[0])*100})
            st.dataframe(missing2.head(25))

    # -----------------------------
    # Data visualization
    st.header("""**Visualization**""")
    st.markdown("""Data visualization it's an important task on data analysis,
                which allows extracting interesting patterns from data and, 
                making it easier to understand. So, Let's start plotting""")

    # Histogram - columns
    st.subheader("Histogram")
    numeric_cols = ['price', 'product_photos_qty', 'product_weight_g',
                    'freight_value', 'product_length_cm', 'product_height_cm',
                    'product_width_cm', 'payment_value', 'payment_installments']

    hist_col = st.selectbox('Which column do you want to plot as a histogram?', 
                            numeric_cols)
    if hist_col:
        fig_hist = px.histogram(merge_df, x=hist_col)
        st.write(fig_hist)

    # Boxplot - columns
    st.subheader("Boxplot")
    boxplot_col = st.multiselect("""Which columns do you want to plot as 
                               boxplots?""", numeric_cols)

    if boxplot_col:
        fig_boxplot = px.box(merge_df, x=boxplot_col)
        st.write(fig_boxplot)

    # Barplot - Products Ordered
    st.subheader("How many products people generally order?")

    number_orders = merge_df.groupby('order_id')['order_item_id'].aggregate('sum').reset_index()
    number_orders = number_orders['order_item_id'].value_counts()
    number_orders.index += 1
    fig_bar = px.bar(number_orders, x=number_orders.index, y=number_orders.values)
    st.write(fig_bar)

    # Barplot - Most bought products
    st.subheader("**Which categories people buy at most?**")
    categories_prods = merge_df.groupby('product_category_name').count().reset_index().sort_values('order_id')
    fig_bar_p = px.bar(categories_prods, y='product_category_name', x='order_id',
                       orientation='h')
    st.write(fig_bar_p)

    # Money spent

    # Barplot - Payment methods
    st.subheader("What is the most common payment method?")
    pay_type = merge_df.groupby('payment_type')['order_id'].count().reset_index()
    pay_type = pay_type.sort_values(by='order_id', ascending=False)
    pay_type = pay_type.rename(columns={'order_id': 'value_count'})
    fig_pay = px.bar(pay_type, y='value_count', x='payment_type',
                     orientation='v')
    st.write(fig_pay)

    # -----------------------------
    # The end!
    st.header("That's all folks!")
    # st.balloons()

    st.markdown("""This work was developed using these excellent Kaggle repositories
                [A] (https://www.kaggle.com/gsdeepakkumar/e-commerce-dataset-analysis/notebook),
                [B] (https://www.kaggle.com/kabure/simple-eda-sales-and-customer-patterns/notebook).
                   \n So, if you want to dive in this dataset, you totally should check them.""")
    st.markdown("""Thank you so much for checking my job! If you liked, please,
                check my [github]
                 (https://github.com/cavalcante-l?tab=repositories)
                and my [linkedin]
                 (https://www.linkedin.com/in/laizacavalcante/). """)
    st.markdown("Developed by LaĆ­za Cavalcante.")
Example #28
0
def write():
    st.markdown("""
        # SugarTime
        ### Model Performance
        This page lets you visualize how the model performs on data that
        it hasn't seen yet.
        """)
    with st.beta_expander("CLICK HERE to expand discussion"):
        st.markdown("""
            The dataset is split into two sets: a training set and a
            testing set. The model has been trained on the training set, and
            we can use the model to perform inference on data from the
            testing set here.

            The time series model is auto-regressive with exogenous variables
            (ARX). The base algorithm used in such a model can be any
            regression algorithm; here I currently use a support vector
            machine.

            The full model actually consists of several models, each
            individually
            fit to a different lag of the target variable. In other words,
            there
            is one model fit to the glucose data at time *t+1*, another fit to
            the
            glucose data at time *t+2*, another at *t+3*, etc.,
            all the way up to the
            selected horizon of the model (which defaults to 12 steps of 5
            minutes
            each, i.e., one hour). Each model represents the best performing
            model
            after optimizing the time-series design hyperparameters (e.g.,
            order of
            the *endogenous* or *target* variable, order of the *exogenous*
            variables, and/or delay of the exogenous variables) at that time
            step.

            Note that this model has essentially learned to revert to the mean.
            Since there is considerable autocorrelation in data from continuous
            glucose monitors, inference becomes less accurate as the inference
            step gets farther away from the current time *t*.
            Here, instead of relying on the exogenous variables (i.e.,
            carbohydrates and insulin),
            the model does a better job by increasingly bringing the predicted
            value back to the mean, which for this patient is a blood glucose
            level of approximately 100 mg/dL.
            This is obviously not what we want the model to learn. But I have
            yet
            to find an estimator/algorithm that doesn't converge on this
            strategy
            to some extent, which suggests that these two exogenous variables
            are simply not predictive enough to account for significant
            variance beyond the autoregressive component of this model.
            """)
    st.markdown("""
        *Instructions:*
        Use the slider to select a time within the test set. The model
        will use the data up to that point to generate a forecast for
        the next hour.

        ***
        """)
    st.markdown("# Select date/time to show forecast.")

    # load patient data and fit model
    vm = load_saved_model()
    patient = vm.patient

    # make datetime selection slider
    x_index = patient.Xtest.index
    start_time = st.slider(
        "Move the slider to select the forecast date/time",
        min_value=x_index[40].to_pydatetime(),
        max_value=x_index[-40].to_pydatetime(),
        value=x_index[45].to_pydatetime(),
        step=timedelta(minutes=60),
        format="MM/DD/YY - hh:mm",
    )

    # plot glucose values for the test set
    fig = plot_test_set(patient, start_time)
    st.plotly_chart(fig)

    # plot performance of model
    st.markdown("# Show forecast vs actual")
    start_time_index = (x_index == pd.Timestamp(start_time)).argmax()
    nsteps = vm.horizon
    ypred = vm.multioutput_forecast(patient.Xtest[:start_time_index],
                                    patient.ytest[:start_time_index])
    idx = pd.date_range(
        start=start_time,
        end=start_time + timedelta(minutes=5 * (len(ypred) - 1)),
        freq="5T",
    )
    ypred = pd.DataFrame(ypred, columns=["ypred"], index=idx)
    fig = core.plot_forecast(
        patient.ytest[(start_time_index - 40):(start_time_index + nsteps)],
        ypred,
        return_flag=True,
    )
    start_time_text = datetime.datetime.strftime(start_time, "%m/%d/%y %H:%M")
    fig.update_layout(
        title={
            "text": "start time: " + start_time_text,
            "y": 0.88,
            "x": 0.5,
            "xanchor": "center",
            "yanchor": "top",
        })
    st.plotly_chart(fig)
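
The expander text above describes a direct multi-output setup: one regressor per forecast lag of the target. A minimal sketch of that idea with scikit-learn's SVR, assuming simple pre-built feature matrices (none of these names come from the SugarTime code):

import numpy as np
from sklearn.svm import SVR

def fit_direct_forecasters(X, y, horizon=12):
    """Fit one SVR per step t+1 .. t+horizon (direct multi-step strategy)."""
    models = []
    for step in range(1, horizon + 1):
        # features aligned with the target shifted forward by `step`
        models.append(SVR().fit(X[:-step], y[step:]))
    return models

def multioutput_forecast(models, x_now):
    """Predict every horizon step from the latest feature row."""
    return np.array([m.predict(x_now.reshape(1, -1))[0] for m in models])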
Example #29
0
def show_results():
    st.write(results_text)

    # MAE graph
    image_mae = Image.open(str(main_path / 'Results/image/mae.png'))
    st.image(image_mae, caption='Figure 1: Mean Absolute Error')

    # RMSE graph
    image_rmse = Image.open(str(main_path / 'Results/image/rmse.png'))
    st.image(image_rmse, caption='Figure 2: Root Mean Square Error')

    # R Square graph
    image_r_square = Image.open(str(main_path / 'Results/image/r_square.png'))
    st.image(image_r_square, caption='Figure 3: R Square')

    # Random Forest slider graph
    st.write("Random Forest model performance:")
    year_rf = st.slider('Number of years after publication?', 1, 10)
    year_image = 'Results/image/' + rf_images[year_rf]
    rf_image = Image.open(str(main_path / year_image))
    st.image(rf_image,
             caption='Figure 4: Random Forest Citation Frequency Histogram',
             width=600)

    # Random Forest performance table
    st.write(
        "The following are the performance matrix for Random Forest Regression model:"
    )
    performance_df = pd.DataFrame(np.array(
        [[1.168999, 1.89990, 2.66408, 3.43690, 4.19492, 5.62216, 6.94813],
         [
             43.38860, 71.76869, 124.98958, 216.97061, 356.03481, 86.33918,
             1421.48745
         ],
         [6.58700, 8.47164, 11.17987, 14.72992, 18.86888, 28.04174, 37.70262],
         [0.16119, 0.22493, 0.27972, 0.303861, 0.30706, 0.29510, 0.22525]]),
                                  columns=[
                                      '1 year', '2 year', '3 year', '4 year',
                                      '5 year', '7 year', '10 year'
                                  ])
    performance_df['index'] = ["MAE", "MSE", "RMSE", "R Square"]
    performance_df = performance_df.set_index("index")
    st.table(performance_df)

    # Benchmark Models slider graphs
    st.write("Benchmark model performance:")
    year_lr = st.slider('Number of years after publication?', 0, 10)
    col1, col2, col3 = st.beta_columns(3)
    with col1:
        lr_year_image = 'Results/image/lr/' + lr_images[year_lr]
        lr_image = Image.open(str(main_path / lr_year_image))
        st.image(lr_image, caption='Figure 5: LR Citation Frequency Histogram')
    with col2:
        svm_year_image = 'Results/image/svm/' + svm_images[year_lr]
        svm_image = Image.open(str(main_path / svm_year_image))
        st.image(svm_image,
                 caption='Figure 6: SVM Citation Frequency Histogram')
    with col3:
        km_year_image = 'Results/image/kmeans/' + km_images[year_lr]
        km_image = Image.open(str(main_path / km_year_image))
        st.image(km_image,
                 caption='Figure 7: K-Means Citation Frequency Histogram')
Example #30
0
def in_bounding_box(point):
    lng, lat = point
    in_lng_bounds = DOWNTOWN_BOUNDING_BOX[0] <= lng <= DOWNTOWN_BOUNDING_BOX[2]
    in_lat_bounds = DOWNTOWN_BOUNDING_BOX[1] <= lat <= DOWNTOWN_BOUNDING_BOX[3]
    return in_lng_bounds and in_lat_bounds


df = pd.read_csv(DATA_URL)
# Filter to bounding box
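# (The in_bounding_box helper above is defined but not applied in this snippet;
#  a hypothetical use would be:)
# df = df[df[["LON_DESTINO", "LAT_DESTINO"]].apply(tuple, axis=1).map(in_bounding_box)]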

df = df[df['LAT_DESTINO'] < -33]

st.write(df)
df = df[df['NUM_EST'] < 23]
num_est = st.slider("Cantidad de estudiantes traspasados entre colegios", 1,
                    30)
GREEN_RGB = [0, 255, 0, 40]
RED_RGB = [240, 100, 0, 40]

arc_layer = pydeck.Layer(
    "ArcLayer",
    data=df.query("NUM_EST >= @num_est"),
    get_width="NUM_EST * 2",
    get_source_position=["LON_ORIGEN", "LAT_ORIGEN"],
    get_target_position=["LON_DESTINO", "LAT_DESTINO"],
    get_tilt=15,
    get_source_color=RED_RGB,
    get_target_color=GREEN_RGB,
    pickable=True,
    auto_highlight=True,
)