Example #1
        else:
            # fall back through progressively more permissive readers
            try:
                data = pd.read_excel(file)
            except Exception:
                try:
                    data = pd.read_excel(file, index_col=None, header=None)
                except Exception:
                    data = pd.read_html(file)

            translate_download(data)

    file.close()


main()


translator = Translator()

# "You can convert your Excel and CSV files here from any language into Turkish"
st.title("Excel ve csv dosyalarınızı burada herhangi bir dilden Türkçe'ye dönüştürebilirsiniz")
# "Since this program relies on Google Translate, if it raises an error, retrying a
# few times or refreshing the page will get it working."
st.header("Bu program Google Translate ile bağlantılı çalıştığından, hata verdiği zaman bir kaç kere tekrar denerseniz veya sayfayı yenilerseniz program çalışacaktır.")
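
A minimal sketch of the assumed setup around the fragment above (the snippet is truncated, so the uploader and the translate_download stub here are reconstructions, not the author's code):

import pandas as pd
import streamlit as st
from googletrans import Translator  # assumed origin of Translator()

def translate_download(data):
    # hypothetical stub: translate the frame to Turkish, then offer it for download
    st.dataframe(data)

file = st.file_uploader("Upload a CSV or Excel file")
if file is not None:
    if file.name.endswith(".csv"):
        data = pd.read_csv(file)
        translate_download(data)
    # ...the else branch shown above handles Excel and HTML files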

Example #2
def main():
    # Navigation sidebar
    st.sidebar.header('Navigation')
    selection = st.sidebar.radio('Explore?', ['Home', 'Predictor', 'Data'])
    if selection == 'Home':
        home()
    elif selection == 'Predictor':
        classifier_page()
    elif selection == 'Data':
        # Functions to load data
        def load_raw_data(url):
            data = pd.read_csv(url)
            return data

        def format_cdc_data(df):
            date_cols = ['cdc_report_dt', 'pos_spec_dt', 'onset_dt']
            for col in date_cols:
                df[col] = pd.to_datetime(df[col], format='%Y-%m-%d').dt.date
            return df

        def format_tracking_data(df):
            df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
            df['pos_rate'] = df['positive'] / df['totalTestResults']
            return df

        # Data sources
        st.sidebar.header('Interactive Data')
        data_source = st.sidebar.selectbox('Data Sources', [
            'Choose data source', 'CDC Public Use Surveillance Data',
            'COVID Tracking Project'
        ])

        # CDC data
        if data_source == 'CDC Public Use Surveillance Data':
            cdc_data_raw = load_raw_data(
                'https://data.cdc.gov/resource/vbim-akqf.csv')
            cdc_data = format_cdc_data(cdc_data_raw)

            st.title('CDC Public Use Surveillance Data')
            st.markdown(
                'https://data.cdc.gov/Case-Surveillance/COVID-19-Case-Surveillance-Public-Use-Data/vbim-akqf',
                unsafe_allow_html=True)
            '\n'
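            # bare string literals like '\n' use Streamlit "magic": standalone
            # expressions are rendered to the app, so '\n' just adds vertical space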

            if st.checkbox('View raw data'):
                st.write(cdc_data_raw)
            '\n'

            st.header('Explore data')
            '\n'

            # Dictionary of variable names and corresponding column names for easy access
            vars_dict = {
                'Death': 'death_yn',
                'Hospitalization': 'hosp_yn',
                'ICU admission': 'icu_yn',
                'Medical condition': 'medcond_yn',
                'Sex': 'sex',
                'Age group': 'age_group',
                'Race/Ethnicity': 'race_ethnicity_combined',
                'Current status': 'current_status'
            }
            vars_names = list(vars_dict.keys())

            vars_chosen = st.multiselect('Choose variables', vars_names)
            for var in vars_chosen:
                st.bar_chart(cdc_data[vars_dict[var]])
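            # note: st.bar_chart plots the column values directly; for categorical
            # columns like these, cdc_data[vars_dict[var]].value_counts() would be
            # the usual way to chart category counts (an assumption about intent)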
            '\n'

            # Separate dataframes
            death = cdc_data[cdc_data['death_yn'] == 'Yes']
            no_death = cdc_data[cdc_data['death_yn'] == 'No']

            st.markdown('*Compare patients who died to those who did not:*')
            '\n'
            vars_names2 = vars_names[1:]

            if st.checkbox('Death'):
                death_vars = st.multiselect('Choose variable', vars_names2)
                for var in death_vars:
                    st.bar_chart(death[vars_dict[var]])
            '\n'
            if st.checkbox('No death'):
                no_death_vars = st.multiselect('Choose variable ', vars_names2)
                for var in no_death_vars:
                    st.bar_chart(no_death[vars_dict[var]])

        # Tracking data
        elif data_source == 'COVID Tracking Project':

            # Load data
            current_data_raw = load_raw_data(
                'https://api.covidtracking.com/v1/us/current.csv')
            current_data = format_tracking_data(current_data_raw)

            national_data_raw = load_raw_data(
                'https://api.covidtracking.com/v1/us/daily.csv')
            national_data = format_tracking_data(national_data_raw)

            states_data_raw = load_raw_data(
                'https://api.covidtracking.com/v1/states/daily.csv')
            states_data = format_tracking_data(states_data_raw)

            # Layout
            st.title('The COVID Tracking Project')
            st.markdown('https://covidtracking.com/data/national',
                        unsafe_allow_html=True)
            '\n'
            st.header('Explore data')
            '\n'
            st.subheader('National data')
            '\n'

            ## Current data
            st.markdown('*Current data*')
            if st.checkbox('View raw current data'):
                st.write(current_data)
            '\n'
            today = dt.datetime.now().strftime('%m/%d/%Y')
            today_pos = current_data.loc[0, 'positive']
            pos_incr = current_data.loc[0, 'positiveIncrease']
            today_deaths = current_data.loc[0, 'death']
            deaths_incr = current_data.loc[0, 'deathIncrease']
            today_hosp = current_data.loc[0, 'hospitalized']
            hosp_incr = current_data.loc[0, 'hospitalizedIncrease']
            today_pos_rate = current_data.loc[
                0, 'positive'] / current_data.loc[0, 'totalTestResults']
            today_icu = current_data.loc[0, 'inIcuCurrently']

            # Area for displaying current data
            st.markdown('### As of **{}**'.format(today))
            '\n'
            st.write('Total positive cases: {} (+{})'.format(
                today_pos, pos_incr))
            st.write('Total deaths: {} (+{})'.format(today_deaths,
                                                     deaths_incr))
            st.write('Total patients hospitalized: {} (+{})'.format(
                today_hosp, hosp_incr))
            st.write('Total patients in ICU: {}'.format(today_icu))
            st.write('Current positive rate: {:.3f}'.format(today_pos_rate))
            '\n'
            '\n'

            ## All time data
            # Functions for plotting variables
            def plot_national_var(var):
                # Create dataframe
                df = pd.concat([national_data['date'], national_data[var]],
                               axis=1)
                df.sort_values(by='date', inplace=True)
                # Plot
                st.line_chart(
                    df.rename(columns={
                        'date': 'index'
                    }).set_index('index'))

            # Variables
            national_dict = {
                'Total positive cases': 'positive',
                'Total deaths': 'death',
                'Total negative cases': 'negative',
                'Total hospitalized': 'hospitalizedCumulative',
                'Total in ICU': 'inIcuCumulative',
                'Total on ventilator': 'onVentilatorCumulative',
                'Total recovered': 'recovered',
                'Total test results': 'totalTestResults',
                'Overall positive rate': 'pos_rate',
                'Increase in positive cases': 'positiveIncrease',
                'Increase in deaths': 'deathIncrease',
                'Increase in hospitalized': 'hospitalizedIncrease',
                'Increase in negative cases': 'negativeIncrease',
                'Increase in total test results': 'totalTestResultsIncrease'
            }
            national_vars = list(national_dict.keys())

            st.markdown('*All data*')

            if st.checkbox('View raw national data'):
                st.write(national_data)

            national_vars_chosen = st.multiselect('Choose variable',
                                                  national_vars)
            for var in national_vars_chosen:
                plot_national_var(national_dict[var])
            '\n'
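
A possible refinement (a sketch, not shown in the original example): each rerun of the script re-downloads all three CSVs through load_raw_data, so caching the loader, as Example #3 does with st.cache, would avoid the repeated network fetches:

@st.cache(persist=True)
def load_raw_data(url):
    return pd.read_csv(url)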
Example #3
"E:\Sankar\Streamlit_App\Motor_Vehicle_Collisions_-_Crashes.csv"
)

# st.cache prevents the computation from rerunning every time the app reloads;
# the decorated function re-executes only when an input changes
@st.cache(persist=True)
def load_data(nrows):
    data = pd.read_csv(DATA_URL, nrows=nrows, parse_dates=[['CRASH_DATE', 'CRASH_TIME']])
    data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True)
    lowercase = lambda x: str(x).lower() #function to lower the case of the column names using the lambda function
    data.rename(lowercase, axis='columns', inplace=True)
    data.rename(columns={'crash_date_crash_time': 'date/time'}, inplace=True)
    return data

data = load_data(100000)
original_data = data

st.header("Where are the most people injured in NYC?")
injured_people = st.slider("Number of persons injured in vehicle collisions", 0, 20)
st.map(data.query("injured_persons >= @injured_people")[["latitude", "longitude"]].dropna(how="any"))

st.header("How many collisions occurred during a given time of day?")
hour = st.slider("Hour to look at", 0, 23)
data = data[data['date/time'].dt.hour == hour]

st.markdown("Vehicle collisions between %i:00 and %i:00" % (hour, (hour + 1) % 24))
midpoint = (np.average(data["latitude"]), np.average(data["longitude"]))

st.write(pdk.Deck(
    map_style="mapbox://styles/mapbox/light-v9",
    initial_view_state={
        "latitude": midpoint[0],
        "longitude": midpoint[1],
Example #4
d_reroll = ['None','Ones','All']
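# assumed (defined earlier in the full script, not shown in this snippet): the
# option lists referenced below, e.g. something like
# att_type = ['Shooting', 'Melee']
# a_reroll = ['None', 'Ones', 'All']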

#create streamlit elements on main page
st.title('Warhammer 40k 9th ed calculator')
st.write('v 0.1.1')
att_type_select = st.sidebar.selectbox('Select attack type', (att_type))

if att_type_select == att_type[0]:
    skill_label = 'Ballistic skill'
else: 
    skill_label = 'Weapon skill'
    
num_attackers = st.sidebar.number_input('Number of attacks', min_value = 1, step = 1)
num_defenders = st.sidebar.number_input('Number of defenders', min_value = 1, step = 1)

st.header('Attacker')

a_col1, a_col2, a_col3, a_col4 = st.beta_columns(4) 

with a_col1:
    a_skill = st.number_input(skill_label, min_value = 1, max_value = 6, step = 1, value = 3)
    a_hit_reroll = st.selectbox('Hit reroll', (a_reroll))
    
with a_col2:
    a_strength = st.number_input('Strength', min_value = 0, step = 1, value = 3)
    
with a_col3:
    a_armor_p = st.number_input('Armor Piercing', min_value = -10, step = 1, value = 0)
    a_wound_reroll = st.selectbox('Wound reroll', (a_reroll))
    
with a_col4:
Example #5

if show_search:
    search_query = st.sidebar.text_input("Enter search terms",
                                         value="Baseball")
    tokenizer, model, V = load_model_and_vectors('data')
    df["dist"] = compute_distance(search_query)

df["size"] = radius
df["line_width"] = 0

viz_cols = [
    "text",
    "label",
]

st.header(project_title)

if show_search:
    subset = df.sort_values("dist")[["text", "dist"]]
    st.table(subset.head())

p = interface.plot_data_bokeh(df, hover_columns=viz_cols)
plot_placeholder = st.empty()

if show_labels:

    for col in range(n_text_labels):
        dx = df[df.cluster == col]
        dx = dx.reset_index()

        cmx, cmy = dx.ux.mean(), dx.uy.mean()
Example #6

st.sidebar.subheader("When and where are the users tweeting from 🌎")
hour = st.sidebar.slider("Hour of day", 0, 23)
modified_data = data[data['tweet_created'].dt.hour == hour]
if not st.sidebar.checkbox("Close", True, key='1'):
    st.markdown("### Tweets location based on the time of day")
    st.markdown("%i tweets between %i:00 and %i:00" %
                (len(modified_data), hour, (hour + 1) % 24))
    st.map(modified_data)
    if st.sidebar.checkbox("Show raw data", False):
        st.write(modified_data)

st.sidebar.subheader("Break down airline tweets by sentiment ✈️")
choice = st.sidebar.multiselect('Pick airlines',
                                ('US Airways', 'United', 'American',
                                 'Southwest', 'Delta', 'Virgin America'))

if len(choice) > 0:
    choice_data = data[data.airline.isin(choice)]
    fig_choice = px.histogram(choice_data,
                              x='airline',
                              y='airline_sentiment',
                              histfunc='count',
                              color='airline_sentiment',
                              facet_col='airline_sentiment',
                              height=600,
                              width=600)
    st.plotly_chart(fig_choice)

else:
    st.header("⬅️ Click on the options to get started")
Example #7

import streamlit as st
from PIL import Image
import tensorflow as tf
import numpy as np
model = tf.keras.models.load_model('/content/save.h5')
st.markdown(
    "<h1 style='text-align: center;'>Malaria detection by CNN model</h1>",
    unsafe_allow_html=True)
st.subheader('Input will be the cell snapshots of suspected person')
st.set_option('deprecation.showfileUploaderEncoding', False)
img = st.file_uploader('Drop or upload cell images here',
                       type=['jpeg', 'png', 'jpg'])
st.markdown("<br><br>", unsafe_allow_html=True)
if st.button('SUBMIT') and img is not None:
    img = Image.open(img)
    st.markdown("<br>", unsafe_allow_html=True)
    st.image(img, caption='Uploaded image')
    # resample to the model's input size; np.resize only truncates/tiles the raw
    # array, whereas PIL's resize actually rescales the image
    image = tf.keras.preprocessing.image.img_to_array(img.convert('RGB').resize((90, 90)))
    img = np.expand_dims(image, axis=0)  # shape (1, 90, 90, 3)
    if model.predict(img) == 0:
        st.markdown('POSITIVE')
        st.header('The model indicates that the image contains malaria')
    else:
        st.balloons()
        st.markdown('NEGATIVE')
        st.header('The model indicates that the image does not contain malaria')
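# note (assumption about the model head): if the final layer is a sigmoid,
# model.predict(img) returns a probability and will rarely equal 0 exactly;
# thresholding, e.g. model.predict(img)[0][0] < 0.5, is the more robust check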
Example #8

st.sidebar.markdown('''Use the Netflix Movies and TV Shows dataset from Kaggle and perform the following operations:''')
st.sidebar.markdown('''1. Make a visualization showing the total number of movies
watched by children
2. Make a visualization showing the total number of standup
comedies
3. Make a visualization showing most watched shows.
4. Make a visualization showing highest rated show.
5. Make a dashboard (DASHBOARD A) containing all of these above visualizations. ''')

st.sidebar.markdown('''Designed by: **Mihir Kulkarni**''')
st.header('Dataset')
netflix_df = pd.read_csv("netflix_titles.csv")
netflix_df


netflix_df.isnull().sum()


netflix_df.director.fillna("No director",inplace=True)
netflix_df.cast.fillna("No cast",inplace=True)
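
# A sketch for task 1 from the sidebar list above (assumed: in the Kaggle dataset
# the 'rating' column marks children's content with values like 'TV-Y', 'TV-Y7', 'G'):
kids_df = netflix_df[netflix_df['rating'].isin(['TV-Y', 'TV-Y7', 'TV-G', 'G'])]
st.bar_chart(kids_df['type'].value_counts())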
Example #9
def write():

    st.title('Setup & Cleaning')
    st.header('Obtaining and Exploring the Data')

    st.markdown(
        '''<p style='text-align: justify; '>While searching for public datasets that contained relevant job and salary info, 
        we found it difficult to gather all the necessary pieces in one place. Many datasets were either too small, too old, or not comprehensive enough.
        We decided that our best bet to find a large, consistent dataset was to collect it on our own via web scraping.</p>''',
        unsafe_allow_html=True)

    st.header('Setup')
    st.markdown('Relevant libraries for data cleaning and exploration.')
    import_code = '''
    import numpy as np
    import pandas as pd
    import altair as alt
    import seaborn as sns
    import matplotlib.pyplot as plt
    import googlemaps
    import folium'''
    st.code(import_code, language='python')

    st.header('Web Scraping Glassdoor')
    st.markdown(
        '''<p style='text-align: justify; '>Following this great <a href="https://towardsdatascience.com/selenium-tutorial-scraping-glassdoor-com-in-10-minutes-3d0915c6d905">Medium article</a> 
        on web scraping using <i>Selenium</i>, I was able to run a script to scrape 1000 unique job postings on Glassdoor.com. The author's original code 
        needed a few tweaks to run, as the format of some of the HTML elements on the Glassdoor site had changed.</p>''',
        unsafe_allow_html=True)

    st.image(Image.open('images/scraper.png'),
             caption="'Software Engineering' query with no location specified",
             use_column_width=True)
    st.write('''
    I made three queries: **Software Engineering**, **Data Scientist**, and **Designer**, all without specifying a worksite location, to get a wide range of positions across the United States.
    With each job entry, I collected the following information:''')
    st.image(Image.open('images/bullet.png'), use_column_width=True)

    st.markdown(
        '''Here's what the head of the DataFrame for *Software Engineering* jobs looked like after scraping:'''
    )
    st.code('''df = pd.read_csv('swe.csv')
df.head()
    ''',
            language='python')
    df = pd.read_csv('swe.csv')
    st.dataframe(df.head())

    st.header('Data Cleaning')
    st.write(
        '''<p style='text-align: justify; '>After scraping the data, I needed to clean it up so that it was usable for our model. I made the following changes and created the following variables:
    <ul><li>Identified the <i>Seniority</i> of each job based on title listing</li>
    <li>Parsed numeric data out of the <i>Salary Estimate</i> column</li>
    <li>Removed rows with missing salaries</li>
    <li>Made new columns for the Job <i>State</i> and <i>City</i></li>
    <li>Reverse geocoded locations using <a href="https://cloud.google.com/maps-platform/maps">Google Maps Cloud API</a> (retrieve Latitude/Longitude)</li>
    </ul>
    </p>''',
        unsafe_allow_html=True)

    st.write('''Here are what the some of these steps looked like:''')

    st.subheader('Identifying Seniority')
    st.code(
        '''unique_jobs = df['Job Title'].unique() # Overview of unique job titles
unique_jobs[0:5] # Sample of first 5 titles''',
        language='python')
    unique_jobs = df['Job Title'].unique()
    st.text(unique_jobs[0:5])
    st.markdown(
        '''From the *Job Title* column we find that there are 173 unique job titles with various levels of seniority. 
    The function below extracts the most common labels for senior and junior positions.'''
    )
    st.code("""def seniority(title):
    '''Identify and group specific job titles'''
    title = title.lower().strip()
    seniority = ['senior', 'sr.', 'sr', 'lead', 'expert', 'experienced', 'principal']
    juniority = ['junior', 'jr.', 'jr', 'intern']
    for i in seniority:
        if i in title:
            return 'senior'
    for i in juniority:
        if i in title:
            return 'junior'
    return 'unspecified'""")

    st.code('''df['Seniority'] = df['Job Title'].apply(seniority)
df.sample(3) # Sample 3 random postings''')

    def seniority(title):
        '''Identify and group specific job titles'''

        title = title.lower().strip()

        seniority = [
            'senior', 'sr.', 'sr', 'lead', 'expert', 'experienced', 'principal'
        ]
        juniority = ['junior', 'jr.', 'jr', 'intern']

        for i in seniority:
            if i in title:
                return 'senior'

        for i in juniority:
            if i in title:
                return 'junior'

        return 'unspecified'

    df['Seniority'] = df['Job Title'].apply(seniority)
    st.markdown(
        'Now we have a new column, *Seniority*, which specifies the precedence of each posting.'
    )
    st.dataframe(df.sample(3))
    st.code('df.Seniority.value_counts()')
    st.text(df.Seniority.value_counts())
    titles = pd.DataFrame({
        'Position': ['Unspecified', 'Senior', 'Junior'],
        'Total': [595, 325, 60]
    })
    st.markdown('**Bar chart of Seniority**')
    st.altair_chart(
        alt.Chart(titles).mark_bar().encode(y='Position', x='Total'))
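    # a sketch (assumed equivalent to the hardcoded totals above) deriving the
    # chart data from the DataFrame itself:
    # titles = df.Seniority.value_counts().rename_axis('Position').reset_index(name='Total')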
    st.markdown(
        '''<p style='text-align: justify; '>This bar chart shows the distribution of seniority in the job title listings. 
    While the majority of titles do not specify seniority, it seems to make intuitive sense that there 
    is a greater demand for experienced software engineers as opposed to junior or new grad positions. 
    The lack of junior positions could also be explained by the notion that most of those listings would be 
    offered as internships rather than full time positions, and thus wouldn't be listed on a job-hunting 
    website such as Glassdoor.</p>''',
        unsafe_allow_html=True)

    st.subheader('Parsing Salary Estimates')
    st.markdown(
        'Next I separated the Glassdoor Salary Estimates into lows and highs to get an average.'
    )
    st.code('''df.get('Salary Estimate').unique()''')
    st.text(df.get('Salary Estimate').unique())
    st.code('''def salary_simplified(salary):
    salary_simp = salary.split('(')[0].replace('K','').replace('$','')
    minimum = int(salary_simp.split('-')[0])
    maximum = int(salary_simp.split('-')[1])
    return minimum, maximum''')
    st.code('''salary_ranges = df['Salary Estimate'].apply(salary_simplified)
    ''')

    def salary_simplified(salary):
        salary_simp = salary.split('(')[0].replace('K', '').replace('$', '')
        minimum = int(salary_simp.split('-')[0])
        maximum = int(salary_simp.split('-')[1])
        return minimum, maximum

    salary_ranges = df['Salary Estimate'].apply(salary_simplified)
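    # the averaging step described above isn't shown in the snippet; a sketch
    # (hypothetical 'Avg Salary' column name):
    # df['Avg Salary'] = salary_ranges.apply(lambda rng: (rng[0] + rng[1]) / 2)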

    titles = pd.DataFrame({
        'Count': [24, 30, 31, 31, 31, 30, 32, 31, 616, 31, 30, 31, 32],
        'Average': [
            61.5, 62.5, 63.5, 73.0, 75.0, 80.0, 85.0, 88.0, 91.5, 100.0, 111.0,
            112.0, 126.0
        ]
    })
    st.markdown('**Bar chart of Average Salaries**')
    st.altair_chart(
        alt.Chart(titles).mark_bar().encode(y='Count', x='Average'))
    st.markdown(
        '''<p style='text-align: justify; '>As we can see from this bar chart, the overwhelming majority of salary averages are at $91.5k with
        lows in the mid $60k's and a high in the mid $120k's. </p>''',
        unsafe_allow_html=True)

    st.subheader('Reverse Geocoding Locations')
    st.markdown('''<p style='text-align: justify; '>
        In order to visualize the locations of the job postings in the data frame, 
        I needed a way to plot each posting on a map. However, the data did not come 
        with any geographic information about the locations of the postings. To solve 
        this problem I used the Google Maps Cloud API to reverse geocode each city's location 
        and obtain its latitude and longitude coordinates. The following code cell assigns 
        each job's Location listing the corresponding geographic coordinates.</p>''',
                unsafe_allow_html=True)
    st.code('''df['LAT'] = None
df['LON'] = None

for i in range(len(df.Location)):
    geocode_result = gmaps_key.geocode(df.Location.iloc[i])
    try:
        lat = geocode_result[0]['geometry']['location']['lat']
        lon = geocode_result[0]['geometry']['location']['lng']
        df.loc[i, 'LAT'] = lat
        df.loc[i, 'LON'] = lon
    except Exception:
        pass  # leave LAT/LON as None''')

    st.markdown(
        '''The next code block simplifies the geocoding by creating a new data frame that stores 
    geographic coordinates, cities, and the number of times they appear in the original table.'''
    )

    st.code('''city_counts = df.groupby('Location').count().get('Job Title')
df_group = pd.DataFrame()
df_group['Lat'] = df['LAT']
df_group['Lon'] = df['LON']
df_group['City'] = df['Location']

cities = pd.DataFrame()
cities['Count'] = city_counts
cities = cities.reset_index()

df_geo = df_group.merge(cities, left_on='City', right_on='Location')
df_geo = df_geo.drop_duplicates(['City'], keep='first').drop(['City'], axis=1)
    ''')

    bubble_map = folium.Map(location=[37, -102], zoom_start=4)
    st.markdown(
        '''The map view defaults to the United States.''')
    st.code('bubble_map = folium.Map(location=[37, -102], zoom_start=4)')

    df_geo = pd.read_csv('df_geo.csv')
    for i in range(len(df_geo)):
        folium.Circle(location=[df_geo.Lat.iloc[i], df_geo.Lon.iloc[i]],
                      popup=df_geo.Location.iloc[i],
                      radius=int(df_geo.Count.iloc[i]) * 10000,
                      color='#7551f8',
                      fill=True,
                      fill_color='#7551f8').add_to(bubble_map)

    st.code('''for i in range(len(df_geo)):
    folium.Circle(location=[df_geo.Lat.iloc[i], df_geo.Lon.iloc[i]],
            popup=df_geo.Location.iloc[i],
            radius=int(df_geo.Count.iloc[i]) * 10000,
            color='#7551f8',
            fill=True,
            fill_color='#7551f8').add_to(bubble_map)''')

    st.markdown('**Bubble Map of Jobs based on Posting Density**')

    folium_static(bubble_map)

    st.markdown('')
    st.markdown('''<p style='text-align: justify; '>
    This bubble map of the United States plots each job listing's location, 
    with the radius of each bubble scaled to the number of postings at 
    that location. From the map we can see that there are a large number of postings in 
    Salt Lake City, Chicago, Burlington, New York, San Jose, and Seattle. The concentration 
    of multiple circles in the Bay Area and on the East coast reflects a high volume of postings and 
    marks these areas as "Tech Hubs."</p>''',
                unsafe_allow_html=True)
Example #10

import numpy as np
import os
import datetime


import time
import streamlit as st

from dogfight_game import GameEnv, plotGameData, best_reply_game_rollout

st.header("pre-generate a data set")

N_agents = st.number_input(
    label="number of agents", min_value=2, value=10, step=1, format="%.0d"
)

time_steps = st.number_input(
    label=" max time steps", min_value=0, value=10, step=1, format="%.0d",
)

num_states_to_save = st.number_input(
    label="total steps to save", min_value=0, value=10000, step=1, format="%.0d",
)

save_every = st.number_input(
    label="save every X steps", min_value=0, value=1000, step=1, format="%.0d",
)

create_dataset_button = st.button("create dataset")

Example #11

import streamlit as st
import numpy as np
import pandas as pd

# Explicitly seed the RNG for deterministic results
np.random.seed(0)

st.title('Tables with different sizes')

st.header('Long cells that overflow')

st.write('''
    Long text should show an ellipsis. All cells should have a tooltip
    with their entire un-ellipsized contents.
    ''')

st.dataframe({
    'foo': ['hello', 'world', 'foo '*30],
    'bar': ['hello', 'world', 'bar'*30],
    'baz': [1, 2, 3],
    'boz': [1, 2, 3],
    'buz': [1, 2, 3],
    'biz'*30: [1, 2, 3],
    'bim': [1, 2, 3],
})
Example #12

def main():
    df = load_data()

    #page = st.sidebar.radio("Choose a page", ["Homepage", "SignUp"])

    verified = "True"
    result = "F.A.S.T. WebApp - For Interview Demo"
    st.sidebar.title(result)
    st.sidebar.write(
        "Created By: Akash M Dubey [LinkedIn](https://www.linkedin.com/in/akashmdubey/)"
    )
    st.sidebar.write(
        "Checkout more projects at [www.akashmdubey.com/projects](https://akashmdubey.com/)"
    )

    page = st.sidebar.radio("Choose a Function", [
        "About the Project", "Live News Sentiment", "Company Basic Details",
        "Company Advanced Details", "Stock Future Prediction",
        "Google Trends with Forecast", "Twitter Trends",
        "Meeting Summarization"
    ])

    if page == "Google Trends with Forecast":
        st.sidebar.write("""
        ## Choose a keyword and a prediction period 
        """)
        keyword = st.sidebar.text_input("Keyword", "Company name")
        periods = st.sidebar.slider('Prediction time in days:', 7, 365, 90)

        # main section
        st.write("""
        # Welcome to Trend Predictor App
        ### This app predicts the **Google Trend** you want!
        """)
        st.image(
            'https://s3.eu-west-2.amazonaws.com/cdn.howtomakemoneyfromhomeuk.com/wp-content/uploads/2020/10/Google-Trends.jpg',
            width=350,
            use_column_width=200)
        st.write("Evolution of interest:", keyword)

        df = get_data(keyword)
        forecast, fig1, fig2 = make_pred(df, periods)

        st.pyplot(fig1)

        st.write("Trends Over the Years and Months")
        st.pyplot(fig2)

    elif page == "About the Project":

        st.title('Data Sources')
        st.write("""
        ### Our F.A.S.T. application has 3 data sources across two use cases:
        #### 1. Web scraping for live news data
        #### 2. The Twitter API for real-time tweets
        #### 3. The Google Trends API for real-time trends
        """)
        st.text('')

        link = '[Project Report](https://codelabs-preview.appspot.com/?file_id=1qxniFjwkDir6NT17KkvS1zDbmIgawcrEEwbbfCtAk8k#1)'
        st.markdown(link, unsafe_allow_html=True)

        st.title('AWS Data Architecture')
        st.image('./Images/Architecture Final AWS_FAST.jpg',
                 width=900,
                 use_column_width=1200)

        st.title('Dashboard')
        import streamlit.components.v1 as components
        components.iframe(
            "https://app.powerbi.com/view?r=eyJrIjoiZjMzMGUyZTEtM2RiMS00NzFlLWE3MWMtZDgzMjIxNTgxYmY3IiwidCI6ImE4ZWVjMjgxLWFhYTMtNGRhZS1hYzliLTlhMzk4YjkyMTVlNyIsImMiOjN9&pageName=ReportSection842eec15de524192b588",
            height=600,
            width=900)

    elif page == "Meeting Summarization":

        symbols = [
            './Audio Files/Meeting 1.mp3', './Audio Files/Meeting 2.mp3',
            './Audio Files/Meeting 3.mp3', './Audio Files/Meeting 4.mp3'
        ]

        track = st.selectbox('Choose a Meeting Audio', symbols)

        st.audio(track)
        data_dir = './inference-data/'

        ratiodata = st.text_input(
            "Please enter a ratio to summarize by (try 0.01)")
        if st.button("Generate a Summarized Version of the Meeting"):
            time.sleep(2.4)

            if track == "./Audio Files/Meeting 2.mp3":
                user_input = "NKE"
                time.sleep(1.4)
                try:
                    with open(data_dir + user_input) as f:
                        st.success(summarize(f.read(), ratio=float(ratiodata)))
                        #print()
                        st.warning("Sentiment: Negative")
                except:
                    st.text("Please enter a valid decimal value, e.g. 0.01")

            else:
                user_input = "AGEN"
                time.sleep(1.4)
                try:
                    with open(data_dir + user_input) as f:
                        st.success(summarize(f.read(), ratio=float(ratiodata)))
                        #print()
                        st.success("Sentiment: Positive")
                except:
                    st.text("Please enter a valid decimal value, e.g. 0.01")

    elif page == "Twitter Trends":

        st.write("""
        # Welcome to Twitter Sentiment App
        ### This app predicts the **Twitter Sentiments** you want!
        """)
        st.image(
            'https://assets.teenvogue.com/photos/56b4f21327a088e24b967bb6/3:2/w_531,h_354,c_limit/twitter-gifs.gif',
            width=250,
            use_column_width=200)

        #st.subheader("Select a topic which you'd like to get the sentiment analysis on :")

        ################# Twitter API Connection #######################
        # supply your own Twitter API credentials here
        consumer_key = "YOUR_CONSUMER_KEY"
        consumer_secret = "YOUR_CONSUMER_SECRET"
        access_token = "YOUR_ACCESS_TOKEN"
        access_token_secret = "YOUR_ACCESS_TOKEN_SECRET"

        # Use the above credentials to authenticate the API.

        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        api = tweepy.API(auth)
        ################################################################

        df = pd.DataFrame(columns=[
            "Date", "User", "IsVerified", "Tweet", "Likes", "RT",
            'User_location'
        ])

        # Write a Function to extract tweets:
        def get_tweets(Topic, Count):
            i = 0
            #my_bar = st.progress(100) # To track progress of Extracted tweets
            for tweet in tweepy.Cursor(api.search,
                                       q=Topic,
                                       count=100,
                                       lang="en",
                                       exclude='retweets').items():
                #time.sleep(0.1)
                #my_bar.progress(i)
                df.loc[i, "Date"] = tweet.created_at
                df.loc[i, "User"] = tweet.user.name
                df.loc[i, "IsVerified"] = tweet.user.verified
                df.loc[i, "Tweet"] = tweet.text
                df.loc[i, "Likes"] = tweet.favorite_count
                df.loc[i, "RT"] = tweet.retweet_count
                df.loc[i, "User_location"] = tweet.user.location
                #df.to_csv("TweetDataset.csv",index=False)
                #df.to_excel('{}.xlsx'.format("TweetDataset"),index=False)   ## Save as Excel
                i = i + 1
                if i > Count:
                    break

        # Function to Clean the Tweet.
        def clean_tweet(tweet):
            return ' '.join(
                re.sub(
                    r'(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|([RT])',
                    ' ', tweet.lower()).split())

        # Function to analyze sentiment
        def analyze_sentiment(tweet):
            analysis = TextBlob(tweet)
            if analysis.sentiment.polarity > 0:
                return 'Positive'
            elif analysis.sentiment.polarity == 0:
                return 'Neutral'
            else:
                return 'Negative'
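        # e.g. analyze_sentiment("great service") returns 'Positive', since
        # TextBlob scores it with a polarity above 0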

        # Function to pre-process data for the wordcloud
        def prepCloud(Topic_text, Topic):
            Topic = str(Topic).lower()
            Topic = ' '.join(re.sub('([^0-9A-Za-z \t])', ' ', Topic).split())
            Topic = re.split("\s+", str(Topic))
            stopwords = set(STOPWORDS)
            stopwords.update(
                Topic
            )  # add the topic words to the stopwords so they don't appear in the wordcloud
            text_new = " ".join(
                [txt for txt in Topic_text.split() if txt not in stopwords])
            return text_new

        # Collect Input from user :
        Topic = str(
            st.sidebar.text_input(
                "Enter the topic you are interested in (Press Enter once done)",
                "enter company name"))

        if len(Topic) > 0:

            # Call the function to extract the data. pass the topic and filename you want the data to be stored in.
            with st.spinner("Please wait, Tweets are being extracted"):
                get_tweets(Topic, Count=200)
            st.success('Tweets have been extracted!')

            # Call function to get Clean tweets
            df['clean_tweet'] = df['Tweet'].apply(lambda x: clean_tweet(x))

            # Call function to get the Sentiments
            df["Sentiment"] = df["Tweet"].apply(lambda x: analyze_sentiment(x))

            # Write Summary of the Tweets
            st.write("Total Tweets Extracted for Topic '{}' are : {}".format(
                Topic, len(df.Tweet)))
            st.write("Total Positive Tweets are : {}".format(
                len(df[df["Sentiment"] == "Positive"])))
            st.write("Total Negative Tweets are : {}".format(
                len(df[df["Sentiment"] == "Negative"])))
            st.write("Total Neutral Tweets are : {}".format(
                len(df[df["Sentiment"] == "Neutral"])))

            # See the Extracted Data :
            if st.button("See the Extracted Data"):
                #st.markdown(html_temp, unsafe_allow_html=True)
                st.success("Below is the Extracted Data :")
                st.write(df.head(50))

            # get the countPlot
            if st.button("Get Count Plot for Different Sentiments"):
                st.success("Generating A Count Plot")
                st.subheader(" Count Plot for Different Sentiments")
                st.write(sns.countplot(df["Sentiment"], palette="Blues"))
                st.pyplot()

            # Piechart
            if st.button("Get Pie Chart for Different Sentiments"):
                st.success("Generating A Pie Chart")
                a = len(df[df["Sentiment"] == "Positive"])
                b = len(df[df["Sentiment"] == "Negative"])
                c = len(df[df["Sentiment"] == "Neutral"])
                d = np.array([a, b, c])
                explode = (0.1, 0.0, 0.1)
                st.write(
                    plt.pie(d,
                            shadow=True,
                            explode=explode,
                            labels=["Positive", "Negative", "Neutral"],
                            autopct='%1.2f%%'))
                st.pyplot()

            # get the countPlot Based on Verified and unverified Users
            if st.button(
                    "Get Count Plot Based on Verified and unverified Users"):
                st.success(
                    "Generating A Count Plot (Verified and unverified Users)")
                st.subheader(
                    " Count Plot for Different Sentiments for Verified and unverified Users"
                )
                st.write(sns.countplot(df["Sentiment"], hue=df.IsVerified))
                st.pyplot()

            ## Points to add: 1. make the background clear for the wordcloud 2. remove keywords from the wordcloud

            # Create a wordcloud
            if st.button("Get WordCloud for all things said about {}".format(
                    Topic)):
                st.success(
                    "Generating A WordCloud for all things said about {}".
                    format(Topic))
                text = " ".join(review for review in df.clean_tweet)
                stopwords = set(STOPWORDS)
                text_newALL = prepCloud(text, Topic)
                wordcloud = WordCloud(
                    stopwords=stopwords,
                    max_words=800,
                    max_font_size=75,
                    colormap="Blues",
                    background_color="black").generate(text_newALL)
                st.write(plt.imshow(wordcloud, interpolation='bilinear'))
                st.pyplot()

            #Wordcloud for Positive tweets only
            if st.button(
                    "Get WordCloud for all Positive Tweets about {}".format(
                        Topic)):
                st.success(
                    "Generating A WordCloud for all Positive Tweets about {}".
                    format(Topic))
                text_positive = " ".join(review for review in df[
                    df["Sentiment"] == "Positive"].clean_tweet)
                stopwords = set(STOPWORDS)
                text_new_positive = prepCloud(text_positive, Topic)
                #text_positive=" ".join([word for word in text_positive.split() if word not in stopwords])
                wordcloud = WordCloud(
                    stopwords=stopwords,
                    max_words=800,
                    max_font_size=75,
                    colormap="Greens",
                    background_color="black").generate(text_new_positive)
                st.write(plt.imshow(wordcloud, interpolation='bilinear'))
                st.pyplot()

            #Wordcloud for Negative tweets only
            if st.button(
                    "Get WordCloud for all Negative Tweets about {}".format(
                        Topic)):
                st.success(
                    "Generating A WordCloud for all Negative Tweets about {}".
                    format(Topic))
                text_negative = " ".join(review for review in df[
                    df["Sentiment"] == "Negative"].clean_tweet)
                stopwords = set(STOPWORDS)
                text_new_negative = prepCloud(text_negative, Topic)
                #text_negative=" ".join([word for word in text_negative.split() if word not in stopwords])
                wordcloud = WordCloud(
                    stopwords=stopwords,
                    max_words=800,
                    max_font_size=75,
                    colormap="Reds",
                    background_color="black").generate(text_new_negative)
                st.write(plt.imshow(wordcloud, interpolation='bilinear'))
                st.pyplot()

        #st.sidebar.subheader("Scatter-plot setup")
        #box1 = st.sidebar.selectbox(label= "X axis", options = numeric_columns)
        #box2 = st.sidebar.selectbox(label="Y axis", options=numeric_columns)
        #sns.jointplot(x=box1, y= box2, data=df, kind = "reg", color= "red")
        #st.pyplot()

    elif page == "Stock Future Prediction":
        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()

        ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols)

        START = "2015-01-01"
        TODAY = date.today().strftime("%Y-%m-%d")

        st.title('Stock Forecast App')

        st.image(
            'https://media2.giphy.com/media/JtBZm3Getg3dqxK0zP/giphy-downsized-large.gif',
            width=250,
            use_column_width=200)

        # stocks = ('GOOG', 'AAPL', 'MSFT', 'GME', 'W', 'TSLA')
        # selected_stock = st.selectbox('Select dataset for prediction', stocks)

        n_years = st.slider('Years of prediction:', 1, 4)
        period = n_years * 365

        # To do: move this part into stockapp.py

        data_load_state = st.text('Loading data...')

        data = yf.download(ticker, START, TODAY)
        data.reset_index(inplace=True)
        data_load_state.text('Loading data... done!')

        st.subheader('Raw data')
        st.write(data.tail())

        # Plot raw data
        def plot_raw_data():
            fig = go.Figure()
            fig.add_trace(
                go.Scatter(x=data['Date'], y=data['Open'], name="stock_open"))
            fig.add_trace(
                go.Scatter(x=data['Date'], y=data['Close'],
                           name="stock_close"))
            fig.layout.update(title_text='Time Series data with Rangeslider',
                              xaxis_rangeslider_visible=True)
            st.plotly_chart(fig)

        plot_raw_data()

        # Predict forecast with Prophet.
        df_train = data[['Date', 'Close']]
        df_train = df_train.rename(columns={"Date": "ds", "Close": "y"})

        m = Prophet()
        m.fit(df_train)
        future = m.make_future_dataframe(periods=period)
        forecast = m.predict(future)

        # Show and plot forecast
        st.subheader('Forecast data')
        st.write(forecast.tail())

        st.write(f'Forecast plot for {n_years} years')
        fig1 = plot_plotly(m, forecast)
        st.plotly_chart(fig1)

        st.write("Forecast components")
        fig2 = m.plot_components(forecast)
        st.write(fig2)

    elif page == "Company Advanced Details":
        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()

        ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols)

        stock = yf.Ticker(ticker)

        def calcMovingAverage(data, size):
            df = data.copy()
            df['sma'] = df['Adj Close'].rolling(size).mean()
            df['ema'] = df['Adj Close'].ewm(span=size, min_periods=size).mean()
            df.dropna(inplace=True)
            return df

        def calc_macd(data):
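            # standard MACD: fast EMA(12) minus slow EMA(26) of the adjusted
            # close, with the signal line as a 9-period EMA of the MACD itself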
            df = data.copy()
            df['ema12'] = df['Adj Close'].ewm(span=12, min_periods=12).mean()
            df['ema26'] = df['Adj Close'].ewm(span=26, min_periods=26).mean()
            df['macd'] = df['ema12'] - df['ema26']
            df['signal'] = df['macd'].ewm(span=9, min_periods=9).mean()
            df.dropna(inplace=True)
            return df

        def calcBollinger(data, size):
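            # Bollinger bands: middle band = rolling SMA; upper/lower bands sit
            # 2 rolling standard deviations (ddof=0) above/below it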
            df = data.copy()
            df["sma"] = df['Adj Close'].rolling(size).mean()
            df["bolu"] = df["sma"] + 2 * df['Adj Close'].rolling(size).std(
                ddof=0)
            df["bold"] = df["sma"] - 2 * df['Adj Close'].rolling(size).std(
                ddof=0)
            df["width"] = df["bolu"] - df["bold"]
            df.dropna(inplace=True)
            return df

        st.title('Company Stocks Advanced Details')
        st.subheader('Moving Average')

        coMA1, coMA2 = st.beta_columns(2)

        with coMA1:
            numYearMA = st.number_input('Insert period (Year): ',
                                        min_value=1,
                                        max_value=10,
                                        value=2,
                                        key=0)

        with coMA2:
            windowSizeMA = st.number_input('Window Size (Day): ',
                                           min_value=5,
                                           max_value=500,
                                           value=20,
                                           key=1)

        start = dt.datetime.today() - dt.timedelta(numYearMA * 365)
        end = dt.datetime.today()
        dataMA = yf.download(ticker, start, end)
        df_ma = calcMovingAverage(dataMA, windowSizeMA)
        df_ma = df_ma.reset_index()

        figMA = go.Figure()

        figMA.add_trace(
            go.Scatter(x=df_ma['Date'],
                       y=df_ma['Adj Close'],
                       name="Prices Over Last " + str(numYearMA) + " Year(s)"))

        figMA.add_trace(
            go.Scatter(x=df_ma['Date'],
                       y=df_ma['sma'],
                       name="SMA" + str(windowSizeMA) + " Over Last " +
                       str(numYearMA) + " Year(s)"))

        figMA.add_trace(
            go.Scatter(x=df_ma['Date'],
                       y=df_ma['ema'],
                       name="EMA" + str(windowSizeMA) + " Over Last " +
                       str(numYearMA) + " Year(s)"))

        figMA.update_layout(
            legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))

        figMA.update_layout(legend_title_text='Trend')
        figMA.update_yaxes(tickprefix="$")

        st.plotly_chart(figMA, use_container_width=True)

        st.subheader('Moving Average Convergence Divergence (MACD)')
        numYearMACD = st.number_input('Insert period (Year): ',
                                      min_value=1,
                                      max_value=10,
                                      value=2,
                                      key=2)

        startMACD = dt.datetime.today() - dt.timedelta(numYearMACD * 365)
        endMACD = dt.datetime.today()
        dataMACD = yf.download(ticker, startMACD, endMACD)
        df_macd = calc_macd(dataMACD)
        df_macd = df_macd.reset_index()

        figMACD = make_subplots(rows=2,
                                cols=1,
                                shared_xaxes=True,
                                vertical_spacing=0.01)

        figMACD.add_trace(go.Scatter(x=df_macd['Date'],
                                     y=df_macd['Adj Close'],
                                     name="Prices Over Last " +
                                     str(numYearMACD) + " Year(s)"),
                          row=1,
                          col=1)

        figMACD.add_trace(go.Scatter(x=df_macd['Date'],
                                     y=df_macd['ema12'],
                                     name="EMA 12 Over Last " +
                                     str(numYearMACD) + " Year(s)"),
                          row=1,
                          col=1)

        figMACD.add_trace(go.Scatter(x=df_macd['Date'],
                                     y=df_macd['ema26'],
                                     name="EMA 26 Over Last " +
                                     str(numYearMACD) + " Year(s)"),
                          row=1,
                          col=1)

        figMACD.add_trace(go.Scatter(x=df_macd['Date'],
                                     y=df_macd['macd'],
                                     name="MACD Line"),
                          row=2,
                          col=1)

        figMACD.add_trace(go.Scatter(x=df_macd['Date'],
                                     y=df_macd['signal'],
                                     name="Signal Line"),
                          row=2,
                          col=1)

        figMACD.update_layout(legend=dict(
            orientation="h", yanchor="bottom", y=1, xanchor="left", x=0))

        figMACD.update_yaxes(tickprefix="$")
        st.plotly_chart(figMACD, use_container_width=True)

        st.subheader('Bollinger Band')
        coBoll1, coBoll2 = st.beta_columns(2)
        with coBoll1:
            numYearBoll = st.number_input('Insert period (Year): ',
                                          min_value=1,
                                          max_value=10,
                                          value=2,
                                          key=6)

        with coBoll2:
            windowSizeBoll = st.number_input('Window Size (Day): ',
                                             min_value=5,
                                             max_value=500,
                                             value=20,
                                             key=7)

        startBoll = dt.datetime.today() - dt.timedelta(numYearBoll * 365)
        endBoll = dt.datetime.today()
        dataBoll = yf.download(ticker, startBoll, endBoll)
        df_boll = calcBollinger(dataBoll, windowSizeBoll)
        df_boll = df_boll.reset_index()
        figBoll = go.Figure()
        figBoll.add_trace(
            go.Scatter(x=df_boll['Date'], y=df_boll['bolu'],
                       name="Upper Band"))

        figBoll.add_trace(
            go.Scatter(x=df_boll['Date'],
                       y=df_boll['sma'],
                       name="SMA" + str(windowSizeBoll) + " Over Last " +
                       str(numYearBoll) + " Year(s)"))

        figBoll.add_trace(
            go.Scatter(x=df_boll['Date'], y=df_boll['bold'],
                       name="Lower Band"))

        figBoll.update_layout(legend=dict(
            orientation="h", yanchor="bottom", y=1, xanchor="left", x=0))

        figBoll.update_yaxes(tickprefix="$")
        st.plotly_chart(figBoll, use_container_width=True)

    elif page == "Live News Sentiment":

        st.image('https://www.visitashland.com/files/latestnews.jpg',
                 width=250,
                 use_column_width=200)

        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()

        ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols)

        if st.button("Click here to See Latest News about " + ticker + ""):

            st.header('Latest News')

            def newsfromfizviz(temp):

                # time.sleep(5)

                finwiz_url = 'https://finviz.com/quote.ashx?t='

                news_tables = {}
                tickers = [temp]

                for ticker in tickers:
                    url = finwiz_url + ticker
                    req = Request(url=url,
                                  headers={'user-agent': 'my-app/0.0.1'})
                    response = urlopen(req)
                    # Read the contents of the file into 'html'
                    html = BeautifulSoup(response)
                    # Find 'news-table' in the Soup and load it into 'news_table'
                    news_table = html.find(id='news-table')
                    # Add the table to our dictionary
                    news_tables[ticker] = news_table

                parsed_news = []

                # Iterate through the news
                for file_name, news_table in news_tables.items():
                    # Iterate through all tr tags in 'news_table'
                    for x in news_table.findAll('tr'):
                        # read the text from each tr tag into text
                        # get text from a only
                        text = x.a.get_text()
                        # splite text in the td tag into a list
                        date_scrape = x.td.text.split()
                        # if the length of 'date_scrape' is 1, load 'time' as the only element

                        if len(date_scrape) == 1:
                            time = date_scrape[0]

                        # else load 'date' as the 1st element and 'time' as the second
                        else:
                            date = date_scrape[0]
                            time = date_scrape[1]
                        # Extract the ticker from the file name, get the string up to the 1st '_'
                        ticker = file_name.split('_')[0]

                        # Append ticker, date, time and headline as a list to the 'parsed_news' list
                        parsed_news.append([ticker, date, time, text])

                # Instantiate the sentiment intensity analyzer
                vader = SentimentIntensityAnalyzer()

                # Set column names
                columns = ['ticker', 'date', 'time', 'headline']

                # Convert the parsed_news list into a DataFrame called 'parsed_and_scored_news'
                parsed_and_scored_news = pd.DataFrame(parsed_news,
                                                      columns=columns)

                # Iterate through the headlines and get the polarity scores using vader
                scores = parsed_and_scored_news['headline'].apply(
                    vader.polarity_scores).tolist()

                # Convert the 'scores' list of dicts into a DataFrame
                scores_df = pd.DataFrame(scores)

                # Join the DataFrames of the news and the list of dicts
                parsed_and_scored_news = parsed_and_scored_news.join(
                    scores_df, rsuffix='_right')

                # Convert the date column from string to datetime
                parsed_and_scored_news['date'] = pd.to_datetime(
                    parsed_and_scored_news.date).dt.date

                parsed_and_scored_news['Sentiment'] = np.where(
                    parsed_and_scored_news['compound'] > 0, 'Positive',
                    (np.where(parsed_and_scored_news['compound'] == 0,
                              'Neutral', 'Negative')))

                return parsed_and_scored_news

            df = newsfromfizviz(ticker)
            df_pie = df[['Sentiment', 'headline']].groupby('Sentiment').count()
            fig = px.pie(df_pie,
                         values=df_pie['headline'],
                         names=df_pie.index,
                         color=df_pie.index,
                         color_discrete_map={
                             'Positive': 'green',
                             'Neutral': 'darkblue',
                             'Negative': 'red'
                         })

            st.subheader('Dataframe with Latest News')
            st.dataframe(df)

            st.subheader('Latest News Sentiment Distribution using Pie Chart')
            st.plotly_chart(fig)

            plt.rcParams['figure.figsize'] = [11, 5]

            # Group by date and ticker columns from scored_news and calculate the mean
            mean_scores = df.groupby(['ticker', 'date']).mean()

            # Unstack the column ticker
            mean_scores = mean_scores.unstack()

            # Get the cross-section of compound in the 'columns' axis
            mean_scores = mean_scores.xs('compound',
                                         axis="columns").transpose()

            # Plot a bar chart with pandas
            mean_scores.plot(kind='bar')

            plt.grid()

            st.set_option('deprecation.showPyplotGlobalUse', False)

            st.subheader('Sentiments over Time')
            st.pyplot()

    elif page == "Company Basic Details":
        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()

        ticker = st.sidebar.selectbox('Choose an S&P 500 stock', symbols)

        stock = yf.Ticker(ticker)
        info = stock.info
        st.title('Company Basic Details')
        st.subheader(info['longName'])
        st.markdown('**Sector**: ' + info['sector'])
        st.markdown('**Industry**: ' + info['industry'])
        st.markdown('**Phone**: ' + info['phone'])
        st.markdown('**Address**: ' + info['address1'] + ', ' +
                    info['city'] + ', ' + info['zip'] + ', ' + info['country'])
        st.markdown('**Website**: ' + info['website'])
        st.markdown('**Business Summary**')
        st.info(info['longBusinessSummary'])

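        # Note: yf.Ticker(...).info is a plain dict and not every key is
        # guaranteed to be present for every ticker; a more defensive version
        # would use lookups like info.get('sector', 'N/A').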
        fundInfo = {
            'Enterprise Value (USD)': info['enterpriseValue'],
            'Enterprise To Revenue Ratio': info['enterpriseToRevenue'],
            'Enterprise To Ebitda Ratio': info['enterpriseToEbitda'],
            'Net Income (USD)': info['netIncomeToCommon'],
            'Profit Margin Ratio': info['profitMargins'],
            'Forward PE Ratio': info['forwardPE'],
            'PEG Ratio': info['pegRatio'],
            'Price to Book Ratio': info['priceToBook'],
            'Forward EPS (USD)': info['forwardEps'],
            'Beta': info['beta'],
            'Book Value (USD)': info['bookValue'],
            'Dividend Rate (%)': info['dividendRate'],
            'Dividend Yield (%)': info['dividendYield'],
            'Five year Avg Dividend Yield (%)':
            info['fiveYearAvgDividendYield'],
            'Payout Ratio': info['payoutRatio']
        }

        fundDF = pd.DataFrame.from_dict(fundInfo, orient='index')
        fundDF = fundDF.rename(columns={0: 'Value'})
        st.subheader('Fundamental Info')
        st.table(fundDF)

        st.subheader('General Stock Info')
        st.markdown('**Market**: ' + info['market'])
        st.markdown('**Exchange**: ' + info['exchange'])
        st.markdown('**Quote Type**: ' + info['quoteType'])

        start = dt.datetime.today() - dt.timedelta(2 * 365)
        end = dt.datetime.today()
        df = yf.download(ticker, start, end)
        df = df.reset_index()
        fig = go.Figure(data=go.Scatter(x=df['Date'], y=df['Adj Close']))
        fig.update_layout(
            title={
                'text': "Stock Prices Over Past Two Years",
                'y': 0.9,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top'
            })
        st.plotly_chart(fig, use_container_width=True)

        marketInfo = {
            "Volume": info['volume'],
            "Average Volume": info['averageVolume'],
            "Market Cap": info["marketCap"],
            "Float Shares": info['floatShares'],
            "Regular Market Price (USD)": info['regularMarketPrice'],
            'Bid Size': info['bidSize'],
            'Ask Size': info['askSize'],
            "Shares Short": info['sharesShort'],
            'Short Ratio': info['shortRatio'],
            'Shares Outstanding': info['sharesOutstanding']
        }

        marketDF = pd.DataFrame(data=marketInfo, index=[0])
        st.table(marketDF)

    else:
        verified = False
        result = "Please enter a valid Username, Password and Access Token!"

        st.title(result)
def app():
    st.title('Analysis of the NH Hotels website')
    st.write(
        "On this page you will find analyses of the NH Hotels website, through which you can search 357 hotels across 29 countries worldwide."
    )

    st.header("Goal of the analyses")
    st.write(
        "Recently, climate change, such as global warming, has become more visible to the public. "
        "We believe that companies in every sector, including tourism, need to take the environment more seriously. "
        "On the NH Hotels website, some hotels highlight their respect for the environment. "
        "On this page you will find analyses of the hotels according to their environmental friendliness (Eco_Friendly)."
    )

    # Read the scraped data
    df = pd.read_csv("./data/output_nhHotels.csv")

    # Data overview
    st.subheader("Data on the hotels listed on the NH Hotels website")
    st.write(df)
    st.markdown(
        "This is the data we extracted from the NH Hotels website. "
        "It contains the following information: ")
    st.markdown("- **Country**")
    st.markdown("- **Hotel name**")
    st.markdown("- **Hotel star rating**")
    st.markdown(
        "- **Eco Friendly**: *whether or not the hotel displays a badge indicating a specific environmental-protection approach*."
        " More information [here](https://www.nh-hotels.fr/environnement/hotels-ecologiques-developpement-durable)"
    )
    st.markdown("- **Number of stars on TripAdvisor**")

    # ====== Analysis section ====== #
    st.write("")
    st.header("Analyses")

    # Doughnut chart
    st.subheader("Distribution of the hotels")
    labels = ['Non Eco Friendly', 'Eco friendly']
    vals = df['eco_friendly'].value_counts()
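    # value_counts() here is indexed by the eco_friendly labels themselves
    # (assumed to be encoded as 0/1), so vals[0] / vals[1] are label lookups,
    # not positional ones.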
    values = [vals[0], vals[1]]
    fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
    st.write(fig)
    st.markdown(
        "**56%** of the hotels listed on this site carry the *Eco Friendly* badge, while **44%** do not. "
        "You can see the detailed figures by hovering your mouse over the chart."
    )

    # Bar chart
    st.subheader("Average rating on TripAdvisor")
    means = df.groupby('eco_friendly')['avis_client'].mean()
    colors = [
        'lightslategray',
    ] * 2
    colors[1] = 'crimson'
    fig_bar = go.Figure(
        data=[go.Bar(x=labels, y=[means[0], means[1]], marker_color=colors)])
    st.write(fig_bar)
    st.markdown(
        "We can see that the TripAdvisor customer rating is higher for Eco Friendly hotels, at **4.12 stars**, "
        "than for Non Eco Friendly hotels, at **3.9 stars**. Customers therefore seem to have had more positive experiences with Eco Friendly hotels."
    )

    # ===== Conclusion ===== #
    st.write("")
    st.subheader("Conclusion")
    st.write(
        "Based on these results, we can see that, among the hotels listed on this site, "
        "those that are more environmentally friendly offer a better service to their customers."
        " To encourage more environmentally responsible practices in the tourism sector, "
        "it would be advisable to display this information on the site so that it is clearly visible to potential customers."
        " Consequently, we believe that a website that lets users search for hotels according to their environmental friendliness is "
        "valuable for offering a more satisfying service to future customers."
    )
Exemple #14
0
def run_app():
    #
    # Loads session state
    #
    state = _get_state()

    def reset_template_state():
        state.template_name = None
        state.jinja = None
        state.reference = None

    #
    # Initial page setup
    #
    st.set_page_config(page_title="Promptsource", layout="wide")
    st.sidebar.markdown(
        "<center><a href='https://github.com/bigscience-workshop/promptsource'>💻Github - Promptsource\n\n</a></center>",
        unsafe_allow_html=True,
    )
    mode = st.sidebar.selectbox(
        label="Choose a mode",
        options=select_options,
        index=0,
        key="mode_select",
    )
    st.sidebar.title(f"{side_bar_title_prefix} 🌸 - {mode}")

    #
    # Adds pygments styles to the page.
    #
    st.markdown("<style>" +
                HtmlFormatter(style="friendly").get_style_defs(".highlight") +
                "</style>",
                unsafe_allow_html=True)

    WIDTH = 140

    def show_jinja(t, width=WIDTH):
        def replace_linebreaks(t):
            """
            st.write does not handle double line breaks very well. When it encounters `\n\n`, it exits the current <div> block.
            Explicitly replacing all `\n` with their HTML equivalent bypasses this issue.
            Also stripping the trailing `\n` first.
            """
            return t.strip("\n").replace("\n", "<br/>")

        wrap = textwrap.fill(t, width=width, replace_whitespace=False)
        out = highlight(wrap, DjangoLexer(), HtmlFormatter())
        out = replace_linebreaks(out)
        st.write(out, unsafe_allow_html=True)

    def show_text(t, width=WIDTH, with_markdown=False):
        wrap = [
            textwrap.fill(subt, width=width, replace_whitespace=False)
            for subt in t.split("\n")
        ]
        wrap = "\n".join(wrap)
        if with_markdown:
            st.write(wrap, unsafe_allow_html=True)
        else:
            st.text(wrap)

    if mode == "Helicopter view":
        st.title("High level metrics")
        st.write("This will take a minute to collect.")
        st.write(
            "If you want to contribute, please refer to the instructions in " +
            "[Contributing](https://github.com/bigscience-workshop/promptsource/blob/main/CONTRIBUTING.md)."
        )

        #
        # Loads template data
        #
        try:
            template_collection = TemplateCollection()
        except FileNotFoundError:
            st.error(
                "Unable to find the prompt folder!\n\n"
                "We expect the folder to be in the working directory. "
                "You might need to restart the app in the root directory of the repo."
            )
            st.stop()

        #
        # Global metrics
        #
        counts = template_collection.get_templates_count()
        nb_prompted_datasets = len(counts)
        st.write(f"## Number of *prompted datasets*: `{nb_prompted_datasets}`")
        nb_prompts = sum(counts.values())
        st.write(f"## Number of *prompts*: `{nb_prompts}`")

        #
        # Metrics per dataset/subset
        #
        # Download dataset infos (multiprocessing download)
        manager = Manager()
        all_infos = manager.dict()
        all_datasets = list(set([t[0] for t in template_collection.keys]))

        pool = Pool(processes=multiprocessing.cpu_count())
        pool.map(functools.partial(get_infos, all_infos), all_datasets)
        pool.close()
        pool.join()

        results = []
        for (dataset_name, subset_name) in template_collection.keys:
            # Collect split sizes (train, validation and test)
            if dataset_name not in all_infos:
                infos = get_dataset_infos(dataset_name)
                all_infos[dataset_name] = infos
            else:
                infos = all_infos[dataset_name]
            if infos:
                if subset_name is None:
                    subset_infos = infos[list(infos.keys())[0]]
                else:
                    subset_infos = infos[subset_name]

                split_sizes = {
                    k: v.num_examples
                    for k, v in subset_infos.splits.items()
                }
            else:
                # Zaid/coqa_expanded and Zaid/quac_expanded don't have dataset_infos.json,
                # so infos is an empty dict and `infos[list(infos.keys())[0]]` raises an error.
                # For simplicity, just leave `split_sizes` empty, so the displayed split sizes will be 0.
                split_sizes = {}

            # Collect template counts, original task counts and names
            dataset_templates = template_collection.get_dataset(
                dataset_name, subset_name)
            results.append({
                "Dataset name":
                dataset_name,
                "Subset name":
                "∅" if subset_name is None else subset_name,
                "Train size":
                split_sizes["train"] if "train" in split_sizes else 0,
                "Validation size":
                split_sizes["validation"]
                if "validation" in split_sizes else 0,
                "Test size":
                split_sizes["test"] if "test" in split_sizes else 0,
                "Number of prompts":
                len(dataset_templates),
                "Number of original task prompts":
                sum([
                    bool(t.metadata.original_task)
                    for t in dataset_templates.templates.values()
                ]),
                "Prompt names":
                [t.name for t in dataset_templates.templates.values()],
            })
        results_df = pd.DataFrame(results)
        results_df.sort_values(["Number of prompts"],
                               inplace=True,
                               ascending=False)
        results_df.reset_index(drop=True, inplace=True)

        nb_training_instances = results_df["Train size"].sum()
        st.write(
            f"## Number of *training instances*: `{nb_training_instances}`")

        plot_df = results_df[[
            "Dataset name", "Subset name", "Train size", "Number of prompts"
        ]].copy()
        plot_df[
            "Name"] = plot_df["Dataset name"] + " - " + plot_df["Subset name"]
        plot_df.sort_values(["Train size"], inplace=True, ascending=False)
        fig = px.bar(
            plot_df,
            x="Name",
            y="Train size",
            hover_data=["Dataset name", "Subset name", "Number of prompts"],
            log_y=True,
            title=
            "Number of training instances per data(sub)set - y-axis is in log scale",
        )
        fig.update_xaxes(visible=False, showticklabels=False)
        st.plotly_chart(fig, use_container_width=True)
        st.write(
            f"- Top 3 training subsets account for `{100 * plot_df[:3]['Train size'].sum() / nb_training_instances:.2f}%` of the training instances."
        )
        biggest_training_subset = plot_df.iloc[0]
        st.write(
            f"- Biggest training subset is *{biggest_training_subset['Name']}* with `{biggest_training_subset['Train size']}` instances"
        )
        smallest_training_subset = plot_df[plot_df["Train size"] > 0].iloc[-1]
        st.write(
            f"- Smallest training subset is *{smallest_training_subset['Name']}* with `{smallest_training_subset['Train size']}` instances"
        )

        st.markdown("***")
        st.write("Details per dataset")
        st.table(results_df)

    else:
        # Combining mode `Prompted dataset viewer` and `Sourcing` since the
        # backbone of the interfaces is the same
        assert mode in ["Prompted dataset viewer", "Sourcing"], (
            f"`mode` ({mode}) should be in `[Helicopter view, Prompted dataset viewer, Sourcing]`"
        )

        #
        # Loads dataset information
        #

        dataset_list = list_datasets()
        ag_news_index = dataset_list.index("ag_news")

        #
        # Select a dataset - starts with ag_news
        #
        dataset_key = st.sidebar.selectbox(
            "Dataset",
            dataset_list,
            key="dataset_select",
            index=ag_news_index,
            help="Select the dataset to work on.",
        )

        #
        # If a particular dataset is selected, loads dataset and template information
        #
        if dataset_key is not None:

            #
            # Check for subconfigurations (i.e. subsets)
            #
            configs = get_dataset_confs(dataset_key)
            conf_option = None
            if len(configs) > 0:
                conf_option = st.sidebar.selectbox(
                    "Subset", configs, index=0, format_func=lambda a: a.name)

            subset_name = str(conf_option.name) if conf_option else None
            try:
                dataset = get_dataset(dataset_key, subset_name)
            except OSError as e:
                st.error(
                    f"Some datasets are not handled automatically by `datasets` and require users to download the "
                    f"dataset manually. This applies to {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. "
                    f"\n\nPlease download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. "
                    f"\n\nYou can choose another cache directory by overriding `PROMPTSOURCE_MANUAL_DATASET_DIR` environment "
                    f"variable and downloading raw dataset to `$PROMPTSOURCE_MANUAL_DATASET_DIR/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`"
                    f"\n\nOriginal error:\n{str(e)}")
                st.stop()

            splits = list(dataset.keys())
            index = 0
            if "train" in splits:
                index = splits.index("train")
            split = st.sidebar.selectbox("Split",
                                         splits,
                                         key="split_select",
                                         index=index)
            dataset = dataset[split]
            dataset = renameDatasetColumn(dataset)

            #
            # Loads template data
            #
            try:
                dataset_templates = DatasetTemplates(
                    dataset_key, conf_option.name if conf_option else None)
            except FileNotFoundError:
                st.error(
                    "Unable to find the prompt folder!\n\n"
                    "We expect the folder to be in the working directory. "
                    "You might need to restart the app in the root directory of the repo."
                )
                st.stop()

            template_list = dataset_templates.all_template_names
            num_templates = len(template_list)
            st.sidebar.write(
                "Number of prompts created for " +
                f"`{dataset_key + (('/' + conf_option.name) if conf_option else '')}`"
                + f": **{str(num_templates)}**")

            if mode == "Prompted dataset viewer":
                if num_templates > 0:
                    template_name = st.sidebar.selectbox(
                        "Prompt name",
                        template_list,
                        key="template_select",
                        index=0,
                        help="Select the prompt to visualize.",
                    )

                step = 50
                example_index = st.sidebar.number_input(
                    f"Select the example index (Size = {len(dataset)})",
                    min_value=0,
                    max_value=len(dataset) - step,
                    value=0,
                    step=step,
                    key="example_index_number_input",
                    help="Offset = 50.",
                )
            else:  # mode = Sourcing
                st.sidebar.subheader("Select Example")
                example_index = st.sidebar.slider("Select the example index",
                                                  0,
                                                  len(dataset) - 1)

                example = dataset[example_index]
                example = removeHyphen(example)

                st.sidebar.write(example)

            st.sidebar.subheader("Dataset Schema")
            rendered_features = render_features(dataset.features)
            st.sidebar.write(rendered_features)

            #
            # Display dataset information
            #
            st.header("Dataset: " + dataset_key + " " +
                      (("/ " + conf_option.name) if conf_option else ""))

            # If we have a custom dataset change the source link to the hub
            split_dataset_key = dataset_key.split("/")
            possible_user = split_dataset_key[0]
            if len(split_dataset_key) > 1 and possible_user in INCLUDED_USERS:
                source_link = "https://huggingface.co/datasets/%s/blob/main/%s.py" % (
                    dataset_key,
                    split_dataset_key[-1],
                )
            else:
                source_link = "https://github.com/huggingface/datasets/blob/master/datasets/%s/%s.py" % (
                    dataset_key,
                    dataset_key,
                )

            st.markdown("*Homepage*: " + dataset.info.homepage +
                        "\n\n*Dataset*: " + source_link)

            md = """
            %s
            """ % (dataset.info.description.replace("\\", "")
                   if dataset_key else "")
            st.markdown(md)

            #
            # Body of the app: display prompted examples in mode `Prompted dataset viewer`
            # or text boxes to create new prompts in mode `Sourcing`
            #
            if mode == "Prompted dataset viewer":
                #
                # Display template information
                #
                if num_templates > 0:
                    template = dataset_templates[template_name]
                    st.subheader("Prompt")
                    st.markdown("##### Name")
                    st.text(template.name)
                    st.markdown("##### Reference")
                    st.text(template.reference)
                    st.markdown("##### Original Task? ")
                    st.text(template.metadata.original_task)
                    st.markdown("##### Choices in template? ")
                    st.text(template.metadata.choices_in_prompt)
                    st.markdown("##### Metrics")
                    st.text(", ".join(template.metadata.metrics) if template.
                            metadata.metrics else None)
                    st.markdown("##### Answer Choices")
                    if template.get_answer_choices_expr() is not None:
                        show_jinja(template.get_answer_choices_expr())
                    else:
                        st.text(None)
                    st.markdown("##### Jinja template")
                    splitted_template = template.jinja.split("|||")
                    st.markdown("###### Input template")
                    show_jinja(splitted_template[0].strip())
                    if len(splitted_template) > 1:
                        st.markdown("###### Target template")
                        show_jinja(splitted_template[1].strip())
                    st.markdown("***")

                #
                # Display a couple (steps) examples
                #
                for ex_idx in range(example_index, example_index + step):
                    if ex_idx >= len(dataset):
                        continue
                    example = dataset[ex_idx]
                    example = removeHyphen(example)
                    col1, _, col2 = st.beta_columns([12, 1, 12])
                    with col1:
                        st.write(example)
                    if num_templates > 0:
                        with col2:
                            prompt = template.apply(example,
                                                    highlight_variables=False)
                            if prompt == [""]:
                                st.write("∅∅∅ *Blank result*")
                            else:
                                st.write("Input")
                                show_text(prompt[0])
                                if len(prompt) > 1:
                                    st.write("Target")
                                    show_text(prompt[1])
                    st.markdown("***")
            else:  # mode = Sourcing
                st.markdown("## Prompt Creator")

                #
                # Create a new template or select an existing one
                #
                col1a, col1b, _, col2 = st.beta_columns([9, 9, 1, 6])

                # current_templates_key and state.templates_key are keys for the templates object
                current_templates_key = (dataset_key, conf_option.name
                                         if conf_option else None)

                # Resets state if there has been a change in templates_key
                if state.templates_key != current_templates_key:
                    state.templates_key = current_templates_key
                    reset_template_state()

                with col1a, st.form("new_template_form"):
                    new_template_name = st.text_input(
                        "Create a New Prompt",
                        key="new_template",
                        value="",
                        help="Enter name and hit enter to create a new prompt.",
                    )
                    new_template_submitted = st.form_submit_button("Create")
                    if new_template_submitted:
                        if new_template_name in dataset_templates.all_template_names:
                            st.error(
                                f"A prompt with the name {new_template_name} already exists "
                                f"for dataset {state.templates_key}.")
                        elif new_template_name == "":
                            st.error("Need to provide a prompt name.")
                        else:
                            template = Template(new_template_name, "", "")
                            dataset_templates.add_template(template)
                            reset_template_state()
                            state.template_name = new_template_name
                    else:
                        state.new_template_name = None

                with col1b, st.beta_expander("or Select Prompt",
                                             expanded=True):
                    template_list = dataset_templates.all_template_names
                    if state.template_name:
                        index = template_list.index(state.template_name)
                    else:
                        index = 0
                    state.template_name = st.selectbox(
                        "",
                        template_list,
                        key="template_select",
                        index=index,
                        help="Select the prompt to work on.")

                    if st.button("Delete Prompt", key="delete_prompt"):
                        dataset_templates.remove_template(state.template_name)
                        reset_template_state()

                variety_guideline = """
                :heavy_exclamation_mark::question:Creating a diverse set of prompts whose differences go beyond surface wordings (i.e. marginally changing 2 or 3 words) is highly encouraged.
                Ultimately, the hope is that exposing the model to such a diversity will have a non-trivial impact on the model's robustness to the prompt formulation.
                \r**To get varied prompts, you can try moving the cursor along these axes**:
                \n- **Interrogative vs affirmative form**: Ask a question about an attribute of the inputs or tell the model to decide something about the input.
                \n- **Task description localization**: where is the task description blended with the inputs? In the beginning, in the middle, at the end?
                \n- **Implicit situation or contextualization**: how explicit is the query? For instance, *Given this review, would you buy this product?* is an indirect way to ask whether the review is positive.
                """

                col1, _, _ = st.beta_columns([18, 1, 6])
                with col1:
                    if state.template_name is not None:
                        show_text(variety_guideline, with_markdown=True)

                #
                # Edit the created or selected template
                #
                col1, _, col2 = st.beta_columns([18, 1, 6])
                with col1:
                    if state.template_name is not None:
                        template = dataset_templates[state.template_name]
                        #
                        # If template is selected, displays template editor
                        #
                        with st.form("edit_template_form"):
                            updated_template_name = st.text_input(
                                "Name", value=template.name)
                            state.reference = st.text_input(
                                "Prompt Reference",
                                help=
                                "Short description of the prompt and/or paper reference for the prompt.",
                                value=template.reference,
                            )

                            # Metadata
                            state.metadata = template.metadata
                            state.metadata.original_task = st.checkbox(
                                "Original Task?",
                                value=template.metadata.original_task,
                                help=
                                "Prompt asks model to perform the original task designed for this dataset.",
                            )
                            state.metadata.choices_in_prompt = st.checkbox(
                                "Choices in Template?",
                                value=template.metadata.choices_in_prompt,
                                help=
                                "Prompt explicitly lists choices in the template for the output.",
                            )

                            # Metrics from here:
                            # https://github.com/google-research/text-to-text-transfer-transformer/blob/4b580f23968c2139be7fb1cd53b22c7a7f686cdf/t5/evaluation/metrics.py
                            metrics_choices = [
                                "BLEU",
                                "ROUGE",
                                "Squad",
                                "Trivia QA",
                                "Accuracy",
                                "Pearson Correlation",
                                "Spearman Correlation",
                                "MultiRC",
                                "AUC",
                                "COQA F1",
                                "Edit Distance",
                            ]
                            # Add mean reciprocal rank
                            metrics_choices.append("Mean Reciprocal Rank")
                            # Add generic other
                            metrics_choices.append("Other")
                            # Sort alphabetically
                            metrics_choices = sorted(metrics_choices)
                            state.metadata.metrics = st.multiselect(
                                "Metrics",
                                metrics_choices,
                                default=template.metadata.metrics,
                                help=
                                "Select all metrics that are commonly used (or should "
                                "be used if a new task) to evaluate this prompt.",
                            )

                            # Answer choices
                            if template.get_answer_choices_expr() is not None:
                                answer_choices = template.get_answer_choices_expr(
                                )
                            else:
                                answer_choices = ""
                            state.answer_choices = st.text_input(
                                "Answer Choices",
                                value=answer_choices,
                                help=
                                "A Jinja expression for computing answer choices. "
                                "Separate choices with a triple bar (|||).",
                            )

                            # Jinja
                            state.jinja = st.text_area("Template",
                                                       height=40,
                                                       value=template.jinja)

                            # Submit form
                            if st.form_submit_button("Save"):
                                if (updated_template_name
                                        in dataset_templates.all_template_names
                                        and updated_template_name !=
                                        state.template_name):
                                    st.error(
                                        f"A prompt with the name {updated_template_name} already exists "
                                        f"for dataset {state.templates_key}.")
                                elif updated_template_name == "":
                                    st.error("Need to provide a prompt name.")
                                else:
                                    # Parses state.answer_choices
                                    if state.answer_choices == "":
                                        updated_answer_choices = None
                                    else:
                                        updated_answer_choices = state.answer_choices

                                    dataset_templates.update_template(
                                        state.template_name,
                                        updated_template_name,
                                        state.jinja,
                                        state.reference,
                                        state.metadata,
                                        updated_answer_choices,
                                    )
                                    # Update the state as well
                                    state.template_name = updated_template_name
                #
                # Displays template output on current example if a template is selected
                # (in second column)
                #
                with col2:
                    if state.template_name is not None:
                        st.empty()
                        template = dataset_templates[state.template_name]
                        prompt = template.apply(example)
                        if prompt == [""]:
                            st.write("∅∅∅ *Blank result*")
                        else:
                            st.write("Input")
                            show_text(prompt[0], width=40)
                            if len(prompt) > 1:
                                st.write("Target")
                                show_text(prompt[1], width=40)

    #
    # Must sync state at end
    #
    state.sync()
Exemple #15
0
 
 dataset = PegaFile2(file, separador)
 # Show a slice of the data
 n_heads = st.slider('Number of rows to display', min_value=0, max_value=len(dataset), value=1, step=1)
 st.write(dataset.head(n_heads))
 ##---------------------TYPE OF DATA ANALYSIS-------------------------##
 analise = st.selectbox('Select the type of exploratory analysis:', ('Missing-data analysis', 'Data distribution', 'Statistics', 'Correlations', 'Machine Learning model'))

 if analise == 'Missing-data analysis':

     explorer = pd.DataFrame({'Variables': dataset.columns, 'Number of missing': dataset.isna().sum(axis=0).values, 'Data type': dataset.dtypes.values})

     if sum(explorer['Number of missing'].values) > 0:

         st.text('There is missing data in this dataset!')
         st.header('Data imputation')
         var = st.selectbox('Select the variable to inspect', options=dataset.columns)
         st.subheader('Select the imputation strategy:')
         if dataset[var].dtypes == 'float64' or dataset[var].dtypes == 'int64':

             opt = st.selectbox('', options=('mean', 'median', 'mode', 'zeros', 'drop missing rows'))

             if opt == 'mean':

                 st.write('The missing values in column ', var, ' will be replaced by: ', dataset[var].mean(), '.')
                 botao = st.button('Impute data')

                 if botao:
                     dataset[var] = dataset[var].fillna(dataset[var].mean())
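                     # Mean imputation preserves the column's mean but shrinks
                     # its variance; the median option is more robust to outliers.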
                     
        Image.fromarray(gray).save(data, format="PNG")
        self._data["gray.png"] = data.getvalue()

    def save(self):
        for name, data in self._data.items():
            Image.open(io.BytesIO(data)).save("/tmp/%s" % name)

    def get_images(self):
        return self._data


# Generate some images.
si = StreamlitImages()

# Get a single image of bytes and display
st.header("individual image bytes")
filename = "image.png"
data = si.get_images().get(filename)
st.image(data, caption=filename, format="png")

# Display a list of images
st.header("list images")
images = []
captions = []
for filename, data in si.get_images().items():
    images.append(data)
    captions.append(filename)
st.image(images, caption=captions, format="png")

st.header("PIL Image")
data = []
def select_model(loaded_data, model_selection='user_user'):
    # default model is user-user based collaborative filtering
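    # KNNWithMeans (from scikit-surprise) adjusts for each user's or item's
    # mean rating; the 'pearson_baseline' similarity is a shrunk Pearson
    # correlation centered on baseline estimates, and 'user_based' toggles
    # between user-user and item-item neighborhoods.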
    if model_selection == 'user_user':
        algo = KNNWithMeans(k=50, sim_options={'name': 'pearson_baseline', 'user_based': True})
    elif model_selection == 'item_item':
        algo = KNNWithMeans(k=50, sim_options={'name': 'pearson_baseline', 'user_based': False})
    else:
        algo = mf.matrix_factorization_param(loaded_data)

    return algo
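
# A minimal, hypothetical usage sketch for select_model (not part of the
# original snippet). It assumes the scikit-surprise API and a placeholder
# ratings DataFrame `ratings_df` with user/item/rating columns:
#
#     from surprise import Dataset, Reader
#     from surprise.model_selection import train_test_split
#
#     reader = Reader(rating_scale=(1, 5))
#     loaded_data = Dataset.load_from_df(ratings_df[['user', 'item', 'rating']], reader)
#     algo = select_model(loaded_data, model_selection='item_item')
#     trainset, testset = train_test_split(loaded_data, test_size=0.2)
#     algo.fit(trainset)
#     predictions = algo.test(testset)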
import os, io
 


st.title('Welcome to RecServe!')
st.header('Let me help you with the product recommendations')
option1 = st.selectbox(
    'Select the path for the dataset',
    ['sample_us.tsv'])
#st.write('sample_us.tsv')
#st.write('You selected:',option1)

#url = st.text_input('Enter the path for the data')
st.write('The data is loaded')
#data_load_state = st.text('Loading the data')
data = ds.get_data(option1)
#st.write(data)

    #data = ds.get_data(_file_path, 'data/data_subset.csv', 0.99)
#data = ds.get_data('/Users/lalitharahul/Desktop/AutoRecommender/RecServe/sample_us.tsv')
#data = ds.get_data(url)
Exemple #18
0
def main():
    sucesso = False
    sucesso2 = False
    codigo_acao = ""
    st.title('Stock Analysis')
    st.subheader('Goals')
    st.text(
        'Analyze the data of a stock and correlate its behavior with the Ibovespa index.'
    )
    st.text(
        'Predict the closing price of a stock based on its last 60 days'
    )
    st.image('https://media.giphy.com/media/l0HlDDyxBfSaPpU88/giphy.gif',
             width=700)

    st.sidebar.title('AceleraDev Data Science')
    st.sidebar.image('logo.png', width=280)
    st.sidebar.subheader("By Pável Lelis")
    st.sidebar.title("What would you like to do?")
    app_mode = st.sidebar.selectbox("", [
        "", "Exploratory Analysis", "Predict the Close",
        "View this App's Workflow"
    ])

    if app_mode == "Exploratory Analysis":
        #st.sidebar.success('To continue, enter the stock ticker.')
        codigo_acao = st.sidebar.text_input('Enter the stock ticker here')
        datainicio = st.sidebar.date_input('Start date')
        datafim = st.sidebar.date_input('End date')
        #botao = st.sidebar.button("Load Data")
        #if botao:

        # Request and load the data
        if codigo_acao != "" and datainicio != datafim:
            codigo_acao2 = codigo_acao + ".SA"
            df = web.DataReader(codigo_acao2,
                                data_source='yahoo',
                                start=datainicio,
                                end=datafim)
            df_ibovespa = web.DataReader('^BVSP',
                                         data_source='yahoo',
                                         start=datainicio,
                                         end=datafim)
            df_quote = df
            sucesso = True
            # Standardize the DataFrame column names
            for c in df_ibovespa.columns:
                df_ibovespa.rename(columns={c: c + '_IBOV'}, inplace=True)
            for c in df_quote.columns:
                df_quote.rename(columns={c: c + '_' + codigo_acao.upper()},
                                inplace=True)
            df_dados = pd.merge(df_quote,
                                df_ibovespa,
                                left_index=True,
                                right_index=True)

            def normalizaIbov(x):
                return (x - (df_dados[df_dados.columns[9]]).min()) / (
                    (df_dados[df_dados.columns[9]]).max() -
                    (df_dados[df_dados.columns[9]]).min())

            def normalizaAcao(x):
                return (x - (df_dados[df_dados.columns[3]]).min()) / (
                    (df_dados[df_dados.columns[3]]).max() -
                    (df_dados[df_dados.columns[3]]).min())

            df_dados['AcaoNorm'] = df_dados[df_dados.columns[3]].apply(
                normalizaAcao)
            df_dados['IbovNorm'] = df_dados[df_dados.columns[9]].apply(
                normalizaIbov)

    elif app_mode == "Predict the Close":
        if codigo_acao == '':
            codigo_acao = st.sidebar.text_input('Enter the stock ticker here')
            sucesso2 = True
        else:
            escolhaPrevisao = st.sidebar.selectbox(
                "Which close should we predict?",
                [codigo_acao, 'Ibov', "Another Stock"])
            if escolhaPrevisao == "Another Stock":
                codigo_acao = st.sidebar.text_input(
                    'Enter the stock ticker here')
                sucesso2 = True
            elif escolhaPrevisao == 'Ibov':
                codigo_acao = '^BVSP'
                sucesso2 = True
            else:
                pass

    elif app_mode == "View this App's Workflow":
        st.image("App Workflow.png", width=850)

    if sucesso:
        st.title(codigo_acao)
        st.text(
            'The IBOV data and the analyzed stock data are shown up to the most recent date.'
        )
        num_linha = st.slider('Rows', 5, len(df_dados))
        st.dataframe(df_dados.tail(num_linha))
        st.title("Charts")
        selecionaGrafico = st.selectbox('Choose a chart for the analysis', [
            '',
            'Candlestick',
            'IBOV x Stock Scatter',
            'Normalized Curves',
        ])
        if selecionaGrafico == "Candlestick":
            papel = st.selectbox('For which history?',
                                 ['', codigo_acao, 'IBOV'])
            if papel != "":
                ver_acao = False
                if papel == codigo_acao:
                    ver_acao = True
                st.header("Candlestick")
                st.write(criar_candlestick(df_dados, ver_acao, codigo_acao))
        if selecionaGrafico == "IBOV x Stock Scatter":
            st.header("IBOV x Stock Scatter")
            st.write(criar_dispersao(df_dados, codigo_acao))
        if selecionaGrafico == "Normalized Curves":
            st.header("Normalized Curves")
            st.write(criar_curvas_norm(df_dados, codigo_acao))

    if sucesso2:
        # Request and load the data
        codigo_acao2 = codigo_acao + ".SA"
        hoje = date.today()
        intervalo = timedelta(1200)
        passado = hoje - intervalo
        if codigo_acao != "":
            df = web.DataReader(codigo_acao2,
                                data_source='yahoo',
                                start=passado,
                                end=hoje)

            texto = 'Predicting the next close of ' + codigo_acao
            st.title(texto)
            realizarPrevisao(df, codigo_acao)
            ##################################################
            st.text("Processing data...")
            # Create a new dataframe with the 'Close' column
            data = df.filter(['Close'])
            # Create a data array
            dataset = data.values
            # Number of rows for the training set (80/20 split)
            training_data_len = math.ceil(len(dataset) * .8)

            scaler = MinMaxScaler(feature_range=(0, 1))
            scaled_data = scaler.fit_transform(dataset)
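            # Note: fitting the scaler on the full dataset (train and test rows)
            # leaks information from the test period into the normalization; a
            # stricter setup would fit on the training rows only.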

            # Create the normalized training set
            train_data = scaled_data[0:training_data_len, :]
            # Split into x_train and y_train
            x_train = []
            y_train = []
            for i in range(60, len(train_data)):
                x_train.append(train_data[i - 60:i, 0])
                y_train.append(train_data[i, 0])

            # Convert to numpy arrays
            x_train, y_train = np.array(x_train), np.array(y_train)

            # Reshape the data into the format expected by the LSTM
            x_train = np.reshape(x_train,
                                 (x_train.shape[0], x_train.shape[1], 1))
            st.text("Configuring the model...")
            # Configure the LSTM model

            model = Sequential()
            model.add(
                LSTM(units=50,
                     return_sequences=True,
                     input_shape=(x_train.shape[1], 1)))
            model.add(LSTM(units=50, return_sequences=False))
            model.add(Dense(units=25))
            model.add(Dense(units=1))
            st.text("Compiling the model...")
            # Compile the model
            model.compile(optimizer='adam', loss='mean_squared_error')
            st.text("Training the model...")
            # Train the model
            model.fit(x_train, y_train, batch_size=1, epochs=1)
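            # batch_size=1 and epochs=1 keep this demo responsive inside
            # Streamlit; a real forecast would train for more epochs (and
            # ideally cache the fitted model between reruns).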
            # Check the model fit
            st.text("Checking the model fit...")
            test_data = scaled_data[training_data_len - 60:, :]
            # Create x_test and y_test
            x_test = []
            y_test = dataset[training_data_len:, :]
            for i in range(60, len(test_data)):
                x_test.append(test_data[i - 60:i, 0])
            # Convert x_test to a numpy array
            x_test = np.array(x_test)

            # Formatando os dados de acordo com o formato aceitado pelo LSTM
            x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1))

            predictions = model.predict(x_test)
            predictions = scaler.inverse_transform(
                predictions)  # Undo the normalization

            # Create the data for the plot
            train = data[:training_data_len]
            valid = data[training_data_len:].copy()
            valid['Predictions'] = predictions
            # Visualize the data
            plt.figure(figsize=(16, 8))
            plt.title('Model', fontsize=36)
            plt.xlabel('Date', fontsize=18)
            plt.ylabel('Closing Price (BRL)', fontsize=18)
            plt.plot(train['Close'])
            plt.plot(valid[['Close', 'Predictions']])
            plt.legend(['Train', 'Actual', 'Predictions'], loc='lower right')
            st.pyplot()

            ##################################################
            def previstosxhistoricos(data):
                valid = data[training_data_len:].copy()
                valid['Predictions'] = predictions
                fig = go.Figure()
                # fig.add_trace(go.Scatter(y=train['Close'], mode='lines+markers', name='Train'))
                fig.add_trace(
                    go.Scatter(x=valid.index,
                               y=valid['Close'],
                               mode='lines+markers',
                               name='Historical Values'))
                fig.add_trace(
                    go.Scatter(x=valid.index,
                               y=valid['Predictions'],
                               mode='lines+markers',
                               name='Predictions'))

                fig.update_layout(
                    title={
                        'text': "Predicted vs. Historical Values",
                        'y': 0.9,
                        'x': 0.5,
                        'xanchor': 'center',
                        'yanchor': 'top'
                    })

                return fig

            st.write((previstosxhistoricos(data)))

            st.text("Predicting the next close...")
            #############################################################
            # Get the quote
            quote = web.DataReader(codigo_acao + ".SA",
                                   data_source='yahoo',
                                   start=passado,
                                   end=hoje)
            # Create a new dataframe
            new_df = quote.filter(['Close'])
            # Get the last 60 days' closing prices
            last_60_days = new_df[-60:].values
            scaler = MinMaxScaler(feature_range=(0, 1))

            # Scale the data to be values between 0 and 1
            last_60_days_scaled = scaler.fit_transform(last_60_days)
            # Create an empty list
            X_test = []
            # Append the past 60 days
            X_test.append(last_60_days_scaled)
            # Convert the X_test data set to a numpy array
            X_test = np.array(X_test)
            # Reshape the data
            X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))
            # Get the predicted scaled price
            pred_price = model.predict(X_test)
            # undo the scaling
            pred_price = scaler.inverse_transform(pred_price)
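            # Note: this scaler was fit on only the last 60 closes, so its
            # min/max may differ from the scaler used during training; reusing
            # the training scaler would make the inverse transform consistent.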
            if (round(
                (pred_price.tolist()[0][0]), 2)) > valid.Close.tail(1)[0]:
                resp = 'It will go up'
            else:
                resp = 'It will go down'

            valor_previsao = "The predicted value for the next close is " + str(
                round((pred_price.tolist()[0][0]), 2)) + ". " + resp + "! "
            st.title(valor_previsao)
            st.image('Nota.png', width=600)
Exemple #19
0
#test_dict={"Linearity":["fbp","ar"],"Low Contrast":["227FOV"],"Resolution":["std","bone"],"Uniformity":["head","body","multislice","multislice_monoenergetic"],"Thickness":["single_slice","multi_slice"],"Cart":["Cart displacement"],"Iodine":["Iodine"]}

catphan_dict = {
    "Linearity": ["fbp", "asir"],
    "Resolution": ["std", "bone"],
    "Thickness": ["single_slice", "multi_slice"],
    "Cart": ["Cart displacement"]
}
ge_dict = {
    "Low Contrast": ["227FOV"],
    "Resolution": ["std", "bone"],
    "Uniformity": ["head", "body", "multislice", "multislice_monoenergetic"],
    "Iodine": ["Iodine"]
}

st.header("TAC QUALITY ASSURANCE")

page_bg_img = '''
<style>
body {
background-image: url("https://edu.ieee.org/pa-upanama/wp-content/uploads/sites/374/2015/02/minimalistic-simple-background-white-206534-1920x12002.jpg");
background-size: cover;
}
</style>
'''

st.markdown(page_bg_img, unsafe_allow_html=True)

file_loader = st.empty()

state = _get_state()
Exemple #20
0
def main():
    """Online Retail Analytics ML App"""

    st.title("Online Retail Analytics")
    #st.subheader("Streamlit ML App")

    activities = ['EDA', 'Prediction', 'About']
    choices = st.sidebar.selectbox("Select Activities", activities)
    data = load_data('data/dataset.csv')

    if choices == 'EDA':
        st.header("Exploratory Data Analysis")
        choice1 = st.sidebar.selectbox("Choose One:", [
            "Show top 5 rows of data", "Show Summary of Dataset",
            "Customer Retention Rate", "User Type Revenue"
        ])

        if choice1 == "Show top 5 rows of data":
            st.write("Top 5 rows of data")
            st.dataframe(data.head(5))

        if choice1 == "Show Summary of Dataset":
            st.write("Summary of Dataset")
            st.write(data.describe())

        if choice1 == "Customer Retention Rate":
            st.write("Customer Retention Rate")
            ret_rate = pd.read_csv(os.getcwd() + "/data/retention_rate.csv")

            # Replace using dictionary
            ret_rate['InvoiceYearMonth'] = ret_rate[
                'InvoiceYearMonth'].replace({
                    201102: "Feb",
                    201103: "Mar",
                    201104: "Apr",
                    201105: "May",
                    201106: "June",
                    201107: "July",
                    201108: "Aug",
                    201109: "Sep",
                    201110: "Oct",
                    201111: "Nov",
                    201112: "Dec",
                })

            plt.plot('InvoiceYearMonth',
                     'TotalUserCount',
                     data=ret_rate,
                     marker='o',
                     markerfacecolor='blue',
                     markersize=12,
                     color='skyblue',
                     linewidth=4)

            plt.plot('InvoiceYearMonth',
                     'RetainedUserCount',
                     data=ret_rate,
                     marker='o',
                     markerfacecolor='red',
                     markersize=12,
                     color='tomato',
                     linewidth=4)

            plt.legend()
            st.pyplot()

        if choice1 == "User Type Revenue":
            st.write("User Type Revenue")
            user_revenue = pd.read_csv(os.getcwd() +
                                       "/data/User_Type_Revenue.csv")
            user_revenue = user_revenue.drop(columns=['Unnamed: 0'])

            #             user_revenue['InvoiceYearMonth'] = user_revenue['InvoiceYearMonth'].replace({
            #                 201012:"Dec 2010",
            #                 201101:"Jan 2011",
            #                 201102:"Feb 2011",
            #                 201103:"Mar 2011",
            #                 201104:"Apr 2011",
            #                 201105:"May 2011",
            #                 201106:"June 2011",
            #                 201107:"July 2011",
            #                 201108:"Aug 2011",
            #                 201109:"Sep 2011",
            #                 201110:"Oct 2011",
            #                 201111:"Nov 2011",
            #                 201112:"Dec 2011",
            #             })

            viz = sns.barplot(x="InvoiceYearMonth",
                              y="Revenue",
                              data=user_revenue,
                              hue='UserType')

            plt.xticks(rotation=-45)
            viz.set(ylabel='Revenue')
            plt.show()
            st.pyplot()

    if choices == 'Prediction':
        st.header("Prediction Analytics")
        choice = st.sidebar.selectbox("Choose One:", [
            "Customer Segmentation", "Cross Selling",
            "Customer Lifetime Value", "Next Purchase Day"
        ])
        if choice == "Customer Segmentation":
            st.subheader("Customer Segmentation")
            st.write("Classifying Customers based on RFM Model")
            customer_segmentation = pd.read_csv(
                os.getcwd() + "/data/Customer_Segmentation.csv")
            customer_segmentation['CustomerID'] = customer_segmentation[
                'CustomerID'].astype(int)
            customerID = st.selectbox(
                'CustomerID', customer_segmentation['CustomerID'].head(100))
            if st.button("Submit"):
                selected_customer = customer_segmentation.loc[
                    customer_segmentation['CustomerID'] == customerID]
                st.write(selected_customer[[
                    'Segment', 'Recency', 'Frequency', 'Revenue'
                ]])

        if choice == "Customer Lifetime Value":
            st.subheader("Customer Lifetime Value")
            st.write("Predicting LTV using XGBoost classifier")
            clv = pd.read_csv(os.getcwd() + "/data/CLV.csv")
            clv['CustomerID'] = clv['CustomerID'].astype(int)
            customerID = st.selectbox('CustomerID',
                                      clv['CustomerID'].head(100))
            if st.button("Submit"):
                selected_customer = clv.loc[clv['CustomerID'] == customerID]
                st.write(selected_customer['Customer_Lifetime_value'])

        if choice == "Next Purchase Day":
            st.subheader("Next Purchase Day")
            st.write("Predict Next Purchase Day using KNN")
            st.write(
                "NextPurchaseDayClass = 0: customer will purchase in more than 50 days"
            )
            st.write(
                "NextPurchaseDayClass = 1: customer will purchase in 21-49 days")
            st.write(
                "NextPurchaseDayClass = 2: customer will purchase in 0-20 days")
            Next_pday = pd.read_csv(os.getcwd() + "/data/Next.csv",
                                    encoding='unicode_escape',
                                    index_col=False)
            Next_pday['Customer_Id'] = Next_pday['Customer_Id'].astype('str')
            Customer = st.selectbox("Select CustomerID:",
                                    Next_pday['Customer_Id'])
            if st.button("Submit"):
                st.write(Next_pday.loc[Next_pday['Customer_Id'] == Customer,
                                       'NextPurchaseDayClass'])
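
            # Hypothetical sketch (an assumption, not code from this app): the
            # NextPurchaseDayClass labels could come from a KNN classifier such as
            # this, trained on customer features. Assumes scikit-learn is installed.
            def predict_next_purchase_class(X_train, y_train, X_new):
                from sklearn.neighbors import KNeighborsClassifier
                knn = KNeighborsClassifier(n_neighbors=5)
                knn.fit(X_train, y_train)  # y_train takes values in {0, 1, 2}
                return knn.predict(X_new)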

        if choice == "Cross Selling":
            st.subheader("Cross Selling")
            st.write("Market Basket Analysis using FP Growth")
            # Reading data from the web: this load was commented out in the
            # original, which left `data` undefined below; it must run for the
            # cleaning steps to work.
            data = load_data('data/dataset.csv')

            # Disabled UI ideas kept from the original draft:
            # input1 = st.selectbox("Select min support", [0.1, 0.2, 0.3, 0.4, 0.5])
            # country = st.sidebar.selectbox("Choose a country", data['Country'].unique())
            # min_support = input1

            # Cleaning: trim stray whitespace, drop rows without an invoice
            # number, and exclude credit notes (invoice numbers containing 'C')
            data['Description'] = data['Description'].str.strip()
            data.dropna(axis=0, subset=['InvoiceNo'], inplace=True)
            data['InvoiceNo'] = data['InvoiceNo'].astype('str')
            data = data[~data['InvoiceNo'].str.contains('C')]

            #Separating transactions for Country
            basket = (data[data['Country'] == 'Germany'].groupby([
                'InvoiceNo', 'Description'
            ])['Quantity'].sum().unstack().reset_index().fillna(0).set_index(
                'InvoiceNo'))

            # Encode quantities as 1 (purchased) / 0 (not purchased)
            def encoder(x):
                return 1 if x >= 1 else 0

            basket = basket.applymap(encoder)
            # POSTAGE is a shipping line item, not a product, so drop it if present
            basket.drop('POSTAGE', inplace=True, axis=1, errors='ignore')

            # Generating frequent itemsets with FP-Growth (min_support is the
            # fraction of baskets an itemset must appear in)
            itemsets = fpgrowth(basket, min_support=0.07, use_colnames=True)

            # Generating association rules; lift > 1 means the consequent is more
            # likely when the antecedent is in the basket
            rules = association_rules(itemsets, metric="lift", min_threshold=1)

            #rules=rules[['antecedents','consequents']]
            #df=rules
            #df.columns = ['Input', 'Output']

            choice = st.selectbox("Choose One:",
                                  rules['antecedents'].head(100))
            if st.button("Submit"):
                output = rules.loc[rules['antecedents'] == choice]
                st.write(output[['consequents']])

    if choices == 'About':
        st.subheader("About")
        st.write(
            "TK Maxx is a subsidiary of the American apparel and home goods company TJX Companies and offers customers across various countries great value on brand-name apparel and more, including high-end designer goods and juniors' fashion."
        )
        st.write(
            "TK Maxx wants to analyse customer transactions at its stores over an 8-month period to understand customer behaviour and make predictions about it. It also plans to use this data to cross-sell products that are frequently bought together."
        )
        st.write(
            "We are building a web app that analyses the key metrics using various algorithms to segment customers, predict each customer's lifetime value, recommend products, and predict the next purchase date."
        )
        st.write(
            "A marketing analyst at TK Maxx can use these insights to develop strategies such as targeting users, identifying important customers, predicting how much to spend on acquiring or retaining customers, and offering personalised recommendations."
        )

def corporation():
    st.header('Korean Autonomous Vehicle Companies')
    c_list = ['Hyundai Motor','Kakao Mobility','a2z','ThorDrive','StradVision','Control Works']
    choice = st.selectbox('Choose a company',c_list)

    if choice == 'Hyundai Motor':
        st.image('hyundai.jpg',width=None)
        st.subheader('Hyundai Motor')
        st.write('- Hyundai Motor is the leading carmaker in Korea')
        st.write('- Hyundai defines the social benefits of autonomous driving as shown below')
        st.image('social.png',width=None)
        st.write('- Up to Level 3 autonomous driving is offered on all vehicles currently on sale')
        st.write('- Hyundai is growing fast through both in-house development and work with partners')
        # url1 = 'https://motional.com/'
        link = '[Visit the website](https://motional.com/)'
        st.write('- Hyundai Motor partner: Motional')
        st.markdown(link,unsafe_allow_html=True)
        # if st.button('Visit the website'):
        #     webbrowser.open('https://motional.com/',new=1)
    if choice == 'Kakao Mobility':
        st.image('kko.png',width=None)
        st.subheader('Kakao Mobility')
        st.write('- Kakao Mobility approaches autonomous driving as a service rather than as building complete vehicles')
        st.write('- Development started in March 2020 and has shown steady progress since')
        st.write('- The plan splits the work into a control system that supervises the autonomous vehicles and an operations system that oversees it all, combining data, technology, and operating know-how to deliver the safest possible service')
        st.write('- It is developing both the driving technology that perceives the environment, decides a driving strategy, and controls the vehicle, and the service side that dispatches vehicles to demand and manages them remotely')
        st.write('- Above all, its big-data capabilities are at an overwhelming level, so Kakao Mobility looks well placed to lead in services tailored to the driving environment')
        link1 = '[Read the related article](https://auto.v.daum.net/v/20200303103632878)'
        st.markdown(link1,unsafe_allow_html=True)
        # if st.button('Read the related article'):
        #     webbrowser.open_new_tab('https://auto.v.daum.net/v/20200303103632878')
    if choice == 'a2z':
        st.image('a2z.jpg',width=None)
        st.subheader('Autonomous a2z')
        st.write('- A self-driving startup founded by three engineers who left Hyundai Motor\'s autonomous driving division')
        st.write('- Development began in 2018, and real-world driving tests are currently under way in Sejong City')
        st.write('- It is growing far faster than comparable startups, and its partnerships with local governments and many companies point to strong future potential')
        st.write('- It has developed and released an SDK (Software Development Kit) for autonomous-driving developers, lowering the barrier to entry')
        st.write('- a2z became widely known after automotive journalist and YouTuber Kim Han-yong posted a test drive of an a2z self-driving car')
        st.video('https://youtu.be/FAnnpoHkfgw')
        st.subheader('a2z partners')
        st.image('ptn.png',width=None)
        link3 = '[Visit the website](http://www.autoa2z.co.kr/)'
        st.markdown(link3,unsafe_allow_html=True)
        # if st.button('Visit the a2z website'):
        #     webbrowser.open_new_tab('http://www.autoa2z.co.kr/')
    if choice == 'ThorDrive':
        st.image('토르.jpg',width=None)
        st.subheader('ThorDrive')
        st.write('- A startup founded in 2016 by researchers from Seoul National University, now developing autonomous vehicles in Seoul and parts of the United States')
        st.write('- Unlike most Korean startups it has successfully entered the US market, and it has built both autonomous cars and autonomous carts that can operate indoors')
        st.write('- It is validating its self-driving technology by applying it to a range of business models, including aviation and the transportation industry')

        st.subheader('About ThorDrive')
        st.video('https://youtu.be/Kuiv2Fb-n0U')

        st.subheader('Autonomous carts at Incheon International Airport')
        st.video('https://youtu.be/GNnwR-lGJUU')
        link4 = '[Read the article](https://www.irobotnews.com/news/articleView.html?idxno=22582)'
        st.markdown(link4,unsafe_allow_html=True)
        # if st.button('Read the article'):
        #     webbrowser.open_new_tab('https://www.irobotnews.com/news/articleView.html?idxno=22582')
        link5 = '[Visit the website](https://www.thordrive.ai/)'
        st.markdown(link5,unsafe_allow_html=True)
        # if st.button('Visit the ThorDrive website'):
        #     url5 = 'https://www.thordrive.ai/'
        #     webbrowser.open_new_tab('https://www.thordrive.ai/')
    if choice == 'StradVision':
        st.image('스트라드비전.png',width=None)
        st.subheader('StradVision')

        st.write('- Founded in 2014, it originally developed AI-based object recognition with the goal of building software for very small hardware (wearable devices)')
        st.write('- Instead it drew attention from the automotive industry, received proposals to develop autonomous-driving software, and successfully pivoted')
        st.write("""- It now supplies its autonomous-driving software 'SVNet' to vehicle manufacturers, deployed in nine million vehicles""")
        st.write('- It has offices in the US, China, Germany, Japan, and India, and works actively as a partner of major companies such as Hyundai Motor')

        st.subheader('StradVision company video')
        st.video('https://youtu.be/da4YM27FSss')

    if choice == 'Control Works':
        st.image('컨트롤웍스.png',width=None)
        st.subheader('Control Works')
        st.write('- Control Works is better known for building and supplying the hardware needed for autonomous driving than for developing the software itself')
        st.write('- It supplies LiDAR sensors and other products needed for autonomous-driving development, and has built autonomous-driving systems not only with companies but also with aspiring university developers')
        st.image('control.png',width=None)
        link6 = '[Visit the website](https://www.control-works.co.kr/)'
        st.markdown(link6,unsafe_allow_html=True)
Exemple #22
0
import datetime
import time

import numpy as np
import pandas as pd
import streamlit as st


def main():
    st.title('My first app')

    st.header("This is a header")
    st.subheader("This is a subheader")
    st.text("This is my text")

    code = '''
    def myf(variable):
        return variable
    '''
    st.code(code, language="python")

    st.header("This section is about data frame")
    st.write("Here's our first attempt at using data to create a table:")
    df = pd.DataFrame(
        {
            'first column': [1, 2, 3, 4],
            'second column': [10, 20, 30, 40]
        },
        index=['a', 'b', 'c', 'd'])
    df.index.name = "my shortcut"

    st.write(df)
    st.dataframe(df)
    st.table(df)

    chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c'])

    st.line_chart(chart_data)

    map_data = pd.DataFrame(0.1 * np.random.randn(1000, 2) / [50, 50] +
                            [37.76, -122.4],
                            columns=['lat', 'lon'])
    st.dataframe(map_data)
    st.map(map_data)

    if st.checkbox('Show dataframe'):
        chart_data = pd.DataFrame(np.random.randn(20, 3),
                                  columns=['a', 'b', 'c'])

        st.line_chart(chart_data)

    option = st.selectbox("What sports doe you link?",
                          ['Basketball', "Baseball"])
    st.write("You like ", option)

    chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c'])

    st.sidebar.line_chart(chart_data)

    left_column, right_column = st.beta_columns(2)

    pressed = left_column.button('Press me?')
    if pressed:
        right_column.write("Woohoo!")

    expander = st.beta_expander("FAQ")
    expander.write(
        "Here you could put in some really, really long explanations...")

    st.write('Starting a long computation...')

    # Add a placeholder
    latest_iteration = st.empty()
    bar = st.progress(0)

    for i in range(100):
        # Update the progress bar with each iteration.
        latest_iteration.text(f'Iteration {i+1}')
        bar.progress(i + 1)
        time.sleep(0.001)

    st.write('...and now we\'re done!')

    if st.button("Run my function"):
        st.write("Running.....")
        val = my_long_func()
        st.write("Value = ", val)

    genre = st.radio("What's your favorite movie genre",
                     ('Comedy', 'Drama', 'Documentary'))
    if genre == "Comedy":
        st.write("You select Comedy")
    else:
        st.write("You did not like Comedy???")

    options = st.multiselect('What are your favorite colors',
                             ['Green', 'Yellow', 'Red', 'Blue'])
    st.write("You select: ", options)

    age = st.slider('How old are you?', 0, 130, 25)
    st.write("I'm ", age, 'years old')

    title = st.text_input('Movie title', 'Life of Brian')
    st.write('The current movie title is', title)

    txt = st.text_area('Text to analyze')
    st.write(txt)

    bd = st.date_input("When is your birthday?")
    st.write("Your birthday is :", bd)

    t = st.time_input('Set an alarm for', datetime.time(8, 42))
    st.write('Alarm is set for', t)

    uploaded_file = st.file_uploader("Choose a file")
    if uploaded_file:
        dataframe = pd.read_csv(uploaded_file)
        st.write(dataframe)

    color = st.color_picker('Pick A Color', '#00f900')
    st.write('The current color is', color)

    st.markdown(get_table_download_link(df), unsafe_allow_html=True)

    num1 = st.number_input("Insert number 1", value=0.0)
    num2 = st.number_input("Insert number 2", value=0.0)
    res = expensive_computation(num1, num2)
    st.write(f"{num1} x {num2} equals ", res)
Exemple #23
0
).reset_index()
fig2 = px.bar(bar2, x='index', y='safety_of_care_national_comparison')
st.plotly_chart(fig2)

st.markdown(
    'Based on this bar chart, the safety of care for the majority of hospitals in NC is above the national average'
)
#---------------------

#Drill down into INPATIENT and OUTPATIENT data for NC
st.title('INPATIENT DATA - NC')

inpatient_nc = df_inpatient_2[df_inpatient_2['provider_state'] == 'NC']
total_inpatient_count = sum(inpatient_nc['total_discharges'])

st.header('Total Count of Discharges from Inpatient Captured: ')
st.header(str(total_inpatient_count))

## Most/least common discharges (D/C) by DRG; sort so head/tail give top/bottom

common_discharges = inpatient_nc.groupby(
    'drg_definition')['total_discharges'].sum().reset_index().sort_values(
        by='total_discharges', ascending=False)

top10 = common_discharges.head(10)
bottom10 = common_discharges.tail(10)

st.header('DRGs')
st.dataframe(common_discharges)

col1, col2 = st.beta_columns(2)
Exemple #24
0
import streamlit as st
import pandas as pd
import numpy as np
import pickle

st.header('Car residual values prediction')

st.subheader('The Residual Values')

st.write("""
    The Residual Values (RV) model is a model to predict car values for given times. There are several car conditions having
    effects to future prices. In the model, transformed categorical features via One-Hot Encoding and scaled numeric
    features via normalization and polynomial transformer. The core model is used CatBoost regression.
    """)

st.subheader('To use model prediction, please following below steps:')

st.write("""
    1. From the left side of this page, there is an area to input several car conditions. \n
    2. To input car conditions that needed to be predicted. \n
    3. See the results below.
    """)

# Load development data
df = pd.read_csv('carData.csv')
df = df[df['vehicleType'] == 'car']

# Create sidebar
st.sidebar.header('Input car condition')

Exemple #25
0
def slick_grid(results):
    st.header("Slickgrid")
    st.markdown("""
The SlickGrid example does not work because I cannot inject javascript <script>...</script> tags.

References:

- [SlickGrid](https://slickgrid.net/)
- [SlickGrid examples](https://github.com/mleibman/SlickGrid/tree/gh-pages/examples)
""")

    st.markdown(
        """
<link rel="stylesheet" href="https://mleibman.github.io/SlickGrid/slick.grid.css" type="text/css"/>
<link rel="stylesheet" href="https://mleibman.github.io/SlickGrid/css/smoothness/jquery-ui-1.8.16.custom.css" type="text/css"/>
<table width="100%">
  <tr>
    <td valign="top" width="50%">
      <div id="myGrid" style="width:600px;height:500px;"></div>
    </td>
    <td valign="top">
      <h2>Demonstrates:</h2>
      <ul>
        <li>basic grid with minimal configuration</li>
      </ul>
        <h2>View Source:</h2>
        <ul>
            <li><A href="https://github.com/mleibman/SlickGrid/blob/gh-pages/examples/example1-simple.html" target="_sourcewindow"> View the source for this example on Github</a></li>
        </ul>
    </td>
  </tr>
</table>
<script src="https://mleibman.github.io/SlickGrid/lib/jquery-1.7.min.js"></script>
<script src="https://mleibman.github.io/SlickGrid/lib/jquery.event.drag-2.2.js"></script>
<script src="https://mleibman.github.io/SlickGrid/slick.core.js"></script>
<script src="https://mleibman.github.io/SlickGrid/slick.grid.js"></script>
<script>
  var grid;
  var columns = [
    {id: "title", name: "Title", field: "title"},
    {id: "duration", name: "Duration", field: "duration"},
    {id: "%", name: "% Complete", field: "percentComplete"},
    {id: "start", name: "Start", field: "start"},
    {id: "finish", name: "Finish", field: "finish"},
    {id: "effort-driven", name: "Effort Driven", field: "effortDriven"}
  ];
  var options = {
    enableCellNavigation: true,
    enableColumnReorder: false
  };
  $(function () {
    var data = [];
    for (var i = 0; i < 500; i++) {
      data[i] = {
        title: "Task " + i,
        duration: "5 days",
        percentComplete: Math.round(Math.random() * 100),
        start: "01/01/2009",
        finish: "01/05/2009",
        effortDriven: (i % 5 == 0)
      };
    }
    grid = new Slick.Grid("#myGrid", data, columns, options);
  })
</script>
""",
        unsafe_allow_html=True,
    )
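
# Hedged workaround sketch (not part of the original example): newer Streamlit
# releases ship streamlit.components.v1, whose html() helper renders raw HTML
# inside an iframe where <script> tags do execute. Assumes Streamlit >= 0.63;
# `html_source` would be the SlickGrid markup shown above, scripts included.
def slick_grid_component(html_source):
    import streamlit.components.v1 as components
    components.html(html_source, height=550, scrolling=True)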
Exemple #26
0
##st.markdown("One stop solution to Supply Chain Decision Making")
from PIL import Image
image = Image.open(r'banner.png')
st.image(image, use_column_width=True)
#FILE UPLOAD
st.sidebar.subheader('What would you like to work with today?')
todo = st.sidebar.selectbox(
    'To do:', ('Inbound Logistics', 'Inhouse Preparation',
               'Outbound Logistics', 'Fulfillment', 'Sales', 'Article sheet'))
if todo == 'Inbound Logistics':
    st.sidebar.subheader('Inbound Logistics')
    inbound = st.sidebar.selectbox('To do:', ('Lead Time', 'Purchase Order'))

    #PURCHASE ORDER FREQUENCY
    if inbound == 'Purchase Order':
        st.header("Purchase Order Analysis")
        st.markdown(
            "Analysis of purchase order data such as PO frequency, PO volume and PO prices"
        )
        uploaded_file = st.file_uploader("Choose a XLSX file", type="xlsx")
        if uploaded_file:
            data = pd.read_excel(uploaded_file)
            df = pd.DataFrame(data,
                              columns=[
                                  'PO Number', 'Doc Date', 'Vendor Name',
                                  'Category', 'Base Qty', 'Net value per UoM',
                                  'Grand Total FA PO'
                              ])
            #st.markdown("Say hello to your database")
            #st.dataframe(df)
            if st.button('Preview Dataset'):
Exemple #27
0
def classifier_page():
    # Title
    st.title('Predicting the Outcome of a Patient with COVID-19')
    st.header('A Machine Learning Approach')
    '\n'
    st.image('/Users/sunnajo/downloads/covidml.jpeg')
    st.text('Image source: TABIP')
    '\n'
    '\n'
    st.markdown('*Disclaimer*')
    st.write(
        'This content is purely for educational purposes and should NOT be transmitted, used to guide clinical decision making and/or personal decisions regarding seeking medical care or treatment, and/or for any other real-world applications.'
    )
    '\n'
    '\n'

    # Functions
    def load_data(url):
        data = pd.read_csv(url)
        return data

    ## User input areas
    # Dictionary of age groups
    age_dict = {
        "0-9 years": 0,
        "10-19 years": 1,
        "20-29 years": 2,
        "30-39 years": 3,
        "40-49 years": 4,
        "50-59 years": 5,
        "60-69 years": 6,
        "70-79 years": 7,
        "80+ years": 8
    }
    age_list = list(age_dict.keys())

    st.markdown('### **How old is the patient?**')
    input_age = st.select_slider('', age_list)
    age_group = age_dict[input_age]
    '\n'

    # The Yes/No radios map directly to the 1/0 flags the model expects; the
    # distinct whitespace labels keep each widget unique.
    st.markdown('### **Is the patient hospitalized?**')
    hosp = 1 if st.radio('', ["No", "Yes"]) == "Yes" else 0
    '\n'

    st.markdown('### **Is the patient in the ICU?**')
    icu = 1 if st.radio('  ', ["No", "Yes"]) == "Yes" else 0
    '\n'

    st.markdown(
        '### **Does the patient have an underlying medical condition?**')
    med_cond = 1 if st.radio('   ', ["No", "Yes"]) == "Yes" else 0
    '\n'

    st.markdown(
        '### **What is the current positivity rate? (as a percentage)**')
    pos_rate = st.number_input('    ')
    if st.button("Look it up"):
        current_data = load_data(
            'https://api.covidtracking.com/v1/us/current.csv')
        pos_rate_pct = float(
            current_data['positive'] / current_data['totalTestResults']) * 100
        st.write('{:.2f}%'.format(pos_rate_pct))
    '\n'

    st.markdown("### **What is the patient's biological sex?**")
    sex = st.radio('     ', ["Female", "Male", "Other"])
    Male = 1 if sex == "Male" else 0
    '\n'
    '\n'
    '\n'

    # Prediction
    if st.button("Predict"):
        result = prediction(icu, hosp, age_group, med_cond, Male, pos_rate)
        prob_pct = (float(
            predict_prob(icu, hosp, age_group, med_cond, Male,
                         pos_rate)[:, 1])) * 100
        '\n'
        if result == 0:
            st.success("The patient likely has a low risk of death")
        elif result == 1:
            st.warning("The patient has a higher risk of death")
        '\n'

        # Pause
        time.sleep(1)

        # Cue for navigating to data section
        st.markdown('### *How did we come up with this algorithm?*')
        '\n'
        st.subheader('Click on the sidebar for data sources')
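
# Hypothetical sketches of the `prediction` and `predict_prob` helpers used
# above (assumptions; they are not shown in this snippet). They presume a
# scikit-learn style classifier pickled earlier, with features in the same
# order as at training time; 'covid_model.pkl' is an illustrative filename.
import pickle
import numpy as np

with open('covid_model.pkl', 'rb') as f:
    _model = pickle.load(f)

def prediction(icu, hosp, age_group, med_cond, male, pos_rate):
    X = np.array([[icu, hosp, age_group, med_cond, male, pos_rate]])
    return int(_model.predict(X)[0])

def predict_prob(icu, hosp, age_group, med_cond, male, pos_rate):
    X = np.array([[icu, hosp, age_group, med_cond, male, pos_rate]])
    return _model.predict_proba(X)  # shape (1, 2); [:, 1] is P(death)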
Exemple #28
0
def load_data(nrows):
    data = pd.read_csv(Data_Url,
                       nrows=nrows,
                       parse_dates=[['CRASH_DATE', 'CRASH_TIME']])
    data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True)
    lowercase = lambda x: str(x).lower()
    data.rename(lowercase, axis='columns', inplace=True)
    data.rename(columns={'crash_date_crash_time': 'date/time'}, inplace=True)
    return data


data = load_data(100000)
original_data = data

st.header("Most injured people in NYC")
injured_people = st.slider("Number of peoples injured in collisions", 0, 19)
st.map(
    data.query("injured_persons >= @injured_people")[["latitude", "longitude"
                                                      ]].dropna(how="any"))

st.header("How many collisions occured during a given time of day?")
hour = st.slider("Hour to look at", 0, 23)
data = data[data['date/time'].dt.hour == hour]

st.markdown("Vehicle collisions between %i:00 and %i:00" % (hour,
                                                            (hour + 1) % 24))
midpoint = (np.average(data['latitude']), np.average(data['longitude']))

st.write(
    pdk.Deck(
Exemple #29
0
import base64

import pandas as pd
import streamlit as st

def download_link(dataframe, name):
    csv = dataframe.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()  # some strings <-> bytes conversions necessary here
    href = "<a href='data:file/csv;base64,{}' download='{}'>Download</a>".format(b64, name)
    st.markdown(href, unsafe_allow_html=True)

st.header("File Download")

data = [(1, 2, 3)]
df = pd.DataFrame(data, columns=["Col1", "Col2", "Col3"])

download_link(df, 'data.csv')
st.dataframe(df)
Exemple #30
0
import tempfile
import os
import time

import numpy as np
import streamlit as st
import tensorflow as tf
import tensorflow_hub as hub
tmpdir = tempfile.mkdtemp()
os.environ[
    'TFHUB_CACHE_DIR'] = r'C:\Users\Black\OneDrive\Desktop\tf\tfhub_modules'
PAGE_CONFIG = {
    "page_title": "Dog Vision AI",
    "page_icon": "dog",
    "layout": "centered"
}
st.beta_set_page_config(**PAGE_CONFIG)
st.set_option('deprecation.showfileUploaderEncoding', False)

st.title("Dog Vision AI 🐶")
st.header("Welcome To Dog Breed Identification 👀")
st.write('')

menu = ["Home", "About", 'Contact']
choice = st.sidebar.selectbox('Menu', menu)
if choice == 'Home':
    # st.write(" bhdsjcbdsjcjdc")
    def teachable_machine_classification(img, weights_file):
        # Load the model
        # weights_file=r'C:\Users\Black\OneDrive\Desktop\Dog_ai_webapp\20200911-121337-10000-images-mobilenet-v2-Adam_optimizer.h5'
        model = tf.keras.models.load_model(
            weights_file, custom_objects={'KerasLayer': hub.KerasLayer})
        # Create the array of the right shape to feed into the keras model
        data = np.ndarray(shape=(1, 512, 512, 3), dtype=np.float32)
        image = img
        #image sizing