    else:
        try:
            data = pd.read_excel(file)
        except Exception:
            try:
                data = pd.read_excel(file, index_col=None, header=None)
            except Exception:
                data = pd.read_html(file)
        translate_download(data)
        file.close()


main()
translator = Translator()
st.title("Here you can convert your Excel and CSV files from any language to Turkish")
st.header("Since this program relies on Google Translate, if it throws an error, "
          "retrying a few times or refreshing the page should make it work.")
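# --- Editor's note: a minimal, hypothetical sketch of the per-cell translation
# this app performs. `translate_download` is defined elsewhere in the original
# file; this only illustrates the googletrans call it presumably relies on.
from googletrans import Translator

_translator = Translator()

def translate_cell(value):
    # googletrans returns an object whose `.text` attribute holds the translation
    try:
        return _translator.translate(str(value), dest='tr').text
    except Exception:
        return value  # fall back to the original value on API hiccups

# Usage sketch (for a DataFrame `data`): data = data.applymap(translate_cell)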
def main():
    # Navigation sidebar
    st.sidebar.header('Navigation')
    selection = st.sidebar.radio('Explore?', ['Home', 'Predictor', 'Data'])
    if selection == 'Home':
        home()
    elif selection == 'Predictor':
        classifier_page()
    elif selection == 'Data':
        # Functions to load data
        def load_raw_data(url):
            data = pd.read_csv(url)
            return data

        def format_cdc_data(df):
            date_cols = ['cdc_report_dt', 'pos_spec_dt', 'onset_dt']
            for col in date_cols:
                df[col] = pd.to_datetime(df[col], format='%Y-%m-%d').dt.date
            return df

        def format_tracking_data(df):
            df['date'] = pd.to_datetime(df['date'], format='%Y%m%d')
            df['pos_rate'] = df['positive'] / df['totalTestResults']
            return df

        # Data sources
        st.sidebar.header('Interactive Data')
        data_source = st.sidebar.selectbox('Data Sources', [
            'Choose data source',
            'CDC Public Use Surveillance Data',
            'COVID Tracking Project'
        ])

        # CDC data
        if data_source == 'CDC Public Use Surveillance Data':
            cdc_data_raw = load_raw_data('https://data.cdc.gov/resource/vbim-akqf.csv')
            cdc_data = format_cdc_data(cdc_data_raw)
            st.title('CDC Public Use Surveillance Data')
            st.markdown(
                'https://data.cdc.gov/Case-Surveillance/COVID-19-Case-Surveillance-Public-Use-Data/vbim-akqf',
                unsafe_allow_html=True)
            '\n'  # Streamlit "magic": a bare string literal renders as spacing
            if st.checkbox('View raw data'):
                st.write(cdc_data_raw)
            '\n'
            st.header('Explore data')
            '\n'
            # Dictionary of variable names and corresponding column names for easy access
            vars_dict = {
                'Death': 'death_yn',
                'Hospitalization': 'hosp_yn',
                'ICU admission': 'icu_yn',
                'Medical condition': 'medcond_yn',
                'Sex': 'sex',
                'Age group': 'age_group',
                'Race/Ethnicity': 'race_ethnicity_combined',
                'Current status': 'current_status'
            }
            vars_names = list(vars_dict.keys())
            vars_chosen = st.multiselect('Choose variables', vars_names)
            for var in vars_chosen:
                st.bar_chart(cdc_data[vars_dict[var]])
            '\n'
            # Separate dataframes
            death = cdc_data[cdc_data['death_yn'] == 'Yes']
            no_death = cdc_data[cdc_data['death_yn'] == 'No']
            st.markdown('*Compare patients who died to those who did not:*')
            '\n'
            vars_names2 = vars_names[1:]
            if st.checkbox('Death'):
                death_vars = st.multiselect('Choose variable', vars_names2)
                for var in death_vars:
                    st.bar_chart(death[vars_dict[var]])
            '\n'
            if st.checkbox('No death'):
                # Trailing space keeps this widget label unique for Streamlit
                no_death_vars = st.multiselect('Choose variable ', vars_names2)
                for var in no_death_vars:
                    st.bar_chart(no_death[vars_dict[var]])

        # Tracking data
        elif data_source == 'COVID Tracking Project':
            # Load data
            current_data_raw = load_raw_data('https://api.covidtracking.com/v1/us/current.csv')
            current_data = format_tracking_data(current_data_raw)
            national_data_raw = load_raw_data('https://api.covidtracking.com/v1/us/daily.csv')
            national_data = format_tracking_data(national_data_raw)
            states_data_raw = load_raw_data('https://api.covidtracking.com/v1/states/daily.csv')
            states_data = format_tracking_data(states_data_raw)

            # Layout
            st.title('The COVID Tracking Project')
            st.markdown('https://covidtracking.com/data/national', unsafe_allow_html=True)
            '\n'
            st.header('Explore data')
            '\n'
            st.subheader('National data')
            '\n'
            ## Current data
            st.markdown('*Current data*')
            if st.checkbox('View raw current data'):
                st.write(current_data)
            '\n'
            today = dt.datetime.now().strftime('%m/%d/%Y')
            today_pos = current_data.loc[0, 'positive']
            pos_incr = current_data.loc[0, 'positiveIncrease']
            today_deaths = current_data.loc[0, 'death']
            deaths_incr = current_data.loc[0, 'deathIncrease']
            today_hosp = current_data.loc[0, 'hospitalized']
            hosp_incr = current_data.loc[0, 'hospitalizedIncrease']
            today_pos_rate = current_data.loc[0, 'positive'] / current_data.loc[0, 'totalTestResults']
            # The original indexed an empty column name here; 'inIcuCurrently' is
            # the COVID Tracking Project field matching the label displayed below.
            today_icu = current_data.loc[0, 'inIcuCurrently']

            # Area for displaying current data
            st.markdown('### As of **{}**'.format(today))
            '\n'
            st.write('Total positive cases: {} (+{})'.format(today_pos, pos_incr))
            st.write('Total deaths: {} (+{})'.format(today_deaths, deaths_incr))
            st.write('Total patients hospitalized: {} (+{})'.format(today_hosp, hosp_incr))
            st.write('Total patients in ICU: {}'.format(today_icu))
            st.write('Current positive rate: {:.3f}'.format(today_pos_rate))
            '\n'
            '\n'
            ## All time data
            # Function for plotting variables
            def plot_national_var(var):
                # Create dataframe
                df = pd.concat([national_data['date'], national_data[var]], axis=1)
                df.sort_values(by='date', inplace=True)
                # Plot
                st.line_chart(df.rename(columns={'date': 'index'}).set_index('index'))

            # Variables
            national_dict = {
                'Total positive cases': 'positive',
                'Total deaths': 'death',
                'Total negative cases': 'negative',
                'Total hospitalized': 'hospitalizedCumulative',
                'Total in ICU': 'inIcuCumulative',
                'Total on ventilator': 'onVentilatorCumulative',
                'Total recovered': 'recovered',
                'Total test results': 'totalTestResults',
                'Overall positive rate': 'pos_rate',
                'Increase in positive cases': 'positiveIncrease',
                'Increase in deaths': 'deathIncrease',
                'Increase in hospitalized': 'hospitalizedIncrease',
                'Increase in negative cases': 'negativeIncrease',
                'Increase in total test results': 'totalTestResultsIncrease'
            }
            national_vars = list(national_dict.keys())
            st.markdown('*All data*')
            if st.checkbox('View raw national data'):
                st.write(national_data)
            national_vars_chosen = st.multiselect('Choose variable', national_vars)
            for var in national_vars_chosen:
                plot_national_var(national_dict[var])
            '\n'
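# --- Editor's note: a sketch, not part of the original app. The CSV endpoints
# above are re-downloaded on every widget interaction; Streamlit's cache
# decorator (used elsewhere in this collection as @st.cache) would avoid that:
#
# @st.cache(persist=True)
# def load_raw_data(url):
#     return pd.read_csv(url)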
"E:\Sankar\Streamlit_App\Motor_Vehicle_Collisions_-_Crashes.csv" ) @st.cache(persist=True) #decorating the function with st.cache in order to prevent the computation from rerunning everytime when the app is loaded; it will rerun only if any input has changed def load_data(nrows): data = pd.read_csv(DATA_URL, nrows=nrows, parse_dates=[['CRASH_DATE', 'CRASH_TIME']]) data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True) lowercase = lambda x: str(x).lower() #function to lower the case of the column names using the lambda function data.rename(lowercase, axis='columns', inplace=True) data.rename(columns={'crash_date_crash_time': 'date/time'}, inplace=True) return data data = load_data(100000) original_data = data st.header("Where are the most people injured in NYC?") injured_people = st.slider("Number of persons injured in vehicle collissions", 0, 20) st.map(data.query("injured_persons >= @injured_people")[["latitude", "longitude"]].dropna(how="any")) st.header("How many collissions occurred during a given time of day?") hour = st.slider("Hour to look at", 0, 23) data = data[data['date/time'].dt.hour == hour] st.markdown("Vehicle collisions between %i:00 and %i:00" % (hour, (hour + 1) % 24)) midpoint = (np.average(data["latitude"]), np.average(data["longitude"])) st.write(pdk.Deck( map_style="mapbox://styles/mapbox/light-v9", initial_view_state={ "latitude": midpoint[0], "longitude": midpoint[1],
d_reroll = ['None', 'Ones', 'All']

# Create streamlit elements on the main page
st.title('Warhammer 40k 9th ed calculator')
st.write('v 0.1.1')
att_type_select = st.sidebar.selectbox('Select attack type', (att_type))
if att_type_select == att_type[0]:
    skill_label = 'Ballistic skill'
else:
    skill_label = 'Weapon skill'
num_attackers = st.sidebar.number_input('Number of attacks', min_value=1, step=1)
num_defenders = st.sidebar.number_input('Number of defenders', min_value=1, step=1)

st.header('Attacker')
a_col1, a_col2, a_col3, a_col4 = st.beta_columns(4)
with a_col1:
    a_skill = st.number_input(skill_label, min_value=1, max_value=6, step=1, value=3)
    a_hit_reroll = st.selectbox('Hit reroll', (a_reroll))
with a_col2:
    a_strength = st.number_input('Strength', min_value=0, step=1, value=3)
with a_col3:
    a_armor_p = st.number_input('Armor Piercing', min_value=-10, step=1, value=0)
    a_wound_reroll = st.selectbox('Wound reroll', (a_reroll))
with a_col4:
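# --- Editor's sketch (not in the original file): the core dice math a
# calculator like this needs. On a d6, a roll succeeds on `skill`+, and the
# reroll options shift probability mass from the failure side.
def success_probability(skill, reroll='None'):
    p = (7 - skill) / 6            # e.g. a 3+ succeeds on 4 of 6 faces
    if reroll == 'Ones':
        return p + (1 / 6) * p     # only natural 1s are rerolled
    if reroll == 'All':
        return p + (1 - p) * p     # every failed die is rerolled
    return p

# e.g. success_probability(3, 'All') == 2/3 + 1/3 * 2/3, roughly 0.889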
if show_search:
    search_query = st.sidebar.text_input("Enter search terms", value="Baseball")
    tokenizer, model, V = load_model_and_vectors('data')
    df["dist"] = compute_distance(search_query)

df["size"] = radius
df["line_width"] = 0

viz_cols = [
    "text",
    "label",
]

st.header(project_title)

if show_search:
    subset = df.sort_values("dist")[["text", "dist"]]
    st.table(subset.head())

p = interface.plot_data_bokeh(df, hover_columns=viz_cols)
plot_placeholder = st.empty()

if show_labels:
    for col in range(n_text_labels):
        dx = df[df.cluster == col]
        dx = dx.reset_index()
        cmx, cmy = dx.ux.mean(), dx.uy.mean()
st.sidebar.subheader("When and where are the users tweeting from ๐") hour = st.sidebar.slider("Hour of day", 0, 23) modified_data = data[data['tweet_created'].dt.hour == hour] if not st.sidebar.checkbox("Close", True, key='1'): st.markdown("### Tweets location based on the time of day") st.markdown("%i tweets between %i:00 and %i:00" % (len(modified_data), hour, (hour + 1) % 24)) st.map(modified_data) if st.sidebar.checkbox("Show raw data", False): st.write(modified_data) st.sidebar.subheader("BreakDown airline tweets by Sentiment โ๏ธ") choice = st.sidebar.multiselect('Pick airlines', ('US Airways', 'United', 'American', 'Southwest', 'Delta', 'Virgin America')) if len(choice) > 0: choice_data = data[data.airline.isin(choice)] fig_choice = px.histogram(choice_data, x='airline', y='airline_sentiment', histfunc='count', color='airline_sentiment', facet_col='airline_sentiment', height=600, width=600) st.plotly_chart(fig_choice) else: st.header("โฌ ๏ธ Click on the options to get started")
import streamlit as st
from PIL import Image
import tensorflow as tf
import numpy as np

model = tf.keras.models.load_model('/content/save.h5')

st.markdown("<h1 style='text-align: center;'>Malaria detection by CNN model</h1>",
            unsafe_allow_html=True)
st.subheader('Input will be the cell snapshots of the suspected person')
st.set_option('deprecation.showfileUploaderEncoding', False)
# st.file_uploader's keyword is `type`, not `types`
img = st.file_uploader('Drop or upload cell images here', type=['jpeg', 'png', 'jpg'])
st.markdown("<br><br>", unsafe_allow_html=True)

if st.button('SUBMIT') and img is not None:
    img = Image.open(img)
    st.markdown("<br>", unsafe_allow_html=True)
    st.image(img, caption='Uploaded image')
    image = tf.keras.preprocessing.image.img_to_array(img)
    img = np.resize(image, (1, 90, 90, 3))
    if model.predict(img) == 0:
        st.markdown('POSITIVE')
        st.header('Model implies that the image contains malaria')
    else:
        st.balloons()
        st.markdown('Negative')
        st.header('Model implies that the image does not contain malaria')
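# --- Editor's note: np.resize above does not resample the image; it tiles or
# truncates the raw pixel buffer, so the model sees a distorted input. A
# sketch of resampling-based preprocessing, assuming the model was trained on
# 90x90 RGB inputs (an assumption; the training pipeline is not shown here):
def preprocess(pil_img):
    pil_img = pil_img.convert('RGB').resize((90, 90))  # true resampling
    arr = tf.keras.preprocessing.image.img_to_array(pil_img)
    return np.expand_dims(arr, axis=0)  # shape (1, 90, 90, 3)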
st.sidebar.markdown('''Use the Netflix Movies and TV Shows dataset from Kaggle and perform the following operations:''')
st.sidebar.markdown('''1. Make a visualization showing the total number of movies watched by children
2. Make a visualization showing the total number of standup comedies
3. Make a visualization showing the most watched shows.
4. Make a visualization showing the highest rated show.
5. Make a dashboard (DASHBOARD A) containing all of the above visualizations.
''')
st.sidebar.markdown('''Designed by: **Mihir Kulkarni**''')

st.header('Dataset')
netflix_df = pd.read_csv("netflix_titles.csv")
netflix_df

# In[3]:

netflix_df.isnull().sum()

# In[4]:

netflix_df.director.fillna("No director", inplace=True)
netflix_df.cast.fillna("No cast", inplace=True)
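# --- Editor's sketch for task 1 in the sidebar list above; the rating buckets
# are an assumption about which values in this dataset count as children's
# content, so adjust them to the actual data:
#
# kids = netflix_df[netflix_df['rating'].isin(['TV-Y', 'TV-Y7', 'G', 'TV-G'])]
# st.bar_chart(kids['type'].value_counts())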
def write():
    st.title('Setup & Cleaning')
    st.header('Obtaining and Exploring the Data')
    st.markdown(
        '''<p style='text-align: justify; '>While searching for public datasets that contained relevant job and salary info, we found it difficult to gather all the necessary pieces in one place. Many datasets were either too small, too old, or not comprehensive enough. We decided that our best bet to find a large, consistent dataset was to collect it on our own via web scraping.</p>''',
        unsafe_allow_html=True)

    st.header('Setup')
    st.markdown('Relevant libraries for data cleaning and exploration.')
    import_code = '''
import numpy as np
import pandas as pd
import altair as alt
import seaborn as sns
import matplotlib.pyplot as plt
import googlemaps
import folium'''
    st.code(import_code, language='python')

    st.header('Web Scraping Glassdoor')
    st.markdown(
        '''<p style='text-align: justify; '>Following this great <a href="https://towardsdatascience.com/selenium-tutorial-scraping-glassdoor-com-in-10-minutes-3d0915c6d905">Medium article</a> on web scraping using <i>Selenium</i>, I was able to run a script to scrape 1000 unique job postings on Glassdoor.com. The author's original code needed a few tweaks to run, as the format of some of the HTML elements on the Glassdoor site had changed.</p>''',
        unsafe_allow_html=True)
    st.image(Image.open('images/scraper.png'),
             caption="'Software Engineering' query with no location specified",
             use_column_width=True)
    st.write('''I made the following three queries: **Software Engineering**, **Data Scientist**, and **Designer** -- all without specifying a worksite location, to get a wide range of positions across the United States. With each job entry, I collected the following information:''')
    st.image(Image.open('images/bullet.png'), use_column_width=True)
    st.markdown('''Here's what the head of the DataFrame for *Software Engineering* jobs looked like after scraping:''')
    st.code('''df = pd.read_csv('swe.csv')
df.head()
''', language='python')
    df = pd.read_csv('swe.csv')
    st.dataframe(df.head())

    st.header('Data Cleaning')
    st.write(
        '''<p style='text-align: justify; '>After scraping the data, I needed to clean it up so that it was usable for our model. I made the following changes and created the following variables:
<ul><li>Identified the <i>Seniority</i> of each job based on the title listing</li>
<li>Parsed numeric data out of the <i>Salary Estimate</i> column</li>
<li>Removed rows with missing salaries</li>
<li>Made new columns for the job <i>State</i> and <i>City</i></li>
<li>Geocoded locations using the <a href="https://cloud.google.com/maps-platform/maps">Google Maps Cloud API</a> (retrieve latitude/longitude)</li>
</ul>
</p>''',
        unsafe_allow_html=True)
    st.write('''Here is what some of these steps looked like:''')

    st.subheader('Identifying Seniority')
    st.code(
        '''unique_jobs = df['Job Title'].unique() # Overview of unique job titles
unique_jobs[0:5] # Sample of first 5 titles''',
        language='python')
    unique_jobs = df['Job Title'].unique()
    st.text(unique_jobs[0:5])
    st.markdown(
        '''From the *Job Title* column we find that there are 173 unique job titles with various levels of seniority. The function below extracts the most common labels for senior and junior positions.'''
    )
    st.code('''def seniority(title):
    """Identify and group specific job titles"""
    title = title.lower().strip()
    seniority = ['senior', 'sr.', 'sr', 'lead', 'expert', 'experienced', 'principal']
    juniority = ['junior', 'jr.', 'jr', 'intern']
    for i in seniority:
        if i in title:
            return 'senior'
    for i in juniority:
        if i in title:
            return 'junior'
    else:
        return 'unspecified'
''')
    st.code('''df['Seniority'] = df['Job Title'].apply(seniority)
df.sample(3) # Sample 3 random postings''')

    def seniority(title):
        '''Identify and group specific job titles'''
        title = title.lower().strip()
        seniority = ['senior', 'sr.', 'sr', 'lead', 'expert', 'experienced', 'principal']
        juniority = ['junior', 'jr.', 'jr', 'intern']
        for i in seniority:
            if i in title:
                return 'senior'
        for i in juniority:
            if i in title:
                return 'junior'
        else:
            return 'unspecified'

    df['Seniority'] = df['Job Title'].apply(seniority)
    st.markdown('Now we have a new column, *Seniority*, which specifies the precedence of each posting.')
    st.dataframe(df.sample(3))
    st.code('df.Seniority.value_counts()')
    st.code(df.Seniority.value_counts())

    titles = pd.DataFrame({
        'Position': ['Unspecified', 'Senior', 'Junior'],
        'Total': [595, 325, 60]
    })
    st.markdown('**Bar chart of Seniority**')
    st.altair_chart(alt.Chart(titles).mark_bar().encode(y='Position', x='Total'))
    st.markdown(
        '''<p style='text-align: justify; '>This bar chart shows the distribution of seniority in the job title listings. While the majority of titles do not specify seniority, it makes intuitive sense that there is greater demand for experienced software engineers than for junior or new-grad positions. The lack of junior positions could also be explained by the notion that most of those listings would be offered as internships rather than full-time positions, and thus wouldn't be listed on a job-hunting website such as Glassdoor.</p>''',
        unsafe_allow_html=True)

    st.subheader('Parsing Salary Estimates')
    st.markdown('Next I separated the Glassdoor salary estimates into lows and highs to get an average.')
    st.code('''df.get('Salary Estimate').unique()''')
    st.text(df.get('Salary Estimate').unique())
    st.code('''def salary_simplified(salary):
    salary_simp = salary.split('(')[0].replace('K','').replace('$','')
    minimum = int(salary_simp.split('-')[0])
    maximum = int(salary_simp.split('-')[1])
    return minimum, maximum''')
    st.code('''salary_ranges = df['Salary Estimate'].apply(salary_simplified)''')

    def salary_simplified(salary):
        salary_simp = salary.split('(')[0].replace('K', '').replace('$', '')
        minimum = int(salary_simp.split('-')[0])
        maximum = int(salary_simp.split('-')[1])
        return minimum, maximum

    salary_ranges = df['Salary Estimate'].apply(salary_simplified)

    titles = pd.DataFrame({
        'Count': [24, 30, 31, 31, 31, 30, 32, 31, 616, 31, 30, 31, 32],
        'Average': [61.5, 62.5, 63.5, 73.0, 75.0, 80.0, 85.0, 88.0, 91.5, 100.0, 111.0, 112.0, 126.0]
    })
    st.markdown('**Bar chart of Average Salaries**')
    st.altair_chart(alt.Chart(titles).mark_bar().encode(y='Count', x='Average'))
    st.markdown(
        '''<p style='text-align: justify; '>As we can see from this bar chart, the overwhelming majority of salary averages sit at $91.5k, with lows in the mid $60k's and a high in the mid $120k's.</p>''',
        unsafe_allow_html=True)

    st.subheader('Geocoding Locations')
    st.markdown(
        '''<p style='text-align: justify; '>In order to visualize the locations of the job postings in the data frame, I needed a way to plot each posting on a map. However, the data did not come with any geographic information about the locations of the postings. To solve this problem I used the Google Maps Cloud API to geocode each city and obtain its latitude and longitude coordinates. The following code cell assigns each job's Location listing the corresponding geographic coordinates.</p>''',
        unsafe_allow_html=True)
    st.code('''df['LAT'] = None
df['LON'] = None
for i in range(len(df.Location)):
    geocode_result = gmaps_key.geocode(df.Location.iloc[i])
    try:
        lat = geocode_result[0]['geometry']['location']['lat']
        lon = geocode_result[0]['geometry']['location']['lng']
        df.loc[i, 'LAT'] = lat
        df.loc[i, 'LON'] = lon
    except:
        lat = None
        lon = None''')
    st.markdown('''The next code block simplifies the geocoding by creating a new data frame that stores geographic coordinates, cities, and the number of times they appear in the original table.''')
    st.code('''city_counts = df.groupby('Location').count().get('Job Title')
df_group = pd.DataFrame()
df_group['Lat'] = df['LAT']
df_group['Lon'] = df['LON']
df_group['City'] = df['Location']
cities = pd.DataFrame()
cities['Count'] = city_counts
cities = cities.reset_index()
df_geo = df_group.merge(cities, left_on='City', right_on='Location')
df_geo = df_geo.drop_duplicates(['City'], keep='first').drop(['City'], axis=1)
''')
    bubble_map = folium.Map(location=[37, -102], zoom_start=4)
    st.markdown('''The map defaults its location over the United States.''')
    st.code('bubble_map = folium.Map(location=[37, -102], zoom_start=4)')
    df_geo = pd.read_csv('df_geo.csv')
    for i in range(len(df_geo)):
        folium.Circle(location=[df_geo.Lat.iloc[i], df_geo.Lon.iloc[i]],
                      popup=df_geo.Location.iloc[i],
                      radius=int(df_geo.Count.iloc[i]) * 10000,
                      color='#7551f8',
                      fill=True,
                      fill_color='#7551f8').add_to(bubble_map)
    st.code('''for i in range(len(df_geo)):
    folium.Circle(location=[df_geo.Lat.iloc[i], df_geo.Lon.iloc[i]],
                  popup=df_geo.Location.iloc[i],
                  radius=int(df_geo.Count.iloc[i]) * 10000,
                  color='#7551f8',
                  fill=True,
                  fill_color='#7551f8').add_to(bubble_map)''')
    st.markdown('**Bubble Map of Jobs based on Posting Density**')
    folium_static(bubble_map)
    st.markdown('')
    st.markdown(
        '''<p style='text-align: justify; '>This bubble map of the United States plots each job listing's location, where the radius of the bubble scales with the number of postings at that location. From the map we can see that there are a large number of postings in Salt Lake City, Chicago, Burlington, New York, San Jose, and Seattle. The concentration of multiple circles in the Bay Area and on the East Coast reflects a high volume of postings and marks these areas as "Tech Hubs."</p>''',
        unsafe_allow_html=True)
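    # --- Editor's sketch (not in the original page): how per-posting averages
    # behind the salary bar chart can be derived from `salary_ranges` above:
    #
    # df['Avg Salary'] = salary_ranges.apply(lambda rng: sum(rng) / 2)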
import numpy as np
import os
import datetime
import time

import streamlit as st
from dogfight_game import GameEnv, plotGameData, best_reply_game_rollout

st.header("pre-generate a data set")

N_agents = st.number_input(
    label="number of agents",
    min_value=2,
    value=10,
    step=1,
    format="%.0d",
)
time_steps = st.number_input(
    label="max time steps",
    min_value=0,
    value=10,
    step=1,
    format="%.0d",
)
num_states_to_save = st.number_input(
    label="total steps to save",
    min_value=0,
    value=10000,
    step=1,
    format="%.0d",
)
save_every = st.number_input(
    label="save every X steps",
    min_value=0,
    value=1000,
    step=1,
    format="%.0d",
)
create_dataset_button = st.button("create dataset")
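# --- Editor's sketch of the generation loop the button would trigger. The
# real GameEnv/best_reply_game_rollout API is not shown in this excerpt, so
# the rollout call is left as a labeled placeholder:
if create_dataset_button:
    out_dir = "datasets"  # hypothetical output directory
    os.makedirs(out_dir, exist_ok=True)
    stamp = datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    saved = []
    for step in range(int(num_states_to_save)):
        # saved.append(...)  # one state per step from a GameEnv rollout (API not shown)
        if save_every and step > 0 and step % int(save_every) == 0:
            # Checkpoint the accumulated states every `save_every` steps
            np.save(os.path.join(out_dir, f"states_{stamp}_{step}.npy"),
                    np.array(saved, dtype=object))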
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import streamlit as st
import numpy as np
import pandas as pd

# Explicitly seed the RNG for deterministic results
np.random.seed(0)

st.title('Tables with different sizes')

st.header('Long cells that overflow')
st.write('''
    Long text should show an ellipsis. All cells should have a tooltip
    with their entire un-ellipsized contents.
''')
st.dataframe({
    'foo': ['hello', 'world', 'foo ' * 30],
    'bar': ['hello', 'world', 'bar' * 30],
    'baz': [1, 2, 3],
    'boz': [1, 2, 3],
    'buz': [1, 2, 3],
    'biz' * 30: [1, 2, 3],
    'bim': [1, 2, 3],
})
def main():
    df = load_data()
    # page = st.sidebar.radio("Choose a page", ["Homepage", "SignUp"])
    verified = "True"
    result = "F.A.S.T. WebApp - For Interview Demo"
    st.sidebar.title(result)
    st.sidebar.write("Created By: Akash M Dubey [LinkedIn](https://www.linkedin.com/in/akashmdubey/)")
    st.sidebar.write("Checkout more projects at [www.akashmdubey.com/projects](https://akashmdubey.com/)")
    page = st.sidebar.radio("Choose a Function", [
        "About the Project", "Live News Sentiment", "Company Basic Details",
        "Company Advanced Details", "Stock Future Prediction",
        "Google Trends with Forecast", "Twitter Trends", "Meeting Summarization"
    ])

    if page == "Google Trends with Forecast":
        st.sidebar.write("""
        ## Choose a keyword and a prediction period
        """)
        keyword = st.sidebar.text_input("Keyword", "Company name")
        periods = st.sidebar.slider('Prediction time in days:', 7, 365, 90)

        # Main section
        st.write("""
        # Welcome to the Trend Predictor App
        ### This app predicts the **Google Trend** you want!
        """)
        st.image('https://s3.eu-west-2.amazonaws.com/cdn.howtomakemoneyfromhomeuk.com/wp-content/uploads/2020/10/Google-Trends.jpg',
                 width=350, use_column_width=200)
        st.write("Evolution of interest:", keyword)
        df = get_data(keyword)
        forecast, fig1, fig2 = make_pred(df, periods)
        st.pyplot(fig1)
        st.write("Trends Over the Years and Months")
        st.pyplot(fig2)

    elif page == "About the Project":
        st.title('Data Sources')
        st.write("""
        ### Our F.A.S.T. application has 3 data sources for two different use cases:
        #### 1. Web scraping to get live news data
        #### 2. Twitter API to get real-time tweets
        #### 3. Google Trends API to get real-time trends
        """)
        st.text('')
        link = '[Project Report](https://codelabs-preview.appspot.com/?file_id=1qxniFjwkDir6NT17KkvS1zDbmIgawcrEEwbbfCtAk8k#1)'
        st.markdown(link, unsafe_allow_html=True)
        st.title('AWS Data Architecture')
        st.image('./Images/Architecture Final AWS_FAST.jpg', width=900, use_column_width=1200)
        st.title('Dashboard')
        import streamlit.components.v1 as components
        components.iframe(
            "https://app.powerbi.com/view?r=eyJrIjoiZjMzMGUyZTEtM2RiMS00NzFlLWE3MWMtZDgzMjIxNTgxYmY3IiwidCI6ImE4ZWVjMjgxLWFhYTMtNGRhZS1hYzliLTlhMzk4YjkyMTVlNyIsImMiOjN9&pageName=ReportSection842eec15de524192b588",
            height=600, width=900)

    elif page == "Meeting Summarization":
        symbols = [
            './Audio Files/Meeting 1.mp3', './Audio Files/Meeting 2.mp3',
            './Audio Files/Meeting 3.mp3', './Audio Files/Meeting 4.mp3'
        ]
        track = st.selectbox('Choose the Meeting Audio', symbols)
        st.audio(track)
        data_dir = './inference-data/'
        ratiodata = st.text_input("Please enter the ratio you want the summary by: (TRY: 0.01)")
        if st.button("Generate a Summarized Version of the Meeting"):
            time.sleep(2.4)
            # st.success("This is the Summarized text of the Meeting Audio Files ...")
            if track == "./Audio Files/Meeting 2.mp3":
                user_input = "NKE"
                time.sleep(1.4)
                try:
                    with open(data_dir + user_input) as f:
                        st.success(summarize(f.read(), ratio=float(ratiodata)))
                    st.warning("Sentiment: Negative")
                except Exception:
                    st.text("Please enter a valid decimal value like 0.01")
            else:
                user_input = "AGEN"
                time.sleep(1.4)
                try:
                    with open(data_dir + user_input) as f:
                        st.success(summarize(f.read(), ratio=float(ratiodata)))
                    st.success("Sentiment: Positive")
                except Exception:
                    st.text("Please enter a valid decimal value like 0.01")

    elif page == "Twitter Trends":
        st.write("""
        # Welcome to the Twitter Sentiment App
        ### This app predicts the **Twitter Sentiments** you want!
        """)
        st.image('https://assets.teenvogue.com/photos/56b4f21327a088e24b967bb6/3:2/w_531,h_354,c_limit/twitter-gifs.gif',
                 width=250, use_column_width=200)
        # st.subheader("Select a topic which you'd like to get the sentiment analysis on :")

        ################# Twitter API Connection #######################
        consumer_key = "MaA51EmeZbgYazwFYOZxNRZR5"
        consumer_secret = "6ZUmFGFhiNzePsbkiPlKRBF7R9nq2dkDqfyfx7uU5eNgDhR8ci"
        access_token = "1359729189700722691-g8oMz8ONW6qtvibbQqqc6OAXJCIjeE"
        access_token_secret = "FzXBd0XRy2yZmtpH90GuyMaclmnyPGQEdxfCBO68BI0nb"

        # Use the above credentials to authenticate the API.
        auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
        auth.set_access_token(access_token, access_token_secret)
        api = tweepy.API(auth)
        ################################################################

        df = pd.DataFrame(columns=["Date", "User", "IsVerified", "Tweet", "Likes", "RT", 'User_location'])

        # Function to extract tweets
        def get_tweets(Topic, Count):
            i = 0
            # my_bar = st.progress(100)  # To track progress of extracted tweets
            for tweet in tweepy.Cursor(api.search, q=Topic, count=100, lang="en", exclude='retweets').items():
                # time.sleep(0.1)
                # my_bar.progress(i)
                df.loc[i, "Date"] = tweet.created_at
                df.loc[i, "User"] = tweet.user.name
                df.loc[i, "IsVerified"] = tweet.user.verified
                df.loc[i, "Tweet"] = tweet.text
                df.loc[i, "Likes"] = tweet.favorite_count
                df.loc[i, "RT"] = tweet.retweet_count
                df.loc[i, "User_location"] = tweet.user.location
                # df.to_csv("TweetDataset.csv", index=False)
                # df.to_excel('{}.xlsx'.format("TweetDataset"), index=False)  # Save as Excel
                i = i + 1
                if i > Count:
                    break

        # Function to clean the tweet
        def clean_tweet(tweet):
            return ' '.join(
                re.sub('(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)|([RT])', ' ', tweet.lower()).split())

        # Function to analyze sentiment
        def analyze_sentiment(tweet):
            analysis = TextBlob(tweet)
            if analysis.sentiment.polarity > 0:
                return 'Positive'
            elif analysis.sentiment.polarity == 0:
                return 'Neutral'
            else:
                return 'Negative'

        # Function to pre-process data for the wordcloud
        def prepCloud(Topic_text, Topic):
            Topic = str(Topic).lower()
            Topic = ' '.join(re.sub('([^0-9A-Za-z \t])', ' ', Topic).split())
            Topic = re.split("\s+", str(Topic))
            stopwords = set(STOPWORDS)
            stopwords.update(Topic)  # Add our topic to the stopwords so it doesn't appear in the wordcloud
            text_new = " ".join([txt for txt in Topic_text.split() if txt not in stopwords])
            return text_new

        # Collect input from the user
        Topic = str()
        Topic = str(st.sidebar.text_input("Enter the topic you are interested in (Press Enter once done)",
                                          "enter company name"))

        if len(Topic) > 0:
            # Call the function to extract the data, passing the topic and the filename to store the data in
            with st.spinner("Please wait, Tweets are being extracted"):
                get_tweets(Topic, Count=200)
            st.success('Tweets have been extracted!')

            # Call function to get clean tweets
            df['clean_tweet'] = df['Tweet'].apply(lambda x: clean_tweet(x))

            # Call function to get the sentiments
            df["Sentiment"] = df["Tweet"].apply(lambda x: analyze_sentiment(x))

            # Write a summary of the tweets
            st.write("Total tweets extracted for topic '{}': {}".format(Topic, len(df.Tweet)))
            st.write("Total positive tweets: {}".format(len(df[df["Sentiment"] == "Positive"])))
            st.write("Total negative tweets: {}".format(len(df[df["Sentiment"] == "Negative"])))
            st.write("Total neutral tweets: {}".format(len(df[df["Sentiment"] == "Neutral"])))

            # See the extracted data
            if st.button("See the Extracted Data"):
                # st.markdown(html_temp, unsafe_allow_html=True)
                st.success("Below is the Extracted Data:")
                st.write(df.head(50))

            # Get the count plot
            if st.button("Get Count Plot for Different Sentiments"):
                st.success("Generating a Count Plot")
                st.subheader("Count Plot for Different Sentiments")
                st.write(sns.countplot(df["Sentiment"], palette="Blues"))
                st.pyplot()

            # Pie chart
            if st.button("Get Pie Chart for Different Sentiments"):
                st.success("Generating a Pie Chart")
                a = len(df[df["Sentiment"] == "Positive"])
                b = len(df[df["Sentiment"] == "Negative"])
                c = len(df[df["Sentiment"] == "Neutral"])
                d = np.array([a, b, c])
                explode = (0.1, 0.0, 0.1)
                st.write(plt.pie(d, shadow=True, explode=explode,
                                 labels=["Positive", "Negative", "Neutral"], autopct='%1.2f%%'))
                st.pyplot()

            # Get the count plot based on verified and unverified users
            if st.button("Get Count Plot Based on Verified and Unverified Users"):
                st.success("Generating a Count Plot (verified and unverified users)")
                st.subheader("Count Plot for Different Sentiments for Verified and Unverified Users")
                st.write(sns.countplot(df["Sentiment"], hue=df.IsVerified))
                st.pyplot()

            # Points to add: 1. Make the background clear for the wordcloud  2. Remove keywords from the wordcloud

            # Create a wordcloud
            if st.button("Get Wordcloud for all things said about {}".format(Topic)):
                st.success("Generating a Wordcloud for all things said about {}".format(Topic))
                text = " ".join(review for review in df.clean_tweet)
                stopwords = set(STOPWORDS)
                text_newALL = prepCloud(text, Topic)
                wordcloud = WordCloud(stopwords=stopwords, max_words=800, max_font_size=75,
                                      colormap="Blues", background_color="black").generate(text_newALL)
                st.write(plt.imshow(wordcloud, interpolation='bilinear'))
                st.pyplot()

            # Wordcloud for positive tweets only
            if st.button("Get Wordcloud for all Positive Tweets about {}".format(Topic)):
                st.success("Generating a Wordcloud for all Positive Tweets about {}".format(Topic))
                text_positive = " ".join(review for review in df[df["Sentiment"] == "Positive"].clean_tweet)
                stopwords = set(STOPWORDS)
                text_new_positive = prepCloud(text_positive, Topic)
                wordcloud = WordCloud(stopwords=stopwords, max_words=800, max_font_size=75,
                                      colormap="Greens", background_color="black").generate(text_new_positive)
                st.write(plt.imshow(wordcloud, interpolation='bilinear'))
                st.pyplot()

            # Wordcloud for negative tweets only
            # (the original success message said "Positive" here; fixed to "Negative")
            if st.button("Get Wordcloud for all Negative Tweets about {}".format(Topic)):
                st.success("Generating a Wordcloud for all Negative Tweets about {}".format(Topic))
                text_negative = " ".join(review for review in df[df["Sentiment"] == "Negative"].clean_tweet)
                stopwords = set(STOPWORDS)
                text_new_negative = prepCloud(text_negative, Topic)
                wordcloud = WordCloud(stopwords=stopwords, max_words=800, max_font_size=75,
                                      colormap="Reds", background_color="black").generate(text_new_negative)
                st.write(plt.imshow(wordcloud, interpolation='bilinear'))
                st.pyplot()

            # st.sidebar.subheader("Scatter-plot setup")
            # box1 = st.sidebar.selectbox(label="X axis", options=numeric_columns)
            # box2 = st.sidebar.selectbox(label="Y axis", options=numeric_columns)
            # sns.jointplot(x=box1, y=box2, data=df, kind="reg", color="red")
            # st.pyplot()

    elif page == "Stock Future Prediction":
        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()
        ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols)
        START = "2015-01-01"
        TODAY = date.today().strftime("%Y-%m-%d")
        st.title('Stock Forecast App')
        st.image('https://media2.giphy.com/media/JtBZm3Getg3dqxK0zP/giphy-downsized-large.gif',
                 width=250, use_column_width=200)
        # stocks = ('GOOG', 'AAPL', 'MSFT', 'GME', 'W', 'TSLA')
        # selected_stock = st.selectbox('Select dataset for prediction', stocks)
        n_years = st.slider('Years of prediction:', 1, 4)
        period = n_years * 365
        st.title('Stock Forecast App To Do part in stockapp.py')
        data_load_state = st.text('Loading data...')
        data = yf.download(ticker, START, TODAY)
        data.reset_index(inplace=True)
        data_load_state.text('Loading data... done!')
        st.subheader('Raw data')
        st.write(data.tail())

        # Plot raw data
        def plot_raw_data():
            fig = go.Figure()
            fig.add_trace(go.Scatter(x=data['Date'], y=data['Open'], name="stock_open"))
            fig.add_trace(go.Scatter(x=data['Date'], y=data['Close'], name="stock_close"))
            fig.layout.update(title_text='Time Series data with Rangeslider',
                              xaxis_rangeslider_visible=True)
            st.plotly_chart(fig)

        plot_raw_data()

        # Predict forecast with Prophet
        df_train = data[['Date', 'Close']]
        df_train = df_train.rename(columns={"Date": "ds", "Close": "y"})
        m = Prophet()
        m.fit(df_train)
        future = m.make_future_dataframe(periods=period)
        forecast = m.predict(future)

        # Show and plot forecast
        st.subheader('Forecast data')
        st.write(forecast.tail())
        st.write(f'Forecast plot for {n_years} years')
        fig1 = plot_plotly(m, forecast)
        st.plotly_chart(fig1)
        st.write("Forecast components")
        fig2 = m.plot_components(forecast)
        st.write(fig2)

    elif page == "Company Advanced Details":
        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()
        ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols)
        stock = yf.Ticker(ticker)

        def calcMovingAverage(data, size):
            df = data.copy()
            df['sma'] = df['Adj Close'].rolling(size).mean()
            df['ema'] = df['Adj Close'].ewm(span=size, min_periods=size).mean()
            df.dropna(inplace=True)
            return df

        def calc_macd(data):
            df = data.copy()
            df['ema12'] = df['Adj Close'].ewm(span=12, min_periods=12).mean()
            df['ema26'] = df['Adj Close'].ewm(span=26, min_periods=26).mean()
            df['macd'] = df['ema12'] - df['ema26']
            df['signal'] = df['macd'].ewm(span=9, min_periods=9).mean()
            df.dropna(inplace=True)
            return df

        def calcBollinger(data, size):
            df = data.copy()
            df["sma"] = df['Adj Close'].rolling(size).mean()
            df["bolu"] = df["sma"] + 2 * df['Adj Close'].rolling(size).std(ddof=0)
            df["bold"] = df["sma"] - 2 * df['Adj Close'].rolling(size).std(ddof=0)
            df["width"] = df["bolu"] - df["bold"]
            df.dropna(inplace=True)
            return df

        st.title('Company Stocks Advanced Details')
        st.subheader('Moving Average')
        coMA1, coMA2 = st.beta_columns(2)
        with coMA1:
            numYearMA = st.number_input('Insert period (Year): ', min_value=1, max_value=10, value=2, key=0)
        with coMA2:
            windowSizeMA = st.number_input('Window Size (Day): ', min_value=5, max_value=500, value=20, key=1)

        start = dt.datetime.today() - dt.timedelta(numYearMA * 365)
        end = dt.datetime.today()
        dataMA = yf.download(ticker, start, end)
        df_ma = calcMovingAverage(dataMA, windowSizeMA)
        df_ma = df_ma.reset_index()

        figMA = go.Figure()
        figMA.add_trace(go.Scatter(x=df_ma['Date'], y=df_ma['Adj Close'],
                                   name="Prices Over Last " + str(numYearMA) + " Year(s)"))
        figMA.add_trace(go.Scatter(x=df_ma['Date'], y=df_ma['sma'],
                                   name="SMA" + str(windowSizeMA) + " Over Last " + str(numYearMA) + " Year(s)"))
        figMA.add_trace(go.Scatter(x=df_ma['Date'], y=df_ma['ema'],
                                   name="EMA" + str(windowSizeMA) + " Over Last " + str(numYearMA) + " Year(s)"))
        figMA.update_layout(legend=dict(yanchor="top", y=0.99, xanchor="left", x=0.01))
        figMA.update_layout(legend_title_text='Trend')
        figMA.update_yaxes(tickprefix="$")
        st.plotly_chart(figMA, use_container_width=True)

        st.subheader('Moving Average Convergence Divergence (MACD)')
        numYearMACD = st.number_input('Insert period (Year): ', min_value=1, max_value=10, value=2, key=2)
        startMACD = dt.datetime.today() - dt.timedelta(numYearMACD * 365)
        endMACD = dt.datetime.today()
        dataMACD = yf.download(ticker, startMACD, endMACD)
        df_macd = calc_macd(dataMACD)
        df_macd = df_macd.reset_index()

        figMACD = make_subplots(rows=2, cols=1, shared_xaxes=True, vertical_spacing=0.01)
        figMACD.add_trace(go.Scatter(x=df_macd['Date'], y=df_macd['Adj Close'],
                                     name="Prices Over Last " + str(numYearMACD) + " Year(s)"),
                          row=1, col=1)
        figMACD.add_trace(go.Scatter(x=df_macd['Date'], y=df_macd['ema12'],
                                     name="EMA 12 Over Last " + str(numYearMACD) + " Year(s)"),
                          row=1, col=1)
        figMACD.add_trace(go.Scatter(x=df_macd['Date'], y=df_macd['ema26'],
                                     name="EMA 26 Over Last " + str(numYearMACD) + " Year(s)"),
                          row=1, col=1)
        figMACD.add_trace(go.Scatter(x=df_macd['Date'], y=df_macd['macd'], name="MACD Line"),
                          row=2, col=1)
        figMACD.add_trace(go.Scatter(x=df_macd['Date'], y=df_macd['signal'], name="Signal Line"),
                          row=2, col=1)
        figMACD.update_layout(legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="left", x=0))
        figMACD.update_yaxes(tickprefix="$")
        st.plotly_chart(figMACD, use_container_width=True)

        st.subheader('Bollinger Band')
        coBoll1, coBoll2 = st.beta_columns(2)
        with coBoll1:
            numYearBoll = st.number_input('Insert period (Year): ', min_value=1, max_value=10, value=2, key=6)
        with coBoll2:
            windowSizeBoll = st.number_input('Window Size (Day): ', min_value=5, max_value=500, value=20, key=7)

        startBoll = dt.datetime.today() - dt.timedelta(numYearBoll * 365)
        endBoll = dt.datetime.today()
        dataBoll = yf.download(ticker, startBoll, endBoll)
        df_boll = calcBollinger(dataBoll, windowSizeBoll)
        df_boll = df_boll.reset_index()

        figBoll = go.Figure()
        figBoll.add_trace(go.Scatter(x=df_boll['Date'], y=df_boll['bolu'], name="Upper Band"))
        figBoll.add_trace(go.Scatter(x=df_boll['Date'], y=df_boll['sma'],
                                     name="SMA" + str(windowSizeBoll) + " Over Last " + str(numYearBoll) + " Year(s)"))
        figBoll.add_trace(go.Scatter(x=df_boll['Date'], y=df_boll['bold'], name="Lower Band"))
        figBoll.update_layout(legend=dict(orientation="h", yanchor="bottom", y=1, xanchor="left", x=0))
        figBoll.update_yaxes(tickprefix="$")
        st.plotly_chart(figBoll, use_container_width=True)

    elif page == "Live News Sentiment":
        st.image('https://www.visitashland.com/files/latestnews.jpg', width=250, use_column_width=200)
        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()
        ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols)
        if st.button("Click here to see the latest news about " + ticker):
            st.header('Latest News')

            def newsfromfinviz(temp):
                # time.sleep(5)
                finviz_url = 'https://finviz.com/quote.ashx?t='
                news_tables = {}
                tickers = [temp]
                for ticker in tickers:
                    url = finviz_url + ticker
                    req = Request(url=url, headers={'user-agent': 'my-app/0.0.1'})
                    response = urlopen(req)
                    # Read the contents of the file into 'html'
                    html = BeautifulSoup(response)
                    # Find 'news-table' in the soup and load it into 'news_table'
                    news_table = html.find(id='news-table')
                    # Add the table to our dictionary
                    news_tables[ticker] = news_table

                parsed_news = []
                # Iterate through the news
                for file_name, news_table in news_tables.items():
                    # Iterate through all tr tags in 'news_table'
                    for x in news_table.findAll('tr'):
                        # Get the headline text from the <a> tag
                        text = x.a.get_text()
                        # Split the text in the <td> tag into a list
                        date_scrape = x.td.text.split()
                        # If the length of 'date_scrape' is 1, load 'time' as the only element
                        if len(date_scrape) == 1:
                            time = date_scrape[0]
                        # Else load 'date' as the 1st element and 'time' as the second
                        else:
                            date = date_scrape[0]
                            time = date_scrape[1]
                        # Extract the ticker from the file name, getting the string up to the 1st '_'
                        ticker = file_name.split('_')[0]
                        # Append ticker, date, time and headline as a list to the 'parsed_news' list
                        parsed_news.append([ticker, date, time, text])

                # Instantiate the sentiment intensity analyzer
                vader = SentimentIntensityAnalyzer()
                # Set column names
                columns = ['ticker', 'date', 'time', 'headline']
                # Convert the parsed_news list into a DataFrame called 'parsed_and_scored_news'
                parsed_and_scored_news = pd.DataFrame(parsed_news, columns=columns)
                # Iterate through the headlines and get the polarity scores using vader
                scores = parsed_and_scored_news['headline'].apply(vader.polarity_scores).tolist()
                # Convert the 'scores' list of dicts into a DataFrame
                scores_df = pd.DataFrame(scores)
                # Join the DataFrames of the news and the list of dicts
                parsed_and_scored_news = parsed_and_scored_news.join(scores_df, rsuffix='_right')
                # Convert the date column from string to datetime
                parsed_and_scored_news['date'] = pd.to_datetime(parsed_and_scored_news.date).dt.date
                parsed_and_scored_news['Sentiment'] = np.where(
                    parsed_and_scored_news['compound'] > 0, 'Positive',
                    (np.where(parsed_and_scored_news['compound'] == 0, 'Neutral', 'Negative')))
                return parsed_and_scored_news

            df = newsfromfinviz(ticker)
            df_pie = df[['Sentiment', 'headline']].groupby('Sentiment').count()
            fig = px.pie(df_pie, values=df_pie['headline'], names=df_pie.index, color=df_pie.index,
                         color_discrete_map={'Positive': 'green', 'Neutral': 'darkblue', 'Negative': 'red'})
            st.subheader('Dataframe with Latest News')
            st.dataframe(df)
            st.subheader('Latest News Sentiment Distribution using Pie Chart')
            st.plotly_chart(fig)

            plt.rcParams['figure.figsize'] = [11, 5]
            # Group by the date and ticker columns from the scored news and calculate the mean
            mean_scores = df.groupby(['ticker', 'date']).mean()
            # Unstack the column ticker
            mean_scores = mean_scores.unstack()
            # Get the cross-section of compound in the 'columns' axis
            mean_scores = mean_scores.xs('compound', axis="columns").transpose()
            # Plot a bar chart with pandas
            mean_scores.plot(kind='bar')
            plt.grid()
            st.set_option('deprecation.showPyplotGlobalUse', False)
            st.subheader('Sentiments over Time')
            st.pyplot()

    elif page == "Company Basic Details":
        snp500 = pd.read_csv("./Datasets/SP500.csv")
        symbols = snp500['Symbol'].sort_values().tolist()
        ticker = st.sidebar.selectbox('Choose a S&P 500 Stock', symbols)
        stock = yf.Ticker(ticker)
        info = stock.info
        st.title('Company Basic Details')
        st.subheader(info['longName'])
        st.markdown('** Sector **: ' + info['sector'])
        st.markdown('** Industry **: ' + info['industry'])
        st.markdown('** Phone **: ' + info['phone'])
        st.markdown('** Address **: ' + info['address1'] + ', ' + info['city'] + ', '
                    + info['zip'] + ', ' + info['country'])
        st.markdown('** Website **: ' + info['website'])
        st.markdown('** Business Summary **')
        st.info(info['longBusinessSummary'])

        fundInfo = {
            'Enterprise Value (USD)': info['enterpriseValue'],
            'Enterprise To Revenue Ratio': info['enterpriseToRevenue'],
            'Enterprise To Ebitda Ratio': info['enterpriseToEbitda'],
            'Net Income (USD)': info['netIncomeToCommon'],
            'Profit Margin Ratio': info['profitMargins'],
            'Forward PE Ratio': info['forwardPE'],
            'PEG Ratio': info['pegRatio'],
            'Price to Book Ratio': info['priceToBook'],
            'Forward EPS (USD)': info['forwardEps'],
            'Beta ': info['beta'],
            'Book Value (USD)': info['bookValue'],
            'Dividend Rate (%)': info['dividendRate'],
            'Dividend Yield (%)': info['dividendYield'],
            'Five year Avg Dividend Yield (%)': info['fiveYearAvgDividendYield'],
            'Payout Ratio': info['payoutRatio']
        }
        fundDF = pd.DataFrame.from_dict(fundInfo, orient='index')
        fundDF = fundDF.rename(columns={0: 'Value'})
        st.subheader('Fundamental Info')
        st.table(fundDF)

        st.subheader('General Stock Info')
        st.markdown('** Market **: ' + info['market'])
        st.markdown('** Exchange **: ' + info['exchange'])
        st.markdown('** Quote Type **: ' + info['quoteType'])

        start = dt.datetime.today() - dt.timedelta(2 * 365)
        end = dt.datetime.today()
        df = yf.download(ticker, start, end)
        df = df.reset_index()
        fig = go.Figure(data=go.Scatter(x=df['Date'], y=df['Adj Close']))
        fig.update_layout(
            title={
                'text': "Stock Prices Over the Past Two Years",
                'y': 0.9,
                'x': 0.5,
                'xanchor': 'center',
                'yanchor': 'top'
            })
        st.plotly_chart(fig, use_container_width=True)

        marketInfo = {
            "Volume": info['volume'],
            "Average Volume": info['averageVolume'],
            "Market Cap": info["marketCap"],
            "Float Shares": info['floatShares'],
            "Regular Market Price (USD)": info['regularMarketPrice'],
            'Bid Size': info['bidSize'],
            'Ask Size': info['askSize'],
            "Share Short": info['sharesShort'],
            'Short Ratio': info['shortRatio'],
            'Share Outstanding': info['sharesOutstanding']
        }
        marketDF = pd.DataFrame(data=marketInfo, index=[0])
        st.table(marketDF)

    else:
        verified = "False"
        result = "Please enter a valid Username, Password and Access Token!!"
        st.title(result)
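# --- Editor's note: the Twitter credentials in the "Twitter Trends" page above
# are hardcoded in the source. A sketch of the usual Streamlit alternative,
# reading them from .streamlit/secrets.toml via st.secrets (the key names used
# here are assumptions):
#
# auth = tweepy.OAuthHandler(st.secrets["consumer_key"], st.secrets["consumer_secret"])
# auth.set_access_token(st.secrets["access_token"], st.secrets["access_token_secret"])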
def app():
    st.title('Analysis of the NH Hotels site')
    st.write(
        "On this page you will find analyses of the NH Hotels site, through which you can search 357 hotels across 29 countries worldwide."
    )
    st.header("Goal of the analysis")
    st.write(
        "Recently, climate change, such as global warming, has become more visible to the public. "
        "We believe that companies in every sector, including tourism, need to take the environment more seriously. "
        "On the NH Hotels site, some hotels highlight their respect for the environment. "
        "On this page you will find analyses of the hotels according to their environmental friendliness (Eco_Friendly)."
    )

    # Read the scraped data
    df = pd.read_csv("./data/output_nhHotels.csv")

    # Present the data
    st.subheader("Data on the hotels of the NH Hotels site")
    st.write(df)
    st.markdown(
        "This is the data we extracted from the NH Hotels site. "
        "It contains the following information:")
    st.markdown("- **Country**")
    st.markdown("- **Hotel name**")
    st.markdown("- **Hotel star rating**")
    st.markdown(
        "- **Eco Friendly**: *whether or not the hotel displays a badge indicating that it follows a specific environmental-protection approach*."
        " More information [here](https://www.nh-hotels.fr/environnement/hotels-ecologiques-developpement-durable)"
    )
    st.markdown("- **Number of stars on TripAdvisor**")

    # ====== Analysis section ====== #
    st.write("")
    st.header("Analysis")

    # Doughnut chart
    st.subheader("Breakdown of the hotels")
    labels = ['Non Eco Friendly', 'Eco friendly']
    vals = df['eco_friendly'].value_counts()
    values = [vals[0], vals[1]]
    fig = go.Figure(data=[go.Pie(labels=labels, values=values, hole=.3)])
    st.write(fig)
    st.markdown(
        "**56%** of the hotels on this site carry the *Eco Friendly* badge and **44%** do not. "
        "You can see the detailed figures by hovering over the chart."
    )

    # Bar chart
    st.subheader("Average rating on TripAdvisor")
    means = df.groupby('eco_friendly')['avis_client'].mean()
    colors = ['lightslategray'] * 2
    colors[1] = 'crimson'
    fig_bar = go.Figure(data=[go.Bar(x=labels, y=[means[0], means[1]], marker_color=colors)])
    st.write(fig_bar)
    st.markdown(
        "We can observe that the TripAdvisor customer rating for Eco Friendly hotels, at **4.12 stars**, is higher "
        "than for non Eco Friendly ones, at **3.9 stars**. Customers therefore seem to have had more positive experiences with Eco Friendly hotels."
    )

    # ===== Conclusion ===== #
    st.write("")
    st.subheader("Conclusion")
    st.write(
        "Based on these results, we can observe that, among the hotels listed on this site, "
        "the more environmentally friendly ones offer better service to their customers."
        " To encourage more environmentally friendly practices in the tourism sector, "
        "it is advisable to display this information on the site so that it is more clearly visible to prospective customers."
        " Consequently, we believe that a website that lets users search for hotels according to their environmental friendliness is "
        "valuable for offering a more satisfying service to future customers."
    )
def run_app(): # # Loads session state # state = _get_state() def reset_template_state(): state.template_name = None state.jinja = None state.reference = None # # Initial page setup # st.set_page_config(page_title="Promptsource", layout="wide") st.sidebar.markdown( "<center><a href='https://github.com/bigscience-workshop/promptsource'>รฐลธโยปGithub - Promptsource\n\n</a></center>", unsafe_allow_html=True, ) mode = st.sidebar.selectbox( label="Choose a mode", options=select_options, index=0, key="mode_select", ) st.sidebar.title(f"{side_bar_title_prefix} รฐลธลยธ - {mode}") # # Adds pygments styles to the page. # st.markdown("<style>" + HtmlFormatter(style="friendly").get_style_defs(".highlight") + "</style>", unsafe_allow_html=True) WIDTH = 140 def show_jinja(t, width=WIDTH): def replace_linebreaks(t): """ st.write does not handle double breaklines very well. When it encounters `\n\n`, it exit the curent <div> block. Explicitely replacing all `\n` with their html equivalent to bypass this issue. Also stripping the trailing `\n` first. """ return t.strip("\n").replace("\n", "<br/>") wrap = textwrap.fill(t, width=width, replace_whitespace=False) out = highlight(wrap, DjangoLexer(), HtmlFormatter()) out = replace_linebreaks(out) st.write(out, unsafe_allow_html=True) def show_text(t, width=WIDTH, with_markdown=False): wrap = [ textwrap.fill(subt, width=width, replace_whitespace=False) for subt in t.split("\n") ] wrap = "\n".join(wrap) if with_markdown: st.write(wrap, unsafe_allow_html=True) else: st.text(wrap) if mode == "Helicopter view": st.title("High level metrics") st.write("This will take a minute to collect.") st.write( "If you want to contribute, please refer to the instructions in " + "[Contributing](https://github.com/bigscience-workshop/promptsource/blob/main/CONTRIBUTING.md)." ) # # Loads template data # try: template_collection = TemplateCollection() except FileNotFoundError: st.error( "Unable to find the prompt folder!\n\n" "We expect the folder to be in the working directory. " "You might need to restart the app in the root directory of the repo." ) st.stop() # # Global metrics # counts = template_collection.get_templates_count() nb_prompted_datasets = len(counts) st.write(f"## Number of *prompted datasets*: `{nb_prompted_datasets}`") nb_prompts = sum(counts.values()) st.write(f"## Number of *prompts*: `{nb_prompts}`") # # Metrics per dataset/subset # # Download dataset infos (multiprocessing download) manager = Manager() all_infos = manager.dict() all_datasets = list(set([t[0] for t in template_collection.keys])) pool = Pool(processes=multiprocessing.cpu_count()) pool.map(functools.partial(get_infos, all_infos), all_datasets) pool.close() pool.join() results = [] for (dataset_name, subset_name) in template_collection.keys: # Collect split sizes (train, validation and test) if dataset_name not in all_infos: infos = get_dataset_infos(dataset_name) all_infos[dataset_name] = infos else: infos = all_infos[dataset_name] if infos: if subset_name is None: subset_infos = infos[list(infos.keys())[0]] else: subset_infos = infos[subset_name] split_sizes = { k: v.num_examples for k, v in subset_infos.splits.items() } else: # Zaid/coqa_expanded and Zaid/quac_expanded don't have dataset_infos.json # so infos is an empty dic, and `infos[list(infos.keys())[0]]` raises an error # For simplicity, just filling `split_sizes` with nothing, so the displayed split sizes will be 0. 
split_sizes = {} # Collect template counts, original task counts and names dataset_templates = template_collection.get_dataset( dataset_name, subset_name) results.append({ "Dataset name": dataset_name, "Subset name": "รขหโฆ" if subset_name is None else subset_name, "Train size": split_sizes["train"] if "train" in split_sizes else 0, "Validation size": split_sizes["validation"] if "validation" in split_sizes else 0, "Test size": split_sizes["test"] if "test" in split_sizes else 0, "Number of prompts": len(dataset_templates), "Number of original task prompts": sum([ bool(t.metadata.original_task) for t in dataset_templates.templates.values() ]), "Prompt names": [t.name for t in dataset_templates.templates.values()], }) results_df = pd.DataFrame(results) results_df.sort_values(["Number of prompts"], inplace=True, ascending=False) results_df.reset_index(drop=True, inplace=True) nb_training_instances = results_df["Train size"].sum() st.write( f"## Number of *training instances*: `{nb_training_instances}`") plot_df = results_df[[ "Dataset name", "Subset name", "Train size", "Number of prompts" ]].copy() plot_df[ "Name"] = plot_df["Dataset name"] + " - " + plot_df["Subset name"] plot_df.sort_values(["Train size"], inplace=True, ascending=False) fig = px.bar( plot_df, x="Name", y="Train size", hover_data=["Dataset name", "Subset name", "Number of prompts"], log_y=True, title= "Number of training instances per data(sub)set - y-axis is in logscale", ) fig.update_xaxes(visible=False, showticklabels=False) st.plotly_chart(fig, use_container_width=True) st.write( f"- Top 3 training subsets account for `{100 * plot_df[:3]['Train size'].sum() / nb_training_instances:.2f}%` of the training instances." ) biggest_training_subset = plot_df.iloc[0] st.write( f"- Biggest training subset is *{biggest_training_subset['Name']}* with `{biggest_training_subset['Train size']}` instances" ) smallest_training_subset = plot_df[plot_df["Train size"] > 0].iloc[-1] st.write( f"- Smallest training subset is *{smallest_training_subset['Name']}* with `{smallest_training_subset['Train size']}` instances" ) st.markdown("***") st.write("Details per dataset") st.table(results_df) else: # Combining mode `Prompted dataset viewer` and `Sourcing` since the # backbone of the interfaces is the same assert mode in ["Prompted dataset viewer", "Sourcing"], ValueError( f"`mode` ({mode}) should be in `[Helicopter view, Prompted dataset viewer, Sourcing]`" ) # # Loads dataset information # dataset_list = list_datasets() ag_news_index = dataset_list.index("ag_news") # # Select a dataset - starts with ag_news # dataset_key = st.sidebar.selectbox( "Dataset", dataset_list, key="dataset_select", index=ag_news_index, help="Select the dataset to work on.", ) # # If a particular dataset is selected, loads dataset and template information # if dataset_key is not None: # # Check for subconfigurations (i.e. subsets) # configs = get_dataset_confs(dataset_key) conf_option = None if len(configs) > 0: conf_option = st.sidebar.selectbox( "Subset", configs, index=0, format_func=lambda a: a.name) subset_name = str(conf_option.name) if conf_option else None try: dataset = get_dataset(dataset_key, subset_name) except OSError as e: st.error( f"Some datasets are not handled automatically by `datasets` and require users to download the " f"dataset manually. This applies to {dataset_key}{f'/{subset_name}' if subset_name is not None else ''}. 
" f"\n\nPlease download the raw dataset to `~/.cache/promptsource/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`. " f"\n\nYou can choose another cache directory by overriding `PROMPTSOURCE_MANUAL_DATASET_DIR` environment " f"variable and downloading raw dataset to `$PROMPTSOURCE_MANUAL_DATASET_DIR/{dataset_key}{f'/{subset_name}' if subset_name is not None else ''}`" f"\n\nOriginal error:\n{str(e)}") st.stop() splits = list(dataset.keys()) index = 0 if "train" in splits: index = splits.index("train") split = st.sidebar.selectbox("Split", splits, key="split_select", index=index) dataset = dataset[split] dataset = renameDatasetColumn(dataset) # # Loads template data # try: dataset_templates = DatasetTemplates( dataset_key, conf_option.name if conf_option else None) except FileNotFoundError: st.error( "Unable to find the prompt folder!\n\n" "We expect the folder to be in the working directory. " "You might need to restart the app in the root directory of the repo." ) st.stop() template_list = dataset_templates.all_template_names num_templates = len(template_list) st.sidebar.write( "No of prompts created for " + f"`{dataset_key + (('/' + conf_option.name) if conf_option else '')}`" + f": **{str(num_templates)}**") if mode == "Prompted dataset viewer": if num_templates > 0: template_name = st.sidebar.selectbox( "Prompt name", template_list, key="template_select", index=0, help="Select the prompt to visualize.", ) step = 50 example_index = st.sidebar.number_input( f"Select the example index (Size = {len(dataset)})", min_value=0, max_value=len(dataset) - step, value=0, step=step, key="example_index_number_input", help="Offset = 50.", ) else: # mode = Sourcing st.sidebar.subheader("Select Example") example_index = st.sidebar.slider("Select the example index", 0, len(dataset) - 1) example = dataset[example_index] example = removeHyphen(example) st.sidebar.write(example) st.sidebar.subheader("Dataset Schema") rendered_features = render_features(dataset.features) st.sidebar.write(rendered_features) # # Display dataset information # st.header("Dataset: " + dataset_key + " " + (("/ " + conf_option.name) if conf_option else "")) # If we have a custom dataset change the source link to the hub split_dataset_key = dataset_key.split("/") possible_user = split_dataset_key[0] if len(split_dataset_key) > 1 and possible_user in INCLUDED_USERS: source_link = "https://huggingface.co/datasets/%s/blob/main/%s.py" % ( dataset_key, split_dataset_key[-1], ) else: source_link = "https://github.com/huggingface/datasets/blob/master/datasets/%s/%s.py" % ( dataset_key, dataset_key, ) st.markdown("*Homepage*: " + dataset.info.homepage + "\n\n*Dataset*: " + source_link) md = """ %s """ % (dataset.info.description.replace("\\", "") if dataset_key else "") st.markdown(md) # # Body of the app: display prompted examples in mode `Prompted dataset viewer` # or text boxes to create new prompts in mode `Sourcing` # if mode == "Prompted dataset viewer": # # Display template information # if num_templates > 0: template = dataset_templates[template_name] st.subheader("Prompt") st.markdown("##### Name") st.text(template.name) st.markdown("##### Reference") st.text(template.reference) st.markdown("##### Original Task? ") st.text(template.metadata.original_task) st.markdown("##### Choices in template? ") st.text(template.metadata.choices_in_prompt) st.markdown("##### Metrics") st.text(", ".join(template.metadata.metrics) if template. 
metadata.metrics else None) st.markdown("##### Answer Choices") if template.get_answer_choices_expr() is not None: show_jinja(template.get_answer_choices_expr()) else: st.text(None) st.markdown("##### Jinja template") splitted_template = template.jinja.split("|||") st.markdown("###### Input template") show_jinja(splitted_template[0].strip()) if len(splitted_template) > 1: st.markdown("###### Target template") show_jinja(splitted_template[1].strip()) st.markdown("***") # # Display a couple (`step`) examples # for ex_idx in range(example_index, example_index + step): if ex_idx >= len(dataset): continue example = dataset[ex_idx] example = removeHyphen(example) col1, _, col2 = st.beta_columns([12, 1, 12]) with col1: st.write(example) if num_templates > 0: with col2: prompt = template.apply(example, highlight_variables=False) if prompt == [""]: st.write("∅∅∅ *Blank result*") else: st.write("Input") show_text(prompt[0]) if len(prompt) > 1: st.write("Target") show_text(prompt[1]) st.markdown("***") else: # mode = Sourcing st.markdown("## Prompt Creator") # # Create a new template or select an existing one # col1a, col1b, _, col2 = st.beta_columns([9, 9, 1, 6]) # current_templates_key and state.templates_key are keys for the templates object current_templates_key = (dataset_key, conf_option.name if conf_option else None) # Resets state if there has been a change in templates_key if state.templates_key != current_templates_key: state.templates_key = current_templates_key reset_template_state() with col1a, st.form("new_template_form"): new_template_name = st.text_input( "Create a New Prompt", key="new_template", value="", help="Enter name and hit enter to create a new prompt.", ) new_template_submitted = st.form_submit_button("Create") if new_template_submitted: if new_template_name in dataset_templates.all_template_names: st.error( f"A prompt with the name {new_template_name} already exists " f"for dataset {state.templates_key}.") elif new_template_name == "": st.error("Need to provide a prompt name.") else: template = Template(new_template_name, "", "") dataset_templates.add_template(template) reset_template_state() state.template_name = new_template_name else: state.new_template_name = None with col1b, st.beta_expander("or Select Prompt", expanded=True): template_list = dataset_templates.all_template_names if state.template_name: index = template_list.index(state.template_name) else: index = 0 state.template_name = st.selectbox( "", template_list, key="template_select", index=index, help="Select the prompt to work on.") if st.button("Delete Prompt", key="delete_prompt"): dataset_templates.remove_template(state.template_name) reset_template_state() variety_guideline = """ :heavy_exclamation_mark::question:Creating a diverse set of prompts whose differences go beyond surface wordings (i.e. marginally changing 2 or 3 words) is highly encouraged. Ultimately, the hope is that exposing the model to such a diversity will have a non-trivial impact on the model's robustness to the prompt formulation. \r**To get various prompts, you can try moving the cursor along these axes**: \n- **Interrogative vs affirmative form**: Ask a question about an attribute of the inputs or tell the model to decide something about the input. \n- **Task description localization**: where is the task description blended with the inputs? In the beginning, in the middle, at the end? \n- **Implicit situation or contextualization**: how explicit is the query? 
For instance, *Given this review, would you buy this product?* is an indirect way to ask whether the review is positive. """ col1, _, _ = st.beta_columns([18, 1, 6]) with col1: if state.template_name is not None: show_text(variety_guideline, with_markdown=True) # # Edit the created or selected template # col1, _, col2 = st.beta_columns([18, 1, 6]) with col1: if state.template_name is not None: template = dataset_templates[state.template_name] # # If template is selected, displays template editor # with st.form("edit_template_form"): updated_template_name = st.text_input( "Name", value=template.name) state.reference = st.text_input( "Prompt Reference", help= "Short description of the prompt and/or paper reference for the prompt.", value=template.reference, ) # Metadata state.metadata = template.metadata state.metadata.original_task = st.checkbox( "Original Task?", value=template.metadata.original_task, help= "Prompt asks model to perform the original task designed for this dataset.", ) state.metadata.choices_in_prompt = st.checkbox( "Choices in Template?", value=template.metadata.choices_in_prompt, help= "Prompt explicitly lists choices in the template for the output.", ) # Metrics from here: # https://github.com/google-research/text-to-text-transfer-transformer/blob/4b580f23968c2139be7fb1cd53b22c7a7f686cdf/t5/evaluation/metrics.py metrics_choices = [ "BLEU", "ROUGE", "Squad", "Trivia QA", "Accuracy", "Pearson Correlation", "Spearman Correlation", "MultiRC", "AUC", "COQA F1", "Edit Distance", ] # Add mean reciprocal rank metrics_choices.append("Mean Reciprocal Rank") # Add generic other metrics_choices.append("Other") # Sort alphabetically metrics_choices = sorted(metrics_choices) state.metadata.metrics = st.multiselect( "Metrics", metrics_choices, default=template.metadata.metrics, help= "Select all metrics that are commonly used (or should " "be used if a new task) to evaluate this prompt.", ) # Answer choices if template.get_answer_choices_expr() is not None: answer_choices = template.get_answer_choices_expr( ) else: answer_choices = "" state.answer_choices = st.text_input( "Answer Choices", value=answer_choices, help= "A Jinja expression for computing answer choices. " "Separate choices with a triple bar (|||).", ) # Jinja state.jinja = st.text_area("Template", height=40, value=template.jinja) # Submit form if st.form_submit_button("Save"): if (updated_template_name in dataset_templates.all_template_names and updated_template_name != state.template_name): st.error( f"A prompt with the name {updated_template_name} already exists " f"for dataset {state.templates_key}.") elif updated_template_name == "": st.error("Need to provide a prompt name.") else: # Parses state.answer_choices if state.answer_choices == "": updated_answer_choices = None else: updated_answer_choices = state.answer_choices dataset_templates.update_template( state.template_name, updated_template_name, state.jinja, state.reference, state.metadata, updated_answer_choices, ) # Update the state as well state.template_name = updated_template_name # # Displays template output on current example if a template is selected # (in second column) # with col2: if state.template_name is not None: st.empty() template = dataset_templates[state.template_name] prompt = template.apply(example) if prompt == [""]: st.write("∅∅∅ *Blank result*") else: st.write("Input") show_text(prompt[0], width=40) if len(prompt) > 1: st.write("Target") show_text(prompt[1], width=40) # # Must sync state at end # state.sync()
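# Note: in the "Helicopter view" excerpt above, `split_sizes` is initialized empty and the
# code that fills it was lost in extraction. A minimal sketch of how the per-split sizes
# could be collected with the `datasets` library; this helper is illustrative, not
# necessarily the app's own:
from datasets import get_dataset_infos

def collect_split_sizes(dataset_name, subset_name=None):
    """Return {split_name: num_examples} for a dataset/subset without downloading it."""
    infos = get_dataset_infos(dataset_name)
    # Fall back to the first config when no subset is given.
    info = infos[subset_name] if subset_name in infos else list(infos.values())[0]
    return {split: split_info.num_examples for split, split_info in info.splits.items()}

# Example: collect_split_sizes("ag_news") -> {"train": 120000, "test": 7600}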
dataset = PegaFile2(file, separador) # show a slice of the data n_heads = st.slider('Number of rows to display', min_value=0, max_value=len(dataset), value=1, step=1) st.write(dataset.head(n_heads)) ##---------------------TYPE OF DATA ANALYSIS-------------------------## analise = st.selectbox('Select the type of exploratory analysis:', ('Missing data analysis', 'Data distribution', 'Statistics', 'Correlations', 'Machine Learning model')) if analise == 'Missing data analysis': explorer = pd.DataFrame({'Variables': dataset.columns, 'Missing count': dataset.isna().sum(axis=0).values, 'Data type': dataset.dtypes.values}) if sum(explorer['Missing count'].values) > 0: st.text('There is missing data in the dataset!') st.header('Data imputation') var = st.selectbox('Select the variable to evaluate', options=dataset.columns) st.subheader('Select the imputation strategy:') if dataset[var].dtypes == 'float64' or dataset[var].dtypes == 'int64': opt = st.selectbox('', options=('mean', 'median', 'mode', 'zeros', 'drop missing data')) if opt == 'mean': st.write('The data in column ', var, ' will be replaced with: ', dataset[var].mean(), '.') botao = st.button('Impute data') if botao: dataset[var] = dataset[var].fillna(dataset[var].mean())
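# The imputation block above only implements the 'mean' strategy. A compact sketch of
# all five options offered in the selectbox (assuming `dataset` is a pandas DataFrame
# and `var` a numeric column, as above):
import pandas as pd

def impute(dataset: pd.DataFrame, var: str, strategy: str) -> pd.DataFrame:
    """Fill (or drop) missing values in `var` according to the chosen strategy."""
    if strategy == 'mean':
        return dataset.assign(**{var: dataset[var].fillna(dataset[var].mean())})
    if strategy == 'median':
        return dataset.assign(**{var: dataset[var].fillna(dataset[var].median())})
    if strategy == 'mode':
        return dataset.assign(**{var: dataset[var].fillna(dataset[var].mode()[0])})
    if strategy == 'zeros':
        return dataset.assign(**{var: dataset[var].fillna(0)})
    if strategy == 'drop missing data':
        return dataset.dropna(subset=[var])
    raise ValueError(f'Unknown strategy: {strategy}')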
Image.fromarray(gray).save(data, format="PNG") self._data["gray.png"] = data.getvalue() def save(self): for name, data in self._data.items(): Image.open(io.BytesIO(data)).save("/tmp/%s" % name) def get_images(self): return self._data # Generate some images. si = StreamlitImages() # Get a single image of bytes and display st.header("individual image bytes") filename = "image.png" data = si.get_images().get(filename) st.image(data, caption=filename, format="png") # Display a list of images st.header("list images") images = [] captions = [] for filename, data in si.get_images().items(): images.append(data) captions.append(filename) st.image(images, caption=captions, format="png") st.header("PIL Image") data = []
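# The "PIL Image" section above is cut off mid-statement. A plausible continuation,
# sketched under the assumption that it mirrors the earlier sections by displaying
# the stored bytes again, this time as PIL Image objects:
import io
import streamlit as st
from PIL import Image

pil_images = [Image.open(io.BytesIO(d)) for d in si.get_images().values()]
st.image(pil_images, caption=list(si.get_images().keys()), format="png")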
def select_model(loaded_data, model_selection='user_user'): # default model is user-user based collaborative filtering if model_selection == 'user_user': algo = KNNWithMeans(k=50, sim_options={'name': 'pearson_baseline', 'user_based': True}) elif model_selection == 'item_item': algo = KNNWithMeans(k=50, sim_options={'name': 'pearson_baseline', 'user_based': False}) else: algo = mf.matrix_factorization_param(loaded_data) return algo import os, io st.title('Welcome to RecServe!') st.header('Let me help you with the product recommendations') option1 = st.selectbox( 'Select the dataset path', ['sample_us.tsv']) #st.write('sample_us.tsv') #st.write('You selected:',option1) #url = st.text_input('Enter the path for the data') st.write('The data is loaded') #data_load_state = st.text('Loading the data') data = ds.get_data(option1) #st.write(data) #data = ds.get_data(_file_path, 'data/data_subset.csv', 0.99) #data = ds.get_data('/Users/lalitharahul/Desktop/AutoRecommender/RecServe/sample_us.tsv') #data = ds.get_data(url)
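# How the selected algorithm might be trained and queried, sketched with the
# `surprise` library. The rating-column names below are assumptions based on the
# Amazon review TSV used above, and the IDs in the usage comment are illustrative:
from surprise import Dataset, Reader

def train_model(data, model_selection='user_user'):
    """Fit the chosen algorithm on a ratings DataFrame with user/item/rating columns."""
    reader = Reader(rating_scale=(1, 5))
    ratings = Dataset.load_from_df(
        data[['customer_id', 'product_id', 'star_rating']], reader)
    trainset = ratings.build_full_trainset()
    algo = select_model(data, model_selection)
    algo.fit(trainset)
    return algo

# algo = train_model(data)
# estimated_rating = algo.predict(uid='52695863', iid='B00EDBY7X8').est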
def main(): sucesso = False sucesso2 = False codigo_acao = "" st.title('Stock Analysis') st.subheader('Objectives') st.text( 'Analyze the data of a stock and correlate its behavior with the Ibovespa index.' ) st.text( 'Predict the closing price of a stock based on its last 60 days' ) st.image('https://media.giphy.com/media/l0HlDDyxBfSaPpU88/giphy.gif', width=700) st.sidebar.title('AceleraDev Data Science') st.sidebar.image('logo.png', width=280) st.sidebar.subheader("By Pável Lelis") st.sidebar.title("What would you like to do?") app_mode = st.sidebar.selectbox("", [ "", "Exploratory Analysis", "Predict the Close", "View this App's Workflow" ]) if app_mode == "Exploratory Analysis": #st.sidebar.success('To continue, type the stock ticker.') codigo_acao = st.sidebar.text_input('Type the stock ticker here') datainicio = st.sidebar.date_input('Start date') datafim = st.sidebar.date_input('End date') #botao=st.sidebar.button("Load Data") #if botao: # Data request and loading if codigo_acao != "" and datainicio != datafim: codigo_acao2 = codigo_acao + ".SA" df = web.DataReader(codigo_acao2, data_source='yahoo', start=datainicio, end=datafim) df_ibovespa = web.DataReader('^BVSP', data_source='yahoo', start=datainicio, end=datafim) df_quote = df sucesso = True # Standardizing the DataFrame for c in df_ibovespa.columns: df_ibovespa.rename(columns={c: c + '_IBOV'}, inplace=True) for c in df_quote.columns: df_quote.rename(columns={c: c + '_' + codigo_acao.upper()}, inplace=True) df_dados = pd.merge(df_quote, df_ibovespa, left_index=True, right_index=True) def normalizaIbov(x): return (x - (df_dados[df_dados.columns[9]]).min()) / ( (df_dados[df_dados.columns[9]]).max() - (df_dados[df_dados.columns[9]]).min()) def normalizaAcao(x): return (x - (df_dados[df_dados.columns[3]]).min()) / ( (df_dados[df_dados.columns[3]]).max() - (df_dados[df_dados.columns[3]]).min()) df_dados['AcaoNorm'] = df_dados[df_dados.columns[3]].apply( normalizaAcao) df_dados['IbovNorm'] = df_dados[df_dados.columns[9]].apply( normalizaIbov) elif app_mode == "Predict the Close": if codigo_acao == '': codigo_acao = st.sidebar.text_input('Type the stock ticker here') sucesso2 = True else: escolhaPrevisao = st.sidebar.selectbox( "Which close shall we predict?", [codigo_acao, 'Ibov', "Another Stock"]) if escolhaPrevisao == "Another Stock": codigo_acao = st.sidebar.text_input( 'Type the stock ticker here') sucesso2 = True elif escolhaPrevisao == 'Ibov': codigo_acao = '^BVSP' sucesso2 = True else: pass elif app_mode == "View this App's Workflow": st.image("App Workflow.png", width=850) if sucesso: st.title(codigo_acao) st.text( 'The IBOV and analyzed stock data are shown starting from the most recent date.' ) num_linha = st.slider('Rows', 5, len(df_dados)) st.dataframe(df_dados.tail(num_linha)) st.title("Charts") selecionaGrafico = st.selectbox('Choose the chart for analysis', [ '', 'Candlestick', 'IBOV vs Stock Scatter', 'Normalized Curves', ]) if selecionaGrafico == "Candlestick": papel = st.selectbox('For which history?', ['', codigo_acao, 'IBOV']) if papel != "": ver_acao = False if papel == codigo_acao: ver_acao = True st.header("CandleStick") st.write(criar_candlestick(df_dados, ver_acao, codigo_acao)) if selecionaGrafico == "IBOV vs Stock Scatter": st.header("IBOV vs Stock Scatter") st.write(criar_dispersao(df_dados, codigo_acao)) if selecionaGrafico == "Normalized Curves": st.header("Normalized Curves") st.write(criar_curvas_norm(df_dados, codigo_acao)) if sucesso2: # Data request and loading codigo_acao2 = codigo_acao + ".SA" hoje = date.today() intervalo = timedelta(1200) passado = hoje - intervalo if codigo_acao != "": df = web.DataReader(codigo_acao2, data_source='yahoo', start=passado, end=hoje) texto = 'Predicting the next close of ' + codigo_acao st.title(texto) realizarPrevisao(df, codigo_acao) ################################################## st.text("Processing data...") # Creating a new dataframe with the 'Close' column data = df.filter(['Close']) # Creating a data vector dataset = data.values # getting the number of records for the training set (80/20 split) training_data_len = math.ceil(len(dataset) * .8) scaler = MinMaxScaler(feature_range=(0, 1)) scaled_data = scaler.fit_transform(dataset) # Creating the normalized dataset train_data = scaled_data[0:training_data_len, :] # Splitting into x_train and y_train x_train = [] y_train = [] for i in range(60, len(train_data)): x_train.append(train_data[i - 60:i, 0]) y_train.append(train_data[i, 0]) # Creating numpy arrays x_train, y_train = np.array(x_train), np.array(y_train) # Reshaping the data into the format expected by the LSTM x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1)) st.text("Configuring the model...") # Configuring the LSTM model model = Sequential() model.add( LSTM(units=50, return_sequences=True, input_shape=(x_train.shape[1], 1))) model.add(LSTM(units=50, return_sequences=False)) model.add(Dense(units=25)) model.add(Dense(units=1)) st.text("Compiling the model...") # Compiling the model model.compile(optimizer='adam', loss='mean_squared_error') st.text("Training the model...") # Training the model model.fit(x_train, y_train, batch_size=1, epochs=1) # Checking the model fit st.text("Checking the model fit...") test_data = scaled_data[training_data_len - 60:, :] # Creating x_test and y_test x_test = [] y_test = dataset[training_data_len:, :] for i in range(60, len(test_data)): x_test.append(test_data[i - 60:i, 0]) # Converting x_test to a numpy array x_test = np.array(x_test) # Reshaping the data into the format expected by the LSTM x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], 1)) predictions = model.predict(x_test) predictions = scaler.inverse_transform( predictions) # Undoing the normalization # Plot/Create the data for the graph train = data[:training_data_len] valid = data[training_data_len:].copy() valid['Predictions'] = predictions # Visualize the data fig = plt.figure() plt.figure(figsize=(16, 8)) plt.title('Model', fontsize=36) plt.xlabel('Date', fontsize=18) plt.ylabel('Closing Price (BRL)', fontsize=18) plt.plot(train['Close']) plt.plot(valid[['Close', 'Predictions']]) plt.legend(['Train', 'Actual', 'Predictions'], loc='lower right') st.pyplot() ################################################## def previstosxhistoricos(data): valid = data[training_data_len:].copy() valid['Predictions'] = predictions fig = go.Figure() # fig.add_trace(go.Scatter( y=train['Close'],mode='lines+markers',name='Train')) fig.add_trace( go.Scatter(x=valid.index, y=valid['Close'], mode='lines+markers', name='Historical Values')) fig.add_trace( go.Scatter(x=valid.index, y=valid['Predictions'], mode='lines+markers', name='Predictions')) fig.update_layout( title={ 'text': "Predicted vs Historical Values", 'y': 0.9, 'x': 0.5, 'xanchor': 'center', 'yanchor': 'top' }) return fig st.write((previstosxhistoricos(data))) st.text("Predicting the next close...") ############################################################# # Get the quote codigo_acao = codigo_acao # +".SA" quote = web.DataReader(codigo_acao + ".SA", data_source='yahoo', start=passado, end=hoje) # Create a new dataframe new_df = quote.filter(['Close']) # Get the last 60 days' closing prices last_60_days = new_df[-60:].values scaler = MinMaxScaler(feature_range=(0, 1)) # Scale the data to be values between 0 and 1 last_60_days_scaled = scaler.fit_transform(last_60_days) # Create an empty list X_test = [] # Append the past 60 days X_test.append(last_60_days_scaled) # Convert the X_test data set to a numpy array X_test = np.array(X_test) # Reshape the data X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1)) # Get the predicted scaled price pred_price = model.predict(X_test) # undo the scaling pred_price = scaler.inverse_transform(pred_price) if (round( (pred_price.tolist()[0][0]), 2)) > valid.Close.tail(1)[0]: resp = 'It will go up' else: resp = 'It will go down' valor_previsao = "The predicted value for the next close is " + str( round((pred_price.tolist()[0][0]), 2)) + ". " + resp + "! " st.title(valor_previsao) st.image('Nota.png', width=600)
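# The 60-day sliding-window construction is written out twice above (once for training,
# once for testing). A small helper that captures the same logic, as a sketch:
import numpy as np

def make_windows(series: np.ndarray, lookback: int = 60):
    """Turn an (n, 1) scaled price array into LSTM inputs of shape (n-lookback, lookback, 1)."""
    x, y = [], []
    for i in range(lookback, len(series)):
        x.append(series[i - lookback:i, 0])
        y.append(series[i, 0])
    x = np.array(x).reshape(-1, lookback, 1)
    return x, np.array(y)

# Usage against the variables above:
# x_train, y_train = make_windows(scaled_data[:training_data_len])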
#test_dict={"Linearity":["fbp","ar"],"Low Contrast":["227FOV"],"Resolution":["std","bone"],"Uniformity":["head","body","multislice","multislice_monoenergetic"],"Thickness":["single_slice","multi_slice"],"Cart":["Cart displacement"],"Iodine":["Iodine"]} catphan_dict = { "Linearity": ["fbp", "asir"], "Resolution": ["std", "bone"], "Thickness": ["single_slice", "multi_slice"], "Cart": ["Cart displacement"] } ge_dict = { "Low Contrast": ["227FOV"], "Resolution": ["std", "bone"], "Uniformity": ["head", "body", "multislice", "multislice_monoenergetic"], "Iodine": ["Iodine"] } st.header("TAC QUALITY ASSURANCE") page_bg_img = ''' <style> body { background-image: url("https://edu.ieee.org/pa-upanama/wp-content/uploads/sites/374/2015/02/minimalistic-simple-background-white-206534-1920x12002.jpg"); background-size: cover; } </style> ''' st.markdown(page_bg_img, unsafe_allow_html=True) file_loader = st.empty() state = _get_state()
def main(): """Online Retail Analytics ML App""" st.title("Online Retail Analytics") #st.subheader("Streamlit ML App") activities = ['EDA', 'Prediction', 'About'] choices = st.sidebar.selectbox("Select Activities", activities) data = load_data('data/dataset.csv') if choices == 'EDA': st.header("Exploratory Data Analysis") choice1 = st.sidebar.selectbox("Choose One:", [ "Show top 5 rows of data", "Show Summary of Dataset", "Customer Retention Rate", "User Type Revenue" ]) if choice1 == "Show top 5 rows of data": st.write("Top 5 rows of data") st.dataframe(data.head(5)) if choice1 == "Show Summary of Dataset": st.write("Summary of Dataset") st.write(data.describe()) if choice1 == "Customer Retention Rate": st.write("Customer Retention Rate") ret_rate = pd.read_csv(os.getcwd() + "/data/retention_rate.csv") # Replace using dictionary ret_rate['InvoiceYearMonth'] = ret_rate[ 'InvoiceYearMonth'].replace({ 201102: "Feb", 201103: "Mar", 201104: "Apr", 201105: "May", 201106: "June", 201107: "July", 201108: "Aug", 201109: "Sep", 201110: "Oct", 201111: "Nov", 201112: "Dec", }) plt.plot('InvoiceYearMonth', 'TotalUserCount', data=ret_rate, marker='o', markerfacecolor='blue', markersize=12, color='skyblue', linewidth=4) plt.plot('InvoiceYearMonth', 'RetainedUserCount', data=ret_rate, marker='o', markerfacecolor='red', markersize=12, color='tomato', linewidth=4) plt.legend() st.pyplot() if choice1 == "User Type Revenue": st.write("User Type Revenue") user_revenue = pd.read_csv(os.getcwd() + "/data/User_Type_Revenue.csv") user_revenue = user_revenue.drop(columns=['Unnamed: 0']) # user_revenue['InvoiceYearMonth'] = user_revenue['InvoiceYearMonth'].replace({ # 201012:"Dec 2010", # 201101:"Jan 2011", # 201102:"Feb 2011", # 201103:"Mar 2011", # 201104:"Apr 2011", # 201105:"May 2011", # 201106:"June 2011", # 201107:"July 2011", # 201108:"Aug 2011", # 201109:"Sep 2011", # 201110:"Oct 2011", # 201111:"Nov 2011", # 201112:"Dec 2011", # }) viz = sns.barplot(x="InvoiceYearMonth", y="Revenue", data=user_revenue, hue='UserType') plt.xticks(rotation=-45) viz.set(ylabel='Revenue') plt.show() st.pyplot() if choices == 'Prediction': st.header("Prediction Analytics") choice = st.sidebar.selectbox("Choose One:", [ "Customer Segmentation", "Cross Selling", "Customer Lifetime Value", "Next Purchase Day" ]) if choice == "Customer Segmentation": st.subheader("Customer Segmentation") st.write("Classifying Customers based on RFM Model") customer_segmentation = pd.read_csv( os.getcwd() + "/data/Customer_Segmentation.csv") customer_segmentation['CustomerID'] = customer_segmentation[ 'CustomerID'].astype(int) customerID = st.selectbox( 'CustomerID', customer_segmentation['CustomerID'].head(100)) if st.button("Submit"): selected_customer = customer_segmentation.loc[ customer_segmentation['CustomerID'] == customerID] st.write(selected_customer[[ 'Segment', 'Recency', 'Frequency', 'Revenue' ]]) if choice == "Customer Lifetime Value": st.subheader("Customer Lifetime Value") st.write("Predicting LTV using XGBoost classifier") clv = pd.read_csv(os.getcwd() + "/data/CLV.csv") clv['CustomerID'] = clv['CustomerID'].astype(int) customerID = st.selectbox('CustomerID', clv['CustomerID'].head(100)) if st.button("Submit"): selected_customer = clv.loc[clv['CustomerID'] == customerID] st.write(selected_customer['Customer_Lifetime_value']) if choice == "Next Purchase Day": st.subheader("Next Purchase Day") st.write("Predict Next Purchase Day using KNN") st.write( "NextPurchaseDayClass=0:Customer Will Purchase in more than 50 days" ) st.write( 
"NextPurchaseDayClass=1:Customer Will Purchase in 21-49 days") st.write( "NextPurchaseDayClass=2:Customer Will Purchase in 0-20 days") Next_pday = pd.read_csv(os.getcwd() + "/data/Next.csv", encoding='unicode_escape', index_col=False) Next_pday['Customer_Id'] = Next_pday['Customer_Id'].astype('str') Customer = st.selectbox("Select CustomerID:", Next_pday['Customer_Id']) if st.button("Submit"): st.write(Next_pday.loc[Next_pday['Customer_Id'] == Customer, 'NextPurchaseDayClass']) if choice == "Cross Selling": st.subheader("Cross Selling") st.write("Market Basket Analysis using FP Growth") #Reading Data From Web #data = load_data('data/dataset.csv') #input1= st.selectbox("Select min support",[0.1,0.2,0.3,0.4,0.5]) #if st.button("Submit"):' #country = st.sidebar.selectbox("Choose a country",data['Country'].unique()) #if st.sidebar.button("Submit1"): #st.write("You selected: ",country) #min_support=input1 #st.write("You selected this option ",input1) #Cleaning data['Description'] = data['Description'].str.strip() data.dropna(axis=0, subset=['InvoiceNo'], inplace=True) data['InvoiceNo'] = data['InvoiceNo'].astype('str') data = data[~data['InvoiceNo'].str.contains('C')] data.head() #Separating transactions for Country basket = (data[data['Country'] == 'Germany'].groupby([ 'InvoiceNo', 'Description' ])['Quantity'].sum().unstack().reset_index().fillna(0).set_index( 'InvoiceNo')) #converting to 1 and 0 def encoder(x): if x <= 0: return 0 if x >= 1: return 1 basket = basket.applymap(encoder) basket.drop('POSTAGE', inplace=True, axis=1, errors='ignore') #Generatig frequent itemsets itemsets = fpgrowth(basket, min_support=0.07, use_colnames=True) #generating rules rules = association_rules(itemsets, metric="lift", min_threshold=1) #rules=rules[['antecedents','consequents']] #df=rules #df.columns = ['Input', 'Output'] choice = st.selectbox("Choose One:", rules['antecedents'].head(100)) if st.button("Submit"): output = rules.loc[rules['antecedents'] == choice] st.write(output[['consequents']]) if choices == 'About': st.subheader("About") st.write( "TK Maxx is a subsidiary of the American apparel and home goods company TJX Companies and offers customers across various countries great values on brand name apparel and more, including high-end designer goods and juniors." ) st.write( "TK Maxx wants to analyse the customer transactions at their stores over an 8 month period to understand their behavior and make some predictions about their customer behavior. They also plan to use this data to cross-sell products which are frequently bought together." ) st.write( "We are building a web app to analyse the key metrics using various algorithms to segment customers, predict the lifetime value of each customer, product recommendation and predict the next purchase date." ) st.write( "A marketing analyst at TK Maxx can use these insights to develop strategies like targeting users, identifying important customers , predict the amount to be spent on acquiring or retaining customers and offer personalised recommendations" )
def corporation(): st.header('Introduction to Korean autonomous vehicle development technology') c_list = ['Hyundai Motor', 'Kakao Mobility', 'a2z', 'ThorDrive', 'StradVision', 'ControlWorks'] choice = st.selectbox('Please select', c_list) if choice == 'Hyundai Motor': st.image('hyundai.jpg', width=None) st.subheader('Hyundai Motor') st.write('- Hyundai Motor, the leader in Korean finished-vehicle technology') st.write('- Hyundai Motor presents the social benefits of autonomous driving as shown below') st.image('social.png', width=None) st.write('- Offers up to Level 3 autonomous driving in all vehicles currently on sale') st.write('- Growing rapidly through in-house development and development with partner companies') # url1 = 'https://motional.com/' link = '[Go to homepage](https://motional.com/)' st.write('- Hyundai Motor partner: Motional') st.markdown(link, unsafe_allow_html=True) # if st.button('Go to homepage'): # webbrowser.open('https://motional.com/', new=1) if choice == 'Kakao Mobility': st.image('kko.png', width=None) st.subheader('Kakao Mobility') st.write('- Kakao Mobility approaches autonomous driving as a service offering rather than by developing finished vehicles') st.write('- Has shown steady growth since starting development in March 2020') st.write('- Splits the system into a control system that supervises autonomous vehicles and an operations system that oversees it, aiming to provide the safest service by combining data, technology and operational know-how') st.write('- Is developing both the technology that perceives the surroundings, decides the driving strategy and controls the vehicle, and a service that dispatches vehicles on demand and steers them remotely') st.write('- Above all, having reached a dominant position in big data, Kakao Mobility seems likely to hold the advantage in services tailored to the environment') link1 = '[View related article](https://auto.v.daum.net/v/20200303103632878)' st.markdown(link1, unsafe_allow_html=True) # if st.button('View related article'): # webbrowser.open_new_tab('https://auto.v.daum.net/v/20200303103632878') if choice == 'a2z': st.image('a2z.jpg', width=None) st.subheader('Autonomous a2z') st.write('- A startup specializing in autonomous vehicle development, founded by three engineers who worked in Hyundai Motor\'s autonomous driving development division') st.write('- Started development in 2018 and is currently running real-world test drives in Sejong') st.write('- Achieving remarkable growth compared with other startups, and its partnerships with local governments and numerous companies show plenty of future growth potential') st.write('- Developed and distributed an SDK (Software Development Kit) for autonomous driving developers, lowering the entry barrier to the development environment') st.write('- a2z became widely known when automotive journalist Kim Han-yong posted a test-ride video of an a2z autonomous vehicle') st.video('https://youtu.be/FAnnpoHkfgw') st.subheader('a2z partner companies') st.image('ptn.png', width=None) link3 = '[Go to homepage](http://www.autoa2z.co.kr/)' st.markdown(link3, unsafe_allow_html=True) # if st.button('Go to a2z homepage'): # webbrowser.open_new_tab('http://www.autoa2z.co.kr/') if choice == 'ThorDrive': st.image('토르.jpg', width=None) st.subheader('ThorDrive') st.write('- A startup founded in 2016 by researchers from Seoul National University; it is currently developing autonomous vehicles in Seoul and parts of the US.') st.write('- Unlike other Korean startups it has successfully expanded to the US, and it has developed autonomous cars as well as autonomous electric carts that can operate indoors') st.write('- It is validating the practicality of its autonomous driving technology by applying it to various business models such as aviation and the transport industry') st.subheader('ThorDrive company introduction') st.video('https://youtu.be/Kuiv2Fb-n0U') st.subheader('Incheon Airport autonomous electric cart deployment') st.video('https://youtu.be/GNnwR-lGJUU') link4 = '[Go to the article](https://www.irobotnews.com/news/articleView.html?idxno=22582)' st.markdown(link4, unsafe_allow_html=True) # if st.button('Go to the article'): # webbrowser.open_new_tab('https://www.irobotnews.com/news/articleView.html?idxno=22582') link5 = '[Go to homepage](https://www.thordrive.ai/)' st.markdown(link5, unsafe_allow_html=True) # if st.button('Go to ThorDrive homepage'): # url5 = 'https://www.thordrive.ai/' # webbrowser.open_new_tab('https://www.thordrive.ai/') if choice == 'StradVision': st.image('스트라드비전.png', width=None) st.subheader('StradVision') st.write('- Founded in 2014; its original goal was to develop AI-based object recognition software for wearable hardware devices') st.write('- Instead it drew the automotive industry\'s attention and, after winning orders for autonomous driving software development, successfully pivoted toward that goal') st.write("""- It currently supplies 'SVNet', its software for autonomous driving, to finished-vehicle manufacturers for 9 million vehicles""") st.write('- It has offices in the US, China, Germany, Japan and India, and works actively as a partner of large companies such as Hyundai Motor') st.subheader('StradVision company introduction video') st.video('https://youtu.be/da4YM27FSss') if choice == 'ControlWorks': st.image('컨트롤웍스.png', width=None) st.subheader('ControlWorks') st.write('- ControlWorks is better known for building and supplying the hardware autonomous driving requires than for developing the software itself') st.write('- It supplies products needed for autonomous driving development, including lidar sensors, and has built autonomous driving systems not only with corporate partners but also with university development teams') st.image('control.png', width=None) link6 = '[Go to homepage](https://www.control-works.co.kr/)' st.markdown(link6, unsafe_allow_html=True)
def main(): st.title('My first app') st.header("This is a header") st.subheader("this is a subheader") st.text("this is my test") code = ''' def myf(variable): return variable ''' st.code(code, language="python") st.header("This section is about data frame") st.write("Here's our first attempt at using data to create a table:") df = pd.DataFrame( { 'first column': [1, 2, 3, 4], 'second column': [10, 20, 30, 40] }, index=['a', 'b', 'c', 'd']) df.index.name = "my shortcut" st.write(df) st.dataframe(df) st.table(df) chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c']) st.line_chart(chart_data) map_data = pd.DataFrame(0.1 * np.random.randn(1000, 2) / [50, 50] + [37.76, -122.4], columns=['lat', 'lon']) st.dataframe(map_data) st.map(map_data) if st.checkbox('Show dataframe'): chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c']) st.line_chart(chart_data) option = st.selectbox("What sports do you like?", ['Basketball', "Baseball"]) st.write("You like ", option) chart_data = pd.DataFrame(np.random.randn(20, 3), columns=['a', 'b', 'c']) st.sidebar.line_chart(chart_data) left_column, right_column = st.beta_columns(2) pressed = left_column.button('Press me?') if pressed: right_column.write("Woohoo!") expander = st.beta_expander("FAQ") expander.write( "Here you could put in some really, really long explanations...") st.write('Starting a long computation...') # Add a placeholder latest_iteration = st.empty() bar = st.progress(0) for i in range(100): # Update the progress bar with each iteration. latest_iteration.text(f'Iteration {i+1}') bar.progress(i + 1) time.sleep(0.001) st.write('...and now we\'re done!') if st.button("Run my function"): st.write("Running.....") val = my_long_func() st.write("Value = ", val) genre = st.radio("What's your favorite movie genre", ('Comedy', 'Drama', 'Documentary')) if genre == "Comedy": st.write("You selected Comedy") else: st.write("You did not like Comedy???") options = st.multiselect('What are your favorite colors', ['Green', 'Yellow', 'Red', 'Blue']) st.write("You selected: ", options) age = st.slider('How old are you?', 0, 130, 25) st.write("I'm ", age, 'years old') title = st.text_input('Movie title', 'Life of Brian') st.write('The current movie title is', title) txt = st.text_area('Text to analyze') st.write(txt) bd = st.date_input("When is your birthday?") st.write("Your birthday is:", bd) t = st.time_input('Set an alarm for', datetime.time(8, 42)) st.write('Alarm is set for', t) uploaded_file = st.file_uploader("Choose a file") if uploaded_file: dataframe = pd.read_csv(uploaded_file) st.write(dataframe) color = st.color_picker('Pick A Color', '#00f900') st.write('The current color is', color) st.markdown(get_table_download_link(df), unsafe_allow_html=True) num1 = st.number_input("Insert number 1", value=0.0) num2 = st.number_input("Insert number 2", value=0.0) res = expensive_computation(num1, num2) st.write(f"{num1} x {num2} equals ", res)
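# The demo above calls three helpers that are not defined in this excerpt:
# my_long_func, expensive_computation and get_table_download_link. Minimal sketches
# follow; the cached bodies are assumptions, while the download link follows the
# well-known base64 data-URI pattern used elsewhere in this document:
import base64
import time
import pandas as pd
import streamlit as st

@st.cache
def my_long_func():
    time.sleep(2)  # stand-in for a slow computation
    return 42

@st.cache
def expensive_computation(a, b):
    time.sleep(2)  # cached, so only the first call for a given (a, b) is slow
    return a * b

def get_table_download_link(df: pd.DataFrame) -> str:
    """Return an <a> tag that downloads the dataframe as a CSV file."""
    b64 = base64.b64encode(df.to_csv(index=False).encode()).decode()
    return f'<a href="data:file/csv;base64,{b64}" download="data.csv">Download CSV</a>'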
).reset_index() fig2 = px.bar(bar2, x='index', y='safety_of_care_national_comparison') st.plotly_chart(fig2) st.markdown( 'Based on this bar chart, the safety of care is above the national average for the majority of hospitals in NC' ) #--------------------- #Drill down into INPATIENT and OUTPATIENT just for NC st.title('INPATIENT DATA - NC') inpatient_nc = df_inpatient_2[df_inpatient_2['provider_state'] == 'NC'] total_inpatient_count = sum(inpatient_nc['total_discharges']) st.header('Total Count of Discharges from Inpatient Captured: ') st.header(str(total_inpatient_count)) ##Common D/C common_discharges = inpatient_nc.groupby( 'drg_definition')['total_discharges'].sum().reset_index() top10 = common_discharges.head(10) bottom10 = common_discharges.tail(10) st.header('DRGs') st.dataframe(common_discharges) col1, col2 = st.beta_columns(2)
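# The excerpt stops right after creating the two columns; top10 and bottom10 are
# computed but never shown. A plausible continuation, sketched (note that
# common_discharges would need a sort_values for head/tail to be true top/bottom):
with col1:
    st.subheader('Top 10 DRGs by discharges')
    st.dataframe(top10)
with col2:
    st.subheader('Bottom 10 DRGs by discharges')
    st.dataframe(bottom10)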
import streamlit as st import pandas as pd import numpy as np import pickle st.header('Car residual values prediction') st.subheader('The Residual Values') st.write(""" The Residual Values (RV) model predicts car values at given points in time. Several car conditions affect future prices. In the model, categorical features are transformed via one-hot encoding, and numeric features are scaled via normalization and a polynomial transformer. The core model is CatBoost regression. """) st.subheader('To use the model for prediction, please follow the steps below:') st.write(""" 1. On the left side of this page there is an area to input several car conditions. \n 2. Enter the car conditions to be predicted. \n 3. See the results below. """) # Load development data df = pd.read_csv('carData.csv') df = df[df['vehicleType'] == 'car'] # Create sidebar st.sidebar.header('Input car condition')
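# A sketch of the preprocessing + model pipeline the text above describes (one-hot
# encoding for categoricals, normalization plus polynomial features for numerics,
# CatBoost regression). The column names below are assumptions for illustration:
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, MinMaxScaler, PolynomialFeatures
from catboost import CatBoostRegressor

categorical = ['brand', 'fuelType', 'gearbox']                 # assumed columns
numeric = ['yearOfRegistration', 'powerPS', 'kilometer']       # assumed columns

preprocess = ColumnTransformer([
    ('cat', OneHotEncoder(handle_unknown='ignore'), categorical),
    ('num', Pipeline([('scale', MinMaxScaler()),
                      ('poly', PolynomialFeatures(degree=2))]), numeric),
])
model = Pipeline([('prep', preprocess),
                  ('reg', CatBoostRegressor(verbose=0))])
# model.fit(df[categorical + numeric], df['price'])  # assumed target column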
def slick_grid(results): st.header("Slickgrid") st.markdown(""" The SlickGrid example does not work because I cannot inject javascript <script>...</script> tags. References: - [SlickGrid](https://slickgrid.net/) - [SlickGrid examples](https://github.com/mleibman/SlickGrid/tree/gh-pages/examples) """) st.markdown( """ <link rel="stylesheet" href="https://mleibman.github.io/SlickGrid/slick.grid.css" type="text/css"/> <link rel="stylesheet" href="https://mleibman.github.io/SlickGrid/css/smoothness/jquery-ui-1.8.16.custom.css" type="text/css"/> <table width="100%"> <tr> <td valign="top" width="50%"> <div id="myGrid" style="width:600px;height:500px;"></div> </td> <td valign="top"> <h2>Demonstrates:</h2> <ul> <li>basic grid with minimal configuration</li> </ul> <h2>View Source:</h2> <ul> <li><A href="https://github.com/mleibman/SlickGrid/blob/gh-pages/examples/example1-simple.html" target="_sourcewindow"> View the source for this example on Github</a></li> </ul> </td> </tr> </table> <script src="https://mleibman.github.io/SlickGrid/lib/jquery-1.7.min.js"></script> <script src="https://mleibman.github.io/SlickGrid/lib/jquery.event.drag-2.2.js"></script> <script src="https://mleibman.github.io/SlickGrid/slick.core.js"></script> <script src="https://mleibman.github.io/SlickGrid/slick.grid.js"></script> <script> var grid; var columns = [ {id: "title", name: "Title", field: "title"}, {id: "duration", name: "Duration", field: "duration"}, {id: "%", name: "% Complete", field: "percentComplete"}, {id: "start", name: "Start", field: "start"}, {id: "finish", name: "Finish", field: "finish"}, {id: "effort-driven", name: "Effort Driven", field: "effortDriven"} ]; var options = { enableCellNavigation: true, enableColumnReorder: false }; $(function () { var data = []; for (var i = 0; i < 500; i++) { data[i] = { title: "Task " + i, duration: "5 days", percentComplete: Math.round(Math.random() * 100), start: "01/01/2009", finish: "01/05/2009", effortDriven: (i % 5 == 0) }; } grid = new Slick.Grid("#myGrid", data, columns, options); }) </script> """, unsafe_allow_html=True, )
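# Since Streamlit 0.63 the same HTML + <script> payload can be rendered with the
# components API, which executes scripts inside a sandboxed iframe (unlike
# st.markdown, which strips them). A sketch of how the SlickGrid example above
# could be made to work:
import streamlit.components.v1 as components

def slick_grid_component(html: str, height: int = 550):
    """Render raw HTML, including <script> tags, inside an iframe."""
    components.html(html, height=height, scrolling=True)

# slick_grid_component(slickgrid_html)  # pass the markup/scripts shown above as a string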
##st.markdown("One stop solution to Supply Chain Decision Making") from PIL import Image image = Image.open(r'banner.png') st.image(image, use_column_width=True) #FILE UPLOAD st.sidebar.subheader('What would you like work with today?') todo = st.sidebar.selectbox( 'To do:', ('Inbound Logistics', 'Inhouse Preparation', 'Outbound Logistics', 'Fulfillment', 'Sales', 'Article sheet')) if todo == 'Inbound Logistics': st.sidebar.subheader('Inbound Logistics') inbound = st.sidebar.selectbox('To do:', ('Lead Time', 'Purchase Order')) #PURCHASE ORDER FREQUENCY if inbound == 'Purchase Order': st.header("Purchase Order Analysis") st.markdown( "Analysis of purchase order data such as PO frequency, PO volume and PO prices" ) uploaded_file = st.file_uploader("Choose a XLSX file", type="xlsx") if uploaded_file: data = pd.read_excel(uploaded_file) df = pd.DataFrame(data, columns=[ 'PO Number', 'Doc Date', 'Vendor Name', 'Category', 'Base Qty', 'Net value per UoM', 'Grand Total FA PO' ]) #st.markdown("Say hello to your database") #st.dataframe(df) if st.button('Preview Dataset'):
def classifier_page(): # Title st.title('Predicting the Outcome of a Patient with COVID-19') st.header('A Machine Learning Approach') '\n' st.image('/Users/sunnajo/downloads/covidml.jpeg') st.text('Image source: TABIP') '\n' '\n' st.markdown('*Disclaimer*') st.write( 'This content is purely for educational purposes and should NOT be transmitted, used to guide clinical decision making and/or personal decisions regarding seeking medical care or treatment, and/or for any other real-world applications.' ) '\n' '\n' # Functions def load_data(url): data = pd.read_csv(url) return data ## User input areas # Dictionary of age groups age_dict = { "0-9 years": 0, "10-19 years": 1, "20-29 years": 2, "30-39 years": 3, "40-49 years": 4, "50-59 years": 5, "60-69 years": 6, "70-79 years": 7, "80+ years": 8 } age_list = list(age_dict.keys()) st.markdown('### **How old is the patient?**') input_age = st.select_slider('', age_list) age_group = age_dict[input_age] '\n' st.markdown('### **Is the patient hospitalized?**') hosp = st.radio('', ["No", "Yes"]) if hosp == "Yes": hosp = 1 elif hosp == "No": hosp = 0 '\n' st.markdown('### **Is the patient in the ICU?**') icu = st.radio(' ', ["No", "Yes"]) if icu == "Yes": icu = 1 elif icu == "No": icu = 0 '\n' st.markdown( '### **Does the patient have an underlying medical condition?**') med_cond = st.radio(' ', ["No", "Yes"]) if med_cond == "Yes": med_cond = 1 elif med_cond == "No": med_cond = 0 '\n' st.markdown( '### **What is the current positivity rate? (as a percentage)**') pos_rate = st.number_input(' ') if st.button("Look it up"): current_data = load_data( 'https://api.covidtracking.com/v1/us/current.csv') pos_rate_pct = float( current_data['positive'] / current_data['totalTestResults']) * 100 st.write('{:.2f}%'.format(pos_rate_pct)) '\n' st.markdown("### **What is the patient's biological sex?**") sex = st.radio(' ', ["Female", "Male", "Other"]) if sex == "Male": Male = 1 else: Male = 0 '\n' '\n' '\n' # Prediction if st.button("Predict"): result = prediction(icu, hosp, age_group, med_cond, Male, pos_rate) prob_pct = (float( predict_prob(icu, hosp, age_group, med_cond, Male, pos_rate)[:, 1])) * 100 '\n' if result == 0: st.success("The patient likely has a low risk of death") elif result == 1: st.warning("The patient has a higher risk of death") '\n' # Pause time.sleep(1) # Cue for navigating to data section st.markdown('### *How did we come up with this algorithm?*') '\n' st.subheader('Click on the sidebar for data sources')
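# `prediction` and `predict_prob` are called above but not defined in this excerpt.
# Minimal sketches assuming a scikit-learn style classifier serialized with pickle;
# the model filename and feature order are assumptions:
import pickle
import numpy as np

with open('covid_classifier.pkl', 'rb') as f:  # assumed filename
    _model = pickle.load(f)

def prediction(icu, hosp, age_group, med_cond, male, pos_rate):
    """Return the predicted class (0 = lower risk, 1 = higher risk of death)."""
    X = np.array([[icu, hosp, age_group, med_cond, male, pos_rate]])
    return int(_model.predict(X)[0])

def predict_prob(icu, hosp, age_group, med_cond, male, pos_rate):
    """Return class probabilities; column [:, 1] is the probability of death."""
    X = np.array([[icu, hosp, age_group, med_cond, male, pos_rate]])
    return _model.predict_proba(X)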
def load_data(nrows): data = pd.read_csv(Data_Url, nrows=nrows, parse_dates=[['CRASH_DATE', 'CRASH_TIME']]) data.dropna(subset=['LATITUDE', 'LONGITUDE'], inplace=True) lowercase = lambda x: str(x).lower() data.rename(lowercase, axis='columns', inplace=True) data.rename(columns={'crash_date_crash_time': 'date/time'}, inplace=True) return data data = load_data(100000) original_data = data st.header("Where are the most people injured in NYC?") injured_people = st.slider("Number of people injured in collisions", 0, 19) st.map( data.query("injured_persons >= @injured_people")[["latitude", "longitude" ]].dropna(how="any")) st.header("How many collisions occurred during a given time of day?") hour = st.slider("Hour to look at", 0, 23) data = data[data['date/time'].dt.hour == hour] st.markdown("Vehicle collisions between %i:00 and %i:00" % (hour, (hour + 1) % 24)) midpoint = (np.average(data['latitude']), np.average(data['longitude'])) st.write( pdk.Deck(
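# The pdk.Deck call above is cut off mid-statement. A typical completion for this
# kind of collision map (3D hexagon bins around the computed midpoint), shown here
# as a complete call and sketched rather than recovered from the original:
import pydeck as pdk

st.write(pdk.Deck(
    map_style="mapbox://styles/mapbox/light-v9",
    initial_view_state={"latitude": midpoint[0], "longitude": midpoint[1],
                        "zoom": 11, "pitch": 50},
    layers=[pdk.Layer(
        "HexagonLayer",
        data=data[['date/time', 'latitude', 'longitude']],
        get_position=['longitude', 'latitude'],
        radius=100, extruded=True, pickable=True,
        elevation_scale=4, elevation_range=[0, 1000],
    )],
))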
import base64 import pandas as pd import streamlit as st def download_link(dataframe, name): csv = dataframe.to_csv(index=False) b64 = base64.b64encode(csv.encode()).decode() # some strings <-> bytes conversions necessary here href = "<a href='data:file/csv;base64,{}' download='{}'>Download</a>".format(b64, name) st.markdown(href, unsafe_allow_html=True) st.header("File Download") data = [(1, 2, 3)] df = pd.DataFrame(data, columns=["Col1", "Col2", "Col3"]) download_link(df, 'data.csv') st.dataframe(df)
import tempfile import os import time tmpdir = tempfile.mkdtemp() os.environ[ 'TFHUB_CACHE_DIR'] = r'C:\Users\Black\OneDrive\Desktop\tf\tfhub_modules' PAGE_CONFIG = { "page_title": "Dog Vision AI", "page_icon": "dog", "layout": "centered" } st.beta_set_page_config(**PAGE_CONFIG) st.set_option('deprecation.showfileUploaderEncoding', False) st.title("Dog Vision AI 🐶") st.header("Welcome To Dog Breed Identification") st.write('') menu = ["Home", "About", 'Contact'] choice = st.sidebar.selectbox('Menu', menu) if choice == 'Home': # st.write(" bhdsjcbdsjcjdc") def teachable_machine_classification(img, weights_file): # Load the model # weights_file=r'C:\Users\Black\OneDrive\Desktop\Dog_ai_webapp\20200911-121337-10000-images-mobilenet-v2-Adam_optimizer.h5' model = tf.keras.models.load_model( weights_file, custom_objects={'KerasLayer': hub.KerasLayer}) # Create the array of the right shape to feed into the keras model data = np.ndarray(shape=(1, 512, 512, 3), dtype=np.float32) image = img #image sizing
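# The classification helper above is cut off at the image-sizing step. A plausible
# completion, sketched: 512x512 matches the ndarray shape declared above, while the
# 0-1 normalization is an assumption:
import numpy as np
from PIL import Image, ImageOps

def preprocess_and_predict(model, img: Image.Image) -> int:
    """Resize/crop an image to the model's input shape and return the top class index."""
    image = ImageOps.fit(img, (512, 512), Image.LANCZOS)          # resize/crop to 512x512
    data = np.asarray(image, dtype=np.float32)[np.newaxis, ...] / 255.0
    probabilities = model.predict(data)                            # per-breed probabilities
    return int(np.argmax(probabilities))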