def main():
    """Streamlit entry point: pick an EDA tool and run it on an uploaded CSV.

    Tools: pandas-profiling, SweetViz, D-tale, plus an About page.
    """
    menu = ['Pandas-Profiling', 'SweetViz', 'D-tale', 'About']
    option = st.selectbox("Select Tool for Viz", menu)

    if option == 'Pandas-Profiling':
        st.header("Pandas-Profiling")
        data_file = st.file_uploader("Upload_csv", type=['csv'])
        if data_file is not None:
            load_csv = pd.read_csv(data_file)
            st.write(load_csv.head())
            st.success("Successfully uploaded!")
            if st.button('Generate Report'):
                report = ProfileReport(load_csv,
                                       title="CSV Profiling Report",
                                       explorative=True)
                st.write('---')
                st.header('**Pandas Profiling Report**')
                st_profile_report(report)
    elif option == 'SweetViz':
        st.header("SweetViz")
        data_file = st.file_uploader("Upload_csv", type=['csv'])
        if data_file is not None:
            # Only confirm success after an actual upload (the original showed
            # this message unconditionally, unlike the Pandas-Profiling branch).
            st.success("Successfully uploaded!")
            load_csv = pd.read_csv(data_file)
            st.write(load_csv)
            st.write('---')
            st.header('**SweetViz Profiling Report**')
            if st.button('Generate Report'):
                report = sv.analyze(load_csv)
                report.show_html()
                display_sweetviz("SWEETVIZ_REPORT.html")
    elif option == 'D-tale':
        st.header('D-tale')
        data_file = st.file_uploader("Upload_csv", type=['csv'])
        if data_file is not None:
            st.success("Successfully uploaded!")
            load_csv = pd.read_csv(data_file)
            st.write(load_csv)
            st.write('---')
            st.header('**D-Tale Profiling Report**')
            if st.button('Generate Report'):
                dtale.show(load_csv)
                # NOTE(review): host name is hard-coded; this iframe only works
                # on the machine where the D-Tale server runs — confirm.
                components.iframe('http://dell-virlgti:40000/dtale/main/1',
                                  width=1500, height=800, scrolling=True)
    elif option == 'About':
        # Bug fix: the original compared `menu` (the list) to 'About', so this
        # branch was unreachable; the selected value is `option`.
        st.subheader(
            "Simple tool for better and quick visualization and EDA!!")
        st.write(
            "check out this [link](https://share.streamlit.io/mesmith027/streamlit_webapps/main/MC_pi/streamlit_app.py)"
        )
def display_data():
    """Open the loaded dataset in a local D-Tale session and block until Enter.

    The session is killed on exit so the background server does not linger.
    """
    frame = data_loader.get_data()
    session = dtale.show(frame, host="localhost", open_browser=True)
    print("Hit enter to exit.")
    input()
    print("Exiting...")
    session.kill()
def _dtale_show(self, *args, **kwargs) -> dtale.views.DtaleData:
    """Proxy for ``dtale.show`` that pauses briefly after the very first call.

    Rapid back-to-back first calls can trigger race conditions internal to
    dtale, so the initial invocation is followed by a short sleep before the
    flag is cleared.
    """
    shown = dtale.show(*args, **kwargs)
    if not self._first_show:
        return shown
    time.sleep(0.3)  # let dtale's internals settle after the first show
    self._first_show = False
    return shown
def displaydata(self):
    """Show the accumulated trigger data in D-Tale and persist it to triggers.txt."""
    df = pd.DataFrame.from_dict(self.triggerData).T
    d = dtale.show(df, ignore_duplicate=True)
    d.open_browser()
    print(d._url)
    # Bug fix: the original opened the file and never closed it; the context
    # manager guarantees the data is flushed and the handle released.
    with open('triggers.txt', 'w') as trigger_file:
        trigger_file.write(str(self.triggerData))
def backTestPatterns(self):
    """Back-test every candlestick pattern across all tracked symbols.

    For each pattern, averages the next-day percentage return following
    positive and negative triggers, accumulates a trigger count in 'DATA',
    then shows the per-pattern summary in D-Tale and prints it.

    Assumes self.symboldata maps symbol -> {pattern -> Series indexed by
    timestamps} and self.tickerDf[symbol].close is indexed by date strings
    — TODO confirm against the loader.
    """
    patternReturns = {}
    for s in self.symboldata:
        print(s)
        for pattern in self.symboldata[s]:
            # Initialise the accumulators the first time each pattern is seen.
            if s == self.symbols[0]:
                patternReturns[pattern] = {'POS': 0, 'NEG': 0, 'DATA': 0}
            pos_triggers = 0
            pos_returns = 0
            neg_triggers = 0
            neg_returns = 0
            # .iteritems() was removed in pandas 2.0; Series.items() is the
            # long-standing equivalent.
            for date, value in self.symboldata[s][pattern].items():
                try:
                    if value == 0:
                        continue
                    # Skip today's bar: there is no next-day close to score it.
                    if str(date).split(' ')[0] == str(datetime.datetime.today()).split(' ')[0]:
                        continue
                    # (The original also built an unused yfinance.Ticker(s)
                    # here — a dead network call per trigger, now removed.)
                    if value > 0:
                        pos_triggers += 1
                        triggerPrice = self.tickerDf[s].close[str(date)]
                        nextDayPrice = self.tickerDf[s].close[
                            str(date + datetime.timedelta(days=1)).split(' ')[0]]
                        pos_returns += (nextDayPrice / triggerPrice - 1) * 100
                    if value < 0:
                        neg_triggers += 1
                        triggerPrice = self.tickerDf[s].close[str(date)]
                        nextDayPrice = self.tickerDf[s].close[
                            str(date + datetime.timedelta(days=1)).split(' ')[0]]
                        neg_returns += (nextDayPrice / triggerPrice - 1) * 100
                except Exception:
                    # Missing next-day bar (weekend/holiday) — skip as before,
                    # but no longer swallow KeyboardInterrupt/SystemExit.
                    continue
            if pos_triggers != 0:
                patternReturns[pattern]['POS'] += pos_returns / pos_triggers
                patternReturns[pattern]['DATA'] += pos_triggers
            if neg_triggers != 0:
                patternReturns[pattern]['NEG'] += neg_returns / neg_triggers
                patternReturns[pattern]['DATA'] += neg_triggers
    # Average the accumulated per-symbol means over all symbols.
    for pattern in patternReturns:
        patternReturns[pattern]['POS'] = patternReturns[pattern]['POS'] / len(self.symbols)
        patternReturns[pattern]['NEG'] = patternReturns[pattern]['NEG'] / len(self.symbols)
    df = pd.DataFrame.from_dict(patternReturns).T
    d = dtale.show(df, ignore_duplicate=True)
    d.open_browser()
    print(patternReturns)
autos = autos[autos["registration_year"].between(1900, 2016)] # In[14]: #Inspecting the data statistics after removal of above identified rows autos.describe() # In[15]: #Inpscting the "registration_year" column in more detail, specifically getting the value_counts and estimating the most common year. autos["registration_year"].value_counts(normalize=True) # In[16]: dtale.show(autos) # **Write a summary of the above steps, as in the removal of outliers and the calculation of value counts for the registration year.** # **The next section of analysis could essentially focus on the estimation of mean price and mileage across brands** # # - We will group (Aggregate the data) across the most common brands, create dictionaries for each of the variables and save the values # # # In[17]: count_of_brands = autos["brand"].value_counts(normalize=True) #Restricting the above analysis to the top 5% of the brands, would be a reasonable assumption here
import pandas as pd
import dtale

# Load the workbook into a DataFrame (replace the placeholder with a real path).
data = pd.read_excel("YOUR_FILE_PATH.xlsx")
# Launch a D-Tale session for the data and open it in the default browser.
d = dtale.show(data)
d.open_browser()
# Generate one SweetViz HTML report per dataset (hospital / outpatient / inpatient).
sweet_report1 = sv.analyze(df_hospital)
sweet_report1.show_html('/Users/hantswilliams/Dropbox/Biovirtua/Python_Projects/ahi/AHI_STATS_507/Week13_Summary/output/sweet_report_df_hospital.html')

sweet_report2 = sv.analyze(df_outpatient)
sweet_report2.show_html('/Users/hantswilliams/Dropbox/Biovirtua/Python_Projects/ahi/AHI_STATS_507/Week13_Summary/output/sweet_report_df_outpatient.html')

sweet_report3 = sv.analyze(df_inpatient)
sweet_report3.show_html('/Users/hantswilliams/Dropbox/Biovirtua/Python_Projects/ahi/AHI_STATS_507/Week13_Summary/output/sweet_report_df_inpatient.html')

# D-tale
# Interactive exploration of the hospital frame in the browser.
import dtale
d = dtale.show(df_hospital, ignore_duplicate=True)
d.open_browser()

########################################################################################################################
########################################################################################################################
########################################################################################################################
########################################################################################################################
########################################################################################################################
########################################################################################################################

### Automatic Data Cleaning
wearables_data_url = 'https://raw.githubusercontent.com/programmablewealth/aavegotchi-stats/master/src/data/wearables/wearables.json'
wearables_data = requests.get(wearables_data_url).json()
wearables_name = {i: wearables_data[str(i)]["0"] for i in wearables_data}

# Translate every wearable-slot id column into a human-readable "<slot> Item"
# column. An id of 0 means the slot is empty and is recorded as the string 'NaN'.
for slot in ('Body', 'Face', 'Eyes', 'Head',
             'Left Hand', 'Right Hand', 'Pet', 'Background'):
    gotchi_sales[slot + ' Item'] = gotchi_sales[slot].apply(
        lambda x: 'NaN' if x == 0 else wearables_data[str(x)]["0"])

#%%
#%%
gotchi_sales.to_csv('gotchi.csv')

# %%
import dtale

d = dtale.show(gotchi_sales)
d

# %%
import pandas as pd

symbol = "SPY"
period = 1  # expiration-period index, e.g. 1 is the second listed expiration


def getOptionCalls(symbol, period):
    """Return the call side of `symbol`'s option chain at expiration index `period`.

    Returns None when the chain cannot be fetched (unknown symbol, no listed
    options, network failure). The original caught only NameError — which the
    body cannot raise — and returned the NameError *class* itself, a truthy
    value no caller could use.
    """
    try:
        ticker = yf.Ticker(symbol)
        # Expiration periods ex: 1 is the first expiration period
        optionChain = ticker.option_chain(ticker.options[period])
        return optionChain[0]  # calls DataFrame
    except Exception:
        return None


def getOptionPuts(symbol, period):
    """Return the put side of `symbol`'s option chain at expiration index `period`.

    Returns None on failure; see getOptionCalls for the error-handling fix.
    """
    try:
        ticker = yf.Ticker(symbol)
        # Expiration periods ex: 1 is the first expiration period
        optionChain = ticker.option_chain(ticker.options[period])
        return optionChain[1]  # puts DataFrame
    except Exception:
        return None


options = getOptionPuts(symbol, period)
d = dtale.show(options)
d.open_browser()
fig.add_annotation( dict(xref='paper', yref='paper', x=0.01, y=0.02, xanchor='left', yanchor='middle', font=dict(family='Arial', size=14, color='purple'), showarrow=False, text='Dismissed')) fig.show() # dtale eds # pip install dtale import dtale d = dtale.show(departures) d.open_browser() # pandas-profiling # pip install pandas-profiling import pandas_profiling pandas_profiling.ProfileReport(departures) # AutoViz from autoviz.AutoViz_Class import AutoViz_Class AV = AutoViz_Class() sep = ',' dft = AV.AutoViz(filename="", sep=sep, depVar='fyear',
# https://github.com/man-group/dtale
# pip install dtale
# pip install yfinance
import dtale
import yfinance as yf


def getSymbolHistoricalPrice(tickerSymbol):
    """Return the full price history DataFrame for `tickerSymbol` via yfinance.

    Returns None when the download fails. The original caught only NameError
    — which this body cannot raise — and returned the NameError *class*
    itself, a truthy value no caller could use.
    """
    try:
        ticker = yf.Ticker(tickerSymbol)
        return ticker.history("max")
    except Exception:
        return None


ticker = "SPY"
dfQuoteData = getSymbolHistoricalPrice(ticker)
d = dtale.show(dfQuoteData)
d.open_browser()
from pandas_profiling import ProfileReport #profile = ProfileReport(df, title="Tips Dataset", html={'style': {'full_width': True}}, sort="None") profile = ProfileReport(df) # for large dataset (minimal=True) profile.to_notebook_iframe() #profile.to_file(output_file="your_report.html") # For Jupyter Notebook from pandas_profiling import ProfileReport profile = ProfileReport(df, explorative=True, minimal = True) profile.to_file('output.html') ### Dtale (EDA) # For Spyder import dtale import plotly.express as px d = dtale.show(df, ignore_duplicate=True) d.open_browser() # For Jupyter Notebook import dtale dtale.show(df) ### Pandas_ui import seaborn as sns df = sns.load_dataset("tips") from pandas_ui import * pandas_ui('D:\\KNOWLEDGE KORNER\\ANALYTICS\\MISC\\Practice\\Kaggle & Hackathons\\Tips\\tips.csv') #============================================================================== ### Upload CSV Files
import pandas as pd

titanic = pd.read_csv('titanic.csv')
# Bug fix: head is a method — without the parentheses this printed the
# bound-method repr instead of the first rows.
print(titanic.head())

# Data EDA in two lines of code using pandas_profiling
import pandas_profiling as pp

profile = pp.ProfileReport(titanic, explorative=True)
profile.to_file('output.html')

# EDA using Sweetviz
import sweetviz as sv

sweet_report = sv.analyze(titanic)
sweet_report.show_html('sweet_report.html')

# EDA using Autoviz
from autoviz.AutoViz_Class import AutoViz_Class

# Bug fix: AutoViz_Class() takes no constructor arguments; the DataFrame is
# passed to AutoViz() via dfte= (filename left empty when using a frame).
aviz = AutoViz_Class()
aviz.AutoViz(filename="", dfte=titanic)

# EDA using dtale
import dtale

dtale.show(titanic, ignore_duplicate=True)
import dtale
import pandas as pd

# Minimal one-row frame used to demonstrate the D-Tale instance API.
df = pd.DataFrame([dict(a=1, b=2, c=3)])

# Assigning a reference to a running D-Tale process
d = dtale.show(df)

# Accessing data associated with D-Tale process
tmp = d.data.copy()
tmp['d'] = 4

# Altering data associated with D-Tale process
# FYI: this will clear any front-end settings you have at the time for this process (filter, sorts, formatting)
d.data = tmp

# Shutting down D-Tale process
d.kill()

# using Python's `webbrowser` package it will try and open your server's default browser to this process
d.open_browser()

# There is also some helpful metadata about the process
d._data_id  # the process's data identifier
d._url  # the url to access the process

# Re-attach to an existing session by its data id.
d2 = dtale.get_instance(
    d._data_id
)  # returns a new reference to the instance running at that data_id

dtale.instances()  # prints a list of all ids & urls of running D-Tale sessions
def main():
    """Streamlit app: profiling, D-Tale, visualisation, word clouds and a small ML demo.

    `my_db` / `my_db_clean` are module-level dataset handles — assumed defined
    elsewhere in this file; TODO confirm.
    """
    menu = [
        "Accueil", "Pandas Profile", "D-Tale", "Visualisation",
        "Nuage de mots", "Machine Learning", "A propos"
    ]
    selection = st.sidebar.selectbox("Fonctions", menu)
    if selection == "Pandas Profile":
        components.html(ha.alert_panda_prof(), height=190)
        my_data = st.file_uploader("Charger le fichier CSV", type=['csv'])
        if my_data is not None:
            df = pd.read_csv(my_data)
            st.dataframe(df.head(10))
            eda_profil = ProfileReport(df,
                                       title='Pandas Profiling Report...',
                                       explorative=True)
            st_profile_report(eda_profil)
    elif selection == "D-Tale":
        components.html(ha.alert_dtale(), height=190)
        data_file = st.file_uploader("Charger le fichier CSV", type=['csv'])
        if data_file is not None:
            df = pd.read_csv(data_file)
            st.dataframe(df.head())
            d = dtale.show(df)
            d.open_browser()
            # NOTE(review): the report below is generated with SweetViz even
            # though this is the D-Tale branch — looks like a copy-paste;
            # behaviour preserved, confirm intent.
            if st.button("Générer le rapport"):
                report = sv.analyze(df)
                report.show_html()
                utils.st_display_sweetviz("SWEETVIZ_REPORT.html")
                components.html(ha.alert_warning(), 1000)
    elif selection == "Visualisation":
        # Header images
        image = Image.open('dataviz.png')
        col2, col1 = st.beta_columns([1, 3])
        col2.image(
            "https://idoc-projets.ias.u-psud.fr/redmine/attachments/download/121/sunburst.gif",
            caption='',
            width=None,
            use_column_width=True)
        col1.image(image, caption='', width=None, use_column_width=True)
        # Category / rating / reviews charts
        cat1, cat2 = st.beta_columns(2)
        datas = utils.lire_dataset(my_db_clean)
        fig = px.histogram(datas,
                           x='Rating',
                           y='Category',
                           title='Somme des notes par catégorie',
                           color='Category')
        cat1.plotly_chart(fig)
        fig = px.histogram(datas,
                           x='Reviews',
                           y='Category',
                           title='Somme des commentaires par catégorie',
                           color='Category')
        cat2.plotly_chart(fig)
        # Sunburst / pie charts
        perc1, perc2 = st.beta_columns(2)
        fig = px.sunburst(datas,
                          path=['Type', 'Category', 'Genres'],
                          title='Types, Catégories et genres')
        perc1.plotly_chart(fig)
        # Typo fix in the user-facing title ("apllication" -> "application").
        fig = px.pie(datas,
                     names='Type',
                     title='Pourcentage application gratuites/Payantes',
                     color_discrete_sequence=px.colors.sequential.RdBu)
        perc2.plotly_chart(fig)
        # Rating distribution histograms
        hist1, hist2 = st.beta_columns(2)
        hist_data = [list(datas['Rating'])]
        group_labels = ['Rating']
        fig = ff.create_distplot(hist_data, group_labels)
        hist1.plotly_chart(fig)
        counts, bins = np.histogram(datas.Rating, bins=range(0, 6, 1))
        bins = 0.5 * (bins[:-1] + bins[1:])  # bin centres for the bar chart
        fig = px.bar(x=bins,
                     y=counts,
                     labels={
                         'x': 'Rating',
                         'y': 'Count'
                     },
                     title='Distribution des notes')
        hist2.plotly_chart(fig)
    elif selection == "Nuage de mots":
        # General word clouds
        img1, img2 = st.beta_columns(2)
        img1.image('datas/wordcloud/general.png')
        img2.image('datas/wordcloud/free_app.png')
        # Free apps
        img3, img4 = st.beta_columns(2)
        img3.image('datas/wordcloud/free_app_pos.png')
        img4.image('datas/wordcloud/free_app_neg.png')
        # Paid apps
        img5, img6 = st.beta_columns(2)
        img5.image('datas/wordcloud/paid_app.png')
        img6.image('datas/wordcloud/paid_app_pos.png')
        img7, img8 = st.beta_columns(2)
        img7.image('datas/wordcloud/paid_app_neg.png')
    elif selection == "Machine Learning":
        image = Image.open('machine learning.jpg')
        col1, col2 = st.beta_columns([3, 1])
        col2.image(
            "https://static.wixstatic.com/media/bb7b70_d5fde322f7914060b7d997ba9d506a50~mv2.gif",
            caption='',
            width=None,
            use_column_width=True)
        col1.image(image, caption='', width=None, use_column_width=True)
        if st.checkbox("Afficher le dataset"):
            datas = utils.lire_dataset(my_db)
            st.write(datas.head())
        if st.checkbox("Afficher graph valeurs manquantes"):
            # Bug fix: `datas` used to be bound only by the checkbox above, so
            # ticking this one alone raised NameError; load it here as well.
            datas = utils.lire_dataset(my_db)
            col1, col2 = st.beta_columns([2, 1])
            df = datas.isnull()
            fig = px.imshow(df)
            col1.plotly_chart(fig)
            col2.write(datas.isnull().sum())
            col2.write(
                "On peut voir que la colonne **Rating** contient la plupart des valeurs manquantes. A sa suite on a **Current Ver**, **Adroid ver** et **Type**."
            )
        if st.checkbox("Afficher DB ok"):
            datas = utils.lire_dataset(my_db_clean)
            st.write(datas.head())
            mat1, mat2 = st.beta_columns(2)
            fig = px.scatter_matrix(
                datas,
                dimensions=["Rating", "Reviews", "Size", "Installs", "Price"],
                color="Type",
                symbol="Type",
                title="Matrix de dispersion des variables continues")
            fig.update_traces(diagonal_visible=False)
            mat1.plotly_chart(fig)
            fig = px.imshow(
                datas[["Rating", "Reviews", "Size", "Installs",
                       "Price"]].corr(),
                labels=dict(x="", y="", color="Corrélation"),
            )
            mat2.plotly_chart(fig)
        if st.checkbox("Make model"):
            mon_score = utils.make_model(my_db_clean)
            st.success(mon_score)
    elif selection == "A propos":
        components.html(hp.pied_de_page(), height=800)
    else:
        # "Accueil" (and any unmatched selection) shows the landing page.
        components.html(hp.entete_de_page(), height=1600)