def pandas_profiling_analyze(
    self,
    title: str = "Movie dataset",
    sort: Union[str, None] = None,
    explorative: bool = True,
    **kwargs: Any,
):
    """
    Build a pandas-profiling report for ``self.data`` and show it as widgets.

    https://pandas-profiling.github.io/

    :param title: The name of the report - str
    :param sort: Sort the variables asc(ending), desc(ending) or None
        (leaves original sorting). - Union[str, None]
    :param explorative: Whether use explorative config file or not. - bool
    :param kwargs: Extra keyword arguments forwarded unchanged to
        ``ProfileReport``.
    :return: Whatever ``ProfileReport.to_widgets()`` returns.
    """
    # The report is always requested with the full-width HTML style.
    html_options = {"style": {"full_width": True}}

    report = ProfileReport(
        self.data,
        title=title,
        html=html_options,
        sort=sort,
        explorative=explorative,
        **kwargs,
    )

    # Hand back the result of the widget rendering call, as before.
    return report.to_widgets()
air['log_pass'] = log_pass


# In[7]:


air


# In[8]:


# EDA: build an explorative profiling report for the airlines data.
report = ProfileReport(
    air,
    title="Profile Report of the Airlines data",
    explorative=True,
)


# In[29]:


report.to_widgets()


# In[ ]:


# Findings noted from the report:
# 1) There are no missing values
# 2) No duplicate rows
# 3) No multicollinearity


# In[9]:


# Check the variable distribution throughout the whole time period
# available in the data.
# NOTE(review): this cell plots from `df`, while the cells above work on
# `air` -- confirm `df` is defined earlier in the notebook.
df.Passengers.plot()


# In[ ]:
data.describe()


# In[83]:


from pandas_profiling import ProfileReport


# In[84]:


# Build an explorative profiling report for `data`.
profile = ProfileReport(
    data,
    title='Pandas Profiling Report',
    explorative=True,
)


# In[85]:


profile.to_widgets()


# # Data Manipulation

# Checking the correlation of the data

# In[4]:


# Correlation matrix of `data`, drawn as a heatmap on a 15x8 figure.
corrmat = data.corr()
f, ax = plt.subplots(figsize=(15, 8))
sns.heatmap(corrmat, ax=ax, cmap="YlGnBu", linewidths=0.9)


# In[6]:


# Correlation with output variable
# # EDA

# In[253]:


# Data frame at a glance
app_train.info()


# In[254]:


# Profile report, built with minimal=True
trainprofile = ProfileReport(app_train, minimal=True)
trainprofile.to_widgets()


# In[255]:


# How many unique classes do we have for categorical features
app_train.select_dtypes('object').apply(pd.Series.nunique, axis=0)


# In[256]:


# List columns with most missing values
# NOTE(review): the function header below is cut off in this view (no body
# is visible), so it is kept as a comment rather than guessed at.
# def missing_values_table(df):
#     # Total missing values
# In[ ]:


Df.skew()


# In[ ]:


# Fact table quick summary using pandas profiling
profile = ProfileReport(
    Df,
    title='Pandas Profiling Report',
    html={'style': {'full_width': True}},
)


# In[ ]:


profile.to_widgets()


# In[ ]:


display(profile)


# In[ ]:


# Dimension table quick summary using pandas profiling
DimProfile = ProfileReport(
    DimDf,
    title='Pandas Dimension table Profile Report',
    html={'style': {'full_width': True}},
)


# In[ ]:
def do_a_pandas_profile(df, name):
    """Profile ``df`` with pandas-profiling and render the report.

    The report is titled ``name`` and built with the full-width HTML style.
    It is rendered twice: first through ``to_widgets()``, then through
    ``to_notebook_iframe()``.

    :param df: Data handed to ``ProfileReport`` as its first argument.
    :param name: Title of the report.
    :return: None
    """
    from pandas_profiling import ProfileReport

    report = ProfileReport(
        df,
        title=name,
        html={'style': {'full_width': True}},
    )

    # Keep the original call order; both results are discarded.
    report.to_widgets()
    report.to_notebook_iframe()