def frequency(columns=None, buckets=10):
    """
    Draw one frequency chart per selected column.

    Note: ``self`` is not a parameter of this function; it is read from the
    enclosing scope.

    :param columns: Columns to be plotted (resolved through parse_columns)
    :param buckets: Number of buckets requested for every column
    :return: None
    """
    selected = parse_columns(self, columns)

    for name in selected:
        # Compute the frequency data for this column, then plot it as an image
        freq_data = self.cols.frequency(name, buckets)
        plot_freq(freq_data, output="image")
def run(self, df, columns, buckets=20):
    """
    Build an HTML report with the statistical information of a dataframe,
    display it and save the underlying stats as JSON.

    :param df: Dataframe to be profiled
    :param columns: Columns to include (resolved through parse_columns)
    :param buckets: Number of buckets forwarded to Profiler.to_json
    :return: None
    """
    columns = parse_columns(df, columns)
    output = Profiler.to_json(df, columns, buckets)

    # Jinja setup: templates live in a "templates" folder next to this module
    module_dir = os.path.dirname(os.path.abspath(__file__))
    loader = jinja2.FileSystemLoader(searchpath=module_dir + "//templates")
    env = jinja2.Environment(loader=loader, autoescape=True)

    # Header with the general profiler info
    page = ""
    header_template = env.get_template("general_info.html")
    page += header_template.render(data=output)

    # One section per column
    column_template = env.get_template("one_column.html")
    for col_name in columns:
        stats = output["columns"][col_name]

        # Plots are optional: only drawn when the stats carry the data
        hist_pic = (
            plot_hist({col_name: stats["hist"]}, output="base64")
            if "hist" in stats
            else None
        )
        freq_pic = (
            plot_freq({col_name: stats["frequency"]}, output="base64")
            if "frequency" in stats
            else None
        )

        page += column_template.render(
            data=stats, hist_pic=hist_pic, freq_pic=freq_pic)

    # Append an HTML table of the dataframe (argument 10 passed to table_html)
    page += df.table_html(10)

    # df.plots.correlation(columns)

    # Display HTML
    display(HTML(page))

    # Save to file
    write_json(output, self.path)
def run(self, df, columns, buckets=40, infer=False, relative_error=1):
    """
    Build the dataframe statistical information as an HTML report.

    The report is printed with print_html and kept in ``self.html``. The raw
    stats are kept in ``self.json``, written to ``self.path`` with write_json
    and, when ``self.queue_url`` is set, sent through ``self.to_queue``.

    :param df: Dataframe to be analyzed
    :param columns: Columns to be analyzed
    :param buckets: Number of buckets calculated to print the histogram
    :param infer: infer data type
    :param relative_error: Relative Error for quantile discretizer calculation
    :return: None
    """
    columns = parse_columns(df, columns)
    output = Profiler.to_json(df, columns, buckets, infer, relative_error)

    # Load jinja
    path = os.path.dirname(os.path.abspath(__file__))
    template_loader = jinja2.FileSystemLoader(searchpath=path + "//templates")
    template_env = jinja2.Environment(loader=template_loader, autoescape=True)

    # Render template
    # Create the profiler info header
    html = ""
    general_template = template_env.get_template("general_info.html")
    html = html + general_template.render(data=output)

    template = template_env.get_template("one_column.html")

    # Histograms drawn for date columns, in rendering order
    date_units = ("years", "months", "weekdays", "hours", "minutes")

    # Create every column stats
    for col_name in columns:
        col = output["columns"][col_name]

        # hist_pic is unpacked with ** into template.render, so it must always
        # be a mapping. Defaulting to None made render() raise TypeError for
        # columns that have no "hist" entry.
        hist_pic = {"hist_pic": None}
        if "hist" in col:
            if col["column_dtype"] == "date":
                # One histogram per date component: hist_years, hist_months, ...
                hist_pic = {
                    "hist_" + unit: plot_hist(
                        {col_name: col["hist"][unit]}, "base64", unit)
                    for unit in date_units
                }
            else:
                hist = plot_hist({col_name: col["hist"]}, output="base64")
                hist_pic = {"hist_pic": hist}

        if "frequency" in col:
            freq_pic = plot_freq({col_name: col["frequency"]}, output="base64")
        else:
            freq_pic = None

        html = html + template.render(
            data=col, freq_pic=freq_pic, **hist_pic)

    html = html + df.table_html(10)

    # Display HTML
    print_html(html)

    # Send to queue
    if self.queue_url is not None:
        self.to_queue(output)

    # JSON
    # Save in case we want to output to a json file
    self.json = output

    # Save file in json format
    write_json(output, self.path)

    # Save in case we want to output to a html file
    self.html = html