def box(columns=None, output_format="plot", output_path=None):
    """
    Draw a boxplot for each selected column, one plot call per column.

    The column selection is resolved through ``parse_columns`` with
    ``filter_by_column_dtypes=PYSPARK_NUMERIC_TYPES``. ``self`` is not a
    parameter here; it is read from the enclosing scope.

    Note: the same ``output_path`` is passed to ``plot_boxplot`` for every
    column.

    :param columns: Columns to be plotted
    :param output_format: Forwarded to ``plot_boxplot`` as ``output``
    :param output_path: Path where the image is going to be saved; forwarded
        to ``plot_boxplot`` as ``path``
    :return: None
    """
    selected = parse_columns(self, columns, filter_by_column_dtypes=PYSPARK_NUMERIC_TYPES)
    check_column_numbers(selected, "*")

    # One independent plot per column: compute its boxplot stats, then draw.
    for name in selected:
        plot_boxplot({name: self.cols.boxplot(name)}, output=output_format, path=output_path)
def scatter(columns=None, buckets=30, output_format="plot", output_path=None):
    """
    Draw a scatter plot from the selected columns.

    The column selection is resolved through ``parse_columns`` with
    ``filter_by_column_dtypes=PYSPARK_NUMERIC_TYPES``. All selected columns
    are handed to ``self.cols.scatter`` in a single call and the result is
    plotted once. ``self`` is not a parameter here; it is read from the
    enclosing scope.

    :param columns: Columns to be plotted
    :param buckets: Number of buckets, forwarded to ``self.cols.scatter``
    :param output_format: Forwarded to ``plot_scatterplot`` as ``output``
    :param output_path: Path where the image is going to be saved; forwarded
        to ``plot_scatterplot`` as ``path``
    :return: None
    """
    selected = parse_columns(self, columns, filter_by_column_dtypes=PYSPARK_NUMERIC_TYPES)
    check_column_numbers(selected, "*")

    plot_options = {"output": output_format, "path": output_path}
    plot_scatterplot(self.cols.scatter(selected, buckets), **plot_options)
def hist(columns=None, buckets=10, output_format="plot", output_path=None):
    """
    Draw a histogram for each selected column, one plot call per column.

    The column selection is resolved through ``parse_columns`` with
    ``filter_by_column_dtypes=PYSPARK_NUMERIC_TYPES``. ``self`` is not a
    parameter here; it is read from the enclosing scope.

    Note: the same ``output_path`` is passed to ``plot_hist`` for every
    column.

    :param columns: Columns to be plotted
    :param buckets: Number of buckets, forwarded to ``self.cols.hist``
    :param output_format: Forwarded to ``plot_hist`` as ``output``
    :param output_path: Path where the image is going to be saved; forwarded
        to ``plot_hist`` as ``path``
    :return: None
    """
    selected = parse_columns(self, columns, filter_by_column_dtypes=PYSPARK_NUMERIC_TYPES)
    check_column_numbers(selected, "*")

    # Each column gets its own histogram computation and its own plot call.
    for name in selected:
        histogram = self.cols.hist(name, buckets)
        payload = {name: histogram}
        plot_hist(payload, output=output_format, path=output_path)