Ejemplo n.º 1
0
    def box(columns=None, output_format="plot", output_path=None):
        """
        Draw a boxplot for each selected numeric column, one plot per column.

        ``self`` is not a parameter here; it is resolved from the enclosing scope.
        :param columns: Columns to plot. Resolved through parse_columns, keeping
            only columns whose dtype is in PYSPARK_NUMERIC_TYPES
        :param output_format: Forwarded unchanged to plot_boxplot as ``output``
        :param output_path: Path where the image is going to be saved; forwarded
            unchanged to plot_boxplot as ``path``. NOTE(review): the same path is
            reused for every column -- confirm how plot_boxplot handles that
        :return: None
        """
        numeric_columns = parse_columns(self, columns, filter_by_column_dtypes=PYSPARK_NUMERIC_TYPES)
        check_column_numbers(numeric_columns, "*")

        # One plot call per column, each receiving a single-entry {name: stats} mapping.
        for name in numeric_columns:
            plot_boxplot({name: self.cols.boxplot(name)}, output=output_format, path=output_path)
Ejemplo n.º 2
0
    def scatter(columns=None, buckets=30, output_format="plot", output_path=None):
        """
        Draw a single scatter plot from the selected numeric columns.

        ``self`` is not a parameter here; it is resolved from the enclosing scope.
        :param columns: Columns to plot. Resolved through parse_columns, keeping
            only columns whose dtype is in PYSPARK_NUMERIC_TYPES
        :param buckets: Number of buckets, passed to self.cols.scatter
        :param output_format: Forwarded unchanged to plot_scatterplot as ``output``
        :param output_path: Path where the image is going to be saved; forwarded
            unchanged to plot_scatterplot as ``path``
        :return: None
        """
        numeric_columns = parse_columns(self, columns, filter_by_column_dtypes=PYSPARK_NUMERIC_TYPES)
        check_column_numbers(numeric_columns, "*")

        # All selected columns go into one scatter computation and one plot call.
        plot_scatterplot(
            self.cols.scatter(numeric_columns, buckets),
            output=output_format,
            path=output_path,
        )
Ejemplo n.º 3
0
    def hist(columns=None, buckets=10, output_format="plot", output_path=None):
        """
        Draw a histogram for each selected numeric column, one plot per column.

        ``self`` is not a parameter here; it is resolved from the enclosing scope.
        :param columns: Columns to plot. Resolved through parse_columns, keeping
            only columns whose dtype is in PYSPARK_NUMERIC_TYPES
        :param buckets: Number of buckets, passed to self.cols.hist for every column
        :param output_format: Forwarded unchanged to plot_hist as ``output``
        :param output_path: Path where the image is going to be saved; forwarded
            unchanged to plot_hist as ``path``. NOTE(review): the same path is
            reused for every column -- confirm how plot_hist handles that
        :return: None
        """
        numeric_columns = parse_columns(self, columns, filter_by_column_dtypes=PYSPARK_NUMERIC_TYPES)
        check_column_numbers(numeric_columns, "*")

        # One plot call per column, each receiving a single-entry {name: histogram} mapping.
        for name in numeric_columns:
            plot_hist({name: self.cols.hist(name, buckets)}, output=output_format, path=output_path)