Example #1
0
    def __init__(
        self,
        df=None,
        minimal=False,
        config_file: Union[Path, str, None] = None,
        lazy: bool = True,
        **kwargs,
    ):
        """Generate a ProfileReport based on a pandas DataFrame.

        Args:
            df: the pandas DataFrame; may only be omitted when `lazy` is True
            minimal: minimal mode is a default configuration with minimal computation
            config_file: a config file (.yml), mutually exclusive with `minimal`
            lazy: compute when needed
            **kwargs: other arguments, for valid arguments, check the default configuration file.

        Raises:
            ValueError: if both `config_file` and `minimal` are given, or if
                `df` is None while `lazy` is False.
        """
        if config_file is not None and minimal:
            raise ValueError(
                "Arguments `config_file` and `minimal` are mutually exclusive."
            )

        if df is None and not lazy:
            # An eager report has nothing to compute without data.
            raise ValueError(
                "Can't init a not-lazy ProfileReport with no DataFrame"
            )

        # Pick the base configuration file. When neither option is given the
        # current global configuration is used as-is, even if it is not the
        # default one.
        # TODO: log a notice when the active configuration is not the default.
        if config_file:
            config.set_file(config_file)
        elif minimal:
            config.set_file(get_config_minimal())

        # Keyword overrides are applied after the base file has been chosen.
        config.set_kwargs(kwargs)

        # Start with every cached attribute unset.
        self.df = None
        self._df_hash = -1
        self._description_set = None
        self._title = None
        self._report = None
        self._html = None
        self._widgets = None
        self._json = None

        if df is not None:
            # preprocess df
            self.df = self.preprocess(df)

        if not lazy:
            # Trigger building the report structure
            _ = self.report
Example #2
0
    def __init__(self, df, minimal=False, config_file: Path = None, **kwargs):
        """Build the profile report for `df` eagerly, inside the constructor.

        Args:
            df: the DataFrame to profile.
            minimal: when true, use the configuration file returned by
                `get_config_minimal()`; mutually exclusive with `config_file`.
            config_file: path of a configuration file, passed to
                `config.set_file` as a string.
            **kwargs: forwarded to `config.set_kwargs`.

        Raises:
            ValueError: if both `config_file` and `minimal` are given.
        """
        # Compare only (major, minor): the full `sys.version_info` tuple has
        # five fields, so e.g. (3, 5, 2, 'final', 0) <= (3, 5) is False and
        # the warning would never be emitted on a 3.5.x interpreter.
        if sys.version_info[:2] <= (3, 5):
            warnings.warn(
                "This is the last release to support Python 3.5, please upgrade.",
                category=DeprecationWarning,
            )

        if config_file is not None and minimal:
            raise ValueError(
                "Arguments `config_file` and `minimal` are mutually exclusive."
            )

        if minimal:
            config_file = get_config_minimal()

        if config_file:
            config.set_file(str(config_file))
        config.set_kwargs(kwargs)

        self.date_start = datetime.utcnow()

        # Treat index as any other column: keep the index only when it is the
        # plain 0..len(df)-1 int64 range, otherwise move it into the columns.
        if (not pd.Index(np.arange(0, len(df))).equals(df.index)
                or df.index.dtype != np.int64):
            df = df.reset_index()

        # Rename reserved column names
        df = rename_index(df)

        # Ensure that columns are strings
        # NOTE(review): if neither step above returned a new object, this
        # assignment changes the caller's DataFrame columns - confirm intended.
        df.columns = df.columns.astype("str")

        # Get dataset statistics
        description_set = describe_df(df)

        # Build report structure
        self.sample = self.get_sample(df)
        self.title = config["title"].get(str)
        self.description_set = description_set
        self.date_end = datetime.utcnow()

        disable_progress_bar = not config["progress_bar"].get(bool)

        with tqdm(total=1,
                  desc="build report structure",
                  disable=disable_progress_bar) as pbar:
            self.report = get_report_structure(self.date_start, self.date_end,
                                               self.sample, description_set)
            pbar.update()
Example #3
0
    def __init__(
        self,
        df: Optional[pd.DataFrame] = None,
        minimal: bool = False,
        explorative: bool = False,
        sensitive: bool = False,
        dark_mode: bool = False,
        orange_mode: bool = False,
        sample: Optional[dict] = None,
        config_file: Optional[Union[Path, str]] = None,
        lazy: bool = True,
        **kwargs,
    ):
        """Generate a ProfileReport based on a pandas DataFrame.

        Args:
            df: the pandas DataFrame; may only be omitted when `lazy` is True
            minimal: minimal mode is a default configuration with minimal computation
            explorative: apply the "explorative" argument group to the configuration
            sensitive: apply the "sensitive" argument group to the configuration
            dark_mode: apply the "dark_mode" argument group to the configuration
            orange_mode: apply the "orange_mode" argument group to the configuration
            sample: optional dict(name="Sample title", caption="Caption", data=pd.DataFrame())
            config_file: a config file (.yml), mutually exclusive with `minimal`
            lazy: compute when needed
            **kwargs: other arguments, for valid arguments, check the default configuration file.

        Raises:
            ValueError: if both `config_file` and `minimal` are given, or if
                `df` is None while `lazy` is False.
        """
        if config_file is not None and minimal:
            raise ValueError(
                "Arguments `config_file` and `minimal` are mutually exclusive."
            )

        if df is None and not lazy:
            # An eager report has nothing to compute without data.
            raise ValueError(
                "Can't init a not-lazy ProfileReport with no DataFrame"
            )

        # Pick the base configuration file. When neither option is given the
        # current global configuration is used as-is, even if it is not the
        # default one.
        if config_file:
            config.set_file(config_file)
        elif minimal:
            config.set_file(get_config("config_minimal.yaml"))

        # Argument groups are applied on top of the base file, in this fixed
        # order, and before the explicit keyword overrides below.
        if explorative:
            config.set_arg_group("explorative")
        if sensitive:
            config.set_arg_group("sensitive")
        if dark_mode:
            config.set_arg_group("dark_mode")
        if orange_mode:
            config.set_arg_group("orange_mode")

        config.set_kwargs(kwargs)

        # Start with every cached attribute unset; only the user-supplied
        # sample is stored up front.
        self.df = None
        self._df_hash = -1
        self._description_set = None
        self._sample = sample
        self._title = None
        self._report = None
        self._html = None
        self._widgets = None
        self._json = None
        self._typeset = None
        self._summarizer = None

        if df is not None:
            # preprocess df
            self.df = self.preprocess(df)

        if not lazy:
            # Trigger building the report structure
            _ = self.report