def set_variables(self, **vars):
        """Update configuration variables and drop the cached output they affect.

        Args:
            **vars: configuration parameters to change, given as ``name=value``

        Examples:
            >>> ProfileReport(df).set_variables(title="NewTitle", html={"minify_html": False})
        """
        touched = set(vars)

        # "progress_bar" and "pool_size" need no invalidation of their own:
        # every cache can persist when only those are changed.

        if "notebook" in touched:
            self._widgets = None

        if "html" in touched or "title" in touched:
            self._html = None

        # Any parameter outside the known set empties every cache
        known = {"progress_bar", "pool_size", "notebook", "html", "title"}
        if touched - known:
            for cached in (
                "_description_set",
                "_title",
                "_report",
                "_html",
                "_widgets",
                "_json",
            ):
                setattr(self, cached, None)

        # A single parameter is assigned by key, several go through set_kwargs
        if len(vars) == 1:
            ((name, value),) = vars.items()
            config[name] = value
        else:
            config.set_kwargs(vars)
    def __init__(
        self,
        df=None,
        minimal=False,
        explorative=False,
        config_file: Union[Path, str, None] = None,
        lazy: bool = True,
        **kwargs,
    ):
        """Generate a ProfileReport based on a pandas DataFrame

        Args:
            df: the pandas DataFrame
            minimal: minimal mode is a default configuration with minimal computation
            explorative: load the bundled explorative configuration; only applied
                when neither `config_file` nor `minimal` is given
            config_file: a config file (.yml), mutually exclusive with `minimal`
            lazy: compute when needed
            **kwargs: other arguments, for valid arguments, check the default configuration file.

        Raises:
            ValueError: if both `config_file` and `minimal` are given, or if
                `lazy` is False while no DataFrame is provided
        """
        if config_file is not None and minimal:
            raise ValueError(
                "Arguments `config_file` and `minimal` are mutually exclusive."
            )

        if df is None and not lazy:
            # The message used to read "Can init ...", i.e. the opposite of
            # the condition that triggers it.
            raise ValueError(
                "Can not init a not-lazy ProfileReport with no DataFrame")

        # Exactly one configuration source is applied, in order of precedence
        if config_file:
            config.set_file(config_file)
        elif minimal:
            config.set_file(get_resource("configs/config_minimal.yaml"))
        elif explorative:
            config.set_file(get_resource("configs/config_explorative.yaml"))
        elif not config.is_default:
            # TODO: log (instead of warn) that the current configuration is not
            # the default and can be restored with
            # 'pandas_profiling.clear_config()'
            pass

        # Keyword overrides are applied on top of whichever file was loaded
        config.set_kwargs(kwargs)

        # Caches start empty
        self.df = None
        self._df_hash = -1
        self._description_set = None
        self._title = None
        self._report = None
        self._html = None
        self._widgets = None
        self._json = None

        if df is not None:
            # preprocess df
            self.df = self.preprocess(df)

        if not lazy:
            # Trigger building the report structure
            _ = self.report
Ejemplo n.º 3
0
    def __init__(self, df, config_file: Path = None, **kwargs):
        """Profile `df` and render the HTML report immediately.

        Args:
            df: the DataFrame to profile
            config_file: optional configuration file, loaded before `kwargs` are applied
            **kwargs: configuration overrides passed to `config.set_kwargs`

        Raises:
            ValueError: if the configured "sort" value is not an accepted option
        """
        if config_file:
            config.config.set_file(str(config_file))
        config.set_kwargs(kwargs)

        # An index other than the int64 range 0..n-1 becomes a regular column
        expected_index = pd.Index(np.arange(0, len(df)))
        index_is_custom = not expected_index.equals(df.index)
        if index_is_custom or df.index.dtype != np.int64:
            df = df.reset_index()

        # Reserved column names are renamed, then spaces and colons are removed
        df = rename_index(df)
        df = clean_column_names(df)

        # Column ordering according to the "sort" setting
        order = config["sort"].get(str)
        sorting_requested = order != "None"
        if sys.version_info[1] <= 5 and sorting_requested:
            warnings.warn("Sorting is supported from Python 3.6+")

        def _folded(name):
            return name.casefold()

        if order in ("asc", "ascending"):
            df = df.reindex(sorted(df.columns, key=_folded), axis=1)
        elif order in ("desc", "descending"):
            ascending = sorted(df.columns, key=_folded)
            df = df.reindex(reversed(ascending), axis=1)
        elif sorting_requested:
            raise ValueError(
                '"sort" should be "ascending", "descending" or None.')

        # Remember the final column order
        config["column_order"] = df.columns.tolist()

        # Dataset statistics
        description_set = describe_df(df)

        # First and last rows, each only when a positive row count is configured
        sample = {}
        for part in ("head", "tail"):
            n_rows = config["samples"][part].get(int)
            if n_rows > 0:
                sample[part] = getattr(df, part)(n=n_rows)

        # Render the HTML and keep the settings and results on the instance
        self.html = to_html(sample, description_set)
        self.minify_html = config["minify_html"].get(bool)
        self.use_local_assets = config["use_local_assets"].get(bool)
        self.title = config["title"].get(str)
        self.description_set = description_set
        self.sample = sample
Ejemplo n.º 4
0
    def __init__(self, df, minimal=False, config_file: Path = None, **kwargs):
        """Profile `df` and build the report structure immediately.

        Args:
            df: the pandas DataFrame to profile
            minimal: load the minimal configuration (from `get_config_minimal()`),
                mutually exclusive with `config_file`
            config_file: a configuration file to load, mutually exclusive with `minimal`
            **kwargs: configuration overrides, applied after any configuration
                file has been loaded

        Raises:
            ValueError: if both `config_file` and `minimal` are given
        """
        # Compare (major, minor) only: `sys.version_info` has five fields, so
        # e.g. (3, 5, 2, "final", 0) <= (3, 5) is False and the warning would
        # never be emitted on any Python 3.5.x release.
        if sys.version_info[:2] <= (3, 5):
            warnings.warn(
                "This is the last release to support Python 3.5, please upgrade.",
                category=DeprecationWarning,
            )

        if config_file is not None and minimal:
            raise ValueError(
                "Arguments `config_file` and `minimal` are mutually exclusive."
            )

        if minimal:
            config_file = get_config_minimal()

        if config_file:
            config.set_file(str(config_file))
        config.set_kwargs(kwargs)

        self.date_start = datetime.utcnow()

        # Treat index as any other column
        if (not pd.Index(np.arange(0, len(df))).equals(df.index)
                or df.index.dtype != np.int64):
            df = df.reset_index()

        # Rename reserved column names
        df = rename_index(df)

        # Ensure that columns are strings
        df.columns = df.columns.astype("str")

        # Get dataset statistics
        description_set = describe_df(df)

        # Build report structure
        self.sample = self.get_sample(df)
        self.title = config["title"].get(str)
        self.description_set = description_set
        # The end timestamp is taken before the report structure is assembled
        self.date_end = datetime.utcnow()

        disable_progress_bar = not config["progress_bar"].get(bool)

        # Single-step progress bar around the structure building
        with tqdm(total=1,
                  desc="build report structure",
                  disable=disable_progress_bar) as pbar:
            self.report = get_report_structure(self.date_start, self.date_end,
                                               self.sample, description_set)
            pbar.update()
Ejemplo n.º 5
0
    def __init__(self, df, minimal=False, config_file: Path = None, **kwargs):
        """Profile `df` and assemble the report structure immediately.

        Args:
            df: the DataFrame to profile
            minimal: use the configuration returned by `get_config_minimal()`,
                mutually exclusive with `config_file`
            config_file: a configuration file to load, mutually exclusive with `minimal`
            **kwargs: configuration overrides passed to `config.set_kwargs`

        Raises:
            ValueError: if both `config_file` and `minimal` are given
        """
        if minimal and config_file is not None:
            raise ValueError(
                "Arguments `config_file` and `minimal` are mutually exclusive."
            )

        # Minimal mode substitutes its own configuration file
        settings_path = get_config_minimal() if minimal else config_file
        if settings_path:
            config.config.set_file(str(settings_path))
        config.set_kwargs(kwargs)

        self.date_start = datetime.utcnow()

        # An index other than the int64 range 0..n-1 becomes a regular column
        expected_index = pd.Index(np.arange(0, len(df)))
        index_is_custom = not expected_index.equals(df.index)
        if index_is_custom or df.index.dtype != np.int64:
            df = df.reset_index()

        # Reserved column names are renamed and all names are cast to strings
        df = rename_index(df)
        df.columns = df.columns.astype("str")

        # Apply the configured column ordering and remember the result
        df = self.sort_column_names(df)
        config["column_order"] = df.columns.tolist()

        # Dataset statistics
        summary = describe_df(df)

        # Results kept on the instance
        self.sample = self.get_sample(df)
        self.title = config["title"].get(str)
        self.description_set = summary

        # The end timestamp is taken before the report structure is assembled
        self.date_end = datetime.utcnow()
        self.report = get_report_structure(
            self.date_start,
            self.date_end,
            self.sample,
            summary,
        )
Ejemplo n.º 6
0
    def __init__(
        self,
        df: Optional[pd.DataFrame] = None,
        minimal: bool = False,
        explorative: bool = False,
        sensitive: bool = False,
        dark_mode: bool = False,
        orange_mode: bool = False,
        sample: Optional[dict] = None,
        config_file: Optional[Union[Path, str]] = None,
        lazy: bool = True,
        **kwargs,
    ):
        """Generate a ProfileReport based on a pandas DataFrame

        Args:
            df: the pandas DataFrame
            minimal: minimal mode is a default configuration with minimal computation
            explorative: apply the "explorative" argument group
            sensitive: apply the "sensitive" argument group
            dark_mode: apply the "dark_mode" argument group
            orange_mode: apply the "orange_mode" argument group
            sample: optional dict(name="Sample title", caption="Caption", data=pd.DataFrame())
            config_file: a config file (.yml), mutually exclusive with `minimal`
            lazy: compute when needed
            **kwargs: other arguments, for valid arguments, check the default configuration file.

        Raises:
            ValueError: if both `config_file` and `minimal` are given, or if
                `lazy` is False while no DataFrame is provided
        """
        if config_file is not None and minimal:
            raise ValueError(
                "Arguments `config_file` and `minimal` are mutually exclusive."
            )

        if df is None and not lazy:
            # The message used to read "Can init ...", i.e. the opposite of
            # the condition that triggers it.
            raise ValueError(
                "Can not init a not-lazy ProfileReport with no DataFrame")

        # At most one configuration file is loaded
        if config_file:
            config.set_file(config_file)
        elif minimal:
            config.set_file(get_config("config_minimal.yaml"))
        elif not config.is_default:
            # TODO: tell the user that the current configuration is not the
            # default and can be restored with 'pandas_profiling.clear_config()'
            pass

        # Argument groups are applied after the file, in this fixed order
        if explorative:
            config.set_arg_group("explorative")
        if sensitive:
            config.set_arg_group("sensitive")
        if dark_mode:
            config.set_arg_group("dark_mode")
        if orange_mode:
            config.set_arg_group("orange_mode")

        # Keyword overrides are applied last
        config.set_kwargs(kwargs)

        # Caches start empty; a user-provided sample is stored as given
        self.df = None
        self._df_hash = -1
        self._description_set = None
        self._sample = sample
        self._title = None
        self._report = None
        self._html = None
        self._widgets = None
        self._json = None
        self._typeset = None
        self._summarizer = None

        if df is not None:
            # preprocess df
            self.df = self.preprocess(df)

        if not lazy:
            # Trigger building the report structure
            _ = self.report