Example #1
0
    def _write_flat(self,
                    profile: DatasetProfile,
                    indent: int = 4,
                    rotation_suffix: Optional[str] = None):
        """
        Write output data for flat format

        Four files are written below
        ``<output_path>/<path_suffix(profile)>``: a CSV summary table in
        ``flat_table`` and one JSON file each in ``freq_numbers``,
        ``frequent_strings`` and ``histogram``. This method does not create
        any of those directories itself.

        Parameters
        ----------
        profile : DatasetProfile
            the dataset profile to output
        indent : int
            The JSON indentation to use. Default is 4
        rotation_suffix : str, optional
            Suffix identifying a rotated output. It is passed to
            ``file_name`` for the CSV file and appended to the *directory*
            names of the three JSON outputs; the JSON file names themselves
            do not carry it. ``None`` (the default) means no suffix.
        """
        summary = profile.to_summary()

        # Every output lives under the same per-profile directory.
        base_path = os.path.join(self.output_path, self.path_suffix(profile))

        summary_df = get_dataset_frame(summary)
        csv_file = os.path.join(
            base_path, "flat_table",
            self.file_name(profile, ".csv", rotation_suffix))
        with open(csv_file, "wt") as f:
            summary_df.to_csv(f, index=False)

        json_flat_file = self.file_name(profile, ".json")
        _suffix = rotation_suffix or ""

        # (directory prefix, flattening function) for each JSON output.
        # The frequent-numbers output previously reused the histogram
        # flattener, which made it a duplicate of the histogram file.
        json_outputs = (
            ("freq_numbers", flatten_dataset_frequent_numbers),
            ("frequent_strings", flatten_dataset_frequent_strings),
            ("histogram", flatten_dataset_histograms),
        )
        for dir_prefix, flatten in json_outputs:
            json_file = os.path.join(base_path, f"{dir_prefix}{_suffix}",
                                     json_flat_file)
            with open(json_file, "wt") as f:
                json.dump(flatten(summary), f, indent=indent)
Example #2
0
    def _write_json(self, profile: DatasetProfile):
        """
        Serialize the profile summary as JSON and write it out.

        The file is placed in the ``json`` directory below
        ``<output_path>/<path_suffix(profile)>`` and is opened in text mode
        through ``self.fs``.
        """
        json_dir = os.path.join(self.output_path, self.path_suffix(profile),
                                "json")
        destination = os.path.join(json_dir, self.file_name(profile, ".json"))

        profile_summary = profile.to_summary()
        with self.fs.open(destination, "wt") as out:
            serialized = message_to_json(profile_summary)
            out.write(serialized)
Example #3
0
    def _write_json(self, profile: DatasetProfile):
        """
        Dump the dataset profile's summary to a JSON file on disk.

        The file goes into the path returned by ``ensure_path`` for the
        profile's ``json`` sub-directory.
        """
        json_subdir = os.path.join(self.path_suffix(profile), "json")
        json_dir = self.ensure_path(json_subdir)
        target_file = os.path.join(json_dir, self.file_name(profile, ".json"))

        # Also make sure the profile's own directory exists under the
        # output root.
        profile_dir = os.path.join(self.output_path, self.path_suffix(profile))
        os.makedirs(profile_dir, exist_ok=True)

        # TODO: only calculate this summary once.  No need to calculate it for
        # _write_flat() as well
        profile_summary = profile.to_summary()
        with open(target_file, "wt") as out:
            serialized = message_to_json(profile_summary)
            out.write(serialized)
Example #4
0
    def _write_json(self,
                    profile: DatasetProfile,
                    rotation_suffix: Optional[str] = None,
                    transport_params: Optional[dict] = None):
        """
        Serialize the profile summary as JSON and write it out.

        Parameters
        ----------
        profile : DatasetProfile
            the dataset profile to output
        rotation_suffix : str, optional
            Forwarded to ``file_name`` when building the output file name
        transport_params : dict, optional
            Forwarded unchanged to ``open``
        """
        json_dir = os.path.join(self.output_path, self.path_suffix(profile),
                                "json")
        destination = os.path.join(
            json_dir, self.file_name(profile, ".json", rotation_suffix))

        profile_summary = profile.to_summary()
        with open(destination, "wt",
                  transport_params=transport_params) as out:
            serialized = message_to_json(profile_summary)
            out.write(serialized)
Example #5
0
    def _write_flat(self, profile: DatasetProfile, indent: int = 4):
        """
        Write the flat-format outputs for a dataset profile

        A CSV table goes into ``flat_table``; JSON files go into
        ``freq_numbers``, ``frequent_strings`` and ``histogram``. Each
        directory is obtained through ``ensure_path``.

        Parameters
        ----------
        profile : DatasetProfile
            the dataset profile to output
        indent : int
            The JSON indentation to use. Default is 4
        """
        # TODO: only calculate this summary once.  No need to calculate it for
        # _write_json() as well
        profile_summary = profile.to_summary()
        suffix = self.path_suffix

        # CSV table of the summary
        table_dir = self.ensure_path(os.path.join(suffix(profile), "flat_table"))
        table = get_dataset_frame(profile_summary)
        csv_target = os.path.join(table_dir, self.file_name(profile, ".csv"))
        table.to_csv(csv_target, index=False)

        # All JSON outputs share one file name and differ only in their
        # directory and in the function that flattens the summary.
        json_name = self.file_name(profile, ".json")
        json_sections = [
            ("freq_numbers", flatten_dataset_frequent_numbers),
            ("frequent_strings", flatten_dataset_frequent_strings),
            ("histogram", flatten_dataset_histograms),
        ]
        for section, flatten in json_sections:
            section_dir = self.ensure_path(
                os.path.join(suffix(profile), section)
            )
            with open(os.path.join(section_dir, json_name), "wt") as out:
                flattened = flatten(profile_summary)
                json.dump(flattened, out, indent=indent)