def test_export_to_file(self):
        """Write a profile report to an HTML file and check the file is not tiny.

        Builds a ``ProfileReport`` from ``self.df``, exports it into
        ``self.test_dir`` and asserts that the resulting file is larger than
        200 bytes.
        """
        report = spark_df_profiling_optimus.ProfileReport(self.df)

        # The hash of the test instance is embedded in the file name.
        report_name = "profile_%s.html" % hash(self)
        report_path = os.path.join(self.test_dir, report_name)
        report.to_file(outputfile=report_path)

        written_bytes = os.path.getsize(report_path)
        self.assertLess(200, written_bytes)
예제 #2
0
 def profiler(self):
     """
     Build a profile report for the DataFrame currently held by this object.

     Passes ``self._df`` to ``spark_df_profiling_optimus.ProfileReport`` and
     hands the resulting report back to the caller.

     :return: the ``ProfileReport`` built from ``self._df`` (intended to be
         displayed as an HTML profile inside a notebook)
     """
     return spark_df_profiling_optimus.ProfileReport(self._df)
import pandas as pd
import spark_df_profiling_optimus

if __name__ == "__main__":
    # Command-line entry point: profile a CSV file and write an HTML report.
    import argparse
    import webbrowser

    parser = argparse.ArgumentParser(
        description=
        'Profile the variables in a CSV file and generate a HTML report.')
    parser.add_argument("inputfile", help="CSV file to profile")
    parser.add_argument("-o",
                        "--output",
                        help="Output report file",
                        default=spark_df_profiling_optimus.DEFAULT_OUTPUTFILE)
    parser.add_argument("-s",
                        "--silent",
                        help="Only generate but do not open report",
                        action="store_true")

    args = parser.parse_args()

    # sep=None asks pandas to sniff the delimiter, which only the Python
    # parser supports. Naming the engine explicitly keeps the same behavior
    # while avoiding the ParserWarning about falling back from the C engine.
    df = pd.read_csv(args.inputfile,
                     sep=None,
                     engine="python",
                     parse_dates=True)

    p = spark_df_profiling_optimus.ProfileReport(df)
    p.to_file(outputfile=args.output)

    if not args.silent:
        # NOTE(review): relies on the report exposing the written file as
        # `p.file` after to_file() -- confirm against the library.
        webbrowser.open_new_tab(p.file.name)