def setUpClass(cls):
    """Start a local Spark session and build the shared 3-row test DataFrame.

    Exposes ``cls.spark``, ``cls.sc`` and ``cls.df`` to the test methods.
    Column ``c`` contains a ``None`` so null-handling paths get exercised.
    """
    cls.spark = setup_pyspark().appName("test-profiles-local").getOrCreate()
    cls.sc = cls.spark.sparkContext
    rows = [
        Row(a="foo", b=1, c=5),
        Row(a="bar", b=2, c=6),
        Row(a="baz", b=3, c=None),
    ]
    cls.df = cls.sc.parallelize(rows).toDF()
def setUpClass(cls):
    """Start a local Spark session and build the four anomaly-detection fixtures.

    Exposes ``cls.spark``, ``cls.sc`` and ``cls.df_1`` … ``cls.df_4``.
    """
    cls.spark = setup_pyspark().appName("test-anomalydetection-local").getOrCreate()
    cls.sc = cls.spark.sparkContext

    def make_df(rows):
        # Materialize a list of Rows as a DataFrame via the shared context.
        return cls.sc.parallelize(rows).toDF()

    cls.df_1 = make_df([
        Row(a=3, b=0, c="colder"),
        Row(a=3, b=5, c="bolder"),
    ])
    cls.df_2 = make_df([
        Row(a=3, b=0, c="foo"),
        Row(a=3, b=5, c="zoo"),
        Row(a=100, b=5, c="who"),
        Row(a=2, b=30, c="email"),
        Row(a=10, b=5, c="cards"),
    ])
    # df_3 and df_4 carry identical single-row data — presumably so tests can
    # compare two equal datasets (no anomaly); confirm against the test bodies.
    cls.df_3 = make_df([Row(a=1, b=23, c="pool")])
    cls.df_4 = make_df([Row(a=1, b=23, c="pool")])
def setUpClass(cls):
    """Start a local Spark session and create the analyzer-test fixtures.

    Exposes ``cls.spark``, ``cls.sc``, ``cls.pydeequ_session``,
    ``cls.AnalysisRunner`` (created through the PyDeequ session) and the
    shared DataFrame ``cls.df``; column ``c`` holds a ``None`` to exercise
    null handling.
    """
    cls.spark = setup_pyspark().appName("test-analyzers-local").getOrCreate()
    # The runner is obtained from the PyDeequ session rather than constructed
    # directly, so session-level state is shared with it.
    cls.pydeequ_session = PyDeequSession(cls.spark)
    cls.AnalysisRunner = cls.pydeequ_session.createAnalysisRunner()
    cls.sc = cls.spark.sparkContext
    cls.df = cls.sc.parallelize(
        [
            Row(a="foo", b=1, c=5, d=1),
            Row(a="bar", b=2, c=6, d=3),
            Row(a="baz", b=3, c=None, d=1),
        ]
    ).toDF()
def setUpClass(cls):
    """Start a local Spark session and set up constraint-suggestion fixtures.

    Exposes ``cls.spark``, ``cls.ConstraintSuggestionRunner``, ``cls.sc``
    and ``cls.df``.

    NOTE(review): the app name "test-analyzers-local" looks copy-pasted from
    the analyzers suite — confirm whether it should name this suite instead.
    """
    cls.spark = setup_pyspark().appName("test-analyzers-local").getOrCreate()
    cls.ConstraintSuggestionRunner = ConstraintSuggestionRunner(cls.spark)
    cls.sc = cls.spark.sparkContext
    fixture_rows = [
        Row(a="foo", b=1, c=5),
        Row(a="bar", b=2, c=6),
        Row(a="baz", b=3, c=None),
    ]
    cls.df = cls.sc.parallelize(fixture_rows).toDF()
def setUpClass(cls):
    """Start a local Spark session and set up verification-test fixtures.

    Exposes ``cls.spark``, ``cls.AnalysisRunner``, ``cls.VerificationSuite``,
    ``cls.sc`` and ``cls.df``.

    NOTE(review): the app name "test-analyzers-local" looks copy-pasted from
    the analyzers suite — confirm whether it should name this suite instead.
    """
    cls.spark = setup_pyspark().appName("test-analyzers-local").getOrCreate()
    cls.AnalysisRunner = AnalysisRunner(cls.spark)
    cls.VerificationSuite = VerificationSuite(cls.spark)
    cls.sc = cls.spark.sparkContext
    fixture_rows = [
        Row(a="foo", b=1, c=5),
        Row(a="bar", b=2, c=6),
        Row(a="baz", b=3, c=None),
    ]
    cls.df = cls.sc.parallelize(fixture_rows).toDF()
def setUpClass(cls):
    """Start a local Spark session and build matching PySpark/pandas fixtures.

    Exposes the PyDeequ session plus the analysis, profiler and
    constraint-suggestion runners, and two equivalent two-row datasets:
    ``cls.pyspark_df`` and ``cls.pandas_df`` (same columns and values).
    """
    cls.spark = setup_pyspark().appName("test-analyzers-local").getOrCreate()
    cls.pydeequ_session = PyDeequSession(cls.spark)
    cls.AnalysisRunner = cls.pydeequ_session.createAnalysisRunner()
    cls.ColumnProfilerRunner = ColumnProfilerRunner(cls.spark)
    cls.ConstraintSuggestionRunner = ConstraintSuggestionRunner(cls.spark)
    cls.sc = cls.spark.sparkContext

    columns = ["strings", "ints", "bools", "floats", "nans"]
    rows = [
        ("foo", 1, True, 1.0, float("nan")),
        ("bar", 2, False, 2.0, float("nan")),
    ]
    cls.pyspark_df = cls.spark.createDataFrame(rows, schema=columns)
    # Same data expressed column-wise for the pandas counterpart.
    cls.pandas_df = pandasDF(
        {
            "strings": ["foo", "bar"],
            "ints": [1, 2],
            "bools": [True, False],
            "floats": [1.0, 2.0],
            "nans": [np.nan, np.nan],
        }
    )
def setUpClass(cls):
    """Start a local Spark session for the scala-utils tests.

    Exposes ``cls.spark`` and its ``cls.sc`` SparkContext; no DataFrame
    fixtures are needed for this suite.
    """
    cls.spark = setup_pyspark().appName("test-scala-utils-local").getOrCreate()
    cls.sc = cls.spark.sparkContext