def test_np_spark_compat_frame(self):
    """Compare NumPy functions on pandas frames against pandas-on-Spark frames.

    Two random integer frames with columns ``a`` and ``b`` are built and
    converted with ``ps.from_pandas``. Every name in the unary and binary
    mappings that is not in ``self.blacklist`` is looked up on ``np`` and
    applied to both flavours; results must agree approximately. Any
    failure is re-raised as an ``AssertionError`` naming the function.
    """
    # Draw the row count first, then the payload (same RNG call order as
    # passing the nested call directly as ``size``).
    n_rows = np.random.randint(100)
    pandas_left = pd.DataFrame(
        np.random.randint(-100, 100, size=(n_rows, 2)), columns=["a", "b"]
    )
    right_shape = (len(pandas_left), len(pandas_left.columns))
    pandas_right = pd.DataFrame(
        np.random.randint(-100, 100, size=right_shape), columns=["a", "b"]
    )
    spark_left = ps.from_pandas(pandas_left)
    spark_right = ps.from_pandas(pandas_right)

    # Unary functions.
    for func_name in unary_np_spark_mappings:
        func = getattr(np, func_name)
        if func_name in self.blacklist:
            continue
        try:
            self.assert_eq(func(pandas_left), func(spark_left), almost=True)
        except Exception as exc:
            raise AssertionError("Test in '%s' function was failed." % func_name) from exc

    # Binary functions: frame-with-itself and frame-with-scalar.
    for func_name in binary_np_spark_mappings:
        func = getattr(np, func_name)
        if func_name in self.blacklist:
            continue
        try:
            self.assert_eq(
                func(pandas_left, pandas_left),
                func(spark_left, spark_left),
                almost=True,
            )
            self.assert_eq(func(pandas_left, 1), func(spark_left, 1), almost=True)
        except Exception as exc:
            raise AssertionError("Test in '%s' function was failed." % func_name) from exc

    # Binary functions across two different frames. Only the first five
    # mapping entries are exercised: enabling 'compute.ops_on_diff_frames'
    # increases the run time too much.
    try:
        set_option("compute.ops_on_diff_frames", True)
        first_five = list(binary_np_spark_mappings)[:5]
        for func_name in first_five:
            func = getattr(np, func_name)
            if func_name in self.blacklist:
                continue
            try:
                expected = func(pandas_left, pandas_right).sort_index()
                actual = func(spark_left, spark_right).sort_index()
                self.assert_eq(expected, actual, almost=True)
            except Exception as exc:
                raise AssertionError(
                    "Test in '%s' function was failed." % func_name
                ) from exc
    finally:
        # Always restore the option, even when a comparison fails.
        reset_option("compute.ops_on_diff_frames")
def test_get_set_reset_option(self):
    """Round-trip ``test.config`` through get, set and reset.

    The option must read ``"default"`` initially, ``"value"`` after being
    set, and ``"default"`` again after being reset.
    """
    option_key = "test.config"

    # Before any change the option reports its default.
    self.assertEqual(ps.get_option(option_key), "default")

    # After set_option the new value is visible.
    ps.set_option(option_key, "value")
    self.assertEqual(ps.get_option(option_key), "value")

    # reset_option restores the default.
    ps.reset_option(option_key)
    self.assertEqual(ps.get_option(option_key), "default")
def test_unknown_option(self):
    """Get, set and reset on the key ``"unknown"`` raise ``config.OptionError``.

    Each operation is paired with the regex its error message must match.
    """
    expectations = (
        ("No such option", lambda: ps.get_option("unknown")),
        ("Available options", lambda: ps.set_option("unknown", "value")),
        ("test.config", lambda: ps.reset_option("unknown")),
    )
    for message_pattern, operation in expectations:
        with self.assertRaisesRegex(config.OptionError, message_pattern):
            operation()
def test_get_set_reset_option_different_types(self):
    """Set options to list, float, int and ``None`` values and read them back."""
    # Simple round trips: set a value, then expect to read the same value.
    round_trips = (
        ("test.config.list", [1, 2, 3, 4]),
        ("test.config.float", 5.0),
        ("test.config.int", 123),
    )
    for option_key, new_value in round_trips:
        ps.set_option(option_key, new_value)
        self.assertEqual(ps.get_option(option_key), new_value)

    # This option starts out as None and accepts both an int and None.
    nullable_key = "test.config.int.none"
    self.assertEqual(ps.get_option(nullable_key), None)  # default None
    for new_value in (123, None):
        ps.set_option(nullable_key, new_value)
        self.assertEqual(ps.get_option(nullable_key), new_value)
def test_different_types(self):
    """Setting an option to a value of the wrong type raises ``ValueError``.

    Each case checks a fragment of the error message. The last two
    fragments contain regex metacharacters (``[``, ``]``, ``(``, ``)``)
    and are escaped so that they are matched literally.
    """
    with self.assertRaisesRegex(ValueError, "was <class 'int'>"):
        ps.set_option("test.config.list", 1)

    with self.assertRaisesRegex(ValueError, "however, expected types are"):
        ps.set_option("test.config.float", "abc")

    # Unescaped, "[<class 'int'>]" is a character class that matches any
    # single one of those characters, so the check passed for almost any
    # message. Escaping makes it require the literal bracketed text.
    # NOTE(review): assumes the message contains "[<class 'int'>]"
    # verbatim -- confirm against the option validation code.
    with self.assertRaisesRegex(ValueError, r"\[<class 'int'>\]"):
        ps.set_option("test.config.int", "abc")

    # Unescaped parentheses form a regex group and were never matched
    # literally; escape them so the tuple's parentheses are checked too.
    # NOTE(review): assumes the message contains the tuple repr verbatim
    # -- confirm against the option validation code.
    with self.assertRaisesRegex(ValueError, r"\(<class 'int'>, <class 'NoneType'>\)"):
        ps.set_option("test.config.int.none", "abc")
def test_check_func(self):
    """Setting ``test.config.int`` to ``-1`` raises ``ValueError``.

    The error message must match the pattern ``bigger then 0``.
    """
    # Spelling is kept exactly as the test has always expected it.
    expected_pattern = "bigger then 0"
    with self.assertRaisesRegex(ValueError, expected_pattern):
        ps.set_option("test.config.int", -1)