Exemple #1
0
    def test_peel(self):
        # Exercise run_test_peel with the fixture delimiter as stored, wrapped
        # in np.str_, and encoded to bytes.
        delimiter_forms = (
            self.delim,
            np.str_(self.delim),
            str.encode(str(self.delim)),
        )
        for delimiter in delimiter_forms:
            run_test_peel(self.strings, self.test_strings, delimiter)

        # Gremlins data: an empty delimiter is expected to raise ValueError,
        # while a double quote and a single space are run as ordinary delimiters.
        gremlins = self._get_ak_gremlins()
        with self.assertRaises(ValueError):
            run_test_peel(gremlins.gremlins_strings, gremlins.gremlins_test_strings, '')
        for delimiter in ('"', ' '):
            run_test_peel(gremlins.gremlins_strings, gremlins.gremlins_test_strings, delimiter)

        def stored_values(ak_strings):
            # Raw "values" component pulled back as a plain Python list
            # (an earlier spelling of this read was .bytes.to_ndarray().tolist()).
            return ak_strings._comp_to_ndarray("values").tolist()

        # Fixed input used to verify the raw stored values against a known encoding.
        words = pd.Series(["k1:v1", "k2:v2", "k3:v3", "no_colon"])
        strings = ak.from_series(words, "string")

        # convert_to_ord turns the Python strings into the expected byte values,
        # each string terminated by a null byte, mimicking what should be stored
        # server-side for the Strings object.
        self.assertListEqual(convert_to_ord(words.to_list()), stored_values(strings))

        # Peel on ":" and compare both halves; the entry without a colon is
        # expected to yield an empty left part and the whole string on the right.
        left, right = strings.peel(":")
        want_left = convert_to_ord(["k1", "k2", "k3", ""])
        want_right = convert_to_ord(["v1", "v2", "v3", "no_colon"])
        self.assertListEqual(want_left, stored_values(left))
        self.assertListEqual(want_right, stored_values(right))
Exemple #2
0
    def test_peel_delimiter_length_issue(self):
        # Regression test for Issue 838: peel with a 25-dash delimiter.
        delim = "-" * 25

        # Each scenario is (input strings, expected left parts, expected right parts).
        # The second one is a slight permutation of the first, kept because both
        # versions could be made to fail at one point.
        scenarios = (
            (
                [f"abc{delim}xyz", f"small{delim}dog", f"blue{delim}hat", "last"],
                ["abc", "small", "blue", ""],
                ["xyz", "dog", "hat", "last"],
            ),
            (
                [f"abc{delim}xyz", f"small{delim}dog", "last"],
                ["abc", "small", ""],
                ["xyz", "dog", "last"],
            ),
        )

        for words, want_left, want_right in scenarios:
            strings = ak.from_series(pd.Series(words))
            left, right = strings.peel(delim)
            # Pull both halves back before asserting on either of them.
            got_left = left.to_ndarray().tolist()
            got_right = right.to_ndarray().tolist()
            self.assertListEqual(want_left, got_left)
            self.assertListEqual(want_right, got_right)
    def test_from_series(self):
        # Covers ak.from_series for string, integer, float, boolean and datetime
        # pandas Series, plus the two error paths exercised at the end.
        #
        # NOTE: the builtins str/bool are used instead of np.str/np.bool. Those
        # NumPy names were plain aliases of the builtins, were deprecated in
        # NumPy 1.20 and removed in 1.24, so referencing them raises
        # AttributeError on NumPy 1.24 and later.

        # --- strings -------------------------------------------------------
        # Series with the pandas "string" dtype.
        strings = ak.from_series(
            pd.Series(['a', 'b', 'c', 'd', 'e'], dtype="string"))

        self.assertIsInstance(strings, ak.Strings)
        self.assertEqual(5, len(strings))

        # Default (object) Series with an explicit str dtype request.
        objects = ak.from_series(pd.Series(['a', 'b', 'c', 'd', 'e']),
                                 dtype=str)

        self.assertIsInstance(objects, ak.Strings)
        self.assertEqual(str, objects.dtype)

        # Default (object) Series with no dtype argument.
        objects = ak.from_series(pd.Series(['a', 'b', 'c', 'd', 'e']))

        self.assertIsInstance(objects, ak.Strings)
        self.assertEqual(str, objects.dtype)

        # --- integers ------------------------------------------------------
        p_array = ak.from_series(pd.Series(np.random.randint(0, 10, 10)))

        self.assertIsInstance(p_array, ak.pdarray)
        self.assertEqual(np.int64, p_array.dtype)

        # Integers stored in an object-dtype Series, converted via dtype=np.int64.
        p_i_objects_array = ak.from_series(
            pd.Series(np.random.randint(0, 10, 10), dtype='object'),
            dtype=np.int64)

        self.assertIsInstance(p_i_objects_array, ak.pdarray)
        self.assertEqual(np.int64, p_i_objects_array.dtype)

        # --- floats --------------------------------------------------------
        p_array = ak.from_series(
            pd.Series(np.random.uniform(low=0.0, high=1.0, size=10)))

        self.assertIsInstance(p_array, ak.pdarray)
        self.assertEqual(np.float64, p_array.dtype)

        # Floats stored in an object-dtype Series, converted via dtype=np.float64.
        p_f_objects_array = ak.from_series(
            pd.Series(np.random.uniform(low=0.0, high=1.0, size=10),
                      dtype='object'),
            dtype=np.float64)

        self.assertIsInstance(p_f_objects_array, ak.pdarray)
        self.assertEqual(np.float64, p_f_objects_array.dtype)

        # --- booleans ------------------------------------------------------
        p_array = ak.from_series(
            pd.Series(np.random.choice([True, False], size=10)))

        self.assertIsInstance(p_array, ak.pdarray)
        self.assertEqual(bool, p_array.dtype)

        # Booleans stored in an object-dtype Series, converted via dtype=bool.
        p_b_objects_array = ak.from_series(
            pd.Series(np.random.choice([True, False], size=10),
                      dtype='object'),
            dtype=bool)

        self.assertIsInstance(p_b_objects_array, ak.pdarray)
        self.assertEqual(bool, p_b_objects_array.dtype)

        # --- datetimes -----------------------------------------------------
        # Each datetime flavour below is expected to come back as an int64 pdarray.
        p_array = ak.from_series(pd.Series([dt.datetime(2016, 1, 1, 0, 0, 1)]))

        self.assertIsInstance(p_array, ak.pdarray)
        self.assertEqual(np.int64, p_array.dtype)

        p_array = ak.from_series(pd.Series([np.datetime64('2018-01-01')]))

        self.assertIsInstance(p_array, ak.pdarray)
        self.assertEqual(np.int64, p_array.dtype)

        # Mixed str / np.datetime64 / datetime inputs normalised by pd.to_datetime.
        p_array = ak.from_series(
            pd.Series(
                pd.to_datetime([
                    '1/1/2018',
                    np.datetime64('2018-01-01'),
                    dt.datetime(2018, 1, 1)
                ])))

        self.assertIsInstance(p_array, ak.pdarray)
        self.assertEqual(np.int64, p_array.dtype)

        # --- error paths ---------------------------------------------------
        # A non-Series argument must be rejected with a TypeError carrying
        # exactly this message.
        with self.assertRaises(TypeError) as cm:
            ak.from_series(np.ones(100))
        self.assertEqual(
            ('type of argument "series" must be pandas.core.series.Series; ' +
             'got numpy.ndarray instead'), cm.exception.args[0])

        # An int8 Series must be rejected with a ValueError listing the
        # supported dtypes.
        with self.assertRaises(ValueError) as cm:
            ak.from_series(
                pd.Series(np.random.randint(0, 10, 10), dtype=np.int8))
        self.assertEqual(
            ('dtype int8 is unsupported. Supported dtypes are bool, ' +
             'float64, int64, string, datetime64[ns], and timedelta64[ns]'),
            cm.exception.args[0])