Esempio n. 1
0
    def test_categorical_df_concat_value_error(self):
        """
        categorical_df_concat should raise ValueError when the input frames
        disagree on a column's dtype or on the column names.
        """
        expected_message = (
            "Input DataFrames must have the same columns/dtypes."
        )

        def frame(letters, numbers, numeric_dtype, numeric_name='B'):
            # Categorical column 'A' alongside a single numeric column.
            return pd.DataFrame({
                'A': pd.Series(letters, dtype='category'),
                numeric_name: pd.Series(numbers, dtype=numeric_dtype),
            })

        base = (['a', 'b', 'c'], [100, 102, 103], 'int64')
        other_letters = ['c', 'b', 'd']
        other_numbers = [103, 102, 104]

        # Column 'B' is int64 in the first frame but float64 in the second.
        mismatched_dtypes = [
            frame(*base),
            frame(other_letters, other_numbers, 'float64'),
        ]
        # The second frame names its numeric column 'X' instead of 'B'.
        mismatched_column_names = [
            frame(*base),
            frame(other_letters, other_numbers, 'int64', numeric_name='X'),
        ]

        # Both kinds of mismatch are expected to fail with the same message.
        for bad_input in (mismatched_dtypes, mismatched_column_names):
            with self.assertRaises(ValueError) as caught:
                categorical_df_concat(bad_input)
            self.assertEqual(str(caught.exception), expected_message)
Esempio n. 2
0
    def test_categorical_df_concat_value_error(self):
        """
        Verify that categorical_df_concat rejects inconsistent inputs.

        Two scenarios are exercised: a column whose dtype differs between
        frames, and a column whose name differs between frames. Each must
        raise ValueError carrying the same fixed message.
        """
        def reference_frame():
            # Built fresh for every scenario so the cases share no objects.
            return pd.DataFrame(
                {
                    'A': pd.Series(['a', 'b', 'c'], dtype='category'),
                    'B': pd.Series([100, 102, 103], dtype='int64'),
                }
            )

        def check_rejected(frames):
            with self.assertRaises(ValueError) as ctx:
                categorical_df_concat(frames)
            self.assertEqual(
                str(ctx.exception),
                "Input DataFrames must have the same columns/dtypes."
            )

        # Same column names as the reference, but 'B' holds float64.
        float_b_frame = pd.DataFrame(
            {
                'A': pd.Series(['c', 'b', 'd'], dtype='category'),
                'B': pd.Series([103, 102, 104], dtype='float64'),
            }
        )
        # Same dtypes as the reference, but the int column is named 'X'.
        renamed_column_frame = pd.DataFrame(
            {
                'A': pd.Series(['c', 'b', 'd'], dtype='category'),
                'X': pd.Series([103, 102, 104], dtype='int64'),
            }
        )

        mismatched_dtypes = [reference_frame(), float_b_frame]
        mismatched_column_names = [reference_frame(), renamed_column_frame]

        check_rejected(mismatched_dtypes)
        check_rejected(mismatched_column_names)
Esempio n. 3
0
    def run_chunked_pipeline(self, pipeline, start_date, end_date, chunksize):
        """
        Run ``pipeline`` between ``start_date`` and ``end_date`` in pieces.

        The date range is partitioned by ``compute_date_range_chunks`` using
        ``self._calendar`` and ``chunksize``. ``run_pipeline`` is invoked once
        per partition, and the per-partition results are combined with
        ``categorical_df_concat``, whose return value is returned unchanged.
        """
        date_ranges = compute_date_range_chunks(
            self._calendar, start_date, end_date, chunksize,
        )

        results = []
        for chunk_start, chunk_end in date_ranges:
            chunk_result = self.run_pipeline(pipeline, chunk_start, chunk_end)
            results.append(chunk_result)

        # NOTE(review): inplace=True presumably lets the helper operate on
        # these freshly computed results without copying them - confirm
        # against categorical_df_concat.
        return categorical_df_concat(results, inplace=True)
Esempio n. 4
0
    def run_chunked_pipeline(self, pipeline, start_date, end_date, chunksize):
        """
        Compute ``pipeline`` chunk by chunk and return the combined output.

        ``compute_date_range_chunks`` receives ``self._calendar``, the two
        bounding dates and ``chunksize`` and yields (start, end) pairs. Each
        pair is passed to ``run_pipeline``; the collected outputs are then
        handed to ``categorical_df_concat`` with ``inplace=True``.
        """
        def run_one(bounds):
            # Each element is expected to unpack into exactly two dates.
            first, last = bounds
            return self.run_pipeline(pipeline, first, last)

        chunk_bounds = compute_date_range_chunks(
            self._calendar,
            start_date,
            end_date,
            chunksize,
        )
        outputs = [run_one(bounds) for bounds in chunk_bounds]

        return categorical_df_concat(outputs, inplace=True)
Esempio n. 5
0
    def test_categorical_df_concat(self):
        """
        categorical_df_concat should stack frames with categorical columns.

        Three frames sharing the columns 'A' (category), 'B' (int64) and
        'C' (category) are concatenated. The result is compared with a frame
        built directly from the stacked values, and the categories of both
        categorical columns are compared explicitly as well.
        """
        inp = [
            pd.DataFrame(
                {
                    'A': pd.Series(['a', 'b', 'c'], dtype='category'),
                    'B': pd.Series([100, 102, 103], dtype='int64'),
                    'C': pd.Series(['x', 'x', 'x'], dtype='category'),
                }
            ),
            pd.DataFrame(
                {
                    'A': pd.Series(['c', 'b', 'd'], dtype='category'),
                    'B': pd.Series([103, 102, 104], dtype='int64'),
                    'C': pd.Series(['y', 'y', 'y'], dtype='category'),
                }
            ),
            pd.DataFrame(
                {
                    'A': pd.Series(['a', 'b', 'd'], dtype='category'),
                    'B': pd.Series([101, 102, 104], dtype='int64'),
                    'C': pd.Series(['z', 'z', 'z'], dtype='category'),
                }
            ),
        ]
        result = categorical_df_concat(inp)

        expected = pd.DataFrame(
            {
                'A': pd.Series(
                    ['a', 'b', 'c', 'c', 'b', 'd', 'a', 'b', 'd'],
                    dtype='category'
                ),
                'B': pd.Series(
                    [100, 102, 103, 103, 102, 104, 101, 102, 104],
                    dtype='int64'
                ),
                'C': pd.Series(
                    ['x', 'x', 'x', 'y', 'y', 'y', 'z', 'z', 'z'],
                    dtype='category'
                ),
            },
        )
        # Each input keeps its own 0..2 index, so the expected index repeats.
        # pd.Int64Index was removed in pandas 2.0; pd.Index with an explicit
        # int64 dtype builds the equivalent index on old and new versions.
        expected.index = pd.Index([0, 1, 2, 0, 1, 2, 0, 1, 2], dtype='int64')
        assert_equal(expected, result)

        # Compare the category sets of every categorical column explicitly.
        for column in ('A', 'C'):
            assert_equal(
                expected[column].cat.categories,
                result[column].cat.categories
            )