Example #1
0
    def test_index_mismatch(self):
        """Columns with differing indexes coalesce when their lengths match.

        The result is expected to carry the first column's index, and columns
        of a different length are expected to raise.
        """
        frame = pd.DataFrame(data=self.reference_data)

        # Re-label column A so its index no longer matches column C's.
        test_index = ['v', 'w', 'x', 'y', 'z']
        a = frame.loc[:, 'A']
        a.index = test_index

        result = coalesce(a, frame['C'])

        # The first column's index wins; none of C's labels survive.
        self.assertTrue((result.index == test_index).all())
        self.assertTrue((result.index != frame['C'].index).all())

        # Values are compared positionally, ignoring the index labels.
        expected_values = pd.Series(
            ['c', 'aa', 'ccc', 'cccc', 'ccccc']).values
        self.assertTrue((result.values == expected_values).all())

        # A length mismatch in either direction must be rejected.
        too_short = pd.Series(['foo', 'bar'])
        too_long = pd.Series(['foo', 'bar', 'baz', 'qux', 'quux', 'corge'])
        for wrong_length in (too_short, too_long):
            with self.assertRaises(Exception):
                coalesce(a, wrong_length)
Example #2
0
    def test_consider_null_cross_type(self):
        """Check that consider_null refuses a cross-type column comparison."""
        frame = pd.DataFrame(data=self.reference_data)
        col_two = frame['two']
        col_c = frame['C']
        extra_nulls = ['cccc']

        # Combining mixed-type columns with consider_null is expected to
        # raise TypeError -- at least for now.
        with self.assertRaises(TypeError):
            coalesce(col_two, col_c, consider_null=extra_nulls)
Example #3
0
    def test_consider_null(self):
        """Exercise the optional keyword argument consider_null."""
        frame = pd.DataFrame(data=self.reference_data)

        # Maybe zero is a bad number: ask for 0 to be considered null.
        zero_as_null = coalesce(
            frame['two'], frame['three'], consider_null=[0])
        expected_numbers = pd.Series([2, 2, 2.2, 3, 3])
        self.assertTrue((zero_as_null == expected_numbers).all())

        # consider_null accepts several values at once.
        several_nulls = coalesce(
            frame['D'], frame['C'], consider_null=['d', ''])
        expected_strings = pd.Series(['c', 'cc', 'ccc', 'cccc', 'ccccc'])
        self.assertTrue((several_nulls == expected_strings).all())
Example #4
0
    def test_numeric_columns(self):
        """Coalesce numeric columns: the basic two- and three-column cases."""
        frame = pd.DataFrame(data=self.reference_data)

        # Two columns. NaN never compares equal to NaN, so any remaining
        # nulls are swapped for the placeholder string 'nan' before the
        # element-wise comparison.
        two_way = coalesce(frame['one'], frame['two']).fillna('nan')
        expected_two = pd.Series([1., 2., 2.2, 'nan', 0.])
        self.assertTrue((two_way == expected_two).all())

        # Three columns: no placeholder substitution is applied here.
        three_way = coalesce(frame['one'], frame['two'], frame['three'])
        expected_three = pd.Series([1., 2., 2.2, 3., 0.])
        self.assertTrue((three_way == expected_three).all())
Example #5
0
    def test_value_preservation(self):
        """Valid values in the first column must not be replaced by nulls."""
        frame = pd.DataFrame(data=self.reference_data)

        merged = coalesce(frame['C'], frame['A'])

        # The result should equal column C element for element.
        unchanged = merged == frame['C']
        self.assertTrue(unchanged.all())
Example #6
0
    def test_cross_type_comparison(self):
        """Columns of different types may be coalesced in the standard case."""
        frame = pd.DataFrame(data=self.reference_data)

        # Remaining nulls become the placeholder 'nan' so that they can be
        # matched by the element-wise comparison (NaN != NaN).
        merged = coalesce(frame['A'], frame['one'], frame['E']).fillna('nan')
        expected = pd.Series([1, 'aa', 'nan', 'nan', 7])
        self.assertTrue((merged == expected).all())
Example #7
0
    def test_one_column(self):
        """Coalescing a single column is a no-op: values and index are kept."""
        frame = pd.DataFrame(data=self.reference_data)

        for name in frame.columns:
            original = frame.loc[:, name]
            returned = coalesce(original)

            # NaN never equals NaN, so substitute a placeholder on both
            # sides before comparing the values.
            same_values = returned.fillna('nan') == original.fillna('nan')
            self.assertTrue(same_values.all())

            same_index = returned.index == original.index
            self.assertTrue(same_index.all())
Example #8
0
    def test_string_columns(self):
        """Coalesce string columns: the basic cases."""
        frame = pd.DataFrame(data=self.reference_data)

        # Each case pairs the column names handed to coalesce (in order)
        # with the values the result is expected to hold.
        cases = (
            # Two columns
            (('A', 'C'), ['c', 'aa', 'ccc', 'cccc', 'ccccc']),
            # Three columns
            (('A', 'D', 'C'), ['d', 'aa', 'ccc', 'cccc', 'ccccc']),
            # None is equivalent to NaN
            (('B', 'C'), ['b', 'bb', 'ccc', 'cccc', 'bbbbb']),
        )

        for names, expected_values in cases:
            columns = [frame[name] for name in names]
            result = coalesce(*columns)
            matches = result == pd.Series(expected_values)
            self.assertTrue(matches.all())