def test_leading_trailing_whitespace_column_name(self):
        """A column named ' col ' must compare equal to one named 'col'."""
        def build_column(column_name):
            # IDs and values are the same on every call; only the Series
            # name varies between the two columns under comparison.
            ids = pd.Index(['a', 'b', 'c'], name='id')
            values = pd.Series(['foo', ' bar ', 'baz'],
                               name=column_name,
                               index=ids)
            return CategoricalMetadataColumn(values)

        padded_name = build_column(' col ')
        bare_name = build_column('col')

        self.assertEqual(padded_name, bare_name)
Example #2
0
 def test_leading_trailing_whitespace_value(self):
     """The padded value ' bar ' must be rejected with a ValueError."""
     ids = pd.Index(['a', 'b', 'c'], name='id')
     padded_values = pd.Series(['foo', ' bar ', 'baz'],
                               name='col1',
                               index=ids)
     # The message is expected to name the column and the offending value.
     pattern = ("CategoricalMetadataColumn.*leading or trailing "
                "whitespace characters.*Column 'col1'.*' bar '")

     with self.assertRaisesRegex(ValueError, pattern):
         CategoricalMetadataColumn(padded_values)
Example #3
0
 def test_empty_str_value(self):
     """An empty-string value must be rejected with a ValueError."""
     ids = pd.Index(['a', 'b', 'c'], name='id')
     values_with_empty = pd.Series(['foo', '', 'bar'],
                                   name='col1',
                                   index=ids)
     # The message is expected to mention the column name.
     pattern = ("CategoricalMetadataColumn.*empty strings.*"
                "column 'col1'")

     with self.assertRaisesRegex(ValueError, pattern):
         CategoricalMetadataColumn(values_with_empty)
Example #4
0
 def test_unsupported_type_value(self):
     """A float mixed in with string values must raise a TypeError."""
     ids = pd.Index(['a', 'b', 'c'], name='id')
     mixed_values = pd.Series(['foo', 'bar', 42.5],
                              name='col1',
                              index=ids)
     # The message is expected to report the value, its type and the column.
     pattern = ("CategoricalMetadataColumn.*strings or missing "
                r"values.*42\.5.*float.*'col1'")

     with self.assertRaisesRegex(TypeError, pattern):
         CategoricalMetadataColumn(mixed_values)
Example #5
0
 def test_unsupported_dtype(self):
     """A Series holding only floats must raise a TypeError."""
     ids = pd.Index(['a', 'b', 'c'], name='id')
     float_values = pd.Series([42.5, 42.6, 42.7],
                              name='col1',
                              index=ids)
     # The message is expected to name the column and the float64 dtype.
     pattern = ("CategoricalMetadataColumn 'col1' does not support"
                ".*Series.*dtype.*float64")

     with self.assertRaisesRegex(TypeError, pattern):
         CategoricalMetadataColumn(float_values)
Example #6
0
    def test_wrong_obj(self):
        """Both column constructors must raise TypeError for non-Series input."""
        # (constructor, object that is not a Series, expected message regex)
        cases = (
            (NumericMetadataColumn,
             pd.DataFrame([[1, 2, 3]]),
             'NumericMetadataColumn constructor.*pandas.Series'),
            (CategoricalMetadataColumn,
             {},
             'CategoricalMetadataColumn constructor.*pandas.Series'),
        )

        for column_type, not_a_series, pattern in cases:
            with self.assertRaisesRegex(TypeError, pattern):
                column_type(not_a_series)
Example #7
0
    def test_all_missing_data(self):
        """An all-NaN object column must come back unchanged from to_series()."""
        def all_nan_series():
            # Fresh object-dtype Series of three NaN values on each call.
            return pd.Series(np.array([np.nan] * 3, dtype=object),
                             name='col1',
                             index=pd.Index(['a', 'b', 'c'], name='id'))

        column = CategoricalMetadataColumn(all_nan_series())

        observed = column.to_series()
        expected = all_nan_series()

        pdt.assert_series_equal(observed, expected)
        self.assertEqual(observed.dtype, object)
Example #8
0
    def test_numeric_strings_preserved_as_strings(self):
        """Numeric-looking strings must come back unchanged from to_series()."""
        ids = pd.Index(['a', 'b', 'c', 'd'], name='id')
        original = pd.Series(['1', np.nan, '2.5', '3.0'],
                             name='my column',
                             index=ids)
        column = CategoricalMetadataColumn(original)

        # The column's ID and name properties mirror the input Series.
        self.assertEqual(column.id_count, 4)
        self.assertEqual(column.id_header, 'id')
        self.assertEqual(column.ids, ('a', 'b', 'c', 'd'))
        self.assertEqual(column.name, 'my column')

        # Round trip: values are still the same strings, dtype is object.
        round_tripped = column.to_series()
        pdt.assert_series_equal(round_tripped, original)
        self.assertEqual(round_tripped.dtype, object)
Example #9
0
    def test_supported_dtype(self):
        """A Series of strings and NaN is accepted and round-trips unchanged."""
        ids = pd.Index(['a', 'b', 'c', 'd'], name='id')
        original = pd.Series(['foo', np.nan, 'bar', 'foo'],
                             name='my column',
                             index=ids)
        column = CategoricalMetadataColumn(original)

        # The column's ID and name properties mirror the input Series.
        self.assertEqual(column.id_count, 4)
        self.assertEqual(column.id_header, 'id')
        self.assertEqual(column.ids, ('a', 'b', 'c', 'd'))
        self.assertEqual(column.name, 'my column')

        # Round trip: same values, object dtype.
        round_tripped = column.to_series()
        pdt.assert_series_equal(round_tripped, original)
        self.assertEqual(round_tripped.dtype, object)
Example #10
0
    def test_type_mismatch(self):
        """A dummy column must not equal a numeric or a categorical column."""
        def make_ids():
            return pd.Index(['id1', 'id2', 'id3'], name='id')

        def make_floats():
            # Same float data for the dummy and the numeric column.
            return pd.Series([1.0, 2.0, 3.0], name='col1', index=make_ids())

        dummy_col = DummyMetadataColumn(make_floats())
        numeric_col = NumericMetadataColumn(make_floats())
        categorical_col = CategoricalMetadataColumn(
            pd.Series(['a', 'b', 'c'], name='col1', index=make_ids()))

        self.assertReallyNotEqual(dummy_col, numeric_col)
        self.assertReallyNotEqual(dummy_col, categorical_col)
Example #11
0
    def test_missing_data_normalized(self):
        """np.nan, float('nan') and None inputs must all come back as NaN."""
        ids = ['a', 'b', 'c', 'd']
        # Three different spellings of "missing" around one real value.
        raw_values = [np.nan, 'foo', float('nan'), None]
        column = CategoricalMetadataColumn(
            pd.Series(raw_values,
                      name='col1',
                      index=pd.Index(ids, name='id')))

        observed = column.to_series()

        expected = pd.Series([np.nan, 'foo', np.nan, np.nan],
                             name='col1',
                             index=pd.Index(ids, name='id'))

        pdt.assert_series_equal(observed, expected)
        self.assertEqual(observed.dtype, object)
        # Check each originally-missing ID individually.
        for missing_id in ('a', 'c', 'd'):
            self.assertTrue(np.isnan(observed[missing_id]))
Example #12
0
    def test_categorical_metadata_column(self):
        """save() must write the expected TSV text for a categorical column."""
        # Expected file: header row, a '#q2:types' row, then one row per ID.
        expected_text = (
            "id\tcategorical-column\n"
            "#q2:types\tcategorical\n"
            "id1\tfoo\n"
            "id2\tbar\n"
            "id3\t42.50\n"
        )

        ids = pd.Index(['id1', 'id2', 'id3'], name='id')
        values = pd.Series(['foo', 'bar', '42.50'],
                           name='categorical-column',
                           index=ids)
        column = CategoricalMetadataColumn(values)

        column.save(self.filepath)

        with open(self.filepath, 'r') as saved_file:
            written_text = saved_file.read()

        self.assertEqual(written_text, expected_text)