def test_array_td(self):
     """Check that two timedelta64[Y] array columns are stored under 'm' as nanoseconds."""
     years_a = np.array([10, 20, 30], dtype='timedelta64[Y]')
     years_b = np.array([1, 2, 3], dtype='timedelta64[Y]')
     # Expected storage: the two columns side by side, cast to nanosecond resolution.
     stacked = np.column_stack((years_a, years_b))
     expected = stacked.astype('timedelta64[ns]')

     frame = dx.DataFrame({'a': years_a, 'b': years_b})
     assert_array_equal(expected, frame._data['m'])

     for name, info in (('a', ('m', 0, 0)), ('b', ('m', 1, 1))):
         assert frame._column_info[name].values == info
 def test_array_bool(self):
     """Check that two boolean array columns are stored as int8 under the 'b' key."""
     flags_a = np.array([True, False, True])
     flags_b = np.array([False, False, False])
     expected = np.column_stack((flags_a, flags_b)).astype('int8')

     frame = dx.DataFrame({'a': flags_a, 'b': flags_b})
     assert_array_equal(expected, frame._data['b'])

     info = frame._column_info
     assert info['a'].values == ('b', 0, 0)
     assert info['b'].values == ('b', 1, 1)
 def test_array_dt(self):
     """Check that two datetime64[ns] array columns are stored unchanged under 'M'."""
     stamps_a = np.array([10, 20, 30], dtype='datetime64[ns]')
     stamps_b = np.array([100, 200, 300], dtype='datetime64[ns]')
     expected = np.column_stack((stamps_a, stamps_b))

     frame = dx.DataFrame({'a': stamps_a, 'b': stamps_b})
     assert_array_equal(expected, frame._data['M'])

     for name, info in (('a', ('M', 0, 0)), ('b', ('M', 1, 1))):
         assert frame._column_info[name].values == info
 def test_array_string(self):
     """Check that two string array columns are stored as uint32 values under 'S'."""
     words_a = np.array(['asdf', 'wer'])
     words_b = np.array(['wyw', 'xcvd'])
     frame = dx.DataFrame({'a': words_a, 'b': words_b})

     # The 'S' block is expected to hold these integers rather than the text itself.
     expected = array([[1, 1], [2, 2]], dtype='uint32')
     assert_array_equal(expected, frame._data['S'])

     info = frame._column_info
     assert info['a'].values == ('S', 0, 0)
     assert info['b'].values == ('S', 1, 1)
 def test_array_float(self):
     """Check that two float array columns are stored side by side under 'f'."""
     floats_a = np.array([1.1, 2, 3])
     floats_b = np.array([10, 20.2, 30])
     expected = np.column_stack((floats_a, floats_b))

     frame = dx.DataFrame({'a': floats_a, 'b': floats_b})
     assert_array_equal(expected, frame._data['f'])

     for name, info in (('a', ('f', 0, 0)), ('b', ('f', 1, 1))):
         assert frame._column_info[name].values == info
Beispiel #6
0
    def test_to_dict(self):
        """Check ``self.df1.to_dict`` for both the 'array' and the 'list' orientation."""
        # 'array' orientation: every column comes back as a NumPy array.
        expected_arrays = {
            'a': np.array([1, 5, 7, 11]),
            'b': np.array([nan, 5.4, -1.1, .045]),
        }
        actual_arrays = self.df1.to_dict('array')
        for column, values in actual_arrays.items():
            assert_array_equal(values, expected_arrays[column])

        # 'list' orientation: every column comes back as a plain list.
        expected_lists = {'a': [1, 5, 7, 11], 'b': [nan, 5.4, -1.1, .045]}
        actual_lists = self.df1.to_dict('list')
        assert_dict_list(actual_lists, expected_lists)
Beispiel #7
0
    def test_streak_group(self):
        """Check ``streak(column, group=True)`` on a string column and a float column."""
        airline = ['AA', 'AA', 'AA', 'UA', 'DL', 'DL', 'WN', 'WN', 'AA', 'AA', None]
        day_of_week = [2, 3, 6, 6, 6, 6, 4, 4, 1, 6, 6]
        departure_delay = [nan, nan, -1.0, -1.0, -1.0, 22.0, 3.0, 3.0, 21.0, -2.0, nan]
        frame = de.DataFrame({
            'AIRLINE': airline,
            'DAY_OF_WEEK': day_of_week,
            'DEPARTURE_DELAY': departure_delay,
        })

        # The expected ids grow by one whenever a new run of equal values starts;
        # in the float expectation the two leading NaNs get separate ids.
        expected_by_column = (
            ('AIRLINE', array([1, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6])),
            ('DEPARTURE_DELAY', array([1, 2, 3, 3, 3, 4, 5, 5, 6, 7, 8])),
        )
        for column, expected in expected_by_column:
            assert_array_equal(frame.streak(column, group=True), expected)
Beispiel #8
0
    def test_streak_value(self):
        """Check ``streak(column, value)`` for a bad value type and for two valid targets."""
        airline = ['AA', 'AA', 'AA', 'UA', 'DL', 'DL', 'WN', 'WN', 'WN', 'AS', None]
        day_of_week = [2, 3, 6, 6, 6, 6, 4, 4, 1, 2, 2]
        departure_delay = [nan, nan, -1.0, -1.0, -1.0, 22.0, 3.0, 3.0, 21.0, -2.0, nan]
        frame = de.DataFrame({
            'AIRLINE': airline,
            'DAY_OF_WEEK': day_of_week,
            'DEPARTURE_DELAY': departure_delay,
        })

        # A string target on the float-valued column must be rejected.
        with pytest.raises(TypeError):
            frame.streak('DEPARTURE_DELAY', 'AA')

        # Expected output counts up along each run of the target value and is 0 elsewhere.
        delay_runs = frame.streak('DEPARTURE_DELAY', -1)
        assert_array_equal(delay_runs, array([0, 0, 1, 2, 3, 0, 0, 0, 0, 0, 0]))

        day_runs = frame.streak('DAY_OF_WEEK', 6)
        assert_array_equal(day_runs, array([0, 0, 1, 2, 3, 4, 0, 0, 0, 0, 0]))
Beispiel #9
0
    def test_get_values(self):
        """Check ``.values`` for ``self.df1``, a frame built from a 2-D array, and ``self.df2``."""
        expected_df1 = np.array([[1, 5, 7, 11], [nan, 5.4, -1.1, .045]]).T
        assert_array_equal(self.df1.values, expected_df1)

        # A frame built straight from a 2-D float array must hand the same numbers back.
        random_block = np.random.rand(100, 5)
        from_block = dx.DataFrame(random_block)
        assert_array_equal(from_block.values, random_block)

        # With a string column present the expectation is an object array.
        rows = [[1, 5, 7, 11],
                [nan, 5.4, -1.1, .045],
                ['ted', 'fred', 'ted', 'fred']]
        expected_df2 = np.array(rows, dtype='O').T
        assert_array_equal(self.df2.values, expected_df2)
Beispiel #10
0
    def test_streak(self):
        """Check the default ``streak(column)`` on a string, an integer and a float column."""
        airline = ['AA', 'AA', 'AA', 'UA', 'DL', 'DL', 'WN', 'WN', 'WN', 'AS', None]
        day_of_week = [2, 3, 6, 6, 6, 6, 4, 4, 1, 2, 2]
        departure_delay = [nan, nan, -1.0, -1.0, -1.0, 22.0, 3.0, 3.0, 21.0, -2.0, nan]
        frame = de.DataFrame({
            'AIRLINE': airline,
            'DAY_OF_WEEK': day_of_week,
            'DEPARTURE_DELAY': departure_delay,
        })

        # The expected counters restart at 1 whenever the value changes.
        expected_by_column = (
            ('AIRLINE', array([1, 2, 3, 1, 1, 2, 1, 2, 3, 1, 1])),
            ('DAY_OF_WEEK', array([1, 1, 1, 2, 3, 4, 1, 2, 1, 1, 2])),
            ('DEPARTURE_DELAY', array([1, 1, 1, 2, 3, 1, 1, 2, 1, 1, 1])),
        )
        for column, expected in expected_by_column:
            assert_array_equal(frame.streak(column), expected)
 def test_single_list_string(self):
     """Check that one column given as a list of strings is stored as uint32 under 'S'."""
     letters = np.array(['a', 'b']).tolist()
     frame = dx.DataFrame({'a': letters})

     expected = array([1, 2], dtype='uint32')
     stored = frame._data['S'][:, 0]
     assert_array_equal(expected, stored)
     assert frame._column_info['a'].values == ('S', 0, 0)
    def test_all(self):
        """Check each typed block and the column metadata of ``df_mix`` (defined outside this method)."""
        string_values = array([1, 2, 3, 4, 5, 6, 7, 8], dtype='uint32')
        expected_blocks = (
            ('i', np.array(a)),
            ('f', np.array(b)),
            ('S', string_values),
            ('b', np.array(d).astype('int8')),
            ('M', np.array(e, dtype='datetime64[ns]')),
            ('m', np.array(f, dtype='timedelta64[ns]')),
        )
        for kind, expected in expected_blocks:
            # Every kind is compared against the first column of its block.
            assert_array_equal(expected, df_mix._data[kind][:, 0])

        expected_info = {
            'a': ('i', 0, 0),
            'b': ('f', 0, 1),
            'c': ('S', 0, 2),
            'd': ('b', 0, 3),
            'e': ('M', 0, 4),
            'f': ('m', 0, 5),
        }
        for name, info in expected_info.items():
            assert df_mix._column_info[name].values == info
 def test_single_array_bool(self):
     """Check that a bare boolean array becomes column 'a0', stored as int8 under 'b'."""
     flags = np.array([True, False])
     frame = dx.DataFrame(flags)

     expected = flags.astype('int8')
     stored = frame._data['b'][:, 0]
     assert_array_equal(expected, stored)
     assert frame._column_info['a0'].values == ('b', 0, 0)
 def test_single_array_float(self):
     """Check that a bare float array becomes column 'a0', stored under 'f'."""
     floats = np.array([1, 2.5, 3.2])
     frame = dx.DataFrame(floats)

     stored = frame._data['f'][:, 0]
     assert_array_equal(floats, stored)
     assert frame._column_info['a0'].values == ('f', 0, 0)
 def test_single_array_int(self):
     """Check that a single integer array column is stored under 'i'."""
     ints = np.array([1, 2, 3])
     frame = dx.DataFrame({'a': ints})

     stored = frame._data['i'][:, 0]
     assert_array_equal(ints, stored)
     assert frame._column_info['a'].values == ('i', 0, 0)
    def test_all(self):
        """Check each typed block and the column metadata of ``df_mix2`` (defined outside this method)."""
        string_values = array([1, 2, 3, 4, 5, 6, 7, 8], dtype='uint32')
        expected_blocks = (
            ('i', a1),
            ('f', b1),
            ('S', string_values),
            ('b', d1.astype('int8')),
            ('M', e1),
            ('m', f1),
        )
        for kind, expected in expected_blocks:
            # Every kind is compared against the first column of its block.
            assert_array_equal(expected, df_mix2._data[kind][:, 0])

        expected_info = {
            'a0': ('i', 0, 0),
            'a1': ('f', 0, 1),
            'a2': ('S', 0, 2),
            'a3': ('b', 0, 3),
            'a4': ('M', 0, 4),
            'a5': ('m', 0, 5),
        }
        for name, info in expected_info.items():
            assert df_mix2._column_info[name].values == info
 def test_single_array_dt(self):
     """Check that a single datetime64[ns] array column is stored under 'M'."""
     stamps = np.array([10, 20, 30], dtype='datetime64[ns]')
     frame = dx.DataFrame({'a': stamps})

     stored = frame._data['M'][:, 0]
     assert_array_equal(stamps, stored)
     assert frame._column_info['a'].values == ('M', 0, 0)
 def test_single_array_td(self):
     """Check that a single timedelta64[Y] array column is stored under 'm' as nanoseconds."""
     years = np.array([10, 20, 30], dtype='timedelta64[Y]')
     frame = dx.DataFrame({'a': years})

     expected = years.astype('timedelta64[ns]')
     stored = frame._data['m'][:, 0]
     assert_array_equal(expected, stored)
     assert frame._column_info['a'].values == ('m', 0, 0)
Beispiel #19
0
    def test_factorize(self):
        """Check ``factorize`` on an int, a float (with NaN), a string and a boolean column."""
        frame = de.DataFrame({
            'a': [9, 10, 9, 9, 10],
            'b': [0, nan, nan, 0, 1],
            'c': ['', 'e', 'e', 'a', 'z'],
            'd': [False, False, True, False, True],
            'e': [0, 20, 30, 4, 4],
            'f': ['a', nan, 'ad', None, 'ad'],
            'g': [np.nan] * 5,
        })

        # (column, expected codes, expected unique values); in each expectation the
        # uniques are listed in order of first appearance.
        cases = (
            ('a', array([0, 1, 0, 0, 1]), array([9, 10])),
            ('b', array([0, 1, 1, 0, 2]), array([0., nan, 1.])),
            ('c', array([0, 1, 1, 2, 3]), array(['', 'e', 'a', 'z'], dtype=object)),
            ('d', array([0, 0, 1, 0, 1]), array([False, True])),
        )
        for column, expected_codes, expected_uniques in cases:
            codes, uniques = frame.factorize(column)
            assert_array_equal(codes, expected_codes)
            assert_array_equal(uniques, expected_uniques)
 def test_single_list_td(self):
     """Check that one column given as a list of nanosecond timedeltas is stored under 'm'."""
     deltas = [np.timedelta64(count, 'ns') for count in (10, 20, 30)]
     frame = dx.DataFrame({'a': deltas})

     expected = np.array(deltas)
     stored = frame._data['m'][:, 0]
     assert_array_equal(expected, stored)
     assert frame._column_info['a'].values == ('m', 0, 0)