예제 #1
0
def test_to_datetime():
    """Check the errors ``md.to_datetime`` raises for unsupported input.

    A DataFrame and a dict that only carry an ``'a'`` column must raise a
    ``ValueError`` whose message contains ``[year, month, day]``; a nested
    list must raise a ``TypeError``.
    """
    frame_without_date_parts = pd.DataFrame({'a': [1, 2]})
    mapping_without_date_parts = {'a': [1, 2]}

    for bad_input in (frame_without_date_parts, mapping_without_date_parts):
        with pytest.raises(ValueError) as exc_info:
            md.to_datetime(bad_input)
        message = str(exc_info.value)
        assert '[year, month, day]' in message

    nested_lists = [[1, 2], [3, 4]]
    with pytest.raises(TypeError):
        md.to_datetime(nested_lists)
예제 #2
0
    def testToDatetime(self):
        """Check the errors ``md.to_datetime`` raises for unsupported input.

        A DataFrame and a dict that only carry an ``'a'`` column must raise a
        ``ValueError`` whose message contains ``[year, month, day]``; a nested
        list must raise a ``TypeError``.
        """
        frame_without_date_parts = pd.DataFrame({'a': [1, 2]})
        mapping_without_date_parts = {'a': [1, 2]}

        for bad_input in (frame_without_date_parts, mapping_without_date_parts):
            with self.assertRaises(ValueError) as ctx:
                md.to_datetime(bad_input)
            message = str(ctx.exception)
            self.assertIn('[year, month, day]', message)

        nested_lists = [[1, 2], [3, 4]]
        with self.assertRaises(TypeError):
            md.to_datetime(nested_lists)
예제 #3
0
def test_date_time_bin(setup):
    """Compare a datetime range filter on a chunked frame with plain pandas.

    Builds a 10-row frame with a timestamp column ``'c'``, evaluates
    ``lower < c < upper`` through ``from_pandas``/``to_datetime`` and checks
    the fetched boolean series equals the pandas result.
    """
    rng = np.random.RandomState(0)

    # The draws are made in the same order as the columns are declared so the
    # seeded generator yields the same data for 'a', 'b' and 'c'.
    ints = rng.randint(1000, size=10)
    floats = rng.rand(10)
    # NOTE(review): pd.Timestamp treats a bare integer as nanoseconds since
    # the epoch, so these stamps fall in 1970 rather than 2020 -- confirm
    # whether unit='s' was intended.
    stamps = [pd.Timestamp(rng.randint(1604000000, 1604481373))
              for _ in range(10)]

    df_raw = pd.DataFrame(
        {'a': ints, 'b': floats, 'c': stamps},
        index=pd.RangeIndex(9, -1, -1),
    )
    df = from_pandas(df_raw, chunk_size=5)

    after_lower = df['c'] > to_datetime('2000-01-01')
    before_upper = df['c'] < to_datetime('2021-01-01')
    r = after_lower & before_upper
    result = r.execute().fetch()

    expected = (
        (df_raw['c'] > pd.to_datetime('2000-01-01'))
        & (df_raw['c'] < pd.to_datetime('2021-01-01'))
    )
    pd.testing.assert_series_equal(result, expected)
예제 #4
0
    def testToDatetimeExecution(self):
        """Run ``to_datetime`` on several input kinds and compare with pandas.

        Covers a scalar, a chunked tensor of date strings, a Series, a
        DataFrame with year/month/day columns, an Index, ``errors='ignore'``,
        ``unit`` and ``origin``. Each result is obtained through
        ``self.executor.execute_dataframe`` and checked against
        ``pd.to_datetime`` called with the same arguments.
        """
        def run(op):
            # Execute with concatenation and take the first returned item.
            return self.executor.execute_dataframe(op, concat=True)[0]

        # scalar input
        epoch_seconds = 1490195805
        scalar_result = run(to_datetime(epoch_seconds, unit='s'))
        scalar_expected = pd.to_datetime(epoch_seconds, unit='s')
        self.assertEqual(pd.to_datetime(scalar_result.item()), scalar_expected)

        # list-like input wrapped in a chunked tensor
        date_strings = ['3/11/2000', '3/12/2000', '3/13/2000']
        chunked_tensor = tensor(date_strings, chunk_size=2)
        tensor_result = run(to_datetime(chunked_tensor,
                                        infer_datetime_format=True))
        pd.testing.assert_index_equal(
            tensor_result,
            pd.to_datetime(date_strings, infer_datetime_format=True))

        # Series input
        pd_series = pd.Series(date_strings)
        chunked_series = Series(pd_series, chunk_size=2)
        series_result = run(to_datetime(chunked_series))
        pd.testing.assert_series_equal(series_result,
                                       pd.to_datetime(pd_series))

        # DataFrame input assembled from year/month/day columns
        date_parts = pd.DataFrame({
            'year': [2015, 2016],
            'month': [2, 3],
            'day': [4, 5]
        })
        chunked_frame = DataFrame(date_parts, chunk_size=(1, 2))
        frame_result = run(to_datetime(chunked_frame))
        pd.testing.assert_series_equal(frame_result,
                                       pd.to_datetime(date_parts))

        # Index input
        pd_index = pd.Index([1, 2, 3])
        chunked_index = Index(pd_index, chunk_size=2)
        index_result = run(to_datetime(chunked_index))
        pd.testing.assert_index_equal(index_result, pd.to_datetime(pd_index))

        # errors='ignore' together with an explicit format
        ignored_input = ['13000101']
        ignore_result = run(to_datetime(ignored_input, format='%Y%m%d',
                                        errors='ignore'))
        pd.testing.assert_index_equal(
            ignore_result,
            pd.to_datetime(ignored_input, format='%Y%m%d', errors='ignore'))

        # unit applied to a list
        unit_result = run(to_datetime([1490195805], unit='s'))
        pd.testing.assert_index_equal(
            unit_result, pd.to_datetime([1490195805], unit='s'))

        # custom origin
        origin_result = run(to_datetime([1, 2, 3], unit='D',
                                        origin=pd.Timestamp('1960-01-01')))
        origin_expected = pd.to_datetime([1, 2, 3], unit='D',
                                         origin=pd.Timestamp('1960-01-01'))
        pd.testing.assert_index_equal(origin_result, origin_expected)
예제 #5
0
def test_to_datetime_execution(setup):
    """Run ``to_datetime`` on several input kinds and compare with pandas.

    Covers a scalar, a chunked tensor of date strings, a Series, a DataFrame
    with year/month/day columns, an Index, ``errors='ignore'``, ``unit`` and
    ``origin``. Each result is obtained with ``.execute().fetch()`` and
    checked against ``pd.to_datetime`` called with the same arguments.
    """
    def fetched(op):
        # Execute the operation and pull its result back.
        return op.execute().fetch()

    # scalar input; fetched with dtype and shape checks switched off
    epoch_seconds = 1490195805
    scalar_op = to_datetime(epoch_seconds, unit='s')
    scalar_result = scalar_op.execute().fetch(extra_config={
        'check_dtypes': False,
        'check_shape': False
    })
    scalar_expected = pd.to_datetime(epoch_seconds, unit='s')
    assert pd.to_datetime(scalar_result) == scalar_expected

    # list-like input wrapped in a chunked tensor
    date_strings = ['3/11/2000', '3/12/2000', '3/13/2000']
    chunked_tensor = tensor(date_strings, chunk_size=2)
    tensor_result = fetched(to_datetime(chunked_tensor,
                                        infer_datetime_format=True))
    pd.testing.assert_index_equal(
        tensor_result,
        pd.to_datetime(date_strings, infer_datetime_format=True))

    # Series input
    pd_series = pd.Series(date_strings)
    chunked_series = Series(pd_series, chunk_size=2)
    series_result = fetched(to_datetime(chunked_series))
    pd.testing.assert_series_equal(series_result, pd.to_datetime(pd_series))

    # DataFrame input assembled from year/month/day columns
    date_parts = pd.DataFrame({
        'year': [2015, 2016],
        'month': [2, 3],
        'day': [4, 5]
    })
    chunked_frame = DataFrame(date_parts, chunk_size=(1, 2))
    frame_result = fetched(to_datetime(chunked_frame))
    pd.testing.assert_series_equal(frame_result, pd.to_datetime(date_parts))

    # Index input
    pd_index = pd.Index([1, 2, 3])
    chunked_index = Index(pd_index, chunk_size=2)
    index_result = fetched(to_datetime(chunked_index))
    pd.testing.assert_index_equal(index_result, pd.to_datetime(pd_index))

    # errors='ignore' together with an explicit format
    ignored_input = ['13000101']
    ignore_result = fetched(to_datetime(ignored_input, format='%Y%m%d',
                                        errors='ignore'))
    pd.testing.assert_index_equal(
        ignore_result,
        pd.to_datetime(ignored_input, format='%Y%m%d', errors='ignore'))

    # unit applied to a list
    unit_result = fetched(to_datetime([1490195805], unit='s'))
    pd.testing.assert_index_equal(
        unit_result, pd.to_datetime([1490195805], unit='s'))

    # custom origin
    origin_result = fetched(to_datetime([1, 2, 3], unit='D',
                                        origin=pd.Timestamp('1960-01-01')))
    origin_expected = pd.to_datetime([1, 2, 3], unit='D',
                                     origin=pd.Timestamp('1960-01-01'))
    pd.testing.assert_index_equal(origin_result, origin_expected)