def test_frame_datetime(tc):
    """
    Round-trip a frame with a datetime column through the scala and python backends.

    A three-row frame is created with a "bday" column declared as dtypes.datetime.
    The rows are checked while the frame is python-backed, then again after the
    frame is switched to scala, and once more after it is switched back to python.
    """
    data = [
        [1, "Bob", "1950-05-12T03:25:21.123000Z"],
        [2, "Susan", "1979-08-05T07:51:28.535000Z"],
        [3, "Jane", "1986-10-17T11:45:00.183000Z"],
    ]
    schema = [("id", int), ("name", str), ("bday", dtypes.datetime)]

    def verify_rows(rows):
        # Once the frame has been to scala, the "bday" column uses the long type,
        # so it is rendered as a string before comparing with the original data.
        for expected, actual in zip(data, rows):
            assert len(expected) == len(actual) == 3
            assert expected[0] == actual[0]
            assert expected[1] == actual[1]
            assert expected[2] == dtypes.ms_to_datetime_str(actual[2])

    frame = tc.frame.create(data, schema)
    assert frame._is_python
    total = frame.count()
    assert total == 3
    # While still python-backed, the rows are expected to equal the input exactly.
    assert frame.take(total).data == data

    # frame to scala
    frame._scala
    assert frame._is_scala
    verify_rows(frame.take(frame.count()).data)

    # back to python
    frame._python
    assert frame._is_python
    verify_rows(frame.take(frame.count()).data)
Example #2
0
def test_frame_datetime(tc):
    """
    Check that datetime values survive moving a frame to scala and back to python.

    The frame is built from three rows with a "bday" column declared as
    dtypes.datetime.  Row contents are verified in the initial python state,
    after switching to scala, and after switching back to python.
    """
    data = [
        [1, "Bob", "1950-05-12T03:25:21.123000Z"],
        [2, "Susan", "1979-08-05T07:51:28.535000Z"],
        [3, "Jane", "1986-10-17T11:45:00.183000Z"],
    ]

    frame = tc.frame.create(
        data,
        [("id", int), ("name", str), ("bday", dtypes.datetime)],
    )

    def fetch_and_compare():
        # Pull every row from the frame and compare it field by field with the input.
        fetched = frame.take(frame.row_count).data
        for source_row, fetched_row in zip(data, fetched):
            assert len(source_row) == len(fetched_row) == 3
            assert source_row[0] == fetched_row[0]
            assert source_row[1] == fetched_row[1]
            # After going to scala, the "bday" column uses the long type, so it is
            # converted to a string to compare with the original data.
            assert source_row[2] == dtypes.ms_to_datetime_str(fetched_row[2])

    # Initial state: python-backed, rows identical to the input.
    assert frame._is_python
    assert frame.row_count == 3
    assert frame.take(frame.row_count).data == data

    # frame to scala
    frame._scala
    assert frame._is_scala
    fetch_and_compare()

    # back to python
    frame._python
    assert frame._is_python
    fetch_and_compare()
Example #3
0
def test_import_csv_datetime_format(tc):
    """
    Import the same CSV twice, parsing a different column as datetime each time.

    The first import declares column "a" as dtypes.datetime, the second declares
    column "b"; each import passes the datetime_format matching that column.
    Datetime cells are expected to be longs (number of ms since epoch) and the
    other column is expected to stay a string.
    """
    csv_path = "../datasets/datetimes.csv"

    # Column "a" as datetime, using the format that matches column a.
    frame = tc.frame.import_csv(
        csv_path,
        schema=[("a", dtypes.datetime), ("b", str)],
        datetime_format="yyyy-MM-ddX",
    )
    wanted = ["2015-01-03T00:00:00.000000Z", "2015-04-12T00:00:00.000000Z"]
    rows = frame.take(frame.count())

    for row, wanted_str in zip(rows, wanted):
        parsed = row[0]
        assert isinstance(parsed, long)  # 'a' should be a long (ms since epoch)
        assert dtypes.ms_to_datetime_str(parsed) == wanted_str
        assert isinstance(row[1], basestring)  # 'b' should be a str

    # Column "b" as datetime, using the format that matches column b.
    frame = tc.frame.import_csv(
        csv_path,
        schema=[("a", str), ("b", dtypes.datetime)],
        datetime_format="MM-dd-yyyy kk:mm X",
    )
    wanted = ["2015-01-02T11:30:00.000000Z", "2015-04-12T04:25:00.000000Z"]
    rows = frame.take(frame.count())

    for row, wanted_str in zip(rows, wanted):
        assert isinstance(row[0], basestring)  # 'a' should be a str
        parsed = row[1]
        assert isinstance(parsed, long)  # 'b' should be a long (ms since epoch)
        assert dtypes.ms_to_datetime_str(parsed) == wanted_str
Example #4
0
 def format_datetime(d):
     """
     Render a datetime-like value as a string.

     None is returned unchanged.  A long or int is handed to
     dtypes.ms_to_datetime_str (presumably a count of milliseconds since the
     epoch -- confirm against dtypes).  A datetime instance is formatted with
     "%Y-%m-%dT%H:%M:%S.%fZ"; the trailing "Z" is a literal and no timezone
     conversion happens here.  Any other value is passed through str().
     """
     from datetime import datetime

     if d is None:
         return None
     if isinstance(d, (long, int)):
         return dtypes.ms_to_datetime_str(d)
     if isinstance(d, datetime):
         return d.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
     return str(d)
Example #5
0
def test_import_csv_datetime_format(tc):
    """
    Verify that import_csv honors datetime_format for whichever column is a datetime.

    The CSV is loaded once with column "a" typed as dtypes.datetime and once with
    column "b" typed as dtypes.datetime, each with its own format string.  The
    datetime column must come back as a long (number of ms since epoch) that
    converts to the expected string, and the remaining column must be a string.
    """
    path = "../datasets/datetimes.csv"

    # Load with the date format that matches column a
    schema_a_datetime = [("a", dtypes.datetime), ("b", str)]
    format_for_a = "yyyy-MM-ddX"
    f = tc.frame.import_csv(path, schema=schema_a_datetime, datetime_format=format_for_a)

    expected = [
        "2015-01-03T00:00:00.000000Z",
        "2015-04-12T00:00:00.000000Z",
    ]
    taken = f.take(f.count())

    for record, expected_str in zip(taken, expected):
        # 'a' datetime column should be a long (number of ms since epoch)
        assert isinstance(record[0], long)
        assert dtypes.ms_to_datetime_str(record[0]) == expected_str
        # column 'b' should be a str
        assert isinstance(record[1], basestring)

    # Load with the date format that matches column b
    schema_b_datetime = [("a", str), ("b", dtypes.datetime)]
    format_for_b = "MM-dd-yyyy kk:mm X"
    f = tc.frame.import_csv(path, schema=schema_b_datetime, datetime_format=format_for_b)

    expected = [
        "2015-01-02T11:30:00.000000Z",
        "2015-04-12T04:25:00.000000Z",
    ]
    taken = f.take(f.count())

    for record, expected_str in zip(taken, expected):
        # column 'a' should be a str
        assert isinstance(record[0], basestring)
        # column 'b' should be a long (number of ms since epoch)
        assert isinstance(record[1], long)
        assert dtypes.ms_to_datetime_str(record[1]) == expected_str