def test_frame_datetime(tc):
    """
    Create a frame with datetime values and check the values from the frame.

    The frame is checked while it is a python frame, then after switching it
    to scala, and finally after switching it back to python.

    :param tc: object exposing ``tc.frame.create`` (presumably the test
               fixture for the context -- confirm against the test harness)
    """
    data = [[1, "Bob", "1950-05-12T03:25:21.123000Z"],
            [2, "Susan", "1979-08-05T07:51:28.535000Z"],
            [3, "Jane", "1986-10-17T11:45:00.183000Z"]]

    def assert_rows_match(frame_data):
        """Compare rows taken from the frame against the original data."""
        # zip() stops at the shorter input, so without this check rows lost
        # during a conversion would go unnoticed.
        assert len(frame_data) == len(data)
        for original, row in zip(data, frame_data):
            assert len(original) == len(row) == 3
            assert original[0] == row[0]
            assert original[1] == row[1]
            # After going to scala, the "bday" column uses the long type.
            # Convert it to a string to compare with the original data.
            assert original[2] == dtypes.ms_to_datetime_str(row[2])

    frame = tc.frame.create(data, [("id", int), ("name", str), ("bday", dtypes.datetime)])
    assert frame._is_python
    row_count = frame.count()
    assert row_count == 3
    assert frame.take(row_count).data == data

    # frame to scala
    frame._scala
    assert frame._is_scala
    assert_rows_match(frame.take(frame.count()).data)

    # back to python
    frame._python
    assert frame._is_python
    assert_rows_match(frame.take(frame.count()).data)
def test_frame_datetime(tc):
    """
    Create a frame with datetime values and check the values from the frame.

    The values are verified three times: on the freshly created python frame,
    after the frame is switched to scala, and after it is switched back to
    python.

    :param tc: object exposing ``tc.frame.create`` (presumably the test
               fixture for the context -- confirm against the test harness)
    """
    data = [[1, "Bob", "1950-05-12T03:25:21.123000Z"],
            [2, "Susan", "1979-08-05T07:51:28.535000Z"],
            [3, "Jane", "1986-10-17T11:45:00.183000Z"]]

    def check_converted_rows(frame_data):
        """Verify rows read back from the frame after a backend switch."""
        # Guard against zip() silently truncating: a conversion that drops
        # rows must fail the test instead of shortening the comparison.
        assert len(frame_data) == len(data)
        for original, row in zip(data, frame_data):
            assert len(original) == len(row) == 3
            assert original[0] == row[0]
            assert original[1] == row[1]
            # After going to scala, the "bday" column uses the long type.
            # Convert it to a string to compare with the original data.
            assert original[2] == dtypes.ms_to_datetime_str(row[2])

    frame = tc.frame.create(data, [("id", int), ("name", str), ("bday", dtypes.datetime)])
    assert frame._is_python
    assert frame.row_count == 3
    assert frame.take(frame.row_count).data == data

    # frame to scala
    frame._scala
    assert frame._is_scala
    check_converted_rows(frame.take(frame.row_count).data)

    # back to python
    frame._python
    assert frame._is_python
    check_converted_rows(frame.take(frame.row_count).data)
def test_import_csv_datetime_format(tc):
    """
    Import a csv file twice, each time with a datetime_format matching a
    different column, and check the types and values that come back.

    :param tc: object exposing ``tc.frame.import_csv`` (presumably the test
               fixture for the context -- confirm against the test harness)
    """
    path = "../datasets/datetimes.csv"

    # Load with the date format that matches column a
    f = tc.frame.import_csv(path,
                            schema=[("a", dtypes.datetime), ("b", str)],
                            datetime_format="yyyy-MM-ddX")
    expected = ["2015-01-03T00:00:00.000000Z", "2015-04-12T00:00:00.000000Z"]
    actual_data = f.take(f.count())
    rows_checked = 0
    for row, expected_str in zip(actual_data, expected):
        assert isinstance(row[0], long)  # 'a' datetime column should be a long (number of ms since epoch)
        assert dtypes.ms_to_datetime_str(row[0]) == expected_str
        assert isinstance(row[1], basestring)  # column 'b' should be a str
        rows_checked += 1
    # zip() stops at the shorter input; make sure every expected value was
    # actually compared so an empty or short import cannot pass silently.
    assert rows_checked == len(expected)

    # Load with the date format that matches column b
    f = tc.frame.import_csv(path,
                            schema=[("a", str), ("b", dtypes.datetime)],
                            datetime_format="MM-dd-yyyy kk:mm X")
    expected = ["2015-01-02T11:30:00.000000Z", "2015-04-12T04:25:00.000000Z"]
    actual_data = f.take(f.count())
    rows_checked = 0
    for row, expected_str in zip(actual_data, expected):
        assert isinstance(row[0], basestring)  # column 'a' should be a str
        assert isinstance(row[1], long)  # column 'b' should be a long (number of ms since epoch)
        assert dtypes.ms_to_datetime_str(row[1]) == expected_str
        rows_checked += 1
    assert rows_checked == len(expected)
# Integer types accepted as "milliseconds" values.  ``long`` only exists on
# Python 2; referencing it unguarded raises NameError on Python 3.
try:
    _INTEGER_TYPES = (int, long)  # noqa: F821 - Python 2 builtin
except NameError:
    _INTEGER_TYPES = (int,)


def format_datetime(d):
    """
    Format a datetime-like value as a string.

    :param d: value to format; may be None, an integer, a
              ``datetime.datetime`` or any other object
    :return: None when ``d`` is None; the result of
             ``dtypes.ms_to_datetime_str(d)`` for integers; the
             ``"%Y-%m-%dT%H:%M:%S.%fZ"`` rendering for datetime objects;
             ``str(d)`` for anything else
    """
    from datetime import datetime

    if d is None:
        return None
    if isinstance(d, _INTEGER_TYPES):
        return dtypes.ms_to_datetime_str(d)
    if isinstance(d, datetime):
        # NOTE(review): the literal "Z" suffix is appended without any
        # timezone conversion -- confirm callers only pass UTC values.
        return d.strftime("%Y-%m-%dT%H:%M:%S.%fZ")
    return str(d)
def test_import_csv_datetime_format(tc):
    """
    Check csv import with an explicit datetime_format.

    The same file is loaded twice: first with a format matching column 'a',
    then with a format matching column 'b'.  In each case the datetime column
    is expected to hold longs and the other column strings.

    :param tc: object exposing ``tc.frame.import_csv`` (presumably the test
               fixture for the context -- confirm against the test harness)
    """
    path = "../datasets/datetimes.csv"

    # Load with the date format that matches column a
    f = tc.frame.import_csv(path,
                            schema=[("a", dtypes.datetime), ("b", str)],
                            datetime_format="yyyy-MM-ddX")
    expected = ["2015-01-03T00:00:00.000000Z", "2015-04-12T00:00:00.000000Z"]
    actual_data = f.take(f.count())
    compared = 0
    for row, expected_str in zip(actual_data, expected):
        assert isinstance(row[0], long)  # 'a' datetime column should be a long (number of ms since epoch)
        assert dtypes.ms_to_datetime_str(row[0]) == expected_str
        assert isinstance(row[1], basestring)  # column 'b' should be a str
        compared += 1
    # zip() truncates to the shorter input, so confirm that every expected
    # value was matched against a row; otherwise missing rows pass unnoticed.
    assert compared == len(expected)

    # Load with the date format that matches column b
    f = tc.frame.import_csv(path,
                            schema=[("a", str), ("b", dtypes.datetime)],
                            datetime_format="MM-dd-yyyy kk:mm X")
    expected = ["2015-01-02T11:30:00.000000Z", "2015-04-12T04:25:00.000000Z"]
    actual_data = f.take(f.count())
    compared = 0
    for row, expected_str in zip(actual_data, expected):
        assert isinstance(row[0], basestring)  # column 'a' should be a str
        assert isinstance(row[1], long)  # column 'b' should be a long (number of ms since epoch)
        assert dtypes.ms_to_datetime_str(row[1]) == expected_str
        compared += 1
    assert compared == len(expected)