def test_inspect_nones(self):
    """
    Render a two-column (str, vector) table whose second row is all None and print it.

    The rendering uses wrap=2, round=2 and truncate=4.  This test makes no
    assertion: it only exercises the rendering path and prints the result.
    """
    schema = [('s', str), ('v', dtypes.vector(2))]
    rows = [['super', [1.0095, 2.034]], [None, None]]
    settings = ui.InspectSettings(wrap=2, round=2, truncate=4)
    result = repr(ui.RowsInspection(rows, schema, offset=0, format_settings=settings))
    # Strip trailing whitespace from every rendered line
    result = '\n'.join([line.rstrip() for line in result.splitlines()])
    # Parenthesized form prints the same text on Python 2 and also parses on Python 3
    print(result)
def test_inspect_round(self):
    """
    Check rounding of float32, float64 and vector values in the rendered inspection,
    first with wrap=2/round=2 and then with wrap='stripes'/round=3.
    """
    schema = [('f32', dtypes.float32), ('f64', dtypes.float64), ('v', dtypes.vector(2))]
    rows = [[0.1234, 9.87654321, [1.0095, 2.034]],
            [1234.5, 9876.54321, [99.999, 33.33]]]

    def render(settings):
        # repr of the inspection with trailing whitespace removed from each line
        text = repr(ui.RowsInspection(rows, schema, offset=0, format_settings=settings))
        return '\n'.join([line.rstrip() for line in text.splitlines()])

    # NOTE(review): expected literals are kept exactly as they appear in this file
    wrapped_actual = render(ui.InspectSettings(wrap=2, round=2))
    wrapped_expected = '''[#] f32 f64 v ====================================== [0] 0.12 9.88 [1.01, 2.03] [1] 1234.50 9876.54 [100.00, 33.33]'''
    self.assertEqual(wrapped_expected, wrapped_actual)

    striped_actual = render(ui.InspectSettings(wrap='stripes', round=3))
    striped_expected = '''[0]- f32=0.123 f64=9.877 v =[1.010, 2.034] [1]- f32=1234.500 f64=9876.543 v =[99.999, 33.330]'''
    self.assertEqual(striped_expected, striped_actual)
def test_flatten_mult_simple(self):
    """Flatten a vector column and a comma-separated string column in a single call"""
    source_rows = [
        [[4, 3, 2, 1],
         "Calling cards,French toast,Turtle necks,Partridge in a Parody"],
        [[8, 7, 6, 5],
         "Maids a-milking,Swans a-swimming,Geese a-laying,Gold rings"],
        [[12, 11, 10, 9],
         "Drummers drumming,Lords a-leaping,Pipers piping,Ladies dancing"],
    ]
    source_schema = [("day", dtypes.vector(4)), ("gift", str)]

    # One expected output row per (day, gift) pair
    expected_rows = [
        [12, "Drummers drumming"],
        [11, "Lords a-leaping"],
        [10, "Pipers piping"],
        [9, "Ladies dancing"],
        [8, "Maids a-milking"],
        [7, "Swans a-swimming"],
        [6, "Geese a-laying"],
        [5, "Gold rings"],
        [4, "Calling cards"],
        [3, "French toast"],
        [2, "Turtle necks"],
        [1, "Partridge in a Parody"],
    ]

    frame = self.context.frame.create(source_rows, schema=source_schema)

    # Flatten both columns, then fetch every row and compare without regard to order
    frame.flatten_columns("day", "gift")
    total_rows = frame.count()
    flattened_rows = frame.take(total_rows).data
    self.assertItemsEqual(flattened_rows, expected_rows)
def test_inspect_nones(self):
    """
    Check the rendered table when a row holds None for both the str and the vector column,
    using wrap=2, round=2 and truncate=4.
    """
    schema = [('s', str), ('v', dtypes.vector(2))]
    rows = [['super', [1.0095, 2.034]], [None, None]]
    settings = ui.Formatting(wrap=2, round=2, truncate=4)
    table = ui.ATable(rows, schema, offset=0, format_settings=settings)

    # Compare against the rendering with trailing whitespace stripped from each line
    stripped_lines = [line.rstrip() for line in repr(table).splitlines()]
    result = '\n'.join(stripped_lines)

    # NOTE(review): expected literal is kept exactly as it appears in this file
    expected = """[#] s v ======================= [0] s... [1.01, 2.03] [1] None None"""
    self.assertEqual(expected, result)
def test_frame_upload_raw_list_data(tc):
    """Round trip: upload list data to a frame, 'take' it back, and compare cell by cell"""
    data = [
        [1, 'one', [1.0, 1.1]],
        [2, 'two', [2.0, 2.2]],
        [3, 'three', [3.0, 3.3]],
    ]
    schema = [('n', int), ('s', str), ('v', dtypes.vector(2))]
    frame = tc.frame.create(data, schema)

    # Ask for more rows than were uploaded; the row-count check below pins what comes back
    taken = frame.take(5).data
    assert len(data) == len(taken)

    for expected_row, actual_row in zip(data, taken):
        assert len(expected_row) == len(actual_row)
        for expected_cell, actual_cell in zip(expected_row, actual_row):
            assert expected_cell == actual_cell
def test_frame_upload_raw_list_data(tc):
    """Upload raw list data to a frame, read it back with 'take', and verify every value matches"""
    schema = [('n', int), ('s', str), ('v', dtypes.vector(2))]
    data = [[1, 'one', [1.0, 1.1]],
            [2, 'two', [2.0, 2.2]],
            [3, 'three', [3.0, 3.3]]]

    frame = tc.frame.create(data, schema)
    taken = frame.take(5)

    # Same number of rows came back as went in
    assert len(data) == len(taken)

    for source_row, taken_row in zip(data, taken):
        # Same width, then the same value in each position
        assert len(source_row) == len(taken_row)
        for source_value, taken_value in zip(source_row, taken_row):
            assert source_value == taken_value
def _infer_types_for_row(self, row): """ Returns a list of data types for the data in the specified row :param row: List or Row of data :return: List of data types """ inferred_types = [] for item in row: if not isinstance(item, list): inferred_types.append(type(item)) else: inferred_types.append(dtypes.vector((len(item)))) return inferred_types
def test_convert_matrix_col_to_vector(self):
    """Use add_columns to derive a vector column from the first column of each "C1" matrix"""
    frame = self.context.frame.create(self.dataset, self.schema)

    # The new column is declared as vector(2), so keep only the rows whose
    # matrix has exactly two rows.
    frame.filter(lambda row: row["C1"].shape[0] == 2)

    # Store the first column of each remaining matrix as 'first_column'
    new_column = ('first_column', vector(2))
    frame.add_columns(lambda row: row["C1"][:, 0], new_column)
    actual = frame.take(10, columns='first_column')

    # Expected: for the first two dataset entries, the first column of the
    # matrix stored at index 1 of the entry, wrapped as a single-cell row.
    expected = []
    for entry in self.dataset[:2]:
        expected.append([numpy.array(entry[1])[:, 0]])

    numpy.testing.assert_array_equal(actual, expected)
def test_inspect_nones(self):
    """
    Verify how None values are rendered in a str column and a vector column
    (wrap=2, round=2, truncate=4).
    """
    rows = [['super', [1.0095, 2.034]], [None, None]]
    schema = [('s', str), ('v', dtypes.vector(2))]

    rendered = repr(ui.ATable(rows,
                              schema,
                              offset=0,
                              format_settings=ui.Formatting(wrap=2, round=2, truncate=4)))

    # Trailing whitespace on each rendered line is ignored by the comparison
    cleaned = []
    for line in rendered.splitlines():
        cleaned.append(line.rstrip())
    result = '\n'.join(cleaned)

    # NOTE(review): expected literal is kept exactly as it appears in this file
    self.assertEqual("""[#] s v ======================= [0] s... [1.01, 2.03] [1] None None""", result)
def test_flatten_mult_simple(self):
    """Flatten two columns at once (a vector and a comma-separated string) and check the rows"""
    schema = [("day", dtypes.vector(4)), ("gift", str)]
    rows = [
        [[4, 3, 2, 1], "Calling cards,French toast,Turtle necks,Partridge in a Parody"],
        [[8, 7, 6, 5], "Maids a-milking,Swans a-swimming,Geese a-laying,Gold rings"],
        [[12, 11, 10, 9], "Drummers drumming,Lords a-leaping,Pipers piping,Ladies dancing"],
    ]
    frame = self.context.frame.create(rows, schema=schema)

    # Hand-written expectation: one row per (day, gift) pair
    expected = [
        [12, "Drummers drumming"], [11, "Lords a-leaping"],
        [10, "Pipers piping"], [9, "Ladies dancing"],
        [8, "Maids a-milking"], [7, "Swans a-swimming"],
        [6, "Geese a-laying"], [5, "Gold rings"],
        [4, "Calling cards"], [3, "French toast"],
        [2, "Turtle necks"], [1, "Partridge in a Parody"],
    ]

    frame.flatten_columns("day", "gift")

    # Take every row of the flattened frame; the comparison ignores ordering
    actual = frame.take(frame.count())
    self.assertItemsEqual(actual, expected)
def test_inspect_round(self):
    """
    Verify rounded output for float32, float64 and vector columns:
    round=2 with wrap=2, then round=3 with wrap='stripes'.
    """
    rows = [[0.1234, 9.87654321, [1.0095, 2.034]], [1234.5, 9876.54321, [99.999, 33.33]]]
    schema = [('f32', dtypes.float32), ('f64', dtypes.float64), ('v', dtypes.vector(2))]

    # NOTE(review): expected literals are kept exactly as they appear in this file
    cases = [
        (ui.InspectSettings(wrap=2, round=2),
         '''[#] f32 f64 v ====================================== [0] 0.12 9.88 [1.01, 2.03] [1] 1234.50 9876.54 [100.00, 33.33]'''),
        (ui.InspectSettings(wrap='stripes', round=3),
         '''[0]- f32=0.123 f64=9.877 v =[1.010, 2.034] [1]- f32=1234.500 f64=9876.543 v =[99.999, 33.330]'''),
    ]

    for settings, expected in cases:
        rendered = repr(ui.RowsInspection(rows, schema, offset=0, format_settings=settings))
        # Ignore trailing whitespace on each rendered line
        result = '\n'.join([line.rstrip() for line in rendered.splitlines()])
        self.assertEqual(expected, result)
from setup import tc, rm, get_sandbox_path
from sparktk import dtypes

# Shared fixture data: three keyed rows, each with a six-value series held in a vector(6) column.
schema = [("key", str), ("series", dtypes.vector(6))]
data = [["A", [62, 55, 60, 61, 60, 59]], ["B", [60, 58, 61, 62, 60, 61]], ["C", [69, 68, 68, 70, 71, 69]]]

# Six consecutive daily timestamps (2016-01-01 through 2016-01-06, 12:00 Z), passed to timeseries_slice.
datetimeindex = [
    "2016-01-01T12:00:00.000Z", "2016-01-02T12:00:00.000Z", "2016-01-03T12:00:00.000Z", "2016-01-04T12:00:00.000Z",
    "2016-01-05T12:00:00.000Z", "2016-01-06T12:00:00.000Z"
]


def test_date_without_time(tc):
    """
    Slices the time series frame with date-only start/end strings (no time part)
    and verifies that the returned frame reports the expected number of rows (3).
    """
    ts_frame = tc.frame.create(data, schema)
    start = "2016-01-02"
    end = "2016-01-04"
    f = ts_frame.timeseries_slice(datetimeindex, start, end)
    assert (f.row_count == 3)


def test_invalid_string_start(tc):
    """
    Tests calling time series slice with an invalid start date string
    """
    ts_frame = tc.frame.create(data, schema)
    try:
        start = "abc"
from setup import tc, rm, get_sandbox_path from sparktk import dtypes schema= [("key", str), ("series", dtypes.vector(6))] data = [["A", [62,55,60,61,60,59]],["B", [60,58,61,62,60,61]],["C", [69,68,68,70,71,69]]] datetimeindex = ["2016-01-01T12:00:00.000Z","2016-01-02T12:00:00.000Z","2016-01-03T12:00:00.000Z","2016-01-04T12:00:00.000Z","2016-01-05T12:00:00.000Z","2016-01-06T12:00:00.000Z"] def test_date_without_time(tc): """ Tests using start/end dates (no times) and verifies that we get back a frame with the expected number of rows """ ts_frame = tc.frame.create(data, schema) start = "2016-01-02" end = "2016-01-04" f = ts_frame.timeseries_slice(datetimeindex, start, end) assert(f.row_count == 3) def test_invalid_string_start(tc): """ Tests calling time series slice with an invalid start date string """ ts_frame = tc.frame.create(data, schema) try: start = "abc" end = "2016-01-04T12:00:00.000Z" ts_frame.timeseries_slice(datetimeindex, start, end) raise RuntimeError("Expected exception from invalid start date: " + start) except: pass def test_invalid_string_end(tc):
def r(n, value, num_digits):
    """Apply the rounder that ui.ATable.get_rounder returns for a vector(n) type and num_digits to value."""
    vector_type = dtypes.vector(n)
    rounder = ui.ATable.get_rounder(vector_type, num_digits)
    return rounder(value)