コード例 #1
0
ファイル: test_inspect.py プロジェクト: AayushiD/spark-tk
 def test_inspect_nones(self):
     schema = [('s', str), ('v', dtypes.vector(2))]
     rows = [['super', [1.0095, 2.034]],
             [None, None]]
     result = repr(ui.RowsInspection(rows, schema, offset=0, format_settings=ui.InspectSettings(wrap=2, round=2, truncate=4)))
     result = '\n'.join([line.rstrip() for line in result.splitlines()])
     print result
コード例 #2
0
 def test_inspect_nones(self):
     schema = [('s', str), ('v', dtypes.vector(2))]
     rows = [['super', [1.0095, 2.034]],
             [None, None]]
     result = repr(ui.RowsInspection(rows, schema, offset=0, format_settings=ui.InspectSettings(wrap=2, round=2, truncate=4)))
     result = '\n'.join([line.rstrip() for line in result.splitlines()])
     print result
コード例 #3
0
ファイル: test_inspect.py プロジェクト: tlisonbee/spark-tk
    def test_inspect_round(self):
        schema = [('f32', dtypes.float32), ('f64', dtypes.float64),
                  ('v', dtypes.vector(2))]
        rows = [[0.1234, 9.87654321, [1.0095, 2.034]],
                [1234.5, 9876.54321, [99.999, 33.33]]]
        result = repr(
            ui.RowsInspection(rows,
                              schema,
                              offset=0,
                              format_settings=ui.InspectSettings(wrap=2,
                                                                 round=2)))
        result = '\n'.join([line.rstrip() for line in result.splitlines()])
        expected = '''[#]  f32      f64      v
======================================
[0]     0.12     9.88  [1.01, 2.03]
[1]  1234.50  9876.54  [100.00, 33.33]'''
        self.assertEqual(expected, result)

        result = repr(
            ui.RowsInspection(rows,
                              schema,
                              offset=0,
                              format_settings=ui.InspectSettings(
                                  wrap='stripes', round=3)))
        result = '\n'.join([line.rstrip() for line in result.splitlines()])
        expected = '''[0]-
f32=0.123
f64=9.877
v  =[1.010, 2.034]
[1]-
f32=1234.500
f64=9876.543
v  =[99.999, 33.330]'''
        self.assertEqual(expected, result)
コード例 #4
0
    def test_flatten_mult_simple(self):
        """Test multiple columns flatten"""
        days = [[4, 3, 2, 1], [8, 7, 6, 5], [12, 11, 10, 9]]
        gifts = [
            "Calling cards,French toast,Turtle necks,Partridge in a Parody",
            "Maids a-milking,Swans a-swimming,Geese a-laying,Gold rings",
            "Drummers drumming,Lords a-leaping,Pipers piping,Ladies dancing",
        ]
        block_data = [[day, gift] for day, gift in zip(days, gifts)]
        block_schema = [("day", dtypes.vector(4)), ("gift", str)]
        # After flattening, each day number pairs with one gift, highest first.
        numbers = range(12, 0, -1)
        names = ["Drummers drumming", "Lords a-leaping", "Pipers piping",
                 "Ladies dancing", "Maids a-milking", "Swans a-swimming",
                 "Geese a-laying", "Gold rings", "Calling cards",
                 "French toast", "Turtle necks", "Partridge in a Parody"]
        expected_take = [[number, name] for number, name in zip(numbers, names)]
        frame = self.context.frame.create(block_data, schema=block_schema)

        # Validate flatten against hand crafted results
        frame.flatten_columns("day", "gift")
        frame_take = frame.take(frame.count()).data
        self.assertItemsEqual(frame_take, expected_take)
コード例 #5
0
ファイル: test_inspect.py プロジェクト: Haleyo/spark-tk
    def test_inspect_nones(self):
        schema = [('s', str), ('v', dtypes.vector(2))]
        rows = [['super', [1.0095, 2.034]],
                [None, None]]
        result = repr(ui.ATable(rows, schema, offset=0, format_settings=ui.Formatting(wrap=2, round=2, truncate=4)))
        result = '\n'.join([line.rstrip() for line in result.splitlines()])
        self.assertEqual("""[#]  s     v
=======================
[0]  s...  [1.01, 2.03]
[1]  None  None""", result)
コード例 #6
0
def test_frame_upload_raw_list_data(tc):
    """does round trip with list data --> upload to frame --> 'take' back to list and compare"""
    data = [[1, 'one', [1.0, 1.1]], [2, 'two', [2.0, 2.2]], [3, 'three', [3.0, 3.3]]]
    schema = [('n', int), ('s', str), ('v', dtypes.vector(2))]
    frame = tc.frame.create(data, schema)
    taken = frame.take(5).data
    assert(len(data) == len(taken))
    # Compare row-by-row and cell-by-cell; lengths were asserted above.
    for original_row, returned_row in zip(data, taken):
        assert(len(original_row) == len(returned_row))
        for original_cell, returned_cell in zip(original_row, returned_row):
            assert(original_cell == returned_cell)
コード例 #7
0
def test_frame_upload_raw_list_data(tc):
    """does round trip with list data --> upload to frame --> 'take' back to list and compare"""
    data = [[1, 'one', [1.0, 1.1]],
            [2, 'two', [2.0, 2.2]],
            [3, 'three', [3.0, 3.3]]]
    schema = [('n', int), ('s', str), ('v', dtypes.vector(2))]
    frame = tc.frame.create(data, schema)
    taken = frame.take(5)
    assert (len(data) == len(taken))
    for row_index, actual_row in enumerate(taken):
        expected_row = data[row_index]
        assert (len(expected_row) == len(actual_row))
        # Every cell must survive the upload/take round trip unchanged.
        for col_index, actual_value in enumerate(actual_row):
            assert (expected_row[col_index] == actual_value)
コード例 #8
0
    def _infer_types_for_row(self, row):
        """
        Returns a list of data types for the data in the specified row

        :param row: List or Row of data
        :return: List of data types
        """
        inferred_types = []
        for item in row:
            if not isinstance(item, list):
                inferred_types.append(type(item))
            else:
                inferred_types.append(dtypes.vector((len(item))))
        return inferred_types
コード例 #9
0
    def test_convert_matrix_col_to_vector(self):
        """ Convert a matrix column to vector using add_columns"""
        frame = self.context.frame.create(self.dataset, self.schema)

        # Keep only matrices with exactly 2 rows: vector columns need a
        # fixed, declared length.
        frame.filter(lambda row: row["C1"].shape[0] == 2)

        # Extract column 0 of each matrix into a new vector(2) column.
        frame.add_columns(lambda row: row["C1"][:, 0],
                          ('first_column', vector(2)))
        actual = frame.take(10, columns='first_column')

        # Hand-compute the same projection for the first two dataset rows.
        expected = [[numpy.array(entry[1])[:, 0]] for entry in self.dataset[:2]]
        numpy.testing.assert_array_equal(actual, expected)
コード例 #10
0
    def test_convert_matrix_col_to_vector(self):
        """ Convert a matrix column to vector using add_columns"""
        frame = self.context.frame.create(self.dataset, self.schema)

        # Restrict to 2-row matrices; vector construction requires a known
        # length, declared here as 2.
        frame.filter(lambda row: row["C1"].shape[0] == 2)

        # New vector(2) column built from column 0 of each matrix.
        first_col = lambda row: row["C1"][:, 0]
        frame.add_columns(first_col, ('first_column', vector(2)))
        taken = frame.take(10, columns='first_column')

        # Expected: column 0 of the first two dataset matrices.
        reference = [[numpy.array(item[1])[:, 0]] for item in self.dataset[:2]]
        numpy.testing.assert_array_equal(taken, reference)
コード例 #11
0
ファイル: test_inspect.py プロジェクト: lewisc/spark-tk-1
    def test_inspect_nones(self):
        schema = [('s', str), ('v', dtypes.vector(2))]
        rows = [['super', [1.0095, 2.034]], [None, None]]
        result = repr(
            ui.ATable(rows,
                      schema,
                      offset=0,
                      format_settings=ui.Formatting(wrap=2,
                                                    round=2,
                                                    truncate=4)))
        result = '\n'.join([line.rstrip() for line in result.splitlines()])
        self.assertEqual(
            """[#]  s     v
=======================
[0]  s...  [1.01, 2.03]
[1]  None  None""", result)
コード例 #12
0
    def test_flatten_mult_simple(self):
        """Test multiple columns flatten"""
        gift_rows = [
            "Calling cards,French toast,Turtle necks,Partridge in a Parody",
            "Maids a-milking,Swans a-swimming,Geese a-laying,Gold rings",
            "Drummers drumming,Lords a-leaping,Pipers piping,Ladies dancing",
        ]
        day_rows = [[4, 3, 2, 1], [8, 7, 6, 5], [12, 11, 10, 9]]
        block_data = [[d, g] for d, g in zip(day_rows, gift_rows)]
        block_schema = [("day", dtypes.vector(4)), ("gift", str)]
        # Each day number pairs with one comma-separated gift, highest first.
        expected_take = [
            [12, "Drummers drumming"], [11, "Lords a-leaping"],
            [10, "Pipers piping"], [9, "Ladies dancing"],
            [8, "Maids a-milking"], [7, "Swans a-swimming"],
            [6, "Geese a-laying"], [5, "Gold rings"],
            [4, "Calling cards"], [3, "French toast"],
            [2, "Turtle necks"], [1, "Partridge in a Parody"],
        ]
        frame = self.context.frame.create(block_data, schema=block_schema)

        # Validate flatten against hand crafted results
        frame.flatten_columns("day", "gift")
        self.assertItemsEqual(frame.take(frame.count()), expected_take)
コード例 #13
0
ファイル: test_inspect.py プロジェクト: AayushiD/spark-tk
    def test_inspect_round(self):
        schema = [('f32', dtypes.float32), ('f64', dtypes.float64), ('v', dtypes.vector(2))]
        rows = [[0.1234, 9.87654321, [1.0095, 2.034]],
                [1234.5, 9876.54321, [99.999, 33.33]]]
        result = repr(ui.RowsInspection(rows, schema, offset=0, format_settings=ui.InspectSettings(wrap=2, round=2)))
        result = '\n'.join([line.rstrip() for line in result.splitlines()])
        expected = '''[#]  f32      f64      v
======================================
[0]     0.12     9.88  [1.01, 2.03]
[1]  1234.50  9876.54  [100.00, 33.33]'''
        self.assertEqual(expected, result)

        result = repr(ui.RowsInspection(rows, schema, offset=0, format_settings=ui.InspectSettings(wrap='stripes', round=3)))
        result = '\n'.join([line.rstrip() for line in result.splitlines()])
        expected = '''[0]-
f32=0.123
f64=9.877
v  =[1.010, 2.034]
[1]-
f32=1234.500
f64=9876.543
v  =[99.999, 33.330]'''
        self.assertEqual(expected, result)
コード例 #14
0
from setup import tc, rm, get_sandbox_path
from sparktk import dtypes

# Frame schema: one string key column plus a 6-element numeric series.
schema = [("key", str), ("series", dtypes.vector(6))]
# Three sample series, one per key.
data = [["A", [62, 55, 60, 61, 60, 59]], ["B", [60, 58, 61, 62, 60, 61]],
        ["C", [69, 68, 68, 70, 71, 69]]]
# One ISO-8601 timestamp per series element: Jan 1-6, 2016 at noon UTC.
datetimeindex = [
    "2016-01-01T12:00:00.000Z", "2016-01-02T12:00:00.000Z",
    "2016-01-03T12:00:00.000Z", "2016-01-04T12:00:00.000Z",
    "2016-01-05T12:00:00.000Z", "2016-01-06T12:00:00.000Z"
]


def test_date_without_time(tc):
    """
    Tests using start/end dates (no times) and verifies that we get back a frame with the expected number of rows
    """
    frame = tc.frame.create(data, schema)
    start, end = "2016-01-02", "2016-01-04"
    # Slicing Jan 2 through Jan 4 inclusive yields three index positions.
    sliced = frame.timeseries_slice(datetimeindex, start, end)
    assert (sliced.row_count == 3)


def test_invalid_string_start(tc):
    """
    Tests calling time series slice with an invalid start date string
    """
    ts_frame = tc.frame.create(data, schema)
    try:
        # "abc" is not a parseable date; the slice call is expected to raise.
        # NOTE(review): this snippet is truncated here — the remainder of the
        # try/except body is not visible in this excerpt.
        start = "abc"
コード例 #15
0
from setup import tc, rm, get_sandbox_path
from sparktk import dtypes

# One string key plus a 6-element numeric series per row.
schema = [("key", str), ("series", dtypes.vector(6))]
data = [["A", [62, 55, 60, 61, 60, 59]],
        ["B", [60, 58, 61, 62, 60, 61]],
        ["C", [69, 68, 68, 70, 71, 69]]]
# Timestamps for the six series positions: Jan 1-6, 2016 at noon UTC.
datetimeindex = ["2016-01-01T12:00:00.000Z", "2016-01-02T12:00:00.000Z",
                 "2016-01-03T12:00:00.000Z", "2016-01-04T12:00:00.000Z",
                 "2016-01-05T12:00:00.000Z", "2016-01-06T12:00:00.000Z"]

def test_date_without_time(tc):
    """
    Tests using start/end dates (no times) and verifies that we get back a frame with the expected number of rows
    """
    ts_frame = tc.frame.create(data, schema)
    # Date-only bounds (no time component) must still be accepted.
    result = ts_frame.timeseries_slice(datetimeindex, "2016-01-02", "2016-01-04")
    assert(result.row_count == 3)

def test_invalid_string_start(tc):
    """
    Tests calling time series slice with an invalid start date string

    The slice call must raise for an unparseable start date; if it returns
    normally, the test itself fails.
    """
    ts_frame = tc.frame.create(data, schema)
    start = "abc"
    end = "2016-01-04T12:00:00.000Z"
    try:
        ts_frame.timeseries_slice(datetimeindex, start, end)
    except Exception:
        # Expected: the invalid start date was rejected.
        pass
    else:
        # BUG FIX: the original raised RuntimeError inside the try block, so
        # its own failure signal was swallowed by the bare `except: pass` and
        # the test could never fail. Raising in the else branch (taken only
        # when no exception occurred) restores the intended check.
        raise RuntimeError("Expected exception from invalid start date: " + start)

def test_invalid_string_end(tc):
コード例 #16
0
ファイル: test_inspect.py プロジェクト: lewisc/spark-tk-1
 def r(n, value, num_digits):
     """Round *value* using the rounder ATable selects for a vector(n) dtype."""
     rounder = ui.ATable.get_rounder(dtypes.vector(n), num_digits)
     return rounder(value)
コード例 #17
0
ファイル: test_inspect.py プロジェクト: Haleyo/spark-tk
 def r(n, value, num_digits):
     # Delegate to the dtype-specific rounding function for a length-n vector.
     vector_rounder = ui.ATable.get_rounder(dtypes.vector(n), num_digits)
     return vector_rounder(value)