Esempi in Python per int64, esempi in Python per pyarrow.int64

Esempio n. 1

0

Mostra file

File: test_table.py Progetto: emkornfield/arrow

def test_table_safe_casting():
    """Cast a four-column table to a target schema and compare the result."""
    column_names = tuple('abcd')
    signed_values = [-10, -5, 0, 5, 10]
    text_values = ['ab', 'bc', 'cd', 'de', 'ef']

    table = pa.Table.from_arrays(
        [
            pa.array(range(5), type=pa.int64()),
            pa.array(signed_values, type=pa.int32()),
            # Whole-valued floats, so the cast to int64 loses nothing
            pa.array([1.0, 2.0, 3.0, 4.0, 5.0], type=pa.float64()),
            pa.array(text_values, type=pa.string()),
        ],
        names=column_names,
    )

    expected_table = pa.Table.from_arrays(
        [
            pa.array(range(5), type=pa.int32()),
            pa.array(signed_values, type=pa.int16()),
            pa.array([1, 2, 3, 4, 5], type=pa.int64()),
            pa.array(text_values, type=pa.string()),
        ],
        names=column_names,
    )

    target_types = (pa.int32(), pa.int16(), pa.int64(), pa.string())
    target_schema = pa.schema(
        [pa.field(name, typ) for name, typ in zip(column_names, target_types)]
    )

    assert table.cast(target_schema).equals(expected_table)

Esempio n. 2

0

Mostra file

File: test_convert_builtin.py Progetto: dremio/arrow

def test_sequence_nesting_levels():
    """Check type inference for nested sequences and rejection of mixed depth."""
    consistent_cases = [
        ([1, 2, None], pa.int64()),
        ([[1], [2], None], pa.list_(pa.int64())),
        ([[1], [2, 3, 4], [None]], pa.list_(pa.int64())),
        ([None, [[None, 1]], [[2, 3, 4], None], [None]],
         pa.list_(pa.list_(pa.int64()))),
    ]
    for values, inferred_type in consistent_cases:
        converted = pa.array(values)
        assert converted.type == inferred_type
        assert converted.to_pylist() == values

    # Mixed nesting levels are rejected
    rejection_errors = (pa.ArrowInvalid, pa.ArrowTypeError)
    inconsistent_cases = (
        [1, 2, [1]],
        [1, 2, []],
        [[1], [2], [None, [1]]],
    )
    for values in inconsistent_cases:
        with pytest.raises(rejection_errors):
            pa.array(values)

Esempio n. 3

0

Mostra file

File: test_table.py Progetto: emkornfield/arrow

def test_table_unsafe_casting():
    """Check a truncating float cast fails by default and works with safe=False."""
    column_names = tuple('abcd')
    signed_values = [-10, -5, 0, 5, 10]
    text_values = ['ab', 'bc', 'cd', 'de', 'ef']

    table = pa.Table.from_arrays(
        [
            pa.array(range(5), type=pa.int64()),
            pa.array(signed_values, type=pa.int32()),
            # Fractional values: casting these to int64 drops the fraction
            pa.array([1.1, 2.2, 3.3, 4.4, 5.5], type=pa.float64()),
            pa.array(text_values, type=pa.string()),
        ],
        names=column_names,
    )

    expected_table = pa.Table.from_arrays(
        [
            pa.array(range(5), type=pa.int32()),
            pa.array(signed_values, type=pa.int16()),
            pa.array([1, 2, 3, 4, 5], type=pa.int64()),
            pa.array(text_values, type=pa.string()),
        ],
        names=column_names,
    )

    target_types = (pa.int32(), pa.int16(), pa.int64(), pa.string())
    target_schema = pa.schema(
        [pa.field(name, typ) for name, typ in zip(column_names, target_types)]
    )

    # The default cast must refuse to truncate the floats
    with pytest.raises(pa.ArrowInvalid,
                       match='Floating point value truncated'):
        table.cast(target_schema)

    # Opting out of the safety check produces the truncated integers
    assert table.cast(target_schema, safe=False).equals(expected_table)

Esempio n. 4

0

Mostra file

File: test_types.py Progetto: dremio/arrow

def test_struct_type():
    """Check struct construction from fields, from tuples and from a mapping.

    Each construction is verified by comparing the struct's children with
    the expected list of fields, both as a whole and element by element.
    """
    fields = [pa.field('a', pa.int64()),
              pa.field('a', pa.int32()),
              pa.field('b', pa.int32())]
    ty = pa.struct(fields)

    assert len(ty) == ty.num_children == 3
    assert list(ty) == fields

    # The comparison must be asserted; a bare `a == b` checks nothing
    for a, b in zip(ty, fields):
        assert a == b

    # Construct from list of tuples
    ty = pa.struct([('a', pa.int64()),
                    ('a', pa.int32()),
                    ('b', pa.int32())])
    assert list(ty) == fields
    for a, b in zip(ty, fields):
        assert a == b

    # Construct from mapping
    fields = [pa.field('a', pa.int64()),
              pa.field('b', pa.int32())]
    ty = pa.struct(OrderedDict([('a', pa.int64()),
                                ('b', pa.int32())]))
    assert list(ty) == fields
    for a, b in zip(ty, fields):
        assert a == b

Esempio n. 5

0

Mostra file

File: pandas_examples.py Progetto: NonVolatileComputing/arrow

def dataframe_with_arrays(include_index=False):
    """
    Build a DataFrame whose cells are numpy arrays, one column per
    primitive type, together with the matching Arrow schema.

    Parameters
    ----------
    include_index: bool, default False
        When true, an int64 ``__index_level_0__`` field is appended to the
        returned schema (the DataFrame itself is built the same way).

    Returns
    -------
    df: pandas.DataFrame
    schema: pyarrow.Schema
        Arrow schema definition that is in line with the constructed df.
    """
    numeric_types = (
        ('i1', pa.int8()), ('i2', pa.int16()),
        ('i4', pa.int32()), ('i8', pa.int64()),
        ('u1', pa.uint8()), ('u2', pa.uint16()),
        ('u4', pa.uint32()), ('u8', pa.uint64()),
        ('f4', pa.float32()), ('f8', pa.float64()),
    )

    columns = OrderedDict()
    schema_fields = []

    # One list-typed column per numeric dtype code, with a null row
    for np_code, arrow_type in numeric_types:
        schema_fields.append(pa.field(np_code, pa.list_(arrow_type)))
        columns[np_code] = [
            np.arange(10, dtype=np_code),
            np.arange(5, dtype=np_code),
            None,
            np.arange(1, dtype=np_code),
        ]

    schema_fields.append(pa.field('str', pa.list_(pa.string())))
    columns['str'] = [
        np.array([u"1", u"ä"], dtype="object"),
        None,
        np.array([u"1"], dtype="object"),
        np.array([u"1", u"2", u"3"], dtype="object"),
    ]

    schema_fields.append(
        pa.field('datetime64', pa.list_(pa.timestamp('ms'))))
    columns['datetime64'] = [
        np.array(
            ['2007-07-13T01:23:34.123456789',
             None,
             '2010-08-13T05:46:57.437699912'],
            dtype='datetime64[ms]',
        ),
        None,
        None,
        np.array(
            ['2007-07-13T02',
             None,
             '2010-08-13T05:46:57.437699912'],
            dtype='datetime64[ms]',
        ),
    ]

    if include_index:
        schema_fields.append(pa.field('__index_level_0__', pa.int64()))

    return pd.DataFrame(columns), pa.schema(schema_fields)

Esempio n. 6

0

Mostra file

File: test_types.py Progetto: rok/arrow

def test_struct_type():
    """Check struct construction, indexing by position/name and error cases.

    Covers construction from fields, from tuples and from a mapping, plus
    the exceptions raised for out-of-range, duplicate, missing and
    wrongly-typed keys and for an invalid field type.
    """
    fields = [
        # Duplicate field name on purpose
        pa.field('a', pa.int64()),
        pa.field('a', pa.int32()),
        pa.field('b', pa.int32())
    ]
    ty = pa.struct(fields)

    assert len(ty) == ty.num_children == 3
    assert list(ty) == fields
    assert ty[0].name == 'a'
    assert ty[2].type == pa.int32()
    with pytest.raises(IndexError):
        assert ty[3]

    assert ty['b'] == ty[2]

    # Duplicate
    with pytest.warns(UserWarning):
        with pytest.raises(KeyError):
            ty['a']

    # Not found
    with pytest.raises(KeyError):
        ty['c']

    # Neither integer nor string
    with pytest.raises(TypeError):
        ty[None]

    # The comparison must be asserted; a bare `a == b` checks nothing
    for a, b in zip(ty, fields):
        assert a == b

    # Construct from list of tuples
    ty = pa.struct([('a', pa.int64()),
                    ('a', pa.int32()),
                    ('b', pa.int32())])
    assert list(ty) == fields
    for a, b in zip(ty, fields):
        assert a == b

    # Construct from mapping
    fields = [pa.field('a', pa.int64()),
              pa.field('b', pa.int32())]
    ty = pa.struct(OrderedDict([('a', pa.int64()),
                                ('b', pa.int32())]))
    assert list(ty) == fields
    for a, b in zip(ty, fields):
        assert a == b

    # Invalid args
    with pytest.raises(TypeError):
        pa.struct([('a', None)])

Esempio n. 7

0

Mostra file

File: test_types.py Progetto: rok/arrow

def test_fields_hashable():
    """Check that distinct fields work as distinct dictionary keys."""
    # Fields differ pairwise in name, type or nullability
    fields = [
        pa.field('a', pa.int32()),
        pa.field('a', pa.int64()),
        pa.field('a', pa.int64(), nullable=False),
        pa.field('b', pa.int32()),
        pa.field('b', pa.int32(), nullable=False),
    ]
    position_of = {field: position for position, field in enumerate(fields)}

    # No two fields may collapse onto the same key
    assert len(position_of) == len(fields)
    for position, field in enumerate(fields):
        assert position_of[field] == position

Esempio n. 8

0

Mostra file

File: pandas_examples.py Progetto: CodingCat/arrow

def dataframe_with_lists(include_index=False):
    """
    Build a DataFrame with list-valued columns (int64, double, bytes, str)
    together with the matching Arrow schema.

    Parameters
    ----------
    include_index: bool, default False
        When true, an int64 ``__index_level_0__`` field is appended to the
        returned schema (the DataFrame itself is built the same way).

    Returns
    -------
    df: pandas.DataFrame
    schema: pyarrow.Schema
        Arrow schema definition that is in line with the constructed df.
    """
    schema_fields = [
        pa.field('int64', pa.list_(pa.int64())),
        pa.field('double', pa.list_(pa.float64())),
        pa.field('bytes_list', pa.list_(pa.binary())),
        pa.field('str_list', pa.list_(pa.string())),
    ]

    columns = OrderedDict([
        ('int64', [
            list(range(10)),
            list(range(5)),
            None,
            [],
            # Every second element of a doubled sequence, taken by slicing
            np.array(list(range(10)) * 2, dtype=np.int64)[::2],
        ]),
        ('double', [
            [float(value) for value in range(10)],
            [float(value) for value in range(5)],
            None,
            [],
            np.array([float(value) for value in range(10)] * 2)[::2],
        ]),
        ('bytes_list', [
            [b"1", b"f"],
            None,
            [b"1"],
            [b"1", b"2", b"3"],
            [],
        ]),
        ('str_list', [
            [u"1", u"ä"],
            None,
            [u"1"],
            [u"1", u"2", u"3"],
            [],
        ]),
    ])

    if include_index:
        schema_fields.append(pa.field('__index_level_0__', pa.int64()))

    return pd.DataFrame(columns), pa.schema(schema_fields)

Esempio n. 9

0

Mostra file

File: test_csv.py Progetto: wesm/arrow

 def test_simple_ints(self):
     """Read a small all-integer CSV and check the inferred int64 schema."""
     table = self.read_bytes(b"a,b,c\n1,2,3\n4,5,6\n")
     expected_schema = pa.schema(
         [(column, pa.int64()) for column in ('a', 'b', 'c')])
     assert table.schema == expected_schema
     assert table.to_pydict() == {'a': [1, 4], 'b': [2, 5], 'c': [3, 6]}

Esempio n. 10

0

Mostra file

File: test_table.py Progetto: emkornfield/arrow

def test_table_from_arrays_preserves_column_metadata():
    """Check field metadata and nullability survive ``Table.from_arrays``."""
    # Added to test https://issues.apache.org/jira/browse/ARROW-3866
    field_with_metadata = pa.field(
        'field1', pa.int64(), metadata={"a": "A", "b": "B"})
    non_nullable_field = pa.field('field2', pa.int64(), nullable=False)

    table = pa.Table.from_arrays([
        pa.column(field_with_metadata, pa.array([1, 2])),
        pa.column(non_nullable_field, pa.array([3, 4])),
    ])

    # The metadata key is looked up as bytes
    assert b"a" in table.column(0).field.metadata
    assert table.column(1).field.nullable is False

Esempio n. 11

0

Mostra file

File: test_json.py Progetto: rok/arrow

 def test_simple_ints(self):
     """Read two JSON lines of integers and check the inferred int64 schema."""
     table = self.read_bytes(
         b'{"a": 1,"b": 2, "c": 3}\n{"a": 4,"b": 5, "c": 6}\n')
     expected_schema = pa.schema(
         [(column, pa.int64()) for column in ('a', 'b', 'c')])
     assert table.schema == expected_schema
     assert table.to_pydict() == {'a': [1, 4], 'b': [2, 5], 'c': [3, 6]}

Esempio n. 12

0

Mostra file

File: test_array.py Progetto: emkornfield/arrow

def test_list_array_flatten():
    """Check that ``flatten()`` strips one nesting level per call."""
    doubly_nested = pa.array([
        None,
        [[1, None, 2], None, [3, 4]],
        [],
        [[], [5, 6], None],
        [[7, 8]],
    ])
    assert doubly_nested.type.equals(pa.list_(pa.list_(pa.int64())))

    singly_nested = pa.array([
        [1, None, 2], None, [3, 4], [], [5, 6], None, [7, 8],
    ])
    assert singly_nested.type.equals(pa.list_(pa.int64()))

    flat = pa.array([1, None, 2, 3, 4, 5, 6, 7, 8])
    assert flat.type.equals(pa.int64())

    # One call strips one level; two calls reach the scalar values
    assert doubly_nested.flatten().equals(singly_nested)
    assert singly_nested.flatten().equals(flat)
    assert doubly_nested.flatten().flatten().equals(flat)

Esempio n. 13

0

Mostra file

File: test_orc.py Progetto: dremio/arrow

def test_orcfile_empty():
    """Read the empty ORC example file and check row count and schema."""
    from pyarrow import orc

    reader = orc.ORCFile(path_for_orc_example('TestOrcFile.emptyFile'))
    table = reader.read()
    assert table.num_rows == 0

    # The same (int1, string1) struct recurs in several columns
    int_string_struct = pa.struct([
        ('int1', pa.int32()),
        ('string1', pa.string()),
    ])
    int_string_list = pa.list_(int_string_struct)
    map_entry = pa.struct([
        ('key', pa.string()),
        ('value', int_string_struct),
    ])

    expected_schema = pa.schema([
        ('boolean1', pa.bool_()),
        ('byte1', pa.int8()),
        ('short1', pa.int16()),
        ('int1', pa.int32()),
        ('long1', pa.int64()),
        ('float1', pa.float32()),
        ('double1', pa.float64()),
        ('bytes1', pa.binary()),
        ('string1', pa.string()),
        ('middle', pa.struct([('list', int_string_list)])),
        ('list', int_string_list),
        ('map', pa.list_(map_entry)),
    ])
    assert table.schema == expected_schema

Esempio n. 14

0

Mostra file

File: test_schema.py Progetto: giantwhale/arrow

def test_type_to_pandas_dtype():
    """Check ``to_pandas_dtype()`` for a range of Arrow types."""
    datetime_ns = np.dtype('datetime64[ns]')

    expectations = [
        (pa.null(), np.float64),
        (pa.bool_(), np.bool_),
    ]
    # Numeric types map onto the numpy scalar type of the same name
    same_named = ('int8', 'int16', 'int32', 'int64',
                  'uint8', 'uint16', 'uint32', 'uint64',
                  'float16', 'float32', 'float64')
    expectations += [
        (getattr(pa, name)(), getattr(np, name)) for name in same_named
    ]
    # Date and timestamp types are all expected as datetime64[ns]
    expectations += [
        (temporal, datetime_ns)
        for temporal in (pa.date32(), pa.date64(), pa.timestamp('ms'))
    ]
    # Binary, string and list types are expected as the object dtype
    expectations += [
        (boxed, np.object_)
        for boxed in (pa.binary(), pa.binary(12), pa.string(),
                      pa.list_(pa.int8()))
    ]

    for arrow_type, numpy_type in expectations:
        assert arrow_type.to_pandas_dtype() == numpy_type

Esempio n. 15

0

Mostra file

File: test_convert_builtin.py Progetto: dremio/arrow

def test_iterator_without_size():
    """Build arrays from a plain iterator, with and without an explicit type."""
    reference = pa.array((0, 1, 2))

    inferred = pa.array(iter(range(3)))
    assert inferred.equals(reference)

    # Same with explicit type
    typed = pa.array(iter(range(3)), type=pa.int64())
    assert typed.equals(reference)

Esempio n. 16

0

Mostra file

File: test_convert_builtin.py Progetto: apache/arrow

 def test_list_of_int(self):
     """Convert a list of int lists (with a null) and check the result."""
     nested = [[1, 2, 3], [], None, [1, 2]]
     converted = pyarrow.from_pylist(nested)
     assert len(converted) == 4
     assert converted.null_count == 1
     assert converted.type == pyarrow.list_(pyarrow.int64())
     assert converted.to_pylist() == nested

Esempio n. 17

0

Mostra file

File: test_convert_builtin.py Progetto: dremio/arrow

def test_sequence_integer_inferred(seq):
    """Check that integers mixed with ``None`` are inferred as int64."""
    values = [1, None, 3, None]
    converted = pa.array(seq(values))
    assert len(converted) == 4
    assert converted.null_count == 2
    assert converted.type == pa.int64()
    assert converted.to_pylist() == values

Esempio n. 18

0

Mostra file

File: test_convert_builtin.py Progetto: apache/arrow

 def test_integer(self):
     """Convert integers mixed with ``None`` and check the int64 result."""
     values = [1, None, 3, None]
     converted = pyarrow.from_pylist(values)
     assert len(converted) == 4
     assert converted.null_count == 2
     assert converted.type == pyarrow.int64()
     assert converted.to_pylist() == values

Esempio n. 19

0

Mostra file

File: test_array.py Progetto: emkornfield/arrow

def test_empty_cast():
    """Cast empty arrays between every pair of a set of common types."""
    common_types = [
        pa.null(),
        pa.bool_(),
        pa.int8(), pa.int16(), pa.int32(), pa.int64(),
        pa.uint8(), pa.uint16(), pa.uint32(), pa.uint64(),
        pa.float16(), pa.float32(), pa.float64(),
        pa.date32(), pa.date64(),
        pa.binary(), pa.binary(length=4),
        pa.string(),
    ]

    for source_type, target_type in itertools.product(common_types, repeat=2):
        try:
            # ARROW-4766: Ensure that supported types conversion don't segfault
            # on empty arrays of common types
            pa.array([], type=source_type).cast(target_type)
        except pa.lib.ArrowNotImplementedError:
            # Unsupported conversions are allowed to be rejected
            pass

Esempio n. 20

0

Mostra file

File: test_convert_builtin.py Progetto: dremio/arrow

def test_nested_arrays(seq):
    """Convert a sequence of int64 numpy arrays (and a null) to a list array."""
    elements = [
        np.array([], dtype=np.int64),
        np.array([1, 2], dtype=np.int64),
        None,
    ]
    converted = pa.array(seq(elements))
    assert len(converted) == 3
    assert converted.null_count == 1
    assert converted.type == pa.list_(pa.int64())
    assert converted.to_pylist() == [[], [1, 2], None]

Esempio n. 21

0

Mostra file

File: test_array.py Progetto: emkornfield/arrow

def test_asarray():
    """Check how ``np.asarray`` converts an Arrow integer array."""
    arrow_ints = pa.array(range(4))

    # The iterator interface gives back an array of Int64Value's
    boxed = np.asarray(list(arrow_ints))
    assert boxed.tolist() == [0, 1, 2, 3]
    assert boxed.dtype == np.dtype('O')
    assert type(boxed[0]) == pa.lib.Int64Value

    # Calling with the arrow array gives back an array with 'int64' dtype
    native = np.asarray(arrow_ints)
    assert native.tolist() == [0, 1, 2, 3]
    assert native.dtype == np.dtype('int64')

    # An optional type can be specified when calling np.asarray
    as_text = np.asarray(arrow_ints, dtype='str')
    assert as_text.tolist() == ['0', '1', '2', '3']

    # If PyArrow array has null values, numpy type will be changed as needed
    # to support nulls.
    with_null = pa.array([0, 1, 2, None])
    assert with_null.type == pa.int64()
    widened = np.asarray(with_null)
    values = widened.tolist()
    assert values[:3] == [0., 1., 2.]
    assert np.isnan(values[3])
    assert widened.dtype == np.dtype('float64')

Esempio n. 22

0

Mostra file

File: test_array.py Progetto: CodingCat/arrow

def test_buffers_nested():
    """Inspect the raw buffers of a list array and of a struct array."""
    list_array = pa.array([[1, 2], None, [3, None, 4, 5]],
                          type=pa.list_(pa.int64()))
    list_buffers = list_array.buffers()
    assert len(list_buffers) == 4

    # The parent buffers: bitmap (rows 0 and 2 set), then int32 offsets
    assert bytearray(list_buffers[0].to_pybytes())[0] == 0b00000101
    assert struct.unpack('4i', list_buffers[1].to_pybytes()) == (0, 2, 2, 6)

    # The child buffers: bitmap with bit 3 cleared, then int64 values
    # ('8x' skips the slot at index 3)
    assert bytearray(list_buffers[2].to_pybytes())[0] == 0b00110111
    assert struct.unpack(
        'qqq8xqq', list_buffers[3].to_pybytes()) == (1, 2, 3, 4, 5)

    struct_array = pa.array(
        [(42, None), None, (None, 43)],
        type=pa.struct([pa.field('a', pa.int8()),
                        pa.field('b', pa.int16())]))
    struct_buffers = struct_array.buffers()
    assert len(struct_buffers) == 5

    # The parent buffer
    assert bytearray(struct_buffers[0].to_pybytes())[0] == 0b00000101

    # The child buffers: 'a'
    assert bytearray(struct_buffers[1].to_pybytes())[0] == 0b00000001
    assert struct.unpack('bxx', struct_buffers[2].to_pybytes()) == (42,)

    # The child buffers: 'b'
    assert bytearray(struct_buffers[3].to_pybytes())[0] == 0b00000100
    assert struct.unpack('4xh', struct_buffers[4].to_pybytes()) == (43,)

Esempio n. 23

0

Mostra file

File: jvm.py Progetto: rok/arrow

def _from_jvm_int_type(jvm_type):
    """
    Map a JVM integer type onto the pyarrow integer type with the same
    signedness and bit width.

    Parameters
    ----------
    jvm_type: org.apache.arrow.vector.types.pojo.ArrowType$Int

    Returns
    -------
    typ: pyarrow.DataType
        ``None`` when the bit width is not one of 8, 16, 32 or 64.
    """
    if jvm_type.isSigned:
        candidates = (
            (8, pa.int8), (16, pa.int16), (32, pa.int32), (64, pa.int64),
        )
    else:
        candidates = (
            (8, pa.uint8), (16, pa.uint16), (32, pa.uint32), (64, pa.uint64),
        )

    for width, make_type in candidates:
        if jvm_type.bitWidth == width:
            return make_type()

    # Unrecognised width: no type is produced
    return None

Esempio n. 24

0

Mostra file

File: test_csv.py Progetto: laurentgo/arrow

    def test_custom_nulls(self):
        """Read CSV data with custom, and with no, null spellings."""
        # Infer nulls with custom values
        custom_opts = ConvertOptions(null_values=['Xxx', 'Zzz'])
        table = self.read_bytes(b"a,b,c,d\nZzz,Xxx,1,2\nXxx,#N/A,,Zzz\n",
                                convert_options=custom_opts)
        expected_schema = pa.schema([
            ('a', pa.null()),
            ('b', pa.string()),
            ('c', pa.string()),
            ('d', pa.int64()),
        ])
        assert table.schema == expected_schema
        assert table.to_pydict() == {
            'a': [None, None],
            'b': [u"Xxx", u"#N/A"],
            'c': [u"1", u""],
            'd': [2, None],
        }

        # With an empty null list, both columns are read as strings
        no_null_opts = ConvertOptions(null_values=[])
        table = self.read_bytes(b"a,b\n#N/A,\n",
                                convert_options=no_null_opts)
        expected_schema = pa.schema([
            ('a', pa.string()),
            ('b', pa.string()),
        ])
        assert table.schema == expected_schema
        assert table.to_pydict() == {'a': [u"#N/A"], 'b': [u""]}

Esempio n. 25

0

Mostra file

File: test_convert_builtin.py Progetto: dremio/arrow

def test_infinite_iterator():
    """Build fixed-size arrays from an endless iterator via ``size``."""
    reference = pa.array((0, 1, 2))

    inferred = pa.array(itertools.count(0), size=3)
    assert inferred.equals(reference)

    # Same with explicit type
    typed = pa.array(itertools.count(0), type=pa.int64(), size=3)
    assert typed.equals(reference)

Esempio n. 26

0

Mostra file

File: test_convert_builtin.py Progetto: dremio/arrow

def test_struct_from_dicts_inference():
    """Check struct type inference when converting sequences of dicts."""
    flat_type = pa.struct([
        pa.field('a', pa.int64()),
        pa.field('b', pa.string()),
        pa.field('c', pa.bool_()),
    ])

    complete_rows = [
        {'a': 5, 'b': u'foo', 'c': True},
        {'a': 6, 'b': u'bar', 'c': False},
    ]
    arr = pa.array(complete_rows)
    check_struct_type(arr.type, flat_type)
    assert arr.to_pylist() == complete_rows

    # With omitted values
    sparse_rows = [
        {'a': 5, 'c': True},
        None,
        {},
        {'a': None, 'b': u'bar'},
    ]
    filled_rows = [
        {'a': 5, 'b': None, 'c': True},
        None,
        {'a': None, 'b': None, 'c': None},
        {'a': None, 'b': u'bar', 'c': None},
    ]
    arr = pa.array(sparse_rows)
    # NOTE(review): this object ndarray is built but never converted; the
    # second array below comes from the plain list again. Presumably the
    # ndarray was meant to be the input -- confirm before changing.
    data_as_ndarray = np.empty(len(sparse_rows), dtype=object)
    data_as_ndarray[:] = sparse_rows
    arr2 = pa.array(sparse_rows)

    check_struct_type(arr.type, flat_type)
    assert arr.to_pylist() == filled_rows
    assert arr.equals(arr2)

    # Nested
    # NOTE(review): this expected type is constructed but not compared
    # with the inferred type; only the round-tripped values are checked.
    expected_type = pa.struct([
        pa.field('a', pa.struct([
            pa.field('aa', pa.list_(pa.int64())),
            pa.field('ab', pa.bool_()),
        ])),
        pa.field('b', pa.string()),
    ])
    nested_rows = [
        {'a': {'aa': [5, 6], 'ab': True}, 'b': 'foo'},
        {'a': {'aa': None, 'ab': False}, 'b': None},
        {'a': None, 'b': 'bar'},
    ]
    arr = pa.array(nested_rows)
    assert arr.to_pylist() == nested_rows

    # Edge cases
    arr = pa.array([{}])
    assert arr.type == pa.struct([])
    assert arr.to_pylist() == [{}]

    # Mixing structs and scalars is rejected
    with pytest.raises((pa.ArrowInvalid, pa.ArrowTypeError)):
        pa.array([1, {'a': 2}])

Esempio n. 27

0

Mostra file

File: test_convert_pandas.py Progetto: NonVolatileComputing/arrow

 def test_int_object_nulls(self):
     """Round-trip an object column of ints and ``None`` against int64."""
     raw = np.array([None, 1, np.int64(3)] * 5, dtype=object)
     frame = pd.DataFrame({'ints': raw})
     # The expected frame holds the numeric conversion of the same values
     numeric_frame = pd.DataFrame({'ints': pd.to_numeric(raw)})
     expected_schema = pa.schema([pa.field('ints', pa.int64())])
     self._check_pandas_roundtrip(frame, expected=numeric_frame,
                                  expected_schema=expected_schema)

Esempio n. 28

0

Mostra file

File: test_schema.py Progetto: rok/arrow

def test_empty_table():
    """Check ``Schema.empty_table()`` gives a zero-row table of that schema."""
    schema = pa.schema([pa.field('oneField', pa.int64())])
    empty = schema.empty_table()
    assert isinstance(empty, pa.Table)
    assert empty.num_rows == 0
    assert empty.schema == schema

Esempio n. 29

0

Mostra file

File: test_convert_pandas.py Progetto: NonVolatileComputing/arrow

    def test_infer_lists(self):
        """Round-trip list-valued pandas columns and check the schema."""
        frame = pd.DataFrame(OrderedDict([
            ('nan_ints', [[None, 1], [2, 3]]),
            ('ints', [[0, 1], [2, 3]]),
            ('strs', [[None, u'b'], [u'c', u'd']]),
            ('nested_strs', [[[None, u'b'], [u'c', u'd']], None]),
        ]))

        int_list = pa.list_(pa.int64())
        str_list = pa.list_(pa.string())
        expected_schema = pa.schema([
            pa.field('nan_ints', int_list),
            pa.field('ints', int_list),
            pa.field('strs', str_list),
            pa.field('nested_strs', pa.list_(str_list)),
        ])

        self._check_pandas_roundtrip(frame, expected_schema=expected_schema)

Esempio n. 30

0

Mostra file

File: test_table.py Progetto: emkornfield/arrow

def test_chunked_array_equals():
    """Check ChunkedArray equality across types, values and chunk layouts."""
    def as_chunked(arrs):
        # Accept either a ready ChunkedArray or a list of chunks
        if isinstance(arrs, pa.ChunkedArray):
            return arrs
        return pa.chunked_array(arrs)

    def eq(xarrs, yarrs):
        x, y = as_chunked(xarrs), as_chunked(yarrs)
        assert x.equals(y)
        assert y.equals(x)
        assert x == y
        assert x != str(y)

    def ne(xarrs, yarrs):
        x, y = as_chunked(xarrs), as_chunked(yarrs)
        assert not x.equals(y)
        assert not y.equals(x)
        assert x != y

    # Empty chunked arrays compare by type
    eq(pa.chunked_array([], type=pa.int32()),
       pa.chunked_array([], type=pa.int32()))
    ne(pa.chunked_array([], type=pa.int32()),
       pa.chunked_array([], type=pa.int64()))

    a = pa.array([0, 2], type=pa.int32())
    b = pa.array([0, 2], type=pa.int64())
    c = pa.array([0, 3], type=pa.int32())
    d = pa.array([0, 2, 0, 3], type=pa.int32())

    eq([a], [a])
    ne([a], [b])
    eq([a, c], [a, c])
    # Same values in a different chunk layout still compare equal
    eq([a, c], [d])
    ne([c, a], [a, c])

    assert not pa.chunked_array([], type=pa.int32()).equals(None)

Esempio n. 31

0

Mostra file

File: test_algorithms.py Progetto: tinyclues/fletcher

    assert all_op(arrow, skipna) == pandas.all(skipna=skipna)

    # Split in the middle and check whether this still works
    if len(data) > 2:
        arrow = pa.chunked_array(
            [data[: len(data) // 2], data[len(data) // 2 :]], type=pa.bool_()
        )
        assert all_op(arrow, skipna) == pandas.all(skipna=skipna)


@pytest.mark.parametrize(
    ("array", "fill_null_value", "expected"),
    [
        # No nulls
        (
            pa.array([2, 1], type=pa.int16()),
            -1,
            np.array([2, 1], dtype=np.int16),
        ),
        # One null, replaced by the fill value
        (
            pa.array([2, None], type=pa.int32()),
            -1,
            np.array([2, -1], dtype=np.int32),
        ),
        # Fractional fill value: -1 is expected in the int64 result
        (
            pa.array([2, None], type=pa.int64()),
            -1.5,
            np.array([2, -1], dtype=np.int64),
        ),
        # Fill value outside the uint8 range: 1 is expected
        (
            pa.array([1, None], type=pa.uint8()),
            257,
            np.array([1, 1], dtype=np.uint8),
        ),
        # Only nulls
        (
            pa.array([None, None], type=pa.int8()),
            5,
            np.array([5, 5], dtype=np.int8),
        ),
        # Empty array
        (
            pa.array([], type=pa.int8()),
            5,
            np.array([], dtype=np.int8),
        ),
    ],
)
def test_integer_array_to_numpy(array, fill_null_value, expected):
    """Check dtype and contents of ``integer_array_to_numpy``'s result."""
    converted = integer_array_to_numpy(array, fill_null_value)
    assert converted.dtype == expected.dtype
    np.testing.assert_array_equal(converted, expected)


@pytest.mark.parametrize(
    ("array", "indices"),
    [
        (

Esempio n. 32

0

Mostra file

File: test_algorithms.py Progetto: tinyclues/fletcher

def test_in_chunk_offsets(data: List[List[int]]):
    """Feed a chunked array's own chunk offsets to ``_in_chunk_offsets``."""
    chunked = pa.chunked_array(data, type=pa.int64())
    # Simple case: Passing in the actual chunk offsets should yield a valid selection
    chunk_starts = list(_calculate_chunk_offsets(chunked))
    check_valid_in_offsets(chunked, _in_chunk_offsets(chunked, chunk_starts))

Esempio n. 33

0

Mostra file

File: test_unit_arrow_chunk_iterator.py Progetto: kkothand/snowflake-connector-python

def test_iterate_over_decimal_chunk():
    """
    Run ``iterate_over_test_chunk`` for a FIXED column with a randomly
    chosen precision (1..38) and scale (0..precision).

    The Arrow storage type follows the precision: int8 up to 2 digits,
    int16 up to 4, int32 up to 9, int64 up to 19, decimal128 above that.
    Two columns with the same type and metadata are passed to the helper.
    """
    # Seed from system entropy. Passing a datetime object as the seed
    # raises TypeError on Python 3.11+, where only None, int, float, str,
    # bytes and bytearray are accepted.
    random.seed()
    precision = random.randint(1, 38)
    scale = random.randint(0, precision)
    if precision <= 2:
        datatype = pyarrow.int8()
    elif precision <= 4:
        datatype = pyarrow.int16()
    elif precision <= 9:
        datatype = pyarrow.int32()
    elif precision <= 19:
        datatype = pyarrow.int64()
    else:
        datatype = pyarrow.decimal128(precision, scale)

    def decimal_generator(_precision, _scale):
        """Return one random value suited to the storage type in use."""
        def truncated_randint(low, high):
            # Draw from the full range of the integer type, then keep only
            # the leading `_precision` digits (plus the sign, if present).
            data = random.randint(low, high)
            return int(str(data)[:_precision if data >= 0 else _precision + 1])

        if _precision <= 2:
            return truncated_randint(-128, 127)
        elif _precision <= 4:
            return truncated_randint(-32768, 32767)
        elif _precision <= 9:
            # Full int32 range; the upper bound was previously mistyped
            # as 2147483637.
            return truncated_randint(-2147483648, 2147483647)
        elif _precision <= 19:
            return truncated_randint(-9223372036854775808,
                                     9223372036854775807)

        # decimal128: `_precision` random digits, with the decimal point
        # placed `_scale` digits from the end when the scale is non-zero.
        digits = [str(random.randint(0, 9)) for _ in range(_precision)]
        if _scale:
            digits.insert(-_scale, '.')
        return decimal.Decimal("".join(digits))

    def expected_data_transform_decimal(_precision, _scale):
        """Return the function mapping a generated value to its expectation."""
        def expected_data_transform_decimal_impl(data, precision=_precision,
                                                 scale=_scale):
            # Integer-backed values are unscaled; shift them by the scale.
            # decimal128 values are already expressed as Decimal.
            if precision <= 19:
                return decimal.Decimal(data).scaleb(-scale)
            return data

        return expected_data_transform_decimal_impl

    column_meta = {
        "logicalType": "FIXED",
        "precision": str(precision),
        "scale": str(scale),
    }
    iterate_over_test_chunk(
        [datatype, datatype], [column_meta, column_meta],
        lambda: decimal_generator(precision, scale),
        expected_data_transform_decimal(precision, scale))

Esempio n. 34

0

Mostra file

File: test_convert_builtin.py Progetto: Shelna-GCP/SentinelToEVI-dataflow

def test_limited_iterator_types():
    """Build an array from an iterator with explicit type and size."""
    from_iterator = pa.array(iter(range(3)), type=pa.int64(), size=3)
    from_tuple = pa.array((0, 1, 2))
    assert from_iterator.equals(from_tuple)

Esempio n. 35

0

Mostra file

 def __init__(self):
     # Initialise the pa.PyExtensionType base explicitly, handing it
     # pa.int64() (presumably the storage type -- confirm against the
     # pyarrow extension-type API).
     pa.PyExtensionType.__init__(self, pa.int64())

Esempio n. 36

0

Mostra file

File: test_parquet_diff.py Progetto: CJWorkbench/parquet-to-arrow

def test_column_type_int64_same():
    """Run the identity check on a one-column int64 table with nulls."""
    column = pyarrow.array([1, 2, -1, None, 3, None, 1], pyarrow.int64())
    assert_arrow_table_identity(pyarrow.table({"A": column}))

Esempio n. 37

0

Mostra file

File: test_table.py Progetto: abbotware/arrow-fork

    b = pa.array([0, 2], type=pa.int64())
    c = pa.array([0, 3], type=pa.int32())
    d = pa.array([0, 2, 0, 3], type=pa.int32())

    eq([a], [a])
    ne([a], [b])
    eq([a, c], [a, c])
    eq([a, c], [d])
    ne([c, a], [a, c])

    assert not pa.chunked_array([], type=pa.int32()).equals(None)


@pytest.mark.parametrize(
    ('data', 'typ'),
    [([True, False, True, True], pa.bool_()), ([1, 2, 4, 6], pa.int64()),
     ([1.0, 2.5, None], pa.float64()), (['a', None, 'b'], pa.string()),
     ([], pa.list_(pa.uint8())), ([[1, 2], [3]], pa.list_(pa.int64())),
     ([['a'], None, ['b', 'c']], pa.list_(pa.string())),
     ([(1, 'a'), (2, 'c'), None
       ], pa.struct([pa.field('a', pa.int64()),
                     pa.field('b', pa.string())]))])
def test_chunked_array_pickle(data, typ):
    """Round-trip a chunked array through pickle and check it is unchanged."""
    # Split the input into chunks of at most two elements; empty input
    # gives a chunked array with no chunks.
    arrays = [pa.array(data[start:start + 2], type=typ)
              for start in range(0, len(data), 2)]
    array = pa.chunked_array(arrays, type=typ)
    array.validate()
    result = pickle.loads(pickle.dumps(array))
    result.validate()
    # Validation only checks internal consistency; also compare the
    # unpickled array with the original.
    assert result.equals(array)

Esempio n. 38

0

Mostra file

File: test_extension_type.py Progetto: rustbeltanalytica/Web-Assets

 def __init__(self, freq):
     """Store ``freq`` and initialise the base extension type.

     The base ``ExtensionType`` is initialised with int64 storage and the
     extension name ``'pandas.period'``.
     """
     # attributes need to be set first before calling
     # super init (as that calls serialize)
     self._freq = freq
     pa.ExtensionType.__init__(self, pa.int64(), 'pandas.period')

Esempio n. 39

0

Mostra file

File: test_extension_type.py Progetto: rustbeltanalytica/Web-Assets

def test_generic_ext_type():
    """A daily ``PeriodType`` reports its extension name and int64 storage."""
    ext_type = PeriodType('D')
    assert ext_type.extension_name == "pandas.period"
    assert ext_type.storage_type == pa.int64()

Esempio n. 40

0

Mostra file

File: base.py Progetto: tinyclues/fletcher

# fmt:on

# True when the installed pandas version compares >= "0.26.0".
PANDAS_GE_0_26_0 = LooseVersion(pd.__version__) >= "0.26.0"
if PANDAS_GE_0_26_0:
    # Only imported when the version check above passes.
    from pandas.core.indexers import check_array_indexer

# Lookup from a pyarrow type id (``DataType.id``) to a Python type.
# NOTE(review): presumably the Python scalar type used for values of that
# Arrow type - confirm against the callers of this map.
_python_type_map = {
    pa.null().id: six.text_type,
    pa.bool_().id: bool,
    pa.int8().id: int,
    pa.uint8().id: int,
    pa.int16().id: int,
    pa.uint16().id: int,
    pa.int32().id: int,
    pa.uint32().id: int,
    pa.int64().id: int,
    pa.uint64().id: int,
    pa.float16().id: float,
    pa.float32().id: float,
    pa.float64().id: float,
    pa.date32().id: datetime.date,
    pa.date64().id: datetime.date,
    pa.timestamp("ms").id: datetime.datetime,
    pa.binary().id: six.binary_type,
    pa.string().id: six.text_type,
    # Use any list type here, only LIST is important
    pa.list_(pa.string()).id: list,
    pa.large_list(pa.string()).id: list,
}

# Lookup from a type-name string to the pyarrow type it stands for.
_string_type_map = {"date64[ms]": pa.date64(), "string": pa.string()}

Esempio n. 41

0

Mostra file

        def __arrow_ext_serialize__(self):
            """Return ``subtype`` (as a string) and ``closed`` as JSON bytes."""
            payload = json.dumps(
                {"subtype": str(self.subtype), "closed": self.closed})
            return payload.encode()

        @classmethod
        def __arrow_ext_deserialize__(cls, storage_type, serialized):
            """Rebuild an ``ArrowIntervalType`` from its serialized JSON bytes.

            ``storage_type`` is accepted but not used; the subtype is looked
            up from the alias string stored under "subtype".
            """
            fields = json.loads(serialized.decode())
            interval_subtype = pyarrow.type_for_alias(fields["subtype"])
            return ArrowIntervalType(interval_subtype, fields["closed"])

        def __eq__(self, other):
            """Compare by exact class, ``subtype`` and ``closed``.

            Returns ``NotImplemented`` for anything that is not a pyarrow
            ``BaseExtensionType``.
            """
            if not isinstance(other, pyarrow.BaseExtensionType):
                return NotImplemented
            same_class = type(self) == type(other)
            return (same_class
                    and self.subtype == other.subtype
                    and self.closed == other.closed)

        def __hash__(self):
            """Hash the string forms of the type and its subtype plus ``closed``."""
            identity = (str(self), str(self.subtype), self.closed)
            return hash(identity)

        def to_pandas_dtype(self):
            """Return a pandas ``IntervalDtype`` over the subtype's pandas dtype."""
            import pandas as pd

            subtype_dtype = self.subtype.to_pandas_dtype()
            return pd.IntervalDtype(subtype_dtype)

    # Register the extension type with pyarrow. Registration takes an
    # instance, so a placeholder one (int64 subtype, closed "left") is built.
    _interval_type = ArrowIntervalType(pyarrow.int64(), "left")
    pyarrow.register_extension_type(_interval_type)

Esempio n. 42

0

Mostra file

File: test_convert_builtin.py Progetto: Shelna-GCP/SentinelToEVI-dataflow

from pyarrow.compat import unittest  # noqa
from pyarrow.pandas_compat import _pandas_api  # noqa
import pyarrow as pa

import collections
import datetime
import decimal
import itertools
import math
import traceback

import numpy as np
import pytz

# (NumPy integer type, matching pyarrow type) for each signed and unsigned
# width from 8 to 64 bits.
int_type_pairs = [
    (np.int8, pa.int8()),
    (np.int16, pa.int16()),
    (np.int32, pa.int32()),
    (np.int64, pa.int64()),
    (np.uint8, pa.uint8()),
    (np.uint16, pa.uint16()),
    (np.uint32, pa.uint32()),
    (np.uint64, pa.uint64()),
]

# Only the NumPy half of each pair is kept; the pyarrow half is discarded.
np_int_types, _ = zip(*int_type_pairs)


class StrangeIterable:
    """Bare-bones iterable that only exposes ``__iter__`` over a wrapped object.

    The wrapper offers no ``__len__`` or ``__getitem__`` of its own, so it can
    be consumed only through iteration.
    """

    def __init__(self, lst):
        # The wrapped object is stored as-is; it is not copied.
        self.lst = lst

    def __iter__(self):
        # ``iter()`` is the public entry point of the iteration protocol. It
        # behaves like the wrapped object's own ``__iter__`` for lists, tuples
        # and strings, and additionally accepts objects that implement only
        # the ``__getitem__`` sequence protocol.
        return iter(self.lst)


class MyInt:

Esempio n. 43

0

Mostra file

File: test_parquet_util.py Progetto: sungchun12/professional-services

    def test_get_pyarrow_translated_schema(self):
        """Translate a type/name/mode schema description and compare schemas.

        The expected fields are built without a ``nullable`` argument, so the
        "mode" values of the input are not mirrored on the expected side.
        """
        # Sub-fields of the repeated record column.
        record_subfields = [
            {"type": "BOOLEAN", "name": "boolean1", "mode": "REQUIRED"},
            {"type": "TIMESTAMP", "name": "timestamp1", "mode": "REQUIRED"},
        ]
        input_schema = [
            {"type": "STRING", "name": "string1", "mode": "REQUIRED"},
            {"type": "NUMERIC", "name": "numeric1", "mode": "NULLABLE"},
            {"type": "INTEGER", "name": "integer1", "mode": "REQUIRED"},
            {"type": "FLOAT", "name": "float1", "mode": "NULLABLE"},
            {"type": "BOOLEAN", "name": "boolean1", "mode": "REQUIRED"},
            {"type": "TIMESTAMP", "name": "timestamp1", "mode": "REQUIRED"},
            {"type": "DATE", "name": "date1", "mode": "REQUIRED"},
            {"type": "TIME", "name": "time1", "mode": "REQUIRED"},
            {"type": "DATETIME", "name": "datetime1", "mode": "REQUIRED"},
            {
                "type": "RECORD",
                "name": "record1",
                "mode": "REPEATED",
                "fields": record_subfields,
            },
        ]

        # The repeated record is expected as a list of structs.
        record_struct = pa.struct([
            pa.field('boolean1', pa.bool_()),
            pa.field('timestamp1', pa.timestamp('us')),
        ])
        expected_schema = pa.schema([
            pa.field('string1', pa.string()),
            pa.field('numeric1', pa.int64()),
            pa.field('integer1', pa.int64()),
            pa.field('float1', pa.float64()),
            pa.field('boolean1', pa.bool_()),
            pa.field('timestamp1', pa.timestamp('us')),
            pa.field('date1', pa.date32()),
            pa.field('time1', pa.time64('us')),
            pa.field('datetime1', pa.timestamp('us')),
            pa.field('record1', pa.list_(record_struct)),
        ])

        translated = get_pyarrow_translated_schema(input_schema)
        self.assertEqual(translated, expected_schema)

Esempio n. 44

0

Mostra file

File: task.py Progetto: atlarge-research/wta-tools

    def get_pyarrow_schema():
        """Return the pyarrow schema of the task columns, in declaration order."""
        # (column name, Arrow type) pairs; the order defines the schema order.
        columns = (
            ('id', pa.int64()),
            ('ts_submit', pa.int64()),
            ('submission_site', pa.int32()),
            ('runtime', pa.int64()),
            ('resource_type', pa.string()),
            ('resource_amount_requested', pa.float64()),
            ('parents', pa.list_(pa.int64())),
            ('children', pa.list_(pa.int64())),
            ('user_id', pa.int32()),
            ('group_id', pa.int32()),
            ('nfrs', pa.string()),
            ('workflow_id', pa.int64()),
            ('wait_time', pa.int64()),
            ('params', pa.string()),
            ('memory_requested', pa.float64()),
            ('network_io_time', pa.int64()),
            ('disk_io_time', pa.int64()),
            ('disk_space_requested', pa.float64()),
            ('energy_consumption', pa.int64()),
            ('resource_used', pa.int64()),
        )
        return pa.schema([pa.field(name, dtype) for name, dtype in columns])

Esempio n. 45

0

Mostra file

File: test_types.py Progetto: pudidic/arrow

    for i, field in enumerate(fields):
        in_dict[field] = i
    assert len(in_dict) == len(fields)
    for i, field in enumerate(fields):
        assert in_dict[field] == i


@pytest.mark.parametrize('t,check_func', [
    # date and time types
    (pa.date32(), types.is_date32),
    (pa.date64(), types.is_date64),
    (pa.time32('s'), types.is_time32),
    (pa.time64('ns'), types.is_time64),
    # signed integers
    (pa.int8(), types.is_int8),
    (pa.int16(), types.is_int16),
    (pa.int32(), types.is_int32),
    (pa.int64(), types.is_int64),
    # unsigned integers
    (pa.uint8(), types.is_uint8),
    (pa.uint16(), types.is_uint16),
    (pa.uint32(), types.is_uint32),
    (pa.uint64(), types.is_uint64),
    # floating point
    (pa.float16(), types.is_float16),
    (pa.float32(), types.is_float32),
    (pa.float64(), types.is_float64),
])
def test_exact_primitive_types(t, check_func):
    """Each type must be accepted by its matching ``types.is_*`` predicate."""
    recognised = check_func(t)
    assert recognised


def test_bit_width():
    for ty, expected in [(pa.bool_(), 1),
                         (pa.int8(), 8),

Esempio n. 46

0

Mostra file

File: test_table.py Progetto: abbotware/arrow-fork

def test_concat_tables_with_promotion_error():
    """Concatenating int64 and float32 columns "f" with promotion must fail."""
    int_table = pa.Table.from_arrays(
        [pa.array([1, 2], type=pa.int64())], ["f"])
    float_table = pa.Table.from_arrays(
        [pa.array([1, 2], type=pa.float32())], ["f"])
    tables = [int_table, float_table]

    with pytest.raises(pa.ArrowInvalid):
        pa.concat_tables(tables, promote=True)

Esempio n. 47

0

Mostra file

def test_casting_to_extension_type_raises():
    """Casting an int64 array to ``IntegerType`` raises ArrowNotImplementedError."""
    int_array = pa.array([1, 2, 3, 4], pa.int64())
    with pytest.raises(pa.ArrowNotImplementedError):
        int_array.cast(IntegerType())

Esempio n. 48

0

Mostra file

File: test_convert_builtin.py Progetto: Shelna-GCP/SentinelToEVI-dataflow

def test_list_with_non_list(seq):
    """A scalar among list values raises TypeError for list and large_list."""
    # List types don't accept non-sequences
    for make_list_type in (pa.list_, pa.large_list):
        with pytest.raises(TypeError):
            pa.array(seq([[], [1, 2], 3]), type=make_list_type(pa.int64()))

Esempio n. 49

0

Mostra file

File: service.py Progetto: th3architect/suzieq

    def clean_data_common(self, processed_data, raw_data):
        """Fix the type and default value of each extracted field.

        This routine is common to all services. It ensures that all the missing
        fields, as defined by the schema, are added to the records extracted.
        Furthermore, each field is set to the specified type.

        Args:
            processed_data: Iterable of dict records. Each record is updated
                in place.
            raw_data: Mapping, or a list whose first element is such a
                mapping, providing the "hostname", "namespace" and
                "timestamp" values copied into every record.

        Returns:
            ``processed_data``, the same object that was passed in.
        """
        def_vals = self._get_default_vals()

        # Python type expected for the values of each supported Arrow type.
        ptype_map = {
            pa.string(): str,
            pa.int32(): int,
            pa.int64(): int,
            pa.float32(): float,
            pa.float64(): float,
            pa.date64(): float,
            pa.list_(pa.string()): list,
            pa.list_(pa.int64()): list,
            pa.bool_(): bool,
        }

        # Default value of every schema field, keyed by field name.
        schema_rec = {field.name: def_vals[field.type]
                      for field in self.schema}

        def _fresh_default(name):
            # A list default is copied so that records never share one mutable
            # list; other defaults are returned unchanged.
            default = schema_rec[name]
            return list(default) if isinstance(default, list) else default

        read_from = raw_data[0] if isinstance(raw_data, list) else raw_data

        for entry in processed_data:
            entry["hostname"] = read_from["hostname"]
            entry["namespace"] = read_from["namespace"]
            entry["timestamp"] = read_from["timestamp"]
            entry["sqvers"] = self.version

            for fld in schema_rec:
                if fld not in entry:
                    # A missing "active" field is set to True instead of the
                    # schema default.
                    entry[fld] = True if fld == "active" else _fresh_default(fld)
                    continue

                fld_type = self.schema.field(fld).type
                expected_type = ptype_map[fld_type]
                if not isinstance(entry[fld], expected_type):
                    # Coerce the value; fall back to the default when the
                    # conversion is not possible.
                    try:
                        entry[fld] = expected_type(entry[fld])
                    except (ValueError, TypeError):
                        entry[fld] = _fresh_default(fld)
                elif isinstance(entry[fld], list):
                    # The container is already a list: coerce its elements.
                    elem_type = ptype_map[fld_type.value_type]
                    for i, ele in enumerate(entry[fld]):
                        if isinstance(ele, elem_type):
                            continue
                        try:
                            if elem_type == int:
                                entry[fld][i] = int(ele)
                            elif elem_type == str:
                                entry[fld][i] = str(ele)
                            else:
                                raise ValueError
                        except (ValueError, TypeError):
                            # NOTE(review): this stores the default of the
                            # whole field in a single element slot, as the
                            # original code did - confirm that an element
                            # default is not what was intended.
                            entry[fld][i] = _fresh_default(fld)
        return processed_data

Esempio n. 50

0

Mostra file

File: test_convert_builtin.py Progetto: Shelna-GCP/SentinelToEVI-dataflow

def test_sequence_custom_integers(seq):
    """``MyInt``-wrapped values convert to an int64 array of the plain values."""
    expected = [0, 42, 2**33 + 1, -2**63]
    wrapped = [MyInt(value) for value in expected]
    int_array = pa.array(seq(wrapped), type=pa.int64())
    assert int_array.to_pylist() == expected

Esempio n. 51

0

Mostra file

File: test_unit_arrow_chunk_iterator.py Progetto: kkothand/snowflake-connector-python

def test_iterate_over_timestamp_tz_chunk():
    """Iterate over TIMESTAMP_TZ test chunks for a randomly chosen scale.

    For a scale above 3 the column is a struct of epoch, timezone and
    fraction; otherwise it is a struct of epoch and timezone with the
    fractional digits folded into the epoch value. The generator and the
    expected-value transform below are handed to ``iterate_over_test_chunk``.
    """
    # Seed from the OS entropy source / system time. Passing a ``datetime``
    # object as the seed raises TypeError on Python 3.11 and later.
    random.seed()
    scale = random.randint(0, 9)
    column_meta = [
        {"byteLength": "16" if scale > 3 else "8", "logicalType": "TIMESTAMP_TZ", "scale": str(scale)},
        {"byteLength": "16" if scale > 3 else "8", "logicalType": "TIMESTAMP_TZ", "scale": str(scale)}
    ]

    type1 = pyarrow.struct([pyarrow.field('epoch', pyarrow.int64()),
              pyarrow.field('timezone', pyarrow.int32()),
              pyarrow.field('fraction', pyarrow.int32())])
    type2 = pyarrow.struct([pyarrow.field('epoch', pyarrow.int64()),
              pyarrow.field('timezone', pyarrow.int32())])
    data_type = type1 if scale > 3 else type2

    def timestamp_tz_generator(scale):
        """Return one random value shaped like ``data_type`` for ``scale``."""
        epoch = random.randint(-621355968, 2534023007)
        # For scale > 3 the fraction is widened to nine digits; otherwise it
        # keeps ``scale`` digits and is appended to the epoch below.
        frac = random.randint(0, 10**scale - 1) * (10**(9 - scale)) if scale > 3 else random.randint(0, 10**scale - 1)
        timezone = random.randint(1, 2879)
        if scale > 3:
            return {'epoch': epoch, 'timezone': timezone, 'fraction' : frac}
        else:
            epoch = str(epoch)
            frac = str(frac)
            ZEROFILL = '000000000'
            # Left-pad the fraction with zeros up to ``scale`` digits.
            frac = ZEROFILL[:scale - len(frac)] + frac
            return {'epoch': int(epoch + frac) if scale else int(epoch), 'timezone': timezone}

    def expected_data_transform_tz(_scale):
        """Build the function that maps a generated value to the expected datetime."""
        def expected_data_transform_tz_impl(data, scale=_scale):
            timezone = data['timezone']
            # NOTE(review): presumably the stored timezone is an offset biased
            # by 1440 - confirm against _generate_tzinfo_from_tzoffset.
            tzinfo = _generate_tzinfo_from_tzoffset(timezone - 1440)
            epoch = data['epoch']
            if scale > 3:
                frac = data['fraction']
                if epoch < 0:
                    # Negative epochs are moved up by one and paired with the
                    # complement of the fraction.
                    epoch += 1
                    frac = 10**9 - frac
                # Cut the nine-digit fraction back to ``scale`` digits and
                # append it, zero-padded, to the epoch digits.
                frac = str(int(frac / 10**(9 - scale)))
                ZERO_FILL = '000000000'
                frac = ZERO_FILL[:scale - len(frac)] + frac
                epoch = int(str(epoch) + frac)

            # Insert the decimal point ``scale`` digits from the right,
            # keeping at most six fractional digits.
            microsec = str(epoch)
            if scale > 6:
                microsec = microsec[:-scale] + "." + microsec[-scale:-scale + 6]
            else:
                microsec = microsec[:-scale] + "." + microsec[-scale:] if scale else microsec

            if platform.system() == 'Windows':
                # On Windows the value is built from the epoch start plus a
                # timedelta rather than through ``fromtimestamp``.
                t = datetime.datetime.utcfromtimestamp(0) + datetime.timedelta(seconds=(float(microsec)))
                if pytz.utc != tzinfo:
                    t += tzinfo.utcoffset(t)
                return t.replace(tzinfo=tzinfo)
            else:
                return datetime.datetime.fromtimestamp(float(microsec), tz=tzinfo)

        return expected_data_transform_tz_impl

    iterate_over_test_chunk([data_type, data_type],
        column_meta, lambda: timestamp_tz_generator(scale), expected_data_transform_tz(scale))

Esempio n. 52

0

Mostra file

File: test_convert_builtin.py Progetto: Shelna-GCP/SentinelToEVI-dataflow

def test_struct_from_dicts_inference():
    """Check struct type inference when arrays are built from dicts.

    Covers fully populated dicts, dicts with omitted keys and ``None``
    entries (from both a list and an object ndarray), nested structs, the
    empty-dict edge case, and rejection of mixed struct/scalar input.
    """
    expected_type = pa.struct([
        pa.field('a', pa.int64()),
        pa.field('b', pa.string()),
        pa.field('c', pa.bool_())
    ])
    data = [{'a': 5, 'b': 'foo', 'c': True}, {'a': 6, 'b': 'bar', 'c': False}]
    arr = pa.array(data)
    check_struct_type(arr.type, expected_type)
    assert arr.to_pylist() == data

    # With omitted values
    data = [{'a': 5, 'c': True}, None, {}, {'a': None, 'b': 'bar'}]
    expected = [
        {'a': 5, 'b': None, 'c': True},
        None,
        {'a': None, 'b': None, 'c': None},
        {'a': None, 'b': 'bar', 'c': None},
    ]
    arr = pa.array(data)
    # The same records wrapped in an object ndarray. Building ``arr2`` from
    # the ndarray (rather than from ``data`` again) is what makes the
    # ``equals`` check below compare two different conversion inputs.
    data_as_ndarray = np.empty(len(data), dtype=object)
    data_as_ndarray[:] = data
    arr2 = pa.array(data_as_ndarray)

    check_struct_type(arr.type, expected_type)
    assert arr.to_pylist() == expected
    assert arr.equals(arr2)

    # Nested
    expected_type = pa.struct([
        pa.field(
            'a',
            pa.struct([
                pa.field('aa', pa.list_(pa.int64())),
                pa.field('ab', pa.bool_())
            ])),
        pa.field('b', pa.string())
    ])
    data = [
        {'a': {'aa': [5, 6], 'ab': True}, 'b': 'foo'},
        {'a': {'aa': None, 'ab': False}, 'b': None},
        {'a': None, 'b': 'bar'},
    ]
    arr = pa.array(data)
    assert arr.to_pylist() == data

    # Edge cases
    arr = pa.array([{}])
    assert arr.type == pa.struct([])
    assert arr.to_pylist() == [{}]

    # Mixing structs and scalars is rejected
    with pytest.raises((pa.ArrowInvalid, pa.ArrowTypeError)):
        pa.array([1, {'a': 2}])

Esempio n. 53

0

Mostra file

File: test_schema.py Progetto: bixuehujin/arrow

def test_empty_table():
    """``Schema.empty_table`` gives a zero-row ``pa.Table`` with that schema."""
    schema = pa.schema([pa.field('oneField', pa.int64())])
    empty = schema.empty_table()
    assert isinstance(empty, pa.Table)
    assert empty.num_rows == 0
    assert empty.schema == schema

Esempio n. 54

0

Mostra file

File: test_index.py Progetto: lr4d/kartothek

        original_index.update(new_index)
    assert (
        str(e.value) ==
        "Trying to update an index with the wrong column. Got `another_col` but expected `col`"
    )


@pytest.mark.parametrize(
    "dtype",
    [
        pa.binary(),
        pa.bool_(),
        pa.date32(),
        pa.float32(),
        pa.float64(),
        pa.int64(),
        pa.int8(),
        pa.string(),
        pa.timestamp("ns"),
    ],
)
def test_index_empty(store, dtype):
    """Store an index with an empty ``index_dct`` and load it back by key."""
    storage_key = "dataset_uuid/some_index.parquet"
    empty_index = ExplicitSecondaryIndex(
        column="col",
        index_dct={},
        dtype=dtype,
        index_storage_key=storage_key,
    )
    stored_key = empty_index.store(store, "dataset_uuid")

    # NOTE(review): no assertion on the reloaded index is visible here, so
    # this only checks that store() and load() do not raise.
    unloaded_index = ExplicitSecondaryIndex(column="col",
                                            index_storage_key=stored_key)
    reloaded_index = unloaded_index.load(store)

Esempio n. 55

0

Mostra file

File: test_array.py Progetto: mmulvahill/arrow

def test_safe_cast_nan_to_int_raises():
    """A safe cast of an array containing NaN to int64 raises ArrowInvalid."""
    float_array = pa.array([np.nan, 1.])
    expected_message = 'Floating point value truncated'

    with pytest.raises(pa.ArrowInvalid, match=expected_message):
        float_array.cast(pa.int64(), safe=True)

Esempio n. 56

0

Mostra file

File: test_orc.py Progetto: c-jamie/arrow

def test_column_selection(tempdir):
    """Write a nested table to ORC and read back column subsets.

    Columns are selected by name, by dotted path into nested fields and by
    field index; unknown names and out-of-range indices must raise.
    """
    from pyarrow import orc

    # create a table with nested types
    inner_field = pa.field('inner', pa.int64())
    middle_field = pa.field('middle', pa.struct([inner_field]))
    item_struct = pa.struct([
        pa.field('inner1', pa.int64()),
        pa.field('inner2', pa.int64()),
    ])
    schema = pa.schema([
        pa.field('basic', pa.int32()),
        pa.field('list', pa.list_(pa.field('item', pa.int32()))),
        pa.field('struct',
                 pa.struct([middle_field, pa.field('inner2', pa.int64())])),
        pa.field('list-struct', pa.list_(pa.field('item', item_struct))),
        pa.field('basic2', pa.int64()),
    ])
    # One single-row column per schema field, in schema order.
    column_values = [
        [0],
        [[1, 2]],
        [{"middle": {"inner": 3}, "inner2": 4}],
        [[{"inner1": 5, "inner2": 6}, {"inner1": 7, "inner2": 8}]],
        [9],
    ]
    table = pa.table(column_values, schema=schema)

    orc_path = str(tempdir / 'test.orc')
    orc.write_table(table, orc_path)
    reader = orc.ORCFile(orc_path)

    # default selecting all columns
    everything = reader.read()
    assert everything.equals(table)

    # selecting with columns names
    basics = reader.read(columns=["basic", "basic2"])
    assert basics.equals(table.select(["basic", "basic2"]))

    mixed = reader.read(columns=["list", "struct", "basic2"])
    assert mixed.equals(table.select(["list", "struct", "basic2"]))

    # using dotted paths
    nested_inner = reader.read(columns=["struct.middle.inner"])
    assert nested_inner.equals(
        pa.table({"struct": [{"middle": {"inner": 3}}]}))

    struct_inner2 = reader.read(columns=["struct.inner2"])
    assert struct_inner2.equals(pa.table({"struct": [{"inner2": 4}]}))

    list_and_struct = reader.read(
        columns=["list", "struct.middle.inner", "struct.inner2"])
    assert list_and_struct.equals(table.select(["list", "struct"]))

    list_struct_inner1 = reader.read(columns=["list-struct.inner1"])
    assert list_struct_inner1.equals(
        pa.table({"list-struct": [[{"inner1": 5}, {"inner1": 7}]]}))

    # selecting with (Arrow-based) field indices
    basics_by_index = reader.read(columns=[0, 4])
    assert basics_by_index.equals(table.select(["basic", "basic2"]))

    nested_by_index = reader.read(columns=[1, 2, 3])
    assert nested_by_index.equals(
        table.select(["list", "struct", "list-struct"]))

    # error on non-existing name or index
    with pytest.raises(IOError):
        # liborc returns ParseError, which gets translated into IOError
        # instead of ValueError
        reader.read(columns=["wrong"])

    with pytest.raises(ValueError):
        reader.read(columns=[5])

Esempio n. 57

0

Mostra file

from redvox.common import file_statistics as fs
from redvox.common.parallel_utils import maybe_parallel_map
from redvox.common.station import Station
from redvox.common.errors import RedVoxExceptions

# Arrow struct type with string "id" and "uuid" fields and a float64
# "start_time" field.
id_py_stct = pa.struct([
    ("id", pa.string()),
    ("uuid", pa.string()),
    ("start_time", pa.float64()),
])
# Arrow struct type with the metadata fields listed below.
# NOTE(review): "os" is declared int64 while the other descriptive fields are
# strings - presumably it holds a numeric code; confirm against the writer.
meta_py_stct = pa.struct([
    ("api", pa.float64()),
    ("sub_api", pa.float64()),
    ("make", pa.string()),
    ("model", pa.string()),
    ("os", pa.int64()),
    ("os_version", pa.string()),
    ("app", pa.string()),
    ("app_version", pa.string()),
    ("is_private", pa.bool_()),
    ("packet_duration_s", pa.float64()),
    ("station_description", pa.string()),
])

PERCENT_FREE_MEM_USE = .8  # Percentage of total free memory to use when creating stations (1. is 100%)


class ApiReader:
    """
    Reads data from api 900 or api 1000 format, converting all data read into RedvoxPacketM for
    ease of comparison and use.

Esempio n. 58

0

Mostra file

File: arrow_util_test.py Progetto: tensorflow/data-validation

    def testIsListLike(self):
        """list and large_list types are list-like; binary, int64 and large_string are not."""
        list_like_types = (pa.list_(pa.int64()), pa.large_list(pa.int64()))
        other_types = (pa.binary(), pa.int64(), pa.large_string())

        for arrow_type in list_like_types:
            self.assertTrue(arrow_util.is_list_like(arrow_type))

        for arrow_type in other_types:
            self.assertFalse(arrow_util.is_list_like(arrow_type))

Esempio n. 59

0

Mostra file

File: test_convert_builtin.py Progetto: Shelna-GCP/SentinelToEVI-dataflow

def test_limited_iterator_size_underflow():
    """A ``size`` hint larger than the iterator's length still yields all items."""
    from_iterator = pa.array(iter(range(3)), type=pa.int64(), size=10)
    from_tuple = pa.array((0, 1, 2))
    assert from_iterator.equals(from_tuple)

Esempio n. 60

0

Mostra file

    b = pa.array([0, 2], type=pa.int64())
    c = pa.array([0, 3], type=pa.int32())
    d = pa.array([0, 2, 0, 3], type=pa.int32())

    eq([a], [a])
    ne([a], [b])
    eq([a, c], [a, c])
    eq([a, c], [d])
    ne([c, a], [a, c])


@pytest.mark.parametrize(
    ('data', 'typ'),
    [
        ([True, False, True, True], pa.bool_()),
        ([1, 2, 4, 6], pa.int64()),
        ([1.0, 2.5, None], pa.float64()),
        (['a', None, 'b'], pa.string()),
        ([], pa.list_(pa.uint8())),
        ([[1, 2], [3]], pa.list_(pa.int64())),
        ([['a'], None, ['b', 'c']], pa.list_(pa.string())),
        ([(1, 'a'), (2, 'c'), None],
            pa.struct([pa.field('a', pa.int64()), pa.field('b', pa.string())]))
    ]
)
def test_chunked_array_pickle(data, typ):
    arrays = []
    while data:
        arrays.append(pa.array(data[:2], type=typ))
        data = data[2:]
    array = pa.chunked_array(arrays, type=typ)