Ejemplo n.º 1
0
 def decode(cls, encoding, spec, df):
     """Rebuild an instance of ``cls`` from its serialized ``spec``.

     ``spec['dtype']`` and ``spec['dtype_item']`` are passed through
     ``encoding.decode('dtype', ...)``; the remaining entries are handed
     to the constructor unchanged, together with ``df``.
     """
     expression = spec['expression']
     dtype = encoding.decode('dtype', spec['dtype'])
     dtype_item = encoding.decode('dtype', spec['dtype_item'])
     # Keyword options are forwarded verbatim, in the constructor's order.
     options = {
         'flatten': spec['flatten'],
         'unique_limit': spec['unique_limit'],
         'selection': spec['selection'],
     }
     return cls(df, expression, dtype, dtype_item, **options)
Ejemplo n.º 2
0
 def decode(cls, encoding, spec, df, nthreads):
     """Rebuild an instance of ``cls`` from its serialized ``spec``.

     The two dtype entries are run through ``encoding.decode('dtype', ...)``;
     every other ``spec`` value, plus ``nthreads``, is given to the
     constructor as-is.
     """
     expression = spec['expression']
     dtype = encoding.decode('dtype', spec['dtype'])
     dtype_item = encoding.decode('dtype', spec['dtype_item'])
     # Plain pass-through options, looked up in the same order as before.
     option_names = ('flatten', 'unique_limit', 'selection', 'return_inverse')
     options = {name: spec[name] for name in option_names}
     return cls(df, expression, dtype, dtype_item, **options, nthreads=nthreads)
Ejemplo n.º 3
0
 def decode(cls, encoding, spec, df):
     """Rebuild an instance of ``cls`` from its serialized ``spec``.

     Works on a shallow copy of ``spec`` so the caller's dict is untouched.
     The ``'selections'`` list is turned back into ``'selection'`` (the
     whole list when ``'selection_waslist'`` is truthy, otherwise its first
     element) and ``'minima'``/``'maxima'`` are zipped into ``'limits'``.
     """
     kwargs = spec.copy()
     kwargs['op'] = encoding.decode('_op', kwargs['op'])
     kwargs['dtype'] = encoding.decode('dtype', kwargs['dtype'])

     was_list = kwargs.pop('selection_waslist')
     selections = kwargs.pop('selections')
     kwargs['selection'] = selections if was_list else selections[0]

     minima = kwargs.pop('minima')
     maxima = kwargs.pop('maxima')
     kwargs['limits'] = list(zip(minima, maxima))
     return cls(df, **kwargs)
Ejemplo n.º 4
0
 def decode(cls, encoding, spec, df):
     """Rebuild a task from ``spec``: decode its grid, then attach aggregations.

     Each decoded aggregation has ``_prepare_types(df)`` called on it before
     being registered on the task via ``add_aggregation_operation``.
     """
     task = cls(df, encoding.decode('grid', spec['grid']))
     for aggregation in encoding.decode_list('aggregation', spec['aggregations']):
         aggregation._prepare_types(df)
         task.add_aggregation_operation(aggregation)
     return task
Ejemplo n.º 5
0
def test_encoding():
    """Round-trip a dict holding a bytes value through encode/serialize and back."""
    encoder = vaex.encoding.Encoding()
    encoded = encoder.encode('blobtest', {'someblob': b'1234'})
    wiredata = vaex.encoding.serialize(encoded, encoder)

    # Decode with a fresh Encoding instance.
    decoder = vaex.encoding.Encoding()
    received = vaex.encoding.deserialize(wiredata, decoder)
    restored = decoder.decode('blobtest', received)
    assert restored['someblob'] == b'1234'
Ejemplo n.º 6
0
 def decode(cls, encoding, spec, df):
     """Rebuild an instance of ``cls`` from ``spec``.

     Decodes the aggregations, the dtype mapping and the grid (in that
     order), calls ``_prepare_types(df)`` on every aggregation, and then
     constructs ``cls(df, grid, aggregations, dtypes)``.
     """
     aggregations = encoding.decode_list('aggregation', spec['aggregations'])
     dtypes = encoding.decode_dict('dtype', spec['dtypes'])
     grid = encoding.decode('grid', spec['grid'])
     for aggregation in aggregations:
         aggregation._prepare_types(df)
     return cls(df, grid, aggregations, dtypes)
Ejemplo n.º 7
0
def rebuild_dataframe_vaex(df):
    """Return ``df`` after an encode -> serialize -> deserialize -> decode round trip."""
    # Encode and serialize with one Encoding instance ...
    encoder = vaex.encoding.Encoding()
    encoded = encoder.encode('dataframe', df)
    blob = vaex.encoding.serialize(encoded, encoder)

    # ... then deserialize and decode with a fresh one.
    decoder = vaex.encoding.Encoding()
    received = vaex.encoding.deserialize(blob, decoder)
    return decoder.decode('dataframe', received)
Ejemplo n.º 8
0
def rebuild_dataset_vaex(ds):
    """Return ``ds`` after an encode -> serialize -> deserialize -> decode round trip."""
    # Encode and serialize with one Encoding instance ...
    encoder = vaex.encoding.Encoding()
    encoded = encoder.encode('dataset', ds)
    blob = vaex.encoding.serialize(encoded, encoder)

    # ... then deserialize and decode with a fresh one.
    decoder = vaex.encoding.Encoding()
    received = vaex.encoding.deserialize(blob, decoder)
    return decoder.decode('dataset', received)
Ejemplo n.º 9
0
def test_encoding_numpy():
    """A ``>f4`` numpy array compares equal after an encoding round trip."""
    x = np.arange(10, dtype='>f4')

    encoder = vaex.encoding.Encoding()
    encoded = encoder.encode('ndarray', x)
    wiredata = vaex.encoding.serialize(encoded, encoder)

    # Decode with a fresh Encoding instance.
    decoder = vaex.encoding.Encoding()
    received = vaex.encoding.deserialize(wiredata, decoder)
    value = decoder.decode('ndarray', received)
    assert np.all(value == x)
Ejemplo n.º 10
0
def test_encoding_numpy_datetime():
    """A datetime (dtype ``'M'``) numpy array compares equal after a round trip."""
    x = np.arange('2001', '2005', dtype='M')

    encoder = vaex.encoding.Encoding()
    encoded = encoder.encode('ndarray', x)
    wiredata = vaex.encoding.serialize(encoded, encoder)

    # Decode with a fresh Encoding instance.
    decoder = vaex.encoding.Encoding()
    received = vaex.encoding.deserialize(wiredata, decoder)
    value = decoder.decode('ndarray', received)
    assert np.all(value == x)
Ejemplo n.º 11
0
def test_encoding_numpy_string_objects():
    """An array mixing strings and ``None`` compares equal after a round trip."""
    x = np.array(['vaex', 'is', None, 'fast'])

    encoder = vaex.encoding.Encoding()
    encoded = encoder.encode('ndarray', x)
    wiredata = vaex.encoding.serialize(encoded, encoder)

    # Decode with a fresh Encoding instance.
    decoder = vaex.encoding.Encoding()
    received = vaex.encoding.deserialize(wiredata, decoder)
    value = decoder.decode('ndarray', received)
    assert np.all(value == x)
Ejemplo n.º 12
0
def test_encoding_arrow(array_factory_arrow):
    """An array built by ``array_factory_arrow`` keeps its values across a round trip."""
    x = array_factory_arrow(np.arange(10, dtype='f4'))

    encoder = vaex.encoding.Encoding()
    encoded = encoder.encode('arrow-array', x)
    wiredata = vaex.encoding.serialize(encoded, encoder)

    # Decode with a fresh Encoding instance.
    decoder = vaex.encoding.Encoding()
    received = vaex.encoding.deserialize(wiredata, decoder)
    value = decoder.decode('arrow-array', received)
    assert value.to_pylist() == x.to_pylist()
Ejemplo n.º 13
0
def test_encoding_dtype():
    """A ``>f8`` dtype survives a round trip and the result reports ``is_numpy``."""
    dtype = np.dtype('>f8')

    encoder = vaex.encoding.Encoding()
    encoded = encoder.encode('dtype', dtype)
    wiredata = vaex.encoding.serialize(encoded, encoder)

    # Decode with a fresh Encoding instance.
    decoder = vaex.encoding.Encoding()
    received = vaex.encoding.deserialize(wiredata, decoder)
    print(received)
    value = decoder.decode('dtype', received)
    assert value == dtype
    assert value.is_numpy
Ejemplo n.º 14
0
 def decode(encoding, binner_spec, nthreads):
     """Build a binner object from ``binner_spec``.

     ``binner_spec['binner-type']`` selects the branch ('ordinal', 'scalar'
     or 'hash'); the concrete class is looked up on ``vaex.superagg`` via
     ``vaex.utils.find_type_from_dtype`` using the decoded dtype.

     Raises:
         ValueError: if the binner type is not one of the three above.
     """
     kind = binner_spec['binner-type']
     dtype = encoding.decode('dtype', binner_spec['dtype'])

     if kind == 'ordinal':
         binner_cls = vaex.utils.find_type_from_dtype(vaex.superagg, "BinnerOrdinal_", dtype)
         return binner_cls(nthreads, binner_spec['expression'], binner_spec['count'], binner_spec['minimum'], False)

     if kind == 'scalar':
         binner_cls = vaex.utils.find_type_from_dtype(vaex.superagg, "BinnerScalar_", dtype)
         return binner_cls(nthreads, binner_spec["expression"], binner_spec["minimum"], binner_spec["maximum"], binner_spec["count"])

     if kind == "hash":
         binner_cls = vaex.utils.find_type_from_dtype(vaex.superagg, "BinnerHash_", dtype)
         object_id = binner_spec["hash_map_unique"]
         # Reuse an already-decoded hash map when the encoding holds one;
         # otherwise decode it from its spec and register it under the same id.
         if not encoding.has_object(object_id):
             object_spec = encoding.get_object_spec(object_id)
             hash_map_unique = encoding.decode("hash-map-unique", object_spec)
             encoding.set_object(object_id, hash_map_unique)
         else:
             hash_map_unique = encoding.get_object(object_id)
         # NOTE(review): unlike the other two branches, nthreads is not
         # forwarded here - confirm against the BinnerHash_ constructor.
         return binner_cls(binner_spec["expression"], hash_map_unique._internal)

     raise ValueError('Cannot deserialize: %r' % binner_spec)
Ejemplo n.º 15
0
 def create_task_part():
     nonlocal memory_usage
     task_part = encoding.decode('task-part-cpu',
                                 spec,
                                 df=task.df,
                                 nthreads=nthreads)
     memory_usage += task_part.memory_usage()
     for task_checker in task_checkers:
         task_checker.add_task(task)
     if task.requires_fingerprint:
         task_part.fingerprint = task.fingerprint()
     return task_part
Ejemplo n.º 16
0
def test_encoding_numpy_masked():
    """A masked ``>f4`` array keeps both its values and its mask across a round trip."""
    base = np.arange(10, dtype='>f4')
    x = np.ma.array(base, mask=base > 4)

    encoder = vaex.encoding.Encoding()
    encoded = encoder.encode('ndarray', x)
    wiredata = vaex.encoding.serialize(encoded, encoder)

    # Decode with a fresh Encoding instance.
    decoder = vaex.encoding.Encoding()
    received = vaex.encoding.deserialize(wiredata, decoder)
    value = decoder.decode('ndarray', received)
    assert np.all(value == x)
    assert np.all(value.mask == x.mask)
Ejemplo n.º 17
0
def rebuild_with_skip(ds, skip):
    """Round-trip ``ds`` through the encoding while leaving ``skip`` out of the blob.

    ``skip`` is not serialized; it is handed directly to the decoding side
    via ``set_object`` under its ``id``.
    """
    repr(ds)  # for coverage

    encoder = vaex.encoding.Encoding()
    # Pre-registering a None spec causes serialization of this object to be skipped.
    encoder.set_object_spec(skip.id, None)
    encoded = encoder.encode('dataset', ds)
    assert encoder._object_specs[skip.id] is None
    # Drop the placeholder entry before serializing.
    del encoder._object_specs[skip.id]
    blob = vaex.encoding.serialize(encoded, encoder)

    # The decoding side gets the skipped object supplied up front.
    decoder = vaex.encoding.Encoding()
    decoder.set_object(skip.id, skip)
    received = vaex.encoding.deserialize(blob, decoder)
    return decoder.decode('dataset', received)
Ejemplo n.º 18
0
 def decode(encoding, binner_spec):
     """Build a binner object from ``binner_spec``.

     ``binner_spec['binner-type']`` selects the branch ('ordinal' or
     'scalar'); the concrete class is looked up on ``vaex.superagg`` via
     ``vaex.utils.find_type_from_dtype`` using the decoded dtype.

     Raises:
         ValueError: if the binner type is neither 'ordinal' nor 'scalar'.
     """
     kind = binner_spec['binner-type']
     dtype = encoding.decode('dtype', binner_spec['dtype'])

     if kind == 'ordinal':
         binner_cls = vaex.utils.find_type_from_dtype(
             vaex.superagg, "BinnerOrdinal_", dtype)
         return binner_cls(
             binner_spec['expression'],
             binner_spec['count'],
             binner_spec['minimum'],
         )

     if kind == 'scalar':
         binner_cls = vaex.utils.find_type_from_dtype(
             vaex.superagg, "BinnerScalar_", dtype)
         return binner_cls(
             binner_spec['expression'],
             binner_spec['minimum'],
             binner_spec['maximum'],
             binner_spec['count'],
         )

     raise ValueError('Cannot deserialize: %r' % binner_spec)
Ejemplo n.º 19
0
 def decode(cls, encoding, spec, df):
     """Rebuild a task from ``spec``: a tuple of decoded binners plus one aggregation."""
     decoded_binners = encoding.decode_list('binner', spec['binners'])
     binners = tuple(decoded_binners)
     aggregation = encoding.decode('aggregation', spec['aggregation'])
     return cls(df, binners, aggregation)
Ejemplo n.º 20
0
 def decode(cls, encoding, spec, df, nthreads):
     """Rebuild an instance of ``cls`` from ``spec``.

     A shallow copy of ``spec`` has its ``'op'`` and ``'dtype'`` entries
     replaced by their decoded values and is then expanded as keyword
     arguments. ``nthreads`` is accepted but not used by this method.
     """
     kwargs = spec.copy()
     kwargs.update(
         op=encoding.decode('_op', kwargs['op']),
         dtype=encoding.decode('dtype', kwargs['dtype']),
     )
     return cls(df, **kwargs)
Ejemplo n.º 21
0
 def decode(cls, encoding, spec, df):
     """Rebuild an instance of ``cls`` from ``spec``.

     A shallow copy of ``spec`` loses its ``'task'`` entry, has ``'op'`` and
     ``'dtype'`` replaced by their decoded values, and is then expanded as
     keyword arguments to the constructor.
     """
     kwargs = spec.copy()
     kwargs.pop('task')  # raises KeyError when absent, like ``del``
     kwargs.update(
         op=encoding.decode('_op', kwargs['op']),
         dtype=encoding.decode('dtype', kwargs['dtype']),
     )
     return cls(df, **kwargs)
Ejemplo n.º 22
0
 def create_task_part():
     """Decode ``spec`` into a 'task-part-cpu' object bound to ``run.df``."""
     task_part = encoding.decode('task-part-cpu', spec, df=run.df)
     return task_part