def evaluate_transform(self, x, no_copy=False):
    r"""Convert a message into a numpy array of the transformed datatype.

    Args:
        x (object): Message object to transform. Pandas data frames and
            numpy arrays are cast to the numpy equivalent of
            self.transformed_datatype; lists, tuples, and dictionaries
            are consolidated into an array of that type.
        no_copy (bool, optional): If True, the converted array is returned
            directly. Otherwise a deep copy of the converted array is
            returned. Defaults to False.

    Returns:
        object: The transformed message.

    Raises:
        TypeError: If x is not a data frame, array, list, tuple, or dict,
            or if x is a list, tuple, or dict and the numpy datatype is
            falsy.

    """
    target_dtype = type2numpy(self.transformed_datatype)
    if isinstance(x, pandas.DataFrame):
        result = pandas2numpy(x).astype(target_dtype, copy=True)
    elif isinstance(x, np.ndarray):
        result = x.astype(target_dtype, copy=True)
    elif target_dtype and isinstance(x, (list, tuple, dict)):
        if len(x) > 0:
            rows = x
            if isinstance(x, dict):
                # Order the dictionary values to match the dtype fields
                rows = dict2list(x, order=target_dtype.names)
            result = consolidate_array(rows, dtype=target_dtype)
        else:
            # Nothing to consolidate; return an empty array of the type
            result = np.zeros(0, target_dtype)
    else:
        # warning?
        raise TypeError(("Cannot consolidate object of type %s "
                         "into a structured numpy array.") % type(x))
    if no_copy:
        return result
    return copy.deepcopy(result)
def test_numpy2pandas():
    r"""Check numpy -> pandas -> numpy round trips and invalid input."""
    # Neither converter should accept None
    with pytest.raises(TypeError):
        serialize.numpy2pandas(None)
    with pytest.raises(TypeError):
        serialize.pandas2numpy(None)
    n_rows = 5
    # (field name, numpy format) pairs for the structured test array
    field_specs = [
        ("name", 'S5'),
        ("number", 'i8'),
        ("value", 'f8'),
        ("complex", 'c16'),
    ]
    structured = np.zeros(n_rows, np.dtype(field_specs))
    structured['name'][0] = 'hello'
    mixed_objects = np.array([[], 'hello', 5], dtype='O')
    cases = [
        structured,
        np.zeros(n_rows, 'float'),
        structured['name'],
        mixed_objects,
        np.array([]),
    ]
    for expected in cases:
        round_trip = serialize.pandas2numpy(serialize.numpy2pandas(expected))
        np.testing.assert_array_equal(expected, round_trip)
def func_deserialize(self, msg):
    r"""Deserialize a delimited text message into a pandas data frame.

    The message is parsed with pandas.read_csv using self.delimiter as the
    separator. If the serializer is already initialized, self.numpy_dtype
    is passed as the column dtype. As side effects, self.field_names is set
    from the parsed columns when it is None, and the serializer is updated
    with a type definition built from the parsed columns when it is not yet
    initialized.

    Args:
        msg (str, bytes): Message to be deserialized.

    Returns:
        obj: Deserialized Python object (the parsed data frame after
            self.apply_field_names has been applied).

    """
    fd = backwards.BytesIO(msg)
    try:
        dtype = self.numpy_dtype if self.initialized else None
        out = pandas.read_csv(fd,
                              sep=backwards.as_str(self.delimiter),
                              names=None, dtype=dtype,
                              encoding='utf8')
    finally:
        # Release the buffer even if parsing fails
        fd.close()
    # Only inspect first elements when there is at least one row; a frame
    # with a header but no rows has no first element to check.
    if (not backwards.PY2) and (len(out) > 0):
        # For Python 3 and higher, make sure strings are bytes
        for c, d in zip(out.columns, out.dtypes):
            # Positional access so the check does not depend on the index
            # containing the label 0
            if (d == object) and isinstance(out[c].iloc[0],
                                            backwards.unicode_type):
                out[c] = out[c].apply(lambda s: s.encode('utf-8'))
    # On windows, long != longlong and longlong requires special cformat
    # For now, long will be used to preserve the use of %ld to match long
    if platform._is_win:  # pragma: windows
        longlong = np.dtype('longlong')
        if longlong.itemsize == 8:
            new_dtypes = {c: (np.int32 if d == longlong else d)
                          for c, d in zip(out.columns, out.dtypes)}
            out = out.astype(new_dtypes, copy=False)
    # Reorder if necessary
    out = self.apply_field_names(out, self.get_field_names())
    if self.field_names is None:
        self.field_names = out.columns.tolist()
    if not self.initialized:
        # Build the type definition from the first message received
        np_out = serialize.pandas2numpy(out)
        items = [OneDArrayMetaschemaType.encode_type(np_out[n], title=n)
                 for n in self.get_field_names()]
        typedef = {'type': 'array', 'items': items}
        self.update_serializer(extract=True, **typedef)
    return out
def object2array(cls, obj, **kwargs):
    r"""Return the array form of a message object.

    Args:
        obj (object): Object that would be serialized by this class and
            should be returned in an array form.
        **kwargs: Additional keyword arguments are forwarded to the parent
            class's object2array when obj is not a pandas data frame.

    Returns:
        np.array: Array version of the provided object.

    """
    if not isinstance(obj, pandas.DataFrame):
        # Anything that is not a data frame is handled by the parent class
        return super(PandasSerialize, cls).object2array(
            obj, as_array=True, **kwargs)
    return serialize.pandas2numpy(obj)
def consolidate_array(self, out):
    r"""Consolidate a message into a structured numpy array if possible.

    Pandas data frames are converted to numpy arrays first; the result is
    then handed to the parent class's consolidate_array.

    Args:
        out (list, tuple, np.ndarray, pandas.DataFrame): Object to
            consolidate into a structured numpy array.

    Returns:
        np.ndarray: Structured numpy array containing consolidated message.

    Raises:
        ValueError: If the array cannot be consolidated (presumably raised
            by the parent implementation; not raised directly here).

    """
    is_frame = isinstance(out, pandas.DataFrame)
    candidate = serialize.pandas2numpy(out) if is_frame else out
    return super(PandasSerialize, self).consolidate_array(candidate)
def get_testing_options(cls, as_frames=False, no_names=False):
    r"""Build the dictionary of testing options for this class.

    The options are derived from PandasSerialize.get_testing_options.

    Args:
        as_frames (bool, optional): If True, the test objects will be Pandas
            data frames. Otherwise they are converted with
            serialize.pandas2list. Defaults to False.
        no_names (bool, optional): If True, an example is returned where the
            names are not provided to the deserializer. Defaults to False.

    Returns:
        dict: Dictionary of variables to use for testing. Key/value pairs:
            kwargs (dict): Keyword arguments for comms tested with the
                provided content, with 'format_str' and 'as_array'
                removed.
            send (list): List of objects to send to test file.
            recv (list): List of objects that will be received from a test
                file that was sent the messages in 'send'.
            dict (object): Result of serialize.pandas2dict on the first
                test frame.
            contents (bytes): Bytes contents of test file created by sending
                the messages in 'send'.
            msg_array (object): Result of serialize.pandas2numpy on the
                first test frame.
            msg (object): First entry of 'send'.

    """
    seri_opts = PandasSerialize.get_testing_options(no_names=no_names)
    out = {
        'kwargs': seri_opts['kwargs'],
        'send': seri_opts['objects'],
        # Everything sent is received back as a single concatenated frame
        'recv': [pd.concat(seri_opts['objects'], ignore_index=True)],
        'dict': serialize.pandas2dict(seri_opts['objects'][0]),
        'contents': seri_opts['contents'],
        'msg_array': serialize.pandas2numpy(seri_opts['objects'][0]),
    }
    if not as_frames:
        for key in ('recv', 'send'):
            out[key] = [serialize.pandas2list(frame) for frame in out[key]]
    out['msg'] = out['send'][0]
    # These serializer options are dropped from the returned keyword arguments
    for key in ('format_str', 'as_array'):
        out['kwargs'].pop(key, None)
    return out