def from_dict(cls, db_path, params):
    """
    Build a parameter parser from a plain dictionary of parameters.

    Parameter
    ---------
    db_path: string
        Absolute path of the database.
    params: dict
        Parameter dictionary. Recognised keys are 'rel_path' (required,
        string), 'start_time' (datetime), 'end_time' (datetime),
        'store_fmt' (StoreFormat) and 'dtype' (numpy.dtype). Optional keys
        that are missing or None are stored as None; present values are
        normalised with to_datetime / StoreFormat.from_iterable / np_dtype.

    Return
    ------
    obj: ParamsParser

    Raises
    ------
    KeyError
        If 'rel_path' is missing from `params`.
    ValueError
        If the resulting store format fails its `validate()` check.
    """
    parser = cls()
    parser._main_path = db_path

    start = params.get('start_time')
    parser._start_time = None if start is None else to_datetime(start)

    end = params.get('end_time')
    parser._end_time = None if end is None else to_datetime(end)

    # The only mandatory key: a plain subscript so a missing value fails loudly.
    parser._rel_path = params['rel_path']

    fmt = params.get('store_fmt')
    parser._store_fmt = None if fmt is None else StoreFormat.from_iterable(fmt)

    raw_dtype = params.get('dtype')
    parser._dtype = None if raw_dtype is None else np_dtype(raw_dtype)

    # NOTE(review): validation goes through the public `store_fmt` attribute,
    # not `_store_fmt`; what it yields when no 'store_fmt' was supplied is not
    # visible here -- confirm against the class definition.
    if parser.store_fmt.validate():
        return parser
    raise ValueError("Invalid parameter group!")
def _assertSizesMatch(space, dofVector): # only allow contiguous arrays as dof vectors try: assert dofVector.data.contiguous, "dofVector needs to provide a contiguous data memory, sliced arrays are not supported!" except AttributeError: pass # only allow arrays with one dimensional shapes try: assert len( dofVector.shape ) == 1, "dofVector should be a simple array, i.e. len(shape) == 1" except AttributeError: pass # check that sizes and data size match try: assert space.size == len( dofVector ), f"space (size={space.size}) and vector (size={len(dofVector)}) do not match!" except TypeError: assert space.size == dofVector.size, f"space (size={space.size}) and vector (size={dofVector.size}) do not match!" if hasattr(dofVector, "dtype"): dtype = dofVector.dtype itemsize = np_dtype(dofVector.dtype).itemsize match = space._sizeOfField == itemsize and dtype == 'float64' if space.field == 'double' else True assert match, f"space (dtype={space._sizeOfField},{space.field}) and vector (dtype={itemsize},{dtype}) do not match!"
def pil_scale(orig: Image, w: int = None, h: int = None) -> Image:
    """
    Resize a Pillow image.

    The target size is obtained from ``_calculate_scale`` using the image's
    current width/height and the requested ``w``/``h``. Images whose mode
    starts with ``'I'`` are first converted to 8-bit data before resizing.

    :param orig: Pillow image to resize
    :param w: requested width (passed through to ``_calculate_scale``)
    :param h: requested height (passed through to ``_calculate_scale``)
    :return: the resized image
    """
    new_width, new_height = _calculate_scale(orig.width, orig.height, w, h)

    # resize() is used rather than thumbnail() so that enlarging works too.
    if orig.mode.startswith('I'):
        # workaround for Pillow#4402:
        pixels = np_array(orig)
        if pixels.dtype.kind == 'i':
            # A signed integer dtype is not trustworthy here (usually a
            # mistake in the array interface): reinterpret the same bytes
            # as the unsigned type of equal width.
            pixels.dtype = np_dtype('u' + pixels.dtype.name)
        if pixels.dtype.kind == 'u':
            # Reduce precision to 8 bit by keeping only the most significant
            # byte. The image may really use a narrower range (e.g. 10-bit
            # in I;16), but guessing the scale is avoided on purpose.
            drop_bits = 8 * (pixels.dtype.itemsize - 1)
            pixels = (pixels >> drop_bits).astype(np_uint8)
        elif pixels.dtype.kind == 'f':
            # Floats are taken to lie in [0, 1.0] and are mapped to [0, 255].
            pixels *= 255
            pixels = pixels.astype(np_uint8)
        orig = fromarray(pixels)

    return orig.resize((new_width, new_height))
def __init__(self, shape, dtype=float, buffer=None):  # pylint: disable=redefined-builtin
    """Initializes an ndarray.

    Low-level constructor; prefer the helpers in array_creation.py.

    The class offers a numpy.ndarray-like interface over a TF Tensor with a
    fully-defined shape. Unlike np.ndarray's backing buffer, Tensors are
    immutable, so operations such as `__setitem__` replace the Tensor, which
    limits support for NumPy `view` semantics. The `offset`, `strides` and
    `order` arguments of numpy.ndarray are not supported.

    Args:
      shape: The shape of the array. Must be a scalar, an iterable of integers
        or a `TensorShape` object.
      dtype: Optional. The dtype of the array. Must be a python type, a numpy
        type or a tensorflow `DType` object.
      buffer: Optional. The backing buffer of the array. Must have shape
        `shape`. Must be a `ndarray`, `np.ndarray` or a `Tensor`.

    Raises:
      ValueError: If `buffer` has an unsupported type, or if it is specified
        and its shape does not match `shape`.
    """
    # Normalise python / numpy dtypes to a tf.DType.
    if dtype and not isinstance(dtype, tf.DType):
        dtype = tf.as_dtype(np_dtype(dtype))

    if buffer is None:
        tensor = tf.zeros(shape, dtype=dtype)
    else:
        if isinstance(buffer, ndarray):
            tensor = buffer.data
        elif isinstance(buffer, np_ndarray):
            # If `buffer` is a np.ndarray, the Tensor will share the underlying
            # storage of the array.
            tensor = tf.convert_to_tensor(value=buffer, dtype=dtype)
        elif isinstance(buffer, tf.Tensor):
            tensor = buffer
        else:
            raise ValueError(
                'Unexpected type for `buffer` {}. Must be an ndarray,'
                ' Tensor or np.ndarray.'.format(type(buffer)))

        # TODO(srbs): NumPy allows this. Investigate if/how to support this.
        if list(shape) != tensor.shape.as_list():
            raise ValueError('shape arg must match buffer.shape.')

    assert isinstance(tensor, tf.Tensor)
    # A differing dtype reinterprets the existing bits instead of converting
    # the values.
    if dtype and dtype != tensor.dtype:
        tensor = tf.bitcast(tensor, dtype)
    self._data = tensor
    self.base = None
def second_insert():
    """Query `test_factor` for (start_time, second_end_time) and insert it.

    The queried data is handed to `HDF5Engine.insert` together with a
    `ParamsParser` built for `db_path` / `rel_path`, using a structured,
    numeric, panel store format and float64 dtype.
    """
    frame = query(test_factor, (start_time, second_end_time))
    settings = {
        'rel_path': rel_path,
        'store_fmt': (
            DataClassification.STRUCTURED,
            DataValueCategory.NUMERIC,
            DataFormatCategory.PANEL,
        ),
        'dtype': np_dtype('float64'),
    }
    parser = ParamsParser.from_dict(db_path, settings)
    HDF5Engine.insert(frame, parser)
def first_insert():
    """Insert the first `initial_size` columns of the queried data, shuffled.

    Queries `test_factor` for (start_time, first_end_time), keeps the leading
    `initial_size` columns, puts them in a random column order and hands the
    result to `HDF5Engine.insert` with a `ParamsParser` built for
    `db_path` / `rel_path` (structured, numeric, panel; float64).
    """
    head = query(test_factor, (start_time, first_end_time)).iloc[:, :initial_size]

    # shuffle() works in place, so it needs a mutable copy of the labels.
    column_order = list(head.columns)
    shuffle(column_order)
    shuffled = head.loc[:, column_order]

    settings = {
        'rel_path': rel_path,
        'store_fmt': (
            DataClassification.STRUCTURED,
            DataValueCategory.NUMERIC,
            DataFormatCategory.PANEL,
        ),
        'dtype': np_dtype('float64'),
    }
    parser = ParamsParser.from_dict(db_path, settings)
    HDF5Engine.insert(shuffled, parser)
# Manual test script: query one factor and insert the newer slice of data
# into the HDF5 store located at `db_path`.
from numpy import dtype as np_dtype

from database.hdf5Engine.dbcore import HDF5Engine
from database.const import DataFormatCategory, DataValueCategory, DataClassification
from database.db import ParamsParser
from fmanager import query

# Name of the factor passed to `query`.
TEST_FACTOR = 'CLOSE'

# The two queries below share `end_time` as their common boundary.
# NOTE(review): whether `query` treats the bounds as inclusive is not visible
# here -- confirm before relying on the two ranges not overlapping.
start_time = '2017-01-01'
end_time = '2018-01-15'
new_end = '2018-02-01'

# Data for (start_time, end_time); not referenced again in the visible part
# of this script.
sample_df = query(TEST_FACTOR, (start_time, end_time))
# Data for (end_time, new_end); this is what gets inserted below.
new_data = query(TEST_FACTOR, (end_time, new_end))

# Hard-coded, machine-specific location of the test database.
db_path = r'C:\Users\c\Desktop\test'

# Disabled cleanup that would delete an existing test.h5 before inserting:
# file_path = join(db_path, 'test.h5')
# if exists(file_path):
#     remove(file_path)

# Insert `new_data` under relative path 'test' as structured, numeric, panel
# data with float64 dtype.
HDF5Engine.insert(
    new_data,
    ParamsParser.from_dict(
        db_path,
        {
            "rel_path": 'test',
            "store_fmt": (DataClassification.STRUCTURED, DataValueCategory.NUMERIC, DataFormatCategory.PANEL),
            "dtype": np_dtype('float64')
        }))
""" Code for identifying phonenumbers """ import re from typing import List import pandas as pd from numpy import dtype as np_dtype __all__ = ["check_phonenumbers"] OBJECT_DTYPE = np_dtype("O") # REGEX NUMBER_PREFIX = r"(\+44|0)7" # A simple UK phone number is +447 or 07, followed by 9 digits SIMPLE_UK_MOBILE = re.compile(NUMBER_PREFIX + "[0-9]{9}") def check_phonenumbers(df: pd.DataFrame) -> List: """ Check a dataframe for columns containing phonenumbers. Returns a list of column names which contain at least one address "Addresses" currently only concerns UK mobile numbers. These begin with +44/0 7, then 9 digits. Parameters ---------- df : pandas.DataFrame
def zeros_aligned(shape, dtype, order='C', align=128):
    """Like `numpy.zeros()`, but with the data starting on an `align`-byte boundary.

    A zeroed byte buffer with `align` spare bytes is allocated, the first
    offset whose address is a multiple of `align` is located, and the
    payload-sized slice starting there is reinterpreted as `dtype` and
    reshaped to `shape`.

    Args:
        shape: Shape of the resulting array.
        dtype: Element type of the resulting array.
        order: Memory layout passed to `reshape` ('C' by default).
        align: Required alignment of the data pointer, in bytes.

    Returns:
        A zero-filled array of the given shape and dtype.
    """
    element_count = prod(shape, dtype=int64)
    payload_bytes = element_count * np_dtype(dtype).itemsize

    # Over-allocate so an aligned start always fits inside the buffer.
    # problematic on win64 ("maximum allowed dimension exceeded")
    raw = zeros(payload_bytes + align, dtype=uint8)

    # Bytes to skip so that (address + offset) is a multiple of `align`.
    offset = -raw.ctypes.data % align
    aligned_bytes = raw[offset:offset + payload_bytes]
    return aligned_bytes.view(dtype).reshape(shape, order=order)