Example no. 1
    @classmethod
    def from_dict(cls, db_path, params):
        '''
        Construct a ParamsParser instance from a dict of parameters.

        Parameters
        ----------
        db_path: string
            Absolute path of the database
        params: dict
            Parameter dict; recognized keys are ['rel_path' (required, string),
            'start_time' (datetime), 'end_time' (datetime),
            'store_fmt' (StoreFormat), 'dtype' (numpy.dtype)]

        Returns
        -------
        obj: ParamsParser
        '''
        obj = cls()
        obj._main_path = db_path
        obj._start_time = params.get('start_time', None)
        if obj._start_time is not None:
            obj._start_time = to_datetime(obj._start_time)
        obj._end_time = params.get('end_time', None)
        if obj._end_time is not None:
            obj._end_time = to_datetime(obj._end_time)
        obj._rel_path = params['rel_path']
        obj._store_fmt = params.get('store_fmt', None)
        if obj._store_fmt is not None:
            obj._store_fmt = StoreFormat.from_iterable(obj._store_fmt)
        obj._dtype = params.get('dtype', None)
        if obj._dtype is not None:
            obj._dtype = np_dtype(obj._dtype)
        # store_fmt may be absent; calling validate() on None would raise
        # AttributeError instead of the intended ValueError
        if obj._store_fmt is not None and not obj._store_fmt.validate():
            raise ValueError("Invalid parameter group!")
        return obj
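For illustration, a minimal call mirroring the parameter dicts used in the later examples (the database path and 'rel_path' value are placeholders):

params = {
    'rel_path': 'test',                      # required
    'start_time': '2017-01-01',              # anything to_datetime accepts
    'store_fmt': (DataClassification.STRUCTURED,
                  DataValueCategory.NUMERIC,
                  DataFormatCategory.PANEL),
    'dtype': np_dtype('float64'),
}
parser = ParamsParser.from_dict(r'C:\Users\c\Desktop\test', params)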
Example no. 2
def _assertSizesMatch(space, dofVector):
    # only allow contiguous arrays as dof vectors
    try:
        assert dofVector.data.contiguous, "dofVector needs to provide a contiguous data memory, sliced arrays are not supported!"
    except AttributeError:
        pass

    # only allow arrays with one dimensional shapes
    try:
        assert len(
            dofVector.shape
        ) == 1, "dofVector should be a simple array, i.e. len(shape) == 1"
    except AttributeError:
        pass

    # check that sizes and data size match
    try:
        assert space.size == len(
            dofVector
        ), f"space (size={space.size}) and vector (size={len(dofVector)}) do not match!"
    except TypeError:
        assert space.size == dofVector.size, f"space (size={space.size}) and vector (size={dofVector.size}) do not match!"

    if hasattr(dofVector, "dtype"):
        dtype = dofVector.dtype
        itemsize = np_dtype(dofVector.dtype).itemsize
        # for a 'double' field, require 8-byte float64 entries; other fields
        # are accepted as-is
        match = ((space._sizeOfField == itemsize and dtype == 'float64')
                 if space.field == 'double' else True)
        assert match, f"space (dtype={space._sizeOfField},{space.field}) and vector (dtype={itemsize},{dtype}) do not match!"
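For illustration, the checks can be exercised with a plain numpy array and a stand-in for space (SimpleNamespace is a hypothetical substitute for the real discrete-function-space object):

from types import SimpleNamespace
from numpy import zeros

space = SimpleNamespace(size=10, _sizeOfField=8, field='double')
vec = zeros(10, dtype='float64')  # contiguous, 1-d, size and dtype both match
_assertSizesMatch(space, vec)     # all assertions pass silently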
Example no. 3
def pil_scale(orig: Image, w: int = None, h: int = None) -> Image:
    """
    Scale a Pillow image
    :param orig: ndarray Original cv2 image
    :param w: New width
    :param h: New height
    :return: ndarray
    """
    new_width, new_height = _calculate_scale(orig.width, orig.height, w, h)
    # thumb = orig.copy()
    # thumb.thumbnail((new_width, new_height))
    # also allows enlarging:
    if orig.mode.startswith('I'):
        # workaround for Pillow#4402:
        arr = np_array(orig)
        if arr.dtype.kind == 'i':
            # signed integer is *not* trustworthy in this context
            # (usually a mistake in the array interface)
            arr.dtype = np_dtype('u' + arr.dtype.name)
        if arr.dtype.kind == 'u':
            # integer needs to be scaled linearly to 8 bit
            # of course, an image might actually have some lower range
            # (e.g. 10-bit in I;16 or 20-bit in I or 4-bit in L),
            # but that would be guessing anyway, so here don't
            # make assumptions on _scale_, just reduce _precision_
            arr = arr >> 8 * (arr.dtype.itemsize - 1)
            arr = arr.astype(np_uint8)
        elif arr.dtype.kind == 'f':
            # float needs to be scaled from [0,1.0] to [0,255]
            arr *= 255
            arr = arr.astype(np_uint8)
        orig = fromarray(arr)
    thumb = orig.resize((new_width, new_height))
    return thumb
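A typical call might look like this (the file name is a placeholder, and _calculate_scale is assumed to derive the missing dimension so the aspect ratio is kept):

from PIL import Image

img = Image.open('input.png')  # placeholder path
thumb = pil_scale(img, w=200)  # height is computed by _calculate_scale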
Example no. 4
    def __init__(self, shape, dtype=float, buffer=None):  # pylint: disable=redefined-builtin
        """Initializes an ndarray.

    This is a low level interface for building ndarrays and should be avoided.
    Users should instead use methods in array_creation.py.

    This class provides a numpy.ndarray like interface for a TF Tensor with a
    fully-defined shape. Note that, unlike the backing buffer of np.ndarray,
    Tensors are immutable. So, operations like `__setitem__` are performed by
    replacing the Tensor. This restricts the ability to implement NumPy `view`
    semantics.

    Compared to numpy.ndarray, this does not support `offset`, `strides`
    and `order` arguments.

    Args:
      shape: The shape of the array. Must be a scalar, an iterable of integers
        or a `TensorShape` object.
      dtype: Optional. The dtype of the array. Must be a python type, a numpy
        type or a tensorflow `DType` object.
      buffer: Optional. The backing buffer of the array. Must have shape
        `shape`. Must be a `ndarray`, `np.ndarray` or a `Tensor`.

    Raises:
      ValueError: If `buffer` is specified and its shape does not match
       `shape`.
    """
        if dtype and not isinstance(dtype, tf.DType):
            dtype = tf.as_dtype(np_dtype(dtype))
        if buffer is None:
            buffer = tf.zeros(shape, dtype=dtype)
        else:
            if isinstance(buffer, ndarray):
                buffer = buffer.data
            elif isinstance(buffer, np_ndarray):
                # If `buffer` is a np.ndarray, the Tensor will share the underlying
                # storage of the array.
                buffer = tf.convert_to_tensor(value=buffer, dtype=dtype)
            elif not isinstance(buffer, tf.Tensor):
                raise ValueError(
                    'Unexpected type for `buffer` {}. Must be an ndarray,'
                    ' Tensor or np.ndarray.'.format(type(buffer)))

            if list(shape) != buffer.shape.as_list():
                # TODO(srbs): NumPy allows this. Investigate if/how to support this.
                raise ValueError('shape arg must match buffer.shape.')

        assert isinstance(buffer, tf.Tensor)
        if dtype and dtype != buffer.dtype:
            buffer = tf.bitcast(buffer, dtype)
        self._data = buffer
        self.base = None
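A sketch of constructing the wrapper both ways, assuming the class is exposed as ndarray (as the isinstance check above suggests):

import tensorflow as tf

a = ndarray((2, 3), dtype=np_dtype('float32'))  # zero-filled backing Tensor
b = ndarray((2,), dtype=tf.float32,
            buffer=tf.constant([1.0, 2.0]))     # wraps an existing Tensor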
Example no. 5
def second_insert():
    data = query(test_factor, (start_time, second_end_time))
    HDF5Engine.insert(
        data,
        ParamsParser.from_dict(db_path, {
            'rel_path': rel_path,
            'store_fmt': (DataClassification.STRUCTURED,
                          DataValueCategory.NUMERIC,
                          DataFormatCategory.PANEL),
            'dtype': np_dtype('float64')
        }))
Example no. 6
def first_insert():
    data = query(test_factor,
                 (start_time, first_end_time)).iloc[:, :initial_size]
    columns = list(data.columns)
    shuffle(columns)
    data = data.loc[:, columns]
    HDF5Engine.insert(
        data,
        ParamsParser.from_dict(db_path, {
            'rel_path': rel_path,
            'store_fmt': (DataClassification.STRUCTURED,
                          DataValueCategory.NUMERIC,
                          DataFormatCategory.PANEL),
            'dtype': np_dtype('float64')
        }))
Example no. 7
from numpy import dtype as np_dtype

from database.hdf5Engine.dbcore import HDF5Engine
from database.const import DataFormatCategory, DataValueCategory, DataClassification
from database.db import ParamsParser
from fmanager import query

TEST_FACTOR = 'CLOSE'
start_time = '2017-01-01'
end_time = '2018-01-15'
new_end = '2018-02-01'

sample_df = query(TEST_FACTOR, (start_time, end_time))
new_data = query(TEST_FACTOR, (end_time, new_end))

db_path = r'C:\Users\c\Desktop\test'
# file_path = join(db_path, 'test.h5')
# if exists(file_path):
#     remove(file_path)
HDF5Engine.insert(
    new_data,
    ParamsParser.from_dict(db_path, {
        "rel_path": 'test',
        "store_fmt": (DataClassification.STRUCTURED,
                      DataValueCategory.NUMERIC,
                      DataFormatCategory.PANEL),
        "dtype": np_dtype('float64')
    }))
Example no. 8
"""
Code for identifying phone numbers
"""
import re
from typing import List

import pandas as pd
from numpy import dtype as np_dtype

__all__ = ["check_phonenumbers"]

OBJECT_DTYPE = np_dtype("O")

# REGEX
NUMBER_PREFIX = r"(\+44|0)7"

# A simple UK phone number is +447 or 07, followed by 9 digits
SIMPLE_UK_MOBILE = re.compile(NUMBER_PREFIX + "[0-9]{9}")


def check_phonenumbers(df: pd.DataFrame) -> List:
    """
    Check a dataframe for columns containing phone numbers. Returns a list of
    column names which contain at least one phone number.

    "Phone numbers" currently only covers UK mobile numbers: +447 or 07,
    followed by 9 digits.

    Parameters
    ----------
    df : pandas.DataFrame
        Dataframe whose object-dtype columns are scanned for matches.
    """
    # The function body was cut off in the source; this reconstruction follows
    # the docstring: report object-dtype columns with at least one match.
    return [
        col for col in df.columns
        if df[col].dtype == OBJECT_DTYPE
        and df[col].astype(str).str.contains(SIMPLE_UK_MOBILE).any()
    ]
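A quick demonstration on a toy frame (values invented; the printed result assumes the reconstructed body above):

df = pd.DataFrame({
    'name': ['alice', 'bob'],
    'mobile': ['07123456789', 'n/a'],
})
print(check_phonenumbers(df))  # ['mobile']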
Example no. 9
from numpy import int64, prod, uint8, zeros  # imports this snippet relies on


def zeros_aligned(shape, dtype, order='C', align=128):
    """Like `numpy.zeros()`, but the array will be aligned at `align` byte boundary."""
    nbytes = prod(shape, dtype=int64) * np_dtype(dtype).itemsize
    buffer = zeros(nbytes + align, dtype=uint8)  # problematic on win64 ("maximum allowed dimension exceeded")
    start_index = -buffer.ctypes.data % align
    return buffer[start_index : start_index + nbytes].view(dtype).reshape(shape, order=order)
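A small check of the advertised alignment guarantee:

a = zeros_aligned((3, 4), np_dtype('float64'), align=128)
assert a.ctypes.data % 128 == 0  # the data pointer sits on a 128-byte boundary
assert a.shape == (3, 4) and not a.any()  # zero-filled, requested shape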