Example #1
def data_to_params(data,
                   expparams_dtype,
                   col_outcomes=(0, 'counts'),
                   cols_expparams=None):
    """
    Given data as a NumPy array, separates out each column either as
    the outcomes, or as a field of an expparams array. Columns may be specified
    either as indices into a two-axis scalar array, or as field names for a one-axis
    record array.

    Since scalar arrays are homogeneous in type, this may result in loss of precision
    due to casting between data types.
    """
    BY_IDX, BY_NAME = range(2)

    is_exp_scalar = np.issctype(expparams_dtype)
    is_data_scalar = np.issctype(data.dtype) and not data.dtype.fields

    s_ = ((lambda idx: np.s_[..., idx[BY_IDX]]) if is_data_scalar else
          (lambda idx: np.s_[idx[BY_NAME]]))

    outcomes = data[s_(col_outcomes)].astype(int)

    # Build the expparams record array from the remaining columns.

    expparams = np.empty(outcomes.shape, dtype=expparams_dtype)
    if is_exp_scalar:
        expparams[:] = data[s_(cols_expparams)]
    else:
        for expparams_key, column in cols_expparams.items():
            expparams[expparams_key] = data[s_(column)]

    return outcomes, expparams
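A minimal usage sketch of the scalar-array path (the column layout and the field name 't' are illustrative, not from the source):

import numpy as np

# Two-axis scalar data: outcomes in column 0, one experiment parameter in
# column 1. For record arrays the name halves of the tuples would be used
# instead of the index halves.
data = np.array([[3, 0.1],
                 [5, 0.2]])
eps_dtype = np.dtype([('t', float)])
outcomes, expparams = data_to_params(
    data, eps_dtype,
    col_outcomes=(0, 'counts'),
    cols_expparams={'t': (1, 't')})
# outcomes       -> array([3, 5])
# expparams['t'] -> array([0.1, 0.2])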
Example #2
 def __new__(cls, input):
     if isinstance(input, np.ndarray):
         series = input.view(cls)
         if np.issctype(series.dtype):
             series._concrete_type = series.dtype
         else:
             series._concrete_type = common_concrete_type.of_values(input)
     else:
         t = common_min_type.of_values(input)
         dtype = np.dtype(t) if np.issctype(t) else np.dtype(np.object)
         series = np.empty(len(input), dtype=dtype).view(cls)  # ensures 1D
         series[:] = input
         series._concrete_type = t
     return series
Example #3
def common_symbolic_value_elimination_impl(gdict):
    order = topsort(gdict)
    roots = compute_roots(gdict, order)
    values = {}
    for k in order:
        n = gdict[k]
        nodeval = n.attr.get('symbolic_value')
        try:
            if nodeval is None:
                continue
            elif isscalar(nodeval.val) and nodeval.val == -1:
                continue
            elif (not isscalar(nodeval.val)) and -1 in nodeval.val:
                continue
            elif (isinstance(nodeval.val, np.ndarray)
                  and np.issctype(nodeval.val.dtype)
                  and nodeval.val.size > 100):
                continue
        except Exception:
            continue

        hashable_val, any_symbolic = make_hashable(nodeval.val)
        if any_symbolic:
            if hashable_val in values:
                # rewrite graph
                othernodes = values[hashable_val]
                for othernode in othernodes:
                    if len(roots[othernode].intersection(roots[n.name])) > 0:
                        outputs = list(n.outputs)
                        for outnode in outputs:
                            replace_source(gdict, n.name, outnode, othernode)
            else:
                values[hashable_val] = values.get(hashable_val, []) + [k]
Example #4
    def get_accessor(self, field, dtype, address_space_qualifier):

        child_name = BlobLib.get_interface(dtype).get_spaced_name(address_space_qualifier)

        definition = '%(child_name)s* %(function_name)s(%(address_space_qualifier)s %(cname)s* self)' % {
            'function_name': self.get_accessor_name(field, address_space_qualifier),
            'cname': self.get_name(address_space_qualifier),
            'child_name': child_name,
            'address_space_qualifier': address_space_qualifier
        }

        field_chain = [field]
        current_dtype = dtype
        while not numpy.issctype(current_dtype) and not issubclass(current_dtype, BlobEnum):
            try:
                subfield, current_dtype = current_dtype.subtypes[0]
                field_chain.append(subfield)

            except Exception as ex:
                raise ex

        declaration = \
'''
%(definition)s
{
    return (%(child_name)s *)&self->%(field)s;
};''' % {
    'definition': definition,
    'child_name': child_name,
    'field': '_'.join(field_chain)
}
        return definition + ';', declaration
Example #5
 def from_value(cls, val):
     """
     Creates a :py:class:`Type` object corresponding to the given value.
     """
     if isinstance(val, Type):
         # Creating a new object, because ``val`` may be some derivative of Type,
         # used as a syntactic sugar, and we do not want it to confuse us later.
         return cls(val.dtype,
                    shape=val.shape,
                    strides=val.strides,
                    offset=val.offset,
                    nbytes=val.nbytes)
     elif numpy.issctype(val):
         return cls(val)
     elif hasattr(val, 'dtype') and hasattr(val, 'shape'):
         strides = val.strides if hasattr(val, 'strides') else None
         offset = val.offset if hasattr(val, 'offset') else 0
         nbytes = val.nbytes if hasattr(val, 'nbytes') else None
         return cls(val.dtype,
                    shape=val.shape,
                    strides=strides,
                    offset=offset,
                    nbytes=nbytes)
     else:
         return cls(dtypes.detect_type(val))
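The numpy.issctype test is what lets bare scalar types pass straight through to the constructor; a quick illustration of the dispatch (standard NumPy behavior):

import numpy
numpy.issctype(numpy.float32)        # True  -> handled as a bare scalar type
numpy.issctype(numpy.empty((2, 2)))  # False -> falls to the dtype/shape branch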
Example #6
    def __init__(self, initializer, dtype=None, shape=None):

        if dtype is not None:
            assert(np.issctype(dtype))
        if shape is not None:
            assert(isinstance(shape, tuple))


        if isinstance(initializer, str):

            # treat initializer as a filename to load tensor data from
            npdata = self.load_data(initializer)
            if dtype is not None and dtype != npdata.dtype:
                npdata = npdata.astype(dtype, copy=False)
            if shape is not None:
                npdata = npdata.reshape(shape)
            super().__init__(npdata.shape, dtype=npdata.dtype)
            self.set(npdata)
        elif isinstance(initializer, tuple):
            # print("GPUTensor(shape=", initializer)
            super().__init__(initializer, dtype=np.float32 if dtype is None else dtype)
        elif isinstance(initializer, np.ndarray):
            # print("SHAPE:", initializer.shape)
            if dtype and dtype != initializer.dtype:
                initializer = initializer.astype(dtype)

            if shape is not None and shape != initializer.shape:
                initializer = initializer.reshape(shape)
            super().__init__(initializer.shape, dtype=initializer.dtype)
            self.set(initializer)
        else:
            raise NotImplementedError
Example #7
    def __init__(self, initializer, dtype=None, shape=None):

        if dtype is not None:
            assert (np.issctype(dtype))
        if shape is not None:
            assert (isinstance(shape, tuple))

        if isinstance(initializer, str):

            # treat initializer as a filename to load tensor data from
            npdata = self.load_data(initializer)
            if dtype is not None and dtype != npdata.dtype:
                npdata = npdata.astype(dtype, copy=False)
            if shape is not None:
                npdata = npdata.reshape(shape)
            super().__init__(npdata.shape, dtype=npdata.dtype)
            self.set(npdata)
        elif isinstance(initializer, tuple):
            # print("GPUTensor(shape=", initializer)
            super().__init__(initializer,
                             dtype=np.float32 if dtype is None else dtype)
        elif isinstance(initializer, np.ndarray):
            # print("SHAPE:", initializer.shape)
            if dtype and dtype != initializer.dtype:
                initializer = initializer.astype(dtype)

            if shape is not None and shape != initializer.shape:
                initializer = initializer.reshape(shape)
            super().__init__(initializer.shape, dtype=initializer.dtype)
            self.set(initializer)
        else:
            raise NotImplementedError
Example #8
    def _create_unaligned_dtype(cls, *subtypes):

        dtype_components = []
        for index, component in enumerate(subtypes):
            field, subtype = component

            if numpy.issctype(subtype):
                if hasattr(subtype, 'descr'):
                    for name, sub_dtype in subtype.descr:
                        subfield = '%s_%s' % (field, name)
                        dtype_components.append((subfield, sub_dtype))

                else:
                    dtype_components.append(component)

            elif issubclass(subtype, BlobEnum):
                dtype_components.append((field, BlobEnum.dtype))

            elif issubclass(subtype, Blob) and subtype.is_plain():
                if hasattr(subtype, 'dtype'):
                    sub_dtype = subtype.dtype

                else:
                    sub_dtype, subtype_requirements = subtype.create_plain_dtype(*subtype.subtypes)

                for name, sub_dtype in sub_dtype.descr:
                    subfield = '%s_%s' % (field, name)
                    dtype_components.append((subfield, sub_dtype))

            else:
                raise NotImplementedError()

        return numpy.dtype(dtype_components), subtypes
Example #9
def convert_to_dtype(tensor_or_dtype, dtype=None, dtype_hint=None):
    """Get a dtype from a list/tensor/dtype using convert_to_tensor semantics."""
    if tensor_or_dtype is None:
        return dtype or dtype_hint

    # Tensorflow dtypes need to be typechecked
    if tf.is_tensor(tensor_or_dtype):
        dt = base_dtype(tensor_or_dtype.dtype)
    elif isinstance(tensor_or_dtype, tf.DType):
        dt = base_dtype(tensor_or_dtype)
    # Numpy dtypes defer to dtype/dtype_hint
    elif isinstance(tensor_or_dtype, np.ndarray):
        dt = base_dtype(dtype or dtype_hint or tensor_or_dtype.dtype)
    elif np.issctype(tensor_or_dtype):
        dt = base_dtype(dtype or dtype_hint or tensor_or_dtype)
    else:
        # If this is a Python object, call `convert_to_tensor` and grab the dtype.
        # Note that this will add ops in graph-mode; we may want to consider
        # other ways to handle this case.
        dt = tf.convert_to_tensor(tensor_or_dtype, dtype, dtype_hint).dtype

    if not SKIP_DTYPE_CHECKS and dtype and not base_equal(dtype, dt):
        raise TypeError('Found incompatible dtypes, {} and {}.'.format(
            dtype, dt))
    return dt
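A hedged usage sketch; base_dtype, base_equal, and SKIP_DTYPE_CHECKS are internals of TFP's dtype_util module, stubbed here only so the sketch runs standalone:

import numpy as np
import tensorflow as tf

SKIP_DTYPE_CHECKS = False

def base_dtype(d):  # stand-in stub
    return getattr(d, 'base_dtype', d)

def base_equal(a, b):  # stand-in stub
    return base_dtype(a) == base_dtype(b)

convert_to_dtype(np.float64)                          # np.float64 (scalar-type branch)
convert_to_dtype(np.zeros(3), dtype_hint=tf.float16)  # hint overrides the array's dtype
convert_to_dtype([1, 2, 3])                           # tf.int32 via convert_to_tensor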
Example #10
    def toString(self,
                 buffer=sys.stdout,
                 columns=None,
                 colSpace=None,
                 nanRep='NaN',
                 formatters=None,
                 float_format=None):
        """
        Output a string version of this DataMatrix
        """
        _pf = common._pfixed
        formatters = formatters or {}

        if columns is None:
            columns = self.columns
            values = self.values
            if self.objects:
                columns = list(columns) + list(self.objects.columns)
                values = np.column_stack(
                    (values.astype(object), self.objects.values))
        else:
            columns = [c for c in columns if c in self]
            values = self.asMatrix(columns)

        ident = lambda x: x

        idxSpace = max([len(str(idx)) for idx in self.index]) + 4

        if colSpace is None:
            colSpace = {}

            for c in columns:
                if np.issctype(self[c].dtype):
                    colSpace[c] = max(len(str(c)) + 4, 12)
                else:
                    # hack
                    colSpace[c] = 15
        else:
            colSpace = dict((k, 15) for k in columns)

        if len(self.cols()) == 0:
            buffer.write('DataMatrix is empty!\n')
            buffer.write(repr(self.index))
        else:
            buffer.write(_pf('', idxSpace))
            for h in columns:
                buffer.write(_pf(h, colSpace[h]))
            buffer.write('\n')

            for i, idx in enumerate(self.index):
                buffer.write(_pf(idx, idxSpace - 1))
                for j, col in enumerate(columns):
                    formatter = formatters.get(col, ident)
                    buffer.write(
                        _pf(formatter(values[i, j]),
                            colSpace[col],
                            float_format=float_format,
                            nanRep=nanRep))
                buffer.write('\n')
Example #11
def any_symbolic_or_unknown(val):
    if is_symbolic_or_unknown(val):
        return True
    elif isinstance(val, np.ndarray) and np.issctype(val.dtype):
        return False
    elif hasattr(val, '__iter__'):
        return any(any_symbolic_or_unknown(i) for i in val)
    else:
        return is_symbolic_or_unknown(val)
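A minimal sketch of the recursive traversal, with a stand-in is_symbolic_or_unknown predicate (the real predicate lives alongside this helper):

import numpy as np

class Sym:  # stand-in for a symbolic value, for illustration only
    pass

def is_symbolic_or_unknown(v):
    return isinstance(v, Sym)

any_symbolic_or_unknown(np.arange(3))     # False: ndarray with a scalar dtype
any_symbolic_or_unknown([1, [Sym(), 2]])  # True: nested symbol found by recursion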
Example #12
def any_variadic(val):
    if is_variadic(val):
        return True
    elif isinstance(val, np.ndarray) and np.issctype(val.dtype):
        return False
    elif isinstance(val, str):  # string is iterable
        return False
    elif hasattr(val, "__iter__"):
        return any(any_variadic(i) for i in val)
    return False
Example #13
def _msgpack_ext_pack(x):
  """Messagepack encoders for custom types."""
  if isinstance(x, (np.ndarray, jax.xla.DeviceArray)):
    return msgpack.ExtType(_MsgpackExtType.ndarray, _ndarray_to_bytes(x))
  if np.issctype(type(x)):
    # pack scalar as ndarray
    return msgpack.ExtType(_MsgpackExtType.npscalar, _ndarray_to_bytes(np.asarray(x)))
  elif isinstance(x, complex):
    return msgpack.ExtType(_MsgpackExtType.native_complex,
                           msgpack.packb((x.real, x.imag)))
  return x
Example #14
def num_symbolic(val):
    """
    Return the number of symbols in val
    """
    if is_symbolic(val):
        return 1
    elif isinstance(val, np.ndarray) and np.issctype(val.dtype):
        return 0
    elif hasattr(val, "__iter__"):
        return sum(any_symbolic(i) for i in val)
    return 0
Example #15
def cast_array(values, ref):
    """Use a PyTorch/Numpy array representation for `values` that matches `ref`."""
    if is_torch_tensor(ref):
        return as_torch_tensor(values, dtype=ref.dtype)
    elif is_torch_dtype(ref):
        return as_torch_tensor(values, dtype=ref)
    elif isinstance(ref, np.ndarray):
        return np.asarray(values, dtype=ref.dtype)
    elif np.issctype(ref):
        return np.asarray(values, dtype=ref)
    raise ValueError('`ref` must be a torch.Tensor, np.ndarray, or dtype')
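A runnable sketch of the two NumPy branches; is_torch_tensor and is_torch_dtype are project helpers, stubbed out here so torch is not required:

import numpy as np

def is_torch_tensor(x):  # stand-in stub: no torch in this sketch
    return False

def is_torch_dtype(x):  # stand-in stub
    return False

cast_array([1, 2, 3], np.zeros(2, dtype=np.float32)).dtype  # dtype('float32')
cast_array([1.5, 2.5], np.int16).dtype                      # dtype('int16')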
Example #16
def as_tensor(arr):
    """
    Wraps the specified NumPy array as a Tensor.
    """
    if not np.issctype(arr.dtype):
        raise ValueError("A non-scalar array cannot be converted to a Tensor "
                         "without copying.")

    return DenseTensor(arr.shape,
                       data=arr,
                       strides=tuple(s // 8 for s in arr.strides),
                       copy=False)
Example #17
def any_symbolic(val):
    if is_symbolic(val):
        return True
    if isinstance(val, np.ndarray) and val.ndim == 0:
        return is_symbolic(val[()])
    elif isinstance(val, np.ndarray) and np.issctype(val.dtype):
        return False
    elif isinstance(val, str):  # string is iterable
        return False
    elif hasattr(val, "__iter__"):
        return any(any_symbolic(i) for i in val)
    return False
Example #18
    def get_sizeof(self, address_space_qualifier):
        """Creates a c99 sizeof method."""

        definition = 'unsigned long %(function_name)s(%(address_space_qualifier)s char* blob)' % {
            'function_name': self.get_sizeof_name(address_space_qualifier),
            'address_space_qualifier': address_space_qualifier,
        }

        arguments = ['blob', '&self']  # the first argument must be the data itself.
        variables = ['%s %s;' % (self.get_name(address_space_qualifier, ), 'self')]  # all required variable names
        lines = []  # all required source code lines

        # iterate over all components/subtypes
        for field, subtype in self.blob_type.subtypes:
            if field.endswith(Blob.PADDING_FIELD_SUFFIX):
                continue

            if numpy.issctype(subtype):
                # determine the size of the scalar type
                cname = dtype_to_ctype(subtype)
                sizeof_call = 'sizeof(%s)' % cname
            else:
                # determine the size of the complex type
                assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype)
                sizeof_call = '%s((%s char*)(blob + size))' % (
                    BlobLib.get_interface(subtype).get_sizeof_name(address_space_qualifier),
                    address_space_qualifier,
                )

            # save which arguments and lines are required to determine the total size
            lines.append('size += %s;' % sizeof_call)

        lines.insert(0, '%s(%s);' % (self.get_deserialize_name(address_space_qualifier), ', '.join(arguments)))

        # prepend the variable declarations to the source code
        variables.extend(lines)
        lines = variables

        # fill the function template
        declaration = \
'''
%(definition)s
{
    unsigned long size = 0;
%(lines)s
    return size;
}
''' % {
    'definition': definition.strip(),
    'cname': self.get_name(address_space_qualifier),
    'lines': '\n'.join(['\t' + line for line in lines])
}
        return definition.strip() + ';', declaration.strip()
Example #19
    def testConvertToDTypeRaises(self, tensor_or_dtype, dtype, dtype_hint):
        if np.issctype(tensor_or_dtype):
            example_tensor = np.zeros([], tensor_or_dtype)
        elif isinstance(tensor_or_dtype, tf.DType):
            example_tensor = tf.zeros([], tensor_or_dtype)
        else:
            example_tensor = tensor_or_dtype

        with self.assertRaisesRegex(TypeError, 'Found incompatible dtypes'):
            dtype_util.convert_to_dtype(tensor_or_dtype, dtype, dtype_hint)
        with self.assertRaisesRegex(TypeError, 'Found incompatible dtypes'):
            dtype_util.convert_to_dtype(example_tensor, dtype, dtype_hint)
Example #20
    def toString(self, buffer=sys.stdout, columns=None, colSpace=None,
                 nanRep='NaN', formatters=None, float_format=None):
        """
        Output a string version of this DataMatrix
        """
        _pf = common._pfixed
        formatters = formatters or {}

        if columns is None:
            columns = self.columns
            values = self.values
            if self.objects:
                columns = list(columns) + list(self.objects.columns)
                values = np.column_stack((values.astype(object),
                                          self.objects.values))
        else:
            columns = [c for c in columns if c in self]
            values = self.asMatrix(columns)

        ident = lambda x: x

        idxSpace = max([len(str(idx)) for idx in self.index]) + 4

        if colSpace is None:
            colSpace = {}

            for c in columns:
                if np.issctype(self[c].dtype):
                    colSpace[c] = max(len(str(c)) + 4, 12)
                else:
                    # hack
                    colSpace[c] = 15
        else:
            colSpace = dict((k, 15) for k in columns)

        if len(self.cols()) == 0:
            buffer.write('DataMatrix is empty!\n')
            buffer.write(repr(self.index))
        else:
            buffer.write(_pf('', idxSpace))
            for h in columns:
                buffer.write(_pf(h, colSpace[h]))
            buffer.write('\n')

            for i, idx in enumerate(self.index):
                buffer.write(_pf(idx, idxSpace - 1))
                for j, col in enumerate(columns):
                    formatter = formatters.get(col, ident)
                    buffer.write(_pf(formatter(values[i, j]), colSpace[col],
                                     float_format=float_format,
                                     nanRep=nanRep))
                buffer.write('\n')
Example #21
def normalize_hounsfield(vol, dtype=None):
    ''' Normalizes `vol` by 4095 and clamps to [0,1]. `dtype=None` defaults to 32-bit float'''
    if isinstance(dtype, torch.dtype): vol = torch.tensor(vol, dtype=dtype)
    elif np.issctype(dtype): vol = np.array(vol, dtype=dtype)
    elif dtype is None:
        vol = vol.float() if torch.is_tensor(vol) else np.array(vol).astype(
            np.float32)
    if torch.is_tensor(vol):
        return torch.clamp(vol / 4095.0, 0.0, 1.0)
    elif isinstance(vol, np.ndarray):
        return np.clip(vol / 4095.0, 0.0, 1.0)
    else:
        raise Exception(
            f'vol (type={type(vol)}) is neither torch.tensor, nor np.ndarray')
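A quick usage sketch (torch must be importable, since the function probes torch.dtype and torch.is_tensor unconditionally):

import numpy as np
import torch

normalize_hounsfield(np.array([0.0, 2047.5, 4095.0]))
# -> array([0. , 0.5, 1. ], dtype=float32)
normalize_hounsfield([0, 4095], dtype=np.float64)  # np.issctype(dtype) branch
# -> array([0., 1.])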
Example #22
def data_to_params(data,
        expparams_dtype,
        col_outcomes=(0, 'counts'),
        cols_expparams=None
    ):
    """
    Given data as a NumPy array, separates out each column either as
    the outcomes, or as a field of an expparams array. Columns may be specified
    either as indices into a two-axis scalar array, or as field names for a one-axis
    record array.

    Since scalar arrays are homogeneous in type, this may result in loss of precision
    due to casting between data types.
    """
    BY_IDX, BY_NAME = range(2)

    is_exp_scalar = np.issctype(expparams_dtype)
    is_data_scalar = np.issctype(data.dtype) and not data.dtype.fields

    s_ = (
        (lambda idx: np.s_[..., idx[BY_IDX]])
        if is_data_scalar else
        (lambda idx: np.s_[idx[BY_NAME]])
    )

    outcomes = data[s_(col_outcomes)].astype(int)

    # Build the expparams record array from the remaining columns.

    expparams = np.empty(outcomes.shape, dtype=expparams_dtype)
    if is_exp_scalar:
        expparams[:] = data[s_(cols_expparams)]
    else:
        for expparams_key, column in cols_expparams.items():
            expparams[expparams_key] = data[s_(column)]

    return outcomes, expparams
Example #23
 def from_value(cls, val):
     """
     Creates a :py:class:`Type` object corresponding to the given value.
     """
     if isinstance(val, Type):
         # Creating a new object, because ``val`` may be some derivative of Type,
         # used as a syntactic sugar, and we do not want it to confuse us later.
         return cls(val.dtype, shape=val.shape, strides=val.strides)
     elif numpy.issctype(val):
         return cls(val)
     elif hasattr(val, 'dtype') and hasattr(val, 'shape'):
         strides = val.strides if hasattr(val, 'strides') else None
         return cls(val.dtype, shape=val.shape, strides=strides)
     else:
         return cls(dtypes.detect_type(val))
Example #24
    def testConvertToDtype(self, tensor_or_dtype, dtype, dtype_hint):
        if np.issctype(tensor_or_dtype):
            example_tensor = np.zeros([], tensor_or_dtype)
        elif isinstance(tensor_or_dtype, tf.DType):
            example_tensor = tf.zeros([], tensor_or_dtype)
        else:
            example_tensor = tensor_or_dtype

        # Try with the original argument.
        self.assertEqual(
            tf.convert_to_tensor(example_tensor, dtype, dtype_hint).dtype,
            dtype_util.convert_to_dtype(tensor_or_dtype, dtype, dtype_hint))
        # Try with a concrete value.
        self.assertEqual(
            tf.convert_to_tensor(example_tensor, dtype, dtype_hint).dtype,
            dtype_util.convert_to_dtype(example_tensor, dtype, dtype_hint))
Example #25
    def sizeof_dtype(cls, dtype_params):
        size = 0

        for field, subtype in cls.subtypes:
            subtype_params = cls.explode_dtype_params(field=field, dtype_params=dtype_params)
            if issubclass(subtype, Blob):
                subtype_size = subtype.sizeof_dtype(dtype_params=subtype_params)

            elif numpy.issctype(subtype):
                subtype_size = subtype().nbytes

            else:
                raise NotImplementedError

            size += subtype_size

        return size
Example #26
    def get_type(self, address_space_qualifier):
        """Returns the c99 deserializer function declaration, which separates the components of a flat type."""

        fields = []

        # iterate over all subtypes/components
        for field, subtype in self.blob_type.subtypes:
            if field.endswith(Blob.PADDING_FIELD_SUFFIX):
                continue

            # used variable names

            # add sizeof call of component
            if numpy.issctype(subtype):
                fields.append('%s %s* %s;' % (address_space_qualifier, dtype_to_ctype(subtype), field))

            else:
                assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype)
                if subtype.is_plain():
                    fields.append('%s* %s;' % (
                        BlobLib.get_interface(subtype).get_spaced_name(address_space_qualifier),
                        field
                    ))

                else:
                    fields.append('%s %s;' % (
                        BlobLib.get_interface(subtype).get_name(address_space_qualifier),
                        field
                    ))

        definition = \
'''
/* complex type %(name)s */

typedef struct _%(name)s
{
    %(fields)s
} %(name)s;
''' % {
    'name': self.get_name(address_space_qualifier, ),
    'fields': '\n\t'.join(fields)
}
        return definition
Example #27
def round_sigfigs(x, sigfigs):
    """
    Rounds the value(s) in x to the number of significant figures in sigfigs.

    Restrictions:
    sigfigs must be an integer type and store a positive value.
    x must be a real value or an array like object containing only real values.
    """
    if not (type(sigfigs) is int or np.issubdtype(sigfigs, np.integer)):
        raise TypeError("round_sigfigs: sigfigs must be an integer.")

    if not np.all(np.isreal(x)):
        raise TypeError("round_sigfigs: all x must be real.")

    if sigfigs <= 0:
        raise ValueError("round_sigfigs: sigfigs must be positive.")

    xsgn = np.sign(x)
    absx = xsgn * x
    mantissas, binaryExponents = np.frexp(absx)

    decimalExponents = __logBase10of2 * binaryExponents
    intParts = np.floor(decimalExponents)

    mantissas *= 10.0**(decimalExponents - intParts)

    # shift any mantissa below 1.0 up a decade and adjust the exponent
    if type(mantissas) is float or np.issctype(np.dtype(mantissas)):
        if mantissas < 1.0:
            mantissas *= 10.0
            intParts -= 1.0

    elif isinstance(mantissas, np.ndarray):
        fixmsk = mantissas < 1.0
        mantissas[fixmsk] *= 10.0
        intParts[fixmsk] -= 1.0

    return xsgn * np.around(mantissas, decimals=sigfigs - 1) * 10.0**intParts
Example #28
def _my_issctype(dtype):
    """Hack around np.issctype bug"""
    return np.issctype(dtype) and str(dtype)[0:2] != '|S'
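The '|S' check exists because np.issctype reports fixed-width byte-string dtypes as scalar types; a brief illustration (older NumPy, where issctype still exists):

import numpy as np

np.issctype(np.dtype('S8'))   # True: str(np.dtype('S8')) == '|S8'
_my_issctype(np.dtype('S8'))  # False: byte strings filtered out by the hack
_my_issctype(np.dtype('f8'))  # True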
Example #29
def summary(dataset,
            stats=True,
            lstats='auto',
            sstats='auto',
            idhash=False,
            targets_attr='targets',
            chunks_attr='chunks',
            maxc=30,
            maxt=20):
    """String summary over the object

    Parameters
    ----------
    stats : bool
      Include some basic statistics (mean, std, var) over dataset samples
    lstats : 'auto' or bool
      Include statistics on chunks/targets.  If 'auto', includes only if both
      targets_attr and chunks_attr are present.
    sstats : 'auto' or bool
      Sequence (order) statistics. If 'auto', includes only if
      targets_attr is present.
    idhash : bool
      Include idhash value for dataset and samples
    targets_attr : str, optional
      Name of sample attributes of targets
    chunks_attr : str, optional
      Name of sample attributes of chunks -- independent groups of samples
    maxt : int
      Maximal number of targets when provide details on targets/chunks
    maxc : int
      Maximal number of chunks when provide details on targets/chunks
    """
    # local bindings
    samples = dataset.samples
    sa = dataset.sa
    s = str(dataset)[1:-1]

    if idhash:
        s += '\nID-Hashes: %s' % dataset.idhash

    # Deduce if necessary lstats and sstats
    if lstats == 'auto':
        lstats = (targets_attr in sa) and (chunks_attr in sa)
    if sstats == 'auto':
        sstats = (targets_attr in sa)

    ssep = (' ', '\n')[lstats]

    ## Possibly summarize attributes listed as having unique
    if stats:
        if np.issctype(samples.dtype):
            # TODO -- avg per chunk?
            # XXX We might like to use scipy.stats.describe to get
            # quick summary statistics (mean/range/skewness/kurtosis)
            if dataset.nfeatures:
                s += "%sstats: mean=%g std=%g var=%g min=%g max=%g\n" % \
                     (ssep, np.mean(samples), np.std(samples),
                      np.var(samples), np.min(samples), np.max(samples))
            else:
                s += "%sstats: dataset has no features\n" % ssep
        else:
            s += "%sstats: no stats for dataset of '%s' dtype" % (
                ssep, samples.dtype)
    if lstats:
        try:
            s += dataset.summary_targets(targets_attr=targets_attr,
                                         chunks_attr=chunks_attr,
                                         maxc=maxc,
                                         maxt=maxt)
        except KeyError as e:
            s += 'No per %s/%s due to %r' % (targets_attr, chunks_attr, e)
Example #30
    def get_deserialize(self, address_space_qualifier):
        """Returns the c99 deserializer function declaration, which separates the components of a flat type."""

        arguments = ['%s char* blob' % address_space_qualifier]
        declarations = []
        lines = []
        previous_field_offset, previous_field_space = 0, 0

        last_field = self.blob_type.subtypes[-1][0]

        # iterate over all subtypes/components
        for field, subtype in self.blob_type.subtypes:
            if field.endswith(Blob.PADDING_FIELD_SUFFIX):
                continue

            is_last_field = field == last_field

            # format
            lines.append('')
            lines.append('/* cast of %s */' % field)

            # used variable names
            field_variable = 'self->%s' % field
            field_offset = '%s_offset' % field
            field_reference = 'blob + %s' % field_offset
            if not is_last_field:
                field_space = '%s_space' % field

            declarations.append('unsigned long %s;' % field_offset)
            if not is_last_field:
                declarations.append('unsigned long %s;' % field_space)

            # add sizeof call of component
            if numpy.issctype(subtype):
                cname = "%s %s" % (address_space_qualifier, dtype_to_ctype(subtype))
                sizeof_call = 'sizeof(%s)' % cname

            else:
                assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype)
                cname = "%s %s" % (
                    address_space_qualifier,
                    BlobLib.get_interface(subtype).get_name(address_space_qualifier)
                )
                sizeof_call = '%s((%s char*)%s)' % (
                    BlobLib.get_interface(subtype).get_sizeof_name(address_space_qualifier),
                    address_space_qualifier,
                    field_reference
                )

            # determine offset of component
            lines.append('%s = %s + %s;' % (field_offset, previous_field_offset, previous_field_space))

            # set and cast component reference
            if not numpy.issctype(subtype) and not subtype.is_plain():
                lines.append('%s(%s, &%s);' % (
                    BlobLib.get_interface(subtype).get_deserialize_name(address_space_qualifier),
                    field_reference,
                    field_variable
                ))
            else:
                lines.append('%s = (%s*)(%s);' % (field_variable, cname, field_reference))

            if not is_last_field:
                # determine size of component
                lines.append('%s = %s;' % (field_space, sizeof_call))

            previous_field_space = field_space
            previous_field_offset = field_offset

        lines = ['\t' + line for line in lines]

        arguments.append('%s* %s' % (self.get_name(address_space_qualifier, ), 'self'))

        definition = 'void %s(%s)' % (self.get_deserialize_name(address_space_qualifier), ', '.join(arguments))

        # fill function template
        lines.insert(0, definition)
        lines.insert(1, '{')
        for index, line in enumerate(declarations):
            lines.insert(2 + index, '\t' + line)
        lines.append('}')
        declaration = '\n'.join(lines)

        return definition.strip() + ';', declaration
Example #31
def test_issctype(rep, expected):
    # ensure proper identification of scalar
    # data-types by issctype()
    actual = np.issctype(rep)
    assert_equal(actual, expected)
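In a test suite this function would be driven by a parametrize decorator; a plausible sketch, with expectations taken from np.issctype's documented behavior:

import numpy as np
import pytest
from numpy.testing import assert_equal

@pytest.mark.parametrize("rep, expected", [
    (np.float64, True),         # scalar type object
    (np.dtype('int32'), True),  # dtype instance
    ('foo', False),             # strings are never scalar types
    (1.1, False),               # value instances are not types
])
def test_issctype(rep, expected):
    actual = np.issctype(rep)
    assert_equal(actual, expected)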
Example #32
import numpy as np

np.maximum_sctype("S8")
np.maximum_sctype(object)

np.issctype(object)
np.issctype("S8")

np.obj2sctype(list)
np.obj2sctype(list, default=None)
np.obj2sctype(list, default=np.string_)

np.issubclass_(np.int32, int)
np.issubclass_(np.float64, float)
np.issubclass_(np.float64, (int, float))

np.issubsctype("int64", int)
np.issubsctype(np.array([1]), np.array([1]))

np.issubdtype("S1", np.string_)
np.issubdtype(np.float64, np.float32)

np.sctype2char("S1")
np.sctype2char(list)

np.find_common_type([], [np.int64, np.float32, complex])
np.find_common_type((), (np.int64, np.float32, complex))
np.find_common_type([np.int64, np.float32], [])
np.find_common_type([np.float32], [np.int64, np.float64])

np.cast[int]
Example #33
def genfromdta(fname, missing_flt=-999., missing_str=""):
    """
    Returns an ndarray from a Stata .dta file.

    Parameters
    ----------
    fname : str or filehandle
        Stata .dta file.
    missing_flt : numeric
        The numeric value to replace missing values with. Will be used for
        any numeric value.
    missing_str : str
        The string to replace missing values with for string variables.

    Notes
    -----
    Date types will be returned as their numeric value in Stata. A date
    parser is not written yet.
    """
    if isinstance(fname, basestring):
        fhd = StataReader(open(fname, 'rb'), missing_values=False)
    elif not hasattr(fname, 'read'):
        raise TypeError("The input should be a string or a filehandle. "\
                "(got %s instead)" % type(fname))
    else:
        fhd = StataReader(fname, missing_values=False)


#    validate_names = np.lib._iotools.NameValidator(excludelist=excludelist,
#                                    deletechars=deletechars,
#                                    case_sensitive=case_sensitive)

#TODO: This needs to handle the byteorder?
    header = fhd.file_headers()
    types = header['dtyplist']
    nobs = header['nobs']
    numvars = header['nvar']
    varnames = header['varlist']
    dataname = header['data_label']
    labels = header['vlblist']  # labels are thrown away unless DataArray
    # type is used
    data = np.zeros((nobs, numvars))
    stata_dta = fhd.dataset()

    # key is given by np.issctype
    convert_missing = {True: missing_flt, False: missing_str}

    dt = np.dtype(zip(varnames, types))
    data = np.zeros((nobs), dtype=dt)  # init final array

    for rownum, line in enumerate(stata_dta):
        # doesn't handle missing value objects, just casts
        # None will only work without missing value object.
        if None in line:  # and not remove_comma:
            for i, val in enumerate(line):
                if val is None:
                    line[i] = convert_missing[np.issctype(types[i])]
        data[rownum] = tuple(line)

    #TODO: make it possible to return plain array if all 'f8' for example
    return data
Example #34
def summary(
    dataset,
    stats=True,
    lstats="auto",
    sstats="auto",
    idhash=False,
    targets_attr="targets",
    chunks_attr="chunks",
    maxc=30,
    maxt=20,
):
    """String summary over the object

    Parameters
    ----------
    stats : bool
      Include some basic statistics (mean, std, var) over dataset samples
    lstats : 'auto' or bool
      Include statistics on chunks/targets.  If 'auto', includes only if both
      targets_attr and chunks_attr are present.
    sstats : 'auto' or bool
      Sequence (order) statistics. If 'auto', includes only if
      targets_attr is present.
    idhash : bool
      Include idhash value for dataset and samples
    targets_attr : str, optional
      Name of sample attributes of targets
    chunks_attr : str, optional
      Name of sample attributes of chunks -- independent groups of samples
    maxt : int
      Maximal number of targets when provide details on targets/chunks
    maxc : int
      Maximal number of chunks when provide details on targets/chunks
    """
    # local bindings
    samples = dataset.samples
    sa = dataset.sa
    s = str(dataset)[1:-1]

    if idhash:
        s += "\nID-Hashes: %s" % dataset.idhash

    # Deduce if necessary lstats and sstats
    if lstats == "auto":
        lstats = (targets_attr in sa) and (chunks_attr in sa)
    if sstats == "auto":
        sstats = targets_attr in sa

    ssep = (" ", "\n")[lstats]

    ## Possibly summarize attributes listed as having unique
    if stats:
        if np.issctype(samples.dtype):
            # TODO -- avg per chunk?
            # XXX We might like to use scipy.stats.describe to get
            # quick summary statistics (mean/range/skewness/kurtosis)
            if dataset.nfeatures:
                s += "%sstats: mean=%g std=%g var=%g min=%g max=%g\n" % (
                    ssep,
                    np.mean(samples),
                    np.std(samples),
                    np.var(samples),
                    np.min(samples),
                    np.max(samples),
                )
            else:
                s += "%sstats: dataset has no features\n" % ssep
        else:
            s += "%sstats: no stats for dataset of '%s' dtype" % (ssep, samples.dtype)
    if lstats:
        try:
            s += dataset.summary_targets(targets_attr=targets_attr, chunks_attr=chunks_attr, maxc=maxc, maxt=maxt)
        except KeyError as e:
            s += "No per %s/%s due to %r" % (targets_attr, chunks_attr, e)
Example #35
import numpy as np

np.issctype(np.int32)         # True
np.issctype(list)             # False
np.issctype(1.1)              # False: instances are not types
np.issctype(np.dtype('str'))  # True
Example #36
def ndarray_to_mxarray(libmx, arr):

    ### Prepare `arr` object (convert to ndarray if possible), assert
    ### data type
    if isinstance(arr, str) or isinstance(arr, unicode):
        pass

    elif isinstance(arr, dict):
        raise NotImplementedError('dicts are not supported.')

    elif ('pandas' in sys.modules) and isinstance(
            arr, sys.modules['pandas'].DataFrame):
        arr = arr.to_records()

    elif ('pandas' in sys.modules) and isinstance(
            arr, sys.modules['pandas'].Series):
        arr = arr.to_frame().to_records()

    elif isinstance(arr, collections.Iterable):
        arr = np.array(arr, ndmin=2)

    elif np.issctype(type(arr)):
        arr = np.array(arr, ndmin=2)

    else:
        raise NotImplementedError("Data type not supported: {}".format(
            type(arr)))

    ### Convert ndarray to mxarray
    if isinstance(arr, str):
        pm = libmx.mxCreateString(arr)

    elif isinstance(arr, unicode):
        pm = libmx.mxCreateString(arr.encode('utf-8'))

    elif isinstance(arr,
                    np.ndarray) and arr.dtype.kind in ['i', 'u', 'f', 'c']:
        dim = arr.ctypes.shape_as(mwSize)
        complex_flag = (arr.dtype.kind == 'c')

        pm = libmx.mxCreateNumericArray(arr.ndim, dim, dtype_to_mat(arr.dtype),
                                        complex_flag)

        mat_data = libmx.mxGetData(pm)
        np_data = arr.real.tostring('F')
        ctypes.memmove(mat_data, np_data, len(np_data))

        if complex_flag:
            mat_data = libmx.mxGetImagData(pm)
            np_data = arr.imag.tostring('F')
            ctypes.memmove(mat_data, np_data, len(np_data))

    elif isinstance(arr, np.ndarray) and arr.dtype.kind == 'b':
        dim = arr.ctypes.shape_as(mwSize)

        pm = libmx.mxCreateLogicalArray(arr.ndim, dim)

        mat_data = libmx.mxGetData(pm)
        np_data = arr.real.tostring('F')
        ctypes.memmove(mat_data, np_data, len(np_data))

    elif isinstance(arr, np.ndarray) and arr.dtype.kind in ('O', 'S', 'U'):
        dim = arr.ctypes.shape_as(mwSize)

        pm = libmx.mxCreateCellArray(arr.ndim, dim)

        for i, el in enumerate(arr.flatten('F')):
            p = ndarray_to_mxarray(libmx, el)
            libmx.mxSetCell(pm, i, p)

    elif isinstance(arr, np.ndarray) and len(arr.dtype) > 0:
        dim = arr.ctypes.shape_as(mwSize)

        name_num = len(arr.dtype.names)

        names_p = (c_char_p *
                   name_num)(*[c_char_p(name) for name in arr.dtype.names])

        pm = libmx.mxCreateStructArray(
            arr.ndim,
            dim,
            name_num,
            names_p,
        )

        for i, record in enumerate(arr.flatten('F')):
            for name in arr.dtype.names:
                el = record[name]
                p = ndarray_to_mxarray(libmx, el)

                libmx.mxSetField(pm, i, name, p)

    elif isinstance(arr, np.ndarray):
        raise NotImplementedError('Unsupported dtype: {}'.format(arr.dtype))

    return pm
Example #37
import numpy as np

reveal_type(np.issctype(np.generic))  # E: bool
reveal_type(np.issctype("foo"))  # E: bool

reveal_type(np.obj2sctype("S8"))  # E: Union[numpy.generic, None]
reveal_type(np.obj2sctype("S8", default=None))  # E: Union[numpy.generic, None]
reveal_type(
    np.obj2sctype("foo",
                  default=int)  # E: Union[numpy.generic, Type[builtins.int*]]
)

reveal_type(np.issubclass_(np.float64, float))  # E: bool
reveal_type(np.issubclass_(np.float64, (int, float)))  # E: bool

reveal_type(np.sctype2char("S8"))  # E: str
reveal_type(np.sctype2char(list))  # E: str

reveal_type(np.find_common_type([np.int64], [np.int64]))  # E: numpy.dtype
Example #38
def ValueWithUncsRounding(x, uncs, uncsigfigs=1):
    """
    Rounds all of the values in uncs (the uncertainties) to the number of
    significant figures in uncsigfigs, then rounds the values in x to the
    same decimal place as the values in uncs.
    Return value is a two-element tuple; the elements have the same types
    as x and uncs, respectively.
    Restrictions:
    - uncsigfigs must be a positive integer.
    - x must be a real value or an array-like object containing only real
      values.
    - uncs must be a real value or an array-like object containing only real
      values.
    """
    if not (type(uncsigfigs) is int or type(uncsigfigs) is long
            or isinstance(uncsigfigs, np.integer)):
        raise TypeError(
            "ValueWithUncsRounding: uncsigfigs must be an integer.")

    if uncsigfigs <= 0:
        raise ValueError("ValueWithUncsRounding: uncsigfigs must be positive.")

    if not np.all(np.isreal(x)):
        raise TypeError("ValueWithUncsRounding: all x must be real.")

    if not np.all(np.isreal(uncs)):
        raise TypeError("ValueWithUncsRounding: all uncs must be real.")

    if np.any(uncs <= 0):
        raise ValueError("ValueWithUncsRounding: uncs must all be positive.")

    # temporarily suppress floating point errors
    errhanddict = np.geterr()
    np.seterr(all="ignore")

    matrixflag = False
    if isinstance(x, np.matrix):  #Convert matrices to arrays
        matrixflag = True
        x = np.asarray(x)

    #Pre-round unc to correctly handle cases where rounding alters the
    # most significant digit of unc.
    uncs = RoundToSigFigs_fp(uncs, uncsigfigs)

    mantissas, binaryExponents = np.frexp(uncs)

    decimalExponents = __logBase10of2 * binaryExponents
    omags = np.floor(decimalExponents)

    mantissas *= 10.0**(decimalExponents - omags)
    if type(mantissas) is float or np.issctype(np.dtype(mantissas)):
        if mantissas < 1.0:
            mantissas *= 10.0
            omags -= 1.0

    else:  #elif np.all(np.isreal( mantissas )):
        fixmsk = mantissas < 1.0
        mantissas[fixmsk] *= 10.0
        omags[fixmsk] -= 1.0

    scales = 10.0**omags

    prec = uncsigfigs - 1
    result = (np.around(x / scales, decimals=prec) * scales,
              np.around(mantissas, decimals=prec) * scales)
    if matrixflag:
        result = np.matrix(result, copy=False)

    np.seterr(**errhanddict)
    return result
Example #39
import numpy as np

a = [1, None]

b = np.array(a, dtype=float)  # float dtype turns None into np.nan so isnan works below

x = b.dtype.kind in {'U', 'S'}

print(x)

print(np.argwhere(np.isnan(b)))

print(np.issctype(np.array([1])))  # False: an ndarray instance is not a scalar type
Example #41
import numpy as np

reveal_type(np.maximum_sctype(np.float64))  # E: Type[{float64}]
reveal_type(np.maximum_sctype("f8"))  # E: Type[Any]

reveal_type(np.issctype(np.float64))  # E: bool
reveal_type(np.issctype("foo"))  # E: Literal[False]

reveal_type(np.obj2sctype(np.float64))  # E: Union[None, Type[{float64}]]
reveal_type(np.obj2sctype(
    np.float64, default=False))  # E: Union[builtins.bool, Type[{float64}]]
reveal_type(np.obj2sctype("S8"))  # E: Union[None, Type[Any]]
reveal_type(np.obj2sctype("S8", default=None))  # E: Union[None, Type[Any]]
reveal_type(np.obj2sctype("foo",
                          default=False))  # E: Union[builtins.bool, Type[Any]]
reveal_type(np.obj2sctype(1))  # E: None
reveal_type(np.obj2sctype(1, default=False))  # E: bool

reveal_type(np.issubclass_(np.float64, float))  # E: bool
reveal_type(np.issubclass_(np.float64, (int, float)))  # E: bool
reveal_type(np.issubclass_(1, 1))  # E: Literal[False]

reveal_type(np.sctype2char("S8"))  # E: str
reveal_type(np.sctype2char(list))  # E: str

reveal_type(np.find_common_type([np.int64], [np.int64]))  # E: numpy.dtype[Any]

reveal_type(np.cast[int])  # E: _CastFunc
reveal_type(np.cast["i8"])  # E: _CastFunc
reveal_type(np.cast[np.int64])  # E: _CastFunc
Example #42
def ndarray_to_mxarray(libmx, arr):

    ### Prepare `arr` object (convert to ndarray if possible), assert
    ### data type
    if isinstance(arr, str) or isinstance(arr, unicode):
        pass

    elif isinstance(arr, dict):
        raise NotImplementedError('dicts are not supported.')

    elif ('pandas' in sys.modules) and isinstance(arr, sys.modules['pandas'].DataFrame):
        arr = arr.to_records()

    elif ('pandas' in sys.modules) and isinstance(arr, sys.modules['pandas'].Series):
        arr = arr.to_frame().to_records()

    elif isinstance(arr, collections.Iterable):
        arr = np.array(arr, ndmin=2)

    elif np.issctype(type(arr)):
        arr = np.array(arr, ndmin=2)

    else:
        raise NotImplementedError("Data type not supported: {}".format(type(arr)))




    ### Convert ndarray to mxarray
    if isinstance(arr, str):
        pm = libmx.mxCreateString(arr)

    elif isinstance(arr, unicode):
        pm = libmx.mxCreateString(arr.encode('utf-8'))

    elif isinstance(arr, np.ndarray) and arr.dtype.kind in ['i','u','f','c']:
        dim = arr.ctypes.shape_as(mwSize)
        complex_flag = (arr.dtype.kind == 'c')

        pm = libmx.mxCreateNumericArray(
            arr.ndim,
            dim,
            dtype_to_mat(arr.dtype),
            complex_flag
        )

        mat_data = libmx.mxGetData(pm)
        np_data = arr.real.tostring('F')
        ctypes.memmove(mat_data, np_data, len(np_data))

        if complex_flag:
            mat_data = libmx.mxGetImagData(pm)
            np_data = arr.imag.tostring('F')
            ctypes.memmove(mat_data, np_data, len(np_data))


    elif isinstance(arr, np.ndarray) and arr.dtype.kind == 'b':
        dim = arr.ctypes.shape_as(mwSize)

        pm = libmx.mxCreateLogicalArray(arr.ndim, dim)

        mat_data = libmx.mxGetData(pm)
        np_data = arr.real.tostring('F')
        ctypes.memmove(mat_data, np_data, len(np_data))


    elif isinstance(arr, np.ndarray) and arr.dtype.kind in ('O', 'S', 'U'):
        dim = arr.ctypes.shape_as(mwSize)

        pm = libmx.mxCreateCellArray(arr.ndim, dim)

        for i,el in enumerate(arr.flatten('F')):
            p = ndarray_to_mxarray(libmx, el)
            libmx.mxSetCell(pm, i, p)


    elif isinstance(arr, np.ndarray) and len(arr.dtype) > 0:
        dim = arr.ctypes.shape_as(mwSize)

        name_num = len(arr.dtype.names)

        names_p = (c_char_p*name_num)(*[c_char_p(name) for name in arr.dtype.names])

        pm = libmx.mxCreateStructArray(
            arr.ndim,
            dim,
            name_num,
            names_p,
        )

        for i,record in enumerate(arr.flatten('F')):
            for name in arr.dtype.names:
                el = record[name]
                p = ndarray_to_mxarray(libmx, el)

                libmx.mxSetField(pm, i, name, p)

    elif isinstance(arr, np.ndarray):
        raise NotImplementedError('Unsupported dtype: {}'.format(arr.dtype))

    return pm
Example #43
    def unify_datatype(cls, graphs, inplace=False):
        '''Ensure that each attribute has the same data type across graphs.

        Parameters
        ----------
        graphs: list
            A list of graphs that have the same set of node and edge
            features. The types for each attribute will then be
            chosen to be the smallest scalar type that can safely hold all the
            values as found across the graphs.
        inplace: bool
            Whether or not to modify the graph features in-place.

        Returns
        -------
        None or list
            If inplace is True, the graphs will be modified in-place and
            nothing will be returned. Otherwise, a new list of graphs with
            type-unified features will be returned.
        '''
        '''copy graphs if not editing in-place'''
        for g in graphs:
            g.cookie.clear()
        if inplace is not True:
            graphs = [g.copy(deep=False) for g in graphs]
        '''ensure all graphs have the same node and edge features'''
        features = {}
        for component in ['nodes', 'edges']:
            first = None
            for g in graphs:
                second = set(getattr(g, component).columns)
                first = first or second
                if second != first:
                    raise TypeError(f'Graph {g} with node features {second} '
                                    'does not match with the other graphs.')
            features[component] = first
        '''unify data type for each feature'''
        for component in ['nodes', 'edges']:
            group = [getattr(g, component) for g in graphs]
            for key in features[component]:
                types = [g[key].concrete_type for g in group]
                t = common_min_type.of_types(types)
                if t == np.object:
                    t = common_min_type.of_types(types, coerce=False)
                if t is None:
                    raise TypeError(
                        f'Cannot unify attribute {key} containing mixed '
                        'object types')

                if np.issctype(t):
                    for g in group:
                        g[key] = g[key].astype(t)
                elif t in [list, tuple, np.ndarray]:
                    t_sub = common_min_type.of_values(
                        it.chain.from_iterable(
                            it.chain.from_iterable([g[key] for g in group])))
                    if t_sub is None:
                        raise TypeError(
                            f'Cannot find a common type for elements in {key}.'
                        )
                    for g in group:
                        g[key] = [np.array(seq, dtype=t_sub) for seq in g[key]]
        '''only returns if not editing in-place'''
        if inplace is not True:
            return graphs
Example #44
def genfromdta(fname, missing_flt=-999., missing_str="", encoding=None):
    """
    Returns an ndarray from a Stata .dta file.

    Parameters
    ----------
    fname : str or filehandle
        Stata .dta file.
    missing_flt : numeric
        The numeric value to replace missing values with. Will be used for
        any numeric value.
    missing_str : str
        The string to replace missing values with for string variables.
    encoding : string, optional
        Used for Python 3 only. Encoding to use when reading the .dta file.
        Defaults to `locale.getpreferredencoding`

    Notes
    -----
    Date types will be returned as their numeric value in Stata. A date
    parser is not written yet.
    """
    if isinstance(fname, basestring):
        fhd = StataReader(open(fname, 'rb'), missing_values=False,
                encoding=encoding)
    elif not hasattr(fname, 'read'):
        raise TypeError("The input should be a string or a filehandle. "\
                "(got %s instead)" % type(fname))
    else:
        fhd = StataReader(fname, missing_values=False, encoding=encoding)
#    validate_names = np.lib._iotools.NameValidator(excludelist=excludelist,
#                                    deletechars=deletechars,
#                                    case_sensitive=case_sensitive)

    #TODO: This needs to handle the byteorder?
    header = fhd.file_headers()
    types = header['dtyplist']
    nobs = header['nobs']
    numvars = header['nvar']
    varnames = header['varlist']
    dataname = header['data_label']
    labels = header['vlblist'] # labels are thrown away unless DataArray
                               # type is used
    data = np.zeros((nobs,numvars))
    stata_dta = fhd.dataset()

    # key is given by np.issctype
    convert_missing = {
            True : missing_flt,
            False : missing_str}

    dt = np.dtype(zip(varnames, types))
    data = np.zeros((nobs), dtype=dt) # init final array

    for rownum,line in enumerate(stata_dta):
        # doesn't handle missing value objects, just casts
        # None will only work without missing value object.
        if None in line:# and not remove_comma:
            for i,val in enumerate(line):
                if val is None:
                    line[i] = convert_missing[np.issctype(types[i])]
        data[rownum] = tuple(line)

    #TODO: make it possible to return plain array if all 'f8' for example
    return data
Example #45
    def __init__(self, graph):

        self.nodes = nodes = graph.nodes.copy(deep=False)
        self.edges = edges = graph.edges.copy(deep=False)
        self.n_node = len(nodes)
        ''' substitute columns corresponding to object-type node/edge
        attributes to their GPU counterparts '''
        for df in [nodes, edges]:
            for key in list(df.columns):
                if not np.issctype(df[key].dtype):
                    if issubclass(df[key].concrete_type,
                                  (list, tuple, np.ndarray)):
                        inner_type = common_min_type.of_types([
                            x.dtype if isinstance(x, np.ndarray) else
                            common_min_type.of_values(x) for x in df[key]
                        ])
                        if not np.issctype(inner_type):
                            raise TypeError(
                                f'List-like graph attributes must have '
                                f'scalar elements. Attribute {key} is '
                                f'{inner_type}.')
                        buffer = memoryview(
                            umlike(
                                np.fromiter(it.chain.from_iterable(df[key]),
                                            dtype=inner_type)))
                        size = np.fromiter(map(len, df[key]), dtype=np.int)
                        head = np.cumsum(size) - size
                        # mangle key with type information
                        tag = '${key}::frozen_array::{dtype}'.format(
                            key=key, dtype=inner_type.str)
                        data = np.empty_like(df[key], dtype=np.object)
                        for i, (h, s) in enumerate(zip(head, size)):
                            data[i] = np.frombuffer(
                                buffer[h:h + s], dtype=inner_type).view(
                                    self.CustomType.FrozenArray)
                        df[tag] = data
                        df.drop([key], inplace=True)
                    else:
                        raise TypeError(
                            f'Unsupported non-scalar attribute {key} '
                            f'of type {df[key].concrete_type}')
        ''' add phantom label if none exists to facilitate C++ interop '''
        assert (len(nodes.columns) >= 1)
        if len(nodes.columns) == 1:
            nodes['labeled'] = np.zeros(len(nodes), np.bool_)

        assert (len(edges.columns) >= 2)
        if len(edges.columns) == 2:
            assert ('!i' in edges and '!j' in edges)
            edges['labeled'] = np.zeros(len(edges), np.bool_)
        ''' determine node type '''
        i = nodes['!i']
        nodes.drop(['!i'], inplace=True)
        self.node_t = node_t = nodes.rowtype()
        self.nodes_aos = umempty(len(nodes), dtype=node_t)
        self.nodes_aos[i] = list(nodes.iterstates())
        ''' determine whether graph is weighted, determine edge type,
            and compute node degrees '''
        self.degree = degree = umzeros(self.n_node, dtype=np.float32)
        edge_t = edges.drop(['!i', '!j', '!w']).rowtype()
        self_loops = edges[edges['!i'] == edges['!j']]
        nnz = len(edges)
        if '!w' in edges:  # weighted graph
            self.weighted = True
            np.add.at(degree, edges['!i'], edges['!w'])
            np.add.at(degree, edges['!j'], edges['!w'])
            np.subtract.at(degree, self_loops['!i'], self_loops['!w'])

            if edge_t.itemsize != 0:
                labels = list(edges[edge_t.names].iterstates())
            else:
                labels = [None] * len(edges)

            edge_t = np.dtype([('weight', np.float32), ('label', edge_t)],
                              align=True)

            edges_aos = np.fromiter(zip(edges['!w'], labels),
                                    dtype=edge_t,
                                    count=nnz)
        else:
            self.weighted = False
            np.add.at(degree, edges['!i'], 1.0)
            np.add.at(degree, edges['!j'], 1.0)
            np.subtract.at(degree, self_loops['!i'], 1.0)
            edges_aos = np.fromiter(edges[edge_t.names].iterstates(),
                                    dtype=edge_t,
                                    count=nnz)
        self.edge_t = edge_t
        degree[degree == 0] = 1.0
        ''' collect non-zero edge octiles '''
        indices = np.empty((4, nnz * 2), dtype=np.uint32, order='C')
        i, j, up, lf = indices
        i[:nnz] = edges['!i']
        j[:nnz] = edges['!j']
        # replicate & swap i and j for the lower triangular part
        i[nnz:], j[nnz:] = j[:nnz], i[:nnz]
        # get upper left corner of owner octiles
        up[:] = i - i % 8
        lf[:] = j - j % 8

        # np.unique implies lexical sort
        (lf, up, j, i), perm = np.unique(indices[-1::-1, :],
                                         axis=1,
                                         return_index=True)
        self.edges_aos = umempty(len(i), edge_t)
        self.edges_aos[:] = edges_aos[perm % nnz]  # mod nnz due to symmetry

        diff = np.empty_like(up)
        diff[1:] = (up[:-1] != up[1:]) | (lf[:-1] != lf[1:])
        diff[:1] = True
        oct_offset = np.flatnonzero(diff)
        self.n_octile = len(oct_offset)

        nzmasks = np.bitwise_or.reduceat(
            1 << (i - up + (j - lf) * 8).astype(np.uint64), oct_offset)
        nzmasks_r = np.bitwise_or.reduceat(
            1 << (j - lf + (i - up) * 8).astype(np.uint64), oct_offset)

        self.octiles = octiles = umempty(self.n_octile, self.Octile.dtype)
        octiles[:] = list(
            zip(
                int(self.edges_aos.base) + oct_offset * edge_t.itemsize,
                nzmasks, nzmasks_r, up[oct_offset], lf[oct_offset]))