def data_to_params(data,
        expparams_dtype,
        col_outcomes=(0, 'counts'),
        cols_expparams=None
    ):
    """
    Given data as a NumPy array, separates out each column either as
    the outcomes, or as a field of an expparams array. Columns may
    be specified either as indices into a two-axis scalar array, or as
    field names for a one-axis record array.

    Since scalar arrays are homogeneous in type, this may result in loss
    of precision due to casting between data types.
    """
    BY_IDX, BY_NAME = range(2)

    is_exp_scalar = np.issctype(expparams_dtype)
    is_data_scalar = np.issctype(data.dtype) and not data.dtype.fields

    s_ = (
        (lambda idx: np.s_[..., idx[BY_IDX]])
        if is_data_scalar else
        (lambda idx: np.s_[idx[BY_NAME]])
    )

    outcomes = data[s_(col_outcomes)].astype(int)  # slice out the outcomes column

    expparams = np.empty(outcomes.shape, dtype=expparams_dtype)
    if is_exp_scalar:
        expparams[:] = data[s_(cols_expparams)]
    else:
        for expparams_key, column in cols_expparams.items():
            expparams[expparams_key] = data[s_(column)]

    return outcomes, expparams
def __new__(cls, input): if isinstance(input, np.ndarray): series = input.view(cls) if np.issctype(series.dtype): series._concrete_type = series.dtype else: series._concrete_type = common_concrete_type.of_values(input) else: t = common_min_type.of_values(input) dtype = np.dtype(t) if np.issctype(t) else np.dtype(np.object) series = np.empty(len(input), dtype=dtype).view(cls) # ensures 1D series[:] = input series._concrete_type = t return series
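# Usage sketch for the Series constructor above (hedged: common_min_type and
# common_concrete_type are assumed to infer the narrowest shared type, as
# their names suggest): Series(np.arange(3)) keeps the int64 backing and
# records it as the concrete type, while Series(['a', 1]) falls back to an
# object-dtype 1-D array whose _concrete_type records the inferred common type.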
def common_symbolic_value_elimination_impl(gdict):
    order = topsort(gdict)
    roots = compute_roots(gdict, order)
    values = {}
    for k in order:
        n = gdict[k]
        nodeval = n.attr.get('symbolic_value')
        try:
            if nodeval is None:
                continue
            elif isscalar(nodeval.val) and nodeval.val == -1:
                continue
            elif (not isscalar(nodeval.val)) and -1 in nodeval.val:
                continue
            elif isinstance(nodeval.val, np.ndarray) and \
                    np.issctype(nodeval.val.dtype) and nodeval.val.size > 100:
                continue
        except Exception:
            continue
        hashable_val, any_symbolic = make_hashable(nodeval.val)
        if any_symbolic:
            if hashable_val in values:
                # rewrite graph
                othernodes = values[hashable_val]
                for othernode in othernodes:
                    if len(roots[othernode].intersection(roots[n.name])) > 0:
                        outputs = list(n.outputs)
                        for outnode in outputs:
                            replace_source(gdict, n.name, outnode, othernode)
            else:
                values[hashable_val] = values.get(hashable_val, []) + [k]
def get_accessor(self, field, dtype, address_space_qualifier):
    child_name = BlobLib.get_interface(dtype).get_spaced_name(address_space_qualifier)
    definition = '%(child_name)s* %(function_name)s(%(address_space_qualifier)s %(cname)s* self)' % {
        'function_name': self.get_accessor_name(field, address_space_qualifier),
        'cname': self.get_name(address_space_qualifier),
        'child_name': child_name,
        'address_space_qualifier': address_space_qualifier
    }
    # walk down the nested subtypes until a scalar (or enum) leaf is reached
    field_chain = [field]
    current_dtype = dtype
    while not numpy.issctype(current_dtype) and not issubclass(current_dtype, BlobEnum):
        subfield, current_dtype = current_dtype.subtypes[0]
        field_chain.append(subfield)
    declaration = \
        '''
%(definition)s
{
    return (%(child_name)s *)&self->%(field)s;
};''' % {
            'definition': definition,
            'child_name': child_name,
            'field': '_'.join(field_chain)
        }
    return definition + ';', declaration
def from_value(cls, val): """ Creates a :py:class:`Type` object corresponding to the given value. """ if isinstance(val, Type): # Creating a new object, because ``val`` may be some derivative of Type, # used as a syntactic sugar, and we do not want it to confuse us later. return cls(val.dtype, shape=val.shape, strides=val.strides, offset=val.offset, nbytes=val.nbytes) elif numpy.issctype(val): return cls(val) elif hasattr(val, 'dtype') and hasattr(val, 'shape'): strides = val.strides if hasattr(val, 'strides') else None offset = val.offset if hasattr(val, 'offset') else 0 nbytes = val.nbytes if hasattr(val, 'nbytes') else None return cls(val.dtype, shape=val.shape, strides=strides, offset=offset, nbytes=nbytes) else: return cls(dtypes.detect_type(val))
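# Example dispatch for from_value above: Type.from_value(np.float32) takes the
# numpy.issctype branch, while Type.from_value(np.empty((4, 4), np.float32))
# takes the dtype/shape branch and carries over strides, offset, and nbytes.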
def __init__(self, initializer, dtype=None, shape=None):
    if dtype is not None:
        assert np.issctype(dtype)
    if shape is not None:
        assert isinstance(shape, tuple)

    if isinstance(initializer, str):
        # treat initializer as a filename to load tensor data from
        npdata = self.load_data(initializer)
        if dtype is not None and dtype != npdata.dtype:
            npdata = npdata.astype(dtype, copy=False)
        if shape is not None:
            npdata = npdata.reshape(shape)
        super().__init__(npdata.shape, dtype=npdata.dtype)
        self.set(npdata)
    elif isinstance(initializer, tuple):
        super().__init__(initializer, dtype=np.float32 if dtype is None else dtype)
    elif isinstance(initializer, np.ndarray):
        if dtype and dtype != initializer.dtype:
            initializer = initializer.astype(dtype)
        if shape is not None and shape != initializer.shape:
            initializer = initializer.reshape(shape)
        super().__init__(initializer.shape, dtype=initializer.dtype)
        self.set(initializer)
    else:
        raise NotImplementedError
def _create_unaligned_dtype(cls, *subtypes): dtype_components = [] for index, component in enumerate(subtypes): field, subtype = component if numpy.issctype(subtype): if hasattr(subtype, 'descr'): for name, sub_dtype in subtype.descr: subfield = '%s_%s' % (field, name) dtype_components.append((subfield, sub_dtype)) else: dtype_components.append(component) elif issubclass(subtype, BlobEnum): dtype_components.append((field, BlobEnum.dtype)) elif issubclass(subtype, Blob) and subtype.is_plain(): if hasattr(subtype, 'dtype'): sub_dtype = subtype.dtype else: sub_dtype, subtype_requirements = subtype.create_plain_dtype(*subtype.subtypes) for name, sub_dtype in sub_dtype.descr: subfield = '%s_%s' % (field, name) dtype_components.append((subfield, sub_dtype)) else: raise NotImplementedError() return numpy.dtype(dtype_components), subtypes
def convert_to_dtype(tensor_or_dtype, dtype=None, dtype_hint=None): """Get a dtype from a list/tensor/dtype using convert_to_tensor semantics.""" if tensor_or_dtype is None: return dtype or dtype_hint # Tensorflow dtypes need to be typechecked if tf.is_tensor(tensor_or_dtype): dt = base_dtype(tensor_or_dtype.dtype) elif isinstance(tensor_or_dtype, tf.DType): dt = base_dtype(tensor_or_dtype) # Numpy dtypes defer to dtype/dtype_hint elif isinstance(tensor_or_dtype, np.ndarray): dt = base_dtype(dtype or dtype_hint or tensor_or_dtype.dtype) elif np.issctype(tensor_or_dtype): dt = base_dtype(dtype or dtype_hint or tensor_or_dtype) else: # If this is a Python object, call `convert_to_tensor` and grab the dtype. # Note that this will add ops in graph-mode; we may want to consider # other ways to handle this case. dt = tf.convert_to_tensor(tensor_or_dtype, dtype, dtype_hint).dtype if not SKIP_DTYPE_CHECKS and dtype and not base_equal(dtype, dt): raise TypeError('Found incompatible dtypes, {} and {}.'.format( dtype, dt)) return dt
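# Resolution order illustrated (hedged: assumes TFP's base_dtype semantics):
#   convert_to_dtype(None, dtype_hint=tf.float32)   -> tf.float32
#   convert_to_dtype(np.float64)                    -> float64 (np.issctype branch)
#   convert_to_dtype(np.zeros(3), dtype=tf.int32)   -> tf.int32 (explicit dtype wins)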
def toString(self, buffer=sys.stdout, columns=None, colSpace=None, nanRep='NaN', formatters=None, float_format=None): """ Output a string version of this DataMatrix """ _pf = common._pfixed formatters = formatters or {} if columns is None: columns = self.columns values = self.values if self.objects: columns = list(columns) + list(self.objects.columns) values = np.column_stack( (values.astype(object), self.objects.values)) else: columns = [c for c in columns if c in self] values = self.asMatrix(columns) ident = lambda x: x idxSpace = max([len(str(idx)) for idx in self.index]) + 4 if colSpace is None: colSpace = {} for c in columns: if np.issctype(self[c].dtype): colSpace[c] = max(len(str(c)) + 4, 12) else: # hack colSpace[c] = 15 else: colSpace = dict((k, 15) for k in columns) if len(self.cols()) == 0: buffer.write('DataMatrix is empty!\n') buffer.write(repr(self.index)) else: buffer.write(_pf('', idxSpace)) for h in columns: buffer.write(_pf(h, colSpace[h])) buffer.write('\n') for i, idx in enumerate(self.index): buffer.write(_pf(idx, idxSpace - 1)) for j, col in enumerate(columns): formatter = formatters.get(col, ident) buffer.write( _pf(formatter(values[i, j]), colSpace[col], float_format=float_format, nanRep=nanRep)) buffer.write('\n')
def any_symbolic_or_unknown(val): if is_symbolic_or_unknown(val): return True elif isinstance(val, np.ndarray) and np.issctype(val.dtype): return False elif hasattr(val, '__iter__'): return any(any_symbolic_or_unknown(i) for i in val) else: return is_symbolic_or_unknown(val)
def any_variadic(val): if is_variadic(val): return True elif isinstance(val, np.ndarray) and np.issctype(val.dtype): return False elif isinstance(val, str): # string is iterable return False elif hasattr(val, "__iter__"): return any(any_variadic(i) for i in val) return False
def _msgpack_ext_pack(x): """Messagepack encoders for custom types.""" if isinstance(x, (np.ndarray, jax.xla.DeviceArray)): return msgpack.ExtType(_MsgpackExtType.ndarray, _ndarray_to_bytes(x)) if np.issctype(type(x)): # pack scalar as ndarray return msgpack.ExtType(_MsgpackExtType.npscalar, _ndarray_to_bytes(np.asarray(x))) elif isinstance(x, complex): return msgpack.ExtType(_MsgpackExtType.native_complex, msgpack.packb((x.real, x.imag))) return x
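# A matching decoder sketch for the packer above, assuming an
# _ndarray_from_bytes helper that inverts _ndarray_to_bytes (the helper name
# is an assumption, as is the ExtType layout):
def _msgpack_ext_unpack(code, data):
    """Messagepack decoders for custom types."""
    if code == _MsgpackExtType.ndarray:
        return _ndarray_from_bytes(data)
    elif code == _MsgpackExtType.native_complex:
        complex_tuple = msgpack.unpackb(data)
        return complex(complex_tuple[0], complex_tuple[1])
    elif code == _MsgpackExtType.npscalar:
        ar = _ndarray_from_bytes(data)
        return ar[()]  # unpack the 0-d array back into a scalar
    return msgpack.ExtType(code, data)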
def num_symbolic(val):
    """
    Return the number of symbols in val
    """
    if is_symbolic(val):
        return 1
    elif isinstance(val, np.ndarray) and np.issctype(val.dtype):
        return 0
    elif hasattr(val, "__iter__"):
        # recurse so nested symbols are counted, not merely detected
        return sum(num_symbolic(i) for i in val)
    return 0
def cast_array(values, ref): """Use a PyTorch/Numpy array representation for `values` that matches `ref`.""" if is_torch_tensor(ref): return as_torch_tensor(values, dtype=ref.dtype) elif is_torch_dtype(ref): return as_torch_tensor(values, dtype=ref) elif isinstance(ref, np.ndarray): return np.asarray(values, dtype=ref.dtype) elif np.issctype(ref): return np.asarray(values, dtype=ref) raise ValueError('`ref` must be a torch.Tensor, np.ndarray, or dtype')
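# Usage sketch for cast_array (hedged: assumes the is_torch_* helpers resolve
# as their names suggest):
#   cast_array([1, 2], np.zeros(3, np.int64))  -> np.asarray([1, 2], np.int64)
#   cast_array([1, 2], np.float32)             -> np.asarray([1, 2], np.float32)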
def as_tensor(arr): """ Wraps the specified NumPy array as a Tensor. """ if not np.issctype(arr.dtype): raise ValueError("A non-scalar array cannot be converted to a Tensor " "without copying.") return DenseTensor(arr.shape, data=arr, strides=tuple(s // 8 for s in arr.strides), copy=False)
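# Note: the `s // 8` above converts byte strides to element strides only for
# 8-byte dtypes; a dtype-agnostic sketch (same assumed DenseTensor signature)
# would use `tuple(s // arr.itemsize for s in arr.strides)` instead.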
def any_symbolic(val): if is_symbolic(val): return True if isinstance(val, np.ndarray) and val.ndim == 0: return is_symbolic(val[()]) elif isinstance(val, np.ndarray) and np.issctype(val.dtype): return False elif isinstance(val, str): # string is iterable return False elif hasattr(val, "__iter__"): return any(any_symbolic(i) for i in val) return False
def get_sizeof(self, address_space_qualifier): """Creates a c99 sizeof method.""" definition = 'unsigned long %(function_name)s(%(address_space_qualifier)s char* blob)' % { 'function_name': self.get_sizeof_name(address_space_qualifier), 'address_space_qualifier': address_space_qualifier, } arguments = ['blob', '&self'] # the first argument must be the data itself. variables = ['%s %s;' % (self.get_name(address_space_qualifier, ), 'self')] # all required variable names lines = [] # all required source code lines # iterate over all components/subtypes for field, subtype in self.blob_type.subtypes: if field.endswith(Blob.PADDING_FIELD_SUFFIX): continue if numpy.issctype(subtype): # determine the size of the scalar type cname = dtype_to_ctype(subtype) sizeof_call = 'sizeof(%s)' % cname else: # determine the size of the complex type assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype) sizeof_call = '%s((%s char*)(blob + size))' % ( BlobLib.get_interface(subtype).get_sizeof_name(address_space_qualifier), address_space_qualifier, ) # save which arguments and lines are required to determine the total size lines.append('size += %s;' % sizeof_call) lines.insert(0, '%s(%s);' % (self.get_deserialize_name(address_space_qualifier), ', '.join(arguments))) # prepend the variable declarations to the source code variables.extend(lines) lines = variables # fill the function template declaration = \ ''' %(definition)s { unsigned long size = 0; %(lines)s return size; } ''' % { 'definition': definition.strip(), 'cname': self.get_name(address_space_qualifier), 'lines': '\n'.join(['\t' + line for line in lines]) } return definition.strip() + ';', declaration.strip()
def testConvertToDTypeRaises(self, tensor_or_dtype, dtype, dtype_hint): if np.issctype(tensor_or_dtype): example_tensor = np.zeros([], tensor_or_dtype) elif isinstance(tensor_or_dtype, tf.DType): example_tensor = tf.zeros([], tensor_or_dtype) else: example_tensor = tensor_or_dtype with self.assertRaisesRegex(TypeError, 'Found incompatible dtypes'): dtype_util.convert_to_dtype(tensor_or_dtype, dtype, dtype_hint) with self.assertRaisesRegex(TypeError, 'Found incompatible dtypes'): dtype_util.convert_to_dtype(example_tensor, dtype, dtype_hint)
def normalize_hounsfield(vol, dtype=None):
    '''Normalizes `vol` by 4095 and clamps to [0, 1]. `dtype=None` defaults to 32-bit float'''
    if isinstance(dtype, torch.dtype):
        vol = torch.tensor(vol, dtype=dtype)
    elif np.issctype(dtype):
        vol = np.array(vol, dtype=dtype)
    elif dtype is None:
        vol = vol.float() if torch.is_tensor(vol) else np.array(vol).astype(np.float32)

    if torch.is_tensor(vol):
        return torch.clamp(vol / 4095.0, 0.0, 1.0)
    elif isinstance(vol, np.ndarray):
        return np.clip(vol / 4095.0, 0.0, 1.0)
    else:
        raise TypeError(
            f'vol (type={type(vol)}) is neither a torch.Tensor nor an np.ndarray')
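# Quick check of normalize_hounsfield on raw 12-bit CT values (hits the
# numpy float32 default branch above):
import numpy as np

out = normalize_hounsfield(np.array([0, 2047, 4095]))
assert out.dtype == np.float32 and out.min() == 0.0 and out.max() == 1.0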
def from_value(cls, val): """ Creates a :py:class:`Type` object corresponding to the given value. """ if isinstance(val, Type): # Creating a new object, because ``val`` may be some derivative of Type, # used as a syntactic sugar, and we do not want it to confuse us later. return cls(val.dtype, shape=val.shape, strides=val.strides) elif numpy.issctype(val): return cls(val) elif hasattr(val, 'dtype') and hasattr(val, 'shape'): strides = val.strides if hasattr(val, 'strides') else None return cls(val.dtype, shape=val.shape, strides=strides) else: return cls(dtypes.detect_type(val))
def testConvertToDtype(self, tensor_or_dtype, dtype, dtype_hint): if np.issctype(tensor_or_dtype): example_tensor = np.zeros([], tensor_or_dtype) elif isinstance(tensor_or_dtype, tf.DType): example_tensor = tf.zeros([], tensor_or_dtype) else: example_tensor = tensor_or_dtype # Try with the original argument. self.assertEqual( tf.convert_to_tensor(example_tensor, dtype, dtype_hint).dtype, dtype_util.convert_to_dtype(tensor_or_dtype, dtype, dtype_hint)) # Try with a concrete value. self.assertEqual( tf.convert_to_tensor(example_tensor, dtype, dtype_hint).dtype, dtype_util.convert_to_dtype(example_tensor, dtype, dtype_hint))
def sizeof_dtype(cls, dtype_params): size = 0 for field, subtype in cls.subtypes: subtype_params = cls.explode_dtype_params(field=field, dtype_params=dtype_params) if issubclass(subtype, Blob): subtype_size = subtype.sizeof_dtype(dtype_params=subtype_params) elif numpy.issctype(subtype): subtype_size = subtype().nbytes else: raise NotImplementedError size += subtype_size return size
def get_type(self, address_space_qualifier): """Returns the c99 deserializer function declaration, which separates the components of a flat type.""" fields = [] # iterate over all subtypes/components for field, subtype in self.blob_type.subtypes: if field.endswith(Blob.PADDING_FIELD_SUFFIX): continue # used variable names # add sizeof call of component if numpy.issctype(subtype): fields.append('%s %s* %s;' % (address_space_qualifier, dtype_to_ctype(subtype), field)) else: assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype) if subtype.is_plain(): fields.append('%s* %s;' % ( BlobLib.get_interface(subtype).get_spaced_name(address_space_qualifier), field )) else: fields.append('%s %s;' % ( BlobLib.get_interface(subtype).get_name(address_space_qualifier), field )) definition = \ ''' /* complex type %(name)s */ typedef struct _%(name)s { %(fields)s } %(name)s; ''' % { 'name': self.get_name(address_space_qualifier, ), 'fields': '\n\t'.join(fields) } return definition
def round_sigfigs(x, sigfigs):
    """
    Rounds the value(s) in x to the number of significant figures in sigfigs.

    Restrictions:
    sigfigs must be an integer type and store a positive value.
    x must be a real value or an array like object containing only real values.
    """
    if not (type(sigfigs) is int or np.issubdtype(sigfigs, np.integer)):
        raise TypeError("round_sigfigs: sigfigs must be an integer.")

    if not np.all(np.isreal(x)):
        raise TypeError("round_sigfigs: all x must be real.")

    if sigfigs <= 0:
        raise ValueError("round_sigfigs: sigfigs must be positive.")

    xsgn = np.sign(x)
    absx = xsgn * x
    mantissas, binaryExponents = np.frexp(absx)

    decimalExponents = __logBase10of2 * binaryExponents
    intParts = np.floor(decimalExponents)

    mantissas *= 10.0**(decimalExponents - intParts)

    if type(mantissas) is float or np.issctype(np.dtype(mantissas)):
        if mantissas < 1.0:
            mantissas *= 10.0
            intParts -= 1.0
    elif isinstance(mantissas, np.ndarray):
        fixmsk = mantissas < 1.0
        mantissas[fixmsk] *= 10.0
        intParts[fixmsk] -= 1.0

    return xsgn * np.around(mantissas, decimals=sigfigs - 1) * 10.0**intParts
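# Worked example for round_sigfigs, done by hand against the frexp math above:
#   x = 0.00123456, sigfigs = 3
#   frexp:   0.00123456 = 0.6320947... * 2**-9
#   decimal exponent: -9 * log10(2) = -2.7093;  intPart = floor(-2.7093) = -3
#   mantissa: 0.6320947 * 10**(-2.7093 + 3) = 1.23456
#   round(1.23456, 2) * 10**-3 = 0.00123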
def _my_issctype(dtype): """Hack around np.issctype bug""" return np.issctype(dtype) and str(dtype)[0:2] != '|S'
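# Why the wrapper above exists: on NumPy versions that still provide
# np.issctype, string dtypes are reported as scalar types, which the callers
# of _my_issctype need to exclude.
import numpy as np

assert np.issctype(np.dtype('int64'))       # genuinely scalar
assert np.issctype(np.dtype('|S8'))         # string dtypes also report True
assert not _my_issctype(np.dtype('|S8'))    # the wrapper filters them out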
def summary(dataset, stats=True, lstats='auto', sstats='auto', idhash=False,
            targets_attr='targets', chunks_attr='chunks',
            maxc=30, maxt=20):
    """String summary over the object

    Parameters
    ----------
    stats : bool
      Include some basic statistics (mean, std, var) over dataset samples
    lstats : 'auto' or bool
      Include statistics on chunks/targets.  If 'auto', includes only if both
      targets_attr and chunks_attr are present.
    sstats : 'auto' or bool
      Sequence (order) statistics. If 'auto', includes only if
      targets_attr is present.
    idhash : bool
      Include idhash value for dataset and samples
    targets_attr : str, optional
      Name of sample attributes of targets
    chunks_attr : str, optional
      Name of sample attributes of chunks -- independent groups of samples
    maxt : int
      Maximal number of targets when provide details on targets/chunks
    maxc : int
      Maximal number of chunks when provide details on targets/chunks
    """
    # local bindings
    samples = dataset.samples
    sa = dataset.sa
    s = str(dataset)[1:-1]
    if idhash:
        s += '\nID-Hashes: %s' % dataset.idhash

    # Deduce if necessary lstats and sstats
    if lstats == 'auto':
        lstats = (targets_attr in sa) and (chunks_attr in sa)
    if sstats == 'auto':
        sstats = (targets_attr in sa)

    ssep = (' ', '\n')[lstats]

    ## Possibly summarize attributes listed as having unique
    if stats:
        if np.issctype(samples.dtype):
            # TODO -- avg per chunk?
            # XXX We might like to use scipy.stats.describe to get
            # quick summary statistics (mean/range/skewness/kurtosis)
            if dataset.nfeatures:
                s += "%sstats: mean=%g std=%g var=%g min=%g max=%g\n" % \
                     (ssep, np.mean(samples), np.std(samples),
                      np.var(samples), np.min(samples), np.max(samples))
            else:
                s += "%sstats: dataset has no features\n" % ssep
        else:
            s += "%sstats: no stats for dataset of '%s' dtype" % (
                ssep, samples.dtype)

    if lstats:
        try:
            s += dataset.summary_targets(
                targets_attr=targets_attr, chunks_attr=chunks_attr,
                maxc=maxc, maxt=maxt)
        except KeyError as e:
            s += 'No per %s/%s due to %r' % (targets_attr, chunks_attr, e)
def get_deserialize(self, address_space_qualifier): """Returns the c99 deserializer function declaration, which separates the components of a flat type.""" arguments = ['%s char* blob' % address_space_qualifier] declarations = [] lines = [] previous_field_offset, previous_field_space = 0, 0 last_field = self.blob_type.subtypes[-1][0] # iterate over all subtypes/components for field, subtype in self.blob_type.subtypes: if field.endswith(Blob.PADDING_FIELD_SUFFIX): continue is_last_field = field == last_field # format lines.append('') lines.append('/* cast of %s */' % field) # used variable names field_variable = 'self->%s' % field field_offset = '%s_offset' % field field_reference = 'blob + %s' % field_offset if not is_last_field: field_space = '%s_space' % field declarations.append('unsigned long %s;' % field_offset) if not is_last_field: declarations.append('unsigned long %s;' % field_space) # add sizeof call of component if numpy.issctype(subtype): cname = "%s %s" % (address_space_qualifier, dtype_to_ctype(subtype)) sizeof_call = 'sizeof(%s)' % cname else: assert issubclass(subtype, Blob), 'unexpected type %s %s' % (type(subtype), subtype) cname = "%s %s" % ( address_space_qualifier, BlobLib.get_interface(subtype).get_name(address_space_qualifier) ) sizeof_call = '%s((%s char*)%s)' % ( BlobLib.get_interface(subtype).get_sizeof_name(address_space_qualifier), address_space_qualifier, field_reference ) # determine offset of component lines.append('%s = %s + %s;' % (field_offset, previous_field_offset, previous_field_space)) # set and cast component reference if not numpy.issctype(subtype) and not subtype.is_plain(): lines.append('%s(%s, &%s);' % ( BlobLib.get_interface(subtype).get_deserialize_name(address_space_qualifier), field_reference, field_variable )) else: lines.append('%s = (%s*)(%s);' % (field_variable, cname, field_reference)) if not is_last_field: # determine size of component lines.append('%s = %s;' % (field_space, sizeof_call)) previous_field_space = field_space previous_field_offset = field_offset lines = ['\t' + line for line in lines] arguments.append('%s* %s' % (self.get_name(address_space_qualifier, ), 'self')) definition = 'void %s(%s)' % (self.get_deserialize_name(address_space_qualifier), ', '.join(arguments)) # fill function template lines.insert(0, definition) lines.insert(1, '{') for index, line in enumerate(declarations): lines.insert(2 + index, '\t' + line) lines.append('}') declaration = '\n'.join(lines) return definition.strip() + ';', declaration
def test_issctype(rep, expected): # ensure proper identification of scalar # data-types by issctype() actual = np.issctype(rep) assert_equal(actual, expected)
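# A plausible parametrization for the test above (cases taken from the
# np.issctype docstring; the decorator shape itself is an assumption):
# @pytest.mark.parametrize("rep, expected", [
#     (np.int32, True), (np.dtype('str'), True),
#     (list, False), (1.1, False),
# ])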
import numpy as np np.maximum_sctype("S8") np.maximum_sctype(object) np.issctype(object) np.issctype("S8") np.obj2sctype(list) np.obj2sctype(list, default=None) np.obj2sctype(list, default=np.string_) np.issubclass_(np.int32, int) np.issubclass_(np.float64, float) np.issubclass_(np.float64, (int, float)) np.issubsctype("int64", int) np.issubsctype(np.array([1]), np.array([1])) np.issubdtype("S1", np.string_) np.issubdtype(np.float64, np.float32) np.sctype2char("S1") np.sctype2char(list) np.find_common_type([], [np.int64, np.float32, complex]) np.find_common_type((), (np.int64, np.float32, complex)) np.find_common_type([np.int64, np.float32], []) np.find_common_type([np.float32], [np.int64, np.float64]) np.cast[int]
def genfromdta(fname, missing_flt=-999., missing_str=""): """ Returns an ndarray from a Stata .dta file. Parameters ---------- fname : str or filehandle Stata .dta file. missing_flt : numeric The numeric value to replace missing values with. Will be used for any numeric value. missing_str : str The string to replace missing values with for string variables. Notes ------ Date types will be returned as their numeric value in Stata. A date parser is not written yet. """ if isinstance(fname, basestring): fhd = StataReader(open(fname, 'rb'), missing_values=False) elif not hasattr(fname, 'read'): raise TypeError("The input should be a string or a filehandle. "\ "(got %s instead)" % type(fname)) else: fhd = StataReader(fname, missing_values=False) # validate_names = np.lib._iotools.NameValidator(excludelist=excludelist, # deletechars=deletechars, # case_sensitive=case_sensitive) #TODO: This needs to handle the byteorder? header = fhd.file_headers() types = header['dtyplist'] nobs = header['nobs'] numvars = header['nvar'] varnames = header['varlist'] dataname = header['data_label'] labels = header['vlblist'] # labels are thrown away unless DataArray # type is used data = np.zeros((nobs, numvars)) stata_dta = fhd.dataset() # key is given by np.issctype convert_missing = {True: missing_flt, False: missing_str} dt = np.dtype(zip(varnames, types)) data = np.zeros((nobs), dtype=dt) # init final array for rownum, line in enumerate(stata_dta): # doesn't handle missing value objects, just casts # None will only work without missing value object. if None in line: # and not remove_comma: for i, val in enumerate(line): if val is None: line[i] = convert_missing[np.issctype(types[i])] data[rownum] = tuple(line) #TODO: make it possible to return plain array if all 'f8' for example return data
import numpy as np np.issctype(np.int32) np.issctype(list) np.issctype(1.1) np.issctype(np.dtype('str'))
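# Expected results, matching the np.issctype docstring examples:
# True, False, False, True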
def ndarray_to_mxarray(libmx, arr): ### Prepare `arr` object (convert to ndarray if possible), assert ### data type if isinstance(arr, str) or isinstance(arr, unicode): pass elif isinstance(arr, dict): raise NotImplementedError('dicts are not supported.') elif ('pandas' in sys.modules) and isinstance( arr, sys.modules['pandas'].DataFrame): arr = arr.to_records() elif ('pandas' in sys.modules) and isinstance( arr, sys.modules['pandas'].Series): arr = arr.to_frame().to_records() elif isinstance(arr, collections.Iterable): arr = np.array(arr, ndmin=2) elif np.issctype(type(arr)): arr = np.array(arr, ndmin=2) else: raise NotImplementedError("Data type not supported: {}".format( type(arr))) ### Convert ndarray to mxarray if isinstance(arr, str): pm = libmx.mxCreateString(arr) elif isinstance(arr, unicode): pm = libmx.mxCreateString(arr.encode('utf-8')) elif isinstance(arr, np.ndarray) and arr.dtype.kind in ['i', 'u', 'f', 'c']: dim = arr.ctypes.shape_as(mwSize) complex_flag = (arr.dtype.kind == 'c') pm = libmx.mxCreateNumericArray(arr.ndim, dim, dtype_to_mat(arr.dtype), complex_flag) mat_data = libmx.mxGetData(pm) np_data = arr.real.tostring('F') ctypes.memmove(mat_data, np_data, len(np_data)) if complex_flag: mat_data = libmx.mxGetImagData(pm) np_data = arr.imag.tostring('F') ctypes.memmove(mat_data, np_data, len(np_data)) elif isinstance(arr, np.ndarray) and arr.dtype.kind == 'b': dim = arr.ctypes.shape_as(mwSize) pm = libmx.mxCreateLogicalArray(arr.ndim, dim) mat_data = libmx.mxGetData(pm) np_data = arr.real.tostring('F') ctypes.memmove(mat_data, np_data, len(np_data)) elif isinstance(arr, np.ndarray) and arr.dtype.kind in ('O', 'S', 'U'): dim = arr.ctypes.shape_as(mwSize) pm = libmx.mxCreateCellArray(arr.ndim, dim) for i, el in enumerate(arr.flatten('F')): p = ndarray_to_mxarray(libmx, el) libmx.mxSetCell(pm, i, p) elif isinstance(arr, np.ndarray) and len(arr.dtype) > 0: dim = arr.ctypes.shape_as(mwSize) name_num = len(arr.dtype.names) names_p = (c_char_p * name_num)(*[c_char_p(name) for name in arr.dtype.names]) pm = libmx.mxCreateStructArray( arr.ndim, dim, name_num, names_p, ) for i, record in enumerate(arr.flatten('F')): for name in arr.dtype.names: el = record[name] p = ndarray_to_mxarray(libmx, el) libmx.mxSetField(pm, i, name, p) elif isinstance(arr, np.ndarray): raise NotImplementedError('Unsupported dtype: {}'.format(arr.dtype)) return pm
import numpy as np reveal_type(np.issctype(np.generic)) # E: bool reveal_type(np.issctype("foo")) # E: bool reveal_type(np.obj2sctype("S8")) # E: Union[numpy.generic, None] reveal_type(np.obj2sctype("S8", default=None)) # E: Union[numpy.generic, None] reveal_type( np.obj2sctype("foo", default=int) # E: Union[numpy.generic, Type[builtins.int*]] ) reveal_type(np.issubclass_(np.float64, float)) # E: bool reveal_type(np.issubclass_(np.float64, (int, float))) # E: bool reveal_type(np.sctype2char("S8")) # E: str reveal_type(np.sctype2char(list)) # E: str reveal_type(np.find_common_type([np.int64], [np.int64])) # E: numpy.dtype
def ValueWithUncsRounding(x, uncs, uncsigfigs=1):
    """
    Rounds all of the values in uncs (the uncertainties) to the number of
    significant figures in uncsigfigs. Then rounds the values in x to the
    same decimal place as the values in uncs.

    Return value is a two-element tuple whose elements have the same types
    as x and uncs, respectively.

    Restrictions:
    - uncsigfigs must be a positive integer.
    - x must be a real value or an array like object containing only real
      values.
    - uncs must be a real value or an array like object containing only real
      values.
    """
    if not (type(uncsigfigs) is int or type(uncsigfigs) is long or
            isinstance(uncsigfigs, np.integer)):
        raise TypeError(
            "ValueWithUncsRounding: uncsigfigs must be an integer.")

    if uncsigfigs <= 0:
        raise ValueError("ValueWithUncsRounding: uncsigfigs must be positive.")

    if not np.all(np.isreal(x)):
        raise TypeError("ValueWithUncsRounding: all x must be real.")

    if not np.all(np.isreal(uncs)):
        raise TypeError("ValueWithUncsRounding: all uncs must be real.")

    if np.any(uncs <= 0):
        raise ValueError("ValueWithUncsRounding: uncs must all be positive.")

    # temporarily suppress floating point errors
    errhanddict = np.geterr()
    np.seterr(all="ignore")

    matrixflag = False
    if isinstance(x, np.matrix):  # convert matrices to arrays
        matrixflag = True
        x = np.asarray(x)

    # Pre-round unc to correctly handle cases where rounding alters the
    # most significant digit of unc.
    uncs = RoundToSigFigs_fp(uncs, uncsigfigs)

    mantissas, binaryExponents = np.frexp(uncs)

    decimalExponents = __logBase10of2 * binaryExponents
    omags = np.floor(decimalExponents)

    mantissas *= 10.0**(decimalExponents - omags)

    if type(mantissas) is float or np.issctype(np.dtype(mantissas)):
        if mantissas < 1.0:
            mantissas *= 10.0
            omags -= 1.0
    else:  # elif np.all(np.isreal( mantissas )):
        fixmsk = mantissas < 1.0
        mantissas[fixmsk] *= 10.0
        omags[fixmsk] -= 1.0

    scales = 10.0**omags

    prec = uncsigfigs - 1
    result = (np.around(x / scales, decimals=prec) * scales,
              np.around(mantissas, decimals=prec) * scales)

    if matrixflag:
        result = np.matrix(result, copy=False)

    np.seterr(**errhanddict)
    return result
import numpy as np a = [1, None] b = np.array(a) x = b.dtype.kind in {'U', 'S'} print(x) print(np.argwhere(np.isnan(b))) print(np.issctype(np.array([1])))
import numpy as np reveal_type(np.maximum_sctype(np.float64)) # E: Type[{float64}] reveal_type(np.maximum_sctype("f8")) # E: Type[Any] reveal_type(np.issctype(np.float64)) # E: bool reveal_type(np.issctype("foo")) # E: Literal[False] reveal_type(np.obj2sctype(np.float64)) # E: Union[None, Type[{float64}]] reveal_type(np.obj2sctype( np.float64, default=False)) # E: Union[builtins.bool, Type[{float64}]] reveal_type(np.obj2sctype("S8")) # E: Union[None, Type[Any]] reveal_type(np.obj2sctype("S8", default=None)) # E: Union[None, Type[Any]] reveal_type(np.obj2sctype("foo", default=False)) # E: Union[builtins.bool, Type[Any]] reveal_type(np.obj2sctype(1)) # E: None reveal_type(np.obj2sctype(1, default=False)) # E: bool reveal_type(np.issubclass_(np.float64, float)) # E: bool reveal_type(np.issubclass_(np.float64, (int, float))) # E: bool reveal_type(np.issubclass_(1, 1)) # E: Literal[False] reveal_type(np.sctype2char("S8")) # E: str reveal_type(np.sctype2char(list)) # E: str reveal_type(np.find_common_type([np.int64], [np.int64])) # E: numpy.dtype[Any] reveal_type(np.cast[int]) # E: _CastFunc reveal_type(np.cast["i8"]) # E: _CastFunc reveal_type(np.cast[np.int64]) # E: _CastFunc
def unify_datatype(cls, graphs, inplace=False): '''Ensure that each attribute has the same data type across graphs. Parameters ---------- graphs: list A list of graphs that have the same set of node and edge features. The types for each attribute will then be chosen to be the smallest scalar type that can safely hold all the values as found across the graphs. inplace: bool Whether or not to modify the graph features in-place. Returns ------- None or list If inplace is True, the graphs will be modified in-place and nothing will be returned. Otherwise, a new list of graphs with type-unified features will be returned. ''' '''copy graphs if not editing in-place''' for g in graphs: g.cookie.clear() if inplace is not True: graphs = [g.copy(deep=False) for g in graphs] '''ensure all graphs have the same node and edge features''' features = {} for component in ['nodes', 'edges']: first = None for g in graphs: second = set(getattr(g, component).columns) first = first or second if second != first: raise TypeError(f'Graph {g} with node features {second} ' 'does not match with the other graphs.') features[component] = first '''unify data type for each feature''' for component in ['nodes', 'edges']: group = [getattr(g, component) for g in graphs] for key in features[component]: types = [g[key].concrete_type for g in group] t = common_min_type.of_types(types) if t == np.object: t = common_min_type.of_types(types, coerce=False) if t is None: raise TypeError( f'Cannot unify attribute {key} containing mixed ' 'object types') if np.issctype(t): for g in group: g[key] = g[key].astype(t) elif t in [list, tuple, np.ndarray]: t_sub = common_min_type.of_values( it.chain.from_iterable( it.chain.from_iterable([g[key] for g in group]))) if t_sub is None: raise TypeError( f'Cannot find a common type for elements in {key}.' ) for g in group: g[key] = [np.array(seq, dtype=t_sub) for seq in g[key]] '''only returns if not editing in-place''' if inplace is not True: return graphs
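# Usage sketch for unify_datatype (hedged: assumes common_min_type.of_types
# follows NumPy promotion): if g1 stores a node column as int16 and g2 stores
# the same column as float32, unify_datatype([g1, g2]) returns copies where
# both columns are cast to the common scalar type float32; mixed object-typed
# columns with no common element type raise TypeError instead.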
def genfromdta(fname, missing_flt=-999., missing_str="", encoding=None): """ Returns an ndarray from a Stata .dta file. Parameters ---------- fname : str or filehandle Stata .dta file. missing_flt : numeric The numeric value to replace missing values with. Will be used for any numeric value. missing_str : str The string to replace missing values with for string variables. encoding : string, optional Used for Python 3 only. Encoding to use when reading the .dta file. Defaults to `locale.getpreferredencoding` Notes ------ Date types will be returned as their numeric value in Stata. A date parser is not written yet. """ if isinstance(fname, basestring): fhd = StataReader(open(fname, 'rb'), missing_values=False, encoding=encoding) elif not hasattr(fname, 'read'): raise TypeError("The input should be a string or a filehandle. "\ "(got %s instead)" % type(fname)) else: fhd = StataReader(fname, missing_values=False, encoding=encoding) # validate_names = np.lib._iotools.NameValidator(excludelist=excludelist, # deletechars=deletechars, # case_sensitive=case_sensitive) #TODO: This needs to handle the byteorder? header = fhd.file_headers() types = header['dtyplist'] nobs = header['nobs'] numvars = header['nvar'] varnames = header['varlist'] dataname = header['data_label'] labels = header['vlblist'] # labels are thrown away unless DataArray # type is used data = np.zeros((nobs,numvars)) stata_dta = fhd.dataset() # key is given by np.issctype convert_missing = { True : missing_flt, False : missing_str} dt = np.dtype(zip(varnames, types)) data = np.zeros((nobs), dtype=dt) # init final array for rownum,line in enumerate(stata_dta): # doesn't handle missing value objects, just casts # None will only work without missing value object. if None in line:# and not remove_comma: for i,val in enumerate(line): if val is None: line[i] = convert_missing[np.issctype(types[i])] data[rownum] = tuple(line) #TODO: make it possible to return plain array if all 'f8' for example return data
def __init__(self, graph):
    self.nodes = nodes = graph.nodes.copy(deep=False)
    self.edges = edges = graph.edges.copy(deep=False)
    self.n_node = len(nodes)

    ''' substitute columns corresponding to object-type node/edge attributes
    to their GPU counterparts '''
    for df in [nodes, edges]:
        for key in list(df.columns):
            if not np.issctype(df[key].dtype):
                if issubclass(df[key].concrete_type, (list, tuple, np.ndarray)):
                    inner_type = common_min_type.of_types([
                        x.dtype if isinstance(x, np.ndarray)
                        else common_min_type.of_values(x) for x in df[key]
                    ])
                    if not np.issctype(inner_type):
                        raise TypeError(
                            f'List-like graph attributes must have scalar '
                            f'elements. Attribute {key} is {inner_type}.')
                    buffer = memoryview(
                        umlike(np.fromiter(
                            it.chain.from_iterable(df[key]),
                            dtype=inner_type)))
                    size = np.fromiter(map(len, df[key]), dtype=np.int)
                    head = np.cumsum(size) - size
                    # mangle key with type information
                    tag = '${key}::frozen_array::{dtype}'.format(
                        key=key, dtype=inner_type.str)
                    data = np.empty_like(df[key], dtype=np.object)
                    for i, (h, s) in enumerate(zip(head, size)):
                        data[i] = np.frombuffer(
                            buffer[h:h + s], dtype=inner_type
                        ).view(self.CustomType.FrozenArray)
                    df[tag] = data
                    df.drop([key], inplace=True)
                else:
                    raise TypeError(
                        f'Unsupported non-scalar attribute {key} '
                        f'of type {df[key].concrete_type}')

    ''' add phantom label if none exists to facilitate C++ interop '''
    assert len(nodes.columns) >= 1
    if len(nodes.columns) == 1:
        nodes['labeled'] = np.zeros(len(nodes), np.bool_)

    assert len(edges.columns) >= 2
    if len(edges.columns) == 2:
        assert '!i' in edges and '!j' in edges
        edges['labeled'] = np.zeros(len(edges), np.bool_)

    ''' determine node type '''
    i = nodes['!i']
    nodes.drop(['!i'], inplace=True)
    self.node_t = node_t = nodes.rowtype()
    self.nodes_aos = umempty(len(nodes), dtype=node_t)
    self.nodes_aos[i] = list(nodes.iterstates())

    ''' determine whether graph is weighted, determine edge type,
    and compute node degrees '''
    self.degree = degree = umzeros(self.n_node, dtype=np.float32)
    edge_t = edges.drop(['!i', '!j', '!w']).rowtype()
    self_loops = edges[edges['!i'] == edges['!j']]
    nnz = len(edges)
    if '!w' in edges:
        # weighted graph
        self.weighted = True
        np.add.at(degree, edges['!i'], edges['!w'])
        np.add.at(degree, edges['!j'], edges['!w'])
        np.subtract.at(degree, self_loops['!i'], self_loops['!w'])
        if edge_t.itemsize != 0:
            labels = list(edges[edge_t.names].iterstates())
        else:
            labels = [None] * len(edges)
        edge_t = np.dtype([('weight', np.float32), ('label', edge_t)],
                          align=True)
        edges_aos = np.fromiter(zip(edges['!w'], labels), dtype=edge_t,
                                count=nnz)
    else:
        self.weighted = False
        np.add.at(degree, edges['!i'], 1.0)
        np.add.at(degree, edges['!j'], 1.0)
        np.subtract.at(degree, self_loops['!i'], 1.0)
        edges_aos = np.fromiter(edges[edge_t.names].iterstates(),
                                dtype=edge_t, count=nnz)
    self.edge_t = edge_t
    degree[degree == 0] = 1.0

    ''' collect non-zero edge octiles '''
    indices = np.empty((4, nnz * 2), dtype=np.uint32, order='C')
    i, j, up, lf = indices
    i[:nnz] = edges['!i']
    j[:nnz] = edges['!j']
    # replicate & swap i and j for the lower triangular part
    i[nnz:], j[nnz:] = j[:nnz], i[:nnz]
    # get upper left corner of owner octiles
    up[:] = i - i % 8
    lf[:] = j - j % 8

    # np.unique implies lexical sort (lf, up, j, i)
    (lf, up, j, i), perm = np.unique(indices[-1::-1, :], axis=1,
                                     return_index=True)

    self.edges_aos = umempty(len(i), edge_t)
    self.edges_aos[:] = edges_aos[perm % nnz]  # mod nnz due to symmetry

    diff = np.empty_like(up)
    diff[1:] = (up[:-1] != up[1:]) | (lf[:-1] != lf[1:])
    diff[:1] = True
    oct_offset = np.flatnonzero(diff)
    self.n_octile = len(oct_offset)

    nzmasks = np.bitwise_or.reduceat(
        1 << (i - up + (j - lf) * 8).astype(np.uint64), oct_offset)
    nzmasks_r = np.bitwise_or.reduceat(
        1 << (j - lf + (i - up) * 8).astype(np.uint64), oct_offset)

    self.octiles = octiles = umempty(self.n_octile, self.Octile.dtype)
    octiles[:] = list(zip(
        int(self.edges_aos.base) + oct_offset * edge_t.itemsize,
        nzmasks, nzmasks_r, up[oct_offset], lf[oct_offset]
    ))