def test_upgrade(self):
    """Check that `StringConverter.upgrade` steps through its converters.

    Starting from a fresh converter (status 0), byte strings that need an
    increasingly general type are fed to `upgrade`; after each call both
    the converted value and the `_status` index are verified.
    """
    converter = StringConverter()
    assert_equal(converter._status, 0)

    # test int
    assert_equal(converter.upgrade(b'0'), 0)
    assert_equal(converter._status, 1)

    # On systems where integer defaults to 32-bit, the statuses will be
    # offset by one, so we check for this here.
    import numpy.core.numeric as nx
    # Use the concrete default integer type: building a dtype from the
    # abstract `nx.integer` class is deprecated.
    status_offset = int(
        nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize)

    # test int > 2**32
    assert_equal(converter.upgrade(b'17179869184'), 17179869184)
    assert_equal(converter._status, 1 + status_offset)

    # test float
    assert_allclose(converter.upgrade(b'0.'), 0.0)
    assert_equal(converter._status, 2 + status_offset)

    # test complex
    assert_equal(converter.upgrade(b'0j'), complex('0j'))
    assert_equal(converter._status, 3 + status_offset)

    # test str: the converter is expected to end on the last mapper entry
    assert_equal(converter.upgrade(b'a'), b'a')
    assert_equal(converter._status, len(converter._mapper) - 1)
def __new__(subtype, shape, dtype=None, buf=None, offset=0, strides=None,
            formats=None, names=None, titles=None,
            byteorder=None, aligned=False):
    """Allocate a new instance of `subtype` with a record data-type.

    The field layout comes from `dtype` when it is given; otherwise it is
    built by `format_parser` from `formats`, `names`, `titles`, `aligned`
    and `byteorder`.  When `buf` is supplied the array is created on top
    of that buffer using `offset` and `strides`; otherwise those two
    arguments are not used.
    """
    if dtype is None:
        parser = format_parser(formats, names, titles, aligned, byteorder)
        descr = parser._descr
    else:
        descr = sb.dtype(dtype)

    if buf is not None:
        return ndarray.__new__(subtype, shape, (record, descr),
                               buffer=buf, offset=offset, strides=strides)
    return ndarray.__new__(subtype, shape, (record, descr))
def __init__(self, pyfunc, otypes='', doc=None):
    """Store `pyfunc` together with its argument counts and output types.

    Parameters
    ----------
    pyfunc : callable
        Function to wrap; kept as ``self.thefunc``.
    otypes : str or iterable, optional
        Either a string of typecode characters (each must be listed in
        ``typecodes['All']``) or an iterable of objects accepted by
        ``_nx.dtype``, which is reduced to a string of their ``char`` codes.
    doc : str, optional
        Docstring for the instance; defaults to ``pyfunc.__doc__``.

    Raises
    ------
    ValueError
        If `otypes` holds an unknown typecode, or is neither a string nor
        iterable.
    """
    self.thefunc = pyfunc
    self.ufunc = None

    nin, ndefault = _get_nargs(pyfunc)
    if nin == 0 and ndefault == 0:
        # Both counts zero: the argument counts are recorded as unknown.
        self.nin = None
        self.nin_wo_defaults = None
    else:
        self.nin = nin
        self.nin_wo_defaults = nin - ndefault
    self.nout = None

    self.__doc__ = pyfunc.__doc__ if doc is None else doc

    # `str` is the same object as the old `types.StringType`, and the call
    # form of `raise` is valid on both Python 2 and Python 3.
    if isinstance(otypes, str):
        self.otypes = otypes
        for char in self.otypes:
            if char not in typecodes['All']:
                raise ValueError("invalid otype specified")
    elif iterable(otypes):
        self.otypes = ''.join([_nx.dtype(x).char for x in otypes])
    else:
        raise ValueError("output types must be a string of typecode "
                         "characters or a list of data-types")
    self.lastcallargs = 0
def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    """Create a (read-only) record array from binary data held in a string.

    The record layout is taken from `dtype` or, when `dtype` is None, built
    by `format_parser` from `formats`, `names`, `titles`, `aligned` and
    `byteorder`.  If `shape` is None, 0 or -1, it is set to the number of
    whole records that fit in `datastring` after `offset` bytes.

    Raises
    ------
    ValueError
        If neither `dtype` nor `formats` is given.
    """
    if dtype is None and formats is None:
        raise ValueError("Must have dtype= or formats=")

    if dtype is not None:
        descr = sb.dtype(dtype)
    else:
        descr = format_parser(formats, names, titles,
                              aligned, byteorder)._descr

    itemsize = descr.itemsize
    if shape is None or shape == 0 or shape == -1:
        # Floor division keeps the record count an integer even when `/`
        # means true division.
        shape = (len(datastring) - offset) // itemsize

    _array = recarray(shape, descr, buf=datastring, offset=offset)
    return _array
def __new__(subtype, data, dtype=None, copy=True):
    """Build a 2-D matrix from a matrix, an ndarray, a string or a sequence.

    Parameters
    ----------
    subtype : type
        Class of the object to create.
    data : matrix, ndarray, str or array_like
        Source data.  A string is first passed to `_convert_from_string`.
    dtype : data-type, optional
        Requested element type; defaults to the type of `data`.
    copy : bool, optional
        If False, existing array data is reused where the branches below
        allow it.

    Raises
    ------
    ValueError
        If the data has more than two dimensions.

    A `PendingDeprecationWarning` is always emitted.
    """
    warnings.warn(
        "the matrix subclass is not the recommended way to "
        "represent matrices or deal with linear algebra (see "
        "https://docs.scipy.org/doc/numpy/user/"
        "numpy-for-matlab-users.html). "
        "Please adjust your code to use regular ndarray.",
        PendingDeprecationWarning,
        stacklevel=2,
    )
    if isinstance(data, matrix):
        dtype2 = data.dtype
        if dtype is None:
            dtype = dtype2
        if (dtype2 == dtype) and (not copy):
            return data
        return data.astype(dtype)

    if isinstance(data, N.ndarray):
        if dtype is None:
            intype = data.dtype
        else:
            intype = N.dtype(dtype)
        new = data.view(subtype)
        if intype != data.dtype:
            return new.astype(intype)
        if copy:
            return new.copy()
        else:
            return new

    if isinstance(data, str):
        data = _convert_from_string(data)

    # now convert data to an array
    arr = N.array(data, dtype=dtype, copy=copy)
    ndim = arr.ndim
    shape = arr.shape
    if ndim > 2:
        raise ValueError("matrix must be 2-dimensional")
    elif ndim == 0:
        shape = (1, 1)
    elif ndim == 1:
        shape = (1, shape[0])

    order = "C"
    if (ndim == 2) and arr.flags.fortran:
        order = "F"

    # `order` is always a non-empty string, so testing its truthiness made
    # this copy unreachable.  Compare against "F" explicitly so that data
    # which is neither Fortran-ordered nor contiguous is copied before it
    # is used as a buffer.
    if order != "F" and not arr.flags.contiguous:
        arr = arr.copy()

    ret = N.ndarray.__new__(subtype, shape, arr.dtype,
                            buffer=arr, order=order)
    return ret
def _parseFormats(self, formats, aligned=0):
    """Parse the field formats.

    `formats` may be a list of format strings or anything accepted by
    `sb.dtype`.  The per-field data-types, byte offsets and field count are
    stored on ``self._f_formats``, ``self._offsets`` and ``self._nfields``.

    Raises
    ------
    ValueError
        If `formats` is None.
    """
    if formats is None:
        raise ValueError("Need formats argument")

    if isinstance(formats, list):
        # A list with fewer than two entries gets an empty trailing entry
        # so the joined string still contains a comma.  Build a new list
        # instead of appending, so the caller's list is not modified.
        parts = formats + [''] if len(formats) < 2 else formats
        formats = ','.join(parts)

    dtype = sb.dtype(formats, aligned)
    fields = dtype.fields
    if fields is None:
        # No fields were produced: wrap the type as a single field 'f1'.
        dtype = sb.dtype([('f1', dtype)], aligned)
        fields = dtype.fields

    keys = dtype.names
    self._f_formats = [fields[key][0] for key in keys]
    self._offsets = [fields[key][1] for key in keys]
    self._nfields = len(keys)
def view(self, obj):
    """Return a view of the array as the type or data-type `obj`.

    If `obj` is an ndarray subclass the view is delegated to
    `ndarray.view`.  Otherwise `obj` is read as a data-type: one without
    fields is applied to the result of ``self.__array__()``, while one
    with fields is again delegated to `ndarray.view`.
    """
    try:
        wants_array_type = issubclass(obj, ndarray)
        if wants_array_type:
            return ndarray.view(self, obj)
    except TypeError:
        # Fall through and treat `obj` as a data-type instead.
        pass

    descr = sb.dtype(obj)
    if descr.fields is not None:
        return ndarray.view(self, obj)
    return self.__array__().view(descr)
def _createdescr(self, byteorder):
    """Build the record data-type and store it on ``self._descr``.

    The data-type is assembled from the names, formats, offsets and titles
    already stored on the instance.  When `byteorder` is not None, its
    first character is looked up in `_byteorderconv` and the resulting
    byte order is applied to the data-type.
    """
    spec = {
        'names': self._names,
        'formats': self._f_formats,
        'offsets': self._offsets,
        'titles': self._titles,
    }
    descr = sb.dtype(spec)
    if byteorder is not None:
        order_code = _byteorderconv[byteorder[0]]
        descr = descr.newbyteorder(order_code)
    self._descr = descr
def fromrecords(
    reclist,
    dates=None,
    freq=None,
    start_date=None,
    dtype=None,
    shape=None,
    formats=None,
    names=None,
    titles=None,
    aligned=False,
    byteorder=None,
):
    """Create a MultiTimeSeries from a list of records.

    The data in the same field can be heterogeneous, they will be promoted
    to the highest data type.  This method is intended for creating smaller
    record arrays.  If used to create large arrays without formats defined,
    it can be slow.

    If `formats` is None, then this will auto-detect formats.  Use a list
    of tuples rather than a list of lists for faster processing.

    A field named ``dates`` or ``_dates`` (case-insensitive) is removed
    from the data and handed to `__getdates` as the candidate dates; if
    several such fields exist the last one is used.
    """
    # reclist is in fact a MultiTimeSeries: just rewrap it.
    if isinstance(reclist, MultiTimeSeries):
        return MultiTimeSeries(reclist, dtype=reclist.dtype)

    # No format, no dtype: create from the records.
    _data = mrecfromrecords(
        reclist,
        dtype=dtype,
        shape=shape,
        formats=formats,
        names=names,
        titles=titles,
        aligned=aligned,
        byteorder=byteorder,
    )
    _dtype = _data.dtype

    # Check the names for a '_dates' field.
    newdates = None
    _names = list(_dtype.names)
    reserved = [n for n in _names if n.lower() in ["dates", "_dates"]]
    if reserved:
        newdates = _data[reserved[-1]]
        _names = [n for n in _names if n not in reserved]
        _dtype = numeric.dtype(
            [t for t in _dtype.descr if t[0] not in reserved])
        # NOTE(review): reconstructed as part of this branch (the field
        # list only needs rebuilding once fields were dropped) - confirm.
        _data = [_data[n] for n in _names]

    newdates = __getdates(dates=dates, newdates=newdates, length=len(_data),
                          freq=freq, start_date=start_date)

    return MultiTimeSeries(_data, dates=newdates, dtype=_dtype, names=_names)
def _createdescr(self, byteorder):
    """Assemble the record data-type and keep it as ``self._descr``.

    The names, formats, offsets and titles stored on the instance define
    the data-type.  A non-None `byteorder` is translated through
    `_byteorderconv` (keyed on its first character) and applied with
    ``newbyteorder``.
    """
    layout = dict([
        ('names', self._names),
        ('formats', self._f_formats),
        ('offsets', self._offsets),
        ('titles', self._titles),
    ])
    result = sb.dtype(layout)
    if byteorder is not None:
        result = result.newbyteorder(_byteorderconv[byteorder[0]])
    self._descr = result
def __new__(subtype, data, dtype=None, copy=True):
    """Build a 2-D matrix from a matrix, an ndarray, a string or a sequence.

    Parameters
    ----------
    subtype : type
        Class of the object to create.
    data : matrix, ndarray, str or array_like
        Source data.  A string is first passed to `_convert_from_string`.
    dtype : data-type, optional
        Requested element type; defaults to the type of `data`.
    copy : bool, optional
        If False, existing array data is reused where the branches below
        allow it.

    Raises
    ------
    ValueError
        If the data has more than two dimensions.

    A `PendingDeprecationWarning` is always emitted.
    """
    warnings.warn('the matrix subclass is not the recommended way to '
                  'represent matrices or deal with linear algebra (see '
                  'https://docs.scipy.org/doc/numpy/user/'
                  'numpy-for-matlab-users.html). '
                  'Please adjust your code to use regular ndarray.',
                  PendingDeprecationWarning, stacklevel=2)
    if isinstance(data, matrix):
        dtype2 = data.dtype
        if dtype is None:
            dtype = dtype2
        if (dtype2 == dtype) and (not copy):
            return data
        return data.astype(dtype)

    if isinstance(data, N.ndarray):
        if dtype is None:
            intype = data.dtype
        else:
            intype = N.dtype(dtype)
        new = data.view(subtype)
        if intype != data.dtype:
            return new.astype(intype)
        if copy:
            return new.copy()
        else:
            return new

    if isinstance(data, str):
        data = _convert_from_string(data)

    # now convert data to an array
    arr = N.array(data, dtype=dtype, copy=copy)
    ndim = arr.ndim
    shape = arr.shape
    if ndim > 2:
        raise ValueError("matrix must be 2-dimensional")
    elif ndim == 0:
        shape = (1, 1)
    elif ndim == 1:
        shape = (1, shape[0])

    order = 'C'
    if (ndim == 2) and arr.flags.fortran:
        order = 'F'

    # `order` is always a non-empty string, so the former truthiness test
    # could never trigger the copy.  Copy whenever the data is neither
    # Fortran-ordered nor contiguous.
    if order != 'F' and not arr.flags.contiguous:
        arr = arr.copy()

    ret = N.ndarray.__new__(subtype, shape, arr.dtype,
                            buffer=arr,
                            order=order)
    return ret
def test_upgrade(self):
    """Exercise `StringConverter.upgrade` from int up to string."""
    conv = StringConverter()
    assert_equal(conv._status, 0)

    # test int
    assert_equal(conv.upgrade("0"), 0)
    assert_equal(conv._status, 1)

    # On systems where long defaults to 32-bit, the statuses will be
    # offset by one, so we check for this here.
    import numpy.core.numeric as nx
    default_int_size = nx.dtype(nx.int_).itemsize
    status_offset = int(default_int_size < nx.dtype(nx.int64).itemsize)

    # test int > 2**32
    big = conv.upgrade("17179869184")
    assert_equal(big, 17179869184)
    assert_equal(conv._status, 1 + status_offset)

    # test float
    as_float = conv.upgrade("0.")
    assert_allclose(as_float, 0.0)
    assert_equal(conv._status, 2 + status_offset)

    # test complex
    as_complex = conv.upgrade("0j")
    assert_equal(as_complex, complex("0j"))
    assert_equal(conv._status, 3 + status_offset)

    # test str
    # note that the longdouble type has been skipped, so the
    # _status increases by 2. Everything should succeed with
    # unicode conversion (8).
    for raw in ["a", b"a"]:
        upgraded = conv.upgrade(raw)
        assert_(type(upgraded) is str)
        assert_equal(upgraded, "a")
        assert_equal(conv._status, 8 + status_offset)
def __new__(subtype, data, dtype=None, copy=True):
    """Build a 2-D matrix from a matrix, an ndarray, a string or a sequence.

    Parameters
    ----------
    subtype : type
        Class of the object to create.
    data : matrix, ndarray, str or array_like
        Source data.  A string is first passed to `_convert_from_string`.
    dtype : data-type, optional
        Requested element type; defaults to the type of `data`.
    copy : bool, optional
        If False, existing array data is reused where the branches below
        allow it.

    Raises
    ------
    ValueError
        If the data has more than two dimensions.
    """
    if isinstance(data, matrix):
        dtype2 = data.dtype
        if dtype is None:
            dtype = dtype2
        if (dtype2 == dtype) and (not copy):
            return data
        return data.astype(dtype)

    if isinstance(data, N.ndarray):
        if dtype is None:
            intype = data.dtype
        else:
            intype = N.dtype(dtype)
        new = data.view(subtype)
        if intype != data.dtype:
            return new.astype(intype)
        if copy:
            return new.copy()
        else:
            return new

    if isinstance(data, str):
        data = _convert_from_string(data)

    # now convert data to an array
    arr = N.array(data, dtype=dtype, copy=copy)
    ndim = arr.ndim
    shape = arr.shape
    if ndim > 2:
        raise ValueError("matrix must be 2-dimensional")
    elif ndim == 0:
        shape = (1, 1)
    elif ndim == 1:
        shape = (1, shape[0])

    order = 'C'
    if (ndim == 2) and arr.flags.fortran:
        order = 'F'

    # `order` is always a non-empty string, so the former truthiness test
    # could never trigger the copy.  Copy whenever the data is neither
    # Fortran-ordered nor contiguous.
    if order != 'F' and not arr.flags.contiguous:
        arr = arr.copy()

    ret = N.ndarray.__new__(subtype, shape, arr.dtype,
                            buffer=arr,
                            order=order)
    return ret
def test_upgrade(self):
    """Exercise `StringConverter.upgrade` from int up to unicode."""
    conv = StringConverter()
    assert_equal(conv._status, 0)

    # test int
    assert_equal(conv.upgrade('0'), 0)
    assert_equal(conv._status, 1)

    # On systems where long defaults to 32-bit, the statuses will be
    # offset by one, so we check for this here.
    import numpy.core.numeric as nx
    default_int_size = nx.dtype(nx.int_).itemsize
    status_offset = int(default_int_size < nx.dtype(nx.int64).itemsize)

    # test int > 2**32
    big = conv.upgrade('17179869184')
    assert_equal(big, 17179869184)
    assert_equal(conv._status, 1 + status_offset)

    # test float
    as_float = conv.upgrade('0.')
    assert_allclose(as_float, 0.0)
    assert_equal(conv._status, 2 + status_offset)

    # test complex
    as_complex = conv.upgrade('0j')
    assert_equal(as_complex, complex('0j'))
    assert_equal(conv._status, 3 + status_offset)

    # test str
    # note that the longdouble type has been skipped, so the
    # _status increases by 2. Everything should succeed with
    # unicode conversion (5).
    for raw in ['a', u'a', b'a']:
        upgraded = conv.upgrade(raw)
        assert_(type(upgraded) is unicode)
        assert_equal(upgraded, u'a')
        assert_equal(conv._status, 5 + status_offset)
def __new__(subtype, data, dtype=None, copy=True):
    """Create a 2-D matrix from a matrix, ndarray, string or sequence.

    Matrices and ndarrays are converted or viewed directly; a string is
    first passed to `_convert_from_string`.  Anything else is turned into
    an array, given a 2-D shape (scalars become ``(1, 1)``, 1-D data a
    single row) and wrapped.  More than two dimensions raise ValueError.
    """
    if isinstance(data, matrix):
        source_type = data.dtype
        if dtype is None:
            dtype = source_type
        if not copy and source_type == dtype:
            return data
        return data.astype(dtype)

    if isinstance(data, N.ndarray):
        intype = data.dtype if dtype is None else N.dtype(dtype)
        new = data.view(subtype)
        if intype != data.dtype:
            return new.astype(intype)
        return new.copy() if copy else new

    if isinstance(data, str):
        data = _convert_from_string(data)

    # now convert data to an array
    arr = N.array(data, dtype=dtype, copy=copy)
    ndim = arr.ndim
    shape = arr.shape
    if ndim > 2:
        raise ValueError("matrix must be 2-dimensional")
    if ndim == 0:
        shape = (1, 1)
    elif ndim == 1:
        shape = (1, shape[0])

    # `order` is a boolean flag here: True only for 2-D Fortran data.
    order = True if (ndim == 2) and arr.flags.fortran else False

    if not order and not arr.flags.contiguous:
        arr = arr.copy()

    return N.ndarray.__new__(subtype, shape, arr.dtype,
                             buffer=arr, order=order)
def __fromrecords(recList, dtype=None, intNullVal=None):
    """Build a record array from a list of records, mapping null ints.

    This function was taken from np.core.records and updated to support
    conversion of null integers to `intNullVal`.

    Parameters
    ----------
    recList : sequence of tuples or lists
        One entry per record.
    dtype : data-type
        Field layout of the records.
    intNullVal : int, optional
        Value stored in int16/int32/int64 fields where a record holds None.

    Returns
    -------
    A record array with one element per entry of `recList`.
    """
    descr = sb.dtype((np.record, dtype))
    try:
        retval = sb.array(recList, dtype=descr)
    except TypeError:
        # Direct conversion failed (e.g. list of lists instead of list of
        # tuples, or a None in a numeric field): fill record by record.
        shape = (len(recList),)
        _array = np.recarray(shape, descr)
        start = 0
        try:
            for start in range(_array.size):
                _array[start] = tuple(recList[start])
        except TypeError:
            # `start` is the first record that could not be stored as-is.
            def _null_to_int(x):
                return intNullVal if x is None else x

            def _keep(x):
                return x

            int_types = [np.int16, np.int32, np.int64]
            # Use the constructed descriptor so that `dtype` may be any
            # data-type specification, not only a dtype instance.
            convs = tuple(
                _null_to_int if descr.fields[name][0] in int_types else _keep
                for name in descr.names)

            for k in range(start, _array.size):
                try:
                    _array[k] = tuple(recList[k])
                except TypeError:
                    _array[k] = tuple(
                        conv(value) for conv, value in zip(convs, recList[k]))
        return _array
    else:
        return retval.view(np.recarray)
def fromrecords(recList, dtype=None, intNullVal=None):
    """Build a record array from a list of records, mapping null ints.

    This function was taken from np.core.records and updated to support
    conversion of null integers to `intNullVal`.

    Parameters
    ----------
    recList : sequence of tuples or lists
        One entry per record.
    dtype : data-type
        Field layout of the records.
    intNullVal : int, optional
        Value stored in int16/int32/int64 fields where a record holds None.

    Returns
    -------
    A record array with one element per entry of `recList`.
    """
    descr = sb.dtype((np.record, dtype))
    try:
        retval = sb.array(recList, dtype=descr)
    except TypeError:
        # Direct conversion failed (e.g. list of lists instead of list of
        # tuples, or a None in a numeric field): fill record by record.
        shape = (len(recList),)
        _array = np.recarray(shape, descr)
        start = 0
        try:
            for start in range(_array.size):
                _array[start] = tuple(recList[start])
        except TypeError:
            # `start` is the first record that could not be stored as-is.
            def _null_to_int(x):
                return intNullVal if x is None else x

            def _keep(x):
                return x

            int_types = [np.int16, np.int32, np.int64]
            # Use the constructed descriptor so that `dtype` may be any
            # data-type specification, not only a dtype instance.
            convs = tuple(
                _null_to_int if descr.fields[name][0] in int_types else _keep
                for name in descr.names)

            for k in range(start, _array.size):
                try:
                    _array[k] = tuple(recList[k])
                except TypeError:
                    _array[k] = tuple(
                        conv(value) for conv, value in zip(convs, recList[k]))
        return _array
    else:
        return retval.view(np.recarray)
def fromstring(datastring, dtype=None, shape=None, offset=0, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    """Create a (read-only) record array from binary data held in a string.

    The record layout is taken from `dtype` or, when `dtype` is None, built
    by `format_parser` from `formats`, `names`, `titles`, `aligned` and
    `byteorder`.  If `shape` is None, 0 or -1, it is set to the number of
    whole records that fit in `datastring` after `offset` bytes.

    Raises
    ------
    ValueError
        If neither `dtype` nor `formats` is given.
    """
    if dtype is None and formats is None:
        # The call form of `raise` is valid on Python 2 and Python 3.
        raise ValueError("Must have dtype= or formats=")

    if dtype is not None:
        descr = sb.dtype(dtype)
    else:
        descr = format_parser(formats, names, titles,
                              aligned, byteorder)._descr

    itemsize = descr.itemsize
    if shape is None or shape == 0 or shape == -1:
        shape = (len(datastring) - offset) // itemsize

    _array = recarray(shape, descr, buf=datastring, offset=offset)
    return _array
def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
          names=None, titles=None, aligned=False, byteorder=None, copy=True):
    """Construct a record array from a wide variety of objects.

    `obj` may be None (an array of the given `shape` is created), a
    string, a list or tuple (of records or of arrays), a recarray, an open
    file or StringIO object, an ndarray, or any object exposing
    ``__array_interface__``.  The work is delegated to `fromstring`,
    `fromrecords`, `fromarrays` or `fromfile` where one of them applies.

    Raises
    ------
    ValueError
        If `obj` is None, a string or a file and neither `formats` nor
        `dtype` is given; if `obj` is None and `shape` is missing; or if
        the type of `obj` is not supported.
    """
    def _as_recarray(source):
        # View as recarray; void-typed results get the `record` scalar type.
        out = source.view(recarray)
        if issubclass(out.dtype.type, nt.void):
            out.dtype = sb.dtype((record, out.dtype))
        return out

    needs_layout = isinstance(obj, (type(None), str, file))
    if needs_layout and (formats is None) and (dtype is None):
        raise ValueError("Must define formats (or dtype) if object is "
                         "None, string, or an open file")

    kwds = {}
    if dtype is not None:
        dtype = sb.dtype(dtype)
    elif formats is not None:
        dtype = format_parser(formats, names, titles,
                              aligned, byteorder)._descr
    else:
        # Nothing describes the layout yet: forward the raw description.
        kwds = dict(formats=formats, names=names, titles=titles,
                    aligned=aligned, byteorder=byteorder)

    if obj is None:
        if shape is None:
            raise ValueError("Must define a shape if obj is None")
        return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)

    if isinstance(obj, str):
        return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)

    if isinstance(obj, (list, tuple)):
        if isinstance(obj[0], (tuple, list)):
            return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
        return fromarrays(obj, dtype=dtype, shape=shape, **kwds)

    if isinstance(obj, recarray):
        new = obj
        if dtype is not None and (obj.dtype != dtype):
            new = obj.view(dtype)
        if copy:
            new = new.copy()
        return new

    if isinstance(obj, file) or isinstance(obj, StringIO.StringIO):
        return fromfile(obj, dtype=dtype, shape=shape, offset=offset)

    if isinstance(obj, ndarray):
        new = obj
        if dtype is not None and (obj.dtype != dtype):
            new = obj.view(dtype)
        if copy:
            new = new.copy()
        return _as_recarray(new)

    interface = getattr(obj, "__array_interface__", None)
    if interface is None or not isinstance(interface, dict):
        raise ValueError("Unknown input type")
    obj = sb.array(obj)
    if dtype is not None and (obj.dtype != dtype):
        obj = obj.view(dtype)
    return _as_recarray(obj)
def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
             names=None, titles=None, aligned=False, byteorder=None):
    """Create an array from binary file data

    If file is a string then that file is opened, else it is assumed
    to be a file object.  A file opened here is always closed again, also
    when an error is raised.

    One dimension of `shape` may be -1; it is then computed from the number
    of bytes left in the file.

    >>> from tempfile import TemporaryFile
    >>> a = N.empty(10,dtype='f8,i4,a5')
    >>> a[5] = (0.5,10,'abcde')
    >>>
    >>> fd=TemporaryFile()
    >>> a = a.newbyteorder('<')
    >>> a.tofile(fd)
    >>>
    >>> fd.seek(0)
    >>> r=fromfile(fd, formats='f8,i4,a5', shape=10, byteorder='<')
    >>> print r[5]
    (0.5, 10, 'abcde')
    >>> r.shape
    (10,)

    Raises
    ------
    ValueError
        If the file holds fewer bytes than `shape` and the data-type need.
    IOError
        If fewer bytes than expected were read.
    """
    if shape is None or shape == 0:
        shape = (-1,)
    elif isinstance(shape, (int, long)):
        shape = (shape,)

    name = 0
    if isinstance(fd, str):
        name = 1
        fd = open(fd, 'rb')
    try:
        if offset > 0:
            fd.seek(offset, 1)
        size = get_remaining_size(fd)

        if dtype is not None:
            descr = sb.dtype(dtype)
        else:
            descr = format_parser(formats, names, titles,
                                  aligned, byteorder)._descr

        itemsize = descr.itemsize

        shapeprod = sb.array(shape).prod()
        shapesize = shapeprod * itemsize
        if shapesize < 0:
            # A -1 entry makes the product negative; replace it with the
            # count that fits.  Floor division keeps it an integer.
            shape = list(shape)
            shape[shape.index(-1)] = size // -shapesize
            shape = tuple(shape)
            shapeprod = sb.array(shape).prod()

        nbytes = shapeprod * itemsize

        if nbytes > size:
            raise ValueError(
                "Not enough bytes left in file for specified shape and type")

        # create the array
        _array = recarray(shape, descr)
        nbytesread = fd.readinto(_array.data)
        if nbytesread != nbytes:
            raise IOError("Didn't read as many bytes as expected")
        return _array
    finally:
        # Only close handles opened by this function.
        if name:
            fd.close()
def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
                titles=None, aligned=False, byteorder=None):
    """ create a recarray from a list of records in text form

        The data in the same field can be heterogeneous, they will be promoted
        to the highest data type.  This method is intended for creating
        smaller record arrays.  If used to create large array without formats
        defined

        r=fromrecords([(2,3.,'abc')]*100000)

        it can be slow.

        If formats is None, then this will auto-detect formats. Use list of
        tuples rather than list of lists for faster processing.

    >>> r=fromrecords([(456,'dbe',1.2),(2,'de',1.3)],names='col1,col2,col3')
    >>> print r[0]
    (456, 'dbe', 1.2)
    >>> r.col1
    array([456,   2])
    >>> r.col2
    chararray(['dbe', 'de'],
          dtype='|S3')
    >>> import cPickle
    >>> print cPickle.loads(cPickle.dumps(r))
    [(456, 'dbe', 1.2) (2, 'de', 1.3)]

    Raises ValueError when the record-by-record fallback is given a shape
    with more than one dimension.
    """
    if formats is None and dtype is None:  # slower
        # Only this branch needs the field count, so only this branch
        # indexes the first record.
        nfields = len(recList[0])
        obj = sb.array(recList, dtype=object)
        arrlist = [sb.array(obj[..., i].tolist()) for i in xrange(nfields)]
        return fromarrays(arrlist, formats=formats, shape=shape, names=names,
                          titles=titles, aligned=aligned, byteorder=byteorder)

    if dtype is not None:
        descr = sb.dtype(dtype)
    else:
        descr = format_parser(formats, names, titles,
                              aligned, byteorder)._descr

    try:
        retval = sb.array(recList, dtype=descr)
    except TypeError:  # list of lists instead of list of tuples
        if shape is None or shape == 0:
            shape = len(recList)
        if isinstance(shape, (int, long)):
            shape = (shape,)
        if len(shape) > 1:
            raise ValueError("Can only deal with 1-d array.")
        _array = recarray(shape, descr)
        for k in xrange(_array.size):
            _array[k] = tuple(recList[k])
        return _array
    else:
        if shape is not None and retval.shape != shape:
            retval.shape = shape

    res = retval.view(recarray)

    res.dtype = sb.dtype((record, res.dtype))
    return res
def fromrecords(recList, dtype=None, shape=None, formats=None, names=None,
                titles=None, aligned=False, byteorder=None):
    """ create a recarray from a list of records in text form

        The data in the same field can be heterogeneous, they will be promoted
        to the highest data type.  This method is intended for creating
        smaller record arrays.  If used to create large array without formats
        defined

        r=fromrecords([(2,3.,'abc')]*100000)

        it can be slow.

        If formats is None, then this will auto-detect formats. Use list of
        tuples rather than list of lists for faster processing.

    >>> r=fromrecords([(456,'dbe',1.2),(2,'de',1.3)],names='col1,col2,col3')
    >>> print r[0]
    (456, 'dbe', 1.2)
    >>> r.col1
    array([456,   2])
    >>> r.col2
    chararray(['dbe', 'de'],
          dtype='|S3')
    >>> import cPickle
    >>> print cPickle.loads(cPickle.dumps(r))
    [(456, 'dbe', 1.2) (2, 'de', 1.3)]

    Raises ValueError when the record-by-record fallback is given a shape
    with more than one dimension.
    """
    if formats is None and dtype is None:  # slower
        # The field count is needed only for format auto-detection.
        nfields = len(recList[0])
        obj = sb.array(recList, dtype=object)
        arrlist = [sb.array(obj[..., i].tolist()) for i in xrange(nfields)]
        return fromarrays(arrlist, formats=formats, shape=shape, names=names,
                          titles=titles, aligned=aligned, byteorder=byteorder)

    if dtype is not None:
        descr = sb.dtype(dtype)
    else:
        descr = format_parser(formats, names, titles,
                              aligned, byteorder)._descr

    try:
        retval = sb.array(recList, dtype=descr)
    except TypeError:  # list of lists instead of list of tuples
        if shape is None or shape == 0:
            shape = len(recList)
        if isinstance(shape, (int, long)):
            shape = (shape,)
        if len(shape) > 1:
            # Call form of `raise`: valid on Python 2 and Python 3.
            raise ValueError("Can only deal with 1-d array.")
        _array = recarray(shape, descr)
        for k in xrange(_array.size):
            _array[k] = tuple(recList[k])
        return _array
    else:
        if shape is not None and retval.shape != shape:
            retval.shape = shape

    res = retval.view(recarray)

    res.dtype = sb.dtype((record, res.dtype))
    return res
def fromarrays(arrayList, dtype=None, shape=None, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    """ create a record array from a (flat) list of arrays

    >>> x1=N.array([1,2,3,4])
    >>> x2=N.array(['a','dd','xyz','12'])
    >>> x3=N.array([1.1,2,3,4])
    >>> r = fromarrays([x1,x2,x3],names='a,b,c')
    >>> print r[1]
    (2, 'dd', 2.0)
    >>> x1[1]=34
    >>> r.a
    array([1, 2, 3, 4])

    Raises ValueError if an item is not an ndarray, if the number of
    fields differs from the number of arrays, or if an array's shape does
    not match the record shape.
    """
    arrayList = [sb.asarray(x) for x in arrayList]

    if shape is None or shape == 0:
        shape = arrayList[0].shape

    if isinstance(shape, int):
        shape = (shape,)

    if formats is None and dtype is None:
        # go through each object in the list to see if it is an ndarray
        # and determine the formats.
        parts = []
        for obj in arrayList:
            if not isinstance(obj, ndarray):
                raise ValueError("item in the array list must be an ndarray.")
            fmt = _typestr[obj.dtype.type]
            if issubclass(obj.dtype.type, nt.flexible):
                # Flexible types also need their item size in the format.
                fmt += repr(obj.itemsize)
            parts.append(fmt)
        formats = ','.join(parts)

    if dtype is not None:
        descr = sb.dtype(dtype)
        _names = descr.names
    else:
        parsed = format_parser(formats, names, titles, aligned, byteorder)
        _names = parsed._names
        descr = parsed._descr

    # Determine shape from data-type.
    if len(descr) != len(arrayList):
        raise ValueError("mismatch between the number of fields "
                         "and the number of arrays")

    d0 = descr[0].shape
    nn = len(d0)
    if nn > 0:
        shape = shape[:-nn]

    for k, obj in enumerate(arrayList):
        # Dimensions that belong to the field itself are ignored when
        # comparing against the record shape.
        nn = len(descr[k].shape)
        testshape = obj.shape[:len(obj.shape) - nn]
        if testshape != shape:
            raise ValueError("array-shape mismatch in array %d" % k)

    _array = recarray(shape, descr)

    # populate the record array (makes a copy)
    for field_name, field_data in zip(_names, arrayList):
        _array[field_name] = field_data

    return _array
def apply_along_axis(func1d, axis, arr, *args, **kwargs):
    """Execute func1d(arr[i], *args, **kwargs) along the given axis.

    `func1d` takes 1-D arrays and `arr` is an N-d array.  ``i`` varies so
    as to apply the function along `axis` for each 1-d subarray in `arr`.

    Results are first collected in an object array; the final array is
    converted to the largest of the data-types of the individual results.
    If `arr` has a ``_mask`` attribute, the result is built with
    ``core.asarray`` and its ``fill_value`` is reset to the default.

    Raises
    ------
    ValueError
        If `axis` is not smaller than ``arr.ndim``.
    """
    arr = core.array(arr, copy=False, subok=True)
    nd = arr.ndim
    if axis < 0:
        axis += nd
    if axis >= nd:
        raise ValueError("axis must be less than arr.ndim; axis=%d, rank=%d."
                         % (axis, nd))

    ind = [0] * (nd - 1)
    i = numeric.zeros(nd, 'O')
    # `range` must be materialised before an element can be removed.
    indlist = list(range(nd))
    indlist.remove(axis)
    i[axis] = slice(None, None)
    outshape = numeric.asarray(arr.shape).take(indlist)
    i.put(indlist, ind)

    def _advance(limits):
        # Step `ind` to the next position, carrying over into the
        # preceding entries once an entry reaches its limit.
        ind[-1] += 1
        n = -1
        while (ind[n] >= limits[n]) and (n > (1 - nd)):
            ind[n - 1] += 1
            ind[n] = 0
            n -= 1

    res = func1d(arr[tuple(i.tolist())], *args, **kwargs)

    # if res is a number, then we have a smaller output array
    asscalar = numeric.isscalar(res)
    if not asscalar:
        try:
            len(res)
        except TypeError:
            asscalar = True

    # Note: we shouldn't set the dtype of the output from the first result...
    # ...so we force the type to object, and build a list of dtypes
    # ...we'll just take the largest, to avoid some downcasting
    dtypes = []
    if asscalar:
        dtypes.append(numeric.asarray(res).dtype)
        outarr = zeros(outshape, object_)
        outarr[tuple(ind)] = res
        Ntot = numeric.product(outshape)
        k = 1
        while k < Ntot:
            # increment the index
            _advance(outshape)
            i.put(indlist, ind)
            res = func1d(arr[tuple(i.tolist())], *args, **kwargs)
            outarr[tuple(ind)] = res
            dtypes.append(asarray(res).dtype)
            k += 1
    else:
        res = core.array(res, copy=False, subok=True)
        # `j` indexes the output: the slot for `axis` holds one full slice
        # per dimension of the result.
        j = i.copy()
        j[axis] = ([slice(None, None)] * res.ndim)
        j.put(indlist, ind)
        Ntot = numeric.product(outshape)
        holdshape = outshape
        outshape = list(arr.shape)
        outshape[axis] = res.shape
        dtypes.append(asarray(res).dtype)
        outshape = flatten_inplace(outshape)
        outarr = zeros(outshape, object_)
        outarr[tuple(flatten_inplace(j.tolist()))] = res
        k = 1
        while k < Ntot:
            # increment the index
            _advance(holdshape)
            i.put(indlist, ind)
            j.put(indlist, ind)
            res = func1d(arr[tuple(i.tolist())], *args, **kwargs)
            outarr[tuple(flatten_inplace(j.tolist()))] = res
            dtypes.append(asarray(res).dtype)
            k += 1

    max_dtypes = numeric.dtype(numeric.asarray(dtypes).max())
    if not hasattr(arr, '_mask'):
        result = numeric.asarray(outarr, dtype=max_dtypes)
    else:
        result = core.asarray(outarr, dtype=max_dtypes)
        result.fill_value = core.default_fill_value(result)
    return result
def fromarrays(arrayList, dtype=None, shape=None, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    """ create a record array from a (flat) list of arrays

    >>> x1=N.array([1,2,3,4])
    >>> x2=N.array(['a','dd','xyz','12'])
    >>> x3=N.array([1.1,2,3,4])
    >>> r = fromarrays([x1,x2,x3],names='a,b,c')
    >>> print r[1]
    (2, 'dd', 2.0)
    >>> x1[1]=34
    >>> r.a
    array([1, 2, 3, 4])

    Raises ValueError if an item is not an ndarray, if the number of
    fields differs from the number of arrays, or if an array's shape does
    not match the record shape.
    """
    arrayList = [sb.asarray(x) for x in arrayList]

    if shape is None or shape == 0:
        shape = arrayList[0].shape

    if isinstance(shape, int):
        shape = (shape,)

    if formats is None and dtype is None:
        # go through each object in the list to see if it is an ndarray
        # and determine the formats.
        detected = []
        for obj in arrayList:
            if not isinstance(obj, ndarray):
                raise ValueError("item in the array list must be an ndarray.")
            code = _typestr[obj.dtype.type]
            if issubclass(obj.dtype.type, nt.flexible):
                # `repr()` replaces the backtick syntax, which only
                # Python 2 accepts.
                code += repr(obj.itemsize)
            detected.append(code)
        formats = ','.join(detected)

    if dtype is not None:
        descr = sb.dtype(dtype)
        _names = descr.names
    else:
        parsed = format_parser(formats, names, titles, aligned, byteorder)
        _names = parsed._names
        descr = parsed._descr

    # Determine shape from data-type.
    if len(descr) != len(arrayList):
        raise ValueError("mismatch between the number of fields "
                         "and the number of arrays")

    d0 = descr[0].shape
    nn = len(d0)
    if nn > 0:
        shape = shape[:-nn]

    for k, obj in enumerate(arrayList):
        # Dimensions that belong to the field itself are ignored when
        # comparing against the record shape.
        nn = len(descr[k].shape)
        testshape = obj.shape[:len(obj.shape) - nn]
        if testshape != shape:
            raise ValueError("array-shape mismatch in array %d" % k)

    _array = recarray(shape, descr)

    # populate the record array (makes a copy)
    for field_name, field_data in zip(_names, arrayList):
        _array[field_name] = field_data

    return _array
def fromtextfile(
    fname,
    delimitor=None,
    commentchar="#",
    missingchar="",
    dates_column=None,
    varnames=None,
    vartypes=None,
    dates=None,
):
    """Creates a multitimeseries from data stored in the file `fname`.

:Parameters:
    - `fname` : file name/handle
      Passed to `openfile`.
    - `delimitor` : Character *None*
      Alphanumeric character used to separate columns in the file.
      If None, any (group of) white spacestring(s) will be used.
    - `commentchar` : String *['#']*
      Alphanumeric character used to mark the start of a comment.
    - `missingchar` : String *['']*
      String indicating missing data, and used to create the masks.
    - `dates_column` : Integer *[None]*
      Position of the column storing dates. If None, a position will be
      estimated from the variable names.
    - `varnames` : Sequence *[None]*
      Sequence of the variable names. If None, a list will be created from
      the first non empty line of the file.
    - `vartypes` : Sequence *[None]*
      Sequence of the variables dtypes. If None, the sequence will be
      estimated from the first non-commented line.
    - `dates` : *[None]*
      Forwarded to `__getdates`.

    Ultra simple: the varnames are in the header, one line.

    Raises ValueError if no header line with at least two names is found
    or if `dates_column` is not a valid column number.
    """
    # Try to open the file ......................
    f = openfile(fname)

    # Get the first non-empty line as the varnames
    while True:
        line = f.readline()
        if not line:
            # readline() returns '' at end of file; without this check the
            # loop would never terminate.
            raise ValueError("No header line with variable names found")
        # Keep the text before the comment marker.  Slicing with find()
        # dropped the last character when the marker was absent.
        firstline = line.split(commentchar)[0].strip()
        _varnames = firstline.split(delimitor)
        if len(_varnames) > 1:
            break
    if varnames is None:
        varnames = _varnames

    # Get the data ..............................
    _variables = MA.asarray([line.strip().split(delimitor) for line in f
                             if line[0] != commentchar and len(line) > 1])
    (nvars, nfields) = _variables.shape

    # Check if we need to get the dates..........
    if dates_column is None:
        dates_column = [i for (i, n) in enumerate(list(varnames))
                        if n.lower() in ["_dates", "dates"]]
    elif isinstance(dates_column, (int, float)):
        # Valid column numbers stop at nfields - 1.
        if dates_column >= nfields:
            raise ValueError("Invalid column number: %i >= %i"
                             % (dates_column, nfields))
        dates_column = [dates_column]

    if len(dates_column) > 0:
        cols = [c for c in range(nfields) if c not in dates_column]
        newdates = date_array(_variables[:, dates_column[-1]])
        _variables = _variables[:, cols]
        varnames = [varnames[i] for i in cols]
        if vartypes is not None:
            vartypes = [vartypes[i] for i in cols]
        nfields -= len(dates_column)
    else:
        newdates = None

    # Try to guess the dtype ....................
    if vartypes is None:
        vartypes = _guessvartypes(_variables[0])
    else:
        vartypes = [numeric.dtype(v) for v in vartypes]
        if len(vartypes) != nfields:
            msg = "Attempting to %i dtypes for %i fields!"
            msg += " Reverting to default."
            warnings.warn(msg % (len(vartypes), nfields))
            vartypes = _guessvartypes(_variables[0])

    # Construct the descriptor ..................
    mdescr = list(zip(varnames, vartypes))

    # Get the data and the mask .................
    # We just need a list of masked_arrays. It's easier to create it like that:
    _mask = _variables.T == missingchar
    _datalist = [masked_array(a, mask=m, dtype=t)
                 for (a, m, t) in zip(_variables.T, _mask, vartypes)]

    newdates = __getdates(dates=dates, newdates=newdates, length=nvars,
                          freq=None, start_date=None)
    return MultiTimeSeries(_datalist, dates=newdates, dtype=mdescr)
def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
             names=None, titles=None, aligned=False, byteorder=None):
    """Create an array from binary file data

    If file is a string then that file is opened, else it is assumed
    to be a file object.  A file opened here is always closed again, also
    when an error is raised.

    One dimension of `shape` may be -1; it is then computed from the number
    of bytes left in the file.

    >>> from tempfile import TemporaryFile
    >>> a = N.empty(10,dtype='f8,i4,a5')
    >>> a[5] = (0.5,10,'abcde')
    >>>
    >>> fd=TemporaryFile()
    >>> a = a.newbyteorder('<')
    >>> a.tofile(fd)
    >>>
    >>> fd.seek(0)
    >>> r=fromfile(fd, formats='f8,i4,a5', shape=10, byteorder='<')
    >>> print r[5]
    (0.5, 10, 'abcde')
    >>> r.shape
    (10,)

    Raises
    ------
    ValueError
        If the file holds fewer bytes than `shape` and the data-type need.
    """
    if shape is None or shape == 0:
        shape = (-1,)
    elif isinstance(shape, (int, long)):
        shape = (shape,)

    name = 0
    if isinstance(fd, str):
        name = 1
        fd = open(fd, 'rb')
    try:
        if offset > 0:
            fd.seek(offset, 1)
        size = get_remaining_size(fd)

        if dtype is not None:
            descr = sb.dtype(dtype)
        else:
            descr = format_parser(formats, names, titles,
                                  aligned, byteorder)._descr

        itemsize = descr.itemsize

        shapeprod = sb.array(shape).prod()
        shapesize = shapeprod * itemsize
        if shapesize < 0:
            # A -1 entry makes the product negative; replace it with the
            # count that fits.
            shape = list(shape)
            shape[shape.index(-1)] = size // -shapesize
            shape = tuple(shape)
            shapeprod = sb.array(shape).prod()

        nbytes = shapeprod * itemsize

        if nbytes > size:
            raise ValueError(
                "Not enough bytes left in file for specified shape and type")

        # create the array
        # Read every record the shape asks for, not just the first
        # dimension, so that multi-dimensional shapes are honoured.
        count = int(shapeprod)
        if isinstance(fd, file):
            arr = np.fromfile(fd, dtype=descr, count=count)
        else:
            read_size = np.dtype(descr).itemsize * count
            st = fd.read(read_size)
            arr = np.fromstring(st, dtype=descr, count=count)

        # TODO: There was a problem with large arrays, don't fully understand
        # but this is more efficient anyway
        _array = arr.view(recarray)
        if len(shape) > 1:
            _array = _array.reshape(shape)
        return _array
    finally:
        # Only close handles opened by this function.
        if name:
            fd.close()
def fromfile(fd, dtype=None, shape=None, offset=0, formats=None,
             names=None, titles=None, aligned=False, byteorder=None):
    """Create a record array from binary file data.

    If `fd` is a string, it is taken as a file name: the file is opened in
    binary mode and closed again before returning, including when an error
    is raised. Otherwise `fd` is used as an already-open file object and is
    left open.

    Parameters
    ----------
    fd : str or file object
        File name, or an open file positioned where reading should start.
        The file object must provide ``seek`` and ``readinto``.
    dtype : data-type, optional
        Record layout. When None, the layout is built by `format_parser`
        from `formats`, `names`, `titles`, `aligned` and `byteorder`.
    shape : int or tuple of int, optional
        Requested shape. None or 0 is treated as ``(-1,)``; a ``-1`` entry
        is replaced by the number of records that fit in the remaining
        bytes of the file.
    offset : int, optional
        If positive, number of bytes to skip from the current position.
    formats, names, titles, aligned, byteorder : optional
        Passed to `format_parser` when `dtype` is None.

    Returns
    -------
    recarray
        Newly allocated record array of the resolved shape, filled from the
        file.

    Raises
    ------
    ValueError
        If the file has fewer bytes left than `shape` and the record size
        require.
    IOError
        If fewer bytes than expected were read into the array.

    Examples
    --------
    >>> from tempfile import TemporaryFile
    >>> a = N.empty(10,dtype='f8,i4,a5')
    >>> a[5] = (0.5,10,'abcde')
    >>>
    >>> fd=TemporaryFile()
    >>> a = a.newbyteorder('<')
    >>> a.tofile(fd)
    >>>
    >>> fd.seek(0)
    >>> r=fromfile(fd, formats='f8,i4,a5', shape=10, byteorder='<')
    >>> print r[5]
    (0.5, 10, 'abcde')
    >>> r.shape
    (10,)
    """
    if (shape is None or shape == 0):
        shape = (-1, )
    elif isinstance(shape, (int, long)):
        shape = (shape, )

    # `name` records whether this function opened the file itself and is
    # therefore responsible for closing it.
    name = 0
    if isinstance(fd, str):
        name = 1
        fd = open(fd, 'rb')

    try:
        if (offset > 0):
            fd.seek(offset, 1)
        size = get_remaining_size(fd)

        if dtype is not None:
            descr = sb.dtype(dtype)
        else:
            descr = format_parser(formats, names, titles,
                                  aligned, byteorder)._descr

        itemsize = descr.itemsize

        # A -1 entry makes the product negative; resolve it from the number
        # of bytes that are left in the file.
        shapeprod = sb.array(shape).prod()
        shapesize = shapeprod * itemsize
        if shapesize < 0:
            shape = list(shape)
            # Floor division keeps the dimension an integer regardless of
            # whether true division is in effect.
            shape[shape.index(-1)] = size // -shapesize
            shape = tuple(shape)
            shapeprod = sb.array(shape).prod()

        nbytes = shapeprod * itemsize

        if nbytes > size:
            raise ValueError(
                "Not enough bytes left in file for specified shape and type")

        # create the array
        _array = recarray(shape, descr)
        nbytesread = fd.readinto(_array.data)
        if nbytesread != nbytes:
            raise IOError("Didn't read as many bytes as expected")
    finally:
        # Release the handle we opened ourselves, even when an error
        # interrupted the read.
        if name:
            fd.close()

    return _array
class StringConverter:
    """
    Factory class for function transforming a string into another object
    (int, float).

    After initialization, an instance can be called to transform a string
    into another object. If the string is recognized as representing a
    missing value, a default value is returned.

    Attributes
    ----------
    func : function
        Function used for the conversion.
    default : any
        Default value to return when the input corresponds to a missing
        value.
    type : type
        Type of the output.
    _status : int
        Integer representing the order of the conversion.
    _mapper : sequence of tuples
        Sequence of tuples (dtype, function, default value) to evaluate in
        order.
    _locked : bool
        Holds `locked` parameter.

    Parameters
    ----------
    dtype_or_func : {None, dtype, function}, optional
        If a `dtype`, specifies the input data type, used to define a basic
        function and a default value for missing data. For example, when
        `dtype` is float, the `func` attribute is set to `float` and the
        default value to `np.nan`.  If a function, this function is used to
        convert a string to another object. In this case, it is recommended
        to give an associated default value as input.
    default : any, optional
        Value to return by default, that is, when the string to be
        converted is flagged as missing. If not given, `StringConverter`
        tries to supply a reasonable default value.
    missing_values : {None, sequence of str}, optional
        ``None`` or sequence of strings indicating a missing value. If ``None``
        then missing values are indicated by empty entries. The default is
        ``None``.
    locked : bool, optional
        Whether the StringConverter should be locked to prevent automatic
        upgrade or not. Default is False.

    """
    _mapper = [
        (nx.bool_, str2bool, False),
        (nx.int_, int, -1),
    ]

    # On 32-bit systems, we need to make sure that we explicitly include
    # nx.int64 since nx.int_ is nx.int32.
    if nx.dtype(nx.int_).itemsize < nx.dtype(nx.int64).itemsize:
        _mapper.append((nx.int64, int, -1))

    _mapper.extend([
        (nx.float64, float, nx.nan),
        (nx.complex128, complex, nx.nan + 0j),
        (nx.longdouble, nx.longdouble, nx.nan),
        # If a non-default dtype is passed, fall back to generic
        # ones (should only be used for the converter)
        (nx.integer, int, -1),
        (nx.floating, float, nx.nan),
        (nx.complexfloating, complex, nx.nan + 0j),
        # Last, try with the string types (must be last, because
        # `_mapper[-1]` is used as default in some cases)
        (nx.unicode_, asunicode, '???'),
        (nx.string_, asbytes, '???'),
    ])

    @classmethod
    def _getdtype(cls, val):
        """Returns the dtype of the input variable."""
        return np.array(val).dtype

    @classmethod
    def _getsubdtype(cls, val):
        """Returns the type of the dtype of the input variable."""
        return np.array(val).dtype.type

    @classmethod
    def _dtypeortype(cls, dtype):
        """Returns dtype for datetime64 and type of dtype otherwise."""

        # This is a bit annoying. We want to return the "general" type in most
        # cases (ie. "string" rather than "S10"), but we want to return the
        # specific type for datetime64 (ie. "datetime64[us]" rather than
        # "datetime64").
        if dtype.type == np.datetime64:
            return dtype
        return dtype.type

    @classmethod
    def upgrade_mapper(cls, func, default=None):
        """
        Upgrade the mapper of a StringConverter by adding a new function and
        its corresponding default.

        The input function (or sequence of functions) and its associated
        default value (if any) is inserted in penultimate position of the
        mapper.  The corresponding type is estimated from the dtype of the
        default value.

        Parameters
        ----------
        func : var
            Function, or sequence of functions. A sequence whose first
            element is a tuple or list is treated as a sequence of ready
            made mapper entries and inserted as is.
        default : any or sequence, optional
            Default value for a single function, or sequence of default
            values for a sequence of functions. A sequence shorter than
            `func` is padded with ``None``.

        Examples
        --------
        >>> import dateutil.parser
        >>> import datetime
        >>> dateparser = dateutil.parser.parse
        >>> defaultdate = datetime.date(2000, 1, 1)
        >>> StringConverter.upgrade_mapper(dateparser, default=defaultdate)
        """
        # Func is a single functions
        if hasattr(func, '__call__'):
            cls._mapper.insert(-1, (cls._getsubdtype(default), func, default))
            return
        elif hasattr(func, '__iter__'):
            if isinstance(func[0], (tuple, list)):
                for _ in func:
                    cls._mapper.insert(-1, _)
                return
            if default is None:
                default = [None] * len(func)
            else:
                default = list(default)
                # Pad with None so that every function gets a default.
                # `extend` is required here: `append` would add the padding
                # as one nested list, and `zip` below would then drop the
                # remaining functions.
                default.extend([None] * (len(func) - len(default)))
            for fct, dft in zip(func, default):
                cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))

    @classmethod
    def _find_map_entry(cls, dtype):
        """
        Return ``(index, (type, function, default))`` for the mapper entry
        matching `dtype`.

        An entry whose type equals ``dtype.type`` is preferred; otherwise
        the first entry of which ``dtype.type`` is a sub-dtype is used.
        Raises `LookupError` when no entry matches.
        """
        # if a converter for the specific dtype is available use that
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if dtype.type == deftype:
                return i, (deftype, func, default_def)

        # otherwise find an inexact match
        for i, (deftype, func, default_def) in enumerate(cls._mapper):
            if np.issubdtype(dtype.type, deftype):
                return i, (deftype, func, default_def)

        raise LookupError

    def __init__(self, dtype_or_func=None, default=None, missing_values=None,
                 locked=False):
        # Defines a lock for upgrade
        self._locked = bool(locked)
        # No input dtype: minimal initialization
        if dtype_or_func is None:
            self.func = str2bool
            self._status = 0
            self.default = default or False
            dtype = np.dtype('bool')
        else:
            # Is the input a np.dtype ?
            try:
                self.func = None
                dtype = np.dtype(dtype_or_func)
            except TypeError:
                # dtype_or_func must be a function, then
                if not hasattr(dtype_or_func, '__call__'):
                    errmsg = ("The input argument `dtype` is neither a"
                              " function nor a dtype (got '%s' instead)")
                    raise TypeError(errmsg % type(dtype_or_func))
                # Set the function
                self.func = dtype_or_func
                # If we don't have a default, try to guess it or set it to
                # None
                if default is None:
                    try:
                        default = self.func('0')
                    except ValueError:
                        default = None
                dtype = self._getdtype(default)

            # find the best match in our mapper
            try:
                self._status, (_, func, default_def) = \
                    self._find_map_entry(dtype)
            except LookupError:
                # no match
                self.default = default
                _, func, _ = self._mapper[-1]
                self._status = 0
            else:
                # use the found default only if we did not already have one
                if default is None:
                    self.default = default_def
                else:
                    self.default = default

            # If the input was a dtype, set the function to the last we saw
            if self.func is None:
                self.func = func

            # If the status is 1 (int), change the function to
            # something more robust.
            if self.func == self._mapper[1][1]:
                if issubclass(dtype.type, np.uint64):
                    self.func = np.uint64
                elif issubclass(dtype.type, np.int64):
                    self.func = np.int64
                else:
                    self.func = lambda x: int(float(x))
        # Store the list of strings corresponding to missing values.
        if missing_values is None:
            self.missing_values = {''}
        else:
            if isinstance(missing_values, str):
                missing_values = missing_values.split(",")
            self.missing_values = set(list(missing_values) + [''])

        self._callingfunction = self._strict_call
        self.type = self._dtypeortype(dtype)
        self._checked = False
        self._initial_default = default

    def _loose_call(self, value):
        """Convert `value`, returning the default if `func` raises ValueError."""
        try:
            return self.func(value)
        except ValueError:
            return self.default

    def _strict_call(self, value):
        """
        Convert `value`, returning the default only for missing values.

        Raises ValueError when `func` cannot convert `value` and the
        stripped string is not one of `missing_values`.
        """
        try:

            # We check if we can convert the value using the current function
            new_value = self.func(value)

            # In addition to having to check whether func can convert the
            # value, we also have to make sure that we don't get overflow
            # errors for integers.
            if self.func is int:
                try:
                    np.array(value, dtype=self.type)
                except OverflowError:
                    raise ValueError

            # We're still here so we can now return the new value
            return new_value

        except ValueError:
            if value.strip() in self.missing_values:
                if not self._status:
                    self._checked = False
                return self.default
            raise ValueError("Cannot convert string '%s'" % value)

    def __call__(self, value):
        return self._callingfunction(value)

    def _do_upgrade(self):
        """
        Move to the next entry of `_mapper`, unless already at the last one.

        Raises ConverterLockError if the converter is locked and
        ConverterError if `_status` equals the length of the mapper.
        """
        # Raise an exception if we locked the converter...
        if self._locked:
            errmsg = "Converter is locked and cannot be upgraded"
            raise ConverterLockError(errmsg)
        _statusmax = len(self._mapper)
        # Complains if we try to upgrade by the maximum
        _status = self._status
        if _status == _statusmax:
            errmsg = "Could not find a valid conversion function"
            raise ConverterError(errmsg)
        elif _status < _statusmax - 1:
            _status += 1
        self.type, self.func, default = self._mapper[_status]
        self._status = _status
        # A default supplied at construction time takes precedence over the
        # mapper's default.
        if self._initial_default is not None:
            self.default = self._initial_default
        else:
            self.default = default

    def upgrade(self, value):
        """
        Find the best converter for a given string, and return the result.

        The supplied string `value` is converted by testing different
        converters in order. First the `func` method of the
        `StringConverter` instance is tried, if this fails other available
        converters are tried.  The order in which these other converters
        are tried is determined by the `_status` attribute of the instance.

        Parameters
        ----------
        value : str
            The string to convert.

        Returns
        -------
        out : any
            The result of converting `value` with the appropriate converter.

        """
        self._checked = True
        try:
            return self._strict_call(value)
        except ValueError:
            self._do_upgrade()
            return self.upgrade(value)

    def iterupgrade(self, value):
        """
        Upgrade the converter until it can convert every item of `value`.

        A `value` without an ``__iter__`` attribute is treated as a single
        item. Nothing is returned; only the converter state is updated.
        """
        self._checked = True
        if not hasattr(value, '__iter__'):
            value = (value, )
        _strict_call = self._strict_call
        try:
            for _m in value:
                _strict_call(_m)
        except ValueError:
            self._do_upgrade()
            self.iterupgrade(value)

    def update(self, func, default=None, testing_value=None,
               missing_values='', locked=False):
        """
        Set StringConverter attributes directly.

        Parameters
        ----------
        func : function
            Conversion function.
        default : any, optional
            Value to return by default, that is, when the string to be
            converted is flagged as missing. If not given,
            `StringConverter` tries to supply a reasonable default value.
        testing_value : str, optional
            A string representing a standard input value of the converter.
            This string is used to help defining a reasonable default
            value.
        missing_values : {sequence of str, None}, optional
            Sequence of strings indicating a missing value. If ``None``, then
            the existing `missing_values` are cleared. The default is `''`.
        locked : bool, optional
            Whether the StringConverter should be locked to prevent
            automatic upgrade or not. Default is False.

        Notes
        -----
        `update` takes the same parameters as the constructor of
        `StringConverter`, except that `func` does not accept a `dtype`
        whereas `dtype_or_func` in the constructor does.

        """
        self.func = func
        self._locked = locked

        # Don't reset the default to None if we can avoid it
        if default is not None:
            self.default = default
            self.type = self._dtypeortype(self._getdtype(default))
        else:
            try:
                tester = func(testing_value or '1')
            except (TypeError, ValueError):
                tester = None
            self.type = self._dtypeortype(self._getdtype(tester))

        # Add the missing values to the existing set or clear it.
        if missing_values is None:
            # Clear all missing values even though the ctor initializes it to
            # set(['']) when the argument is None.
            self.missing_values = set()
        else:
            if not np.iterable(missing_values):
                missing_values = [missing_values]
            if not all(isinstance(v, str) for v in missing_values):
                raise TypeError("missing_values must be strings or unicode")
            self.missing_values.update(missing_values)
def array(obj, dtype=None, shape=None, offset=0, strides=None, formats=None,
          names=None, titles=None, aligned=False, byteorder=None, copy=True):
    """Build a record array from one of several kinds of input.

    The construction path is chosen from the type of `obj`, tested in this
    order: None, string, list/tuple, recarray, open file, ndarray, and
    finally any object exposing ``__array_interface__``.

    Parameters
    ----------
    obj : object
        Source of the data.
    dtype : data-type, optional
        Record layout. When None and `formats` is given, the layout is
        built by `format_parser` from `formats`, `names`, `titles`,
        `aligned` and `byteorder`.
    shape : int or tuple of int, optional
        Required when `obj` is None; otherwise forwarded to the selected
        constructor.
    offset, strides : optional
        Forwarded to the constructors that accept them.
    formats, names, titles, aligned, byteorder : optional
        Layout description; forwarded unchanged to the selected
        constructor when neither `dtype` nor `formats` is given.
    copy : bool, optional
        For recarray and ndarray input, whether the result is a copy.

    Raises
    ------
    ValueError
        If `obj` is None, a string or an open file and neither `dtype` nor
        `formats` is given; if `obj` is None and `shape` is None; or if
        `obj` is of an unsupported type.
    """
    needs_explicit_layout = isinstance(obj, (type(None), str, file))
    if needs_explicit_layout and formats is None and dtype is None:
        raise ValueError("Must define formats (or dtype) if object is "
                         "None, string, or an open file")

    if dtype is None and formats is None:
        # No layout resolved here: let the constructor work it out from the
        # raw description.
        kwds = {
            'formats': formats,
            'names': names,
            'titles': titles,
            'aligned': aligned,
            'byteorder': byteorder,
        }
    else:
        kwds = {}
        if dtype is None:
            dtype = format_parser(formats, names, titles,
                                  aligned, byteorder)._descr
        else:
            dtype = sb.dtype(dtype)

    if obj is None:
        if shape is None:
            raise ValueError("Must define a shape if obj is None")
        return recarray(shape, dtype, buf=obj, offset=offset, strides=strides)

    if isinstance(obj, str):
        return fromstring(obj, dtype, shape=shape, offset=offset, **kwds)

    if isinstance(obj, (list, tuple)):
        # A sequence of tuples/lists holds records; anything else is taken
        # as a sequence of field arrays.
        if isinstance(obj[0], (tuple, list)):
            return fromrecords(obj, dtype=dtype, shape=shape, **kwds)
        return fromarrays(obj, dtype=dtype, shape=shape, **kwds)

    if isinstance(obj, recarray):
        result = obj
        if dtype is not None and (obj.dtype != dtype):
            result = obj.view(dtype)
        if copy:
            result = result.copy()
        return result

    if isinstance(obj, file):
        return fromfile(obj, dtype=dtype, shape=shape, offset=offset)

    if isinstance(obj, ndarray):
        base = obj
        if dtype is not None and (obj.dtype != dtype):
            base = obj.view(dtype)
        if copy:
            base = base.copy()
        res = base.view(recarray)
    else:
        interface = getattr(obj, "__array_interface__", None)
        if interface is None or not isinstance(interface, dict):
            raise ValueError("Unknown input type")
        obj = sb.array(obj)
        if dtype is not None and (obj.dtype != dtype):
            obj = obj.view(dtype)
        res = obj.view(recarray)

    # Structured (void) data gets the `record` scalar type attached.
    if issubclass(res.dtype.type, nt.void):
        res.dtype = sb.dtype((record, res.dtype))
    return res
def fromarrays(arraylist, dates=None, dtype=None, shape=None, formats=None,
               names=None, titles=None, aligned=False, byteorder=None):
    """Creates a MultiTimeSeries from a (flat) list of masked arrays.

    :Parameters:
        - `arraylist` : Sequence
          A list of (masked) arrays. Each element of the sequence is first
          converted to a masked array if needed. If a 2D array is passed as
          argument, it is processed line by line
        - `dates` : *[None]*
          If not None, forwarded to `MultiTimeSeries` as its `dates`
          argument.
        - `dtype` : numeric.dtype
          Data type descriptor.
        - `shape` : Integer *[None]*
          Number of records. If None, `shape` is defined from the shape of
          the first array in the list.
        - `formats` :
          Passed to `format_parser` when `dtype` is None. If both are None,
          the formats are derived from the arrays by `_getformats`.
        - `names` :
          Passed to `format_parser` when `dtype` is None.
        - `titles`:
          Passed to `format_parser` when `dtype` is None.
        - `aligned`: Boolean *[False]*
          Passed to `format_parser` when `dtype` is None.
        - `byteorder`: *[None]*
          Passed to `format_parser` when `dtype` is None.

    :Raises:
        ValueError if the number of fields of the descriptor differs from
        the number of arrays, or if an array does not have the expected
        shape.
    """
    arraylist = [MA.asarray(x) for x in arraylist]
    # Define/check the shape.....................
    if shape is None or shape == 0:
        shape = arraylist[0].shape
    if isinstance(shape, int):
        shape = (shape,)
    # Define formats from scratch ...............
    if formats is None and dtype is None:
        formats = _getformats(arraylist)
    # Define the dtype ..........................
    if dtype is not None:
        descr = numeric.dtype(dtype)
    else:
        descr = format_parser(formats, names, titles,
                              aligned, byteorder)._descr
    # Determine shape from data-type.............
    if len(descr) != len(arraylist):
        # Parenthesised so that both halves are part of the same string and
        # both placeholders get filled.
        msg = ("Mismatch between the number of fields (%i) and the number of "
               "arrays (%i)")
        raise ValueError(msg % (len(descr), len(arraylist)))
    # Trailing dimensions that belong to the first field's own shape are not
    # part of the record shape.
    d0 = descr[0].shape
    nn = len(d0)
    if nn > 0:
        shape = shape[:-nn]
    # Make sure the shape is the correct one ....
    for k, obj in enumerate(arraylist):
        nn = len(descr[k].shape)
        testshape = obj.shape[: len(obj.shape) - nn]
        if testshape != shape:
            raise ValueError("Array-shape mismatch in array %d" % k)
    # Build the series; `dates` is only passed on when the caller gave one,
    # so the default call is unchanged.
    if dates is None:
        return MultiTimeSeries(arraylist, dtype=descr)
    return MultiTimeSeries(arraylist, dates=dates, dtype=descr)