def test_set_mask(self): base = self.base.copy() mbase = base.view(mrecarray) # Set the mask to True ....................... mbase.mask = masked assert_equal(ma.getmaskarray(mbase['b']), [1] * 5) assert_equal(mbase['a']._mask, mbase['b']._mask) assert_equal(mbase['a']._mask, mbase['c']._mask) assert_equal(mbase._mask.tolist(), np.array([(1, 1, 1)] * 5, dtype=bool)) # Delete the mask ............................ mbase.mask = nomask assert_equal(ma.getmaskarray(mbase['c']), [0] * 5) assert_equal(mbase._mask.tolist(), np.array([(0, 0, 0)] * 5, dtype=bool))
def test_set_fields(self): # Tests setting fields. base = self.base.copy() mbase = base.view(mrecarray) mbase = mbase.copy() mbase.fill_value = (999999, 1e20, 'N/A') # Change the data, the mask should be conserved mbase.a._data[:] = 5 assert_equal(mbase['a']._data, [5, 5, 5, 5, 5]) assert_equal(mbase['a']._mask, [0, 1, 0, 0, 1]) # Change the elements, and the mask will follow mbase.a = 1 assert_equal(mbase['a']._data, [1] * 5) assert_equal(ma.getmaskarray(mbase['a']), [0] * 5) # Use to be _mask, now it's recordmask assert_equal(mbase.recordmask, [False] * 5) assert_equal( mbase._mask.tolist(), np.array([(0, 0, 0), (0, 1, 1), (0, 0, 0), (0, 0, 0), (0, 1, 1)], dtype=bool)) # Set a field to mask ........................ mbase.c = masked # Use to be mask, and now it's still mask ! assert_equal(mbase.c.mask, [1] * 5) assert_equal(mbase.c.recordmask, [1] * 5) assert_equal(ma.getmaskarray(mbase['c']), [1] * 5) assert_equal(ma.getdata(mbase['c']), [b'N/A'] * 5) assert_equal( mbase._mask.tolist(), np.array([(0, 0, 1), (0, 1, 1), (0, 0, 1), (0, 0, 1), (0, 1, 1)], dtype=bool)) # Set fields by slices ....................... mbase = base.view(mrecarray).copy() mbase.a[3:] = 5 assert_equal(mbase.a, [1, 2, 3, 5, 5]) assert_equal(mbase.a._mask, [0, 1, 0, 0, 0]) mbase.b[3:] = masked assert_equal(mbase.b, base['b']) assert_equal(mbase.b._mask, [0, 1, 0, 1, 1]) # Set fields globally.......................... ndtype = [('alpha', '|S1'), ('num', int)] data = ma.array([('a', 1), ('b', 2), ('c', 3)], dtype=ndtype) rdata = data.view(MaskedRecords) val = ma.array([10, 20, 30], mask=[1, 0, 0]) rdata['num'] = val assert_equal(rdata.num, val) assert_equal(rdata.num.mask, [1, 0, 0])
def addfield(mrecord, newfield, newfieldname=None): """Adds a new field to the masked record array Uses `newfield` as data and `newfieldname` as name. If `newfieldname` is None, the new field name is set to 'fi', where `i` is the number of existing fields. """ _data = mrecord._data _mask = mrecord._mask if newfieldname is None or newfieldname in reserved_fields: newfieldname = 'f%i' % len(_data.dtype) newfield = ma.array(newfield) # Get the new data. # Create a new empty recarray newdtype = np.dtype(_data.dtype.descr + [(newfieldname, newfield.dtype)]) newdata = recarray(_data.shape, newdtype) # Add the existing field [newdata.setfield(_data.getfield(*f), *f) for f in _data.dtype.fields.values()] # Add the new field newdata.setfield(newfield._data, *newdata.dtype.fields[newfieldname]) newdata = newdata.view(MaskedRecords) # Get the new mask # Create a new empty recarray newmdtype = np.dtype([(n, bool_) for n in newdtype.names]) newmask = recarray(_data.shape, newmdtype) # Add the old masks [newmask.setfield(_mask.getfield(*f), *f) for f in _mask.dtype.fields.values()] # Add the mask of the new field newmask.setfield(getmaskarray(newfield), *newmask.dtype.fields[newfieldname]) newdata._mask = newmask return newdata
def __setitem__(self, indx, value): """ Sets the given record to value. """ MaskedArray.__setitem__(self, indx, value) if isinstance(indx, basestring): self._mask[indx] = ma.getmaskarray(value)
def __setattr__(self, attr, val): """ Sets the attribute attr to the value val. """ # Should we call __setmask__ first ? if attr in ['mask', 'fieldmask']: self.__setmask__(val) return # Create a shortcut (so that we don't have to call getattr all the time) _localdict = object.__getattribute__(self, '__dict__') # Check whether we're creating a new field newattr = attr not in _localdict try: # Is attr a generic attribute ? ret = object.__setattr__(self, attr, val) except Exception: # Not a generic attribute: exit if it's not a valid field fielddict = ndarray.__getattribute__(self, 'dtype').fields or {} optinfo = ndarray.__getattribute__(self, '_optinfo') or {} if not (attr in fielddict or attr in optinfo): exctype, value = sys.exc_info()[:2] raise exctype(value) else: # Get the list of names fielddict = ndarray.__getattribute__(self, 'dtype').fields or {} # Check the attribute if attr not in fielddict: return ret if newattr: # We just added this one or this setattr worked on an # internal attribute. try: object.__delattr__(self, attr) except Exception: return ret # Let's try to set the field try: res = fielddict[attr][:2] except (TypeError, KeyError): raise AttributeError("record array has no attribute %s" % attr) if val is masked: _fill_value = _localdict['_fill_value'] if _fill_value is not None: dval = _localdict['_fill_value'][attr] else: dval = val mval = True else: dval = filled(val) mval = getmaskarray(val) obj = ndarray.__getattribute__(self, '_data').setfield(dval, *res) _localdict['_mask'].__setitem__(attr, mval) return obj
def fromarrays(arraylist, dtype=None, shape=None, formats=None, names=None, titles=None, aligned=False, byteorder=None, fill_value=None): """ Creates a mrecarray from a (flat) list of masked arrays. Parameters ---------- arraylist : sequence A list of (masked) arrays. Each element of the sequence is first converted to a masked array if needed. If a 2D array is passed as argument, it is processed line by line dtype : {None, dtype}, optional Data type descriptor. shape : {None, integer}, optional Number of records. If None, shape is defined from the shape of the first array in the list. formats : {None, sequence}, optional Sequence of formats for each individual field. If None, the formats will be autodetected by inspecting the fields and selecting the highest dtype possible. names : {None, sequence}, optional Sequence of the names of each field. fill_value : {None, sequence}, optional Sequence of data to be used as filling values. Notes ----- Lists of tuples should be preferred over lists of lists for faster processing. """ datalist = [getdata(x) for x in arraylist] masklist = [np.atleast_1d(getmaskarray(x)) for x in arraylist] _array = recfromarrays(datalist, dtype=dtype, shape=shape, formats=formats, names=names, titles=titles, aligned=aligned, byteorder=byteorder).view(mrecarray) _array._mask.flat = list(zip(*masklist)) if fill_value is not None: _array.fill_value = fill_value return _array
def merge_arrays(seqarrays, fill_value=-1, flatten=False, usemask=False, asrecarray=False): """ Merge arrays field by field. Parameters ---------- seqarrays : sequence of ndarrays Sequence of arrays fill_value : {float}, optional Filling value used to pad missing data on the shorter arrays. flatten : {False, True}, optional Whether to collapse nested fields. usemask : {False, True}, optional Whether to return a masked array or not. asrecarray : {False, True}, optional Whether to return a recarray (MaskedRecords) or not. Examples -------- >>> from numpy1.lib import recfunctions as rfn >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.]))) masked_array(data = [(1, 10.0) (2, 20.0) (--, 30.0)], mask = [(False, False) (False, False) (True, False)], fill_value = (999999, 1e+20), dtype = [('f0', '<i4'), ('f1', '<f8')]) >>> rfn.merge_arrays((np.array([1, 2]), np.array([10., 20., 30.])), ... usemask=False) array([(1, 10.0), (2, 20.0), (-1, 30.0)], dtype=[('f0', '<i4'), ('f1', '<f8')]) >>> rfn.merge_arrays((np.array([1, 2]).view([('a', int)]), ... np.array([10., 20., 30.])), ... usemask=False, asrecarray=True) rec.array([(1, 10.0), (2, 20.0), (-1, 30.0)], dtype=[('a', '<i4'), ('f1', '<f8')]) Notes ----- * Without a mask, the missing value will be filled with something, depending on what its corresponding type: * ``-1`` for integers * ``-1.0`` for floating point numbers * ``'-'`` for characters * ``'-1'`` for strings * ``True`` for boolean values * XXX: I just obtained these values empirically """ # Only one item in the input sequence ? if (len(seqarrays) == 1): seqarrays = np.asanyarray(seqarrays[0]) # Do we have a single ndarray as input ? if isinstance(seqarrays, (ndarray, np.void)): seqdtype = seqarrays.dtype # Make sure we have named fields if not seqdtype.names: seqdtype = np.dtype([('', seqdtype)]) if not flatten or zip_dtype((seqarrays, ), flatten=True) == seqdtype: # Minimal processing needed: just make sure everythng's a-ok seqarrays = seqarrays.ravel() # Find what type of array we must return if usemask: if asrecarray: seqtype = MaskedRecords else: seqtype = MaskedArray elif asrecarray: seqtype = recarray else: seqtype = ndarray return seqarrays.view(dtype=seqdtype, type=seqtype) else: seqarrays = (seqarrays, ) else: # Make sure we have arrays in the input sequence seqarrays = [np.asanyarray(_m) for _m in seqarrays] # Find the sizes of the inputs and their maximum sizes = tuple(a.size for a in seqarrays) maxlength = max(sizes) # Get the dtype of the output (flattening if needed) newdtype = zip_dtype(seqarrays, flatten=flatten) # Initialize the sequences for data and mask seqdata = [] seqmask = [] # If we expect some kind of MaskedArray, make a special loop. if usemask: for (a, n) in zip(seqarrays, sizes): nbmissing = (maxlength - n) # Get the data and mask data = a.ravel().__array__() mask = ma.getmaskarray(a).ravel() # Get the filling value (if needed) if nbmissing: fval = _check_fill_value(fill_value, a.dtype) if isinstance(fval, (ndarray, np.void)): if len(fval.dtype) == 1: fval = fval.item()[0] fmsk = True else: fval = np.array(fval, dtype=a.dtype, ndmin=1) fmsk = np.ones((1, ), dtype=mask.dtype) else: fval = None fmsk = True # Store an iterator padding the input to the expected length seqdata.append(itertools.chain(data, [fval] * nbmissing)) seqmask.append(itertools.chain(mask, [fmsk] * nbmissing)) # Create an iterator for the data data = tuple(izip_records(seqdata, flatten=flatten)) output = ma.array(np.fromiter(data, dtype=newdtype, count=maxlength), mask=list(izip_records(seqmask, flatten=flatten))) if asrecarray: output = output.view(MaskedRecords) else: # Same as before, without the mask we don't need... for (a, n) in zip(seqarrays, sizes): nbmissing = (maxlength - n) data = a.ravel().__array__() if nbmissing: fval = _check_fill_value(fill_value, a.dtype) if isinstance(fval, (ndarray, np.void)): if len(fval.dtype) == 1: fval = fval.item()[0] else: fval = np.array(fval, dtype=a.dtype, ndmin=1) else: fval = None seqdata.append(itertools.chain(data, [fval] * nbmissing)) output = np.fromiter(tuple(izip_records(seqdata, flatten=flatten)), dtype=newdtype, count=maxlength) if asrecarray: output = output.view(recarray) # And we're done... return output