def main():
    years = range(2010, 2101)
    ssps = ['ssp%d' % i for i in range(1, 6)]
    variables = [(ssp, 'f4', 'ppl/km^2', -9999) for ssp in ssps]
    fname = '%s/luh2/un_codes-full.tif' % utils.outdir()
    affine, lats, lons, res, cfudge = get_transform(fname,
                                                    utils.sps(ssps[0], 2010))
    arr = (ma.empty((len(lats), len(lons)), fill_value=-9999),
           ma.empty((len(lats), len(lons)), fill_value=-9999))
    oname = '%s/luh2/sps.nc' % utils.outdir()
    with Dataset(oname, 'w') as out:
        data = init_nc(out, affine.to_gdal(), lats, lons, years, variables)
        for ssp in ssps:
            print(ssp)
            with click.progressbar(enumerate(years), length=len(years)) as bar:
                for idx, year in bar:
                    yy = mixing(year)
                    files = map(lambda y: utils.sps(ssp, y), yy)
                    # list() so len() and indexing work on Python 3
                    rasters = list(map(rasterio.open, files))
                    if len(rasters) == 1:
                        resample(rasters[0], 1, res,
                                 rwarp.Resampling.average, arr[0])
                        data[ssp][idx, :, :] = np.clip(arr[0], 0, None) * cfudge
                    else:
                        f0 = (year % 10) / 10.0
                        resample(rasters[0], 1, res,
                                 rwarp.Resampling.average, arr[0])
                        resample(rasters[1], 1, res,
                                 rwarp.Resampling.average, arr[1])
                        data[ssp][idx, :, :] = ((1 - f0) * np.clip(arr[0], 0, None) +
                                                f0 * np.clip(arr[1], 0, None)) * cfudge
def register(self): """ We have all the sie and timestamp values in dictionaries, so just build the time step sequence from them """ self.numSteps = len(self.StepDict) self.Steps = ma.empty(self.numSteps, dtype=np.int32) self.Steps.mask = True self.TS_IDs = ma.empty(self.numSteps, dtype=np.int32) self.TS_IDs.mask = True self.Steps.soften_mask() self.TS_IDs.soften_mask() for sie,index in self.StepDict.iteritems(): self.Steps[index] = sie self.TS_IDs[index] = self.TS_IDDict[sie] self.__registered += 1 if ma.count_masked(self.Steps) != 0: handleError(self, TimeStepsRegisterError, "TimeSteps.register(): Warning - registered all %d, but still have %d masked values" % (self.numSteps, len(np.nonzero(self.Steps.mask)))) # not reached if self.Debug == True: self.DebugMessages += "TimeSteps.register(): Registred all %d expected values, now to check them and take the Step differences" % self.numSteps self.Diff = np.diff(self.Steps) if (np.min(self.Diff) <= 0) and (self.Debug == True): message = "TimeSteps.register(): Warning - negative or zero time differentials at\n" message += "indices:" + str(np.where(self.Diff <= 0)) + "values:" + str(self.Diff[np.where(self.Diff <= 0)]) handleError(self, TimeStepsRegisterError, message) # not reached self.HaveData = True
def windowHet(these_geno, posdf, scaf, wSize=50, wStep=None):
    if not wStep:
        wStep = wSize
    these_idx = [x for x in posdf.loc[posdf["chrom"] == scaf].index]
    these_pos = np.array([x for x in posdf["pos"].loc[these_idx]])
    nInd = these_geno.shape[0]   # number of individuals
    nGeno = these_geno.shape[1]
    nWindows = math.ceil((nGeno - wSize) / wStep) + 1
    these_means = ma.empty([nInd, nWindows])
    wCenterPos = []
    start = 0
    stop = wSize
    w = 0
    while stop < (nGeno + wStep):
        wData = these_geno[:, start:stop]
        # per-SNP reference allele frequency:
        masked = ma.array(wData, mask=[wData == -1])
        nChroms, nRef = [], []
        for x in range(masked.shape[1]):
            nChroms.append(2 * masked[:, x].count(axis=0))
            nRef.append(np.where(masked[:, x] == 0)[0].shape[0] * 2 +
                        np.where(masked[:, x] == 1)[0].shape[0])
        raf = np.array([2 * x * (1 - x)
                        for x in [nRef[i] / c for i, c in enumerate(nChroms)]])
        # Fold homozygotes:
        masked[masked == 2] = 0
        F = ma.empty(masked.shape[0])
        for h in range(masked.shape[0]):
            this_het = masked[h, :]
            this_raf = ma.array(raf, mask=ma.getmask(this_het))
            F[h] = 1 - (this_het.mean() / this_raf.mean())
        F = ma.array(F, mask=[np.isnan(x) for x in F])
        these_means[:, w] = F
        if stop >= nGeno:
            realStop = nGeno - 1
        else:
            realStop = stop
        wCenterPos.append(np.mean([these_pos[start], these_pos[realStop]]))
        start += wStep
        stop += wStep
        w += 1
    centers = [x for x in np.array(wCenterPos)[~np.all(these_means.mask, axis=0)]]
    these_means = these_means[:, ~np.all(these_means.mask, axis=0)]
    return [these_means, centers]
def print(self, figure_directory=None, precision=None):
    if figure_directory is None:
        figure_directory = self.figure_directory
    if not path.isdir(figure_directory):
        os.mkdir(figure_directory)
    downscale = self.downscale
    try:
        for i_simulate in range(self.n_simulate):
            self.simulate(precision)
            poisson_rate = ma.empty(downscale.shape)
            gamma_mean = ma.empty(downscale.shape)
            rain = ma.empty(downscale.shape)
            for lat_i in range(downscale.shape[0]):
                for long_i in range(downscale.shape[1]):
                    time_series = downscale.time_series_array[lat_i][long_i]
                    poisson_rate[lat_i, long_i] = np.exp(
                        time_series.poisson_rate["const"])
                    gamma_mean[lat_i, long_i] = np.exp(
                        time_series.gamma_mean["const"])
                    rain[lat_i, long_i] = time_series[0]
            poisson_rate.mask = downscale.mask
            gamma_mean.mask = downscale.mask
            rain.mask = downscale.mask
            rain.mask[rain == 0] = True
            self.print_map(poisson_rate, str(i_simulate) + "_poisson_rate",
                           figure_directory)
            self.print_map(gamma_mean, str(i_simulate) + "_gamma_mean",
                           figure_directory)
            self.print_map(rain, str(i_simulate) + "_rain", figure_directory,
                           [0, 50])
            cov_chol = self.downscale.parameter_gp_target.cov_chol
            cov = np.dot(cov_chol, np.transpose(cov_chol))
            plt.figure()
            plt.imshow(cov)
            plt.colorbar()
            plt.savefig(
                path.join(figure_directory, str(i_simulate) + "_autocorr.pdf"))
            plt.close()
    except linalg.LinAlgError:
        print(precision, "fail")
def raw2target_interp_inds(raw_t, target_t):
    """
    Returns the indices of target data points on or between which raw
    data points lie.
    """
    out = ma.empty((len(raw_t), 2), dtype=int)
    out.mask = False
    r_inds = ma.arange(len(raw_t))
    r_inds.mask = False
    t_inds = np.arange(len(target_t))
    # if the same time, want index of that exact time, not a pair of
    # distinct times
    bullzeyes, r_i, t_i = np.intersect1d(raw_t, target_t, return_indices=True)
    out[:, 0][r_i] = t_i
    out[:, 1][r_i] = t_i
    r_inds[r_i] = ma.masked
    # if diff times, want indices of bounding times
    ## WANT TO MAKE MORE PYTHONIC ##
    for i in np.nonzero(~r_inds.mask)[0]:
        dt = raw_t[i] - target_t
        if np.all(dt > 0) or np.all(dt < 0):
            out[i] = ma.masked
            continue
        out[i, 0] = t_inds[dt > 0][np.argmin(dt[dt > 0])]
        out[i, 1] = t_inds[dt < 0][np.argmax(dt[dt < 0])]
    return out
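# A hypothetical usage sketch (not from the original source) of
# raw2target_interp_inds: for each raw time it returns the pair of target
# indices that bracket it, equal indices on an exact hit, and a masked row
# when a raw time falls outside the target range.
import numpy as np
import numpy.ma as ma

raw_t = np.array([0.5, 1.0, 3.5])
target_t = np.array([0.0, 1.0, 2.0, 3.0])
print(raw2target_interp_inds(raw_t, target_t))
# [[0 1]
#  [1 1]
#  [-- --]]   (3.5 lies beyond the last target point, so its row is masked)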
def from_ham6(ham6, palette, background=None):
    if background is None:
        background = ma.masked
    elif isinstance(background, numbers.Integral):
        background = palette[background]
    if ma.is_masked(background) or ma.isMaskedArray(ham6):
        rgb8 = ma.empty(ham6.shape[:2] + (3,), dtype=np.uint8)
    else:
        rgb8 = np.empty(ham6.shape[:2] + (3,), dtype=np.uint8)
    for y in range(rgb8.shape[0]):
        c = background
        for x in range(rgb8.shape[1]):
            i = ham6[y, x]
            if i is ma.masked:
                # propagate the mask to the output pixel
                rgb8[y, x] = ma.masked
                continue
            if i < 0x10:
                c = palette[i]
            else:
                c = c.copy()
                c[(None, 2, 0, 1)[i >> 4]] = (i & 0xF) * 0x11
            rgb8[y, x] = c
    return rgb8
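# A hypothetical round-trip sketch (the 16-color palette here is made up):
# decode a 2x2 HAM6 image whose pixels are all direct palette indices
# (< 0x10), so no HAM modify-ops are exercised.
import numpy as np

palette = np.arange(48, dtype=np.uint8).reshape(16, 3)  # placeholder palette
ham6 = np.array([[0, 1], [2, 3]], dtype=np.uint8)
rgb8 = from_ham6(ham6, palette, background=0)
print(rgb8.shape)  # (2, 2, 3); each pixel is the palette entry it indexed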
def XWrap(x, ifold, fill_value=0):
    """
    Extend and wrap array.

    Fold array every `ifold` indices.  There will typically be a hanging
    part of the array.  This is padded out.

    Parameters
    ----------
    x : input
    ifold : Wrap array after ifold indices.

    Return
    ------
    xwrap : Wrapped array.
    """
    ncad = x.size  # Number of cadences
    nrow = int(np.floor(ncad / ifold) + 1)
    nExtend = nrow * ifold - ncad  # Pad out remainder of array with 0s.
    if type(x) is np.ma.core.MaskedArray:
        pad = ma.empty(nExtend)
        pad.mask = True
        x = ma.hstack((x, pad))
    else:
        pad = np.empty(nExtend)
        pad[:] = fill_value
        x = np.hstack((x, pad))
    xwrap = x.reshape(nrow, -1)
    return xwrap
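# A minimal sketch of XWrap: fold a 10-element masked series every 4
# samples; the 2-cell remainder of the last row comes back masked.
import numpy as np
import numpy.ma as ma

x = ma.arange(10.0)
xw = XWrap(x, 4)
print(xw.shape)             # (3, 4)
print(ma.count_masked(xw))  # 2 padded cells are masked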
def do_block(win, mask, index):
    xres, yres = mask.res
    affine = mask.window_transform(win)
    mask_data = mask.read(1, masked=True, window=win)
    out = ma.empty(mask_data.shape, dtype=np.float32)
    out.mask = mask_data.mask.copy()
    height, width = out.shape
    startx, starty = affine * (win[1][0] - xres / 2.0, win[0][0] - yres / 2.0)
    endx, endy = affine * (win[1][1] - xres / 2.0, win[0][1] - yres / 2.0)
    click.echo("block %d:%d" % (win[0][0], win[0][1]))
    lats = np.linspace(starty, endy, height)
    lons = np.linspace(startx, endx, width)
    for y in range(height):
        if y > 10:
            break
        click.echo("block %d" % (y + win[0][0]))
        lat = lats[y]
        lat_min = lat - yres / 2.0
        lat_max = lat + yres / 2.0
        for lon in lons[ma.where(mask_data.mask[y, :] != True)]:
            bbox = (lon - xres / 2.0, lat_min, lon + xres / 2.0, lat_max)
            length = 0
            for obj in index.intersection(bbox, objects='raw'):
                length += do_intersect(bbox, obj)
            # end x for loop
            #out[y, x] = length
    return out
def _make_data(
    self,
    data,
    dtype=np.dtype("int32"),
    fill_value=None,
    mask=None,
    lazy=False,
    N=3,
):
    if isinstance(data, Iterable):
        shape = (len(data), N, N)
        data = np.array(data).reshape(-1, 1, 1)
    else:
        shape = (N, N)
    if mask is not None:
        payload = ma.empty(shape, dtype=dtype, fill_value=fill_value)
        payload.data[:] = data
        if isinstance(mask, bool):
            payload.mask = mask
        else:
            payload[mask] = ma.masked
    else:
        payload = np.empty(shape, dtype=dtype)
        payload[:] = data
    if lazy:
        payload = as_lazy_data(payload)
    return payload
def test_prepare_array_iterables():
    """Convert iterable data into a proper array."""
    # iterable contains arrays
    data = [np.zeros((1, 1))]
    # output ndarray
    output = prepare_array(data, masked=False)
    assert isinstance(output, np.ndarray)
    assert not isinstance(output, ma.masked_array)
    assert output.shape == (1, 1, 1)
    # output masked array
    output = prepare_array(data)
    assert isinstance(output, ma.masked_array)
    assert output.shape == (1, 1, 1)
    # iterable contains masked arrays
    data = [ma.empty((1, 1))]
    output = prepare_array(data, masked=False)
    assert isinstance(output, np.ndarray)
    assert not isinstance(output, ma.masked_array)
    assert output.shape == (1, 1, 1)
    # output masked array
    output = prepare_array(data)
    assert isinstance(output, ma.masked_array)
    assert output.shape == (1, 1, 1)
    # iterable contains masked arrays with full mask
    data = [ma.masked_array(data=np.ones((1, 1)), mask=np.ones((1, 1)))]
    output = prepare_array(data, masked=False)
    assert isinstance(output, np.ndarray)
    assert not isinstance(output, ma.masked_array)
    assert output.shape == (1, 1, 1)
    # output masked array
    output = prepare_array(data)
    assert isinstance(output, ma.masked_array)
    assert output.shape == (1, 1, 1)
def comp_gradient(blob):  # compare g within sub blob, a component of intra_blob
    dert__ = ma.empty(shape=blob.dert__.shape, dtype=int)    # initialize dert__
    g__ = ma.array(blob.dert__[:, :, 3], mask=~blob.map)     # apply mask = ~map
    dy__ = g__[2:, 1:-1] - g__[:-2, 1:-1]  # vertical comp between rows -> dy; first and last columns are discarded
    dx__ = g__[1:-1, 2:] - g__[1:-1, :-2]  # lateral comp between columns -> dx; first and last rows are discarded
    gg__ = np.hypot(dy__, dx__) - ave      # deviation of gradient
    # pack all derts into dert__
    dert__[:, :, 0] = g__
    dert__[1:-1, 1:-1, 1] = dy__  # first row, last row, first and last columns are discarded
    dert__[1:-1, 1:-1, 2] = dx__
    dert__[1:-1, 1:-1, 3] = gg__
    blob.new_dert__[0] = dert__  # pack dert__ into blob
    return 1  # comp rng
def to_ham6(img, palette, background=None, out=None):
    _debug_array(img)
    if background is None:
        background = ma.masked
    elif isinstance(background, numbers.Integral):
        background = palette[background]
    if not ma.is_masked(background) and ma.isMaskedArray(img):
        img = img.filled(background)
    if ma.isMaskedArray(img):
        ham6 = ma.empty(img.shape[:2], dtype=np.uint8)
    else:
        ham6 = np.empty(img.shape[:2], dtype=np.uint8)
    for y in range(img.shape[0]):
        c = background
        for x in range(img.shape[1]):
            i, c = ham6_nearest(img[y, x], palette, c)
            ham6[y, x] = i
            if out is not None:
                out[y, x] = c
    _debug_array(ham6)
    return ham6
def masked_array(self):
    data = ma.empty(self.shape, dtype=self.dtype, fill_value=self.fill_value)
    for index in np.ndindex(self._stack.shape):
        masked_array = self._stack[index].masked_array()
        data[index] = masked_array
    return data
def register(self): """ We have all the bin and timestamp values in dictionaries, so just build the time bin sequence from them """ self.numBins = len(self.BinDict) self.Bins = ma.empty(self.numBins, dtype=np.float64) self.Bins.mask = True self.Bins.soften_mask() # the bins some times emerge in a random order # so put them in order, and then preserve that count = 0 for bin,index in sorted(self.BinDict.iteritems()): self.Bins[count] = bin count += 1 for count in range(self.numBins): self.BinDict[self.Bins[count]] = count self.__registered += 1 if ma.count_masked(self.Bins) != 0: handleError(self, HistBinsRegisterError, "HistBins.register(): Warning - registered all %d, but still have %d masked values" % (self.numBins, len(np.nonzero(self.Bins.mask)))) # not reached if self.Debug == True: self.DebugMessages += "HistBins.register(): Registred all %d expected values, now to check them and take the Bin differences" % self.numBins self.HaveData = True
def reproject2(src, data, resolution, resampling):
    meta = src.meta.copy()
    if not src.crs.is_valid:
        crs = src.crs.from_string(u'epsg:4326')
    else:
        crs = src.crs
    newaff, width, height = rwarp.calculate_default_transform(
        crs, crs, src.width, src.height, *src.bounds, resolution=resolution)
    out = ma.empty((src.count, int(height), int(width)), dtype=meta['dtype'])
    newarr = np.empty((int(height), int(width)), dtype=meta['dtype'])
    meta.update({'transform': newaff, 'width': int(width),
                 'height': int(height), 'nodata': src.nodata})
    for idx in range(data.shape[0]):
        rwarp.reproject(source=data[idx],
                        destination=newarr,
                        src_transform=src.transform,
                        dst_transform=newaff,
                        src_crs=src.crs,
                        dst_crs=crs,
                        src_nodata=src.nodatavals[idx],
                        dst_nodata=src.nodatavals[idx],
                        resampling=resampling)
        out[idx] = ma.masked_values(newarr, src.nodatavals[idx])
    return meta, out
def execute(self):
    params = self.params
    n_files = len(params['file_middles'])
    scan_number = 0
    for file_middle in params['file_middles']:
        file_name = (params['input_root'] + file_middle +
                     params['input_end'])
        f = open(file_name, 'r')
        scan_list = cPickle.load(f)
        n_scans_file = len(scan_list)
        for scan in scan_list:
            if scan_number == 0:
                n_scans = n_scans_file * n_files
                gain = ma.empty((n_scans, ) + scan['gain'].shape)
                time_arr = sp.empty(n_scans, dtype=int)
            gain[scan_number, ...] = scan['gain']
            tstring = str(scan['time']).split('.')[0]
            to = time.strptime(tstring, "%Y-%m-%dT%H:%M:%S")
            time_arr[scan_number] = to.tm_sec + 60 * (
                to.tm_min + 60 * (to.tm_hour + 24 * (
                    to.tm_yday + 365 * (to.tm_year - 2000))))
            scan_number += 1
    self.time = time_arr[:scan_number]
    self.gain = gain[:scan_number, ...]
def main(version):
    fname = '%s/hyde/hyde-%s.nc' % (utils.outdir(), version)
    #fname = 'netcdf:%s/hyde/hyde-%s.nc:popc' % (utils.outdir(), version)
    uncodes = '%s/luh2/un_codes-full.tif' % utils.outdir()
    oname = '%s/luh2/hyde.nc' % utils.outdir()
    variables = [('popd', 'f4', 'ppl/km^2', -9999)]
    affine, lats, lons, res, cfudge = get_transform(
        uncodes, 'netcdf:' + fname + ':popc')
    arr = ma.empty((len(lats), len(lons)), fill_value=-9999)
    with rasterio.open(utils.luh2_static('carea')) as carea_ds:
        carea = carea_ds.read(1, masked=True)
    with rasterio.open('netcdf:' + fname + ':popc') as ds:
        years = tuple(
            map(lambda idx: int(ds.tags(idx)['NETCDF_DIM_time']), ds.indexes))
        with Dataset(oname, 'w') as out:
            init_nc(out, affine.to_gdal(), lats, lons, years, variables)
            print(ds.name)
            print(years)
            for idx, year in zip(ds.indexes, years):
                print(idx, year)
                resample(ds, idx, res, rwarp.Resampling.average, arr)
                out.variables['popd'][idx - 1, :, :] = arr * cfudge / carea
def _getdatafromsql(connection, tmp_table, query):
    """
    Private function creating a ndarray from the current table.

    Parameters
    ----------
    connection : sqlite3.Connection
        Current SQL connection.
    tmp_table : string
        Name of the temporary table created for the purpose of keeping
        ids when WHERE is used.
    query : string
        SQL query.
    """
    # Define and execute the query
    connection.execute("CREATE TEMPORARY TABLE %s AS %s" % (tmp_table, query))
    # Get the list of names and types from the pragma
    pragmastr = "PRAGMA TABLE_INFO(%s)" % tmp_table
    (names, typestr) = zip(*(_[1:3]
                             for _ in connection.execute(pragmastr).fetchall()))
    # Transforms the typestr into dtypes
    ndtype = []
    for (i, (n, t)) in enumerate(zip(names, typestr)):
        # Transform the name into a regular string (not unicode)
        n = str(n)
        if t == 'INTEGER':
            ndtype.append((n, int))
        elif t == 'TEXT':
            ndtype.append((n, '|S30'))
        elif t == 'BLOB':
            ndtype.append((n, object))
        else:
            ndtype.append((n, float))
    # Construct the ndarray
    connection.row_factory = sqlite3.Row
    data = connection.execute("SELECT * FROM %s" % tmp_table).fetchall()
    try:
        return np.array(data, dtype=ndtype)
    except TypeError:
        output = ma.empty(len(data), dtype=ndtype)
        # Find the index of the first row (0 or 1)?
        rowidref = connection.execute(
            "SELECT rowid FROM %s LIMIT 1" % tmp_table).fetchone()[0]
        # Loop through the different fields identifying the null fields to mask
        maskstr_template = "SELECT rowid FROM %s WHERE %%s IS NULL" % tmp_table
        datastr_template = "SELECT %%s FROM %s WHERE %%s IS NOT NULL" % tmp_table
        for (i, field) in enumerate(names):
            current_output = output[field]
            current_mask = current_output._mask
            maskstr = maskstr_template % field
            maskidx = [_[0] - rowidref
                       for _ in connection.execute(maskstr).fetchall()]
            current_mask[maskidx] = True
            datastr = datastr_template % (field, field)
            np.place(current_output._data, ~current_mask,
                     [_[0] for _ in connection.execute(datastr).fetchall()])
        connection.execute("DROP TABLE %s" % tmp_table)
        return output
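# A runnable sketch of _getdatafromsql against an in-memory database; the
# NULL in column `b` forces the TypeError fallback, so that row comes back
# masked in the output masked array.
import sqlite3

conn = sqlite3.connect(':memory:')
conn.execute("CREATE TABLE t (a INTEGER, b REAL)")
conn.executemany("INSERT INTO t VALUES (?, ?)", [(1, 2.0), (2, None)])
out = _getdatafromsql(conn, 'tmp', "SELECT * FROM t")
print(out['b'])  # [2.0 --]; the NULL is masked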
def XWrap2(x, P0, fill_value=0, pow2=False):
    """
    Extend and wrap array.

    Fold array every `P0` indices.  There will typically be a hanging
    part of the array.  This is padded out.

    Parameters
    ----------
    x : input
    P0 : Base period, units of elements
    pow2 : If true, pad out nRows so that it's the next power of 2.

    Return
    ------
    xwrap : Wrapped array.
    """
    ncad = x.size  # Number of cadences
    # for some reason np.ceil(ncad/P0) doesn't work!
    nrow = int(np.floor(ncad / P0) + 1)
    nExtend = nrow * P0 - ncad  # Pad out remainder of array with 0s.
    if type(x) is np.ma.core.MaskedArray:
        pad = ma.empty(nExtend)
        pad.mask = True
        x = ma.hstack((x, pad))
    else:
        pad = np.empty(nExtend)
        pad[:] = fill_value
        x = np.hstack((x, pad))
    xwrap = x.reshape(nrow, -1)
    if pow2:
        k = np.ceil(np.log2(nrow)).astype(int)
        nrow2 = 2**k
        fill = ma.empty((nrow2 - nrow, P0))
        fill[:] = fill_value
        fill.mask = True
        xwrap = ma.vstack([xwrap, fill])
    return xwrap
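# A minimal sketch of the pow2 branch of XWrap2: 10 elements folded at
# P0=4 give 3 rows, padded up to the next power of two with a fully
# masked fill row.
import numpy.ma as ma

xw = XWrap2(ma.arange(10.0), 4, pow2=True)
print(xw.shape)  # (4, 4); row 3 is all masked fill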
def insertEmptyCol(self, colName, dt): """ Inserts an empty column into the table. """ self.cols = np.append(self.cols, colName) self.colFormat[colName] = str(self.width) + "." + str(self.prec) + "f" self.data[colName] = ma.empty((self.nRows,), dtype=dt) self.data[colName][:] = ma.masked
def test_masked_x():
    data = ma.empty((10,), dtype=[('x', float), ('y', float)])
    data['x'] = np.arange(10.)
    data['y'] = np.arange(10.)
    data.mask = False
    data['x'].mask[2] = True
    x2 = np.arange(0.5, 9.5)
    with pytest.raises(ValueError):
        resample(data, 'x', x2, 'y')
def standardize_fill_value(cube):
    """Work around default `fill_value` when obtaining `_CubeSignature`
    (iris) using `lazy_data()` (biggus).

    Warning: use only when you DO KNOW that the slices should have the
    same `fill_value`!!!
    """
    if ma.isMaskedArray(cube._my_data):
        fill_value = ma.empty(0, dtype=cube._my_data.dtype).fill_value
        cube._my_data.fill_value = fill_value
    return cube
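# The lookup trick standardize_fill_value relies on: ma.empty(0, dtype)
# is a cheap way to obtain NumPy's default fill value for a dtype.
import numpy as np
import numpy.ma as ma

print(ma.empty(0, dtype=np.float64).fill_value)  # 1e+20
print(ma.empty(0, dtype=np.int32).fill_value)    # 999999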
def empty(coordinates, dtype=float, masked=False):
    """Get an empty ``gridded_array`` of dtype ``dtype``."""
    shape = [coordinates[dim].size for dim in coordinates.keys()]
    if masked:
        _data = ma.empty(shape, dtype=dtype)
    else:
        _data = np.empty(shape, dtype=dtype)
    return gridded_array(_data, coordinates)
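# Hypothetical usage, assuming `gridded_array` (defined elsewhere in the
# same module) wraps an array together with its coordinate dict:
from collections import OrderedDict
import numpy as np

coords = OrderedDict(lat=np.linspace(-90, 90, 181),
                     lon=np.linspace(0, 359.5, 720))
ga = empty(coords, dtype=np.float32, masked=True)  # masked 181x720 grid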
def test_masked_all_valid():
    data = ma.empty((10,), dtype=[('x', float), ('y', float)])
    data['x'] = np.arange(10.)
    data['y'] = np.arange(10.)
    data.mask = False
    x2 = np.arange(0.5, 9.5)
    result = resample(data, 'x', x2, 'y')
    assert np.array_equal(result['x'], x2)
    assert np.array_equal(result['y'], x2)
def refl_plus3(self):
    """Current pixel minus the value three to the right"""
    if self.__rp3 is None:
        plus3 = -self.refl_minus3[:, 3:]
        final = ma.empty(self.refl.shape, dtype=self.refl.dtype)
        final[:, :-3] = plus3
        final[:, -3:] = ma.masked
        self.refl_plus3 = final
    return self.__rp3
def refl_minus3(self):
    """Current pixel minus the value three to the left"""
    if self.__rm3 is None:
        minus3 = self._minus(3)
        final = ma.empty(self.refl.shape, dtype=self.refl.dtype)
        final[:, 3:] = minus3
        final[:, :3] = ma.masked
        self.refl_minus3 = final
    return self.__rm3
def test_interpolation__masked(self):
    levels = np.array([0.5, 1.5])
    new_data = np.empty([len(levels)] + list(self.shape[1:]), dtype=float)
    new_data[:, 0, :] = np.nan
    new_data_mask = np.isnan(new_data)
    scheme = 'linear'
    mask = [[[False], [True]], [[True], [False]], [[False], [False]]]
    masked = ma.empty(self.shape)
    masked.mask = mask
    cube = _make_cube(masked, dtype=self.dtype)
    # save cube to test the lazy data interpolation too
    iris.save(cube, self.filename)
    with mock.patch('stratify.interpolate', return_value=new_data) as mocker:
        # first test lazy
        loaded_cube = iris.load_cube(self.filename)
        result_from_lazy = extract_levels(loaded_cube, levels, scheme)
        self.assertEqual(result_from_lazy, self.created_cube)
        # then test realized
        result = extract_levels(cube, levels, scheme)
        self.assertEqual(result, self.created_cube)
        args, kwargs = mocker.call_args
        # Check the stratify.interpolate args ...
        self.assertEqual(len(args), 3)
        self.assert_array_equal(args[0], levels)
        pts = cube.coord(axis='z', dim_coords=True).points
        src_levels_broadcast = np.broadcast_to(pts.reshape(self.z, 1, 1),
                                               cube.shape)
        self.assert_array_equal(args[1], src_levels_broadcast)
        self.assert_array_equal(args[2], cube.data)
        # Check the stratify.interpolate kwargs ...
        self.assertEqual(
            kwargs, dict(axis=0, interpolation=scheme, extrapolation='nan'))
    args, kwargs = self.mock_create_cube.call_args
    # in-place for new extract_levels with nan's
    new_data[np.isnan(new_data)] = _MDI
    # Check the _create_cube args ...
    self.assertEqual(len(args), 4)
    self.assertEqual(args[0].metadata, cube.metadata)
    coord_comparison = iris.analysis.coord_comparison(args[0], cube)
    self.assertFalse(coord_comparison['not_equal'] or
                     coord_comparison['non_equal_data_dimension'])
    self.assert_array_equal(args[0].data, cube.data)
    new_data_mask = np.zeros(new_data.shape, bool)
    new_data_mask[new_data == _MDI] = True
    new_data = np.ma.array(new_data, mask=new_data_mask)
    self.assert_array_equal(args[1], new_data)
    self.assertTrue(ma.isMaskedArray(args[1]))
    self.assert_array_equal(args[1].mask, new_data_mask)
    self.assert_array_equal(args[2],
                            self.cube.coord(axis='z', dim_coords=True))
    self.assert_array_equal(args[3], levels)
    # Check the _create_cube kwargs ...
    self.assertEqual(kwargs, dict())
def add_field_like(self, name, like_array):
    """
    Add a new field to the Datamat with the dtype of the like_array and
    the shape of the like_array, except for the first dimension, which
    will instead be the field-length of this Datamat.
    """
    new_shape = list(like_array.shape)
    new_shape[0] = len(self)
    new_data = ma.empty(new_shape, like_array.dtype)
    new_data.mask = True
    self.add_field(name, new_data)
def masked_array(self):
    data = ma.empty(self.shape, dtype=self.dtype)
    offset = 0
    indices = [slice(None)] * self.ndim
    axis = self._axis
    for tile in self._tiles:
        next_offset = offset + tile.shape[axis]
        indices[axis] = slice(offset, next_offset)
        # index with a tuple; indexing with a list is deprecated
        data[tuple(indices)] = tile.masked_array()
        offset = next_offset
    return data
def test_masked_one_invalid_nearest():
    data = ma.empty((10,), dtype=[('x', float), ('y', float)])
    data['x'] = np.arange(10.)
    data['y'] = np.ones(10)
    data.mask = False
    data['y'].mask[4] = True
    x2 = np.arange(0.25, 9.25)
    result = resample(data, 'x', x2, 'y', kind='nearest')
    assert np.array_equal(result['x'], x2)
    assert np.all(result['y'][:4] == 1)
    assert result['y'].mask[4]
    assert np.all(result['y'][5:] == 1)
def tracking(self, other):
    shape = (self.shape[0], other.shape[0])
    i, j, c = self.match(other, intern=False)
    cost_mat = ma.empty(shape, dtype="f4")
    cost_mat.mask = ma.ones(shape, dtype="bool")
    m = c > 0
    i, j, c = i[m], j[m], c[m]
    cost_mat[i, j] = 1 - c
    i_self, i_other = self.solve_function(cost_mat)
    i_self, i_other = self.post_process_link(other, i_self, i_other)
    logger.debug("%d matched with previous", i_self.shape[0])
    return i_self, i_other, cost_mat[i_self, i_other]
@classmethod
def compute(cls, f, x, y):
    g = ma.empty((3, x.max() - x.min() + 1, y.max() - y.min() + 1),
                 dtype=f.dtype)
    g.mask = True
    g[:, x - x.min(), y - y.min()] = f[:]
    return float(
        ma.mean(
            gradient(g, gradientType=3, distanceType='harmony',
                     includeOrigin=True, normalize=False)))
def reproject(filename, bidx, resolution, resampling):
    """Returns the resampled Numpy array and the output metadata

    Keyword Arguments:
    filename -- Input file
    bidx -- Raster band index (one-based indexing) (default 1)
    resolution -- Multiplier to scale by
    resampling -- Resampling method from rasterio.warp.Resampling enum

    Nota Bene: Nodata value MUST be set or resampling on edges will be
    incorrect!
    """
    with rasterio.open(filename) as src:
        meta = src.meta.copy()
        if not src.crs.is_valid:
            crs = src.crs.from_string(u'epsg:4326')
        else:
            crs = src.crs
        newaff, width, height = rwarp.calculate_default_transform(
            crs, crs, src.width, src.height, *src.bounds,
            resolution=resolution)
        data = ma.empty((src.count, int(height), int(width)),
                        dtype=meta['dtype'])
        newarr = np.empty((int(height), int(width)), dtype=meta['dtype'])
        meta.update({'transform': newaff, 'width': int(width),
                     'height': int(height), 'nodata': src.nodata})
        if bidx is None:
            bidx = range(1, src.count + 1)
        elif not isinstance(bidx, collections.Iterable):
            bidx = [bidx]
        with rasterio.open('/tmp/reproj.tif', 'w', **meta) as dst:
            for idx in bidx:
                arr = src.read(idx, masked=True)
                rwarp.reproject(source=arr,
                                destination=newarr,
                                src_transform=src.transform,
                                dst_transform=newaff,
                                src_crs=src.crs,
                                dst_crs=crs,
                                src_nodata=src.nodatavals[idx - 1],
                                dst_nodata=src.nodatavals[idx - 1],
                                resampling=resampling)
                data[idx - 1] = ma.masked_values(newarr,
                                                 src.nodatavals[idx - 1])
    return meta, data
def bootstrap(self):
    first_slice = self.array[0]
    shape = first_slice.shape
    self.running_total = np.zeros(shape, dtype=self.dtype)
    if self.masked:
        first_slice = first_slice.masked_array()
        self.temp = ma.empty(shape, dtype=self.dtype)
        self.running_total += first_slice.filled(0)
        self._bootstrap_mask(first_slice.mask, shape)
    else:
        first_slice = first_slice.ndarray()
        self.temp = np.empty(shape, dtype=self.dtype)
        self.running_total += first_slice
def calc_pixel_centers(self):
    p = self.projection
    p4_obj = p.get_proj4_obj()
    # prep the input data
    h = p.perspective_point_height
    x_m = self.x * h
    y_m = self.y * h
    # construct the output arrays
    size_2d = (len(self.y), len(self.x))
    lon = ma.empty(size_2d, np.float32)
    lat = ma.empty(size_2d, np.float32)
    # Construct the 2D lat/lon array
    for i_y in range(len(self.y)):
        lon_row, lat_row = p4_obj(x_m,
                                  np.array([y_m[i_y]] * len(self.x)),
                                  inverse=True)
        lat[i_y, :] = ma.masked_values(lat_row, 1e30)
        lon[i_y, :] = ma.masked_values(lon_row, 1e30)
    self.lat = lat
    self.lon = lon
def eye_image():
    global eye_history
    digitalWrite(EYE_OUT_PIN, 1)
    rs = ma.empty((3, 3), dtype=int)
    rs.mask = numpy.array(((1, 0, 1),
                           (0, 1, 0),
                           (1, 0, 1)))
    time.sleep(0.01)
    rs[0, 1] = analogRead(EYE_TOP)
    rs[1, 0] = analogRead(EYE_LEFT)
    rs[1, 2] = analogRead(EYE_RIGHT)
    rs[2, 1] = analogRead(EYE_BOTTOM)
    eye_history.append(rs)
    digitalWrite(EYE_OUT_PIN, 0)
    return rs
def get_thetaws_profile(self):
    '''
    Function to calculate the theta-ws profile.

    Parameters
    ----------
    None

    Returns
    -------
    Array of theta-ws profile
    '''
    thetaws = ma.empty(self.pres.shape[0])
    for i in range(len(self.v)):
        thetaws[i] = thermo.thetaws(self.pres[i], self.tmpc[i])
    thetaws[thetaws == self.missing] = ma.masked
    thetaws.set_fill_value(self.missing)
    return thetaws
def get_thetae_profile(self):
    '''
    Function to calculate the theta-e profile.

    Parameters
    ----------
    None

    Returns
    -------
    Array of theta-e profile
    '''
    thetae = ma.empty(self.pres.shape[0])
    for i in range(len(self.v)):
        thetae[i] = thermo.ctok(
            thermo.thetae(self.pres[i], self.tmpc[i], self.dwpc[i]))
    thetae[thetae == self.missing] = ma.masked
    thetae.set_fill_value(self.missing)
    return thetae
def get_relh_profile(self):
    '''
    Function to calculate the relative humidity profile.

    Parameters
    ----------
    None

    Returns
    -------
    Array of relative humidity profile
    '''
    relh = ma.empty(self.pres.shape[0])
    for i in range(len(self.v)):
        relh[i] = thermo.relh(self.pres[i], self.tmpc[i], self.dwpc[i])
    relh[relh == self.missing] = ma.masked
    relh.set_fill_value(self.missing)
    return relh
def test_masked_kind_not_supported():
    data = ma.empty((10,), dtype=[('x', float), ('y', float)])
    data['x'] = np.arange(10.)
    data['y'] = np.ones(10)
    data.mask = False
    data['y'].mask[4] = True
    x2 = np.arange(0.25, 9.25)
    with pytest.raises(ValueError):
        resample(data, 'x', x2, 'y', kind='quadratic')
    with pytest.raises(ValueError):
        resample(data, 'x', x2, 'y', kind='cubic')
    with pytest.raises(ValueError):
        resample(data, 'x', x2, 'y', kind=2)
    with pytest.raises(ValueError):
        resample(data, 'x', x2, 'y', kind=3)
    with pytest.raises(ValueError):
        resample(data, 'x', x2, 'y', kind=4)
    with pytest.raises(ValueError):
        resample(data, 'x', x2, 'y', kind=5)
def get_wetbulb_profile(self):
    '''
    Function to calculate the wetbulb profile.

    Parameters
    ----------
    None

    Returns
    -------
    Array of wet bulb profile
    '''
    wetbulb = ma.empty(self.pres.shape[0])
    for i in range(len(self.v)):
        wetbulb[i] = thermo.wetbulb(self.pres[i], self.tmpc[i], self.dwpc[i])
    wetbulb[wetbulb == self.missing] = ma.masked
    wetbulb.set_fill_value(self.missing)
    return wetbulb
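# A minimal sketch of the masking idiom shared by the profile getters
# above, assuming `missing` is the sentinel (e.g. -9999) used in the
# source data: sentinel entries are masked but still fill back to the
# sentinel on output.
import numpy.ma as ma

missing = -9999.0
wetbulb = ma.array([12.1, missing, 14.6])
wetbulb[wetbulb == missing] = ma.masked
wetbulb.set_fill_value(missing)
print(wetbulb)           # [12.1 -- 14.6]
print(wetbulb.filled())  # [   12.1 -9999.    14.6]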
def drop_fields(sta, names, masked=False):
    names = set(names)
    newdtype = np.dtype([(name, sta.dtype[name])
                         for name in sta.dtype.names if name not in names])
    if newdtype:
        if masked:
            newsta = ma.empty(sta.shape, dtype=newdtype)
        else:
            newsta = np.empty(sta.shape, dtype=newdtype)
    else:
        return None
    for field in newdtype.fields:
        newsta[field] = sta[field]
        if masked:
            newsta[field].set_fill_value(sta[field].fill_value)
    return newsta
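# A small sketch of drop_fields on a plain structured array, dropping 'y':
import numpy as np

sta = np.zeros(3, dtype=[('x', float), ('y', float), ('z', float)])
out = drop_fields(sta, ['y'])
print(out.dtype.names)  # ('x', 'z')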
def insertRow(self, rowIndex, msk=True):
    """
    At the moment, insert empty row before index rowIndex.
    rowIndex starts counting at zero (as usual).
    New row is masked by default.
    """
    for col in self.cols:
        dt = self.data[col].dtype
        newEle = ma.empty((1,), dtype=dt)
        self.data[col] = ma.concatenate((self.data[col][:rowIndex],
                                         newEle,
                                         self.data[col][rowIndex:]))
        if msk:
            self.data[col][rowIndex] = ma.masked
    self.nRows += 1
def apply_on_phase(series, func, *args, **kwargs):
    u"""
    Applies the function `func` to each phase of the
    :class:`~scikits.hydroclimpy.enso.ClimateSeries` `series`.

    Parameters
    ----------
    series : ClimateSeries object
        Input climate data.  The ENSO indices must have been defined by
        setting a :class:`~scikits.hydroclimpy.enso.ENSOIndicator` object.
    func : function
        Function to apply.
    args : {None, sequence}, optional
        Mandatory arguments of the function ``func``.
    kwargs : {None, dictionary}, optional
        Optional parameters of the function ``func``.

    Returns
    -------
    result
        A structured :class:`~numpy.ma.MaskedArray` of results, with four
        fields:

        * ``global`` for the result of the function on the whole series.
        * ``cold`` for the result of the function on La Niña episodes.
        * ``neutral`` for the result of the function on Neutral episodes.
        * ``warm`` for the result of the function on El Niño episodes.
    """
    if series.ensoindices is None:
        raise AttributeError("ENSO indices should be defined for the input series!")
    #
    _glob = np.asarray(func(series, *args, **kwargs))
    names = ("cold", "neutral", "warm", "global")
    result = ma.empty(_glob.shape, dtype=[(_, _glob.dtype) for _ in names])
    result["global"] = _glob
    for attr in names[:-1]:
        result[attr] = func(getattr(series, attr), *args, **kwargs)
    return result
def _reader(self, meth, *args, **kwargs):
    """
    Private function that retransforms the output of Table.read and
    equivalent to the proper type.
    """
    data = meth(self, *args, **kwargs)
    special_attrs = getattr(self.attrs, 'special_attrs', {})
    fill_value = special_attrs.get('_fill_value', None)
    #
    ndtype = self._get_dtype()
    field_names = ndtype.names
    field = kwargs.get('field', None)
    #
    if field in ['_data', '_mask']:
        output = data
    else:
        if (field_names is None) or (field in field_names):
            output = ma.array(data['_data'], mask=data['_mask'])
        else:
            output = ma.empty(data.shape, dtype=ndtype)
            for name in field_names:
                current = data[name]
                output[name] = ma.array(current['_data'],
                                        mask=current['_mask'])
    # Reset some attributes..................
    output._baseclass = special_attrs.get('_baseclass', np.ndarray)
    fill_value = special_attrs.get('_fill_value', None)
    if (field is not None) and (fill_value is not None):
        output.fill_value = fill_value[field]
    else:
        output.fill_value = fill_value
    output._hardmask = special_attrs.get('_hardmask', False)
    output._optinfo = special_attrs.get('_optinfo', {})
    recshape = special_attrs.get('recshape', ())
    if recshape != ():
        output.shape = tuple([-1] + list(recshape))
    return output
def append_field(sta, data, dtype=None, position=None, masked=False):
    newdtype = sta.dtype.descr
    if position is None:
        newdtype.append(dtype)
    else:
        newdtype.insert(position, dtype)
    newdtype = np.dtype(newdtype)
    if masked:
        newsta = ma.empty(sta.shape, dtype=newdtype)
    else:
        newsta = np.empty(sta.shape, dtype=newdtype)
    for field in sta.dtype.fields:
        newsta[field] = sta[field]
        if masked:
            newsta[field].set_fill_value(sta[field].fill_value)
    newsta[dtype[0]] = data
    if masked:
        newsta[dtype[0]].set_fill_value(data.fill_value)
    return newsta
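# A small sketch of append_field: add a new float field 'w', populated
# from `data`, to an existing structured array.
import numpy as np

sta = np.zeros(3, dtype=[('x', float)])
out = append_field(sta, data=np.ones(3), dtype=('w', float))
print(out.dtype.names)  # ('x', 'w')
print(out['w'])         # [1. 1. 1.]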
def multiply_by_cal(Data, CalData):
    """Function scales data by the noise cal temperature."""
    # For now we just assume that the cal and polarizations are arranged
    # in a certain way and then check to make sure we are right.
    calibrate_to_I = False
    if tuple(Data.field['CRVAL4']) == (-5, -7, -8, -6):
        xx_ind = 0
        yy_ind = 3
        xy_inds = [1, 2]
    elif tuple(Data.field['CRVAL4']) == (1, 2, 3, 4):
        # This is a hack.  Completely temporary.
        calibrate_to_I = True
    else:
        raise ce.DataError('Polarization types not as expected in data.')
    cal_xx_ind = 0
    cal_yy_ind = 1
    if (CalData.field['CRVAL4'][cal_xx_ind] != -5 or
            CalData.field['CRVAL4'][cal_yy_ind] != -6):
        raise ce.DataError('Polarization types not as expected in cal.')
    # Cal should only have 1 time, 1 cal state and 2 polarizations.
    if CalData.dims[:3] != (1, 2, 1):
        raise ce.DataError('Cal temperature data has wrong dimensions.')
    # Cal state should be special state 'R'.
    if CalData.field['CAL'][0] != 'R':
        raise ce.DataError("Cal state in cal temperature data should be 'R'.")
    # Bring the Cal data to the same frequencies as the other data.
    Data.calc_freq()
    CalData.calc_freq()
    if sp.allclose(Data.freq, CalData.freq):
        cdata = CalData.data
    elif abs(Data.field['CDELT1']) <= abs(CalData.field['CDELT1']):
        calfunc = interpolate.interp1d(CalData.freq, CalData.data,
                                       fill_value=sp.nan, bounds_error=False)
        cdata = ma.array(calfunc(Data.freq))
        cdata[sp.logical_not(sp.isfinite(cdata))] = ma.masked
    else:
        nf = len(Data.freq)
        width = abs(Data.field['CDELT1'])
        cdata = ma.empty((1, 2, 1, nf))
        for find in range(nf):
            f = Data.freq[find]
            inds, = sp.where(sp.logical_and(CalData.freq >= f - width / 2.0,
                                            CalData.freq < f + width / 2.0))
            cdata[:, :, :, find] = ma.mean(CalData.data[:, :, :, inds], 3)
    if calibrate_to_I:
        Data.data *= (cdata[0, cal_xx_ind, 0, :] +
                      cdata[0, cal_yy_ind, 0, :]) / 2.0
    else:
        # Loop over times and cal and scale each polarization appropriately.
        for tind in range(Data.dims[0]):
            for cind in range(Data.dims[2]):
                Data.data[tind, xx_ind, cind, :] *= cdata[0, cal_xx_ind, 0, :]
                Data.data[tind, yy_ind, cind, :] *= cdata[0, cal_yy_ind, 0, :]
                Data.data[tind, xy_inds, cind, :] *= ma.sqrt(
                    cdata[0, cal_yy_ind, 0, :] * cdata[0, cal_xx_ind, 0, :])
def _regrid(src_data, x_dim, y_dim, src_x_coord, src_y_coord,
            sample_grid_x, sample_grid_y,
            method='linear', extrapolation_mode='nanmask'):
    """
    Regrid the given data from the src grid to the sample grid.

    The result will be a MaskedArray if either/both of:
     - the source array is a MaskedArray,
     - the extrapolation_mode is 'mask' and the result requires
       extrapolation.

    If the result is a MaskedArray the mask for each element will be set
    if either/both of:
     - there is a non-zero contribution from masked items in the input
       data
     - the element requires extrapolation and the extrapolation_mode
       dictates a masked value.

    Args:

    * src_data:
        An N-dimensional NumPy array or MaskedArray.
    * x_dim:
        The X dimension within `src_data`.
    * y_dim:
        The Y dimension within `src_data`.
    * src_x_coord:
        The X :class:`iris.coords.DimCoord`.
    * src_y_coord:
        The Y :class:`iris.coords.DimCoord`.
    * sample_grid_x:
        A 2-dimensional array of sample X values.
    * sample_grid_y:
        A 2-dimensional array of sample Y values.

    Kwargs:

    * method:
        Either 'linear' or 'nearest'. The default method is 'linear'.
    * extrapolation_mode:
        Must be one of the following strings:

          * 'linear' - The extrapolation points will be calculated by
            extending the gradient of the closest two points.
          * 'nan' - The extrapolation points will be set to NaN.
          * 'error' - A ValueError exception will be raised, notifying an
            attempt to extrapolate.
          * 'mask' - The extrapolation points will always be masked, even
            if the source data is not a MaskedArray.
          * 'nanmask' - If the source data is a MaskedArray the
            extrapolation points will be masked. Otherwise they will be
            set to NaN.

        The default mode of extrapolation is 'nanmask'.

    Returns:
        The regridded data as an N-dimensional NumPy array. The lengths
        of the X and Y dimensions will now match those of the sample
        grid.

    """
    #
    # XXX: At the moment requires to be a static method as used by
    # experimental regrid_area_weighted_rectilinear_src_and_grid
    #
    if sample_grid_x.shape != sample_grid_y.shape:
        raise ValueError('Inconsistent sample grid shapes.')
    if sample_grid_x.ndim != 2:
        raise ValueError('Sample grid must be 2-dimensional.')

    # Prepare the result data array
    shape = list(src_data.shape)
    assert shape[x_dim] == src_x_coord.shape[0]
    assert shape[y_dim] == src_y_coord.shape[0]
    shape[y_dim] = sample_grid_x.shape[0]
    shape[x_dim] = sample_grid_x.shape[1]

    # If we're given integer values, convert them to the smallest
    # possible float dtype that can accurately preserve the values.
    dtype = src_data.dtype
    if dtype.kind == 'i':
        dtype = np.promote_types(dtype, np.float16)

    if isinstance(src_data, ma.MaskedArray):
        data = ma.empty(shape, dtype=dtype)
        data.mask = np.zeros(data.shape, dtype=np.bool)
    else:
        data = np.empty(shape, dtype=dtype)

    # The interpolation class requires monotonically increasing
    # coordinates, so flip the coordinate(s) and data if they aren't.
    reverse_x = src_x_coord.points[0] > src_x_coord.points[1]
    reverse_y = src_y_coord.points[0] > src_y_coord.points[1]
    flip_index = [slice(None)] * src_data.ndim
    if reverse_x:
        src_x_coord = src_x_coord[::-1]
        flip_index[x_dim] = slice(None, None, -1)
    if reverse_y:
        src_y_coord = src_y_coord[::-1]
        flip_index[y_dim] = slice(None, None, -1)
    src_data = src_data[tuple(flip_index)]

    if src_x_coord.circular:
        x_points, src_data = extend_circular_coord_and_data(src_x_coord,
                                                            src_data, x_dim)
    else:
        x_points = src_x_coord.points

    # Slice out the first full 2D piece of data for construction of the
    # interpolator.
    index = [0] * src_data.ndim
    index[x_dim] = index[y_dim] = slice(None)
    initial_data = src_data[tuple(index)]
    if y_dim < x_dim:
        initial_data = initial_data.T

    # Construct the interpolator; we will fill in any values out of
    # bounds manually.
    interpolator = _RegularGridInterpolator([x_points, src_y_coord.points],
                                            initial_data, method=method,
                                            bounds_error=False,
                                            fill_value=None)
    # The constructor of the _RegularGridInterpolator class does
    # some unnecessary checks on these values, so we set them
    # afterwards instead. Sneaky. ;-)
    try:
        mode = EXTRAPOLATION_MODES[extrapolation_mode]
    except KeyError:
        raise ValueError('Invalid extrapolation mode.')
    interpolator.bounds_error = mode.bounds_error
    interpolator.fill_value = mode.fill_value

    # Construct the target coordinate points array, suitable for passing
    # to the interpolator multiple times.
    interp_coords = [sample_grid_x.astype(np.float64)[..., np.newaxis],
                     sample_grid_y.astype(np.float64)[..., np.newaxis]]

    # Map all the requested values into the range of the source
    # data (centred over the centre of the source data to allow
    # extrapolation where required).
    min_x, max_x = x_points.min(), x_points.max()
    min_y, max_y = src_y_coord.points.min(), src_y_coord.points.max()
    if src_x_coord.units.modulus:
        modulus = src_x_coord.units.modulus
        offset = (max_x + min_x - modulus) * 0.5
        interp_coords[0] -= offset
        interp_coords[0] = (interp_coords[0] % modulus) + offset
    interp_coords = np.dstack(interp_coords)

    def interpolate(data):
        # Update the interpolator for this data slice.
        data = data.astype(interpolator.values.dtype)
        if y_dim < x_dim:
            data = data.T
        interpolator.values = data
        data = interpolator(interp_coords)
        if y_dim > x_dim:
            data = data.T
        return data

    # Build up a shape suitable for passing to ndindex, inside the loop
    # we will insert slice(None) on the data indices.
    iter_shape = list(shape)
    iter_shape[x_dim] = iter_shape[y_dim] = 1

    # Iterate through each 2d slice of the data, updating the
    # interpolator with the new data as we go.
    for index in np.ndindex(tuple(iter_shape)):
        index = list(index)
        index[x_dim] = index[y_dim] = slice(None)
        src_subset = src_data[tuple(index)]
        interpolator.fill_value = mode.fill_value
        data[tuple(index)] = interpolate(src_subset)

        if isinstance(data, ma.MaskedArray) or mode.force_mask:
            # NB. np.ma.getmaskarray returns an array of `False` if
            # `src_subset` is not a masked array.
            src_mask = np.ma.getmaskarray(src_subset)
            interpolator.fill_value = mode.mask_fill_value
            mask_fraction = interpolate(src_mask)
            new_mask = (mask_fraction > 0)

            if np.ma.isMaskedArray(data):
                data.mask[tuple(index)] = new_mask
            elif np.any(new_mask):
                # Set mask=False to ensure we have an expanded mask array.
                data = np.ma.MaskedArray(data, mask=False)
                data.mask[tuple(index)] = new_mask

    return data
def join(left, right, keys=None, join_type='inner',
         uniq_col_name='{col_name}_{table_name}',
         table_names=['1', '2'],
         col_name_map=None):
    """
    Perform a join of the left and right numpy structured arrays on
    specified keys.

    Parameters
    ----------
    left : structured array
        Left side table in the join
    right : structured array
        Right side table in the join
    keys : str or list of str
        Name(s) of column(s) used to match rows of left and right tables.
        Default is to use all columns which are common to both tables.
    join_type : str
        Join type ('inner' | 'outer' | 'left' | 'right'), default is
        'inner'
    uniq_col_name : str or None
        String used to generate a unique output column name in case of a
        conflict.  The default is '{col_name}_{table_name}'.
    table_names : list of str or None
        Two-element list of table names used when generating unique
        output column names.  The default is ['1', '2'].
    col_name_map : empty dict or None
        If passed as a dict then it will be updated in-place with the
        mapping of output to input column names.
    """
    # Store user-provided col_name_map until the end
    _col_name_map = col_name_map

    if join_type not in ('inner', 'outer', 'left', 'right'):
        raise ValueError("The 'join_type' argument should be in 'inner', "
                         "'outer', 'left' or 'right' (got '{0}' instead)".
                         format(join_type))

    # If we have a single key, put it in a tuple
    if keys is None:
        keys = tuple(name for name in left.dtype.names
                     if name in right.dtype.names)
        if len(keys) == 0:
            raise TableMergeError('No keys in common between left and right tables')
    elif isinstance(keys, six.string_types):
        keys = (keys,)

    # Check the key columns
    for arr, arr_label in ((left, 'Left'), (right, 'Right')):
        for name in keys:
            if name not in arr.dtype.names:
                raise TableMergeError('{0} table does not have key column {1!r}'
                                      .format(arr_label, name))
            if hasattr(arr[name], 'mask') and np.any(arr[name].mask):
                raise TableMergeError('{0} key column {1!r} has missing values'
                                      .format(arr_label, name))

    # Make sure we work with ravelled arrays
    left = left.ravel()
    right = right.ravel()
    len_left, len_right = len(left), len(right)
    left_names, right_names = left.dtype.names, right.dtype.names

    # Joined array dtype as a list of descr (name, type_str, shape) tuples
    col_name_map = get_col_name_map([left, right], keys, uniq_col_name,
                                    table_names)
    out_descrs = get_descrs([left, right], col_name_map)

    # Make an array with just the key columns
    out_keys_dtype = [descr for descr in out_descrs if descr[0] in keys]
    out_keys = np.empty(len_left + len_right, dtype=out_keys_dtype)
    for key in keys:
        out_keys[key][:len_left] = left[key]
        out_keys[key][len_left:] = right[key]
    idx_sort = out_keys.argsort(order=keys)
    out_keys = out_keys[idx_sort]

    # Get all keys
    diffs = np.concatenate(([True], out_keys[1:] != out_keys[:-1], [True]))
    idxs = np.flatnonzero(diffs)

    # Main inner loop in Cython to compute the cartesian product
    # indices for the given join type
    int_join_type = {'inner': 0, 'outer': 1, 'left': 2, 'right': 3}[join_type]
    masked, n_out, left_out, left_mask, right_out, right_mask = \
        _np_utils.join_inner(idxs, idx_sort, len_left, int_join_type)

    # If either of the inputs are masked then the output is masked
    if any(isinstance(array, ma.MaskedArray) for array in (left, right)):
        masked = True

    if masked:
        out = ma.empty(n_out, dtype=out_descrs)
    else:
        out = np.empty(n_out, dtype=out_descrs)

    # If either input array was zero length then stub a new version
    # with one row.  In this case the corresponding left_out or right_out
    # will contain all zeros with mask set to true.  This allows the
    # take(*_out) method calls to work as expected.
    if len(left) == 0:
        left = left.__class__(1, dtype=left.dtype)
    if len(right) == 0:
        right = right.__class__(1, dtype=right.dtype)

    for out_name, left_right_names in six.iteritems(col_name_map):
        left_name, right_name = left_right_names
        if left_name and right_name:
            # this is a key which comes from left and right
            out[out_name] = np.where(right_mask,
                                     left[left_name].take(left_out),
                                     right[right_name].take(right_out))
            continue
        elif left_name:
            # out_name came from the left table
            name, array, array_out, array_mask = \
                left_name, left, left_out, left_mask
        elif right_name:
            name, array, array_out, array_mask = \
                right_name, right, right_out, right_mask
        else:
            raise TableMergeError('Unexpected column names (maybe one is ""?)')

        out[out_name] = array[name].take(array_out, axis=0)
        if masked:
            if isinstance(array, ma.MaskedArray):
                array_mask = array_mask | array[name].mask.take(array_out)
            out[out_name].mask = array_mask

    # If col_name_map supplied as a dict input, then update.
    if isinstance(_col_name_map, collections.Mapping):
        _col_name_map.update(col_name_map)

    return out