def robust_lookup(df, indexer):
    """
    Robust way to apply pandas lookup when indices are not unique

    Args:
        df (pdDataFrame): Frame to extract values from.
        indexer (pdSeries): A Series whose index is either same or a subset
            of `df.index` and whose values are values from `df.columns`.
            An entry yields NaN when its index label is not in `df.index`,
            when its value is not in `df.columns`, or when its value is
            missing (NaN/None).

    Returns:
        pdSeries: a vector where (logically)
            `extracted[i] = df.loc[indexer.index[i], indexer[i]]`.
            In most cases, when `indexer.index == df.index` this translates
            to `extracted[i] = df.loc[i, indexer[i]]`
    """
    # Encode the requested column labels as integer positions into `labels`.
    # Missing values in `indexer` are encoded with the sentinel -1.
    codes, labels = indexer.factorize()
    missing = codes == -1

    if missing.any() and missing.all():
        # No usable column label at all: `labels` is empty, so there is no
        # column to index into.
        return pdSeries(float("nan"), index=indexer.index)

    # Make sure the columns exist (in `labels` order) and that the rows match
    # `indexer.index`; labels unknown to `df` become all-NaN rows/columns.
    aligned = df.reindex(labels, axis=1).reindex(indexer.index, axis=0)

    if missing.any():
        # A -1 code would silently wrap around to the *last* column; point it
        # at a valid column for now and blank the entry out afterwards.
        codes = codes.copy()
        codes[missing] = 0

    # numpy accesses by location, not by named index
    extracted = aligned.to_numpy()[range(len(codes)), codes]
    extracted = pdSeries(extracted, index=indexer.index)

    if missing.any():
        # `missing` is positional, so this is safe with duplicated labels.
        extracted = extracted.mask(missing)
    return extracted
def get_input(type, nrows, ncols, dtype, order='C', out_dtype=False,
              index=None):
    """Build a random test input in the requested container type.

    A random ``(nrows, ncols)`` matrix with values in ``[0, 10)`` is
    generated on the device with CuPy and wrapped in the container selected
    by ``type``.

    Parameters
    ----------
    type : str
        One of ``'numpy'``, ``'cupy'``, ``'numba'``, ``'cudf'``,
        ``'cudf-series'``, ``'pandas'``, ``'pandas-series'`` or ``'cuml'``.
        (The name shadows the builtin but is part of the public signature.)
    nrows, ncols : int
        Shape of the generated matrix. ``'pandas-series'`` reshapes the data
        to ``(nrows,)`` and therefore needs ``ncols == 1``.
    dtype : dtype
        Data type of the generated matrix.
    order : {'C', 'F'}, default 'C'
        Memory layout of the generated matrix and of the reference array.
    out_dtype : dtype or False, default False
        When truthy, the reference array is cast to this dtype.
    index : optional
        Index forwarded to the cudf / pandas constructors.

    Returns
    -------
    tuple
        ``(result, expected)`` where ``result`` is the container of the
        requested type and ``expected`` is a NumPy array holding the same
        data (cast to ``out_dtype`` when it is given).

    Raises
    ------
    ValueError
        If ``type`` is not one of the supported names. Previously this case
        surfaced as an ``UnboundLocalError`` at the return statement.
    """
    supported = ('numpy', 'cupy', 'numba', 'cudf', 'cudf-series',
                 'pandas', 'pandas-series', 'cuml')
    if type not in supported:
        raise ValueError(
            "Unsupported input type {!r}; expected one of {}".format(
                type, ', '.join(supported)))

    rand_mat = cp.random.rand(nrows, ncols) * 10
    rand_mat = cp.array(rand_mat, dtype=dtype, order=order)

    # Single device -> host transfer, shared by the host-side containers and
    # by the reference array returned below.
    host_mat = cp.asnumpy(rand_mat)

    if type == 'numpy':
        result = np.array(host_mat, order=order)
    elif type == 'cupy':
        result = rand_mat
    elif type == 'numba':
        result = nbcuda.as_cuda_array(rand_mat)
    elif type == 'cudf':
        result = cudf.DataFrame(rand_mat, index=index)
    elif type == 'cudf-series':
        # NOTE(review): rand_mat is 2-D here; presumably callers only use
        # this with ncols == 1 -- confirm.
        result = cudf.Series(rand_mat, index=index)
    elif type == 'pandas':
        result = pdDF(host_mat, index=index)
    elif type == 'pandas-series':
        result = pdSeries(host_mat.reshape(nrows, ), index=index)
    else:  # 'cuml'
        result = CumlArray(data=rand_mat)

    # np.array() copies, so the reference never aliases `result`.
    if out_dtype:
        return result, np.array(host_mat.astype(out_dtype), order=order)
    return result, np.array(host_mat, order=order)
def robust_lookup(df, indexer):
    """
    Robust way to apply a pandas label-based lookup when indices are not unique

    This reproduces what `DataFrame.lookup` did without calling it:
    that method was deprecated in pandas 1.2 and removed in pandas 2.0.

    Args:
        df (pdDataFrame): Frame to extract values from.
        indexer (pdSeries): A Series with indexes either same or a subset of
            `df` (BUT SHOULD NOT contain any index not found in `df`), and
            whose values are values from `df.columns`.

    Returns:
        pdSeries: the looked-up values, indexed like `indexer`.

    Raises:
        KeyError: if `df` has unique index and columns and some label of
            `indexer` (index or value) is not found in `df`.
    """
    try:
        # Translate labels into positions. `get_indexer` raises
        # InvalidIndexError when the axis it is called on is not unique.
        rows = df.index.get_indexer(indexer.index)
        cols = df.columns.get_indexer(indexer.to_numpy())
    except InvalidIndexError:
        # Indices are not unique, fallback to using numpy-based indexing.
        # This assumes indexer's index and values are integer positions
        # starting from 0.
        extracted = df.values[indexer.index.to_numpy(), indexer.to_numpy()]
        return pdSeries(extracted, index=indexer.index)

    # -1 marks a label that does not exist in `df`; fail loudly (as
    # `DataFrame.lookup` did) instead of wrapping to the last row/column.
    if (rows == -1).any():
        raise KeyError("One or more row labels was not found")
    if (cols == -1).any():
        raise KeyError("One or more column labels was not found")

    extracted = pdSeries(df.values[rows, cols], index=indexer.index)
    # Mixed-dtype frames produce an object array; narrow it when possible.
    return extracted.infer_objects()
def _parabolic_bed_from_topo(gdir, idl, interpolator):
    """Fit a parabolic bed shape to the topography at each point of `idl`.

    For every point of the line, the topography is sampled along the normal
    (on both sides) and a parabola is fitted to that cross-section. Points
    flagged as glacier, points whose cross-section leaves the grid interior,
    and points with a poor fit get no value of their own; these gaps are
    filled by interpolation before the result is smoothed.

    Parameters
    ----------
    gdir : oggm.GlacierDirectory
        Provides ``rgi_area_km2``, ``rgi_id`` and ``grid`` (``nx``, ``ny``,
        ``dx``).
    idl : Centerline-like
        Line object providing ``normals``, ``line.coords``, ``is_glacier``
        and ``nx``.
    interpolator : callable
        Called as ``interpolator((y, x))`` in grid coordinates; returns the
        topography at that location.

    Returns
    -------
    numpy.ndarray
        One bed-shape value per point of `idl` (length ``idl.nx``),
        gap-filled, divided by ``dx**2`` and smoothed with a centered
        rolling mean of window 5.
    """

    # Volume area scaling formula for the probable ice thickness
    h_mean = 0.034 * gdir.rgi_area_km2**0.375 * 1000
    gnx, gny = gdir.grid.nx, gdir.grid.ny

    # Far Factor: the cross-section reaches r / 2 on each side of the line
    r = 40
    # number of points sampled on each side
    cs_n = 20

    # normals
    ns = [i[0] for i in idl.normals]
    cs = []
    donot_compute = []

    for pcoords, n, isgl in zip(idl.line.coords, ns, idl.is_glacier):
        xi, yi = pcoords
        vx, vy = n
        modul = np.sqrt(vx**2 + vy**2)
        ci = []
        _isborder = False
        for ro in np.linspace(0, r / 2.0, cs_n):
            t = ro / modul
            cp1 = HashablePoint(xi + t * vx, yi + t * vy)
            cp2 = HashablePoint(xi - t * vx, yi - t * vy)

            # check if out of the frame
            if not (0 < cp2.y < gny - 1) or \
                    not (0 < cp2.x < gnx - 1) or \
                    not (0 < cp1.y < gny - 1) or \
                    not (0 < cp1.x < gnx - 1):
                _isborder = True

            ci.append((cp1, ro))
            ci.append((cp2, -ro))

        # drop duplicated (point, distance) pairs
        ci = list(set(ci))
        cs.append(ci)
        donot_compute.append(_isborder or isgl)

    bed = []
    for cc, dontcomp in zip(cs, donot_compute):

        if dontcomp:
            # np.nan rather than np.NaN: the alias was removed in NumPy 2.0
            bed.append(np.nan)
            continue

        z = [interpolator((pt.y, pt.x)) for pt, _ in cc]
        ro = [dist for _, dist in cc]
        aso = np.argsort(ro)
        ro, z = np.array(ro)[aso], np.array(z)[aso]

        # find top of parabola
        zero = np.argmin(z)  # it is index of roHead/zHead
        roHead = ro[zero]
        zHead = np.amin(z)

        dsts = abs(h_mean + zHead - z)

        # find local minima in set of distances
        extr = scipy.signal.argrelextrema(dsts, np.less, mode='wrap')
        if len(extr[0]) == 0:
            bed.append(np.nan)
            continue

        # from local minima find that with the minimum |x|
        idx = extr[0][np.argmin(abs(ro[extr]))]

        # Symmetric window around the parabola top, wide enough to reach
        # `idx`; the lower bound is clipped at 0 (x<0 => x=0).
        half_width = abs(zero - idx)
        start = max(zero - half_width, 0)
        stop = zero + half_width + 1
        roN = ro[start:stop]
        zN = z[start:stop]
        roNx = roN - roHead

        p = _approx_parabola(roNx, zN, y0=zHead)

        # shift parabola to the ds-line
        p2 = np.copy(p)
        p2[2] = z[ro == 0]

        err = _parabola_error(roN, zN, p2) * 100

        # The original implementation of @anton-ub stored all three parabola
        # params. We just keep the one important here for now
        if err < 1.5:
            bed.append(p2[0])
        else:
            bed.append(np.nan)

    bed = np.asarray(bed)
    assert len(bed) == idl.nx
    pvalid = np.sum(np.isfinite(bed)) / len(bed) * 100
    log.debug('%s: percentage of valid parabolas total: %d',
              gdir.rgi_id, int(pvalid))

    bedg = bed[~idl.is_glacier]
    if len(bedg) > 0:
        pvalid = np.sum(np.isfinite(bedg)) / len(bedg) * 100
        log.debug('%s: percentage of valid parabolas out glacier: %d',
                  gdir.rgi_id, int(pvalid))
        if pvalid < 10:
            log.warning('{}: {}% of valid bedshapes.'.format(
                gdir.rgi_id, int(pvalid)))

    # interpolation, filling the gaps
    default = cfg.PARAMS['default_parabolic_bedshape']
    bed_int = interp_nans(bed, default=default)

    # Scale for dx (we worked in grid coords but need meters)
    bed_int = bed_int / gdir.grid.dx**2

    # Smoothing
    bed_ma = pdSeries(bed_int)
    bed_ma = bed_ma.rolling(window=5, center=True, min_periods=1).mean()
    return bed_ma.values
def compute_downstream_bedshape(gdir):
    """The bedshape obtained by fitting a parabola to the line's normals.

    Reads the major divide's downstream line, resamples it, samples the
    smoothed topography along the normal of each point, fits a parabola to
    each cross-section, fills the rejected fits by linear interpolation,
    smooths the first parabola parameter with a centered rolling mean of
    window 5 and writes the result to the ``'downstream_bed'`` pickle.

    Nothing is returned; the output is a list with one ``[p0, p1, p2]``
    entry per cross-section, where ``p0`` is the smoothed value.

    Parameters
    ----------
    gdir : oggm.GlacierDirectory
        Used for reading ``'major_divide'``, ``'downstream_line'`` and
        ``'gridded_data'``, and for writing ``'downstream_bed'``.
    """

    # get the major downstream line only
    majid = gdir.read_pickle('major_divide', div_id=0)
    dl = gdir.read_pickle('downstream_line', div_id=majid)

    # Volume area scaling formula for the ice thickness
    h_mean = 0.034 * gdir.rgi_area_km2**0.375 * 1000
    log.debug('%s: estimated glacier thickness: %d', gdir.rgi_id, h_mean)

    # one entry per cross-section: the fitted parameters, or
    # [None, 0, p2[2]] when the fit is rejected
    bed = []

    # Far Factor: the cross-section reaches r / 2 on each side of the line
    r = 40
    # number of points sampled on each side
    cs_n = 20

    # make distance between point the same
    # TODO: use a Centerline class instead
    from .geometry import _line_interpol
    l = shpg.LineString(_line_interpol(dl, cfg.PARAMS['flowline_dx']))
    idl = Centerline(l, cfg.PARAMS['flowline_dx'], None)

    # Topography
    with netCDF4.Dataset(gdir.get_filepath('gridded_data', div_id=0)) as nc:
        topo = nc.variables['topo_smoothed'][:]
        x = nc.variables['x'][:]
        y = nc.variables['y'][:]
    # The interpolator works on integer grid indices (row, col), not on the
    # x / y coordinate values read above; only their lengths are used.
    xy = (np.arange(0, len(y) - 0.1, 1), np.arange(0, len(x) - 0.1, 1))
    interpolator = RegularGridInterpolator(xy, topo)

    # TODO: temporary class
    # Hashable point, needed so that (point, distance) pairs can go in a set
    class MyPoint(shpg.Point):
        def __hash__(self):
            return hash(tuple((self.x, self.y)))

    # normals
    ns = [i[0] for i in idl.normals]
    cs = []
    # StopIteration is used as control flow: the first cross-section that
    # leaves the grid interior aborts the whole loop, and that partially
    # built cross-section is NOT appended to `cs`.
    try:
        for pcoords, n in zip(idl.line.coords, ns):
            xi, yi = pcoords
            vx, vy = n
            modul = np.sqrt(vx**2 + vy**2)
            ci = []
            for ro in np.linspace(0, r / 2.0, cs_n):
                t = ro / modul
                cp1 = MyPoint(xi + t * vx, yi + t * vy)
                cp2 = MyPoint(xi - t * vx, yi - t * vy)

                # check if out of the frame
                if not (0 < cp2.y < len(y) - 1) or \
                        not (0 < cp2.x < len(x) - 1) or \
                        not (0 < cp1.y < len(y) - 1) or \
                        not (0 < cp1.x < len(x) - 1):
                    raise StopIteration()

                ci.append((cp1, ro))
                ci.append((cp2, -ro))

            # drop duplicated (point, distance) pairs
            ci = list(set(ci))
            cs.append(ci)
    except StopIteration:
        # we reached the end of the line
        pass

    log.debug('%s: length of downstream line is: %d', gdir.rgi_id, len(cs))

    # number of accepted parabola fits
    good = 0
    for ic, cc in enumerate(cs):
        z = []
        ro = []
        for i in cc:
            z.append(interpolator((i[0].y, i[0].x)))
            ro.append(i[1])
        # sort the samples by signed distance along the normal
        aso = np.argsort(ro)
        ro, z = np.array(ro)[aso], np.array(z)[aso]

        # find top of parabola
        roHead = ro[np.argmin(z)]
        zero = np.argmin(z)  # it is index of roHead/zHead
        zHead = np.amin(z)

        dsts = abs(h_mean + zHead - z)

        # find local minima in set of distances
        extr = scipy.signal.argrelextrema(dsts, np.less, mode='wrap')

        # from local minima find that with the minimum |x|
        # NOTE(review): if no local minimum is found, `extr[0]` is empty and
        # np.argmin presumably raises ValueError here -- confirm whether
        # this case needs a guard.
        idx = extr[0][np.argmin(abs(ro[extr]))]

        # Symmetric window around `zero` reaching `idx`; the lower bound is
        # clipped at 0:
        # x<0 => x=0
        # (|x|+x)/2
        roN = ro[int((abs(zero - abs(zero - idx)) + zero -
                      abs(zero - idx)) / 2):zero + abs(zero - idx) + 1]
        zN = z[int((abs(zero - abs(zero - idx)) + zero -
                    abs(zero - idx)) / 2):zero + abs(zero - idx) + 1]
        roNx = roN - roHead
        # zN=zN-zHead#

        p = _approx_parabola(roNx, zN, y0=zHead)

        # shift parabola to the ds-line
        p2 = np.copy(p)
        p2[2] = z[ro == 0]

        err = _parabola_error(roN, zN, p2) * 100

        # accept the fit only below the error threshold; otherwise mark the
        # first parameter as missing (None) so that it is interpolated below
        if err < 1.5:
            bed.append(p2)
            good += 1
        else:
            bed.append([None, 0, p2[2]])

    # NOTE(review): `len(cs)` is 0 when the very first cross-section leaves
    # the grid; the division below then raises ZeroDivisionError.
    log.debug('%s: percentage of valid parabolas: %d', gdir.rgi_id,
              int(good / len(cs) * 100))

    # skip gaps
    # (truthiness test: drops None, and would also drop a value equal to 0)
    x_bed = [i for i, j in enumerate(bed) if j[0]]
    bg = [bed[i][0] for i in x_bed]

    # interpolation, filling the gaps
    # NOTE(review): np.interp presumably fails when `x_bed` is empty (no
    # valid parabola at all) -- confirm.
    bed_i = np.interp(range(0, len(bed)), x_bed, bg)
    bed_int = [[bed_i[j], i[1], i[2]] for j, i in enumerate(bed)]

    # approximation - BED_Moving_Average
    bed_ma = pdSeries([i[0] for i in bed_int])
    bed_ma = bed_ma.rolling(window=5, center=True, min_periods=1).mean()
    bed_ma = [[bed_ma[j], i[1], i[2]] for j, i in enumerate(bed_int)]

    # write output
    gdir.write_pickle(bed_ma, 'downstream_bed')