Python pdSeries 예제들, pandas.pdSeries Python 예제들

예제 #1

0

파일 보기

def robust_lookup(df, indexer):
    """
    Robust way to apply pandas lookup when indices are not unique

    Args:
        df (pdDataFrame): Table the values are extracted from.
        indexer (pdSeries): A Series whose index is either same or a subset of `df.index`
                            and whose values are values from `df.columns`.
                            If `indexer.index` contains values not in `df.index`
                            they will have NaN values. Entries of `indexer` that are
                            themselves missing (NaN/None) also produce NaN.

    Returns:
        pdSeries: a vector where (logically) `extracted[i] = df.loc[indexer.index[i], indexer[i]]`.
            In most cases, when `indexer.index == df.index` this translates to
            `extracted[i] = df.loc[i, indexer[i]]`
    """
    # Encode the requested column labels as integer codes. `col` holds the
    # distinct labels; missing entries of `indexer` get the sentinel code -1.
    idx, col = indexer.factorize()
    if len(col) == 0:
        # Empty or all-missing indexer: there is no column to read from, and
        # positional indexing into a zero-column array would raise.
        return pdSeries(float("nan"), index=indexer.index)

    # Make sure the columns exist and the rows line up with the indexer.
    extracted = df.reindex(col, axis=1).reindex(indexer.index, axis=0)
    # numpy accesses by location, not by named index
    extracted = extracted.to_numpy()[range(len(idx)), idx]
    extracted = pdSeries(extracted, index=indexer.index)
    # A code of -1 would have silently read the last column above; blank
    # those positions out instead of returning an unrelated value.
    return extracted.where(idx >= 0)

예제 #2

0

파일 보기

파일: test_input_utils.py 프로젝트: rapidsai/cuml

def get_input(type,
              nrows,
              ncols,
              dtype,
              order='C',
              out_dtype=False,
              index=None):
    """Create a random matrix in the requested container plus a host copy.

    A ``(nrows, ncols)`` cupy array of uniform values scaled by 10 is
    generated and wrapped according to ``type``.

    Args:
        type: Container selector, one of ``'numpy'``, ``'cupy'``,
            ``'numba'``, ``'cudf'``, ``'cudf-series'``, ``'pandas'``,
            ``'pandas-series'`` or ``'cuml'``.
        nrows: Number of rows of the generated matrix.
        ncols: Number of columns of the generated matrix.
        dtype: dtype of the generated matrix.
        order: Memory layout passed to the array constructors.
        out_dtype: When truthy, dtype the returned host copy is cast to.
        index: Index forwarded to the cudf / pandas constructors.

    Returns:
        Tuple ``(result, host_copy)`` where ``result`` is the wrapped input
        and ``host_copy`` is a numpy array with the same values.

    Note:
        ``'pandas-series'`` reshapes the data to ``(nrows,)``, so it
        presumably expects ``ncols == 1`` -- confirm against callers.
    """
    device_mat = cp.array(cp.random.rand(nrows, ncols) * 10,
                          dtype=dtype,
                          order=order)

    # `type` never changes, so at most one branch can match.
    if type == 'numpy':
        result = np.array(cp.asnumpy(device_mat), order=order)
    elif type == 'cupy':
        result = device_mat
    elif type == 'numba':
        result = nbcuda.as_cuda_array(device_mat)
    elif type == 'cudf':
        result = cudf.DataFrame(device_mat, index=index)
    elif type == 'cudf-series':
        result = cudf.Series(device_mat, index=index)
    elif type == 'pandas':
        result = pdDF(cp.asnumpy(device_mat), index=index)
    elif type == 'pandas-series':
        result = pdSeries(cp.asnumpy(device_mat).reshape(nrows, ),
                          index=index)
    elif type == 'cuml':
        result = CumlArray(data=device_mat)

    # Host-side reference copy, optionally cast to the requested dtype.
    host_copy = cp.asnumpy(device_mat)
    if out_dtype:
        host_copy = host_copy.astype(out_dtype)
    return result, np.array(host_copy, order=order)

예제 #3

0

파일 보기

def robust_lookup(df, indexer):
    """
    Robust way to apply pandas lookup
     (https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.lookup.html)
     when indices are not unique

    `DataFrame.lookup` itself was removed in pandas 2.0, so the label to
    position translation it performed is done here with `Index.get_indexer`.

    Args:
        df (pdDataFrame): Table the values are extracted from.
        indexer (pdSeries): A Series with indexes either same or a subset of `df`
                            (BUT SHOULD NOT contain any index not found in `df`),
                            and whose values are values from `df.columns`.

    Returns:
        pdSeries: the looked-up values, indexed like `indexer`.

    Raises:
        KeyError: if a row label or a column label is not found in `df`.
    """
    row_labels = indexer.index
    try:
        # `get_indexer` raises InvalidIndexError when the axis it is called
        # on contains duplicate labels, and returns -1 for unknown labels.
        row_pos = df.index.get_indexer(row_labels)
        col_pos = df.columns.get_indexer(indexer)
    except InvalidIndexError:
        # Indices are not unique, fallback to using numpy-based indexing.
        # This assumes indexer's index and values are integer positions
        # starting from 0.
        extracted = df.values[row_labels, indexer]
    else:
        if (row_pos < 0).any():
            raise KeyError("One or more row labels was not found")
        if (col_pos < 0).any():
            raise KeyError("One or more column labels was not found")
        extracted = df.values[row_pos, col_pos]
    return pdSeries(extracted, index=row_labels)

예제 #4

0

파일 보기

파일: centerlines.py 프로젝트: jlandmann/oggm

def _parabolic_bed_from_topo(gdir, idl, interpolator):
    """Return the parabolic bed shape for all points on ``idl``.

    For every point of the line a cross-section is sampled along the point's
    normal, the topography is interpolated on it and a parabola is fitted
    with ``_approx_parabola``. The first parabola parameter is kept when
    ``_parabola_error(...) * 100`` is below 1.5. Points flagged in
    ``idl.is_glacier``, points whose cross-section leaves the grid, points
    without a local minimum and points with a larger fit error are stored as
    NaN and later filled by ``interp_nans``.

    Parameters
    ----------
    gdir : object
        Provides ``rgi_area_km2``, ``rgi_id`` and ``grid`` (``nx``, ``ny``,
        ``dx``).
    idl : object
        Line exposing ``normals``, ``line.coords``, ``is_glacier`` and ``nx``.
    interpolator : callable
        Called with a ``(y, x)`` tuple in grid coordinates; returns the
        topography at that location.

    Returns
    -------
    numpy.ndarray
        The gap-filled bed shapes, divided by ``gdir.grid.dx**2`` and
        smoothed with a centered 5-point rolling mean.
    """

    # Volume area scaling formula for the probable ice thickness
    h_mean = 0.034 * gdir.rgi_area_km2**0.375 * 1000
    gnx, gny = gdir.grid.nx, gdir.grid.ny

    # Far Factor
    r = 40
    # number of points
    cs_n = 20

    # normals
    ns = [i[0] for i in idl.normals]
    cs = []
    donot_compute = []

    for pcoords, n, isgl in zip(idl.line.coords, ns, idl.is_glacier):
        xi, yi = pcoords
        vx, vy = n
        modul = np.sqrt(vx**2 + vy**2)
        ci = []
        _isborder = False
        for ro in np.linspace(0, r / 2.0, cs_n):
            t = ro / modul
            cp1 = HashablePoint(xi + t * vx, yi + t * vy)
            cp2 = HashablePoint(xi - t * vx, yi - t * vy)

            # check if out of the frame
            if not (0 < cp2.y < gny - 1) or \
                    not (0 < cp2.x < gnx - 1) or \
                    not (0 < cp1.y < gny - 1) or \
                    not (0 < cp1.x < gnx - 1):
                _isborder = True

            ci.append((cp1, ro))
            ci.append((cp2, -ro))

        # drop duplicated (point, distance) samples
        ci = list(set(ci))
        cs.append(ci)
        donot_compute.append(_isborder or isgl)

    bed = []
    for cc, dontcomp in zip(cs, donot_compute):

        if dontcomp:
            # np.nan rather than np.NaN: the latter alias was removed in
            # NumPy 2.0
            bed.append(np.nan)
            continue

        z = []
        ro = []
        for i in cc:
            z.append(interpolator((i[0].y, i[0].x)))
            ro.append(i[1])
        # sort the samples by signed distance along the normal
        aso = np.argsort(ro)
        ro, z = np.array(ro)[aso], np.array(z)[aso]

        # find top of parabola
        roHead = ro[np.argmin(z)]
        zero = np.argmin(z)  # it is index of roHead/zHead
        zHead = np.amin(z)

        dsts = abs(h_mean + zHead - z)

        # find local minima in set of distances
        extr = scipy.signal.argrelextrema(dsts, np.less, mode='wrap')
        if len(extr[0]) == 0:
            bed.append(np.nan)
            continue

        # from local minima find that with the minimum |x|
        idx = extr[0][np.argmin(abs(ro[extr]))]

        # Window of samples centered on `zero`. The start is clamped at 0:
        # x<0 => x=0, written as (|x|+x)/2
        half_width = abs(zero - idx)
        start = int((abs(zero - half_width) + zero - half_width) / 2)
        window = slice(start, zero + half_width + 1)
        roN = ro[window]
        zN = z[window]
        roNx = roN - roHead
        # zN=zN-zHead#

        p = _approx_parabola(roNx, zN, y0=zHead)

        # shift parabola to the ds-line
        p2 = np.copy(p)
        p2[2] = z[ro == 0]

        err = _parabola_error(roN, zN, p2) * 100

        # The original implementation of @anton-ub stored all three parabola
        # params. We just keep the one important here for now
        if err < 1.5:
            bed.append(p2[0])
        else:
            bed.append(np.nan)

    bed = np.asarray(bed)
    assert len(bed) == idl.nx
    pvalid = np.sum(np.isfinite(bed)) / len(bed) * 100
    log.debug('%s: percentage of valid parabolas total: %d', gdir.rgi_id,
              int(pvalid))

    # same statistic restricted to the points not flagged as glacier
    bedg = bed[~idl.is_glacier]
    if len(bedg) > 0:
        pvalid = np.sum(np.isfinite(bedg)) / len(bedg) * 100
        log.debug('%s: percentage of valid parabolas out glacier: %d',
                  gdir.rgi_id, int(pvalid))
        if pvalid < 10:
            log.warning('{}: {}% of valid bedshapes.'.format(
                gdir.rgi_id, int(pvalid)))

    # interpolation, filling the gaps
    default = cfg.PARAMS['default_parabolic_bedshape']
    bed_int = interp_nans(bed, default=default)

    # Scale for dx (we worked in grid coords but need meters)
    bed_int = bed_int / gdir.grid.dx**2

    # Smoothing
    bed_ma = pdSeries(bed_int)
    bed_ma = bed_ma.rolling(window=5, center=True, min_periods=1).mean()
    return bed_ma.values

예제 #5

0

파일 보기

파일: centerlines.py 프로젝트: kaituozhe528/oggm

def compute_downstream_bedshape(gdir):
    """The bedshape obtained by fitting a parabola to the line's normals.

    Reads the downstream line of the major divide, resamples it to
    ``cfg.PARAMS['flowline_dx']``, samples the smoothed topography on a
    cross-section along each point's normal and fits a parabola to it.
    Parabolas whose error is too large are treated as gaps and filled by
    linear interpolation; the first parameter is then smoothed with a
    centered 5-point rolling mean. The result is written with
    ``gdir.write_pickle(..., 'downstream_bed')``; nothing is returned.

    Parameters
    ----------
    gdir : oggm.GlacierDirectory
    """
    # get the major downstream line only
    majid = gdir.read_pickle('major_divide', div_id=0)
    dl = gdir.read_pickle('downstream_line', div_id=majid)

    # Volume area scaling formula for the ice thickness
    h_mean = 0.034 * gdir.rgi_area_km2**0.375 * 1000
    log.debug('%s: estimated glacier thickness: %d', gdir.rgi_id, h_mean)

    bed = []
    # Far Factor
    r = 40
    # number of points
    cs_n = 20

    # make distance between point the same
    # TODO: use a Centerline class instead
    from .geometry import _line_interpol
    l = shpg.LineString(_line_interpol(dl, cfg.PARAMS['flowline_dx']))
    idl = Centerline(l, cfg.PARAMS['flowline_dx'], None)

    # Topography
    with netCDF4.Dataset(gdir.get_filepath('gridded_data', div_id=0)) as nc:
        topo = nc.variables['topo_smoothed'][:]
        x = nc.variables['x'][:]
        y = nc.variables['y'][:]
    # interpolation axes are integer grid indices (0 .. len-1), ordered (y, x)
    xy = (np.arange(0, len(y) - 0.1, 1), np.arange(0, len(x) - 0.1, 1))
    interpolator = RegularGridInterpolator(xy, topo)

    # TODO: temporary class
    # Hashable point so that duplicated samples can be removed with set()
    class MyPoint(shpg.Point):
        def __hash__(self):
            return hash(tuple((self.x, self.y)))

    # normals
    ns = [i[0] for i in idl.normals]
    cs = []
    # StopIteration is used here as a "break out of both loops" signal: the
    # first cross-section that leaves the grid ends the sampling, and that
    # cross-section is not appended to `cs`.
    try:
        for pcoords, n in zip(idl.line.coords, ns):
            xi, yi = pcoords
            vx, vy = n
            modul = np.sqrt(vx**2 + vy**2)
            ci = []
            for ro in np.linspace(0, r / 2.0, cs_n):
                t = ro / modul
                cp1 = MyPoint(xi + t * vx, yi + t * vy)
                cp2 = MyPoint(xi - t * vx, yi - t * vy)

                # check if out of the frame
                if not (0 < cp2.y < len(y) - 1) or \
                   not (0 < cp2.x < len(x) - 1) or \
                   not (0 < cp1.y < len(y) - 1) or \
                   not (0 < cp1.x < len(x) - 1):
                    raise StopIteration()

                ci.append((cp1, ro))
                ci.append((cp2, -ro))

            # drop duplicated (point, distance) samples
            ci = list(set(ci))
            cs.append(ci)
    except StopIteration:
        # we reached the end of the line
        pass

    log.debug('%s: length of downstream line is: %d', gdir.rgi_id, len(cs))

    # number of cross-sections whose parabola passed the error threshold
    good = 0
    for ic, cc in enumerate(cs):
        z = []
        ro = []
        for i in cc:
            z.append(interpolator((i[0].y, i[0].x)))
            ro.append(i[1])
        # sort the samples by signed distance along the normal
        aso = np.argsort(ro)
        ro, z = np.array(ro)[aso], np.array(z)[aso]

        # find top of parabola
        roHead = ro[np.argmin(z)]
        zero = np.argmin(z)  # it is index of roHead/zHead
        zHead = np.amin(z)

        dsts = abs(h_mean + zHead - z)

        # find local minima in set of distances
        extr = scipy.signal.argrelextrema(dsts, np.less, mode='wrap')

        # from local minima find that with the minimum |x|
        # NOTE(review): if no local minimum was found `extr[0]` is empty and
        # np.argmin raises ValueError -- there is no guard here.
        idx = extr[0][np.argmin(abs(ro[extr]))]

        # Window of samples centered on `zero`; the start index is clamped
        # at 0:
        # x<0 => x=0
        # (|x|+x)/2
        roN = ro[int((abs(zero - abs(zero - idx)) + zero - abs(zero - idx)) /
                     2):zero + abs(zero - idx) + 1]
        zN = z[int((abs(zero - abs(zero - idx)) + zero - abs(zero - idx)) /
                   2):zero + abs(zero - idx) + 1]
        roNx = roN - roHead
        # zN=zN-zHead#

        p = _approx_parabola(roNx, zN, y0=zHead)

        # shift parabola to the ds-line
        p2 = np.copy(p)
        p2[2] = z[ro == 0]

        err = _parabola_error(roN, zN, p2) * 100
        if err < 1.5:
            bed.append(p2)
            good += 1
        else:
            # rejected fit: None in first position marks a gap to interpolate
            bed.append([None, 0, p2[2]])

    # NOTE(review): divides by len(cs); an empty `cs` (first cross-section
    # already out of the frame) raises ZeroDivisionError here.
    log.debug('%s: percentage of valid parabolas: %d', gdir.rgi_id,
              int(good / len(cs) * 100))

    # skip gaps
    # NOTE(review): the truthiness test also skips a first parameter that is
    # exactly 0, not only the None markers.
    x_bed = [i for i, j in enumerate(bed) if j[0]]
    bg = [bed[i][0] for i in x_bed]

    # interpolation, filling the gaps
    bed_i = np.interp(range(0, len(bed)), x_bed, bg)
    bed_int = [[bed_i[j], i[1], i[2]] for j, i in enumerate(bed)]

    # approximation - BED_Moving_Average
    # only the first parameter is smoothed; the other two are carried over
    bed_ma = pdSeries([i[0] for i in bed_int])
    bed_ma = bed_ma.rolling(window=5, center=True, min_periods=1).mean()
    bed_ma = [[bed_ma[j], i[1], i[2]] for j, i in enumerate(bed_int)]

    # write output
    gdir.write_pickle(bed_ma, 'downstream_bed')