コード例 #1
0
ファイル: gis.py プロジェクト: goncaloperes/cuspatial
def polyline_bounding_boxes(poly_offsets, xs, ys, expansion_radius):
    """Build one minimum bounding box per polyline.

    The offsets are converted to an ``int32`` column, the coordinates are
    passed through ``normalize_point_columns``, and the resulting columns
    are handed to ``cpp_polyline_bounding_boxes`` together with
    ``expansion_radius``.

    Parameters
    ----------
    poly_offsets
        Begin offsets of each polyline (i.e. prefix-sum)
    xs
        x-coordinates of the polyline points
    ys
        y-coordinates of the polyline points
    expansion_radius
        radius of each polyline point

    Returns
    -------
    result : cudf.DataFrame
        one minimum bounding box per polyline

        x_min : cudf.Series
            smallest x-coordinate of each box
        y_min : cudf.Series
            smallest y-coordinate of each box
        x_max : cudf.Series
            largest x-coordinate of each box
        y_max : cudf.Series
            largest y-coordinate of each box
    """
    offsets_col = as_column(poly_offsets, dtype="int32")
    x_col, y_col = normalize_point_columns(as_column(xs), as_column(ys))
    bbox_columns = cpp_polyline_bounding_boxes(
        offsets_col, x_col, y_col, expansion_radius
    )
    return DataFrame._from_data(*bbox_columns)
コード例 #2
0
def _cubic_spline_coefficients(x, y, ids, prefix_sums):
    """Compute cubic spline coefficients from the columns of the inputs.

    Each argument must expose a ``_column`` attribute. Those four columns
    are passed, in order, to ``cubicspline_coefficients`` and the result is
    unpacked into ``DataFrame._from_data``.
    """
    columns = [arg._column for arg in (x, y, ids, prefix_sums)]
    return DataFrame._from_data(*cubicspline_coefficients(*columns))
コード例 #3
0
def join_quadtree_and_bounding_boxes(quadtree, poly_bounding_boxes, x_min,
                                     x_max, y_min, y_max, scale, max_depth):
    """Find the quadtree quadrants that polygon/polyline bounding boxes hit.

    Parameters
    ----------
    quadtree : cudf.DataFrame
        A complete quadtree for a given area-of-interest bounding box.
    poly_bounding_boxes : cudf.DataFrame
        Minimum bounding boxes for a set of polygons or polylines
    x_min
        The lower-left x-coordinate of the area of interest bounding box
    x_max
        The upper-right x-coordinate of the area of interest bounding box
    y_min
        The lower-left y-coordinate of the area of interest bounding box
    y_max
        The upper-right y-coordinate of the area of interest bounding box
    scale
        Scale to apply to each point's distance from ``(x_min, y_min)``
    max_depth
        Maximum quadtree depth at which to stop testing for intersections

    Returns
    -------
    result : cudf.DataFrame
        Indices for each intersecting bounding box and leaf quadrant.

        poly_offset : cudf.Series
            Indices for each poly bbox that intersects with the quadtree.
        quad_offset : cudf.Series
            Indices for each leaf quadrant intersecting with a poly bbox.

    Notes
    -----
    * ``x_min``/``x_max`` are swapped when ``x_min > x_max``
    * ``y_min``/``y_max`` are swapped when ``y_min > y_max``
    * A ``scale`` below
      ``max(x_max - x_min, y_max - y_min) / ((1 << max_depth) + 2)``
      triggers a warning and is raised to that minimum.
    """
    # Order the bounds so the "min" value really is the smaller one.
    lo_x = min(x_min, x_max)
    hi_x = max(x_min, x_max)
    lo_y = min(y_min, y_max)
    hi_y = max(y_min, y_max)

    widest_extent = max(hi_x - lo_x, hi_y - lo_y)
    min_scale = widest_extent / ((1 << max_depth) + 2)
    if scale < min_scale:
        warnings.warn(
            "scale {} is less than required minimum "
            "scale {}. Clamping to minimum scale".format(scale, min_scale)
        )

    effective_scale = max(scale, min_scale)
    pairs = spatial_join.join_quadtree_and_bounding_boxes(
        quadtree,
        poly_bounding_boxes,
        lo_x,
        hi_x,
        lo_y,
        hi_y,
        effective_scale,
        max_depth,
    )
    return DataFrame._from_data(*pairs)
コード例 #4
0
def assert_packed_frame_equality(df):
    """Assert that ``df`` is unchanged by a ``pack``/``unpack`` round trip.

    A pandas copy of ``df`` is taken first and serves as the expected value.
    The local reference to ``df`` is deleted before unpacking, then the
    frame rebuilt from the unpacked table is compared via ``assert_eq``.
    """
    expected = df.to_pandas()
    packed_df = pack(df)
    # Drop this function's reference to the input before unpacking.
    del df

    table = unpack(packed_df)
    roundtripped = DataFrame._from_data(table._data, table._index)
    assert_eq(roundtripped, expected)
コード例 #5
0
ファイル: trajectory.py プロジェクト: goncaloperes/cuspatial
def trajectory_bounding_boxes(num_trajectories, object_ids, xs, ys):
    """Return the minimum bounding box of every trajectory.

    ``object_ids`` is converted to an ``int32`` column and the coordinates
    go through ``normalize_point_columns`` before being handed to
    ``cpp_trajectory_bounding_boxes``.

    Parameters
    ----------
    num_trajectories
        number of trajectories (unique object ids)
    object_ids
        column of object (e.g., vehicle) ids
    xs
        column of x-coordinates (in kilometers)
    ys
        column of y-coordinates (in kilometers)

    Returns
    -------
    result : cudf.DataFrame
        minimum bounding boxes (in kilometers) for each trajectory

        x_min : cudf.Series
            smallest x-coordinate of each box
        y_min : cudf.Series
            smallest y-coordinate of each box
        x_max : cudf.Series
            largest x-coordinate of each box
        y_max : cudf.Series
            largest y-coordinate of each box

    Examples
    --------
    Compute the minimum bounding boxes of derived trajectories

    >>> objects, traj_offsets = trajectory.derive_trajectories(
            [0, 0, 1, 1],  # object_id
            [0, 1, 2, 3],  # x
            [0, 0, 1, 1],  # y
            [0, 10, 0, 10] # timestamp
        )
    >>> traj_bounding_boxes = cuspatial.trajectory_bounding_boxes(
            len(traj_offsets),
            objects['object_id'],
            objects['x'],
            objects['y']
        )
    >>> print(traj_bounding_boxes)
        x_min   y_min   x_max   y_max
    0     0.0     0.0     2.0     2.0
    1     1.0     1.0     3.0     3.0
    """
    id_col = as_column(object_ids, dtype=np.int32)
    x_col, y_col = normalize_point_columns(as_column(xs), as_column(ys))
    bbox_columns = cpp_trajectory_bounding_boxes(
        num_trajectories, id_col, x_col, y_col
    )
    return DataFrame._from_data(*bbox_columns)
コード例 #6
0
ファイル: trajectory.py プロジェクト: goncaloperes/cuspatial
def derive_trajectories(object_ids, xs, ys, timestamps):
    """
    Group points into trajectories by object id and timestamp.

    Parameters
    ----------
    object_ids
        column of object (e.g., vehicle) ids
    xs
        column of x-coordinates (in kilometers)
    ys
        column of y-coordinates (in kilometers)
    timestamps
        column of timestamps in any resolution

    Returns
    -------
    result : tuple (objects, traj_offsets)
        objects : cudf.DataFrame
            object_ids, xs, ys, and timestamps sorted by
            ``(object_id, timestamp)``; this is the input expected by
            ``trajectory_bounding_boxes`` and
            ``trajectory_distances_and_speeds``
        traj_offsets : cudf.Series
            offsets of discovered trajectories

    Examples
    --------
    Compute sorted objects and discovered trajectories

    >>> objects, traj_offsets = cuspatial.derive_trajectories(
            [0, 1, 2, 3],  # object_id
            [0, 0, 1, 1],  # x
            [0, 0, 1, 1],  # y
            [0, 10, 0, 10] # timestamp
        )
    >>> print(traj_offsets)
        0  0
        1  2
    >>> print(objects)
           object_id       x       y  timestamp
        0          0       1       0          0
        1          0       0       0         10
        2          1       3       1          0
        3          1       2       1         10
    """
    id_col = as_column(object_ids, dtype=np.int32)
    x_col, y_col = normalize_point_columns(as_column(xs), as_column(ys))
    time_col = normalize_timestamp_column(as_column(timestamps))

    sorted_objects, offsets = cpp_derive_trajectories(
        id_col, x_col, y_col, time_col
    )
    objects_frame = DataFrame._from_data(*sorted_objects)
    return objects_frame, Series(data=offsets)
コード例 #7
0
ファイル: trajectory.py プロジェクト: goncaloperes/cuspatial
def trajectory_distances_and_speeds(num_trajectories, object_ids, xs, ys,
                                    timestamps):
    """
    Return the distance travelled and the speed of each trajectory.

    Parameters
    ----------
    num_trajectories
        number of trajectories (unique object ids)
    object_ids
        column of object (e.g., vehicle) ids
    xs
        column of x-coordinates (in kilometers)
    ys
        column of y-coordinates (in kilometers)
    timestamps
        column of timestamps in any resolution

    Returns
    -------
    result : cudf.DataFrame
        One row per trajectory; the index is named ``trajectory_id``.

        distance : cudf.Series
            trajectory distance. NOTE(review): the earlier docstring
            labelled this column ``meters`` with a unit of kilometers,
            while the sample output below shows ``distance`` -- confirm
            the name and unit against the native routine.
        speed : cudf.Series
            trajectory speed (in meters/second)

    Examples
    --------
    Compute the distances and speeds of derived trajectories

    >>> objects, traj_offsets = cuspatial.derive_trajectories(...)
    >>> dists_and_speeds = cuspatial.trajectory_distances_and_speeds(
            len(traj_offsets),
            objects['object_id'],
            objects['x'],
            objects['y'],
            objects['timestamp']
        )
    >>> print(dists_and_speeds)
                       distance          speed
        trajectory_id
        0                1000.0  100000.000000
        1                1000.0  111111.109375
    """
    id_col = as_column(object_ids, dtype=np.int32)
    x_col, y_col = normalize_point_columns(as_column(xs), as_column(ys))
    time_col = normalize_timestamp_column(as_column(timestamps))

    stats_columns = cpp_trajectory_distances_and_speeds(
        num_trajectories, id_col, x_col, y_col, time_col
    )
    stats = DataFrame._from_data(*stats_columns)
    stats.index.name = "trajectory_id"
    return stats
コード例 #8
0
def points_in_spatial_window(min_x, max_x, min_y, max_y, xs, ys):
    """Keep only the points that lie inside a rectangular query window.

    A point `(x, y)` is inside the query window if and only if
    ``min_x < x < max_x AND min_y < y < max_y``

    The window is described by its minimum and maximum x and y
    coordinates.

    Parameters
    ----------
    min_x
        lower x-coordinate of the query window
    max_x
        upper x-coordinate of the query window
    min_y
        lower y-coordinate of the query window
    max_y
        upper y-coordinate of the query window
    xs
        column of x-coordinates that may fall within the window
    ys
        column of y-coordinates that may fall within the window

    Returns
    -------
    result : cudf.DataFrame
        subset of `(x, y)` pairs above that fall within the window

    Notes
    -----
    * Documented as swapping ``min_x`` and ``max_x`` if ``min_x > max_x``
    * Documented as swapping ``min_y`` and ``max_y`` if ``min_y > max_y``

    NOTE(review): this wrapper forwards the bounds unchanged, so any swap
    presumably happens inside ``spatial_window.points_in_spatial_window``
    -- confirm.
    """
    x_col, y_col = normalize_point_columns(as_column(xs), as_column(ys))
    inside_columns = spatial_window.points_in_spatial_window(
        min_x, max_x, min_y, max_y, x_col, y_col
    )
    return DataFrame._from_data(*inside_columns)
コード例 #9
0
def check_packed_pickled_equality(df):
    """Run the packed-pickle round-trip checks on ``df`` and variants of it.

    Covers the frame itself, three slices of it, a copy sorted by the
    ``"vals"`` column (whose index must be a ``GenericIndex``), and, when
    pickle protocol 5 is available, a round trip with out-of-band buffers.
    """
    # The frame as given, then a few slices of it.
    assert_packed_frame_picklable(df)
    for window in (slice(None, -1), slice(1, None), slice(2, -2)):
        assert_packed_frame_picklable(df[window])

    # Sorting by value is expected to leave a GenericIndex behind.
    by_vals = df.sort_values("vals")
    assert isinstance(by_vals.index, GenericIndex)
    assert_packed_frame_picklable(by_vals)

    # Out-of-band buffers need pickle protocol 5.
    if pickle.HIGHEST_PROTOCOL < 5:
        return

    oob_buffers = []
    payload = pickle.dumps(
        pack(df), protocol=5, buffer_callback=oob_buffers.append
    )
    assert all(isinstance(buf, pickle.PickleBuffer) for buf in oob_buffers)

    table = unpack(pickle.loads(payload, buffers=oob_buffers))
    restored = DataFrame._from_data(table._data, table._index)
    assert_eq(restored, df)
コード例 #10
0
def read_orc(
    filepath_or_buffer,
    engine="cudf",
    columns=None,
    filters=None,
    stripes=None,
    skiprows=None,
    num_rows=None,
    use_index=True,
    decimal_cols_as_float=None,
    timestamp_type=None,
    use_python_file_object=True,
    **kwargs,
):
    """{docstring}"""
    # NOTE(review): the placeholder docstring above is kept verbatim;
    # presumably it is filled in by a decorator that is not visible here.
    if decimal_cols_as_float is not None:
        warnings.warn(
            "`decimal_cols_as_float` is deprecated and will be removed in "
            "the future",
            FutureWarning,
        )
    from cudf import DataFrame

    # Multiple sources are passed as a list. If a single source is passed,
    # wrap it in a list for unified processing downstream.
    if not is_list_like(filepath_or_buffer):
        filepath_or_buffer = [filepath_or_buffer]

    # Each source must have a correlating stripe list. If a single stripe list
    # is provided rather than a list of list of stripes then extrapolate that
    # stripe list across all input sources
    if stripes is not None:
        if any(not isinstance(stripe, list) for stripe in stripes):
            stripes = [stripes]

        # Must ensure a stripe for each source is specified, unless None
        if len(stripes) != len(filepath_or_buffer):
            raise ValueError(
                "A list of stripes must be provided for each input source"
            )

    # Resolve every source (expanding directories to "*.orc" globs) into a
    # flat list of paths/buffers.
    filepaths_or_buffers = []
    for source in filepath_or_buffer:
        if ioutils.is_directory(source, **kwargs):
            fs = ioutils._ensure_filesystem(
                passed_filesystem=None, path=source, **kwargs,
            )
            source = stringify_path(source)
            source = fs.sep.join([source, "*.orc"])

        tmp_source, compression = ioutils.get_filepath_or_buffer(
            path_or_data=source,
            compression=None,
            use_python_file_object=use_python_file_object,
            **kwargs,
        )
        if compression is not None:
            raise ValueError(
                "URL content-encoding decompression is not supported"
            )
        if isinstance(tmp_source, list):
            filepaths_or_buffers.extend(tmp_source)
        else:
            filepaths_or_buffers.append(tmp_source)

    if filters is not None:
        selected_stripes = _filter_stripes(
            filters, filepaths_or_buffers, stripes, skiprows, num_rows
        )

        # Return empty if everything was filtered
        if len(selected_stripes) == 0:
            return _make_empty_df(filepaths_or_buffers[0], columns)
        stripes = selected_stripes

    if engine == "cudf":
        return DataFrame._from_data(
            *liborc.read_orc(
                filepaths_or_buffers,
                columns,
                stripes,
                skiprows,
                num_rows,
                use_index,
                decimal_cols_as_float,
                timestamp_type,
            )
        )

    # Any other engine value falls back to reading on the CPU with PyArrow.
    def read_orc_stripe(orc_file, stripe, columns):
        # read_stripe may hand back a RecordBatch; normalise to a Table.
        pa_table = orc_file.read_stripe(stripe, columns)
        if isinstance(pa_table, pa.RecordBatch):
            pa_table = pa.Table.from_batches([pa_table])
        return pa_table

    warnings.warn("Using CPU via PyArrow to read ORC dataset.")
    if len(filepath_or_buffer) > 1:
        raise NotImplementedError(
            "Using CPU via PyArrow only supports a single input source"
        )

    # This path opens the caller-supplied source, not the resolved one.
    orc_file = orc.ORCFile(filepath_or_buffer[0])
    if stripes is not None and len(stripes) > 0:
        # Each iteration rebinds pa_table, so the last stripe list wins.
        for stripe_source_file in stripes:
            pa_tables = [
                read_orc_stripe(orc_file, i, columns)
                for i in stripe_source_file
            ]
            pa_table = pa.concat_tables(pa_tables)
    else:
        pa_table = orc_file.read(columns=columns)

    # Use the locally imported DataFrame rather than a module-level ``cudf``.
    return DataFrame.from_arrow(pa_table)
コード例 #11
0
def assert_packed_frame_serializable(df):
    """Assert that ``df`` survives pack -> serialize -> deserialize -> unpack.

    The packed object's own ``serialize``/``deserialize`` pair is used, and
    the rebuilt frame is compared with the input via ``assert_eq``.
    """
    packed_df = pack(df)
    header, frames = packed_df.serialize()
    restored_packed = packed_df.deserialize(header, frames)

    table = unpack(restored_packed)
    restored = DataFrame._from_data(table._data, table._index)
    assert_eq(restored, df)
コード例 #12
0
def assert_packed_frame_picklable(df):
    """Assert that ``df`` survives pack -> pickle -> unpickle -> unpack.

    The frame rebuilt from the unpickled packed object is compared with the
    input via ``assert_eq``.
    """
    payload = pickle.dumps(pack(df))
    restored_packed = pickle.loads(payload)

    table = unpack(restored_packed)
    restored = DataFrame._from_data(table._data, table._index)
    assert_eq(restored, df)
コード例 #13
0
ファイル: gis.py プロジェクト: goncaloperes/cuspatial
def point_in_polygon(
    test_points_x,
    test_points_y,
    poly_offsets,
    poly_ring_offsets,
    poly_points_x,
    poly_points_y,
):
    """ Determine, for every test point, which of the given polygons
    contain it. Note that `polygons_(x,y)` must be specified as closed
    polygons: the first and last coordinate of each polygon must be the
    same.

    Parameters
    ----------
    test_points_x
        x-coordinate of test points
    test_points_y
        y-coordinate of test points
    poly_offsets
        beginning index of the first ring in each polygon; its ``index``
        supplies the column labels of the result
    poly_ring_offsets
        beginning index of the first point in each ring
    poly_points_x
        x closed-coordinate of polygon points
    poly_points_y
        y closed-coordinate of polygon points

    Examples
    --------

    Test whether 3 points fall within either of two polygons

    >>> result = cuspatial.point_in_polygon(
        [0, -8, 6.0],                             # test_points_x
        [0, -8, 6.0],                             # test_points_y
        cudf.Series([0, 1], index=['nyc', 'hudson river']), # poly_offsets
        [0, 3],                                   # ring_offsets
        [-10, 5, 5, -10, 0, 10, 10, 0],           # poly_points_x
        [-10, -10, 5, 5, 0, 0, 10, 10],           # poly_points_y
    )
    # The result of point_in_polygon is a DataFrame of Boolean
    # values indicating whether each point (rows) falls within
    # each polygon (columns).
    >>> print(result)
                nyc            hudson river
    0          True          True
    1          True         False
    2         False          True
    # Point 0: (0, 0) falls in both polygons
    # Point 1: (-8, -8) falls in the first polygon
    # Point 2: (6.0, 6.0) falls in the second polygon

    Notes
    -----
    Input Series x and y are not index aligned; they are treated as
    sequential arrays.

    Returns
    -------
    result : cudf.DataFrame
        A DataFrame of boolean values indicating whether each point falls
        within each polygon. An empty DataFrame is returned when
        ``poly_offsets`` has length zero.
    """

    # Nothing to test against: hand back an empty frame.
    if len(poly_offsets) == 0:
        return DataFrame()

    pt_x, pt_y, poly_x, poly_y = normalize_point_columns(
        as_column(test_points_x),
        as_column(test_points_y),
        as_column(poly_points_x),
        as_column(poly_points_y),
    )

    bitmap = cpp_point_in_polygon(
        pt_x,
        pt_y,
        as_column(poly_offsets, dtype="int32"),
        as_column(poly_ring_offsets, dtype="int32"),
        poly_x,
        poly_y,
    )

    # Expand the per-point bitmap to one entry per polygon.
    binary = gis_utils.pip_bitmap_column_to_binary_array(
        polygon_bitmap_column=bitmap, width=len(poly_offsets)
    )
    raw_frame = DataFrame(binary)
    bool_columns = {}
    for name, col in raw_frame._data.items():
        bool_columns[name] = col.astype("bool")
    frame = DataFrame._from_data(bool_columns)

    # The columns receive the polygon labels in reverse order and are then
    # re-selected so the labels come out in ``poly_offsets.index`` order
    # (presumably the helper emits the last polygon first -- TODO confirm).
    frame.columns = list(reversed(poly_offsets.index))
    return frame[list(reversed(frame.columns))]
コード例 #14
0
ファイル: indexing.py プロジェクト: goncaloperes/cuspatial
def quadtree_on_points(
    xs, ys, x_min, x_max, y_min, y_max, scale, max_depth, min_size
):
    """ Build a quadtree over a set of points for a given area-of-interest
        bounding box.

    Parameters
    ----------
    xs
        Column of x-coordinates for each point
    ys
        Column of y-coordinates for each point
    x_min
        The lower-left x-coordinate of the area of interest bounding box
    x_max
        The upper-right x-coordinate of the area of interest bounding box
    y_min
        The lower-left y-coordinate of the area of interest bounding box
    y_max
        The upper-right y-coordinate of the area of interest bounding box
    scale
        Scale to apply to each point's distance from ``(x_min, y_min)``
    max_depth
        Maximum quadtree depth
    min_size
        Minimum number of points for a non-leaf quadtree node

    Returns
    -------
    result : tuple (cudf.Series, cudf.DataFrame)
        keys_to_points  : cudf.Series(dtype=np.int32)
            A column of sorted keys to original point indices
        quadtree        : cudf.DataFrame
            A complete quadtree for the set of input points

            key         : cudf.Series(dtype=np.int32)
                An int32 column of quadrant keys
            level       : cudf.Series(dtype=np.int8)
                An int8 column of quadtree levels
            is_quad     : cudf.Series(dtype=np.bool_)
                A boolean column indicating whether the node is a quad or leaf
            length      : cudf.Series(dtype=np.int32)
                For a non-leaf quadrant (``is_quad`` is ``True``) this is
                the number of children of the quadrant.

                For a leaf quadrant it is the number of points the leaf
                contains.
            offset      : cudf.Series(dtype=np.int32)
                For a non-leaf quadrant (``is_quad`` is ``True``) this is
                the position of the quadrant's first child.

                For a leaf quadrant it is the position of the leaf's first
                point.

    Notes
    -----
    * ``x_min``/``x_max`` are swapped when ``x_min > x_max``
    * ``y_min``/``y_max`` are swapped when ``y_min > y_max``
    * A ``scale`` below
      ``max(x_max - x_min, y_max - y_min) / ((1 << max_depth) + 2)``
      triggers a warning and is raised to that minimum.

    Examples
    --------

    An example of selecting the ``min_size`` and ``scale`` based on input::

        >>> np.random.seed(0)
        >>> points = cudf.DataFrame({
                "x": cudf.Series(np.random.normal(size=120)) * 500,
                "y": cudf.Series(np.random.normal(size=120)) * 500,
            })

        >>> max_depth = 3
        >>> min_size = 50
        >>> min_x, min_y, max_x, max_y = (points["x"].min(),
                                          points["y"].min(),
                                          points["x"].max(),
                                          points["y"].max())
        >>> scale = max(max_x - min_x, max_y - min_y) // (1 << max_depth)
        >>> print(
                "min_size:   " + str(min_size) + "\\n"
                "num_points: " + str(len(points)) + "\\n"
                "min_x:      " + str(min_x) + "\\n"
                "max_x:      " + str(max_x) + "\\n"
                "min_y:      " + str(min_y) + "\\n"
                "max_y:      " + str(max_y) + "\\n"
                "scale:      " + str(scale) + "\\n"
            )
        min_size:   50
        num_points: 120
        min_x:      -1577.4949079170394
        max_x:      1435.877311993804
        min_y:      -1412.7015761122134
        max_y:      1492.572387431971
        scale:      301.0

        >>> key_to_point, quadtree = cuspatial.quadtree_on_points(
                points["x"],
                points["y"],
                min_x,
                max_x,
                min_y,
                max_y,
                scale, max_depth, min_size
            )

        >>> print(quadtree)
            key  level  is_quad  length  offset
        0     0      0    False      15       0
        1     1      0    False      27      15
        2     2      0    False      12      42
        3     3      0     True       4       8
        4     4      0    False       5     106
        5     6      0    False       6     111
        6     9      0    False       2     117
        7    12      0    False       1     119
        8    12      1    False      22      54
        9    13      1    False      18      76
        10   14      1    False       9      94
        11   15      1    False       3     103

        >>> print(key_to_point)
        0       63
        1       20
        2       33
        3       66
        4       19
            ...
        115    113
        116      3
        117     78
        118     98
        119     24
        Length: 120, dtype: int32
    """

    x_col, y_col = normalize_point_columns(as_column(xs), as_column(ys))

    # Order the bounds so the "min" value really is the smaller one.
    lo_x = min(x_min, x_max)
    hi_x = max(x_min, x_max)
    lo_y = min(y_min, y_max)
    hi_y = max(y_min, y_max)

    widest_extent = max(hi_x - lo_x, hi_y - lo_y)
    min_scale = widest_extent / ((1 << max_depth) + 2)
    if scale < min_scale:
        warnings.warn(
            "scale {} is less than required minimum "
            "scale {}. Clamping to minimum scale".format(scale, min_scale)
        )

    effective_scale = max(scale, min_scale)
    key_to_point, tree_columns = cpp_quadtree_on_points(
        x_col,
        y_col,
        lo_x,
        hi_x,
        lo_y,
        hi_y,
        effective_scale,
        max_depth,
        min_size,
    )
    return Series(key_to_point), DataFrame._from_data(*tree_columns)
コード例 #15
0
def quadtree_point_in_polygon(
    poly_quad_pairs,
    quadtree,
    point_indices,
    points_x,
    points_y,
    poly_offsets,
    ring_offsets,
    poly_points_x,
    poly_points_y,
):
    """ Find which of the given points lie inside which of the given
    polygons.

    The (polygon, quadrant) pairs produced by
    ``cuspatial.join_quadtree_and_bounding_boxes`` restrict the test so that
    each polygon is only checked against points in the same quadrant.

    That pre-filtering can greatly reduce the number of points tested per
    polygon, trading extra memory (for the quadtree and the sorted
    point_indices) for faster intersection testing.

    Parameters
    ----------
    poly_quad_pairs: cudf.DataFrame
        Table of (polygon, quadrant) index pairs returned by
        ``cuspatial.join_quadtree_and_bounding_boxes``.
    quadtree : cudf.DataFrame
        A complete quadtree for a given area-of-interest bounding box.
    point_indices : cudf.Series
        Sorted point indices returned by ``cuspatial.quadtree_on_points``
    points_x : cudf.Series
        x-coordinates of points used to construct the quadtree.
    points_y : cudf.Series
        y-coordinates of points used to construct the quadtree.
    poly_offsets : cudf.Series
        Begin index of the first ring in each polygon.
    ring_offsets : cudf.Series
        Begin index of the first point in each ring.
    poly_points_x : cudf.Series
        Polygon point x-coordinates.
    poly_points_y : cudf.Series
        Polygon point y-coordinates.

    Returns
    -------
    result : cudf.DataFrame
        Indices for each intersecting point and polygon pair.

        polygon_index : cudf.Series
            Indices of each polygon with which a point intersected.
        point_index : cudf.Series
            Indices of each point that intersects with a polygon.
    """

    pt_x, pt_y, poly_x, poly_y = normalize_point_columns(
        as_column(points_x),
        as_column(points_y),
        as_column(poly_points_x),
        as_column(poly_points_y),
    )

    # Index and offset inputs are passed on as uint32 columns.
    index_col = as_column(point_indices, dtype="uint32")
    poly_offset_col = as_column(poly_offsets, dtype="uint32")
    ring_offset_col = as_column(ring_offsets, dtype="uint32")

    matches = spatial_join.quadtree_point_in_polygon(
        poly_quad_pairs,
        quadtree,
        index_col,
        pt_x,
        pt_y,
        poly_offset_col,
        ring_offset_col,
        poly_x,
        poly_y,
    )
    return DataFrame._from_data(*matches)
コード例 #16
0
def quadtree_point_to_nearest_polyline(
    poly_quad_pairs,
    quadtree,
    point_indices,
    points_x,
    points_y,
    poly_offsets,
    poly_points_x,
    poly_points_y,
):
    """ For each point in a quadrant, find the nearest polyline and the
    distance to it.

    The (polyline, quadrant) pairs produced by
    ``cuspatial.join_quadtree_and_bounding_boxes`` restrict the work so that
    distances are only computed for points in the same quadrant as each
    polyline.

    Parameters
    ----------
    poly_quad_pairs: cudf.DataFrame
        Table of (polyline, quadrant) index pairs returned by
        ``cuspatial.join_quadtree_and_bounding_boxes``.
    quadtree : cudf.DataFrame
        A complete quadtree for a given area-of-interest bounding box.
    point_indices : cudf.Series
        Sorted point indices returned by ``cuspatial.quadtree_on_points``
    points_x : cudf.Series
        x-coordinates of points used to construct the quadtree.
    points_y : cudf.Series
        y-coordinates of points used to construct the quadtree.
    poly_offsets : cudf.Series
        Begin index of the first point in each polyline.
    poly_points_x : cudf.Series
        Polyline point x-coordinates.
    poly_points_y : cudf.Series
        Polyline point y-coordinates.

    Returns
    -------
    result : cudf.DataFrame
        Indices for each point and its nearest polyline, and the distance
        between the two.

        point_index : cudf.Series
            Indices of each point that intersects with a polyline.
        polyline_index : cudf.Series
            Indices of each polyline with which a point intersected.
        distance : cudf.Series
            Distances between each point and its nearest polyline.
    """
    pt_x, pt_y, line_x, line_y = normalize_point_columns(
        as_column(points_x),
        as_column(points_y),
        as_column(poly_points_x),
        as_column(poly_points_y),
    )

    # Index and offset inputs are passed on as uint32 columns.
    index_col = as_column(point_indices, dtype="uint32")
    offset_col = as_column(poly_offsets, dtype="uint32")

    nearest = spatial_join.quadtree_point_to_nearest_polyline(
        poly_quad_pairs,
        quadtree,
        index_col,
        pt_x,
        pt_y,
        offset_col,
        line_x,
        line_y,
    )
    return DataFrame._from_data(*nearest)