Code Example #1
File: pandas.py   Project: LilySu/datashader
def default(glyph, source, schema, canvas, summary):
    create, info, append, _, finalize = compile_components(
        summary, schema, glyph)
    x_mapper = canvas.x_axis.mapper
    y_mapper = canvas.y_axis.mapper
    extend = glyph._build_extend(x_mapper, y_mapper, info, append)

    x_range = canvas.x_range or glyph.compute_x_bounds(source)
    y_range = canvas.y_range or glyph.compute_y_bounds(source)

    width = canvas.plot_width
    height = canvas.plot_height

    x_st = canvas.x_axis.compute_scale_and_translate(x_range, width)
    y_st = canvas.y_axis.compute_scale_and_translate(y_range, height)

    x_axis = canvas.x_axis.compute_index(x_st, width)
    y_axis = canvas.y_axis.compute_index(y_st, height)

    bases = create((height, width))
    extend(bases, source, x_st + y_st, x_range + y_range)

    return finalize(bases,
                    coords=OrderedDict([(glyph.x_label, x_axis),
                                        (glyph.y_label, y_axis)]),
                    dims=[glyph.y_label, glyph.x_label])
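
This pandas glyph-dispatch function is normally reached through the public Canvas API rather than called directly. A minimal usage sketch (the dataframe, column names, and aggregation below are illustrative, not part of the source above):

import pandas as pd
import datashader as ds

# Aggregate a small dataframe onto a 400x400 grid; on a pandas input,
# Canvas.points is expected to route through the default() path above.
df = pd.DataFrame({'x': [0.0, 1.0, 2.0], 'y': [0.0, 1.0, 0.5]})
cvs = ds.Canvas(plot_width=400, plot_height=400)
agg = cvs.points(df, 'x', 'y', agg=ds.count())  # -> xarray.DataArray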
Code Example #2
File: dask.py   Project: thuydotm/datashader
def line(glyph, df, schema, canvas, summary, cuda=False):
    if cuda:
        from cudf import concat
    else:
        from pandas import concat

    shape, bounds, st, axis = shape_bounds_st_and_axis(df, canvas, glyph)

    # Compile functions
    create, info, append, combine, finalize = \
        compile_components(summary, schema, glyph, cuda=cuda)
    x_mapper = canvas.x_axis.mapper
    y_mapper = canvas.y_axis.mapper
    extend = glyph._build_extend(x_mapper, y_mapper, info, append)

    def chunk(df, df2=None):
        plot_start = True
        if df2 is not None:
            df = concat([df.iloc[-1:], df2])
            plot_start = False
        aggs = create(shape)
        extend(aggs, df, st, bounds, plot_start=plot_start)
        return aggs

    name = tokenize(df.__dask_tokenize__(), canvas, glyph, summary)
    old_name = df.__dask_tokenize__()
    dsk = {(name, 0): (chunk, (old_name, 0))}
    for i in range(1, df.npartitions):
        dsk[(name, i)] = (chunk, (old_name, i - 1), (old_name, i))
    keys2 = [(name, i) for i in range(df.npartitions)]
    dsk[name] = (apply, finalize, [(combine, keys2)],
                 dict(cuda=cuda,
                      coords=axis,
                      dims=[glyph.y_label, glyph.x_label]))
    return dsk, name
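
Note how chunk() prepends the last row of the previous partition (df.iloc[-1:]) so that line segments spanning partition boundaries are not dropped. A hedged usage sketch of this dask line path (the column names are illustrative):

import numpy as np
import pandas as pd
import dask.dataframe as dd
import datashader as ds

# A 4-partition dask dataframe; each partition's aggregation task also
# receives the previous partition's final row to stitch the line across
# the boundary.
n = 1000
df = pd.DataFrame({'x': np.linspace(0, 10, n),
                   'y': np.sin(np.linspace(0, 10, n))})
ddf = dd.from_pandas(df, npartitions=4)
cvs = ds.Canvas(plot_width=400, plot_height=400)
agg = cvs.line(ddf, 'x', 'y')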
Code Example #3
def default(glyph, df, schema, canvas, summary):
    shape, bounds, st, axis = shape_bounds_st_and_axis(df, canvas, glyph)

    # Compile functions
    create, info, append, combine, finalize = \
        compile_components(summary, schema, glyph)
    x_mapper = canvas.x_axis.mapper
    y_mapper = canvas.y_axis.mapper
    extend = glyph._build_extend(x_mapper, y_mapper, info, append)

    def chunk(df):
        aggs = create(shape)
        extend(aggs, df, st, bounds)
        return aggs

    name = tokenize(df.__dask_tokenize__(), canvas, glyph, summary)
    keys = df.__dask_keys__()
    keys2 = [(name, i) for i in range(len(keys))]
    dsk = dict((k2, (chunk, k)) for (k2, k) in zip(keys2, keys))
    dsk[name] = (apply, finalize, [(combine, keys2)],
                 dict(coords=axis, dims=[glyph.y_label, glyph.x_label]))
    return dsk, name
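
The returned pair is a raw dask graph plus its output key, following a chunk -> combine -> finalize pattern. Below is a self-contained toy of the same pattern with stand-ins for the compiled datashader components (all names here are hypothetical; in recent dask versions apply lives in dask.utils, older versions expose it elsewhere):

from dask.threaded import get
from dask.utils import apply

def chunk(part):              # stand-in for the per-partition aggregation
    return sum(part)

def combine(aggs):            # stand-in for the compiled combine
    return sum(aggs)

def finalize(agg, **kwargs):  # stand-in for the compiled finalize
    return agg

dsk = {('df', 0): [1, 2], ('df', 1): [3, 4]}
dsk[('agg', 0)] = (chunk, ('df', 0))
dsk[('agg', 1)] = (chunk, ('df', 1))
dsk['result'] = (apply, finalize, [(combine, [('agg', 0), ('agg', 1)])], {})
assert get(dsk, 'result') == 10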
Code Example #4
def dask_rectilinear(glyph, xr_ds, schema, canvas, summary, cuda):
    shape, bounds, st, axis = shape_bounds_st_and_axis(xr_ds, canvas, glyph)

    # Compile functions
    create, info, append, combine, finalize = \
        compile_components(summary, schema, glyph, cuda=cuda)
    x_mapper = canvas.x_axis.mapper
    y_mapper = canvas.y_axis.mapper
    extend = glyph._build_extend(x_mapper, y_mapper, info, append)

    # Build chunk indices for coordinates
    chunk_inds = {}
    for k, chunks in xr_ds.chunks.items():
        chunk_inds[k] = [0] + list(np.cumsum(chunks))

    x_name = glyph.x
    y_name = glyph.y
    coords = xr_ds[glyph.name].coords
    coord_dims = list(coords.dims)
    xdim_ind = coord_dims.index(x_name)
    ydim_ind = coord_dims.index(y_name)

    var_name = list(xr_ds.data_vars.keys())[0]

    # Compute interval breaks
    xs = xr_ds[x_name].values
    ys = xr_ds[y_name].values
    x_breaks = glyph.infer_interval_breaks(xs)
    y_breaks = glyph.infer_interval_breaks(ys)

    def chunk(np_arr, *inds):
        # Reconstruct dataset for chunk from numpy array and chunk indices
        chunk_coords_list = []
        for i, coord_name in enumerate(coords.dims):
            chunk_number = inds[i]
            coord_slice = slice(chunk_inds[coord_name][chunk_number],
                                chunk_inds[coord_name][chunk_number + 1])
            chunk_coords_list.append(
                [coord_name, coords[coord_name][coord_slice]])

        chunk_coords = OrderedDict(chunk_coords_list)
        chunk_ds = xr.DataArray(np_arr,
                                coords=chunk_coords,
                                dims=coord_dims,
                                name=var_name).to_dataset()

        # Compute chunk x/y breaks
        x_chunk_number = inds[xdim_ind]
        x_breaks_slice = slice(chunk_inds[x_name][x_chunk_number],
                               chunk_inds[x_name][x_chunk_number + 1] + 1)
        x_breaks_chunk = x_breaks[x_breaks_slice]

        y_chunk_number = inds[ydim_ind]
        y_breaks_slice = slice(chunk_inds[y_name][y_chunk_number],
                               chunk_inds[y_name][y_chunk_number + 1] + 1)
        y_breaks_chunk = y_breaks[y_breaks_slice]

        # Initialize aggregation buffers
        aggs = create(shape)

        # Perform aggregation
        extend(aggs,
               chunk_ds,
               st,
               bounds,
               x_breaks=x_breaks_chunk,
               y_breaks=y_breaks_chunk)
        return aggs

    name = tokenize(xr_ds.__dask_tokenize__(), canvas, glyph, summary)
    keys = [k for row in xr_ds.__dask_keys__()[0] for k in row]
    keys2 = [(name, i) for i in range(len(keys))]
    dsk = dict((k2, (chunk, k, k[1], k[2])) for (k2, k) in zip(keys2, keys))
    dsk[name] = (apply, finalize, [(combine, keys2)],
                 dict(cuda=cuda,
                      coords=axis,
                      dims=[glyph.y_label, glyph.x_label]))
    return dsk, name
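
A hedged sketch of input that should reach this rectilinear path: Canvas.quadmesh on a dask-backed DataArray with 1-D coordinates. Irregular spacing is used because evenly spaced centers may be routed to the faster raster glyph instead (the array and names below are illustrative):

import numpy as np
import xarray as xr
import datashader as ds

# Irregularly spaced 1-D coordinates -> rectilinear quadmesh
xs = np.cumsum(np.random.rand(100))
ys = np.cumsum(np.random.rand(80))
da = xr.DataArray(np.random.rand(80, 100),
                  coords={'y': ys, 'x': xs},
                  dims=['y', 'x'], name='Z').chunk({'y': 40, 'x': 50})
cvs = ds.Canvas(plot_width=200, plot_height=200)
agg = cvs.quadmesh(da, x='x', y='y')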
Code Example #5
def dask_curvilinear(glyph, xr_ds, schema, canvas, summary, cuda):
    shape, bounds, st, axis = shape_bounds_st_and_axis(xr_ds, canvas, glyph)

    # Compile functions
    create, info, append, combine, finalize = \
        compile_components(summary, schema, glyph, cuda=cuda)
    x_mapper = canvas.x_axis.mapper
    y_mapper = canvas.y_axis.mapper
    extend = glyph._build_extend(x_mapper, y_mapper, info, append)

    x_coord_name = glyph.x
    y_coord_name = glyph.y
    z_name = glyph.name

    data_dim_names = list(xr_ds[z_name].dims)
    x_coord_dim_names = list(xr_ds[x_coord_name].dims)
    y_coord_dim_names = list(xr_ds[y_coord_name].dims)
    zs = xr_ds[z_name].data
    x_centers = xr_ds[glyph.x].data
    y_centers = xr_ds[glyph.y].data

    var_name = list(xr_ds.data_vars.keys())[0]

    # Validate coordinates
    err_msg = (
        "DataArray {name} is backed by a Dask array, \n"
        "but coordinate {coord} is not backed by a Dask array with identical \n"
        "dimension order and chunks")
    if (not isinstance(x_centers, dask.array.Array)
            or xr_ds[glyph.name].dims != xr_ds[glyph.x].dims
            or xr_ds[glyph.name].chunks != xr_ds[glyph.x].chunks):
        raise ValueError(err_msg.format(name=glyph.name, coord=glyph.x))

    if (not isinstance(y_centers, dask.array.Array)
            or xr_ds[glyph.name].dims != xr_ds[glyph.y].dims
            or xr_ds[glyph.name].chunks != xr_ds[glyph.y].chunks):
        raise ValueError(err_msg.format(name=glyph.name, coord=glyph.y))

    # Make sure coordinates are floats so that overlap with nan will behave properly
    if x_centers.dtype.kind != 'f':
        x_centers = x_centers.astype(np.float64)
    if y_centers.dtype.kind != 'f':
        y_centers = y_centers.astype(np.float64)

    x_overlapped_centers = overlap(x_centers, depth=1, boundary=np.nan)
    y_overlapped_centers = overlap(y_centers, depth=1, boundary=np.nan)

    def chunk(np_zs, np_x_centers, np_y_centers):

        # Handle boundaries that have nothing to overlap with
        for centers in [np_x_centers, np_y_centers]:
            if np.isnan(centers[0, :]).all():
                centers[0, :] = centers[1, :] - (centers[2, :] - centers[1, :])
            if np.isnan(centers[-1, :]).all():
                centers[-1, :] = centers[-2, :] + (centers[-2, :] -
                                                   centers[-3, :])
            if np.isnan(centers[:, 0]).all():
                centers[:, 0] = centers[:, 1] - (centers[:, 2] - centers[:, 1])
            if np.isnan(centers[:, -1]).all():
                centers[:, -1] = (centers[:, -2] +
                                  (centers[:, -2] - centers[:, -3]))

        # Compute interval breaks
        x_breaks_chunk = glyph.infer_interval_breaks(np_x_centers)
        y_breaks_chunk = glyph.infer_interval_breaks(np_y_centers)

        # Trim the border rows/columns contributed by the overlap padding
        x_breaks_chunk = x_breaks_chunk[1:-1, 1:-1]
        y_breaks_chunk = y_breaks_chunk[1:-1, 1:-1]

        # Reconstruct dataset for chunk from numpy array and chunk indices
        chunk_coords = {
            x_coord_name: (x_coord_dim_names, np_x_centers[1:-1, 1:-1]),
            y_coord_name: (y_coord_dim_names, np_y_centers[1:-1, 1:-1]),
        }
        chunk_ds = xr.DataArray(np_zs,
                                coords=chunk_coords,
                                dims=data_dim_names,
                                name=var_name).to_dataset()

        # Initialize aggregation buffers
        aggs = create(shape)

        # Perform aggregation
        extend(aggs,
               chunk_ds,
               st,
               bounds,
               x_breaks=x_breaks_chunk,
               y_breaks=y_breaks_chunk)
        return aggs

    result_name = tokenize(xr_ds.__dask_tokenize__(), canvas, glyph, summary)

    z_keys = [k for row in zs.__dask_keys__() for k in row]
    x_overlap_keys = [
        k for row in x_overlapped_centers.__dask_keys__() for k in row
    ]
    y_overlap_keys = [
        k for row in y_overlapped_centers.__dask_keys__() for k in row
    ]

    result_keys = [(result_name, i) for i in range(len(z_keys))]

    dsk = dict(
        (res_k, (chunk, z_k, x_k, y_k))
        for (res_k, z_k, x_k,
             y_k) in zip(result_keys, z_keys, x_overlap_keys, y_overlap_keys))

    dsk[result_name] = (apply, finalize, [(combine, result_keys)],
                        dict(cuda=cuda,
                             coords=axis,
                             dims=[glyph.y_label, glyph.x_label]))

    # Add x/y coord tasks to task graph
    dsk.update(x_overlapped_centers.dask)
    dsk.update(y_overlapped_centers.dask)

    return dsk, result_name
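
A hedged sketch of input for this curvilinear path: the x/y coordinates are 2-D arrays, dask-backed and chunked identically to the data variable, as the validation above requires (all names illustrative):

import numpy as np
import xarray as xr
import datashader as ds

# 2-D curvilinear coordinate arrays with the same dims/chunks as the data
j, i = np.meshgrid(np.arange(50), np.arange(40), indexing='ij')
Qx = i + 0.1 * j
Qy = j + 0.1 * i
da = xr.DataArray(np.random.rand(50, 40),
                  coords={'Qx': (['dim_y', 'dim_x'], Qx),
                          'Qy': (['dim_y', 'dim_x'], Qy)},
                  dims=['dim_y', 'dim_x'],
                  name='Z').chunk({'dim_y': 25, 'dim_x': 20})
cvs = ds.Canvas(plot_width=200, plot_height=200)
agg = cvs.quadmesh(da, x='Qx', y='Qy')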
Code Example #6
def dask_raster(glyph, xr_ds, schema, canvas, summary, cuda):
    shape, bounds, st, axis = shape_bounds_st_and_axis(xr_ds, canvas, glyph)

    # Compile functions
    create, info, append, combine, finalize = \
        compile_components(summary, schema, glyph, cuda=cuda)
    x_mapper = canvas.x_axis.mapper
    y_mapper = canvas.y_axis.mapper
    extend = glyph._build_extend(x_mapper, y_mapper, info, append)

    # Build chunk indices for coordinates
    chunk_inds = {}
    for k, chunks in xr_ds.chunks.items():
        chunk_inds[k] = [0] + list(np.cumsum(chunks))

    x_name = glyph.x
    y_name = glyph.y

    coords = xr_ds[glyph.name].coords

    coord_dims = list(coords.dims)
    xdim_ind = coord_dims.index(x_name)
    ydim_ind = coord_dims.index(y_name)
    var_name = list(xr_ds.data_vars.keys())[0]

    # Pre-compute bin sizes. We do this here to handle length-1 chunks
    src_x0, src_x1 = glyph._compute_bounds_from_1d_centers(xr_ds,
                                                           x_name,
                                                           maybe_expand=False,
                                                           orient=False)
    src_y0, src_y1 = glyph._compute_bounds_from_1d_centers(xr_ds,
                                                           y_name,
                                                           maybe_expand=False,
                                                           orient=False)
    xbinsize = float(xr_ds[x_name][1] - xr_ds[x_name][0])
    ybinsize = float(xr_ds[y_name][1] - xr_ds[y_name][0])

    # Compute scale/translate
    out_h, out_w = shape
    src_h, src_w = [xr_ds[glyph.name].shape[i] for i in [ydim_ind, xdim_ind]]
    out_x0, out_x1, out_y0, out_y1 = bounds
    scale_y, translate_y = build_scale_translate(out_h, out_y0, out_y1, src_h,
                                                 src_y0, src_y1)

    scale_x, translate_x = build_scale_translate(out_w, out_x0, out_x1, src_w,
                                                 src_x0, src_x1)

    def chunk(np_arr, *inds):
        # Reconstruct dataset for chunk from numpy array and chunk indices
        chunk_coords_list = []
        for i, coord_name in enumerate(coords.dims):
            chunk_number = inds[i]
            coord_slice = slice(chunk_inds[coord_name][chunk_number],
                                chunk_inds[coord_name][chunk_number + 1])
            chunk_coords_list.append(
                [coord_name, coords[coord_name][coord_slice]])

        chunk_coords = OrderedDict(chunk_coords_list)
        chunk_ds = xr.DataArray(np_arr,
                                coords=chunk_coords,
                                dims=coord_dims,
                                name=var_name).to_dataset()

        # Compute offsets
        x_chunk_number = inds[xdim_ind]
        offset_x = chunk_inds[x_name][x_chunk_number]

        y_chunk_number = inds[ydim_ind]
        offset_y = chunk_inds[y_name][y_chunk_number]

        # Initialize aggregation buffers
        aggs = create(shape)

        # Perform aggregation
        extend(aggs,
               chunk_ds,
               st,
               bounds,
               scale_x=scale_x,
               scale_y=scale_y,
               translate_x=translate_x,
               translate_y=translate_y,
               offset_x=offset_x,
               offset_y=offset_y,
               src_xbinsize=xbinsize,
               src_ybinsize=ybinsize)

        return aggs

    name = tokenize(xr_ds.__dask_tokenize__(), canvas, glyph, summary)
    keys = [k for row in xr_ds.__dask_keys__()[0] for k in row]
    keys2 = [(name, i) for i in range(len(keys))]
    dsk = dict((k2, (chunk, k, k[1], k[2])) for (k2, k) in zip(keys2, keys))
    dsk[name] = (apply, finalize, [(combine, keys2)],
                 dict(cuda=cuda,
                      coords=axis,
                      dims=[glyph.y_label, glyph.x_label]))
    return dsk, name
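
A hedged sketch of input expected to reach this raster path: evenly spaced 1-D centers, which let Canvas.quadmesh select the faster raster-style glyph (the array and names are illustrative):

import numpy as np
import xarray as xr
import datashader as ds

# Evenly spaced coordinates -> raster-style quadmesh aggregation
da = xr.DataArray(np.random.rand(300, 400),
                  coords={'y': np.linspace(0, 1, 300),
                          'x': np.linspace(0, 1, 400)},
                  dims=['y', 'x'], name='Z').chunk({'y': 150, 'x': 200})
cvs = ds.Canvas(plot_width=100, plot_height=100)
agg = cvs.quadmesh(da, x='x', y='y')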
Code Example #7
File: dask.py   Project: thuydotm/datashader
def default(glyph, df, schema, canvas, summary, cuda=False):
    shape, bounds, st, axis = shape_bounds_st_and_axis(df, canvas, glyph)

    # Compile functions
    create, info, append, combine, finalize = \
        compile_components(summary, schema, glyph, cuda=cuda)
    x_mapper = canvas.x_axis.mapper
    y_mapper = canvas.y_axis.mapper
    extend = glyph._build_extend(x_mapper, y_mapper, info, append)

    # Here be dragons
    # Get the dataframe graph
    graph = df.__dask_graph__()

    # Guess a reasonable output dtype from combination of dataframe dtypes
    dtypes = []

    for dt in df.dtypes:
        if isinstance(dt, pd.CategoricalDtype):
            continue
        elif isinstance(dt, pd.api.extensions.ExtensionDtype):
            # Extension dtypes (e.g. datashader's RaggedArray) may expose
            # a numpy subtype; see
            # https://github.com/pandas-dev/pandas/issues/22224
            try:
                subdtype = dt.subtype
            except AttributeError:
                continue
            else:
                dtypes.append(subdtype)
        else:
            dtypes.append(dt)

    dtype = np.result_type(*dtypes)
    # Create a meta object so that dask.array doesn't try to look too
    # closely at the type of the chunks it's wrapping: they're actually
    # dataframes, but we tell dask they're ndarrays
    meta = np.empty((0, ), dtype=dtype)
    # Create a chunks tuple, a singleton for each dataframe chunk
    # The number of chunks + structure needs to match that of
    # the dataframe, so that we can use the dataframe graph keys,
    # but we don't have to be precise with the chunk size.
    # We could use np.nan instead of 1 to indicate that we actually
    # don't know how large the chunk is
    chunks = (tuple(1 for _ in range(df.npartitions)), )

    # Now create a dask array from the dataframe graph layer
    # It's a dask array of dataframes, which is dodgy but useful
    # for the following reasons:
    #
    # (1) The dataframes get converted to a single array by
    #     the datashader reduction functions anyway
    # (2) dask.array.reduction is handy for coding a tree
    #     reduction of arrays
    df_array = da.Array(graph, df._name, chunks, meta=meta)
    # A sufficient condition for ensuring the chimera holds together
    assert list(df_array.__dask_keys__()) == list(df.__dask_keys__())

    def chunk(df, axis, keepdims):
        """ used in the dask.array.reduction chunk step """
        aggs = create(shape)
        extend(aggs, df, st, bounds)
        return aggs

    def wrapped_combine(x, axis, keepdims):
        """ wrap datashader combine in dask.array.reduction combine """
        if isinstance(x, list):
            # list of tuples of ndarrays
            # assert all(isinstance(item, tuple) and
            #            len(item) == 1 and
            #            isinstance(item[0], np.ndarray)
            #            for item in x)
            return combine(x)
        elif isinstance(x, tuple):
            # tuple with single ndarray
            # assert len(x) == 1 and isinstance(x[0], np.ndarray)
            return x
        else:
            raise TypeError("Unknown type %s in wrapped_combine" % type(x))

    local_axis = axis

    def aggregate(x, axis, keepdims):
        """ Wrap datashader finalize in dask.array.reduction aggregate """
        return finalize(wrapped_combine(x, axis, keepdims),
                        cuda=cuda,
                        coords=local_axis,
                        dims=[glyph.y_label, glyph.x_label])

    R = da.reduction(
        df_array,
        aggregate=aggregate,
        chunk=chunk,
        combine=wrapped_combine,
        # Control granularity of tree branching
        # less is more
        split_every=2,
        # We don't want np.concatenate called
        # during combine and aggregate. It'll
        # fail because we're handling tuples of ndarrays
        # and lists of tuples of ndarrays
        concatenate=False,
        # Prevent dask from internally inspecting
        # chunk, combine and aggregate
        meta=meta,
        # Provide some sort of dtype for the
        # resultant dask array
        dtype=meta.dtype)

    return R, R.name
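
The same chunk/combine/aggregate shape can be seen in a self-contained toy, independent of datashader. As above, concatenate=False keeps the intermediates as tuples and lists of tuples, and an explicit meta stops dask from probing them (all names here are hypothetical stand-ins):

import numpy as np
import dask.array as da

x = da.arange(16, chunks=4)
meta = np.empty((0,), dtype='int64')

def chunk(block, axis, keepdims):
    return (block.sum(),)                  # a tuple, like datashader's aggs

def combine(parts, axis, keepdims):
    if isinstance(parts, list):            # list of tuples from children
        return (sum(p[0] for p in parts),)
    return parts                           # a lone tuple passes through

def aggregate(parts, axis, keepdims):
    return combine(parts, axis, keepdims)[0]

total = da.reduction(x, chunk=chunk, combine=combine, aggregate=aggregate,
                     split_every=2, concatenate=False,
                     meta=meta, dtype=meta.dtype)
assert int(total.compute()) == 120        # sum of 0..15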