Example #1
def cli(product, input_prefix, location, verbose):
    """
    Dump stats data into a cache file.

    Note: The input bucket must be public, otherwise the data cannot be listed.
    """

    product = product_from_yaml(product)

    # Stream dataset documents from the input prefix
    dss = s3_fetch_dss(input_prefix, product, glob="*.json")

    if verbose:
        print(f"Writing {location}/{product.name}.db")

    # Persist the datasets into a file cache at the output location
    cache = create_cache(f"{location}/{product.name}.db")
    cache.bulk_save(dss)
    if verbose:
        print(f"Found {cache.count:,d} datasets")
Example #2
def cli(product, input_prefix, location, verbose):
    """
    Generate mosaic overviews of the stats data.

    An intermediate cache file is generated and stored in the output location
    during this process.
    Note: The input bucket must be public, otherwise the data cannot be listed.
    """

    product = product_from_yaml(product)
    if verbose:
        print(f"Preparing mosaics for {product.name} product")

    # Stream dataset documents from the input prefix
    dss = s3_fetch_dss(input_prefix, product, glob="*.json")

    if verbose:
        print(f"Writing {location}/{product.name}.db")

    # Persist the datasets into a file cache at the output location
    cache = create_cache(f"{location}/{product.name}.db")
    cache.bulk_save(dss)
    if verbose:
        print(f"Found {cache.count:,d} datasets")

    dc = Datacube()
    dss = list(cache.get_all())
    xx = dc.load(
        datasets=dss,
        dask_chunks={
            "x": 3200,
            "y": 3200
        },
        resolution=(-120, 120),
        measurements=["red", "green", "blue"],
    )

    save(xx, location, product.name, verbose)
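
Because `dask_chunks` is passed to `dc.load`, `xx` comes back as a lazily evaluated, dask-backed `xarray.Dataset`: no pixels are read until something computes the task graph, which the final `save` step presumably triggers. A small illustration of that laziness (hypothetical inspection code, not part of the original):

# xx is the dask-backed xarray.Dataset returned by dc.load above.
red = xx["red"].isel(time=0)   # still lazy: slicing only narrows the task graph
red_pixels = red.compute()     # actual I/O happens here, returning in-memory data
print(red_pixels.shape, red_pixels.dtype)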
Example #3
def cli(env, grid, year, output, products, complevel):
    """Extract product(s) to an on disk cache.

    Optionally tile datasets into a grid while extracting (see --grid option)
    """

    if len(products) == 0:
        click.echo("Have to supply at least one product")
        raise click.Abort()

    dc = datacube.Datacube(env=env)
    all_prods = {p.name: p for p in dc.index.products.get_all()}

    if len(products) == 1 and products[0].lower() in (":all:", "*"):
        click.echo("Will read all products")
        products = list(all_prods)

    for p in products:
        if p not in all_prods:
            click.echo("No such product found: %s" % p)
            raise click.Abort()

    query = {}
    if year is not None:
        query.update(time=f"{year}")

    click.echo("Getting dataset counts")
    counts = {p: dataset_count(dc.index, product=p, **query) for p in products}

    n_total = 0
    for p, c in counts.items():
        click.echo("..{}: {:8,d}".format(p, c))
        n_total += c

    if n_total == 0:
        click.echo("No datasets found")
        raise click.Abort()

    click.echo("Training compression dictionary")
    zdict = dictionary_from_product_list(dc,
                                         products,
                                         samples_per_product=50,
                                         query=query)
    click.echo("..done")

    # TODO: check for overwrite
    cache = dscache.create_cache(output,
                                 zdict=zdict,
                                 complevel=complevel,
                                 truncate=True)

    raw2ds = mk_raw2ds(all_prods)

    def db_task(products, conn, q):
        for p in products:
            if len(query) == 0:
                dss = map(raw2ds, raw_dataset_stream(p, conn))
            else:
                dss = ordered_dss(dc, product=p, **query)

            for ds in dss:
                q.put(ds)
        q.put(EOS)

    conn = db_connect(cfg=env)
    q = queue.Queue(maxsize=10_000)
    db_thread = Thread(target=db_task, args=(products, conn, q))
    db_thread.start()

    dss = qmap(lambda ds: ds, q, eos_marker=EOS)
    dss = cache.tee(dss)

    cells = {}
    if grid is not None:
        gs = parse_gridspec(grid)
        # TODO for named gridspecs should we use the name as group_prefix?
        group_prefix = f"epsg{gs.crs.epsg:d}"
        cache.add_grid(gs, group_prefix)
        dss = bin_dataset_stream(gs, dss, cells)

    label = "Processing ({:8,d})".format(n_total)
    with click.progressbar(dss, label=label, length=n_total) as dss:
        for _ in dss:
            pass

    if grid is not None:
        click.echo("Total bins: {:d}".format(len(cells)))

        with click.progressbar(cells.values(),
                               length=len(cells),
                               label="Saving") as groups:
            for group in groups:
                cache.add_grid_tile(group_prefix, group.idx, group.dss)

    db_thread.join()
    cache.close()
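
The database scan here runs on a separate thread and streams datasets through a bounded queue, with `EOS` acting as an end-of-stream sentinel that `qmap` watches for. Stripped of the odc helpers, the pattern looks roughly like this (a generic sketch, not the library code):

import queue
from threading import Thread

EOS = object()  # end-of-stream sentinel

def producer(items, q):
    for item in items:
        q.put(item)        # blocks when the queue is full, bounding memory use
    q.put(EOS)             # signal that no more items will arrive

def consume(q):
    while True:
        item = q.get()
        if item is EOS:
            break
        yield item

q = queue.Queue(maxsize=10_000)
t = Thread(target=producer, args=(range(5), q))
t.start()
for item in consume(q):
    print(item)
t.join()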
Example #4
def cli(env, output, products, complevel):
    """Extract product(s) to an on-disk cache."""

    if len(products) == 0:
        click.echo('Have to supply at least one product')
        raise click.Abort()

    dc = datacube.Datacube(env=env)
    all_prods = {p.name: p for p in dc.index.products.get_all()}

    if len(products) == 1 and products[0].lower() in (':all:', '*'):
        click.echo('Will read all products')
        products = list(all_prods)

    for p in products:
        if p not in all_prods:
            click.echo('No such product found: %s' % p)
            raise click.Abort()

    click.echo('Getting dataset counts')
    counts = {
        p.name: count
        for p, count in dc.index.datasets.count_by_product(product=list(products))
    }

    n_total = 0
    for p, c in counts.items():
        click.echo('..{}: {:8,d}'.format(p, c))
        n_total += c

    if n_total == 0:
        click.echo("No datasets found")
        raise click.Abort()

    click.echo('Training compression dictionary')
    zdict = dictionary_from_product_list(dc, products, samples_per_product=50)
    click.echo('..done')

    # TODO: check for overwrite
    cache = dscache.create_cache(output,
                                 zdict=zdict,
                                 complevel=complevel,
                                 truncate=True)

    raw2ds = mk_raw2ds(all_prods)

    def db_task(products, conn, q):
        for p in products:
            for ds in map(raw2ds, raw_dataset_stream(p, conn)):
                q.put(ds)
        q.put(EOS)

    conn = db_connect(cfg=env)
    q = queue.Queue(maxsize=10_000)
    db_thread = Thread(target=db_task, args=(products, conn, q))
    db_thread.start()

    dss = qmap(lambda ds: ds, q, eos_marker=EOS)
    dss = cache.tee(dss)

    label = 'Processing ({:8,d})'.format(n_total)
    with click.progressbar(dss, label=label, length=n_total) as dss:
        for _ in dss:
            pass

    db_thread.join()
    cache.close()
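
As in Example #3, `cache.tee(dss)` appears to act like a unix `tee`: it yields every dataset unchanged while writing it to the cache as a side effect, so the progress-bar loop drives both the streaming and the persistence. A generic version of that idea (a sketch, not the dscache implementation):

def tee(stream, sink):
    """Yield items from `stream` unchanged, sending each one to `sink` on the way."""
    for item in stream:
        sink(item)          # side effect, e.g. persisting to a cache
        yield item

# Usage: every value consumed by the loop also ends up in `saved`.
saved = []
for _ in tee([1, 2, 3], saved.append):
    pass
print(saved)   # [1, 2, 3]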