def l1_scenes_to_process(
    outfile: Path,
    products: List[str],
    brdfdir: Path,
    wvdir: Path,
    region_codes: List,
    scene_limit: int,
    config: Optional[Path] = None,
    days_delta: int = 21,
) -> int:
    """Writes all the files returned from datacube for level1 to a text file."""
    dc = datacube.Datacube(app="gen-list", config=config)
    l1_count = 0
    with open(outfile, "w") as fid:
        for product in products:
            files2process = l1_filter(
                dc,
                product,
                brdfdir=brdfdir,
                wvdir=wvdir,
                region_codes=region_codes,
                scene_limit=scene_limit,
                days_delta=days_delta,
            )
            for fp in files2process:
                fid.write(fp + "\n")
                l1_count += 1
                if l1_count >= scene_limit:
                    break
            if l1_count >= scene_limit:
                break
    return l1_count
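A minimal usage sketch for the function above; the paths, product name, and region codes are hypothetical and assume a configured datacube index plus the l1_filter helper from the same module:

from pathlib import Path

# Hypothetical inputs: adjust products, ancillary directories and region codes
# to match your deployment.
count = l1_scenes_to_process(
    outfile=Path("scenes-to-process.txt"),
    products=["usgs_ls8c_level1_1"],
    brdfdir=Path("/data/brdf"),
    wvdir=Path("/data/water-vapour"),
    region_codes=["090084", "090085"],
    scene_limit=400,
)
print(f"wrote {count} level-1 scene paths")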
Example #2
def worker(session: Session, bucket_name: str, queue, config=None):
    dc = datacube.Datacube(config=config)
    index = dc.index
    s3 = session.resource("s3")

    while True:
        try:
            key = queue.get(timeout=60)
            if key == GUARDIAN:
                break
            print(f"Processing {key} {current_process()}")
            obj = s3.Object(bucket_name,
                            key).get(ResponseCacheControl="no-cache",
                                     RequestPayer="requester")
            raw = obj["Body"].read()
            content = str(raw, "utf-8")
            data = ElementTree.fromstring(content)
            dataset_doc = generate_eo3_dataset_doc(bucket_name, key, data)
            uri = format_s3_key(bucket_name, key)[0]
            add_dataset(dataset_doc, uri, index)
            queue.task_done()
        except Empty:
            break
        except EOFError:
            break
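The queue/GUARDIAN pattern above (and in the similar workers further down) is normally driven by a parent process that spawns the workers, feeds the queue with S3 keys, and finally enqueues one sentinel per worker. A sketch of that producer side, with a hypothetical sentinel value, key list, and worker count:

from multiprocessing import JoinableQueue, Process, cpu_count

GUARDIAN = "GUARDIAN_QUEUE_EMPTY"  # hypothetical sentinel; the real module defines its own

def feed_workers(session, bucket_name, keys, config=None, n_workers=None):
    n_workers = n_workers or cpu_count()
    queue = JoinableQueue(maxsize=n_workers * 2)

    # Start one worker process per CPU; each runs the worker() loop above.
    procs = [
        Process(target=worker, args=(session, bucket_name, queue, config))
        for _ in range(n_workers)
    ]
    for p in procs:
        p.start()

    # Producer: push every key, then one sentinel per worker so they all exit.
    for key in keys:
        queue.put(key)
    for _ in range(n_workers):
        queue.put(GUARDIAN)

    for p in procs:
        p.join()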
Example #3
def worker(config, bucket_name, prefix, suffix, func, unsafe, sources_policy,
           queue):
    dc = datacube.Datacube(config=config)
    index = dc.index
    s3 = boto3.resource("s3")
    safety = 'safe' if not unsafe else 'unsafe'

    while True:
        try:
            key = queue.get(timeout=60)
            if key == GUARDIAN:
                break
            logging.info("Processing %s %s", key, current_process())
            obj = s3.Object(bucket_name,
                            key).get(ResponseCacheControl='no-cache')
            raw = obj['Body'].read()
            if suffix == AWS_PDS_TXT_SUFFIX:
                # Attempt to process text document
                raw_string = raw.decode('utf8')
                txt_doc = _parse_group(iter(
                    raw_string.split("\n")))['L1_METADATA_FILE']
                data = make_metadata_doc(txt_doc, bucket_name, key)
            else:
                yaml = YAML(typ=safety, pure=False)
                yaml.default_flow_style = False
                data = yaml.load(raw)
            uri = get_s3_url(bucket_name, key)
            logging.info("calling %s", func)
            func(data, uri, index, sources_policy)
            queue.task_done()
        except Empty:
            break
        except EOFError:
            break
Example #4
File: ccdc.py Project: klh5/CCDC
def loadAll(products, key, bands):

    ds = []

    for product in products:

        dc = datacube.Datacube()

        gw = GridWorkflow(dc.index, product=product)

        # Get the list of tiles (one for each time point) for this product
        tile_list = gw.list_tiles(product=product,
                                  cell_index=key,
                                  group_by='solar_day')

        dc.close()

        # Load all tiles
        for tile_index, tile in tile_list.items():
            dataset = gw.load(tile, measurements=bands)

            if (dataset.variables):
                ds.append(dataset)

    return ds
Example #5
    def calculate_result(self, **kwargs):
        """
        This is the entry point for a task run. Will be called by celery.

        :param kwargs: arguments to the tasks.
        :return:
        """

        # Connect to the datacube and pass that in to the user's function.
        # Everything should be talking to the datacube here, so it makes sense
        # to pull it out and make things easier for the user.
        result_dir = get_config("App", "result_dir")
        path_prefix = path.join(result_dir, self.request.id)

        os.makedirs(path_prefix, exist_ok=True)

        dc = datacube.Datacube(app=self.name)
        outputs = self.generate_product(dc, path_prefix,
                                        **self.map_kwargs(**kwargs))
        logging.info(f"got result of {outputs}")
        self.log_query(path_prefix)
        self.zip_outputs(path_prefix, outputs)
        # TODO: put the results some where, send notifications etc.
        output_url = self.upload_results(path_prefix)

        self.ping_results(output_url)
Example #6
def add_all_datacube_datasets(top_directory_name):
    """ Main loop function to traverse the bucket-->prefix tree and index each dataset

    for each .json metadata file:

    * extract the metadata and 
    * create a doc (dict json blob for the postgresql database)

    Args:
        **bucket_name** (str): AWS S3 Bucket Name - example lsaa-staging-cog

        config (str): A datacube config file to over-ride the one in your home directory

        **prefix** (str): AWS prefix within the bucket to start the recursive search for .json file = example L8

    Returns:
        ABSOLUTELY_NOTHING

    """

    dc = datacube.Datacube()
    index = dc.index
    rules = make_rules(index, product_list=[
        'l8_rwanda',
    ])
    # print(type(rules))
    # print(rules)
    for metadata_path, metadata_doc in get_metadata_docs(top_directory_name):
        uri = metadata_path
        add_dataset(metadata_doc, uri, rules, index)
        logging.info("Indexing %s", metadata_path)
Example #7
def add_dataset(doc):
    dc = datacube.Datacube(config=config.DATACUBE_CONF)
    index = dc.index
    resolver = Doc2Dataset(index)
    dataset, error = resolver(doc, 'file:///tmp/test-dataset.json')
    if error is not None:
        raise ValueError(error)
    print('add dataset', dataset)
    index.datasets.add(dataset)
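A hedged usage sketch for add_dataset above, assuming the metadata document sits in a local YAML file (the file name is hypothetical):

import yaml

# Hypothetical file; any EO metadata document accepted by Doc2Dataset will do.
with open("test-dataset.yaml") as fh:
    doc = yaml.safe_load(fh)

add_dataset(doc)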
Example #8
def var_to_ind(variables, product):
    """Helper to get a list of index from a list of variables names

    Useful for telling the modeling module which are the categorical variables
    that should be encoded using One Hot Encoding

    Args:
        names (list): List of strings corresponding to existing variable names in the
            product
        product (str): Name of an existing datacube product

    Return:
        list: LIst of integer corresponding to dataset positions

    Example:
        >>> from madmex.util.datacube import var_to_ind

        >>> ind = var_to_ind(['blue', 'red', 'swir2'], 'ls8_espa_mexico')
        >>> print(ind)
    """
    dc = datacube.Datacube()
    prod = dc.index.products.get_by_name(name=product)
    measurements = list(prod.measurements)
    indices = [measurements.index(x) for x in variables]
    return indices
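Because the returned indices are positional, they can be passed straight to column-based encoders. A sketch using scikit-learn's ColumnTransformer; the product name and variable list are only illustrative:

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

# Illustrative categorical variables for a hypothetical product.
cat_ind = var_to_ind(['blue', 'red', 'swir2'], 'ls8_espa_mexico')

encoder = ColumnTransformer(
    transformers=[('onehot', OneHotEncoder(handle_unknown='ignore'), cat_ind)],
    remainder='passthrough',
)
# X would be a 2-D array whose columns follow the product's measurement order:
# X_encoded = encoder.fit_transform(X)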
Example #9
def open_unindexed_nc(fname, product, dc=None):
    if dc is None:
        dc = datacube.Datacube(app='app')

    if isinstance(product, str):
        product_name = product
        product = dc.index.products.get_by_name(product_name)
        if product is None:
            print('Failed to load product: {}'.format(product_name))
            return None, None, None, None

    uris = ['file://' + abspath(fname)]

    def mk_dataset(yaml_string):
        return datacube.model.Dataset(product,
                                      yaml.load(yaml_string,
                                                Loader=yaml.CSafeLoader),
                                      uris=uris)

    with xr.open_dataset(fname) as f:
        datasets = [
            mk_dataset(f.dataset.values[i].decode('utf-8'))
            for i in range(f.dataset.shape[0])
        ]

    data_group = dc.group_datasets(datasets,
                                   datacube.api.query.query_group_by())

    geom = datacube.api.core.get_bounds(datasets, product.grid_spec.crs)
    geobox = datacube.utils.geometry.GeoBox.from_geopolygon(
        geom, product.grid_spec.resolution)

    return data_group, geobox, product, dc
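A usage sketch for the function above: once the grouped sources and GeoBox are in hand, the pixels can be read with Datacube.load_data. The file and product names are hypothetical:

# Hypothetical NetCDF file and product name.
sources, geobox, product, dc = open_unindexed_nc('LS8_NBAR_tile.nc', 'ls8_nbar_albers')

if sources is not None:
    data = datacube.Datacube.load_data(
        sources, geobox, list(product.measurements.values()))
    print(data)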
Example #10
 def listProducts(self):
     global dc
     try:
         dc=datacube.Datacube(app="dc")
         product_name=dc.list_products().name.tolist()
         if len(product_name) != 0:
             product_variables=[]
             for products_variables in dc.list_measurements().name.to_dict():
                 product_variables.append(products_variables)
             for products in product_name:
                 #create a tree item
                 parent=QTreeWidgetItem(self.treeProductInfo) 
                 #add parent item
                 parent.setText(0,products) 
                 #set flag of tristate checkbox
                 parent.setFlags(parent.flags() | Qt.ItemIsTristate | Qt.ItemIsUserCheckable) 
                 #extract band variables from product variable list
                 for product_vars,var in product_variables: 
                     if product_vars == products:
                         #add child to the parent item 
                         child=QTreeWidgetItem(parent) 
                         #set the checkbox flag user checkable
                         child.setFlags(child.flags() | Qt.ItemIsUserCheckable) 
                         child.setText(0,var) 
                         child.setCheckState(0, Qt.Unchecked)
                         #to display products in tree view 
                 self.treeProductInfo.addTopLevelItem(parent) 
             self.appendLogs('Products loaded!')
         else:
             self.appendLogs('Product List Empty!')
     except datacube.index.postgres._connections.IndexSetupError:
         self.appendLogs('No DB schema exists. Have you run init? datacube system init')
Example #11
def worker(config, path, product_name, product_type, platform_code, unsafe,
           queue):
    dc = datacube.Datacube(config=config)
    index = dc.index
    safety = 'safe' if not unsafe else 'unsafe'

    while True:
        try:
            path = queue.get(timeout=60)
            if path == GUARDIAN:
                break
            logging.info("Processing %s %s", path, current_process())

            data = make_metadata_doc(path, product_name, product_type,
                                     platform_code)
            if data:
                uri = 'file:/' + data['image']['bands']['elevation']['path']
                add_dataset(data, product_name, uri, index)
            else:
                logging.error("Failed to get data returned... skipping file.")
        except Empty:
            logging.error("Empty exception hit.")
            break
        except EOFError:
            logging.error("EOF Error hit.")
            break
        except ValueError as e:
            logging.error(
                "Found data for a satellite that we can't handle: {}".format(
                    e))
        finally:
            queue.task_done()
Example #12
def load_wofl(ds, query):
    datetime = str(ds.time.data)[:19]
    dc_prod = datacube.Datacube()
    print(f"date: {datetime}")
    print(f"Query: {query}")
    wofl = dc_prod.load(product='wofs_albers', time=datetime, **query)
    return wofl.squeeze()
Example #13
File: pbs.py Project: sixy6e/gost
def _initial_query(product_name_test, db_env_test, time, lon, lat,
                   additional_filters, query_filesystem):
    """
    Initial listing to see how many datasets we'll (at most) have to compare.
    """

    if query_filesystem:
        _LOG.info("filesystem queries can take a while, please be patient")

        path = Path(product_name_test)
        pattern = db_env_test

        _LOG.info("initial filesystem query", path=str(path), pattern=pattern)

        n_datasets = len(list(path.rglob(pattern)))
    else:
        dc = datacube.Datacube(env=db_env_test)

        _LOG.info(
            "initial database query",
            product=product_name_test,
            time=time,
            lon=lon,
            lat=lat,
        )

        n_datasets = len(
            dc.find_datasets(product=product_name_test,
                             time=time,
                             lon=lon,
                             lat=lat,
                             **additional_filters))

    return n_datasets
Example #14
def worker(config, bucket_name, prefix, func, sources_policy, queue,
           request_payer):
    dc = datacube.Datacube(config=config)
    index = dc.index
    s3 = boto3.resource("s3")

    while True:
        try:
            key = queue.get(timeout=60)
            if key == GUARDIAN:
                break

            logging.info("Processing %s %s", key, current_process())

            tile_base_path = '/'.join(key.split('/')[:-1])

            tile_meta_obj = s3.Object(bucket_name, key)\
                .get(ResponseCacheControl='no-cache', RequestPayer=request_payer)
            raw_tile_meta_file = tile_meta_obj['Body'].read()
            tile_meta = extract_tile_meta(raw_tile_meta_file)

            spatial_doc_part = {
                'grid_spatial': {
                    'projection': {
                        'geo_ref_points': tile_meta['geo_ref_points'],
                        'spatial_reference': tile_meta['crs_epsg']
                    }
                }
            }

            product_meta_path = tile_meta[
                'product_path'] + '/' + PRODUCT_META_FILE_NAME
            product_meta_obj = s3.Object(bucket_name, product_meta_path)\
                .get(ResponseCacheControl='no-cache', RequestPayer=request_payer)
            raw_product_meta_file = product_meta_obj['Body']

            product_doc_part = extract_product_meta(raw_product_meta_file,
                                                    bucket_name,
                                                    product_meta_path)

            bands_paths = generate_band_paths(bucket_name, tile_base_path,
                                              bands_of_interest,
                                              IMAGE_FILE_ENDINGS)

            image_doc_part = {'image': {'bands': bands_paths}}

            dataset_doc = {
                **product_doc_part,
                **spatial_doc_part,
                **image_doc_part
            }

            uri = get_s3_url(bucket_name, key)
            func(dataset_doc, uri, index, sources_policy)
            queue.task_done()

        except Empty:
            break
        except EOFError:
            break
Example #15
def gm_mads_two_seasons_training(query):

    #connect to the datacube
    dc = datacube.Datacube(app='feature_layers')

    #load S2 geomedian
    ds = dc.load(product='gm_s2_semiannual', **query)

    # load the data
    dss = {"S1": ds.isel(time=0), "S2": ds.isel(time=1)}

    #create features
    epoch1 = common_ops(dss["S1"], era="_S1")
    epoch1 = add_chirps(epoch1, era='_S1')
    epoch2 = common_ops(dss["S2"], era="_S2")
    epoch2 = add_chirps(epoch2, era='_S2')

    # add slope
    url_slope = "https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/srtm_dem/srtm_africa_slope.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope")

    result = xr.merge([epoch1, epoch2, slope], compat="override")

    return result.astype(np.float32).squeeze()
Example #16
    def __init__(self, *, config=None, config_file=None, dc_env=None):
        if config is not None:
            self.config = config
        else:
            with fsspec.open(config_file, mode="r") as f:
                self.config = cattr.structure(yaml.safe_load(f),
                                              AlchemistSettings)

        # Connect to the ODC Index
        self.dc = datacube.Datacube(env=dc_env)
        self.input_products = []

        if self.config.specification.product and self.config.specification.products:
            _LOG.warning(
                "Both `product` and `products` are defined, only using product."
            )

        # Store the products that we're allowing as inputs
        if self.config.specification.product:
            self.input_products.append(
                self.dc.index.products.get_by_name(
                    self.config.specification.product))
        elif self.config.specification.products:
            for product in self.config.specification.products:
                self.input_products.append(
                    self.dc.index.products.get_by_name(product))

        # Rasterio environment activation
        configure_s3_access(
            cloud_defaults=True,
            aws_unsigned=self.config.specification.aws_unsigned)
Example #17
def index_data(bbox):
    start_date = '2020-05-01'
    end_date = '2021-01-06'

    print(bbox)

    collections = ['sentinel-s2-l2a-cogs']

    config = {
        'collections': collections,
        'bbox': bbox,
        'datetime': f"{start_date}/{end_date}"
    }

    STAC_API_URL = 'https://explorer.sandbox.dea.ga.gov.au/stac/'
    os.environ['STAC_API_URL'] = STAC_API_URL

    srch = Search().search(**config)
    found_items = srch.found()
    print(f"Found {found_items} items that can be indexed")

    dc = datacube.Datacube()

    indexed, failed = stac_api_to_odc(dc, 's2_l2a', None, False, False, config)
    print(f"Indexed {indexed} out of {found_items} with {failed} failures.")
Example #18
File: ccdc.py Project: klh5/CCDC
def loadByTile(products, key, min_y, max_y, min_x, max_x, bands):

    ds = []

    for product in products:

        dc = datacube.Datacube()

        # Create the GridWorkflow object for this product
        curr_gw = GridWorkflow(dc.index, product=product)

        # Get the list of tiles (one for each time point) for this product
        tile_list = curr_gw.list_tiles(product=product,
                                       cell_index=key,
                                       group_by='solar_day')

        dc.close()

        # Retrieve the specified pixel for each tile in the list
        for tile_index, tile in tile_list.items():
            dataset = curr_gw.load(tile[0:1, min_y:max_y, min_x:max_x],
                                   measurements=bands)

            if (dataset.variables):
                ds.append(dataset)

    return ds
Example #19
def gm_mads_two_seasons_predict(ds):
    dc = datacube.Datacube(app="training")
    ds = ds / 10000
    ds1 = ds.sel(time=slice("2019-01", "2019-06"))
    ds2 = ds.sel(time=slice("2019-07", "2019-12"))

    def fun(ds, era):
        # geomedian and tmads
        # gm_mads = xr_geomedian_tmad(ds)
        gm_mads = xr_geomedian_tmad_new(ds).compute()
        gm_mads = calculate_indices(
            gm_mads,
            index=["NDVI", "LAI", "MNDWI"],
            drop=False,
            normalise=False,
            collection="s2",
        )

        gm_mads["sdev"] = -np.log(gm_mads["sdev"])
        gm_mads["bcdev"] = -np.log(gm_mads["bcdev"])
        gm_mads["edev"] = -np.log(gm_mads["edev"])
        gm_mads = gm_mads.chunk({"x": 2000, "y": 2000})

        # rainfall climatology
        if era == "_S1":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )
        if era == "_S2":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )

        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        chirps = chirps.chunk({"x": 2000, "y": 2000})
        gm_mads["rain"] = chirps

        for band in gm_mads.data_vars:
            gm_mads = gm_mads.rename({band: band + era})

        return gm_mads

    epoch1 = fun(ds1, era="_S1")
    epoch2 = fun(ds2, era="_S2")

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope").chunk({"x": 2000, "y": 2000})

    result = xr.merge([epoch1, epoch2, slope], compat="override")

    return result.squeeze()
Example #20
def add_datacube_dataset(bucket_name, config, prefix):
    dc = datacube.Datacube(config=config)
    index = dc.index
    rules = make_rules(index)
    for metadata_path, metadata_doc in get_metadata_docs(bucket_name, prefix):
        uri = get_s3_url(bucket_name, metadata_path)
        add_dataset(metadata_doc, uri, rules, index)
        logging.info("Indexing %s", metadata_path)
Example #21
def make_long_query(ground_brdf):

    ldc = datacube.Datacube()
    lpixsize = 30.0
    udc = datacube.Datacube(env='ardinteroperability_tmp',
                            config='/home/547/aw3463/.sent2.conf')

    # convert half a pixel in metres to decimal degrees latitude
    lmet_latdeg = (lpixsize) / (2 * 111319.9)

    # convert half a pixel in metres to decimal degrees longitude
    lmet_londeg = lmet_latdeg / math.cos(
        math.radians(ground_brdf['Latitude'].mean()))

    lquery = {
        'time': ('2013-01-01', '2118-12-31'),
        'lat': (ground_brdf['Latitude'].min() - lmet_latdeg,
                ground_brdf['Latitude'].max() + lmet_latdeg),
        'lon': (ground_brdf['Longitude'].min() - lmet_londeg,
                ground_brdf['Longitude'].max() + lmet_londeg),
        'output_crs': 'EPSG:3577',
        'resampling': 'bilinear',
        'group_by': 'solar_day',
    }

    lquery2 = {
        'time': ('2013-01-01', '2118-12-31'),
        'lat': (ground_brdf['Latitude'].min() - lmet_latdeg - 0.01,
                ground_brdf['Latitude'].max() + lmet_latdeg + 0.01),
        'lon': (ground_brdf['Longitude'].min() - lmet_londeg - 0.01,
                ground_brdf['Longitude'].max() + lmet_londeg + 0.01),
        'output_crs': 'EPSG:3577',
        'resampling': 'bilinear',
        'group_by': 'solar_day',
    }

    lquery['resolution'] = (-lpixsize, lpixsize)
    lquery2['resolution'] = (-lpixsize, lpixsize)

    return ldc, udc, lquery, lquery2
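A sketch of how the returned handles and queries might be consumed; ground_brdf is assumed to be a DataFrame with Latitude/Longitude columns, and the product names are hypothetical placeholders for the two collections being compared:

ldc, udc, lquery, lquery2 = make_long_query(ground_brdf)

# Hypothetical product names; substitute the products present in each index.
landsat_ds = ldc.load(product='ls8_nbar_albers', **lquery)
sentinel_ds = udc.load(product='s2a_ard_granule', **lquery2)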
Example #22
 def __init__(self):
     # using both the datacube object and the api.
     # dc is useful for all data access, api is only really used for metadata
     # fetching.
     # hardcoded config location. could parameterize.
     self.dc = datacube.Datacube(
         config='/home/localuser/Datacube/data_cube_ui/config/.datacube.conf'
     )
Example #23
def iterate_datasets(bucket_name, config, prefix, suffix, func, unsafe, sources_policy):
    dc = datacube.Datacube(config=config)
    index = dc.index
    rules = make_rules(index)

    for metadata_path, metadata_doc in get_metadata_docs(bucket_name, prefix, suffix, unsafe):
        uri = get_s3_url(bucket_name, metadata_path)
        func(metadata_doc, uri, rules, index, sources_policy)
Example #24
def generic_task(execID, algorithm, version, output_expression, product,
                 min_lat, min_long, time_ranges, **kwargs):
    """
    Los primeros 8 parámetros deben ser dado por el ejecutor a partir de lo seleccionado por el usuario
        execID = id de la ejecución
        algorithm = nombre del algoritmo 
        version = versión del algoritmo a ejecutar
        output_expression = Expresión que indica cómo se va a generar el nombre del archivo de salida.
        product = producto seleccionado por el usuario (sobre el que se va a realizar la consulta)
        min_long = cordenada x de la esquina inferior izquierda del tile 
        min_lat = cordenada y de la esquina inferior izquierda del tile
        time_ranges = rangos de tiempo de las consultas (es un arreglo de tuplas, debe haber al menos una para realizar una consulta. (Obligatorio)
        kwargs = KeyWord arguments que usará el algoritmo (cuando se ejecute los verá como variables globales)
    """
    dc = datacube.Datacube(app=execID)
    i = 0
    for tr in time_ranges:
        kwargs["xarr" + str(i)] = dc.load(product=product,
                                          longitude=(min_long, min_long + 1.0),
                                          latitude=(min_lat, min_lat + 1),
                                          time=tr)
        i += 1
    dc.close()
    exec(
        open(ALGORITHMS_FOLDER + "/" + algorithm + "/" + algorithm + "_" +
             str(version) + ".py").read(), kwargs)
    fns = []
    folder = "{}/{}/".format(RESULTS_FOLDER, execID)
    if not os.path.exists(os.path.dirname(folder)):
        try:
            os.makedirs(os.path.dirname(folder))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    history = u'Creado con CDCOL con el algoritmo {} y  ver. {}'.format(
        algorithm, str(version))
    if "output" in kwargs:  #output debería ser un xarray
        #Guardar a un archivo...
        filename = folder + "{}_{}_{}_{}_{}_output.nc".format(
            algorithm, str(version), min_lat, min_long,
            re.sub('[^\w_.)(-]', '', str(time_ranges)))
        output = kwargs["output"]
        saveNC(output, filename, history)
        fns.append(filename)
    if "outputs" in kwargs:
        for xa in kwargs["outputs"]:
            filename = folder + "{}_{}_{}_{}_{}_{}.nc".format(
                algorithm, str(version), min_lat, min_long,
                re.sub('[^\w_.)(-]', '', str(time_ranges)), xa)
            saveNC(kwargs["outputs"][xa], filename, history)
            fns.append(filename)
    if "outputtxt" in kwargs:
        filename = folder + "{}_{}_{}.txt".format(
            min_lat, min_long, re.sub('[^\w_.)(-]', '', str(time_ranges)))
        with open(filename, "w") as text_file:
            text_file.write(kwargs["outputtxt"])
        fns.append(filename)
    return fns
Example #25
def main():
    parser = argparse.ArgumentParser()

    parser.add_argument('-latrange',
                        help='latitude range',
                        nargs=2,
                        default=[-34.5, -35],
                        required=False)

    parser.add_argument('-lonrange',
                        help='longitude range',
                        nargs=2,
                        default=[148.5, 149],
                        required=False)

    parser.add_argument('-timerange',
                        help='time range',
                        nargs=2,
                        default=['2011-3-2', '2011-6-5'],
                        type=str,
                        required=False)

    parser.add_argument('-measurements',
                        help='measurement',
                        action='append',
                        type=str,
                        required=False)

    parser.add_argument('-product', help='product', required=False)

    parser.add_argument('-groupby', help='groupby', required=False)

    parser.add_argument('-valuemax',
                        help='max value',
                        type=float,
                        default=4000,
                        required=False)

    parser.add_argument('-verbose',
                        help='verbose output',
                        default=True,
                        required=False)

    args = parser.parse_args()
    kwargs = vars(args)

    if not args.product:
        parser.print_help()
        print('\n\nValid choices for PRODUCT are:')
        dc = datacube.Datacube()
        prods = dc.list_products()['name']
        print(prods.to_string(index=False, header=False))
        parser.exit()

    if args.verbose:
        print(kwargs)

    run(**kwargs)
Example #26
def load_crophealth_data():
    """
    Loads Sentinel-2 analysis-ready data (ARD) product for the crop health
    case-study area. The ARD product is provided for the last year.
    Last modified: January 2020

    outputs
    ds - data set containing combined, masked data from Sentinel-2a and -2b.
    Masked values are set to 'nan'
    """

    # Suppress warnings
    warnings.filterwarnings('ignore')

    # Initialise the data cube. 'app' argument is used to identify this app
    dc = datacube.Datacube(app='Crophealth-app')

    # Specify latitude and longitude ranges
    latitude = (-24.974997, -24.995971)
    longitude = (152.429994, 152.395805)

    # Specify the date range
    # Calculated as today's date, subtract 365 days (one year of data)
    # Dates are converted to strings as required by loading function below
    end_date = dt.date.today()
    start_date = end_date - dt.timedelta(days=365)

    time = (start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))

    # Construct the data cube query
    products = ["s2a_ard_granule", "s2b_ard_granule"]

    query = {
        'x': longitude,
        'y': latitude,
        'time': time,
        'measurements': [
            'nbar_red', 'nbar_green', 'nbar_blue', 'nbar_nir_1', 'nbar_swir_2',
            'nbar_swir_3'
        ],
        'output_crs': 'EPSG:3577',
        'resolution': (-10, 10)
    }

    # Load the data and mask out bad quality pixels
    ds_s2 = load_ard(dc, products=products, min_gooddata=0.5, **query)

    # Calculate the normalised difference vegetation index (NDVI) across
    # all pixels for each image.
    # This is stored as an attribute of the data
    ds_s2 = calculate_indices(ds_s2, index='NDVI', collection='ga_s2_1')

    # Return the data
    return (ds_s2)
Example #27
def main(products, year, month, save):
    from datacube_stats.utils.query import multi_product_list_cells
    import datacube
    from datacube.api import GridWorkflow

    query = {}
    if year is not None:
        if month is not None:
            query['time'] = ('{}-{}-01'.format(year, month),
                             '{}-{}-01'.format(year, month + 1))
        else:
            query['time'] = ('{}-01-01'.format(year), '{}-12-31'.format(year))

    dc = datacube.Datacube(app='dbg')
    gw = GridWorkflow(product=products[0], index=dc.index)

    click.echo('## Starting to run query', err=True)
    t_start = time.time()
    co_common, co_unmatched = multi_product_list_cells(products, gw, **query)
    t_took = time.time() - t_start
    click.echo('## Completed in {} seconds'.format(t_took), err=True)

    if save is not None:
        click.echo('## Saving data to {}'.format(save), err=True)
        with open(save, 'wb') as f:
            pickle.dump(dict(co_common=co_common, co_unmatched=co_unmatched),
                        f)
        click.echo(' done')

    click.echo('## Processing results,  ...wait', err=True)

    coverage = set(flat_map_ds(ds_to_key, co_common[0]))
    um = set(flat_map_ds(ds_to_key, co_unmatched[0]))

    # These tiles have both matched and unmatched data on the same solar day.
    # It's significant because these are the ones that will interfere with
    # masking if masking is done the "usual way".
    um_with_siblings = um - (um - coverage)

    click.echo('## Found {} matched records and {} unmatched'.format(
        len(coverage), len(um)))
    click.echo(
        '##   Of {} unmatched records {} are "dangerous" for masking'.format(
            len(um), len(um_with_siblings)))
    click.echo('##')

    def dump_unmatched_ds(ds, cell_idx, solar_day):
        k = ds_to_key(ds, cell_idx, solar_day)
        flag = '!' if k in coverage else '.'
        click.echo('{} {} {} {}'.format(k, flag, ds.id, ds.local_path))

    for (idx, product) in enumerate(products):
        click.echo('## unmatched ###########################')
        click.echo('## {}'.format(product))
        click.echo('########################################')
        flat_foreach_ds(dump_unmatched_ds, co_unmatched[idx])
Example #28
def load_miningrehab_data():
    """
    Loads Fractional Cover and Water Observations from Space products for the mining
    case-study area.
    Last modified: January 2020

    outputs
    ds - data set containing masked Fractional Cover data from Landsat 8
    Masked values are set to 'nan'
    """

    # Suppress warnings
    warnings.filterwarnings("ignore")

    # Initialise the data cube. 'app' argument is used to identify this app
    dc = datacube.Datacube(app="mining-app")

    # Specify latitude and longitude ranges
    latitude = (-34.426512, -34.434517)
    longitude = (116.648123, 116.630731)

    # Specify the date range
    time = ("2015-06-01", "2018-06-30")

    # Construct the data cube query
    query = {
        "x": longitude,
        "y": latitude,
        "time": time,
        "output_crs": "EPSG:3577",
        "resolution": (-25, 25),
    }

    print("Loading Fractional Cover for Landsat 8")
    dataset_fc = dc.load(product="ls8_fc_albers", **query)

    print("Loading WoFS for Landsat 8")
    dataset_wofs = dc.load(product="wofs_albers", like=dataset_fc)

    # Match the data
    shared_times = np.intersect1d(dataset_fc.time, dataset_wofs.time)

    ds_fc_matched = dataset_fc.sel(time=shared_times)
    ds_wofs_matched = dataset_wofs.sel(time=shared_times)

    # Mask FC
    dry_mask = masking.make_mask(ds_wofs_matched, dry=True)

    # Get fractional masked fc dataset (as proportion of 1, rather than 100)
    ds_fc_masked = ds_fc_matched.where(dry_mask.water == True) / 100

    # Resample
    ds_resampled = ds_fc_masked.resample(time="1M").median()
    ds_resampled.attrs["crs"] = dataset_fc.crs

    # Return the data
    return ds_resampled
Example #29
def index_datasets(items, parse_only=False):
    s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
    dc = datacube.Datacube()
    idx = dc.index
    for item in items:
        if "MTL" in item.assets:
            index_dataset(idx, s3, item.assets["MTL"]["href"], parse_only)
        else:
            logger.info("Item {} does not have an MTL asset (Sentinel2?) - skipping".format(item))
Example #30
 def get():
     config = {}
     config['db_hostname'] = db_hostname
     config['db_port'] = db_port
     config['db_database'] = 'postgres'
     config['db_username'] = '******'
     config['db_password'] = '******'
     dc = datacube.Datacube(config=config)
     return dc