def l1_scenes_to_process(
    outfile: Path,
    products: List[str],
    brdfdir: Path,
    wvdir: Path,
    region_codes: List,
    scene_limit: int,
    config: Optional[Path] = None,
    days_delta: int = 21,
) -> int:
    """Writes all the files returned from datacube for level1 to a text file."""
    dc = datacube.Datacube(app="gen-list", config=config)
    l1_count = 0
    with open(outfile, "w") as fid:
        for product in products:
            files2process = l1_filter(
                dc,
                product,
                brdfdir=brdfdir,
                wvdir=wvdir,
                region_codes=region_codes,
                scene_limit=scene_limit,
                days_delta=days_delta,
            )
            for fp in files2process:
                fid.write(fp + "\n")
                l1_count += 1
                if l1_count >= scene_limit:
                    break
            if l1_count >= scene_limit:
                break
    return l1_count
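# A hypothetical call to l1_scenes_to_process (not from the source): list up to
# 400 level-1 scenes for two illustrative product names into scenes.txt.
# All paths, product names, and region codes below are assumptions.
from pathlib import Path

count = l1_scenes_to_process(
    outfile=Path("scenes.txt"),
    products=["usgs_ls8c_level1_1", "usgs_ls7e_level1_1"],
    brdfdir=Path("/ancillary/brdf"),
    wvdir=Path("/ancillary/water_vapour"),
    region_codes=["090084", "091084"],
    scene_limit=400,
)
print(f"{count} scenes written")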
def worker(session: Session, bucket_name: str, queue, config=None):
    dc = datacube.Datacube(config=config)
    index = dc.index
    s3 = session.resource("s3")

    while True:
        try:
            key = queue.get(timeout=60)
            if key == GUARDIAN:
                break
            print(f"Processing {key} {current_process()}")
            obj = s3.Object(bucket_name, key).get(
                ResponseCacheControl="no-cache", RequestPayer="requester"
            )
            raw = obj["Body"].read()
            content = str(raw, "utf-8")
            data = ElementTree.fromstring(content)
            dataset_doc = generate_eo3_dataset_doc(bucket_name, key, data)
            uri = format_s3_key(bucket_name, key)[0]
            add_dataset(dataset_doc, uri, index)
            queue.task_done()
        except Empty:
            break
        except EOFError:
            break
def worker(config, bucket_name, prefix, suffix, func, unsafe, sources_policy, queue):
    dc = datacube.Datacube(config=config)
    index = dc.index
    s3 = boto3.resource("s3")
    safety = 'safe' if not unsafe else 'unsafe'

    while True:
        try:
            key = queue.get(timeout=60)
            if key == GUARDIAN:
                break
            logging.info("Processing %s %s", key, current_process())
            obj = s3.Object(bucket_name, key).get(ResponseCacheControl='no-cache')
            raw = obj['Body'].read()
            if suffix == AWS_PDS_TXT_SUFFIX:
                # Attempt to process text document
                raw_string = raw.decode('utf8')
                txt_doc = _parse_group(iter(raw_string.split("\n")))['L1_METADATA_FILE']
                data = make_metadata_doc(txt_doc, bucket_name, key)
            else:
                yaml = YAML(typ=safety, pure=False)
                yaml.default_flow_style = False
                data = yaml.load(raw)
            uri = get_s3_url(bucket_name, key)
            logging.info("calling %s", func)
            func(data, uri, index, sources_policy)
            queue.task_done()
        except Empty:
            break
        except EOFError:
            break
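# A minimal sketch (not from the source) of how queue-fed workers like the ones
# above are typically driven: keys are pushed onto a shared queue, one GUARDIAN
# sentinel is enqueued per worker so every loop terminates, then the processes
# are joined. `worker`, `GUARDIAN`, and the argument tuple are assumed to come
# from the same module as the worker functions; `all_keys` is a placeholder.
from multiprocessing import Manager, Process, cpu_count

def run_workers(all_keys, worker_args, worker_count=cpu_count()):
    queue = Manager().Queue()
    processes = [
        Process(target=worker, args=(*worker_args, queue))
        for _ in range(worker_count)
    ]
    for proc in processes:
        proc.start()
    for key in all_keys:
        queue.put(key)
    for _ in range(worker_count):
        queue.put(GUARDIAN)  # one sentinel per worker
    for proc in processes:
        proc.join()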
def loadAll(products, key, bands):
    ds = []
    for product in products:
        dc = datacube.Datacube()
        gw = GridWorkflow(dc.index, product=product)
        # Get the list of tiles (one for each time point) for this product
        tile_list = gw.list_tiles(product=product, cell_index=key, group_by='solar_day')
        dc.close()
        # Load all tiles
        for tile_index, tile in tile_list.items():
            dataset = gw.load(tile, measurements=bands)
            if dataset.variables:
                ds.append(dataset)
    return ds
def calculate_result(self, **kwargs):
    """
    This is the entry point for a task run. Will be called by celery.
    :param kwargs: arguments to the task.
    :return:
    """
    # Connect to the datacube and pass it in to the user's function.
    # Everything should be talking to the datacube here, so it makes sense to
    # pull it out and make things easier for the users.
    result_dir = get_config("App", "result_dir")
    path_prefix = path.join(result_dir, self.request.id)
    os.makedirs(path_prefix, exist_ok=True)
    dc = datacube.Datacube(app=self.name)

    outputs = self.generate_product(dc, path_prefix, **self.map_kwargs(**kwargs))
    logging.info(f"got result of {outputs}")
    self.log_query(path_prefix)
    self.zip_outputs(path_prefix, outputs)

    # TODO: put the results somewhere, send notifications etc.
    output_url = self.upload_results(path_prefix)
    self.ping_results(output_url)
def add_all_datacube_datasets(top_directory_name):
    """
    Main loop function to traverse the directory tree and index each dataset.
    For each .json metadata file:
        * extract the metadata and
        * create a doc (dict json blob for the postgresql database)

    Args:
        top_directory_name (str): root directory to search recursively for
            .json metadata files

    Returns:
        ABSOLUTELY_NOTHING
    """
    dc = datacube.Datacube()
    index = dc.index
    rules = make_rules(index, product_list=['l8_rwanda'])
    # print(type(rules))
    # print(rules)
    for metadata_path, metadata_doc in get_metadata_docs(top_directory_name):
        uri = metadata_path
        add_dataset(metadata_doc, uri, rules, index)
        logging.info("Indexing %s", metadata_path)
def add_dataset(doc):
    dc = datacube.Datacube(config=config.DATACUBE_CONF)
    index = dc.index
    resolver = Doc2Dataset(index)
    dataset, error = resolver(doc, 'file:///tmp/test-dataset.json')
    print('add dataset', dataset)
    index.datasets.add(dataset)
def var_to_ind(variables, product):
    """Helper to get a list of indices from a list of variable names

    Useful for telling the modeling module which are the categorical variables
    that should be encoded using One Hot Encoding

    Args:
        variables (list): List of strings corresponding to existing variable names
            in the product
        product (str): Name of an existing datacube product

    Return:
        list: List of integers corresponding to dataset positions

    Example:
        >>> from madmex.util.datacube import var_to_ind
        >>> ind = var_to_ind(['blue', 'red', 'swir2'], 'ls8_espa_mexico')
        >>> print(ind)
    """
    dc = datacube.Datacube()
    prod = dc.index.products.get_by_name(name=product)
    measurements = list(prod.measurements)
    indices = [measurements.index(x) for x in variables]
    return indices
def open_unindexed_nc(fname, product, dc=None):
    if dc is None:
        dc = datacube.Datacube(app='app')

    if isinstance(product, str):
        product_name = product
        product = dc.index.products.get_by_name(product_name)
        if product is None:
            print('Failed to load product: {}'.format(product_name))
            return None, None, None, None

    uris = ['file://' + abspath(fname)]

    def mk_dataset(yaml_string):
        return datacube.model.Dataset(product,
                                      yaml.load(yaml_string, Loader=yaml.CSafeLoader),
                                      uris=uris)

    with xr.open_dataset(fname) as f:
        datasets = [mk_dataset(f.dataset.values[i].decode('utf-8'))
                    for i in range(f.dataset.shape[0])]

    data_group = dc.group_datasets(datasets, datacube.api.query.query_group_by())
    geom = datacube.api.core.get_bounds(datasets, product.grid_spec.crs)
    geobox = datacube.utils.geometry.GeoBox.from_geopolygon(geom, product.grid_spec.resolution)

    return data_group, geobox, product, dc
def listProducts(self):
    global dc
    try:
        dc = datacube.Datacube(app="dc")
        product_name = dc.list_products().name.tolist()
        if len(product_name) != 0:
            product_variables = []
            for products_variables in dc.list_measurements().name.to_dict():
                product_variables.append(products_variables)
            for products in product_name:
                # create a tree item
                parent = QTreeWidgetItem(self.treeProductInfo)
                # add parent item
                parent.setText(0, products)
                # set flag of tristate checkbox
                parent.setFlags(parent.flags() | Qt.ItemIsTristate | Qt.ItemIsUserCheckable)
                # extract band variables from product variable list
                for product_vars, var in product_variables:
                    if product_vars == products:
                        # add child to the parent item
                        child = QTreeWidgetItem(parent)
                        # set the checkbox flag user checkable
                        child.setFlags(child.flags() | Qt.ItemIsUserCheckable)
                        child.setText(0, var)
                        child.setCheckState(0, Qt.Unchecked)
                # to display products in tree view
                self.treeProductInfo.addTopLevelItem(parent)
            self.appendLogs('Products loaded!')
        else:
            self.appendLogs('Product List Empty!')
    except datacube.index.postgres._connections.IndexSetupError:
        self.appendLogs('No DB schema exists. Have you run init? datacube system init')
def worker(config, path, product_name, product_type, platform_code, unsafe, queue):
    dc = datacube.Datacube(config=config)
    index = dc.index
    safety = 'safe' if not unsafe else 'unsafe'

    while True:
        try:
            path = queue.get(timeout=60)
            if path == GUARDIAN:
                break
            logging.info("Processing %s %s", path, current_process())
            data = make_metadata_doc(path, product_name, product_type, platform_code)
            if data:
                uri = 'file:/' + data['image']['bands']['elevation']['path']
                add_dataset(data, product_name, uri, index)
            else:
                logging.error("Failed to get data returned... skipping file.")
        except Empty:
            logging.error("Empty exception hit.")
            break
        except EOFError:
            logging.error("EOF Error hit.")
            break
        except ValueError as e:
            logging.error("Found data for a satellite that we can't handle: {}".format(e))
        finally:
            queue.task_done()
def load_wofl(ds, query):
    datetime = str(ds.time.data)[:19]
    dc_prod = datacube.Datacube()
    print(f"date: {datetime}")
    print(f"Query: {query}")
    wofl = dc_prod.load(product='wofs_albers', time=datetime, **query)
    return wofl.squeeze()
def _initial_query(product_name_test, db_env_test, time, lon, lat,
                   additional_filters, query_filesystem):
    """Initial listing to see how many datasets we'll (at most) have to compare."""
    if query_filesystem:
        _LOG.info("filesystem queries can take a while, please be patient")
        path = Path(product_name_test)
        pattern = db_env_test
        _LOG.info("initial filesystem query", path=str(path), pattern=pattern)
        n_datasets = len(list(path.rglob(pattern)))
    else:
        dc = datacube.Datacube(env=db_env_test)
        _LOG.info(
            "initial database query",
            product=product_name_test,
            time=time,
            lon=lon,
            lat=lat,
        )
        n_datasets = len(
            dc.find_datasets(product=product_name_test, time=time, lon=lon, lat=lat,
                             **additional_filters))
    return n_datasets
def worker(config, bucket_name, prefix, func, sources_policy, queue, request_payer):
    dc = datacube.Datacube(config=config)
    index = dc.index
    s3 = boto3.resource("s3")

    while True:
        try:
            key = queue.get(timeout=60)
            if key == GUARDIAN:
                break
            logging.info("Processing %s %s", key, current_process())

            tile_base_path = '/'.join(key.split('/')[:-1])
            tile_meta_obj = s3.Object(bucket_name, key)\
                .get(ResponseCacheControl='no-cache', RequestPayer=request_payer)
            raw_tile_meta_file = tile_meta_obj['Body'].read()
            tile_meta = extract_tile_meta(raw_tile_meta_file)
            spatial_doc_part = {
                'grid_spatial': {
                    'projection': {
                        'geo_ref_points': tile_meta['geo_ref_points'],
                        'spatial_reference': tile_meta['crs_epsg']
                    }
                }
            }

            product_meta_path = tile_meta['product_path'] + '/' + PRODUCT_META_FILE_NAME
            product_meta_obj = s3.Object(bucket_name, product_meta_path)\
                .get(ResponseCacheControl='no-cache', RequestPayer=request_payer)
            raw_product_meta_file = product_meta_obj['Body']
            product_doc_part = extract_product_meta(raw_product_meta_file, bucket_name, product_meta_path)

            bands_paths = generate_band_paths(bucket_name, tile_base_path,
                                              bands_of_interest, IMAGE_FILE_ENDINGS)
            image_doc_part = {'image': {'bands': bands_paths}}

            dataset_doc = {**product_doc_part, **spatial_doc_part, **image_doc_part}
            uri = get_s3_url(bucket_name, key)
            func(dataset_doc, uri, index, sources_policy)
            queue.task_done()
        except Empty:
            break
        except EOFError:
            break
def gm_mads_two_seasons_training(query):
    # connect to the datacube
    dc = datacube.Datacube(app='feature_layers')

    # load S2 geomedian
    ds = dc.load(product='gm_s2_semiannual', **query)

    # split the data into the two semiannual epochs
    dss = {"S1": ds.isel(time=0), "S2": ds.isel(time=1)}

    # create features
    epoch1 = common_ops(dss["S1"], era="_S1")
    epoch1 = add_chirps(epoch1, era='_S1')
    epoch2 = common_ops(dss["S2"], era="_S2")
    epoch2 = add_chirps(epoch2, era='_S2')

    # add slope
    url_slope = "https://deafrica-input-datasets.s3.af-south-1.amazonaws.com/srtm_dem/srtm_africa_slope.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope")

    result = xr.merge([epoch1, epoch2, slope], compat="override")

    return result.astype(np.float32).squeeze()
def __init__(self, *, config=None, config_file=None, dc_env=None):
    if config is not None:
        self.config = config
    else:
        with fsspec.open(config_file, mode="r") as f:
            self.config = cattr.structure(yaml.safe_load(f), AlchemistSettings)

    # Connect to the ODC Index
    self.dc = datacube.Datacube(env=dc_env)
    self.input_products = []

    if self.config.specification.product and self.config.specification.products:
        _LOG.warning(
            "Both `product` and `products` are defined, only using product."
        )

    # Store the products that we're allowing as inputs
    if self.config.specification.product:
        self.input_products.append(
            self.dc.index.products.get_by_name(self.config.specification.product))
    elif self.config.specification.products:
        for product in self.config.specification.products:
            self.input_products.append(
                self.dc.index.products.get_by_name(product))

    # Rasterio environment activation
    configure_s3_access(
        cloud_defaults=True,
        aws_unsigned=self.config.specification.aws_unsigned)
def index_data(bbox):
    start_date = '2020-05-01'
    end_date = '2021-01-06'
    print(bbox)

    collections = ['sentinel-s2-l2a-cogs']
    config = {
        'collections': collections,
        'bbox': bbox,
        'datetime': f"{start_date}/{end_date}"
    }

    STAC_API_URL = 'https://explorer.sandbox.dea.ga.gov.au/stac/'
    os.environ['STAC_API_URL'] = STAC_API_URL

    srch = Search().search(**config)
    found_items = srch.found()
    print(f"Found {found_items} items that can be indexed")

    dc = datacube.Datacube()
    indexed, failed = stac_api_to_odc(dc, 's2_l2a', None, False, False, config)
    print(f"Indexed {indexed} out of {found_items} with {failed} failures.")
def loadByTile(products, key, min_y, max_y, min_x, max_x, bands):
    ds = []
    for product in products:
        dc = datacube.Datacube()
        # Create the GridWorkflow object for this product
        curr_gw = GridWorkflow(dc.index, product=product)
        # Get the list of tiles (one for each time point) for this product
        tile_list = curr_gw.list_tiles(product=product, cell_index=key, group_by='solar_day')
        dc.close()
        # Retrieve the specified pixels for each tile in the list
        for tile_index, tile in tile_list.items():
            dataset = curr_gw.load(tile[0:1, min_y:max_y, min_x:max_x], measurements=bands)
            if dataset.variables:
                ds.append(dataset)
    return ds
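# A hypothetical invocation of loadByTile (not from the source): load a 200x200
# pixel window of the red/green/blue bands from one grid cell for two products.
# The product names, cell index, and band names are illustrative assumptions.
tiles = loadByTile(
    products=['ls7_nbar_albers', 'ls8_nbar_albers'],
    key=(15, -40),           # GridWorkflow cell index
    min_y=0, max_y=200,      # pixel window within each tile
    min_x=0, max_x=200,
    bands=['red', 'green', 'blue'],
)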
def gm_mads_two_seasons_predict(ds):
    dc = datacube.Datacube(app="training")
    ds = ds / 10000
    ds1 = ds.sel(time=slice("2019-01", "2019-06"))
    ds2 = ds.sel(time=slice("2019-07", "2019-12"))

    def fun(ds, era):
        # geomedian and tmads
        # gm_mads = xr_geomedian_tmad(ds)
        gm_mads = xr_geomedian_tmad_new(ds).compute()
        gm_mads = calculate_indices(
            gm_mads,
            index=["NDVI", "LAI", "MNDWI"],
            drop=False,
            normalise=False,
            collection="s2",
        )

        gm_mads["sdev"] = -np.log(gm_mads["sdev"])
        gm_mads["bcdev"] = -np.log(gm_mads["bcdev"])
        gm_mads["edev"] = -np.log(gm_mads["edev"])
        gm_mads = gm_mads.chunk({"x": 2000, "y": 2000})

        # rainfall climatology
        if era == "_S1":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jan_jun_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )
        if era == "_S2":
            chirps = assign_crs(
                xr.open_rasterio(
                    "/g/data/CHIRPS/cumulative_alltime/CHPclim_jul_dec_cumulative_rainfall.nc"
                ),
                crs="epsg:4326",
            )

        chirps = xr_reproject(chirps, ds.geobox, "bilinear")
        chirps = chirps.chunk({"x": 2000, "y": 2000})
        gm_mads["rain"] = chirps

        for band in gm_mads.data_vars:
            gm_mads = gm_mads.rename({band: band + era})

        return gm_mads

    epoch1 = fun(ds1, era="_S1")
    epoch2 = fun(ds2, era="_S2")

    # slope
    url_slope = "https://deafrica-data.s3.amazonaws.com/ancillary/dem-derivatives/cog_slope_africa.tif"
    slope = rio_slurp_xarray(url_slope, gbox=ds.geobox)
    slope = slope.to_dataset(name="slope").chunk({"x": 2000, "y": 2000})

    result = xr.merge([epoch1, epoch2, slope], compat="override")

    return result.squeeze()
def add_datacube_dataset(bucket_name, config, prefix):
    dc = datacube.Datacube(config=config)
    index = dc.index
    rules = make_rules(index)

    for metadata_path, metadata_doc in get_metadata_docs(bucket_name, prefix):
        uri = get_s3_url(bucket_name, metadata_path)
        add_dataset(metadata_doc, uri, rules, index)
        logging.info("Indexing %s", metadata_path)
def make_long_query(ground_brdf):
    ldc = datacube.Datacube()
    lpixsize = 30.0
    udc = datacube.Datacube(env='ardinteroperability_tmp',
                            config='/home/547/aw3463/.sent2.conf')

    # convert half a pixel in metres to decimal degrees latitude
    lmet_latdeg = lpixsize / (2 * 111319.9)
    # convert half a pixel in metres to decimal degrees longitude
    lmet_londeg = lmet_latdeg / math.cos(math.radians(ground_brdf['Latitude'].mean()))

    lquery = {
        'time': ('2013-01-01', '2118-12-31'),
        'lat': (ground_brdf['Latitude'].min() - lmet_latdeg,
                ground_brdf['Latitude'].max() + lmet_latdeg),
        'lon': (ground_brdf['Longitude'].min() - lmet_londeg,
                ground_brdf['Longitude'].max() + lmet_londeg),
        'output_crs': 'EPSG:3577',
        'resampling': 'bilinear',
        'group_by': 'solar_day',
    }
    lquery2 = {
        'time': ('2013-01-01', '2118-12-31'),
        'lat': (ground_brdf['Latitude'].min() - lmet_latdeg - 0.01,
                ground_brdf['Latitude'].max() + lmet_latdeg + 0.01),
        'lon': (ground_brdf['Longitude'].min() - lmet_londeg - 0.01,
                ground_brdf['Longitude'].max() + lmet_londeg + 0.01),
        'output_crs': 'EPSG:3577',
        'resampling': 'bilinear',
        'group_by': 'solar_day',
    }
    lquery['resolution'] = (-lpixsize, lpixsize)
    lquery2['resolution'] = (-lpixsize, lpixsize)

    return ldc, udc, lquery, lquery2
def __init__(self):
    # Using both the datacube object and the api.
    # dc is useful for all data access; api is only really used for metadata fetching.
    # Hardcoded config location. Could parameterize.
    self.dc = datacube.Datacube(
        config='/home/localuser/Datacube/data_cube_ui/config/.datacube.conf')
def iterate_datasets(bucket_name, config, prefix, suffix, func, unsafe, sources_policy):
    dc = datacube.Datacube(config=config)
    index = dc.index
    rules = make_rules(index)

    for metadata_path, metadata_doc in get_metadata_docs(bucket_name, prefix, suffix, unsafe):
        uri = get_s3_url(bucket_name, metadata_path)
        func(metadata_doc, uri, rules, index, sources_policy)
def generic_task(execID, algorithm, version, output_expression, product,
                 min_lat, min_long, time_ranges, **kwargs):
    """
    The first 8 parameters must be supplied by the executor based on the user's selection:
        execID = id of the execution
        algorithm = name of the algorithm
        version = version of the algorithm to run
        output_expression = expression describing how the output file name is generated
        product = product selected by the user (the one the query is run against)
        min_long = x coordinate of the lower-left corner of the tile
        min_lat = y coordinate of the lower-left corner of the tile
        time_ranges = time ranges for the queries (an array of tuples; at least one is
            required to run a query). (Mandatory)
        kwargs = keyword arguments used by the algorithm (which sees them as global
            variables when it runs)
    """
    dc = datacube.Datacube(app=execID)
    i = 0
    for tr in time_ranges:
        kwargs["xarr" + str(i)] = dc.load(product=product,
                                          longitude=(min_long, min_long + 1.0),
                                          latitude=(min_lat, min_lat + 1),
                                          time=tr)
        i += 1
    dc.close()

    exec(open(ALGORITHMS_FOLDER + "/" + algorithm + "/" + algorithm + "_" +
              str(version) + ".py").read(), kwargs)

    fns = []
    folder = "{}/{}/".format(RESULTS_FOLDER, execID)
    if not os.path.exists(os.path.dirname(folder)):
        try:
            os.makedirs(os.path.dirname(folder))
        except OSError as exc:  # Guard against race condition
            if exc.errno != errno.EEXIST:
                raise
    history = u'Creado con CDCOL con el algoritmo {} y ver. {}'.format(algorithm, str(version))
    if "output" in kwargs:
        # output should be an xarray; save it to a file
        filename = folder + "{}_{}_{}_{}_{}_output.nc".format(
            algorithm, str(version), min_lat, min_long,
            re.sub(r'[^\w_.)(-]', '', str(time_ranges)))
        output = kwargs["output"]
        saveNC(output, filename, history)
        fns.append(filename)
    if "outputs" in kwargs:
        for xa in kwargs["outputs"]:
            filename = folder + "{}_{}_{}_{}_{}_{}.nc".format(
                algorithm, str(version), min_lat, min_long,
                re.sub(r'[^\w_.)(-]', '', str(time_ranges)), xa)
            saveNC(kwargs["outputs"][xa], filename, history)
            fns.append(filename)
    if "outputtxt" in kwargs:
        filename = folder + "{}_{}_{}.txt".format(
            min_lat, min_long, re.sub(r'[^\w_.)(-]', '', str(time_ranges)))
        with open(filename, "w") as text_file:
            text_file.write(kwargs["outputtxt"])
        fns.append(filename)
    return fns
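# A hypothetical algorithm file (not from the source) of the kind generic_task
# runs via exec(): the loaded cubes appear as globals xarr0, xarr1, ... and
# results are returned by assigning `output`, `outputs`, or `outputtxt`.
# The band names used for NDVI are illustrative assumptions.
nir = xarr0["nir"].astype("float32")
red = xarr0["red"].astype("float32")
ndvi = (nir - red) / (nir + red)

output = ndvi.to_dataset(name="ndvi")  # saved by generic_task as <...>_output.nc
outputtxt = "mean NDVI: {}".format(float(ndvi.mean()))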
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-latrange', help='latitude range', nargs=2,
                        default=[-34.5, -35], required=False)
    parser.add_argument('-lonrange', help='longitude range', nargs=2,
                        default=[148.5, 149], required=False)
    parser.add_argument('-timerange', help='time range', nargs=2,
                        default=['2011-3-2', '2011-6-5'], type=str, required=False)
    parser.add_argument('-measurements', help='measurement', action='append',
                        type=str, required=False)
    parser.add_argument('-product', help='product', required=False)
    parser.add_argument('-groupby', help='groupby', required=False)
    parser.add_argument('-valuemax', help='max value', type=float,
                        default=4000, required=False)
    parser.add_argument('-verbose', help='verbose output', default=True, required=False)
    args = parser.parse_args()
    kwargs = vars(args)

    if not args.product:
        parser.print_help()
        print('\n\nValid choices for PRODUCT are:')
        dc = datacube.Datacube()
        prods = dc.list_products()['name']
        print(prods.to_string(index=False, header=False))
        parser.exit()

    if args.verbose:
        print(kwargs)

    run(**kwargs)
def load_crophealth_data():
    """
    Loads Sentinel-2 analysis-ready data (ARD) product for the crop health
    case-study area. The ARD product is provided for the last year.
    Last modified: January 2020

    outputs
    ds - data set containing combined, masked data from Sentinel-2a and -2b.
         Masked values are set to 'nan'
    """
    # Suppress warnings
    warnings.filterwarnings('ignore')

    # Initialise the data cube. 'app' argument is used to identify this app
    dc = datacube.Datacube(app='Crophealth-app')

    # Specify latitude and longitude ranges
    latitude = (-24.974997, -24.995971)
    longitude = (152.429994, 152.395805)

    # Specify the date range
    # Calculated as today's date minus 365 days to give a one-year window
    # Dates are converted to strings as required by the loading function below
    end_date = dt.date.today()
    start_date = end_date - dt.timedelta(days=365)
    time = (start_date.strftime("%Y-%m-%d"), end_date.strftime("%Y-%m-%d"))

    # Construct the data cube query
    products = ["s2a_ard_granule", "s2b_ard_granule"]
    query = {
        'x': longitude,
        'y': latitude,
        'time': time,
        'measurements': [
            'nbar_red',
            'nbar_green',
            'nbar_blue',
            'nbar_nir_1',
            'nbar_swir_2',
            'nbar_swir_3'
        ],
        'output_crs': 'EPSG:3577',
        'resolution': (-10, 10)
    }

    # Load the data and mask out bad quality pixels
    ds_s2 = load_ard(dc, products=products, min_gooddata=0.5, **query)

    # Calculate the normalised difference vegetation index (NDVI) across
    # all pixels for each image.
    # This is stored as an attribute of the data
    ds_s2 = calculate_indices(ds_s2, index='NDVI', collection='ga_s2_1')

    # Return the data
    return ds_s2
def main(products, year, month, save):
    from datacube_stats.utils.query import multi_product_list_cells
    import datacube
    from datacube.api import GridWorkflow

    query = {}
    if year is not None:
        if month is not None:
            query['time'] = ('{}-{}-01'.format(year, month),
                             '{}-{}-01'.format(year, month + 1))
        else:
            query['time'] = ('{}-01-01'.format(year), '{}-12-31'.format(year))

    dc = datacube.Datacube(app='dbg')
    gw = GridWorkflow(product=products[0], index=dc.index)

    click.echo('## Starting to run query', err=True)
    t_start = time.time()
    co_common, co_unmatched = multi_product_list_cells(products, gw, **query)
    t_took = time.time() - t_start
    click.echo('## Completed in {} seconds'.format(t_took), err=True)

    if save is not None:
        click.echo('## Saving data to {}'.format(save), err=True)
        with open(save, 'wb') as f:
            pickle.dump(dict(co_common=co_common, co_unmatched=co_unmatched), f)
        click.echo(' done')

    click.echo('## Processing results, ...wait', err=True)

    coverage = set(flat_map_ds(ds_to_key, co_common[0]))
    um = set(flat_map_ds(ds_to_key, co_unmatched[0]))

    # These tiles have both matched and unmatched data on the same solar day.
    # This is significant because these are the ones that will interfere with
    # masking if masking is done the "usual way".
    um_with_siblings = um - (um - coverage)

    click.echo('## Found {} matched records and {} unmatched'.format(len(coverage), len(um)))
    click.echo('## Of {} unmatched records {} are "dangerous" for masking'.format(
        len(um), len(um_with_siblings)))
    click.echo('##')

    def dump_unmatched_ds(ds, cell_idx, solar_day):
        k = ds_to_key(ds, cell_idx, solar_day)
        flag = '!' if k in coverage else '.'
        click.echo('{} {} {} {}'.format(k, flag, ds.id, ds.local_path))

    for (idx, product) in enumerate(products):
        click.echo('## unmatched ###########################')
        click.echo('## {}'.format(product))
        click.echo('########################################')
        flat_foreach_ds(dump_unmatched_ds, co_unmatched[idx])
def load_miningrehab_data():
    """
    Loads Fractional Cover and Water Observations from Space products for the
    mining case-study area.
    Last modified: January 2020

    outputs
    ds - data set containing masked Fractional Cover data from Landsat 8.
         Masked values are set to 'nan'
    """
    # Suppress warnings
    warnings.filterwarnings("ignore")

    # Initialise the data cube. 'app' argument is used to identify this app
    dc = datacube.Datacube(app="mining-app")

    # Specify latitude and longitude ranges
    latitude = (-34.426512, -34.434517)
    longitude = (116.648123, 116.630731)

    # Specify the date range
    time = ("2015-06-01", "2018-06-30")

    # Construct the data cube query
    query = {
        "x": longitude,
        "y": latitude,
        "time": time,
        "output_crs": "EPSG:3577",
        "resolution": (-25, 25),
    }

    print("Loading Fractional Cover for Landsat 8")
    dataset_fc = dc.load(product="ls8_fc_albers", **query)

    print("Loading WoFS for Landsat 8")
    dataset_wofs = dc.load(product="wofs_albers", like=dataset_fc)

    # Match the data on shared time steps
    shared_times = np.intersect1d(dataset_fc.time, dataset_wofs.time)
    ds_fc_matched = dataset_fc.sel(time=shared_times)
    ds_wofs_matched = dataset_wofs.sel(time=shared_times)

    # Mask FC
    dry_mask = masking.make_mask(ds_wofs_matched, dry=True)

    # Get fractional masked fc dataset (as proportion of 1, rather than 100)
    ds_fc_masked = ds_fc_matched.where(dry_mask.water == True) / 100

    # Resample to monthly medians
    ds_resampled = ds_fc_masked.resample(time="1M").median()
    ds_resampled.attrs["crs"] = dataset_fc.crs

    # Return the data
    return ds_resampled
def index_datasets(items, parse_only=False):
    s3 = boto3.resource("s3", config=Config(signature_version=UNSIGNED))
    dc = datacube.Datacube()
    idx = dc.index
    for item in items:
        if "MTL" in item.assets:
            index_dataset(idx, s3, item.assets["MTL"]["href"], parse_only)
        else:
            logger.info("Item {} does not have an MTL asset (Sentinel2?) - skipping".format(item))
def get():
    config = {}
    config['db_hostname'] = db_hostname
    config['db_port'] = db_port
    config['db_database'] = 'postgres'
    config['db_username'] = '******'
    config['db_password'] = '******'
    dc = datacube.Datacube(config=config)
    return dc
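# A hypothetical use of get() (not from the source): db_hostname and db_port are
# assumed to be set at module level, and the redacted credentials above would
# need real values before this actually connects.
dc = get()
print(dc.list_products()[["name", "description"]])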