def bkt_info(ctx, repo, task: Task, roi_id, bucket_name, reload, less, sort,
             limit, column, where, check):
    """ show bucket info by BUCKET_NAME OR record number

    * list buckets names and record numbers via 'bucket list' command
    """
    # Default column set shown when the user gives no -c/--column options.
    def_col = [
        'productId', 'startDate', 'title', 'relativeOrbitNumber', 'cycle_dt'
    ]
    if column is None or not column:
        column = def_col
    else:
        _cp = []  # '+name[,name...]' options: appended after the base columns
        _cd = []  # plain 'name[,name...]' options: replace the default set
        for c in column:
            if c.startswith('+'):
                _cp += c[1:].split(',')
            else:
                # BUG FIX: was `_cd.extend(c)`, which iterated the string
                # character by character; split on ',' like the '+' branch.
                _cd += c.split(',')
        if not _cd:
            _cd = def_col
        column = _cd + _cp
    _id, _roi = resolve_roi(roi_id, repo)
    if reload:
        # NOTE(review): was `id=_id`; every other command here invokes
        # pairs_load with `roi_id=...` (see bkt_list) — confirm the
        # parameter name on pairs_load.
        ctx.invoke(pairs_load, roi_id=_id, reload=True)
    bname, _ds, headers = _bkt_info(
        repo,
        task,
        geometry=_roi['geometry'],
        bucket_name=bucket_name,
        sort=sort,
        limit=limit,
        column=column,
        where=where,
        check=check,
    )
    if limit >= 0:
        output.comment(f"Dataset limited to {limit} records")
    cols = _ds.columns.to_list()
    if task.loaded:
        output.comment(f"Task: {task.name}")
    # Explain the synthetic marker columns when present.
    if 'task' in cols:
        output.comment(
            f"INFO: 'task' column: 'm' - used as master in task, 's' - used as slave in task "
        )
    if 'exists' in cols:
        output.comment(
            f"INFO: 'exists' column: '+' - full data loaded, '~' - metadata only loaded"
        )
    output.comment(f'Bucket name: {bname}')
    output.table(_ds, headers=headers, less=less)
def resolve_recipe(repo: Repo, task: Task, roi_id):
    """Resolve the AI recipe file for the given ROI via the active task.

    Returns the recipe file path on success; raises click.UsageError when
    the task is invalid or the recipe file does not exist.
    """
    try:
        task.resolve()
        _, roi = resolve_roi(roi_id, repo)
        recipe_path = TaskRecipe(task=task).get_ai_recipe_name(roi['name'])
        if not os.path.isfile(recipe_path):
            raise RuntimeError(f"task recipe file {recipe_path} not found")
        log.info(f'recipe resolved via task: {recipe_path}')
        return recipe_path
    except AssertionError as e:
        raise click.UsageError(f'Task is invalid, reason: {e}')
    except RuntimeError as e:
        raise click.UsageError(str(e))
def pairs_list(repo: Repo, less, sort, limit, column, where, list_columns,
               roi_id, local_only):
    """ list sate data """
    _id, _roi = resolve_roi(roi_id, repo)
    geometry = _roi['geometry']
    cache_file_name = _cache_pairs_file_name(repo)
    log.debug(f'loading sta data from cache "{cache_file_name}" ')
    try:
        _df = pairs.load_from_cache(cache_file_name=cache_file_name)
        if list_columns:
            # Only show which columns are available and stop.
            available = _df.columns.tolist()
            output.table([[name] for name in available],
                         headers=['available columns'])
        else:
            _df = bucket.create_list(_df, buckets_dir='')
            shown = list(column)
            _ds = _list_products(_df, where=where, sort=list(sort),
                                 limit=limit)
            # Fraction of the ROI covered by each product's footprint.
            _ds['fit'] = _ds['geometry'].intersection(
                geometry).area / geometry.area
            shown.append('fit')
            _ds = _ds[shown]
            if local_only:
                local_ids = [
                    rec['productId'] for rec in _list_local(local_only, limit)
                ]
                _ds = _ds[_ds['productId'].apply(lambda pid: pid in local_ids)]
            output.table(_ds, headers=['#', *shown], less=less)
            return _ds
    except ValueError as e:
        output.error(f" Bad value: {e}")
    except KeyError as e:
        output.error(f"column not found : {e}")
    except Exception as e:
        # CLI boundary: log the traceback, tell the user how to recover.
        log.exception(e)
        output.error(
            f"No data loaded, run '{pairs_cli.name} {pairs_load.name}' command to initiate cache"
        )
    return None
def bkt_list(ctx: click.Context, repo: Repo, task: Task, roi_id, reload, fit):
    # Resolve the ROI first; optionally refresh the pairs cache before listing.
    _id, _roi = resolve_roi(roi_id, repo)
    if reload:
        ctx.invoke(pairs_load, roi_id=_id, reload=True)
    master, slave = _task_ms(task)
    try:
        buckets, headers = _bkt_list(
            repo,
            geometry=_roi['geometry'],
            fit=fit,
            master=master,
            slave=slave,
        )
    except RuntimeError as e:
        raise click.UsageError(str(e))
    output.comment(
        f'{len(buckets)} possible bucket for roi "{_roi["name"]}" found')
    if fit is not None:
        # Keep only buckets whose mean fit reaches the requested threshold.
        buckets = buckets[buckets['fit']['mean'] >= fit]
        _l2 = len(buckets)
        output.comment(f"shown {_l2} with fit >= {fit}")
    output.table(buckets, headers=headers)
def bkt_info(
        repo: Repo,
        task: Task,
        roi_id,
        less,
        # sort,
        limit,
        column,
        where,
        check,
        delta,
        product_id,
        platform):
    """ find pairs by given PRODUCT_ID

    \b

    PRODUCT_ID: 4-digits hex number (Sentinel product identifier, last 4 symbols in product name).

    PLATFORM: like 'S1A' or 'S1B' to narrow search in case PRODUCT_ID is ambiguous
    """
    _id, _roi = resolve_roi(roi_id, repo)
    _m, _s = _task_ms(task)
    geometry = _roi['geometry']
    output.comment(f"active task master: {_m}")
    _df = pairs.load_from_cache(cache_file_name=(_cache_pairs_file_name(repo)))
    _df = _df.set_index('productId')
    try:
        _ds = _df.loc[product_id][['startDate', 'platform']]
        if isinstance(_ds, DataFrame):
            # More than one product matches the 4-digit id suffix.
            if platform != '':
                _ds = _ds[_ds['platform'] == platform].loc[product_id]
                if isinstance(_ds, DataFrame):
                    raise OCLIException(
                        f"Could not resolve '{product_id}' for platform {platform}"
                    )
            else:
                output.table(_ds,
                             headers=['PRODUCT_ID', 'startDate', 'platform'])
                # BUG FIX: user-facing message said "<PALTFORM>".
                raise OCLIException(
                    f"Product ID {product_id} is ambiguous, use <PLATFORM> argument to narrow search "
                )
        # Unambiguous now: pin the reference timestamp and platform.
        ts, platform = _ds[['startDate', 'platform']]
    except KeyError:
        raise OCLIException(f'Product id "{product_id}" not found')
    output.comment(
        f"Building bucket for product {product_id} , startDate={ts}")
    # Keep same-platform products within `delta` of the reference cycle time.
    f = unitime_delta_factory(ts)
    _df['cycle_dt'] = _df['startDate'].apply(f)
    _df = _df[(_df['cycle_dt'] <= delta) & (_df['platform'] == platform)]
    cols = [
        'productId', 'cycle_dt', 'startDate', 'platform',
        'relativeOrbitNumber', 'polarisation', 'fit', 'task'
    ]
    try:
        if geometry.area == 0:
            raise AssertionError('ROI has zero area')
        # Fraction of the ROI covered by each product's footprint.
        _df['fit'] = _df['geometry'].intersection(
            geometry).area / geometry.area
        _df['task'] = ''
        _df = _df.reset_index()
        _df = _df.set_index('title')
        # Mark the active task's master/slave rows in the 'task' column.
        if _m in _df.index:
            _df.loc[_m, 'task'] = 'm'
        else:
            output.warning('Current task master not found in bucket')
        if _s in _df.index:
            _df.loc[_s, 'task'] = 's'
        else:
            output.warning('Current task slave not found in bucket')
        _df = _df.reset_index()
        _e, eodata = task.get_valid_key('eodata')

        def _ch_fs(b):
            """'+' = full data on disk, '~' = metadata only, '' = not found."""
            _p = _local_eodata_relative_path(eodata, b)
            if os.path.isfile(os.path.join(_p, 'manifest.safe')):
                # Renamed from `_m` to avoid shadowing the task master above.
                measurement_dir = os.path.join(_p, 'measurement')
                if os.path.isdir(measurement_dir):
                    return '+' if any(os.scandir(measurement_dir)) else '~'
            return ''

        # NOTE(review): `not _e` reads as "no error from get_valid_key",
        # i.e. only probe the filesystem when 'eodata' is valid — confirm.
        if check and not _e:
            _df['exists'] = _df['productIdentifier'].apply(_ch_fs)
            cols += ['exists']
        _df = _df[cols]
    except AssertionError as e:
        raise RuntimeError(e)
    headers = ['#'] + cols
    output.table(
        _df,
        headers=headers,
    )