def my_parse(ctx, param, value):
    parsed_expressions = parse_expressions(*list(value))
    # ctx.ensure_object(dict)
    # try:
    #     parsed_expressions['crs'] = ctx.obj['crs']
    # except KeyError:
    #     pass
    return parsed_expressions
def validate_time_range(context, param, value):
    """
    Click callback to validate a date range string
    """
    try:
        parse_expressions(value)
        return value
    except SyntaxError:
        raise click.BadParameter(
            'Date range must be in one of the following formats, as a string:'
            '\n\t1996-01 < time < 1996-12'
            '\n\t1996 < time < 1997'
            '\n\t1996-01-01 < time < 1996-12-31'
            '\n\ttime in 1996'
            '\n\ttime in 1996-12'
            '\n\ttime in 1996-12-31'
            '\n\ttime=1996'
            '\n\ttime=1996-12'
            '\n\ttime=1996-12-31')
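# --- Usage sketch (not from the source): wiring validate_time_range into a Click
# option via Click's documented callback mechanism. The command and option names
# below are hypothetical assumptions for illustration only.
import click

@click.command()
@click.option('--time-range', callback=validate_time_range,
              help='Date range expression, e.g. "time in 1996-12"')
def convert(time_range):
    # By this point a malformed expression has already raised click.BadParameter
    click.echo(f'Converting datasets for: {time_range}')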
def generate_work_list(product_name, output_dir, s3_list, time_range, config):
    """
    Compares datacube file URIs against the S3 bucket inventory (file names within
    a text file) and writes the list of datasets needing conversion into the task file.

    Uses a configuration file to define the file naming schema.

    Task file format:
        A two column, comma separated CSV file.

        <Absolute path to NetCDF file>,<Directory and file name prefix of outputs>

        eg. /g/data/fk4/data/foo/bar.nc,prefix/data/foo/bar
    """
    with open(config) as config_file:
        config = yaml.safe_load(config_file)['products'][product_name]

    if s3_list is None:
        # Use text file generated by save_s3_inventory function
        s3_list = Path(output_dir) / (product_name + S3_LIST_EXT)
    else:
        s3_list = Path(s3_list)

    if s3_list.exists():
        existing_s3_keys = _load_s3_inventory(s3_list)
    else:
        existing_s3_keys = {}

    # Mapping from Expected Output YAML Location -> Input NetCDF File
    dc_workgen_list = dict()

    output_bands = expected_bands(product_name)

    for source_uri, new_basename in get_dataset_values(product_name, config,
                                                       parse_expressions(time_range)):
        output_yaml = new_basename + '.yaml'
        expected_outputs = [f'{new_basename}_{band}.tif' for band in output_bands] + [output_yaml]

        if not all(output in existing_s3_keys for output in expected_outputs):
            dc_workgen_list[new_basename] = source_uri.split('file://')[1]

    out_file = Path(output_dir) / (product_name + TASK_FILE_EXT)

    with open(out_file, 'w', newline='') as fp:
        csv_writer = csv.writer(fp, quoting=csv.QUOTE_MINIMAL)
        LOG.info(f'Found {len(dc_workgen_list)} datasets needing conversion, writing to {out_file}')
        for s3_basename, input_file in dc_workgen_list.items():
            LOG.debug(f"File does not exist in S3, adding to processing list: {input_file}")
            # Write Input_file, Output Basename
            csv_writer.writerow((input_file, splitext(s3_basename)[0]))

    if not dc_workgen_list:
        LOG.info("No tasks found, everything is up to date.")
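# --- Illustration only: a hedged sketch of a downstream consumer of the task file
# written by generate_work_list above. The two-column CSV layout (input file,
# output basename) comes from the docstring; process_dataset() and the surrounding
# variables (output_dir, product_name) are assumptions for the example.
import csv
from pathlib import Path

task_file = Path(output_dir) / (product_name + TASK_FILE_EXT)
with open(task_file, newline='') as fp:
    for input_file, output_prefix in csv.reader(fp):
        # input_file:    absolute path to the source NetCDF file
        # output_prefix: directory and file name prefix for the converted outputs
        process_dataset(input_file, output_prefix)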
def generate_work_list(product_name, output_dir, pickle_file, time_range, config):
    """
    Compares datacube file URIs against the S3 bucket (file names within a pickle
    file) and writes the list of datasets for COG conversion into the task file.

    Uses a configuration file to define the file naming schema.

    Before using this command, execute the following:
      $ module use /g/data/v10/public/modules/modulefiles/
      $ module load dea
    """
    with open(config) as config_file:
        config = yaml.safe_load(config_file)

    if pickle_file is None:
        # Use pickle file generated by save_s3_inventory function
        pickle_file = Path(output_dir) / (product_name + PICKLE_FILE_EXT)

    with open(pickle_file, "rb") as pickle_in_fl:
        s3_file_list = pickle.load(pickle_in_fl)

    dc_workgen_list = dict()

    for uri, dest_dir, dc_yamlfile_path in get_dataset_values(
            product_name,
            config['products'][product_name],
            parse_expressions(time_range)):
        if uri:
            dc_workgen_list[dc_yamlfile_path] = (uri.split('file://')[1], dest_dir)

    work_list = set(dc_workgen_list.keys()) - set(s3_file_list)
    out_file = Path(output_dir) / (product_name + TASK_FILE_EXT)

    with open(out_file, 'w', newline='') as fp:
        csv_writer = csv.writer(fp, quoting=csv.QUOTE_MINIMAL)
        for s3_filepath in work_list:
            # dict value holds the uri and the s3 output directory path template
            input_file, dest_dir = dc_workgen_list.get(s3_filepath, (None, None))
            if input_file:
                LOG.info(
                    f"File does not exist in S3, adding to processing list: {input_file}"
                )
                csv_writer.writerow((input_file, dest_dir))

    if not work_list:
        LOG.info("No tasks found")
def my_parse(ctx, param, value):
    return parse_expressions(*list(value))
def expression_parser(ctx, param, value):
    """
    Parse query expressions like ``datacube-core``.
    """
    return parse_expressions(*list(value))
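# --- Usage sketch (assumption, not from the source): expression_parser attached as a
# callback on a variadic Click argument, so free-form query expressions arrive
# pre-parsed as a dict. The command and argument names are hypothetical.
import click

@click.command()
@click.argument('expression', nargs=-1, callback=expression_parser)
def search(expression):
    # expression is now the dict returned by parse_expressions(), not the raw strings
    click.echo(expression)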
def units(ctx, index, expression):
    ctx.obj['write_results'](
        index.storage.get_fields().keys(),
        index.storage.search_summaries(**parse_expressions(*expression)))
def datasets(ctx, index, expression):
    ctx.obj['write_results'](
        index.datasets.get_field_names(),
        index.datasets.search_summaries(**parse_expressions(*expression)))
def get_start_end_dates(expressions):
    parsed = parse_expressions(*expressions)
    time_range = parsed['time']
    return time_range.begin, time_range.end
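# --- Hypothetical call, for illustration only. The expression format follows the
# examples listed in validate_time_range, and the Range with .begin/.end attributes
# is what the function body above assumes parse_expressions() returns for 'time'.
start, end = get_start_end_dates(['1996-01-01 < time < 1996-12-31'])
print(start, end)  # datetime bounds of the requested time range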
def product_counts(index, period, expression):
    for product, series in index.datasets.count_by_product_through_time(
            period, **parse_expressions(*expression)):
        click.echo(product.name)
        for timerange, count in series:
            click.echo('  {}: {}'.format(timerange[0].strftime("%Y-%m-%d"), count))
def units(ctx, index, expression):
    ctx.obj['write_results'](
        index.storage.get_fields().keys(),
        index.storage.search_summaries(**parse_expressions(*expression))
    )
def datasets(ctx, index, expression):
    ctx.obj['write_results'](
        index.datasets.get_field_names(),
        index.datasets.search_summaries(**parse_expressions(*expression))
    )