def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-d', '--from_dir_prefix')
    parser.add_argument('-t', '--to_dir_prefix')
    parser.add_argument('-u', '--path2udp_model',
                        default='russian-syntagrus-ud-2.0-170801.udpipe')
    parser.add_argument('-n', '--cpu_n', default=5, type=int)
    args = parser.parse_args()
    # dfunc.MODELFILE4UDPIPE = args.path2udp_model
    # dfunc.set_model(args.path2udp_model)
    with dask.config.set(pool=ThreadPool(args.cpu_n)):
        bag = db.read_text(args.from_dir_prefix)
        pbar = ProgressBar()
        pbar.register()
        ddf = bag.to_dataframe(columns=['text'])
        ddf['text'] = ddf['text'].apply(dfunc.skip_empty, meta=('x', 'f8'))
        ddf = ddf.dropna()
        ddf['rec'] = ddf['text'].apply(dfunc.get_rec_info, meta=('x', 'f8'))
        ddf['text'] = ddf['text'].apply(dfunc.spec_tok_add, meta=('x', 'f8'))
        ddf['norm_text'] = ddf['text'].apply(dfunc.normalization1, meta=('x', 'f8'))
        # udpipe_sent_and_tok = dfunc.get_udpipe_sent_and_tok(args.path2udp_model)
        # udpipe_sent_and_tok = dfunc.get_udpipe_sent_and_tok('/home/den/Documents/elmo/data_preparing/rutwitter/russian-syntagrus-ud-2.0-170801.udpipe')
        ddf['norm_text'] = ddf['text'].apply(dfunc.udpipe_sent_and_tok, meta=('x', 'f8'))
        # ddf['norm_text'] = ddf['text'].apply(dfunc.nltk_sent_and_tok, meta=('x', 'f8'))
        ddf['norm_text'] = ddf['norm_text'].apply(dfunc.normalization2, meta=('x', 'f8'))
        ddf['rec_text'] = ddf.apply(dfunc.recovery, meta=('x', 'f8'), axis=1)
        ddf['cleaned_text'] = ddf['norm_text'].apply(dfunc.lower_case, meta=('x', 'f8'))
        ddf[['rec_text', 'cleaned_text']].to_csv(args.to_dir_prefix)
def createPyramidLevel(self, resolution=0, subdiv=(1, 2, 2), quiet=False):
    """Add a level in a multi-level pyramid.

    This function is provided because TeraStitcher does not offer enough
    control over the sampling strategy for Imaris files.
    """
    # find all of the imaris datasets under the specified resolution group
    self._subdiv = subdiv
    datasetnames = list()
    resin = 'ResolutionLevel ' + str(resolution)
    resout = 'ResolutionLevel ' + str(resolution + 1)
    prf = '/DataSet/' + resin
    self._file_object[prf].visit(datasetnames.append)
    tt = [
        type(self._file_object[prf + '/' + x]) == h5py._hl.dataset.Dataset
        for x in datasetnames
    ]
    res = list(compress(datasetnames, tt))
    # Now we need to find the ones ending in '/Data'
    tt = [x.endswith('/Data') for x in res]
    res = list(compress(res, tt))
    outpaths = ['/DataSet/' + resout + '/' + x for x in res]
    inpaths = [prf + '/' + x for x in res]
    pbar = ProgressBar()
    for idx in range(len(inpaths)):
        if not quiet:
            print(inpaths[idx])
            pbar.register()
        self._subdivide(self._file_object, inpaths[idx], outpaths[idx])
        if not quiet:
            pbar.unregister()
def init_dask_client():
    pbar = ProgressBar()
    pbar.register()
    cluster = LocalCluster()
    client = Client(cluster)
    return cluster, client
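A minimal usage sketch for the helper above (the body of the `try` block is a placeholder, not from the original code). Note that `dask.diagnostics.ProgressBar` only reports on local-scheduler computations; work submitted through the distributed client is usually tracked with `distributed.progress` instead, and the client and cluster should still be closed explicitly when finished.

cluster, client = init_dask_client()
try:
    pass  # submit dask work through the client here
finally:
    client.close()
    cluster.close()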
def main(argv):
    jobNum = int(argv.jobNum)
    outputDir = argv.outputDir
    inputDir = argv.inputDir
    try:
        os.mkdir(outputDir)
        for shape in ['64_32', '128_64', '192_96']:
            os.mkdir(os.path.join(outputDir, shape))
    except OSError:
        pass  # path already exists

    client = LocalCluster(n_workers=jobNum, threads_per_worker=5)  # IO intensive, more threads
    print('* number of workers:{}, \n* input dir:{}, \n* output dir:{}\n\n'.format(
        jobNum, inputDir, outputDir))
    # print('* Link to local cluster dashboard: ', client.dashboard_link)

    for subFolder in [
            'ccpd_base', 'ccpd_db', 'ccpd_fn', 'ccpd_rotate', 'ccpd_tilt',
            'ccpd_weather'
    ]:
        fileList = os.listdir(os.path.join(inputDir, subFolder))
        print('* {} images found in {}. Start processing ...'.format(
            len(fileList), subFolder))
        toDo = dbag.from_sequence(fileList, npartitions=jobNum * 30).persist()  # persist the bag in memory
        toDo = toDo.map(processImage, inputDir, outputDir, subFolder)
        pbar = ProgressBar(minimum=2.0)
        pbar.register()  # register all computations for better tracking
        result = toDo.compute()
        print('* image cropped: {}. Done ...'.format(sum(result)))

    client.close()  # shut down the cluster
def show_progress(arr, msg: str = None, nthreads: int = 1):
    from dask.diagnostics import ProgressBar

    if msg is not None:
        logger.info(msg)
    pbar = ProgressBar()
    pbar.register()
    res = controlled_compute(arr, nthreads)
    pbar.unregister()
    return res
def main():
    paths = list(Path(args.dir).rglob("*.txt"))
    pbar = ProgressBar()
    pbar.register()
    a_bag = db.from_sequence(paths, npartitions=mp.cpu_count())
    a_bag = a_bag.map(lambda a_path: parse_path(a_path))
    frame_data = a_bag.compute()
    pbar.unregister()
    frame = pd.DataFrame(frame_data)
    frame.to_pickle(args.out)
def merge_with_small(dbSnp153, small_df):
    # TODO: after merge, make sure to drop duplicates, because dbSnp153 might
    # contain duplicate keys, or process dbSnp153 (dedup/sort) beforehand
    small_df["start"] = small_df["BP"] - 1
    dbSnp153['chr'] = dbSnp153.chrom.str[3:]
    pbar = ProgressBar()
    pbar.register()
    result = dbSnp153.merge(small_df,
                            how="inner",
                            left_on=["chr", "chromStart", "chromEnd"],
                            right_on=["Chr", "start", "BP"]).compute()
    return result
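A hypothetical invocation of `merge_with_small`, assuming `dbSnp153` is a dask dataframe (for example read with `dd.read_csv`) carrying `chrom`/`chromStart`/`chromEnd` columns and `small_df` is a pandas dataframe with `Chr`/`BP` columns; the file name and column layout here are illustrative only.

import dask.dataframe as dd
import pandas as pd

dbSnp153 = dd.read_csv("dbSnp153.tsv", sep="\t",
                       names=["chrom", "chromStart", "chromEnd", "name"])
small_df = pd.DataFrame({"Chr": ["1"], "BP": [10177]})
merged = merge_with_small(dbSnp153, small_df)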
def test_register(capsys):
    try:
        p = ProgressBar()
        p.register()

        assert _globals['callbacks']

        get_threaded(dsk, 'e')
        check_bar_completed(capsys)

        p.unregister()

        assert not _globals['callbacks']
    finally:
        _globals['callbacks'].clear()
def test_register(capsys):
    try:
        p = ProgressBar()
        p.register()

        assert Callback.active

        get_threaded(dsk, 'e')
        check_bar_completed(capsys)

        p.unregister()

        assert not Callback.active
    finally:
        Callback.active.clear()
def test_register(capsys):
    try:
        p = ProgressBar()
        p.register()

        assert _globals['callbacks']

        get(dsk, 'e')
        check_bar_completed(capsys)

        p.unregister()

        assert not _globals['callbacks']
    finally:
        _globals['callbacks'].clear()
def test_register(capsys):
    try:
        p = ProgressBar()
        p.register()

        assert Callback.active

        get_threaded(dsk, "e")
        check_bar_completed(capsys)

        p.unregister()

        assert not Callback.active
    finally:
        Callback.active.clear()
def test_register(capsys):
    try:
        p = ProgressBar()
        p.register()

        assert _globals['callbacks']

        get(dsk, 'e')
        out, err = capsys.readouterr()
        bar, percent, time = [i.strip() for i in out.split('\r')[-1].split('|')]
        assert bar == "[########################################]"
        assert percent == "100% Completed"

        p.unregister()

        assert not _globals['callbacks']
    finally:
        _globals['callbacks'].clear()
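The register()/unregister() pattern these tests exercise also has a context-manager form, which is often the simpler choice in application code; a minimal sketch using only public dask APIs:

import dask.array as da
from dask.diagnostics import ProgressBar

x = da.random.random((2000, 2000), chunks=(500, 500))
with ProgressBar():  # registers on entry, unregisters on exit
    total = x.sum().compute()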
def cli(ctx, store, cube, skv, n_threads, color):
    """
    Execute certain operations on the given Kartothek cube.

    If possible, the operations will be performed in parallel on the current
    machine.
    """
    ctx.ensure_object(dict)

    store_obj = get_store(skv, store)
    cube, datasets = get_cube(store_obj, cube)

    dask.config.set(scheduler="threads")
    if n_threads > 0:
        dask.config.set(pool=ThreadPool(n_threads))

    if color == "always":
        ctx.color = True
    elif color == "off":
        ctx.color = False

    pbar = ProgressBar()
    pbar.register()
    ctx.call_on_close(pbar.unregister)

    # silence extremely verbose azure logging
    azure_logger = logging.getLogger("azure.storage.common.storageclient")
    azure_logger.setLevel(logging.FATAL)

    # pandas perf tuning
    chained_assignment_old = pd.options.mode.chained_assignment

    def reset_pd():
        pd.options.mode.chained_assignment = chained_assignment_old

    ctx.call_on_close(reset_pd)
    pd.options.mode.chained_assignment = None

    ctx.obj["skv"] = skv
    ctx.obj["store"] = store_obj
    ctx.obj["store_name"] = store
    ctx.obj["cube"] = cube
    ctx.obj["datasets"] = datasets
    ctx.obj["pbar"] = pbar
def show_progress(arr, msg: str = None, nthreads: int = 1):
    """
    Performs computation with Dask and shows a progress bar.

    Args:
        arr: Dask collection to compute.
        msg: message to log, default None
        nthreads: number of threads to use for computation, default 1

    Returns:
        Result of computation.
    """
    from dask.diagnostics import ProgressBar

    if msg is not None:
        logger.info(msg)
    pbar = ProgressBar()
    pbar.register()
    res = controlled_compute(arr, nthreads)
    pbar.unregister()
    return res
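A hypothetical call of `show_progress`, assuming `controlled_compute(arr, nthreads)` ultimately computes the collection with the requested thread count as the docstring implies; the array and message are made up for illustration.

import dask.array as da

arr = da.ones((10_000, 10_000), chunks=(1_000, 1_000)).mean()
result = show_progress(arr, msg="Computing mean", nthreads=4)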
def main():
    logger = logging.getLogger(__name__)
    logger.info('creating a bunch of features')

    pbar = ProgressBar()
    pbar.register()

    target_entities = ['ip', 'app', 'device', 'os', 'channel']
    filenames_train = sorted(glob('../data/interim/train_2017-11-*00.csv'))
    training_windows = ['1 hours', '3 hours', '1 day']

    for target_entity in target_entities:
        filenames = glob(
            f"../data/interim/partitioned/{target_entity}/train_*.csv")
        b = bag.from_sequence(filenames)
        entity_sets = b.map(create_entityset, target_entity).compute()
        gc.collect()

        for filename in filenames_train:
            logger.info(f"Processing: {filename}")
            df = pd.read_csv(filename,
                             usecols=['click_time'],
                             parse_dates=to_parse)
            cutoff_time = df['click_time'].min()
            del df

            for training_window in training_windows:
                create_features(filename,
                                entity_sets,
                                target_entity=target_entity,
                                cutoff_time=cutoff_time,
                                training_window=ft.Timedelta(training_window))

        del entity_sets, b
        gc.collect()

    logger.info('finished')
def Movie(
    da,
    odir,
    varname=None,
    framedim="time",
    moviename="movie",
    clim=None,
    cmap=None,
    bgcolor=np.array([1, 1, 1]) * 0.3,
    framewidth=1280,
    frameheight=720,
    dpi=100,
    lon=None,
    lat=None,
    dask=True,
    delete=True,
    ffmpeg=True,
    plot_style="simple",
    norm=mpl.colors.Normalize(),
    progbar=False,
):
    # Set defaults:
    if not ffmpeg and delete:
        raise RuntimeError("raw picture deletion makes only "
                           "sense if ffmpeg conversion is enabled")

    if not isinstance(da, xr.DataArray):
        raise RuntimeError("input has to be an xarray DataStructure, instead "
                           "is " + str(type(da)))

    if not os.path.exists(odir):
        os.makedirs(odir)

    # Infer defaults from data
    if clim is None:
        print("clim will be inferred from data, this can take very long...")
        clim = [da.min(), da.max()]
    if cmap is None:
        cmap = plt.cm.viridis

    if plot_style in ["map"]:
        if None in [lon, lat]:
            raise RuntimeError("map plotting requires lon and lat")
        else:
            lons = np.array(da[lon].data)
            lats = np.array(da[lat].data)
            if len(lons.shape) != 2:
                lons, lats = np.meshgrid(lons, lats)
            time = np.array(da["time"].data)
    else:
        lons = None
        lats = None
        time = None

    # Annnd here we go
    print("+++ Execute plot function +++")
    if dask:
        data = da.data
        frame_axis = da.get_axis_num(framedim)
        drop_axis = [da.get_axis_num(a) for a in da.dims if not a == framedim]
        chunks = list(data.shape)
        chunks[frame_axis] = 1
        data = data.rechunk(chunks)
        if progbar:
            pbar = ProgressBar()
            pbar.register()
        data.map_blocks(
            FramePrint,
            chunks=[1],
            drop_axis=drop_axis,
            dtype=np.float64,
            dask=dask,
            frame_axis=frame_axis,
            odir=odir,
            cmap=cmap,
            clim=clim,
            framewidth=framewidth,
            frameheight=frameheight,
            bgcolor=bgcolor,
            plot_style=plot_style,
            lons=lons,
            lats=lats,
            time=time,
            norm=norm,
            dpi=dpi,
        ).compute(get=get)
        if progbar:
            pbar.unregister()
    # The .compute(get=get) line is some dask 'magic': it parallelizes the
    # print function with processes and not threads, which is a lot faster
    # for custom functions apparently!
    else:
        # do it with a simple for loop...can this really be quicker?
        print("This is slow! Do it in dask!")
        for ii in range(0, len(da.time)):
            start_time = time.time()
            da_slice = da[{framedim: ii}]
            # fig,ax,h = FramePrint(da_slice,
            FramePrint(
                da_slice,
                frame=ii,
                odir=odir,
                cmap=cmap,
                clim=clim,
                framewidth=framewidth,
                frameheight=dpi,
                bgcolor=bgcolor,
                plot_style=plot_style,
                lons=lons,
                lats=lats,
                norm=norm,
                dpi=dpi,
            )
            if ii % 100 == 0:
                remaining_time = (len(da.time) - ii) * (time.time() - start_time) / 60
                print("FRAME---%04d---" % ii)
                print("Estimated time left : %d minutes" % remaining_time)

    query = ('ffmpeg -y -i "frame_%05d.png" -c:v libx264 -preset veryslow '
             '-crf 6 -pix_fmt yuv420p '
             '-framerate 10 '
             '"' + moviename + '.mp4"')
    with cd(odir):
        if ffmpeg:
            print("+++ Convert frames to video +++")
            excode = os.system(query)
            if excode == 0 and delete:
                os.system("rm *.png")
def main():
    parser = argparse.ArgumentParser(
        description='Add multiscale levels to an existing n5')
    parser.add_argument('-i', '--input', dest='input_path', type=str, required=True,
        help='Path to the directory containing the n5 volume')
    parser.add_argument('-d', '--data_set', dest='data_set', type=str, default="",
        help='Path to data set (default empty, so /s0 is assumed to exist at the root)')
    parser.add_argument('-f', '--downsampling_factors', dest='downsampling_factors', type=str, default="2,2,2",
        help='Downsampling factors for each dimension (default "2,2,2")')
    parser.add_argument('-p', '--pixel_res', dest='pixel_res', type=str,
        help='Pixel resolution for each dimension "2.0,2.0,2.0" (default None) - required for Neuroglancer')
    parser.add_argument('-u', '--pixel_res_units', dest='pixel_res_units', type=str, default="nm",
        help='Measurement unit for --pixel_res (default "nm") - required for Neuroglancer')
    parser.add_argument('--distributed', dest='distributed', action='store_true',
        help='Run with distributed scheduler (default)')
    parser.set_defaults(distributed=False)
    parser.add_argument('--workers', dest='workers', type=int, default=20,
        help='If --distributed is set, this specifies the number of workers (default 20)')
    parser.add_argument('--dashboard', dest='dashboard', action='store_true',
        help='If --distributed is set, this runs a web-based dashboard on port 8787')
    parser.set_defaults(dashboard=False)
    parser.add_argument('--metadata-only', dest='metadata_only', action='store_true',
        help='Only fix metadata on an existing multiscale pyramid')
    parser.set_defaults(metadata_only=False)

    args = parser.parse_args()

    if args.distributed:
        dashboard_address = None
        if args.dashboard:
            dashboard_address = ":8787"
            print(f"Starting dashboard on {dashboard_address}")

        from dask.distributed import Client
        client = Client(processes=True, n_workers=args.workers,
                        threads_per_worker=1, dashboard_address=dashboard_address)
    else:
        from dask.diagnostics import ProgressBar
        pbar = ProgressBar()
        pbar.register()

    downsampling_factors = [
        int(c) for c in args.downsampling_factors.split(',')
    ]

    pixel_res = None
    if args.pixel_res:
        pixel_res = [float(c) for c in args.pixel_res.split(',')]

    if not args.metadata_only:
        add_multiscale(args.input_path, args.data_set,
                       downsampling_factors=downsampling_factors)

    add_metadata(args.input_path,
                 downsampling_factors=downsampling_factors,
                 pixel_res=pixel_res,
                 pixel_res_units=args.pixel_res_units)
import os

from astropy.io import fits
from astropy import units as u
from astropy.stats import mad_std
import pylab as pl
import radio_beam
import glob
from spectral_cube import SpectralCube, DaskSpectralCube
from spectral_cube.lower_dimensional_structures import Projection

from casatools import image
ia = image()

if os.getenv('NO_PROGRESSBAR') is None:
    from dask.diagnostics import ProgressBar
    pbar = ProgressBar()
    pbar.register()

nthreads = 1
scheduler = 'synchronous'

os.environ['TEMPDIR'] = '/blue/adamginsburg/adamginsburg/tmp/'

if os.getenv('DASK_THREADS') is not None:
    try:
        nthreads = int(os.getenv('DASK_THREADS'))
        if nthreads > 1:
            scheduler = 'threads'
        else:
            scheduler = 'synchronous'
    except (TypeError, ValueError):
        nthreads = 1
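One way the `scheduler` and `nthreads` values chosen above might be applied downstream (a sketch under that assumption, not taken from the original script): pass them to `.compute()` on a dask collection.

import dask.array as da

cube_like = da.ones((100, 100, 100), chunks=(10, 100, 100))
result = cube_like.sum().compute(scheduler=scheduler, num_workers=nthreads)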
import re
import json
from tqdm import tqdm
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from multiprocessing import Pool
import dask.bag as db
import dask.dataframe as dd
import time
from dask.diagnostics import ProgressBar
from dask.distributed import Client, LocalCluster, progress

# additional imports needed by the code below
import random
import pandas as pd
from nltk.stem.lancaster import LancasterStemmer

p = ProgressBar()
p.register()

# cluster = LocalCluster(n_workers=2 , threads_per_worker=2 , ip= '145.107.189.23')
# c = Client()

# word stemmer
stemmer = LancasterStemmer()

random.seed(1)


def ImportData():
    """Function for importing the Json file dataset as a pandas object and
    numpy array. Printing Datashape"""
    data = pd.read_json("sample_data.json", lines=True)
def main(argv=sys.argv[1:]): global LOG from satpy import Scene from satpy.writers import compute_writer_results from dask.diagnostics import ProgressBar from polar2grid.core.script_utils import ( setup_logging, rename_log_file, create_exc_handler, ) import argparse add_polar2grid_config_paths() USE_POLAR2GRID_DEFAULTS = bool( int(os.environ.setdefault("USE_POLAR2GRID_DEFAULTS", "1"))) BINARY_NAME = "polar2grid" if USE_POLAR2GRID_DEFAULTS else "geo2grid" prog = os.getenv("PROG_NAME", sys.argv[0]) # "usage: " will be printed at the top of this: usage = """ %(prog)s -h see available products: %(prog)s -r <reader> -w <writer> --list-products -f file1 [file2 ...] basic processing: %(prog)s -r <reader> -w <writer> [options] -f file1 [file2 ...] basic processing with limited products: %(prog)s -r <reader> -w <writer> [options] -p prod1 prod2 -f file1 [file2 ...] """ parser = argparse.ArgumentParser( prog=prog, usage=usage, fromfile_prefix_chars="@", description="Load, composite, resample, and save datasets.", ) parser.add_argument( "-v", "--verbose", dest="verbosity", action="count", default=0, help="each occurrence increases verbosity 1 level through " "ERROR-WARNING-INFO-DEBUG (default INFO)", ) parser.add_argument("-l", "--log", dest="log_fn", default=None, help="specify the log filename") parser.add_argument( "--progress", action="store_true", help="show processing progress bar (not recommended for logged output)", ) parser.add_argument( "--num-workers", type=int, default=os.getenv("DASK_NUM_WORKERS", 4), help="specify number of worker threads to use (default: 4)", ) parser.add_argument( "--match-resolution", dest="preserve_resolution", action="store_false", help="When using the 'native' resampler for composites, don't save data " "at its native resolution, use the resolution used to create the " "composite.", ) parser.add_argument( "--list-products", dest="list_products", action="store_true", help="List available {} products and exit".format(BINARY_NAME), ) parser.add_argument( "--list-products-all", dest="list_products_all", action="store_true", help="List available {} products and custom/Satpy products and exit". format(BINARY_NAME), ) reader_group = add_scene_argument_groups( parser, is_polar2grid=USE_POLAR2GRID_DEFAULTS)[0] resampling_group = add_resample_argument_groups( parser, is_polar2grid=USE_POLAR2GRID_DEFAULTS)[0] writer_group = add_writer_argument_groups(parser)[0] argv_without_help = [x for x in argv if x not in ["-h", "--help"]] _retitle_optional_arguments(parser) args, remaining_args = parser.parse_known_args(argv_without_help) os.environ["DASK_NUM_WORKERS"] = str(args.num_workers) # get the logger if we know the readers and writers that will be used if args.readers is not None and args.writers is not None: glue_name = args.readers[0] + "_" + "-".join(args.writers or []) LOG = logging.getLogger(glue_name) reader_subgroups = _add_component_parser_args(parser, "readers", args.readers or []) writer_subgroups = _add_component_parser_args(parser, "writers", args.writers or []) args = parser.parse_args(argv) if args.readers is None: parser.print_usage() parser.exit( 1, "\nERROR: Reader must be provided (-r flag).\n" "Supported readers:\n\t{}\n".format("\n\t".join( ["abi_l1b", "ahi_hsd", "hrit_ahi"])), ) elif len(args.readers) > 1: parser.print_usage() parser.exit( 1, "\nMultiple readers is not currently supported. 
Got:\n\t" "{}\n".format("\n\t".join(args.readers)), ) return -1 if args.writers is None: parser.print_usage() parser.exit( 1, "\nERROR: Writer must be provided (-w flag) with one or more writer.\n" "Supported writers:\n\t{}\n".format("\n\t".join(["geotiff"])), ) reader_args = _args_to_dict(args, reader_group._group_actions) reader_names = reader_args.pop("readers") scene_creation, load_args = _get_scene_init_load_args( args, reader_args, reader_names, reader_subgroups) resample_args = _args_to_dict(args, resampling_group._group_actions) writer_args = _args_to_dict(args, writer_group._group_actions) writer_specific_args = _parse_writer_args(writer_args["writers"], writer_subgroups, reader_names, args) writer_args.update(writer_specific_args) if not args.filenames: parser.print_usage() parser.exit(1, "\nERROR: No data files provided (-f flag)\n") # Prepare logging rename_log = False if args.log_fn is None: rename_log = True args.log_fn = glue_name + "_fail.log" levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG] setup_logging(console_level=levels[min(3, args.verbosity)], log_filename=args.log_fn) logging.getLogger("rasterio").setLevel(levels[min(2, args.verbosity)]) sys.excepthook = create_exc_handler(LOG.name) if levels[min(3, args.verbosity)] > logging.DEBUG: import warnings warnings.filterwarnings("ignore") LOG.debug("Starting script with arguments: %s", " ".join(sys.argv)) # Set up dask and the number of workers if args.num_workers: dask.config.set(num_workers=args.num_workers) # Create a Scene, analyze the provided files LOG.info("Sorting and reading input files...") try: scn = Scene(**scene_creation) except ValueError as e: LOG.error( "{} | Enable debug message (-vvv) or see log file for details.". format(str(e))) LOG.debug("Further error information: ", exc_info=True) return -1 except OSError: LOG.error( "Could not open files. Enable debug message (-vvv) or see log file for details." 
) LOG.debug("Further error information: ", exc_info=True) return -1 # Rename the log file if rename_log: rename_log_file(glue_name + scn.attrs["start_time"].strftime("_%Y%m%d_%H%M%S.log")) # Load the actual data arrays and metadata (lazy loaded as dask arrays) LOG.info("Loading product metadata from files...") reader_info = ReaderProxyBase.from_reader_name(scene_creation["reader"], scn, load_args["products"]) if args.list_products or args.list_products_all: _print_list_products(reader_info, p2g_only=not args.list_products_all) return 0 load_args["products"] = reader_info.get_satpy_products_to_load() if not load_args["products"]: return -1 scn.load(load_args["products"]) ll_bbox = resample_args.pop("ll_bbox") if ll_bbox: scn = scn.crop(ll_bbox=ll_bbox) scn = filter_scene( scn, reader_names, sza_threshold=reader_args["sza_threshold"], day_fraction=reader_args["filter_day_products"], night_fraction=reader_args["filter_night_products"], ) if scn is None: LOG.info("No remaining products after filtering.") return 0 to_save = [] areas_to_resample = resample_args.pop("grids") if "ewa_persist" in resample_args: resample_args["persist"] = resample_args.pop("ewa_persist") scenes_to_save = resample_scene( scn, areas_to_resample, preserve_resolution=args.preserve_resolution, is_polar2grid=USE_POLAR2GRID_DEFAULTS, **resample_args) for scene_to_save, products_to_save in scenes_to_save: overwrite_platform_name_with_aliases(scene_to_save) reader_info.apply_p2g_name_to_scene(scene_to_save) to_save = write_scene( scene_to_save, writer_args["writers"], writer_args, products_to_save, to_save=to_save, ) if args.progress: pbar = ProgressBar() pbar.register() LOG.info("Computing products and saving data to writers...") compute_writer_results(to_save) LOG.info("SUCCESS") return 0
def run(stat, bands, nodata, output, output_type, num_process, chunksize, start_date, end_date, inputs): # ignore warnings warnings.filterwarnings("ignore") print(header) # check statistical option if stat not in ('median', 'mean', 'gmean', 'max', 'min', 'std', 'valid_pixels', 'last_pixel', 'jday_last_pixel', 'jday_median', 'linear_trend') and not stat.startswith(('percentile_', 'trim_mean_')): print("\nError: argument '-stat' invalid choice: {}".format(stat)) print("choose from: median, mean, gmean, max, min, std, valid_pixels, last_pixel, " "jday_last_pixel, jday_median, linear_trend, percentile_NN, trim_mean_LL_UL") return if stat.startswith('percentile_'): try: int(stat.split('_')[1]) except: print("\nError: argument '-stat' invalid choice: {}".format(stat)) print("the percentile must ends with a valid number, e.g. percentile_25") return if stat.startswith('trim_mean_'): try: int(stat.split('_')[2]) int(stat.split('_')[3]) except: print("\nError: argument '-stat' invalid choice: {}".format(stat)) print("the trim_mean_LL_UL must ends with a valid limits, e.g. trim_mean_10_80") return print("\nLoading and prepare images in path(s):", flush=True) # search all Image files in inputs recursively if the files are in directories images_files = [] for _input in inputs: if os.path.isfile(_input): if _input.endswith(IMAGES_TYPES): images_files.append(os.path.abspath(_input)) elif os.path.isdir(_input): for root, dirs, files in os.walk(_input): if len(files) != 0: files = [os.path.join(root, x) for x in files if x.endswith(IMAGES_TYPES)] [images_files.append(os.path.abspath(file)) for file in files] # load bands if isinstance(bands, int): bands = [bands] if not isinstance(bands, list): bands = [int(b) for b in bands.split(',')] # load images images = [Image(landsat_file) for landsat_file in images_files] # filter images based on the start date and/or end date, required filename as metadata if start_date is not None or end_date is not None: [image.set_metadata_from_filename() for image in images] if start_date is not None: images = [image for image in images if image.date >= start_date] if end_date is not None: images = [image for image in images if image.date <= end_date] if len(images) <= 1: print("\n\nAfter load (and filter images in range date if applicable) there are {} images to process.\n" "StackComposed required at least 2 or more images to process.\n".format(len(images))) exit(1) # save nodata set from arguments Image.nodata_from_arg = nodata # get wrapper extent min_x = min([image.extent[0] for image in images]) max_y = max([image.extent[1] for image in images]) max_x = max([image.extent[2] for image in images]) min_y = min([image.extent[3] for image in images]) Image.wrapper_extent = [min_x, max_y, max_x, min_y] # define the properties for the raster wrapper Image.wrapper_x_res = images[0].x_res Image.wrapper_y_res = images[0].y_res Image.wrapper_shape = (int((max_y-min_y)/Image.wrapper_y_res), int((max_x-min_x)/Image.wrapper_x_res)) # (y,x) # reset the chunksize with the min of width/high if apply if chunksize > min(Image.wrapper_shape): chunksize = min(Image.wrapper_shape) # some information about process if len(images_files) != len(images): print(" images loaded: {0}".format(len(images_files))) print(" images to process: {0} (filtered in the range dates)".format(len(images))) else: print(" images to process: {0}".format(len(images))) print(" band(s) to process: {0}".format(','.join([str(b) for b in bands]))) print(" pixels size: {0} x {1}".format(round(Image.wrapper_x_res, 1), 
round(Image.wrapper_y_res, 1))) print(" wrapper size: {0} x {1} pixels".format(Image.wrapper_shape[1], Image.wrapper_shape[0])) print(" running in {0} cores with chunks size {1}".format(num_process, chunksize)) # check print(" checking bands and pixel size: ", flush=True, end="") for image in images: for band in bands: if band > image.n_bands: print("\n\nError: the image '{0}' don't have the band {1} needed to process\n" .format(image.file_path, band)) exit(1) if round(image.x_res, 1) != round(Image.wrapper_x_res, 1) or \ round(image.y_res, 1) != round(Image.wrapper_y_res, 1): print("\n\nError: the image '{}' don't have the same pixel size to the base image: {}x{} vs {}x{}." " The stack-composed is not enabled for process yet images with different pixel size.\n" .format(image.file_path, round(image.x_res, 1), round(image.y_res, 1), round(Image.wrapper_x_res, 1), round(Image.wrapper_x_res, 1))) exit(1) print("ok") # set bounds for all images [image.set_bounds() for image in images] # for some statistics that required filename as metadata if stat in ["last_pixel", "jday_last_pixel", "jday_median", "linear_trend"]: [image.set_metadata_from_filename() for image in images] # registered Dask progress bar pbar = ProgressBar() pbar.register() for band in bands: # check and set the output file before process if os.path.isdir(output): output_filename = os.path.join(output, "stack_composed_{}_band{}.tif".format(stat, band)) elif output.endswith((".tif", ".TIF")) and os.path.isdir(os.path.dirname(output)): output_filename = output elif output.endswith((".tif", ".TIF")) and os.path.dirname(output) == '': output_filename = os.path.join(os.getcwd(), output) else: print("\nError: Setting the output filename, wrong directory and/or\n" " filename: {}\n".format(output)) exit(1) # choose the default data type based on the statistic if output_type is None: if stat in ['median', 'mean', 'gmean', 'max', 'min', 'last_pixel', 'jday_last_pixel', 'jday_median'] or stat.startswith(('percentile_', 'trim_mean_')): gdal_output_type = gdal.GDT_UInt16 if stat in ['std', 'snr']: gdal_output_type = gdal.GDT_Float32 if stat in ['valid_pixels']: if len(images) < 256: gdal_output_type = gdal.GDT_Byte else: gdal_output_type = gdal.GDT_UInt16 if stat in ['linear_trend']: gdal_output_type = gdal.GDT_Int32 else: if output_type == 'byte': gdal_output_type = gdal.GDT_Byte if output_type == 'uint16': gdal_output_type = gdal.GDT_UInt16 if output_type == 'uint32': gdal_output_type = gdal.GDT_UInt32 if output_type == 'int16': gdal_output_type = gdal.GDT_Int16 if output_type == 'int32': gdal_output_type = gdal.GDT_Int32 if output_type == 'float32': gdal_output_type = gdal.GDT_Float32 if output_type == 'float64': gdal_output_type = gdal.GDT_Float64 for image in images: image.output_type = gdal_output_type ### process ### # Calculate the statistics print("\nProcessing the {} for band {}:".format(stat, band)) output_array = statistic(stat, images, band, num_process, chunksize) ### save result ### # create output raster driver = gdal.GetDriverByName('GTiff') nbands = 1 outRaster = driver.Create(output_filename, Image.wrapper_shape[1], Image.wrapper_shape[0], nbands, gdal_output_type) outband = outRaster.GetRasterBand(nbands) # convert nan value and set nodata value special by statistic if stat in ['linear_trend']: output_array[np.isnan(output_array)] = -2147483648 outband.SetNoDataValue(-2147483648) output_filename = output_filename.replace("stack_composed_linear_trend_band", "stack_composed_linear_trend_x1e6_band") else: # set nodata value 
depend of the output type if gdal_output_type in [gdal.GDT_Byte, gdal.GDT_UInt16, gdal.GDT_UInt32, gdal.GDT_Int16, gdal.GDT_Int32]: outband.SetNoDataValue(0) if gdal_output_type in [gdal.GDT_Float32, gdal.GDT_Float64]: outband.SetNoDataValue(np.nan) # write band outband.WriteArray(output_array) # set projection and geotransform outRasterSRS = osr.SpatialReference() outRasterSRS.ImportFromWkt(Image.projection) outRaster.SetProjection(outRasterSRS.ExportToWkt()) outRaster.SetGeoTransform((Image.wrapper_extent[0], Image.wrapper_x_res, 0, Image.wrapper_extent[1], 0, -Image.wrapper_y_res)) # clean del driver, outRaster, outband, outRasterSRS, output_array # force run garbage collector to release unreferenced memory gc.collect() print("\nProcess completed!")
class MetSim(object): """ MetSim handles the distribution of jobs that write to a common file by launching muliple processes and queueing up their writeback so that work can be done while IO is happening. """ # Class variables methods = {'mtclim': mtclim} params = { "period_ending": False, "is_worker": False, "method": 'mtclim', "domain": '', "state": '', "out_dir": '', "out_prefix": 'forcing', "start": 'forcing', "stop": 'forcing', "forcing_fmt": 'netcdf', "time_step": -1, "calendar": 'standard', "prec_type": 'uniform', "out_precision": 'f4', "verbose": 0, "sw_prec_thresh": 0.0, "utc_offset": False, "lw_cloud": 'cloud_deardorff', "lw_type": 'prata', "prec_type": 'uniform', "tdew_tol": 1e-6, "tmax_daylength_fraction": 0.67, "rain_scalar": 0.75, "tday_coef": 0.45, "lapse_rate": 0.0065, "out_vars": {n: available_outputs[n] for n in default_outputs}, "out_freq": None, "chunks": NO_SLICE, "scheduler": 'distributed', "num_workers": 1, } def __init__(self, params: dict, domain_slice=NO_SLICE): """ Constructor """ self._domain = None self._met_data = None self._state = None self._client = None self._domain_slice = domain_slice self.progress_bar = ProgressBar() self.params.update(params) logging.captureWarnings(True) self.logger = logging.getLogger(__name__) self.logger.setLevel(self.params['verbose']) formatter = logging.Formatter(' - '.join( ['%asctime)s', '%(name)s', '%(levelname)s', '%(message)s'])) ch = logging.StreamHandler(sys.stdout) ch.setFormatter(formatter) ch.setLevel(self.params['verbose']) # set global dask scheduler if domain_slice is NO_SLICE: if self.params['scheduler'] in DASK_CORE_SCHEDULERS: dask.config.set(scheduler=self.params['scheduler']) else: from distributed import Client, progress if 'distributed' == self.params['scheduler']: self._client = Client(n_workers=self.params['num_workers'], threads_per_worker=1) if self.params['verbose'] == logging.DEBUG: self.progress_bar = progress elif os.path.isfile(self.params['scheduler']): self._client = Client( scheduler_file=self.params['scheduler']) else: self._client = Client(self.params['scheduler']) else: dask.config.set(scheduler=self.params['scheduler']) # Set up logging # If in verbose mode set up the progress bar if self.params['verbose'] == logging.DEBUG: if 'distributed' != self.params['scheduler']: self.progress_bar.register() self.progress_bar = lambda x: x else: # If not in verbose mode, create a dummy function self.progress_bar = lambda x: x # Create time vector(s) self._times = self._get_output_times( freq=self.params['out_freq'], period_ending=self.params['period_ending']) self._update_unit_attrs(self.params['out_vars']) def _update_unit_attrs(self, out_vars): for k, v in out_vars.items(): if 'units' in v.keys(): if v['units'] in converters[k].keys(): attrs[k]['units'] = v['units'] else: self.logger.warn( f'Could not find unit conversion for {k} to {v["units"]}!' 
f' We will use the default units of' f' {available_outputs[k]["units"]} instead.') v['units'] = available_outputs[k]['units'] else: v['units'] = available_outputs[k]['units'] def _validate_force_times(self, force_times): for p, i in [('start', 0), ('stop', -1)]: # infer times from force_times if isinstance(self.params[p], str): if self.params[p] == 'forcing': self.params[p] = pd.Timestamp( force_times.values[i]).to_pydatetime() elif '/' in self.params[p]: year, month, day = map(int, self.params[p].split('/')) self.params[p] = pd.datetime(year, month, day) else: self.params[p] = pd.to_datetime(self.params[p]) # update calendar from input data (fall back to params version) self.params['calendar'] = self.met_data['time'].encoding.get( 'calendar', self.params['calendar']) assert self.params['start'] >= pd.Timestamp( force_times.values[0]).to_pydatetime() assert self.params['stop'] <= pd.Timestamp( force_times.values[-1]).to_pydatetime() self.params['state_start'] = (self.params['start'] - pd.Timedelta("90 days")) self.params['state_stop'] = (self.params['start'] - pd.Timedelta("1 days")) if self.params['utc_offset']: attrs['time'] = { 'units': DEFAULT_TIME_UNITS, 'long_name': 'UTC time', 'standard_name': 'utc_time' } else: attrs['time'] = { 'units': DEFAULT_TIME_UNITS, 'long_name': 'local time at grid location', 'standard_name': 'local_time' } def convert_monthly_param(self, name): self.met_data[name] = self.met_data['prec'].copy() months = self.met_data['time'].dt.month for m in range(12): param = self.domain[name].sel(month=m) locations = {'time': self.met_data['time'].isel(time=months == m)} self.met_data[name].loc[locations] = param @property def domain(self): if self._domain is None: self._domain = io.read_domain( self.params).isel(**self._domain_slice) return self._domain @property def met_data(self): if self._met_data is None: self._met_data = io.read_met_data(self.params, self.domain) self._met_data['elev'] = self.domain['elev'] self._met_data['lat'] = self.domain['lat'] self._met_data['lon'] = self.domain['lon'] # process constant_vars constant_vars = self.params.get('constant_vars', None) if constant_vars: da_template = self._met_data[list(self._met_data)[0]] for var in constant_vars.keys(): self._met_data[var] = xr.full_like( da_template, float(constant_vars[var])) self._validate_force_times(force_times=self._met_data['time']) return self._met_data @property def state(self): if self._state is None: self._state = io.read_state(self.params, self.domain) self._aggregate_state() return self._state @property def slices(self): if not self.params['chunks']: return [{d: slice(None) for d in self.domain[['mask']].dims}] return chunk_domain(self.params['chunks'], self.domain[['mask']].dims) def open_output(self): filenames = [self._get_output_filename(times) for times in self._times] return xr.open_mfdataset(filenames) def run(self): self._validate_setup() write_locks = {} for times in self._times: filename = self._get_output_filename(times) self.setup_netcdf_output(filename, times) write_locks[filename] = combine_locks( [NETCDFC_LOCK, get_write_lock(filename)]) self.logger.info('Starting {} chunks...'.format(len(self.slices))) delayed_objs = [ wrap_run_slice(self.params, write_locks, dslice) for dslice in self.slices ] persisted = dask.persist(delayed_objs, num_workers=self.params['num_workers']) self.progress_bar(persisted) dask.compute(persisted) self.logger.info('Cleaning up...') try: self._client.cluster.close() self._client.close() if self.params['verbose'] == logging.DEBUG: print() 
print('closed dask cluster/client') except Exception: pass def load_inputs(self, close=True): self._domain = self.domain.load() self._met_data = self.met_data.load() self._state = self.state.load() if close: self._domain.close() self._met_data.close() self._state.close() def setup_netcdf_output(self, filename, times): '''setup a single netcdf file''' with Dataset(filename, mode="w") as ncout: # dims dim_sizes = (None, ) + self.domain['mask'].shape var_dims = ('time', ) + self.domain['mask'].dims chunksizes = [len(times)] for d, s in zip(var_dims[1:], dim_sizes[1:]): c = int(self.params['chunks'].get(d, s)) if c <= s: chunksizes.append(c) else: chunksizes.append(s) create_kwargs = {'chunksizes': chunksizes} for d, size in zip(var_dims, dim_sizes): ncout.createDimension(d, size) # vars for varname, varconf in self.params['out_vars'].items(): ncout.createVariable(varconf['out_name'], self.params['out_precision'], var_dims, **create_kwargs) # add metadata and coordinate variables (time/lat/lon) time_var = ncout.createVariable('time', 'i4', ('time', )) time_var.calendar = self.params['calendar'] time_var[:] = date2num(times.to_pydatetime(), units=attrs['time'].get( 'units', DEFAULT_TIME_UNITS), calendar=time_var.calendar) dtype_map = { 'float64': 'f8', 'float32': 'f4', 'int64': 'i8', 'int32': 'i4' } for dim in self.domain['mask'].dims: dim_vals = self.domain[dim].values dim_dtype = dtype_map.get(str(dim_vals.dtype), self.params['out_precision']) dim_var = ncout.createVariable(dim, dim_dtype, (dim, )) dim_var[:] = dim_vals # parameters to not record in the metadata skip_params = [ 'elev', 'lat', 'lon', 'is_worker', 'out_vars', 'forcing_vars', 'domain_vars', 'state_vars', 'constant_vars', 'references', 'verbose', 'num_workers', ] for k, v in self.params.items(): if k in skip_params: continue # Need to convert some parameters to strings if k in ['start', 'stop', 'utc_offset', 'period_ending']: v = str(v) elif k in ['state_start', 'state_stop', 'out_freq']: # skip continue # Don't include complex types if isinstance(v, dict): v = json.dumps(v) elif not isinstance(v, str) and isinstance(v, Iterable): v = ', '.join(v) if isinstance(v, str): v = v.replace("'", "").replace('"', "") attrs['_global'][k] = v # set global attrs for key, val in attrs['_global'].items(): setattr(ncout, key, val) # set variable attrs for key, value in attrs.get('time', {}).items(): setattr(ncout.variables['time'], key, value) for varname, varconf in self.params['out_vars'].items(): outname = varconf['out_name'] for key, val in attrs.get(varname, {}).items(): setattr(ncout.variables[outname], key, val) def write_chunk(self, locks=None): '''write data from a single chunk''' if not len(self.params['out_vars']): return for times in self._times: filename = self._get_output_filename(times) lock = locks.get(filename, DummyLock()) time_slice = slice(times[0], times[-1]) with lock: with Dataset(filename, mode="r+") as ncout: for varname, varconf in self.params['out_vars'].items(): outname = varconf['out_name'] dims = ncout.variables[outname].dimensions[1:] write_slice = ((slice(None), ) + tuple(self._domain_slice[d] for d in dims)) ncout.variables[outname][write_slice] = ( self.output[varname].sel(time=time_slice).values) def run_slice(self): """ Run a single slice of """ self._validate_setup() self.disagg = int(self.params['time_step']) < cnst.MIN_PER_DAY self.method = MetSim.methods[self.params['method']] self.setup_output() times = self.met_data['time'] params = self.params.copy() # transform input parameters to floating point 
values params['sw_prec_thresh'] = float(params['sw_prec_thresh']) params['rain_scalar'] = float(params['rain_scalar']) params['tdew_tol'] = float(params['tdew_tol']) params['tmax_daylength_fraction'] = float( params['tmax_daylength_fraction']) params['tday_coef'] = float(params['tday_coef']) params['tmax_daylength_fraction'] = float( params['tmax_daylength_fraction']) params['lapse_rate'] = float(params['lapse_rate']) if self.params['prec_type'].upper() in ['TRIANGLE', 'MIX']: self.convert_monthly_param('dur') self.convert_monthly_param('t_pk') for index, mask_val in np.ndenumerate(self.domain['mask'].values): if mask_val > 0: locs = {d: i for d, i in zip(self.domain['mask'].dims, index)} else: continue df, state = wrap_run_cell(self.method.run, params, self.met_data.isel(**locs), self.state.isel(**locs), self.disagg, times) # Cut the returned data down to the correct time index # and do any required unit conversions for varname in self.params['out_vars']: desired_units = self.params['out_vars'][varname]['units'] out_vals = converters[varname][desired_units]( df[varname].values, int(self.params['time_step'])) self.output[varname][locs] = out_vals def _unpack_state(self, result: pd.DataFrame, locs: dict): """Put restart values in the state dataset""" # We concatenate with the old state values in case we don't # have 90 new days to use tmin = np.concatenate((self.state['t_min'].isel(**locs).values[:], result['t_min'].values)) tmax = np.concatenate((self.state['t_max'].isel(**locs).values[:], result['t_max'].values)) prec = np.concatenate( (self.state['prec'].isel(**locs).values[:], result['prec'].values)) self.state['t_min'].isel(**locs).values[:] = tmin[-90:] self.state['t_max'].isel(**locs).values[:] = tmax[-90:] self.state['prec'].isel(**locs).values[:] = prec[-90:] state_start = result.index[-1] - pd.Timedelta('89 days') self.state['time'].values = date_range( state_start, result.index[-1], calendar=self.params['calendar']) def _get_output_times(self, freq=None, period_ending=False): """ Generate chunked time vectors Parameters ---------- freq: Output frequency. Given as a Pandas timegrouper string. If not given, the entire timeseries will be used. period_ending: Flag to specify if output timesteps should be period- ending. Default is period-beginning Returns ------- times: A list of timeseries which represent each of times that output files will be created for. 
""" prototype = self.met_data self.disagg = int(self.params['time_step']) < cnst.MIN_PER_DAY if self.disagg: delta = pd.Timedelta('1 days') - pd.Timedelta('{} minutes'.format( self.params['time_step'])) else: delta = pd.Timedelta('0 days') if period_ending: offset = pd.Timedelta('{} minutes'.format( self.params['time_step'])) else: offset = pd.Timedelta('0 minutes') start = pd.Timestamp(prototype['time'].values[0]).to_pydatetime() stop = pd.Timestamp(prototype['time'].values[-1]).to_pydatetime() times = date_range(start + offset, stop + offset + delta, freq="{}T".format(self.params['time_step']), calendar=self.params['calendar']) if freq is None or freq == '': times = [times] else: dummy = pd.Series(np.arange(len(times)), index=times) grouper = pd.Grouper(freq=freq) times = [t.index for k, t in dummy.groupby(grouper)] return times def _get_output_filename(self, times): suffix = self.get_nc_output_suffix(times) fname = '{}_{}.nc'.format(self.params['out_prefix'], suffix) output_filename = os.path.join(os.path.abspath(self.params['out_dir']), fname) return output_filename def setup_output(self): # output times times = self._get_output_times( freq=None, period_ending=self.params['period_ending'])[0] # Number of timesteps n_ts = len(times) shape = (n_ts, ) + self.domain['mask'].shape dims = ('time', ) + self.domain['mask'].dims coords = {'time': times, **self.domain['mask'].coords} self.output = xr.Dataset(coords=coords) self.output['time'].encoding['calendar'] = self.params['calendar'] dtype = self.params['out_precision'] for varname in self.params['out_vars']: self.output[varname] = xr.DataArray(data=np.full(shape, np.nan, dtype=dtype), coords=coords, dims=dims, name=varname, attrs=attrs.get(varname, {})) self.output['time'].attrs.update(attrs['time']) def _aggregate_state(self): """Aggregate data out of the state file and load it into `met_data`""" # Precipitation record assert self.state.dims['time'] == 90, self.state['time'] record_dates = date_range(self.params['state_start'], self.params['state_stop'], calendar=self.params['calendar']) trailing = self.state['prec'] trailing['time'] = record_dates total_precip = xr.concat([trailing, self.met_data['prec']], dim='time').load() total_precip = ( cnst.DAYS_PER_YEAR * total_precip.rolling(time=90).mean().sel( time=slice(self.params['start'], self.params['stop']))) self.met_data['seasonal_prec'] = total_precip # Smoothed daily temperature range trailing = self.state['t_max'] - self.state['t_min'] trailing['time'] = record_dates dtr = self.met_data['t_max'] - self.met_data['t_min'] if (dtr < 0).any(): raise ValueError("Daily maximum temperature lower" " than daily minimum temperature!") sm_dtr = xr.concat([trailing, dtr], dim='time').load() sm_dtr = sm_dtr.rolling(time=30).mean().drop(record_dates, dim='time') self.met_data['dtr'] = dtr self.met_data['smoothed_dtr'] = sm_dtr def _validate_setup(self): """Updates the global parameters dictionary""" errs = [""] # Make sure there's some input if not len(self.params.get('forcing', [])): errs.append("Requires input forcings to be specified") # Make sure there is at least one forcing_var # They cannot all be constant since we use one as a template # for the others if not len(self.params.get('forcing_vars', [])): errs.append("Requires at least one non-constant forcing") # Parameters that can't be empty strings or None non_empty = ['out_dir', 'time_step', 'forcing_fmt'] for each in non_empty: if self.params.get(each, None) is None or self.params[each] == '': errs.append("Cannot have empty value for 
{}".format(each)) # Make sure time step divides evenly into a day if (cnst.MIN_PER_DAY % int(self.params.get('time_step', -1)) or (int(self.params['time_step']) > (6 * cnst.MIN_PER_HOUR) and int(self.params['time_step']) != cnst.MIN_PER_DAY)): errs.append("Time step must be evenly divisible into 1440 " "minutes (24 hours) and less than 360 minutes " "(6 hours). Got {}.".format(self.params['time_step'])) # Check for required input variable specification if self.met_data is not None: required_in = ['t_min', 't_max', 'prec'] for each in required_in: if each not in self.met_data.variables: errs.append("Input requires {}".format(each)) # Make sure that we are going to write out some data if not len(self.params.get('out_vars', [])): errs.append("Output variable list must not be empty") # Check output variables are valid daily_out_vars = [ 't_min', 't_max', 't_day', 'prec', 'vapor_pressure', 'shortwave', 'tskc', 'pet', 'wind' ] out_var_check = [ 'temp', 'prec', 'shortwave', 'vapor_pressure', 'air_pressure', 'rel_humid', 'spec_humid', 'longwave', 'tskc', 'wind' ] if int(self.params.get('time_step', -1)) == 1440: out_var_check = daily_out_vars for var in self.params.get('out_vars', []): if var not in out_var_check: errs.append('Cannot output variable {} at timestep {}'.format( var, self.params['time_step'])) # Check that the parameters specified are available opts = { 'out_precision': ['f4', 'f8'], 'lw_cloud': ['default', 'cloud_deardorff'], 'lw_type': [ 'default', 'tva', 'anderson', 'brutsaert', 'satterlund', 'idso', 'prata' ] } for k, v in opts.items(): if not self.params.get(k, None) in v: errs.append("Invalid option given for {}".format(k)) # If any errors, raise and give a summary if len(errs) > 1: raise Exception("\n ".join(errs)) def get_nc_output_suffix(self, times): s, e = times[[0, -1]] template = '{:04d}{:02d}{:02d}-{:04d}{:02d}{:02d}' return template.format( s.year, s.month, s.day, e.year, e.month, e.day, )
import output, logging
import load_subreddit_castra, make_subreddit_castra
from dask.diagnostics import ProgressBar
from pprint import pprint
import pandas as pd
import dask.dataframe as dd

logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
logging.getLogger('requests').setLevel(logging.CRITICAL)
logger = logging.getLogger(__name__)

# Start a progress bar for all computations
pbar = ProgressBar()
pbar.register()


def test(file_name):
    """
    # Subsetting the dataframe
    a = df[df.link_id == 't3_36k7u4'].compute()

    # Get multiple columns from the dataframe
    b = df[['author', 'subreddit']].compute()

    # Groupby operations
    c = df.groupby(['link_id', 'author'])['ups'].count().compute()
    c = df.groupby(df.link_id).ups.mean().compute()
    c = df.groupby(df.link_id).score.count().compute()

    # Drop duplicates
    d = df.author.drop_duplicates().compute()
def pca(a, b, n_pc, estimator_matrix, out_dir, n_threads, block_size, nodata): """Calculate the principal components for the vertical stack A or with combinations of the stack B :param A: first input raster data (fists period) :param B: second input raster data (second period) or None :param n_pc: number of principal components to output :param estimator_matrix: pca with correlation of covariance :param out_dir: directory to save the outputs :return: pca files list and statistics """ A = a B = b # get/set the nodata if nodata is None: ds = gdal.Open(A, gdal.GA_ReadOnly) nodata = ds.GetRasterBand(1).GetNoDataValue() del ds print("\nPRINCIPAL COMPONENTS ANALYSIS") print(" Compute {} components for:".format(n_pc)) print(" A: {}".format(A)) if B is not None: print(" B: {}".format(B)) # init dask as threads (shared memory is required) dask.config.set(pool=ThreadPool(n_threads)) # registered Dask progress bar pbar = ProgressBar() pbar.register() print("\nRead and prepare data:") raw_image = [] nodata_mask = None src_ds_A = gdal.Open(A, gdal.GA_ReadOnly) src_ds_B = None for band in range(src_ds_A.RasterCount): ds = src_ds_A.GetRasterBand(band + 1).ReadAsArray().flatten().astype( np.float32) if nodata is not None: nodata_mask = ds == nodata if nodata_mask is None else np.logical_or( nodata_mask, ds == nodata) raw_image.append(ds) if B is not None: src_ds_B = gdal.Open(B, gdal.GA_ReadOnly) for band in range(src_ds_B.RasterCount): ds = src_ds_B.GetRasterBand(band + 1).ReadAsArray().flatten().astype( np.float32) if nodata is not None: nodata_mask = np.logical_or(nodata_mask, ds == nodata) raw_image.append(ds) # pair-masking data, let only the valid data across all dimensions/bands if nodata is not None: raw_image = [b[~nodata_mask] for b in raw_image] # flat each dimension (bands) flat_dims = da.vstack(raw_image).rechunk((1, block_size**2)) # bands n_bands = flat_dims.shape[0] ######## # compute the mean of each band, in order to center the matrix. 
band_mean = [] for i in range(n_bands): band_mean.append(dask.delayed(np.mean)(flat_dims[i])) band_mean = dask.compute(*band_mean) ######## # compute the matrix correlation/covariance print("\nComputing the estimator matrix:") estimation_matrix = np.empty((n_bands, n_bands)) if estimator_matrix == "correlation": for i in range(n_bands): deviation_scores_band_i = flat_dims[i] - band_mean[i] for j in range(i, n_bands): deviation_scores_band_j = flat_dims[j] - band_mean[j] estimation_matrix[j][i] = estimation_matrix[i][j] = \ da.corrcoef(deviation_scores_band_i, deviation_scores_band_j)[0][1] if estimator_matrix == "covariance": for i in range(n_bands): deviation_scores_band_i = flat_dims[i] - band_mean[i] for j in range(i, n_bands): deviation_scores_band_j = flat_dims[j] - band_mean[j] estimation_matrix[j][i] = estimation_matrix[i][j] = \ da.cov(deviation_scores_band_i, deviation_scores_band_j)[0][1] # free mem del raw_image, flat_dims, src_ds_B, ds ######## # calculate eigenvectors & eigenvalues of the matrix # use 'eigh' rather than 'eig' since estimation_matrix # is symmetric, the performance gain is substantial eigenvals, eigenvectors = np.linalg.eigh(estimation_matrix) # sort eigenvalue in decreasing order idx_eigenvals = np.argsort(eigenvals)[::-1] eigenvectors = eigenvectors[:, idx_eigenvals] # sort eigenvectors according to same index eigenvals = eigenvals[idx_eigenvals] # select the first n eigenvectors (n is desired dimension # of rescaled data array, or dims_rescaled_data) eigenvectors = eigenvectors[:, :n_pc] ######## # save the principal components separated in tif images def get_raw_band_from_stack(band): src_ds_A = gdal.Open(A, gdal.GA_ReadOnly) if band < src_ds_A.RasterCount: return src_ds_A.GetRasterBand(band + 1).ReadAsArray().flatten().astype( np.float32) if band >= src_ds_A.RasterCount: src_ds_B = gdal.Open(B, gdal.GA_ReadOnly) return src_ds_B.GetRasterBand(band - src_ds_A.RasterCount + 1).ReadAsArray().flatten().astype( np.float32) @dask.delayed def get_principal_component(i, j): return eigenvectors[j, i] * (get_raw_band_from_stack(j) - band_mean[j]) print("\nComputing and saving the components in pca-stack.tif:") # save component as file tmp_pca_file = Path(out_dir) / 'pca-stack.tif' driver = gdal.GetDriverByName("GTiff") out_pc = driver.Create(str(tmp_pca_file), src_ds_A.RasterXSize, src_ds_A.RasterYSize, n_pc, gdal.GDT_Float32) for i in range(n_pc): pc = dask.delayed(sum)( [get_principal_component(i, j) for j in range(n_bands)]) pc = pc.astype(np.float32) pc = np.array(pc.compute()) if nodata is not None: pc[nodata_mask] = 0 pc = pc.reshape((src_ds_A.RasterYSize, src_ds_A.RasterXSize)) pcband = out_pc.GetRasterBand(i + 1) if nodata is not None: pcband.SetNoDataValue(0) pcband.WriteArray(pc) del pc, pcband # set projection and geotransform if src_ds_A.GetGeoTransform() is not None: out_pc.SetGeoTransform(src_ds_A.GetGeoTransform()) if src_ds_A.GetProjection() is not None: out_pc.SetProjection(src_ds_A.GetProjection()) out_pc.FlushCache() # free mem del src_ds_A, nodata_mask, out_pc print("\nDONE")
def main(argv=sys.argv[1:]):
    global LOG
    from satpy import Scene
    from satpy.resample import get_area_def
    from satpy.writers import compute_writer_results
    from dask.diagnostics import ProgressBar
    from polar2grid.core.script_utils import (
        setup_logging, rename_log_file, create_exc_handler)
    import argparse

    prog = os.getenv('PROG_NAME', sys.argv[0])
    # "usage: " will be printed at the top of this:
    usage = """%(prog)s -h
see available products:
    %(prog)s -r <reader> -w <writer> --list-products -f file1 [file2 ...]
basic processing:
    %(prog)s -r <reader> -w <writer> [options] -f file1 [file2 ...]
basic processing with limited products:
    %(prog)s -r <reader> -w <writer> [options] -p prod1 prod2 -f file1 [file2 ...]
"""
    parser = argparse.ArgumentParser(
        prog=prog, usage=usage,
        description="Load, composite, resample, and save datasets.")
    parser.add_argument('-v', '--verbose', dest='verbosity', action="count", default=0,
                        help='each occurrence increases verbosity 1 level through '
                             'ERROR-WARNING-INFO-DEBUG (default INFO)')
    parser.add_argument('-l', '--log', dest="log_fn", default=None,
                        help="specify the log filename")
    parser.add_argument('--progress', action='store_true',
                        help="show processing progress bar (not recommended for logged output)")
    parser.add_argument('--num-workers', type=int, default=4,
                        help="specify number of worker threads to use (default: 4)")
    parser.add_argument('--match-resolution', dest='preserve_resolution', action='store_false',
                        help="When using the 'native' resampler for composites, don't save data "
                             "at its native resolution, use the resolution used to create the "
                             "composite.")
    parser.add_argument('-w', '--writers', nargs='+',
                        help='writers to save datasets with')
    parser.add_argument("--list-products", dest="list_products", action="store_true",
                        help="List available reader products and exit")
    subgroups = add_scene_argument_groups(parser)
    subgroups += add_resample_argument_groups(parser)

    argv_without_help = [x for x in argv if x not in ["-h", "--help"]]
    args, remaining_args = parser.parse_known_args(argv_without_help)

    # get the logger if we know the readers and writers that will be used
    if args.reader is not None and args.writers is not None:
        glue_name = args.reader + "_" + "-".join(args.writers or [])
        LOG = logging.getLogger(glue_name)

    # add writer arguments
    if args.writers is not None:
        for writer in (args.writers or []):
            parser_func = WRITER_PARSER_FUNCTIONS.get(writer)
            if parser_func is None:
                continue
            subgroups += parser_func(parser)
    args = parser.parse_args(argv)

    if args.reader is None:
        parser.print_usage()
        parser.exit(1, "\nERROR: Reader must be provided (-r flag).\n"
                       "Supported readers:\n\t{}\n".format(
                           '\n\t'.join(['abi_l1b', 'ahi_hsd', 'hrit_ahi'])))
    if args.writers is None:
        parser.print_usage()
        parser.exit(1, "\nERROR: Writer must be provided (-w flag) with one or more writer.\n"
                       "Supported writers:\n\t{}\n".format('\n\t'.join(['geotiff'])))

    def _args_to_dict(group_actions):
        return {ga.dest: getattr(args, ga.dest)
                for ga in group_actions if hasattr(args, ga.dest)}

    scene_args = _args_to_dict(subgroups[0]._group_actions)
    load_args = _args_to_dict(subgroups[1]._group_actions)
    resample_args = _args_to_dict(subgroups[2]._group_actions)
    writer_args = {}
    for idx, writer in enumerate(args.writers):
        sgrp1, sgrp2 = subgroups[3 + idx * 2: 5 + idx * 2]
        wargs = _args_to_dict(sgrp1._group_actions)
        if sgrp2 is not None:
            wargs.update(_args_to_dict(sgrp2._group_actions))
        writer_args[writer] = wargs
        # get default output filename
        if 'filename' in wargs and wargs['filename'] is None:
            wargs['filename'] = get_default_output_filename(args.reader, writer)

    if not args.filenames:
        parser.print_usage()
        parser.exit(1, "\nERROR: No data files provided (-f flag)\n")

    # Prepare logging
    rename_log = False
    if args.log_fn is None:
        rename_log = True
        args.log_fn = glue_name + "_fail.log"
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    setup_logging(console_level=levels[min(3, args.verbosity)],
                  log_filename=args.log_fn)
    logging.getLogger('rasterio').setLevel(levels[min(2, args.verbosity)])
    sys.excepthook = create_exc_handler(LOG.name)
    if levels[min(3, args.verbosity)] > logging.DEBUG:
        import warnings
        warnings.filterwarnings("ignore")
    LOG.debug("Starting script with arguments: %s", " ".join(sys.argv))

    # Set up dask and the number of workers
    if args.num_workers:
        from multiprocessing.pool import ThreadPool
        dask.config.set(pool=ThreadPool(args.num_workers))

    # Parse provided files and search for files if provided directories
    scene_args['filenames'] = get_input_files(scene_args['filenames'])

    # Create a Scene, analyze the provided files
    LOG.info("Sorting and reading input files...")
    try:
        scn = Scene(**scene_args)
    except ValueError as e:
        LOG.error("{} | Enable debug message (-vvv) or see log file for details.".format(str(e)))
        LOG.debug("Further error information: ", exc_info=True)
        return -1
    except OSError:
        LOG.error("Could not open files. Enable debug message (-vvv) or see log file for details.")
        LOG.debug("Further error information: ", exc_info=True)
        return -1

    if args.list_products:
        print("\n".join(sorted(scn.available_dataset_names(composites=True))))
        return 0

    # Rename the log file
    if rename_log:
        rename_log_file(glue_name + scn.attrs['start_time'].strftime("_%Y%m%d_%H%M%S.log"))

    # Load the actual data arrays and metadata (lazy loaded as dask arrays)
    if load_args['products'] is None:
        try:
            reader_mod = importlib.import_module('polar2grid.readers.' + scene_args['reader'])
            load_args['products'] = reader_mod.DEFAULT_PRODUCTS
            LOG.info("Using default product list: {}".format(load_args['products']))
        except (ImportError, AttributeError):
            LOG.error("No default products list set, please specify with `--products`.")
            return -1
    LOG.info("Loading product metadata from files...")
    scn.load(load_args['products'])

    resample_kwargs = resample_args.copy()
    areas_to_resample = resample_kwargs.pop('grids')
    grid_configs = resample_kwargs.pop('grid_configs')
    resampler = resample_kwargs.pop('resampler')

    if areas_to_resample is None and resampler in [None, 'native']:
        # no areas specified
        areas_to_resample = ['MAX']
    elif areas_to_resample is None:
        raise ValueError("Resampling method specified (--method) without any destination grid/area (-g flag).")
    elif not areas_to_resample:
        # they don't want any resampling (they used '-g' with no args)
        areas_to_resample = [None]

    has_custom_grid = any(g not in ['MIN', 'MAX', None] for g in areas_to_resample)
    if has_custom_grid and resampler == 'native':
        LOG.error("Resampling method 'native' can only be used with 'MIN' or 'MAX' grids "
                  "(use 'nearest' method instead).")
        return -1

    p2g_grid_configs = [x for x in grid_configs if x.endswith('.conf')]
    pyresample_area_configs = [x for x in grid_configs if not x.endswith('.conf')]
    if not grid_configs or p2g_grid_configs:
        # if we were given p2g grid configs or we weren't given any to choose from
        from polar2grid.grids import GridManager
        grid_manager = GridManager(*p2g_grid_configs)
    else:
        grid_manager = {}

    if pyresample_area_configs:
        from pyresample.utils import parse_area_file
        custom_areas = parse_area_file(pyresample_area_configs)
        custom_areas = {x.area_id: x for x in custom_areas}
    else:
        custom_areas = {}

    ll_bbox = resample_kwargs.pop('ll_bbox')
    if ll_bbox:
        scn = scn.crop(ll_bbox=ll_bbox)

    wishlist = scn.wishlist.copy()
    preserve_resolution = get_preserve_resolution(args, resampler, areas_to_resample)
    if preserve_resolution:
        preserved_products = set(wishlist) & set(scn.datasets.keys())
        resampled_products = set(wishlist) - preserved_products
        # original native scene
        to_save = write_scene(scn, args.writers, writer_args, preserved_products)
    else:
        preserved_products = set()
        resampled_products = set(wishlist)
        to_save = []
    LOG.debug("Products to preserve resolution for: {}".format(preserved_products))
    LOG.debug("Products to use new resolution for: {}".format(resampled_products))

    for area_name in areas_to_resample:
        if area_name is None:
            # no resampling
            area_def = None
        elif area_name == 'MAX':
            area_def = scn.max_area()
        elif area_name == 'MIN':
            area_def = scn.min_area()
        elif area_name in custom_areas:
            area_def = custom_areas[area_name]
        elif area_name in grid_manager:
            from pyresample.geometry import DynamicAreaDefinition
            p2g_def = grid_manager[area_name]
            area_def = p2g_def.to_satpy_area()
            if isinstance(area_def, DynamicAreaDefinition) and p2g_def['cell_width'] is not None:
                area_def = area_def.freeze(
                    scn.max_area(),
                    resolution=(abs(p2g_def['cell_width']), abs(p2g_def['cell_height'])))
        else:
            area_def = get_area_def(area_name)

        if resampler is None and area_def is not None:
            rs = 'native' if area_name in ['MIN', 'MAX'] else 'nearest'
            LOG.debug("Setting default resampling to '{}' for grid '{}'".format(rs, area_name))
        else:
            rs = resampler

        if area_def is not None:
            LOG.info("Resampling data to '%s'", area_name)
            new_scn = scn.resample(area_def, resampler=rs, **resample_kwargs)
        elif not preserve_resolution:
            # the user didn't want to resample to any areas
            # the user also requested that we don't preserve resolution
            # which means we have to save this Scene's datasets
            # because they won't be saved
            new_scn = scn

        to_save = write_scene(new_scn, args.writers, writer_args,
                              resampled_products, to_save=to_save)

    if args.progress:
        pbar = ProgressBar()
        pbar.register()

    LOG.info("Computing products and saving data to writers...")
    compute_writer_results(to_save)
    LOG.info("SUCCESS")
    return 0
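
# Standalone sketch of the progress-bar-guarded compute used at the end of
# main() above; a plain dask array stands in for the satpy writer results,
# and show_progress plays the role of the --progress flag:
import dask.array as da
from dask.diagnostics import ProgressBar

result = da.random.random((2000, 2000), chunks=(500, 500)).mean()

show_progress = True
if show_progress:
    pbar = ProgressBar()
    pbar.register()  # later compute() calls on local schedulers report progress

print(result.compute())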
def _load_basic_dataframe(df_file=None, datatype='sim', config='IC86.2012',
                          energy_reco=True, energy_cut_key='reco_log_energy',
                          log_energy_min=None, log_energy_max=None,
                          columns=None, n_jobs=1, verbose=False,
                          compute=True):
    validate_datatype(datatype)
    if df_file is not None:
        files = df_file
    else:
        paths = get_config_paths()
        file_pattern = os.path.join(paths.comp_data_dir, config, datatype,
                                    'processed_hdf',
                                    'nominal' if datatype == 'sim' else '',
                                    '*.hdf')
        files = sorted(glob.glob(file_pattern))
    ddf = dd.read_hdf(files, key='dataframe', mode='r', columns=columns,
                      chunksize=10000)

    # Energy reconstruction
    if energy_reco:
        model_dict = load_trained_model(
            'linearregression_energy_{}'.format(config), return_metadata=True)
        pipeline = model_dict['pipeline']
        feature_list = list(model_dict['training_features'])

        def add_reco_energy(partition):
            partition['reco_log_energy'] = pipeline.predict(partition[feature_list])
            partition['reco_energy'] = 10**partition['reco_log_energy']
            return partition

        ddf = ddf.map_partitions(add_reco_energy)

    # Energy range cut
    if log_energy_min is not None and log_energy_max is not None:
        def apply_energy_cut(partition):
            energy_mask = (partition[energy_cut_key] > log_energy_min) & (
                partition[energy_cut_key] < log_energy_max)
            return partition.loc[energy_mask, :]

        ddf = ddf.map_partitions(apply_energy_cut)

    if compute:
        if verbose:
            pbar = ProgressBar()
            pbar.register()
        scheduler = 'processes' if n_jobs > 1 else 'synchronous'
        df = ddf.compute(scheduler=scheduler, num_workers=n_jobs)
        df = df.reset_index(drop=True)
    else:
        df = ddf

    return df
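
# Minimal sketch of the map_partitions pattern used above, with an in-memory
# pandas frame standing in for the HDF files (column names are illustrative):
import dask.dataframe as dd
import pandas as pd
from dask.diagnostics import ProgressBar

pdf = pd.DataFrame({'log_energy': [5.2, 6.1, 7.3, 4.8]})
ddf = dd.from_pandas(pdf, npartitions=2)

def add_energy(partition):
    # runs once per partition, on a plain pandas DataFrame
    partition['energy'] = 10**partition['log_energy']
    return partition

ddf = ddf.map_partitions(add_energy)

with ProgressBar():
    print(ddf.compute(scheduler='synchronous'))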
def main(argv=sys.argv[1:]):
    global LOG
    import satpy
    from satpy import Scene
    from satpy.writers import compute_writer_results
    from dask.diagnostics import ProgressBar
    from polar2grid.core.script_utils import (
        setup_logging, rename_log_file, create_exc_handler)
    import argparse

    dist = pkg_resources.get_distribution('polar2grid')
    if dist_is_editable(dist):
        p2g_etc = os.path.join(dist.module_path, 'etc')
    else:
        p2g_etc = os.path.join(sys.prefix, 'etc', 'polar2grid')
    config_path = satpy.config.get('config_path')
    if p2g_etc not in config_path:
        satpy.config.set(config_path=config_path + [p2g_etc])

    USE_POLAR2GRID_DEFAULTS = bool(
        int(os.environ.setdefault("USE_POLAR2GRID_DEFAULTS", "1")))

    prog = os.getenv('PROG_NAME', sys.argv[0])
    # "usage: " will be printed at the top of this:
    usage = """%(prog)s -h
see available products:
    %(prog)s -r <reader> -w <writer> --list-products -f file1 [file2 ...]
basic processing:
    %(prog)s -r <reader> -w <writer> [options] -f file1 [file2 ...]
basic processing with limited products:
    %(prog)s -r <reader> -w <writer> [options] -p prod1 prod2 -f file1 [file2 ...]
"""
    parser = argparse.ArgumentParser(
        prog=prog, usage=usage, fromfile_prefix_chars="@",
        description="Load, composite, resample, and save datasets.")
    parser.add_argument('-v', '--verbose', dest='verbosity', action="count", default=0,
                        help='each occurrence increases verbosity 1 level through '
                             'ERROR-WARNING-INFO-DEBUG (default INFO)')
    parser.add_argument('-l', '--log', dest="log_fn", default=None,
                        help="specify the log filename")
    parser.add_argument('--progress', action='store_true',
                        help="show processing progress bar (not recommended for logged output)")
    # cast the env-var default: argparse's type= only applies to CLI strings,
    # so a value taken from the environment would otherwise stay a str
    parser.add_argument('--num-workers', type=int,
                        default=int(os.getenv('DASK_NUM_WORKERS', 4)),
                        help="specify number of worker threads to use (default: 4)")
    parser.add_argument('--match-resolution', dest='preserve_resolution', action='store_false',
                        help="When using the 'native' resampler for composites, don't save data "
                             "at its native resolution, use the resolution used to create the "
                             "composite.")
    parser.add_argument("--list-products", dest="list_products", action="store_true",
                        help="List available reader products and exit")
    reader_group = add_scene_argument_groups(
        parser, is_polar2grid=USE_POLAR2GRID_DEFAULTS)[0]
    resampling_group = add_resample_argument_groups(
        parser, is_polar2grid=USE_POLAR2GRID_DEFAULTS)[0]
    writer_group = add_writer_argument_groups(parser)[0]
    subgroups = [reader_group, resampling_group, writer_group]

    argv_without_help = [x for x in argv if x not in ["-h", "--help"]]

    _retitle_optional_arguments(parser)
    args, remaining_args = parser.parse_known_args(argv_without_help)
    os.environ['DASK_NUM_WORKERS'] = str(args.num_workers)

    # get the logger if we know the readers and writers that will be used
    if args.readers is not None and args.writers is not None:
        glue_name = args.readers[0] + "_" + "-".join(args.writers or [])
        LOG = logging.getLogger(glue_name)

    # add writer arguments
    for writer in (args.writers or []):
        parser_func = WRITER_PARSER_FUNCTIONS.get(writer)
        if parser_func is None:
            continue
        subgroups += parser_func(parser)
    args = parser.parse_args(argv)

    if args.readers is None:
        parser.print_usage()
        parser.exit(1, "\nERROR: Reader must be provided (-r flag).\n"
                       "Supported readers:\n\t{}\n".format(
                           '\n\t'.join(['abi_l1b', 'ahi_hsd', 'hrit_ahi'])))
    elif len(args.readers) > 1:
        parser.print_usage()
        parser.exit(1, "\nMultiple readers is not currently supported. Got:\n\t"
                       "{}\n".format('\n\t'.join(args.readers)))
        return -1
    if args.writers is None:
        parser.print_usage()
        parser.exit(1, "\nERROR: Writer must be provided (-w flag) with one or more writer.\n"
                       "Supported writers:\n\t{}\n".format('\n\t'.join(['geotiff'])))

    def _args_to_dict(group_actions, exclude=None):
        if exclude is None:
            exclude = []
        return {ga.dest: getattr(args, ga.dest) for ga in group_actions
                if hasattr(args, ga.dest) and ga.dest not in exclude}

    reader_args = _args_to_dict(reader_group._group_actions)
    reader_names = reader_args.pop('readers')
    scene_creation = {
        'filenames': reader_args.pop('filenames'),
        'reader': reader_names[0],
    }
    load_args = {
        'products': reader_args.pop('products'),
    }
    # anything left in 'reader_args' is a reader-specific kwarg
    resample_args = _args_to_dict(resampling_group._group_actions)
    writer_args = _args_to_dict(writer_group._group_actions)
    # writer_args = {}
    subgroup_idx = 3
    for idx, writer in enumerate(writer_args['writers']):
        sgrp1, sgrp2 = subgroups[subgroup_idx + idx * 2:subgroup_idx + 2 + idx * 2]
        wargs = _args_to_dict(sgrp1._group_actions)
        if sgrp2 is not None:
            wargs.update(_args_to_dict(sgrp2._group_actions))
        writer_args[writer] = wargs
        # get default output filename
        if 'filename' in wargs and wargs['filename'] is None:
            wargs['filename'] = get_default_output_filename(args.readers[0], writer)

    if not args.filenames:
        parser.print_usage()
        parser.exit(1, "\nERROR: No data files provided (-f flag)\n")

    # Prepare logging
    rename_log = False
    if args.log_fn is None:
        rename_log = True
        args.log_fn = glue_name + "_fail.log"
    levels = [logging.ERROR, logging.WARN, logging.INFO, logging.DEBUG]
    setup_logging(console_level=levels[min(3, args.verbosity)],
                  log_filename=args.log_fn)
    logging.getLogger('rasterio').setLevel(levels[min(2, args.verbosity)])
    sys.excepthook = create_exc_handler(LOG.name)
    if levels[min(3, args.verbosity)] > logging.DEBUG:
        import warnings
        warnings.filterwarnings("ignore")
    LOG.debug("Starting script with arguments: %s", " ".join(sys.argv))

    # Set up dask and the number of workers
    if args.num_workers:
        dask.config.set(num_workers=args.num_workers)

    # Parse provided files and search for files if provided directories
    scene_creation['filenames'] = get_input_files(scene_creation['filenames'])

    # Create a Scene, analyze the provided files
    LOG.info("Sorting and reading input files...")
    try:
        scn = Scene(**scene_creation)
    except ValueError as e:
        LOG.error("{} | Enable debug message (-vvv) or see log file for details.".format(str(e)))
        LOG.debug("Further error information: ", exc_info=True)
        return -1
    except OSError:
        LOG.error("Could not open files. Enable debug message (-vvv) or see log file for details.")
        LOG.debug("Further error information: ", exc_info=True)
        return -1

    if args.list_products:
        print("\n".join(sorted(scn.available_dataset_names(composites=True))))
        return 0

    # Rename the log file
    if rename_log:
        rename_log_file(glue_name + scn.attrs['start_time'].strftime("_%Y%m%d_%H%M%S.log"))

    # Load the actual data arrays and metadata (lazy loaded as dask arrays)
    LOG.info("Loading product metadata from files...")
    load_args['products'] = _apply_default_products_and_aliases(
        scn, scene_creation['reader'], load_args['products'])
    if not load_args['products']:
        return -1
    scn.load(load_args['products'])

    ll_bbox = resample_args.pop('ll_bbox')
    if ll_bbox:
        scn = scn.crop(ll_bbox=ll_bbox)

    scn = filter_scene(
        scn, reader_names,
        sza_threshold=reader_args['sza_threshold'],
        day_fraction=reader_args['filter_day_products'],
        night_fraction=reader_args['filter_night_products'],
    )
    if scn is None:
        LOG.info("No remaining products after filtering.")
        return 0

    to_save = []
    areas_to_resample = resample_args.pop("grids")
    if 'ewa_persist' in resample_args:
        resample_args['persist'] = resample_args.pop('ewa_persist')
    scenes_to_save = resample_scene(
        scn, areas_to_resample,
        preserve_resolution=args.preserve_resolution,
        is_polar2grid=USE_POLAR2GRID_DEFAULTS,
        **resample_args)
    for scene_to_save, products_to_save in scenes_to_save:
        overwrite_platform_name_with_aliases(scene_to_save)
        to_save = write_scene(scene_to_save, writer_args['writers'],
                              writer_args, products_to_save, to_save=to_save)

    if args.progress:
        pbar = ProgressBar()
        pbar.register()

    LOG.info("Computing products and saving data to writers...")
    compute_writer_results(to_save)
    LOG.info("SUCCESS")
    return 0
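
# Sketch contrasting the two worker-count configurations used by the two glue
# scripts above: the older one hands dask an explicit thread pool, the newer
# one sets the threaded scheduler's worker count directly. Both forms of
# dask.config.set also work as context managers, as shown here:
import dask
import dask.array as da
from multiprocessing.pool import ThreadPool

x = da.random.random((1000, 1000), chunks=(250, 250)).sum()

# older style: give dask an explicit pool object to run tasks on
with dask.config.set(pool=ThreadPool(4)):
    print(x.compute())

# newer style: let dask build its own pool of the requested size
with dask.config.set(num_workers=4):
    print(x.compute())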
def get(*args, **kwargs):
    """Run a graph through the client with a registered progress bar.

    Note: dask.diagnostics.ProgressBar reports only for the local
    (threaded/synchronous/multiprocessing) schedulers.
    """
    pbar = ProgressBar()
    pbar.register()
    try:
        out = client.get(*args, **kwargs)
    finally:
        # make sure the bar is unregistered even if the computation fails
        pbar.unregister()
    return out
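
# Hypothetical usage of the wrapper above, assuming `client` is defined in the
# surrounding module and exposes a dask-scheduler-compatible get(); a tiny
# hand-built task graph keeps the example concrete:
from operator import add

dsk = {'x': 1, 'y': 2, 'z': (add, 'x', 'y')}
result = get(dsk, 'z')  # -> 3, with progress reported on local schedulers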