def _create_timeseries(inventory_df, config_file):
    """Helper function to create Timeseries out of OST ARD products

    Based on the inventory GeoDataFrame and the configuration file, this
    function triggers the time-series processing for all bursts/tracks
    within the respective project. Each product/polarisation is treated
    singularly.

    Based on the ARD type/configuration settings, the function uses
    SNAP's Create-Stack function to unify the grid of each scene and
    applies a multi-temporal speckle filter if selected.

    The output are single GeoTiff files, whereas there is the possibility
    to reduce the data by converting the data format into uint8 or uint16.
    This is done by linearly stretching the data between -30 and +5 for
    backscatter, 0 and 1 for coherence, polarimetric anisotropy and
    entropy, as well as 0 and 90 for the polarimetric alpha channel.
    All the data is cropped to the same extent based on the minimum
    bounds layer.

    This function executes the underlying functions using the godale
    framework for parallel execution. Executor type and number of parallel
    processes is defined within the configuration file.

    :param inventory_df: OST inventory
    :type inventory_df: GeoDataFrame
    :param config_file: path to the project configuration file
    :type config_file: str/Path
    :return: DataFrame with one row per executed time-series job
    """

    # load project configuration
    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = Path(config_dict['processing_dir'])

    # assemble one job per track/polarisation combination
    iter_list = []
    for track in inventory_df.relativeorbit.unique():

        # get the burst directory
        track_dir = processing_dir.joinpath(track)

        for pol in ['VV', 'VH', 'HH', 'HV']:

            # see if there is actually any imagery in this polarisation
            # (*ma0* matches the Gamma0/Sigma0 backscatter band names)
            list_of_files = sorted(
                str(file) for file in
                list(track_dir.glob(f'20*/*data*/*ma0*{pol}*img')))

            # a time-series needs more than one scene
            if len(list_of_files) <= 1:
                continue

            # create list of dims if polarisation is present
            list_of_dims = sorted(
                str(dim) for dim in list(track_dir.glob('20*/*bs*dim')))

            iter_list.append([list_of_dims, track, 'bs', pol])

    # godale executor; works also with a single worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # collect per-job results for the returned DataFrame
    out_dict = {
        'track': [], 'list_of_dims': [], 'out_files': [],
        'out_vrt': [], 'product': [], 'error': []
    }
    for task in executor.as_completed(func=ard_to_ts.gd_ard_to_ts,
                                      iterable=iter_list,
                                      fargs=([str(config_file), ])):

        track, list_of_dims, out_files, out_vrt, product, error = task.result()
        out_dict['track'].append(track)
        out_dict['list_of_dims'].append(list_of_dims)
        out_dict['out_files'].append(out_files)
        out_dict['out_vrt'].append(out_vrt)
        out_dict['product'].append(product)
        out_dict['error'].append(error)

    return pd.DataFrame.from_dict(out_dict)
def timeseries_to_timescan(inventory_df, config_file):
    """Create timescan layers from the backscatter time-series per track.

    Builds one timescan job per track/polarisation for which a
    time-series VRT exists, runs all jobs in parallel via godale and
    afterwards creates the per-track timescan VRTs.

    :param inventory_df: OST inventory with a `relativeorbit` column
    :param config_file: path to the project configuration file
    :return: DataFrame with one row per executed timescan job
    """

    # read the relevant processing parameters from the project config
    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = Path(config_dict['processing_dir'])
        ard = config_dict['processing']['single_ARD']
        ard_mt = config_dict['processing']['time-series_ARD']
        ard_tscan = config_dict['processing']['time-scan_ARD']

    # db scaling applies if either single or multi-temporal ARD uses it
    to_db = True if (ard['to_db'] or ard_mt['to_db']) else ard['to_db']

    # any non-float32 output implies a datatype conversion
    dtype_conversion = ard_mt['dtype_output'] != 'float32'

    iter_list, vrt_iter_list = [], []
    for track in inventory_df.relativeorbit.unique():

        track_dir = processing_dir.joinpath(track)

        # each track gets its own Timescan directory
        timescan_dir = track_dir.joinpath('Timescan')
        timescan_dir.mkdir(parents=True, exist_ok=True)

        for pol in ['VV', 'VH', 'HH', 'HV']:

            # skip polarisations that are already done
            if timescan_dir.joinpath(f'.bs.{pol}.processed').exists():
                logger.info(f'Timescans for track {track} already processed.')
                continue

            # a timescan needs the time-series VRT as input
            time_series = track_dir.joinpath(
                f'Timeseries/Timeseries.bs.{pol}.vrt')
            if not time_series.exists():
                continue

            # scene dates, needed for the harmonics calculation
            datelist = [
                scene.name.split('.')[1]
                for scene in sorted(track_dir.glob(f'Timeseries/*bs.{pol}.tif'))
            ]

            iter_list.append([
                time_series, timescan_dir.joinpath(f'bs.{pol}'),
                ard_tscan['metrics'], dtype_conversion, to_db,
                ard_tscan['remove_outliers'], datelist
            ])
            vrt_iter_list.append(timescan_dir)

    # godale executor; works also with a single worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run timescan creation and collect the per-job results
    out_dict = {'track': [], 'prefix': [], 'metrics': [], 'error': []}
    for task in executor.as_completed(func=timescan.gd_mt_metrics,
                                      iterable=iter_list):
        track_id, prefix, metrics, error = task.result()
        out_dict['track'].append(track_id)
        out_dict['prefix'].append(prefix)
        out_dict['metrics'].append(metrics)
        out_dict['error'].append(error)

    timescan_df = pd.DataFrame.from_dict(out_dict)

    # create one VRT per timescan directory
    for task in executor.as_completed(func=ras.create_tscan_vrt,
                                      iterable=vrt_iter_list,
                                      fargs=([str(config_file), ])):
        task.result()

    return timescan_df
def combine_timeseries(processing_dir, config_dict, timescan=True):
    """Combine the time-series of all tracks into one 'combined' stack.

    Copies every per-track time-series layer into a common
    ``combined/Timeseries`` folder (re-numbered by acquisition date),
    builds one VRT per product type and - optionally - queues and runs
    the timescan processing on the combined stacks.

    :param processing_dir: the project's processing directory
    :type processing_dir: Path
    :param config_dict: the project configuration
    :type config_dict: dict
    :param timescan: whether to also create timescans, defaults to True
    :type timescan: bool
    """

    # namespaces for folders; start from a clean 'combined' directory
    comb_dir = processing_dir.joinpath('combined')
    if comb_dir.exists():
        h.remove_folder_content(comb_dir)

    tseries_dir = comb_dir.joinpath('Timeseries')
    tseries_dir.mkdir(parents=True, exist_ok=True)

    # all product types that may be present in the per-track time-series
    product_list = [
        'bs.HH', 'bs.VV', 'bs.HV', 'bs.VH',
        'coh.VV', 'coh.VH', 'coh.HH', 'coh.HV',
        'pol.Entropy', 'pol.Anisotropy', 'pol.Alpha'
    ]

    # BUGFIX: hoist the timescan setup out of the product loop - before,
    # the `ts` import and `tscan_dir` only existed if at least one product
    # had data, causing a NameError in the executor block otherwise
    if timescan:
        from ost.generic import timescan as ts

        ard = config_dict['processing']['single_ARD']
        ard_mt = config_dict['processing']['time-series_ARD']
        ard_tscan = config_dict['processing']['time-scan_ARD']

        # db scaling applies if either single or multi-temporal ARD uses it
        to_db = ard['to_db']
        if ard['to_db'] or ard_mt['to_db']:
            to_db = True

        dtype_conversion = ard_mt['dtype_output'] != 'float32'

        tscan_dir = comb_dir.joinpath('Timescan')
        tscan_dir.mkdir(parents=True, exist_ok=True)

    iter_list = []
    for product_type in product_list:

        filelist = list(
            processing_dir.glob(f'*/Timeseries/*{product_type}.tif'))

        # a combined time-series needs more than one layer
        if len(filelist) <= 1:
            continue

        # BUGFIX: reset per product type - previously the list accumulated
        # across products, so later VRTs also contained all layers of the
        # preceding product types
        out_files = []

        datelist = sorted([file.name.split('.')[1] for file in filelist])

        # copy each layer, re-numbered by its position in the date order
        for i, date in enumerate(datelist):
            file = list(
                processing_dir.glob(
                    f'*/Timeseries/*{date}*{product_type}.tif'))
            outfile = tseries_dir.joinpath(
                f'{i+1:02d}.{date}.{product_type}.tif')
            shutil.copy(file[0], str(outfile))
            out_files.append(str(outfile))

        # build one stacked VRT over all dates of this product type
        vrt_options = gdal.BuildVRTOptions(srcNodata=0, separate=True)
        out_vrt = str(
            tseries_dir.joinpath(f'Timeseries.{product_type}.vrt'))
        gdal.BuildVRT(str(out_vrt), out_files, options=vrt_options)

        if timescan:
            # get the time-series vrt we just created
            time_series = tseries_dir.joinpath(
                f'Timeseries.{product_type}.vrt')
            if not time_series.exists():
                continue

            # create a datelist for the harmonics calculation
            scene_list = [
                str(file)
                for file in tseries_dir.glob(f'*{product_type}.tif')
            ]
            datelist = []
            for file in sorted(scene_list):
                datelist.append(os.path.basename(file).split('.')[1])

            # define timescan prefix and queue the timescan job
            timescan_prefix = tscan_dir.joinpath(f'{product_type}')
            iter_list.append([
                time_series, timescan_prefix, ard_tscan['metrics'],
                dtype_conversion, to_db, ard_tscan['remove_outliers'],
                datelist
            ])

    if timescan:
        # now we run with godale, which works also with 1 worker
        executor = Executor(executor=config_dict['executor_type'],
                            max_workers=config_dict['max_workers'])

        # run timescan creation and collect the per-job results
        out_dict = {'track': [], 'prefix': [], 'metrics': [], 'error': []}
        for task in executor.as_completed(func=ts.gd_mt_metrics,
                                          iterable=iter_list):
            burst, prefix, metrics, error = task.result()
            out_dict['track'].append(burst)
            out_dict['prefix'].append(prefix)
            out_dict['metrics'].append(metrics)
            out_dict['error'].append(error)

        create_tscan_vrt(tscan_dir, config_dict)
def mosaic_timeseries(inventory_df, config_file):
    """Mosaic the per-track time-series layers over all tracks.

    For every polarisation, the layers with the same time-step number
    are mosaicked into one output file in ``Mosaic/Timeseries``, then a
    VRT is built per polarisation. Jobs run in parallel via godale.

    :param inventory_df: OST inventory with a `relativeorbit` column
    :param config_file: path to the project configuration file
    """

    print(' -----------------------------------')
    logger.info('Mosaicking Time-series layers')
    print(' -----------------------------------')

    # -------------------------------------
    # 1 load project config
    with open(config_file, 'r') as ard_file:
        config_dict = json.load(ard_file)
        processing_dir = Path(config_dict['processing_dir'])

    # create output folder
    ts_dir = processing_dir.joinpath('Mosaic/Timeseries')
    ts_dir.mkdir(parents=True, exist_ok=True)

    # loop through polarisations
    iter_list, vrt_iter_list = [], []
    for pol in ['VV', 'VH', 'HH', 'HV']:

        # number of time-steps, taken from the first track
        tracks = inventory_df.relativeorbit.unique()
        nr_of_ts = len(list(
            processing_dir.joinpath(f'{tracks[0]}/Timeseries').glob(
                f'*.{pol}.tif')))

        # nothing to mosaic for this polarisation
        if nr_of_ts < 1:
            continue

        outfiles = []
        for idx in range(1, nr_of_ts + 1):

            # layers of all tracks for this time-step, excluding mosaics
            paths = [
                str(path) for path in
                processing_dir.glob(f'*/Timeseries/{idx:02d}.*.{pol}.tif')
                if 'Mosaic' not in str(path)
            ]

            # acquisition dates of the contributing layers
            dates = sorted(Path(path).name.split('.')[1] for path in paths)
            start, end = dates[0], dates[-1]

            filelist = ' '.join(paths)

            # single date vs. date-range naming
            if start == end:
                outfile = ts_dir.joinpath(f'{idx:02d}.{start}.bs.{pol}.tif')
            else:
                outfile = ts_dir.joinpath(
                    f'{idx:02d}.{start}-{end}.bs.{pol}.tif')

            check_file = outfile.parent.joinpath(
                f'.{outfile.stem}.processed')
            outfiles.append(outfile)

            if check_file.exists():
                logger.info(f'Mosaic layer {outfile.name} already processed.')
                continue

            logger.info(f'Mosaicking layer {outfile.name}.')
            iter_list.append([filelist, outfile, config_file])

        vrt_iter_list.append([ts_dir, pol, outfiles])

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run mosaicking
    for task in executor.as_completed(func=mosaic.gd_mosaic,
                                      iterable=iter_list):
        task.result()

    # run mosaicking vrts
    for task in executor.as_completed(
            func=mosaic.create_timeseries_mosaic_vrt,
            iterable=vrt_iter_list):
        task.result()
def mosaic_timescan(burst_inventory, config_file):
    """Mosaic the per-track timescan layers into project-wide layers.

    First mosaics, per track, the timescan layers of each product/metric
    combination into a temporary file; then mosaics these per-track files
    into one final layer per product/metric under ``Mosaic/Timescan`` and
    builds the timescan VRT.

    :param burst_inventory: OST burst inventory with a `Track` column
    :param config_file: path to the project configuration file
    :return: None
    """

    print(' -----------------------------------------------------------------')
    logger.info('Mosaicking time-scan layers.')
    print(' -----------------------------------------------------------------')

    # -------------------------------------
    # 1 load project config
    with open(config_file, 'r') as ard_file:
        config_dict = json.load(ard_file)
        processing_dir = Path(config_dict['processing_dir'])
        metrics = config_dict['processing']['time-scan_ARD']['metrics']

    # expand composite metrics into their actual output layer names
    if 'harmonics' in metrics:
        metrics.remove('harmonics')
        metrics.extend(['amplitude', 'phase', 'residuals'])

    if 'percentiles' in metrics:
        metrics.remove('percentiles')
        metrics.extend(['p95', 'p5'])

    # create output folders
    ts_dir = processing_dir.joinpath('Mosaic/Timescan')
    ts_dir.mkdir(parents=True, exist_ok=True)
    temp_mosaic = processing_dir.joinpath('Mosaic/temp')
    temp_mosaic.mkdir(parents=True, exist_ok=True)

    # -------------------------------------
    # 2 create iterable for the per-track acquisition mosaics
    iter_list = []
    for product, metric in itertools.product(PRODUCT_LIST, metrics):
        for track in burst_inventory.Track.unique():

            # NOTE(review): '[A,D]' is a glob character class that also
            # matches a literal comma; presumably only 'A'/'D' (orbit
            # direction) prefixes exist - kept as-is for compatibility
            filelist = list(
                processing_dir.glob(
                    f'[A,D]{track}_IW*/Timescan/*{product}.{metric}.tif'))

            if not filelist:
                continue

            # output file for this track's acquisition mosaic
            # (the former `if temp_acq:` guard was dropped - a Path
            # object is always truthy, so the check never did anything)
            temp_acq = temp_mosaic.joinpath(
                f'{track}.{product}.{metric}.tif')
            iter_list.append([track, metric, product, temp_acq, config_file])

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run per-track acquisition mosaicking
    for task in executor.as_completed(func=mosaic.gd_mosaic_slc_acquisition,
                                      iterable=iter_list):
        task.result()

    # -------------------------------------
    # 3 mosaic the per-track files into the final layers
    iter_list = []
    for product, metric in itertools.product(PRODUCT_LIST, metrics):

        list_of_files = list(temp_mosaic.glob(f'*{product}.{metric}.tif'))
        if not list_of_files:
            continue

        # turn to OTB readable format
        list_of_files = ' '.join([str(file) for file in list_of_files])

        # create namespace for outfile
        outfile = ts_dir.joinpath(f'{product}.{metric}.tif')
        check_file = outfile.parent.joinpath(f'.{outfile.stem}.processed')

        if check_file.exists():
            logger.info(f'Mosaic layer {outfile.name} already processed.')
            continue

        logger.info(f'Mosaicking layer {outfile.name}.')
        iter_list.append([list_of_files, outfile, config_file])

    # run final mosaicking (the executor instance is reused)
    for task in executor.as_completed(func=mosaic.gd_mosaic,
                                      iterable=iter_list):
        task.result()

    # create the timescan VRT and remove the temp folder
    ras.create_tscan_vrt(ts_dir, config_file)
    h.remove_folder_content(temp_mosaic)
def mosaic_timeseries(burst_inventory, config_file):
    """Mosaic the per-burst time-series layers into project-wide layers.

    First mosaics, per track and acquisition date, the burst layers of
    each product into temporary files under ``Mosaic/temp``; then
    mosaics these per-track files over all tracks into the final
    ``Mosaic/Timeseries`` layers and builds one VRT per product.

    :param burst_inventory: OST burst inventory (`Track`/`Date` columns)
    :param config_file: path to the project config file
    """

    print(' -----------------------------------------------------------------')
    logger.info('Mosaicking time-series layers.')
    print(' -----------------------------------------------------------------')

    # -------------------------------------
    # 1 load project config
    with open(config_file, 'r') as ard_file:
        config_dict = json.load(ard_file)
        processing_dir = Path(config_dict['processing_dir'])

    # create output folder
    ts_dir = processing_dir.joinpath('Mosaic/Timeseries')
    ts_dir.mkdir(parents=True, exist_ok=True)

    temp_mosaic = processing_dir.joinpath('Mosaic/temp')
    temp_mosaic.mkdir(parents=True, exist_ok=True)

    # -------------------------------------
    # 2 create iterable
    # loop through each product
    iter_list, vrt_iter_list = [], []
    for product in PRODUCT_LIST:

        for track in burst_inventory.Track.unique():

            # sorted acquisition dates of this track (first two chars,
            # i.e. the century digits, are cut off)
            dates = [
                date[2:] for date in sorted(burst_inventory.Date[
                    burst_inventory.Track == track].unique())
            ]

            for i, date in enumerate(dates):

                if 'coh' in product:
                    # we do the try, since for the last date
                    # there is no dates[i+1] for coherence
                    try:
                        temp_acq = temp_mosaic.joinpath(
                            f'{i}.{date}.{dates[i + 1]}.{track}.{product}.tif')
                    except IndexError:
                        temp_acq = None
                else:
                    temp_acq = temp_mosaic.joinpath(
                        f'{i}.{date}.{track}.{product}.tif')

                # temp_acq is None only for the last coherence date
                if temp_acq:
                    iter_list.append(
                        [track, date, product, temp_acq, config_file])

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run vrt creation
    for task in executor.as_completed(func=mosaic.gd_mosaic_slc_acquisition,
                                      iterable=iter_list):
        task.result()

    # mosaic the acquisitions
    # NOTE(review): `dates` below still holds the dates of the LAST track
    # from the loop above - this assumes no track has more time-steps
    # than that one; verify if track lengths differ
    iter_list, vrt_iter_list = [], []
    for product in PRODUCT_LIST:

        outfiles = []
        for i in range(len(dates)):

            list_of_files = list(temp_mosaic.glob(f'{i}.*{product}.tif'))
            if not list_of_files:
                continue

            datelist = []
            for file in list_of_files:
                if 'coh' in product:
                    # coherence names carry two dates; combined here as
                    # '<second date>_<first date>' for sorting
                    datelist.append(
                        f"{file.name.split('.')[2]}_{file.name.split('.')[1]}")
                else:
                    datelist.append(file.name.split('.')[1])

            # get start and end date of mosaic
            start, end = sorted(datelist)[0], sorted(datelist)[-1]
            list_of_files = ' '.join([str(file) for file in list_of_files])

            # create namespace for output file
            if start == end:
                outfile = ts_dir.joinpath(f'{i + 1:02d}.{start}.{product}.tif')

                # with the above operation, the list automatically
                # turns into string, so we can call directly list_of_files
                # NOTE(review): this move only works if there is exactly
                # one file - with several same-date files the joined,
                # space-separated string is not a valid path; confirm
                shutil.move(list_of_files, outfile)
                outfiles.append(outfile)
                continue
            else:
                outfile = ts_dir.joinpath(
                    f'{i + 1:02d}.{start}-{end}.{product}.tif')

            # create namespace for check_file
            check_file = outfile.parent.joinpath(
                f'.{outfile.name[:-4]}.processed')

            if check_file.exists():
                logger.info(f'Mosaic layer {outfile} already processed.')
                continue

            # append to list of outfile for vrt creation
            outfiles.append(outfile)
            iter_list.append([list_of_files, outfile, config_file])

        vrt_iter_list.append([ts_dir, product, outfiles])

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run mosaicking
    for task in executor.as_completed(func=mosaic.gd_mosaic,
                                      iterable=iter_list):
        task.result()

    # run mosaicking vrts
    for task in executor.as_completed(func=mosaic.create_timeseries_mosaic_vrt,
                                      iterable=vrt_iter_list):
        task.result()

    # remove temp folder
    h.remove_folder_content(temp_mosaic)
def timeseries_to_timescan(burst_gdf, config_file):
    """Function to create a timescan out of a OST timeseries.

    Builds one timescan job per burst/product for which a time-series
    VRT exists, runs all jobs in parallel via godale and afterwards
    creates the per-burst timescan VRTs.

    :param burst_gdf: OST burst inventory with a `bid` column
    :param config_file: path to the project configuration file
    :return: DataFrame with one row per executed timescan job
    """

    print('--------------------------------------------------------------')
    logger.info('Processing all burst ARDs time-series to ARD timescans')
    print('--------------------------------------------------------------')

    # -------------------------------------
    # 1 load project config
    with open(config_file, 'r') as ard_file:
        config_dict = json.load(ard_file)
        processing_dir = config_dict['processing_dir']
        ard = config_dict['processing']['single_ARD']
        ard_mt = config_dict['processing']['time-series_ARD']
        ard_tscan = config_dict['processing']['time-scan_ARD']

    # db scaling applies if either single or multi-temporal ARD uses it
    to_db = bool(ard['to_db'] or ard_mt['to_db'])

    # any non-float32 output implies a datatype conversion
    dtype_conversion = ard_mt['dtype_output'] != 'float32'

    # -------------------------------------
    # 2 create iterable for parallel processing
    iter_list, vrt_iter_list = [], []
    for burst in burst_gdf.bid.unique():

        burst_dir = Path(processing_dir).joinpath(burst)
        timescan_dir = burst_dir.joinpath('Timescan')
        timescan_dir.mkdir(parents=True, exist_ok=True)

        for product in PRODUCT_LIST:

            # skip products that are already done
            if timescan_dir.joinpath(f'.{product}.processed').exists():
                logger.debug(f'Timescans for burst {burst} already processed.')
                continue

            # not every product exists for every burst
            timeseries = burst_dir.joinpath(
                f'Timeseries/Timeseries.{product}.vrt')
            if not timeseries.exists():
                continue

            # YYYYMM dates for the harmonics calculation
            datelist = [
                scene.name.split('.')[1][:6]
                for scene in sorted(
                    burst_dir.glob(f'Timeseries/*{product}*tif'))
            ]

            timescan_prefix = timescan_dir.joinpath(product)

            # rescaling and db conversion only apply to backscatter
            if 'bs.' in str(timescan_prefix):
                to_power, rescale = to_db, dtype_conversion
            else:
                to_power, rescale = False, False

            iter_list.append([
                timeseries, timescan_prefix, ard_tscan['metrics'],
                rescale, to_power, ard_tscan['remove_outliers'], datelist
            ])
            vrt_iter_list.append(timescan_dir)

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run timescan creation and collect the per-job results
    out_dict = {'burst': [], 'prefix': [], 'metrics': [], 'error': []}
    for task in executor.as_completed(func=timescan.gd_mt_metrics,
                                      iterable=iter_list):
        burst_id, prefix, metrics, error = task.result()
        out_dict['burst'].append(burst_id)
        out_dict['prefix'].append(prefix)
        out_dict['metrics'].append(metrics)
        out_dict['error'].append(error)

    df = pd.DataFrame.from_dict(out_dict)

    # run vrt creation
    for task in executor.as_completed(func=ras.create_tscan_vrt,
                                      iterable=vrt_iter_list,
                                      fargs=([str(config_file), ])):
        task.result()

    return df
def bursts_to_ards(burst_gdf, config_file):
    """Batch processing from single bursts to ARD format

    This function handles the burst processing based on a OST burst
    inventory file and an OST config file that contains all necessary
    information about the project (e.g. project directory) and processing
    steps applied for the ARD generation based on the JSON ARD-type
    templates. Executor type and number of parallel workers are read
    from the config file ('executor_type'/'max_workers').

    :param burst_gdf: an OST burst inventory
    :type burst_gdf: GeoDataFrame
    :param config_file: path to the project config file
    :type config_file: str/Path
    :return: DataFrame with one row per processed burst acquisition
    :rtype: DataFrame
    """

    print('--------------------------------------------------------------')
    logger.info('Processing all single bursts to ARD')
    print('--------------------------------------------------------------')

    logger.info('Preparing the processing pipeline. This may take a moment.')
    proc_inventory = prepare_burst_inventory(burst_gdf, config_file)

    # read the executor settings from the project configuration
    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        executor_type = config_dict['executor_type']
        max_workers = config_dict['max_workers']

    # we update max_workers in case we have less snap_cpu_parallelism
    # than cpus available
    if (max_workers == 1
            and config_dict['snap_cpu_parallelism'] < os.cpu_count()):
        max_workers = int(os.cpu_count() / config_dict['snap_cpu_parallelism'])

    # now we run with godale, which works also with 1 worker
    out_dict = {
        'burst': [], 'acq_date': [], 'out_bs': [], 'out_ls': [],
        'out_pol': [], 'out_coh': [], 'error': []
    }
    executor = Executor(executor=executor_type, max_workers=max_workers)

    for task in executor.as_completed(func=burst_to_ard,
                                      iterable=proc_inventory.iterrows(),
                                      fargs=([str(config_file), ])):

        burst, date, out_bs, out_ls, out_pol, out_coh, error = task.result()
        out_dict['burst'].append(burst)
        out_dict['acq_date'].append(date)
        out_dict['out_bs'].append(out_bs)
        out_dict['out_ls'].append(out_ls)
        out_dict['out_pol'].append(out_pol)
        out_dict['out_coh'].append(out_coh)
        out_dict['error'].append(error)

    return pd.DataFrame.from_dict(out_dict)
def _create_timeseries(burst_gdf, config_file):
    """Helper function to create time-series out of burst ARD products.

    For every burst and every product/polarisation combination with more
    than one acquisition, a time-series processing job is assembled and
    run in parallel via godale's Executor.

    :param burst_gdf: OST burst inventory with a `bid` column
    :type burst_gdf: GeoDataFrame
    :param config_file: path to the project configuration file
    :type config_file: str/Path
    :return: DataFrame with one row per executed time-series job
    :rtype: DataFrame
    """

    # product type and the band-name pattern to search for in the imagery
    product_types = {
        ('bs', 'Gamma0'), ('bs', 'Sigma0'), ('coh', 'coh'), ('pol', 'pol')
    }
    pols = ['VV', 'VH', 'HH', 'HV', 'Alpha', 'Entropy', 'Anisotropy']

    # read config file
    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = config_dict['processing_dir']

    # create iterable
    iter_list = []
    for burst in burst_gdf.bid.unique():

        burst_dir = Path(processing_dir).joinpath(burst)

        for (product, product_name), pol in itertools.product(
                product_types, pols):

            # take care of H-A-Alpha naming for file search
            # BUGFIX: was `product is 'pol'` - identity comparison with a
            # string literal; replaced by equality
            if pol in ['Alpha', 'Entropy', 'Anisotropy'] and product == 'pol':
                list_of_files = sorted(
                    burst_dir.glob(f'20*/*data*/*{pol}*img'))
            else:
                # see if there is actually any imagery for this
                # combination of product and polarisation
                list_of_files = sorted(
                    burst_dir.glob(f'20*/*data*/*{product_name}*{pol}*img'))

            # a time-series needs more than one scene
            if len(list_of_files) <= 1:
                continue

            # create list of dims if polarisation is present
            list_of_dims = sorted(
                str(dim) for dim in burst_dir.glob(f'20*/*{product}*dim'))

            iter_list.append([list_of_dims, burst, product, pol])

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # collect per-job results for the returned DataFrame
    out_dict = {
        'burst': [], 'list_of_dims': [], 'out_files': [],
        'out_vrt': [], 'product': [], 'error': []
    }
    for task in executor.as_completed(func=ard_to_ts.gd_ard_to_ts,
                                      iterable=iter_list,
                                      fargs=([str(config_file), ])):

        burst, list_of_dims, out_files, out_vrt, product, error = task.result()
        out_dict['burst'].append(burst)
        out_dict['list_of_dims'].append(list_of_dims)
        out_dict['out_files'].append(out_files)
        out_dict['out_vrt'].append(out_vrt)
        out_dict['product'].append(product)
        out_dict['error'].append(error)

    return pd.DataFrame.from_dict(out_dict)