def download_srtm(aoi):

    warnings.filterwarnings(
        'ignore', 'Geometry is in a geographic CRS', UserWarning
    )

    srtm = gpd.read_file(
        OST_ROOT.joinpath('aux/srtm1sectiles.gpkg')
    )

    aoi_gdf = vec.wkt_to_gdf(aoi)
    aoi_gdf['geometry'] = aoi_gdf.geometry.buffer(1)
    overlap_df = gpd.overlay(srtm, aoi_gdf, how='intersection')

    iter_list = []
    for file in overlap_df.url.values:
        iter_list.append(file)

    # now we run with godale, which works also with 1 worker
    executor = Executor(
        executor='concurrent_processes', max_workers=10
    )

    for task in executor.as_completed(
            func=download_srtm_tile, iterable=iter_list
    ):
        task.result()

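# A minimal usage sketch for download_srtm (the AOI string below is a
# hypothetical example). The AOI is expected as a WKT geometry in geographic
# coordinates; the 1-degree buffer makes sure neighbouring SRTM tiles at the
# scene edges are fetched as well, which is also why the geographic-CRS
# buffer warning from geopandas is silenced above:
#
#     aoi = 'POLYGON ((12.1 41.8, 12.6 41.8, 12.6 42.1, 12.1 42.1, 12.1 41.8))'
#     download_srtm(aoi)
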
def _create_mt_ls_mask(inventory_df, config_file):
    """Helper function to union the Layover/Shadow masks of a Time-series

    This function creates a combined multi-temporal Layover/Shadow mask
    for each track within the inventory.

    :param inventory_df:
    :param config_file:
    :return:
    """

    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = Path(config_dict['processing_dir'])

    iter_list = []
    for track in inventory_df.relativeorbit.unique():

        # get the track directory
        track_dir = processing_dir.joinpath(track)

        # collect the single Layover/Shadow masks
        list_of_masks = list(track_dir.glob('*/*/*_ls_mask.json'))

        # if the mask does not already exist, add to iterable
        if not track_dir.joinpath(f'{track}.ls_mask.json').exists():
            iter_list.append(list_of_masks)

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=os.cpu_count())

    for task in executor.as_completed(
            func=ts_ls_mask.mt_layover, iterable=iter_list
    ):
        task.result()

def _create_extents(inventory_df, config_file):

    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = Path(config_dict['processing_dir'])

    iter_list = []
    for track in inventory_df.relativeorbit.unique():

        # get the track directory
        track_dir = processing_dir.joinpath(track)
        list_of_extents = list(track_dir.glob('*/*/*bounds.json'))

        # if extent does not already exist, add to iterable
        if not track_dir.joinpath(f'{track}.min_bounds.json').exists():
            iter_list.append(list_of_extents)

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=os.cpu_count())

    out_dict = {'track': [], 'list_of_scenes': [], 'extent': []}
    for task in executor.as_completed(
            func=ts_extent.mt_extent,
            iterable=iter_list,
            fargs=([str(config_file), ])
    ):
        track, list_of_scenes, extent = task.result()
        out_dict['track'].append(track)
        out_dict['list_of_scenes'].append(list_of_scenes)
        out_dict['extent'].append(extent)

    return pd.DataFrame.from_dict(out_dict)

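# Usage sketch for the track-based helpers above (inventory_df is assumed to
# be an OST scene inventory GeoDataFrame with a 'relativeorbit' column, the
# config path a project config JSON as written by an OST project):
#
#     extent_df = _create_extents(inventory_df, 'project/config.json')
#     # one row per track with its scene list and common minimum extent
#     print(extent_df[['track', 'extent']])
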
def _create_mt_ls_mask(burst_gdf, config_file):
    """Helper function to union the Layover/Shadow masks of a Time-series

    This function creates a combined multi-temporal Layover/Shadow mask
    for each burst within the inventory.

    :param burst_gdf:
    :param config_file:
    :return:
    """

    # read config file
    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = config_dict['processing_dir']

    # create layover iterable
    iter_list = []
    for burst in burst_gdf.bid.unique():

        # get the burst directory
        burst_dir = Path(processing_dir).joinpath(burst)

        # collect the single Layover/Shadow masks
        list_of_masks = list(burst_dir.glob('*/*/*_ls_mask.json'))

        # if the mask does not already exist, add to iterable
        if not burst_dir.joinpath(f'{burst}.ls_mask.json').exists():
            iter_list.append(list_of_masks)

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=os.cpu_count())

    for task in executor.as_completed(
            func=ts_ls_mask.mt_layover, iterable=iter_list
    ):
        task.result()

def mosaic_timescan_old(config_file):

    print(' -----------------------------------------------------------------')
    logger.info('Mosaicking time-scan layers.')
    print(' -----------------------------------------------------------------')

    with open(config_file, 'r') as ard_file:
        config_dict = json.load(ard_file)
        processing_dir = Path(config_dict['processing_dir'])
        metrics = config_dict['processing']['time-scan_ARD']['metrics']

    if 'harmonics' in metrics:
        metrics.remove('harmonics')
        metrics.extend(['amplitude', 'phase', 'residuals'])

    if 'percentiles' in metrics:
        metrics.remove('percentiles')
        metrics.extend(['p95', 'p5'])

    tscan_dir = processing_dir.joinpath('Mosaic/Timescan')
    tscan_dir.mkdir(parents=True, exist_ok=True)

    iter_list = []
    for product, metric in itertools.product(PRODUCT_LIST, metrics):

        filelist = list(
            processing_dir.glob(f'*/Timescan/*{product}.{metric}.tif')
        )

        if not len(filelist) >= 1:
            continue

        filelist = ' '.join([str(file) for file in filelist])
        outfile = tscan_dir.joinpath(f'{product}.{metric}.tif')
        check_file = outfile.parent.joinpath(
            f'.{outfile.name[:-4]}.processed'
        )

        if check_file.exists():
            logger.info(f'Mosaic layer {outfile.name} already processed.')
            continue

        logger.info(f'Mosaicking layer {outfile.name}.')
        iter_list.append([filelist, outfile, config_file])

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run mosaicking
    for task in executor.as_completed(
            func=mosaic.gd_mosaic, iterable=iter_list
    ):
        task.result()

    ras.create_tscan_vrt(tscan_dir, config_file)

def mosaic_timescan(config_file):

    # load ard parameters
    with open(config_file, 'r') as ard_file:
        config_dict = json.load(ard_file)
        processing_dir = Path(config_dict['processing_dir'])
        metrics = config_dict['processing']['time-scan_ARD']['metrics']

    if 'harmonics' in metrics:
        metrics.remove('harmonics')
        metrics.extend(['amplitude', 'phase', 'residuals'])

    if 'percentiles' in metrics:
        metrics.remove('percentiles')
        metrics.extend(['p95', 'p5'])

    # create output directory if it does not exist
    tscan_dir = processing_dir.joinpath('Mosaic/Timescan')
    tscan_dir.mkdir(parents=True, exist_ok=True)

    # loop through all potential products
    iter_list = []
    for polar, metric in itertools.product(['VV', 'HH', 'VH', 'HV'], metrics):

        # create a list of files based on polarisation and metric
        filelist = list(
            processing_dir.glob(f'*/Timescan/*bs.{polar}.{metric}.tif')
        )

        # skip if there are not at least 2 files to mosaic
        if not len(filelist) >= 2:
            continue

        filelist = ' '.join([str(file) for file in filelist])
        outfile = tscan_dir.joinpath(f'bs.{polar}.{metric}.tif')
        check_file = outfile.parent.joinpath(f'.{outfile.stem}.processed')

        if check_file.exists():
            logger.info(f'Mosaic layer {outfile.name} already processed.')
            continue

        iter_list.append([filelist, outfile, config_file])

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run mosaicking
    for task in executor.as_completed(
            func=mosaic.gd_mosaic, iterable=iter_list
    ):
        task.result()

    ras.create_tscan_vrt(tscan_dir, config_file)

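# Usage sketch (hypothetical config path). Note that the pseudo-metrics
# 'harmonics' and 'percentiles' are expanded into the actual output layers
# (amplitude/phase/residuals and p95/p5) before the file search:
#
#     mosaic_timescan('project/config.json')
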
def _create_mt_ls_mask_old(burst_gdf, config_file):
    """Batch processing for multi-temporal Layover/Shadow mask

    This function handles the organization of the multi-temporal
    Layover/Shadow mask creation for each burst.

    :param burst_gdf:
    :param config_file:
    :return:
    """

    # read config file
    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = config_dict['processing_dir']

    # create layover iterable
    iter_list = []
    for burst in burst_gdf.bid.unique():

        # get the burst directory
        burst_dir = Path(processing_dir).joinpath(burst)

        # get layover scenes
        list_of_scenes = list(burst_dir.glob('20*/*data*/*img'))
        list_of_layover = [
            str(x) for x in list_of_scenes if 'layover' in str(x)
        ]

        # we need to redefine the namespace of the already created extents
        extent = burst_dir.joinpath(f'{burst}.extent.gpkg')
        if not extent.exists():
            raise FileNotFoundError(
                f'Extent file for burst {burst} not found.'
            )

        # layover/shadow mask
        out_ls = burst_dir.joinpath(f'{burst}.ls_mask.tif')

        # if the file does not already exist, add to list for processing
        if not out_ls.exists():
            iter_list.append(list_of_layover)

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    for task in executor.as_completed(
            func=ts_ls_mask.mt_layover,
            iterable=iter_list,
            fargs=([str(config_file), ])
    ):
        task.result()

def _create_mt_ls_mask_old(inventory_df, config_file):

    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = Path(config_dict['processing_dir'])

    iter_list = []
    for track in inventory_df.relativeorbit.unique():

        # get the track directory
        track_dir = processing_dir.joinpath(track)

        # get the layover scenes
        list_of_scenes = list(track_dir.glob('**/*img'))
        list_of_layover = [
            str(x) for x in list_of_scenes if 'layover' in str(x)
        ]
        iter_list.append(list_of_layover)

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    out_dict = {
        'track': [],
        'list_of_layover': [],
        'ls_mask': [],
        'ls_extent': []
    }
    for task in executor.as_completed(
            func=ts_ls_mask.mt_layover,
            iterable=iter_list,
            fargs=([str(config_file), ])
    ):
        track, list_of_layover, ls_mask, ls_extent = task.result()
        out_dict['track'].append(track)
        out_dict['list_of_layover'].append(list_of_layover)
        out_dict['ls_mask'].append(ls_mask)
        out_dict['ls_extent'].append(ls_extent)

    return pd.DataFrame.from_dict(out_dict)

def grd_to_ard_batch(inventory_df, config_file):

    # load relevant config parameters
    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        download_dir = Path(config_dict['download_dir'])
        data_mount = Path(config_dict['data_mount'])

    # where all frames are grouped into acquisitions
    processing_dict = _create_processing_dict(inventory_df)
    processing_df = pd.DataFrame(
        columns=['identifier', 'outfile', 'out_ls', 'error']
    )

    iter_list = []
    for _, list_of_scenes in processing_dict.items():

        # get the paths to the files
        scene_paths = [
            Sentinel1Scene(scene).get_path(download_dir, data_mount)
            for scene in list_of_scenes
        ]
        iter_list.append(scene_paths)

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    for task in executor.as_completed(
            func=grd_to_ard.grd_to_ard,
            iterable=iter_list,
            fargs=([str(config_file), ])
    ):
        list_of_scenes, outfile, out_ls, error = task.result()

        # collect the processing info as dataframe
        temp_df = create_processed_df(
            inventory_df, list_of_scenes, outfile, out_ls, error
        )
        processing_df = processing_df.append(temp_df)

    return processing_df

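# Usage sketch for the GRD batch entry point (inventory_df and the config
# path are hypothetical). The returned DataFrame carries one row per
# processed acquisition, including a potential error message:
#
#     processing_df = grd_to_ard_batch(inventory_df, 'project/config.json')
#     # e.g. inspect acquisitions that reported an error
#     print(processing_df[processing_df.error.notnull()])
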
def _create_extents_old(burst_gdf, config_file):
    """Batch processing for the common extent of a burst time-series

    This function handles the organization of the common burst extent
    creation for each burst.

    :param burst_gdf:
    :param config_file:
    :return:
    """

    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = Path(config_dict['processing_dir'])

    # create extent iterable
    iter_list = []
    for burst in burst_gdf.bid.unique():

        # get the burst directory
        burst_dir = processing_dir.joinpath(burst)

        # get common burst extent
        list_of_bursts = list(burst_dir.glob('**/*img'))
        list_of_bursts = [
            str(x) for x in list_of_bursts if 'layover' not in str(x)
        ]

        # if the file does not already exist, add to iterable
        extent = burst_dir.joinpath(f'{burst}.extent.gpkg')
        if not extent.exists():
            iter_list.append(list_of_bursts)

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    for task in executor.as_completed(
            func=ts_extent.mt_extent,
            iterable=iter_list,
            fargs=([str(config_file), ])
    ):
        task.result()

def _create_extents(burst_gdf, config_file):
    """Batch processing for the common extent of a burst time-series

    This function handles the organization of the common burst extent
    creation for each burst.

    :param burst_gdf:
    :param config_file:
    :return:
    """

    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = Path(config_dict['processing_dir'])

    # create extent iterable
    iter_list = []
    for burst in burst_gdf.bid.unique():

        # get the burst directory
        burst_dir = processing_dir.joinpath(burst)
        list_of_extents = list(burst_dir.glob('*/*/*bounds.json'))

        # if extent does not already exist, add to iterable
        if not burst_dir.joinpath(f'{burst}.min_bounds.json').exists():
            iter_list.append(list_of_extents)

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=os.cpu_count())

    out_dict = {'burst': [], 'list_of_scenes': [], 'extent': []}
    for task in executor.as_completed(
            func=ts_extent.mt_extent,
            iterable=iter_list,
            fargs=([str(config_file), ])
    ):
        burst, list_of_scenes, extent = task.result()
        out_dict['burst'].append(burst)
        out_dict['list_of_scenes'].append(list_of_scenes)
        out_dict['extent'].append(extent)

    return pd.DataFrame.from_dict(out_dict)

def ard_slc_to_rgb(file_list, outfile, process_bounds, driver='GTiff',
                   to_db=False):

    out_tifs = []
    max_workers = os.cpu_count()

    # index files so each burst keeps its position in the mosaic
    for i, f in enumerate(file_list):
        file_list[i] = (i, f)

    with TemporaryDirectory() as temp:
        executor_type = 'concurrent_processes'
        executor = Executor(executor=executor_type,
                            max_workers=max_workers)

        for task in executor.as_completed(
                func=execute_burst_to_tif,
                iterable=file_list,
                fargs=(temp, driver, to_db)
        ):
            tif_file = task.result()
            out_tifs.append(rasterio.open(tif_file))

        arr, out_trans = merge(out_tifs,
                               nodata=out_tifs[0].nodata,
                               bounds=process_bounds)

        width = arr.shape[2]
        height = arr.shape[1]

        # fall back to small blocks for small rasters
        blockxsize = GTIFF_OST_PROFILE["blockxsize"]
        blockysize = GTIFF_OST_PROFILE["blockysize"]
        if width < 256 or height < 256:
            blockxsize = 64
            blockysize = 64

        profile = out_tifs[0].profile
        profile.update(GTIFF_OST_PROFILE,
                       width=width,
                       height=height,
                       transform=out_trans,
                       blockxsize=blockxsize,
                       blockysize=blockysize,
                       count=3)

        arr = np.where(arr == out_tifs[0].nodata, 0, arr)
        with rasterio.open(outfile, "w", **profile) as dst:
            dst.write(arr)

    return outfile

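# Usage sketch (all paths and bounds hypothetical; file entries in whatever
# format execute_burst_to_tif expects). The burst products are merged in the
# order given by file_list, which is why they are indexed before being
# handed to the executor:
#
#     rgb = ard_slc_to_rgb(
#         ['burst_1.dim', 'burst_2.dim'],          # burst products to merge
#         'rgb_mosaic.tif',                        # output file
#         process_bounds=(11.9, 41.7, 12.8, 42.2)  # (minx, miny, maxx, maxy)
#     )
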
def _create_timeseries(burst_gdf, config_file):

    # product types we search for, with their filename pattern
    list_of_product_types = {('bs', 'Gamma0'), ('bs', 'Sigma0'),
                             ('coh', 'coh'), ('pol', 'pol')}
    pols = ['VV', 'VH', 'HH', 'HV', 'Alpha', 'Entropy', 'Anisotropy']

    # read config file
    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = config_dict['processing_dir']

    # create iterable
    iter_list = []
    for burst in burst_gdf.bid.unique():

        burst_dir = Path(processing_dir).joinpath(burst)

        for pr, pol in itertools.product(list_of_product_types, pols):

            # unpack items
            product, product_name = list(pr)

            # take care of H-A-Alpha naming for file search
            if pol in ['Alpha', 'Entropy', 'Anisotropy'] and product == 'pol':
                list_of_files = sorted(
                    list(burst_dir.glob(f'20*/*data*/*{pol}*img'))
                )
            else:
                # see if there is actually any imagery for this
                # combination of product and polarisation
                list_of_files = sorted(
                    list(
                        burst_dir.glob(
                            f'20*/*data*/*{product_name}*{pol}*img'
                        )
                    )
                )

            if len(list_of_files) <= 1:
                continue

            # create list of dims if polarisation is present
            list_of_dims = sorted(
                str(dim) for dim in list(burst_dir.glob(f'20*/*{product}*dim'))
            )

            iter_list.append([list_of_dims, burst, product, pol])

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    out_dict = {
        'burst': [],
        'list_of_dims': [],
        'out_files': [],
        'out_vrt': [],
        'product': [],
        'error': []
    }
    for task in executor.as_completed(
            func=ard_to_ts.gd_ard_to_ts,
            iterable=iter_list,
            fargs=([str(config_file), ])
    ):
        burst, list_of_dims, out_files, out_vrt, product, error = \
            task.result()
        out_dict['burst'].append(burst)
        out_dict['list_of_dims'].append(list_of_dims)
        out_dict['out_files'].append(out_files)
        out_dict['out_vrt'].append(out_vrt)
        out_dict['product'].append(product)
        out_dict['error'].append(error)

    return pd.DataFrame.from_dict(out_dict)

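# Usage sketch (burst_gdf is assumed to be an OST burst inventory with a
# 'bid' column). Every combination of product type and polarisation is
# checked for imagery, so a single call covers backscatter, coherence and
# the H-A-Alpha decomposition layers alike:
#
#     ts_df = _create_timeseries(burst_gdf, 'project/config.json')
#     print(ts_df[ts_df.error.notnull()])
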
def mosaic_timeseries(inventory_df, config_file):

    print(' -----------------------------------')
    logger.info('Mosaicking Time-series layers')
    print(' -----------------------------------')

    # -------------------------------------
    # 1 load project config
    with open(config_file, 'r') as ard_file:
        config_dict = json.load(ard_file)
        processing_dir = Path(config_dict['processing_dir'])

    # create output folder
    ts_dir = processing_dir.joinpath('Mosaic/Timeseries')
    ts_dir.mkdir(parents=True, exist_ok=True)

    # loop through polarisations
    iter_list, vrt_iter_list = [], []
    for p in ['VV', 'VH', 'HH', 'HV']:

        tracks = inventory_df.relativeorbit.unique()
        nr_of_ts = len(
            list(
                processing_dir.joinpath(f'{tracks[0]}/Timeseries').glob(
                    f'*.{p}.tif'
                )
            )
        )

        if not nr_of_ts >= 1:
            continue

        outfiles = []
        for i in range(1, nr_of_ts + 1):

            filelist = list(
                processing_dir.glob(f'*/Timeseries/{i:02d}.*.{p}.tif')
            )
            filelist = [
                str(file) for file in filelist if 'Mosaic' not in str(file)
            ]

            # create a datelist to construct the output filename
            datelist = []
            for file in filelist:
                datelist.append(Path(file).name.split('.')[1])

            filelist = ' '.join(filelist)
            start, end = sorted(datelist)[0], sorted(datelist)[-1]

            if start == end:
                outfile = ts_dir.joinpath(f'{i:02d}.{start}.bs.{p}.tif')
            else:
                outfile = ts_dir.joinpath(
                    f'{i:02d}.{start}-{end}.bs.{p}.tif'
                )

            check_file = outfile.parent.joinpath(
                f'.{outfile.stem}.processed'
            )
            outfiles.append(outfile)

            if check_file.exists():
                logger.info(
                    f'Mosaic layer {outfile.name} already processed.'
                )
                continue

            logger.info(f'Mosaicking layer {outfile.name}.')
            iter_list.append([filelist, outfile, config_file])

        vrt_iter_list.append([ts_dir, p, outfiles])

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run mosaicking
    for task in executor.as_completed(
            func=mosaic.gd_mosaic, iterable=iter_list
    ):
        task.result()

    # run mosaicking vrts
    for task in executor.as_completed(
            func=mosaic.create_timeseries_mosaic_vrt,
            iterable=vrt_iter_list
    ):
        task.result()

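# Usage sketch (hypothetical inputs). The function infers the number of
# time-series steps from the first track and then mosaics the matching step
# across all tracks, so it assumes all tracks share the same number of
# acquisition dates:
#
#     mosaic_timeseries(inventory_df, 'project/config.json')
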
def timeseries_to_timescan(inventory_df, config_file):

    # load ard parameters
    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = Path(config_dict['processing_dir'])
        ard = config_dict['processing']['single_ARD']
        ard_mt = config_dict['processing']['time-series_ARD']
        ard_tscan = config_dict['processing']['time-scan_ARD']

    # get the db scaling right
    to_db = True if ard['to_db'] or ard_mt['to_db'] else False

    # get the datatype right
    dtype_conversion = True if ard_mt['dtype_output'] != 'float32' else False

    iter_list, vrt_iter_list = [], []
    for track in inventory_df.relativeorbit.unique():

        # get track directory
        track_dir = processing_dir.joinpath(track)

        # define and create Timescan directory
        timescan_dir = track_dir.joinpath('Timescan')
        timescan_dir.mkdir(parents=True, exist_ok=True)

        # loop through each polarisation
        for polar in ['VV', 'VH', 'HH', 'HV']:

            if timescan_dir.joinpath(f'.bs.{polar}.processed').exists():
                logger.info(f'Timescans for track {track} already processed.')
                continue

            # get timeseries vrt
            time_series = track_dir.joinpath(
                f'Timeseries/Timeseries.bs.{polar}.vrt'
            )

            if not time_series.exists():
                continue

            # get the list of scenes
            scene_list = list(track_dir.glob(f'Timeseries/*bs.{polar}.tif'))

            # create a datelist for harmonics calculation
            datelist = []
            for file in sorted(scene_list):
                datelist.append(file.name.split('.')[1])

            # define timescan prefix
            timescan_prefix = timescan_dir.joinpath(f'bs.{polar}')

            iter_list.append([
                time_series, timescan_prefix, ard_tscan['metrics'],
                dtype_conversion, to_db, ard_tscan['remove_outliers'],
                datelist
            ])
            vrt_iter_list.append(timescan_dir)

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run timescan creation
    out_dict = {'track': [], 'prefix': [], 'metrics': [], 'error': []}
    for task in executor.as_completed(
            func=timescan.gd_mt_metrics, iterable=iter_list
    ):
        track, prefix, metrics, error = task.result()
        out_dict['track'].append(track)
        out_dict['prefix'].append(prefix)
        out_dict['metrics'].append(metrics)
        out_dict['error'].append(error)

    timescan_df = pd.DataFrame.from_dict(out_dict)

    # run vrt creation
    for task in executor.as_completed(
            func=ras.create_tscan_vrt,
            iterable=vrt_iter_list,
            fargs=([str(config_file), ])
    ):
        task.result()

    return timescan_df

def _create_timeseries(inventory_df, config_file):
    """Helper function to create Timeseries out of OST ARD products

    Based on the inventory GeoDataFrame and the configuration file,
    this function triggers the time-series processing for all bursts/tracks
    within the respective project. Each product/polarisation is treated
    singularly.

    Based on the ARD type/configuration settings, the function uses SNAP's
    Create-Stack function to unify the grid of each scene and applies
    a multi-temporal speckle filter if selected.

    The output are single GeoTiff files, whereas there is the possibility
    to reduce the data by converting the data format into uint8 or uint16.
    This is done by linearly stretching the data between -30 and +5
    for backscatter, 0 and 1 for coherence, polarimetric anisotropy
    and entropy, as well as 0 and 90 for the polarimetric alpha channel.

    All the data is cropped to the same extent based on the minimum
    bounds layer.

    This function executes the underlying functions using the godale
    framework for parallel execution. Executor type and number of parallel
    processes is defined within the configuration file.

    :param inventory_df:
    :type GeoDataFrame
    :param config_file:
    :type str/Path
    :return:
    """

    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        processing_dir = Path(config_dict['processing_dir'])

    iter_list = []
    for track in inventory_df.relativeorbit.unique():

        # get the track directory
        track_dir = processing_dir.joinpath(track)

        for pol in ['VV', 'VH', 'HH', 'HV']:

            # see if there is actually any imagery in this polarisation
            list_of_files = sorted(
                str(file) for file in
                list(track_dir.glob(f'20*/*data*/*ma0*{pol}*img'))
            )

            if len(list_of_files) <= 1:
                continue

            # create list of dims if polarisation is present
            list_of_dims = sorted(
                str(dim) for dim in list(track_dir.glob('20*/*bs*dim'))
            )

            iter_list.append([list_of_dims, track, 'bs', pol])

    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    out_dict = {
        'track': [],
        'list_of_dims': [],
        'out_files': [],
        'out_vrt': [],
        'product': [],
        'error': []
    }
    for task in executor.as_completed(
            func=ard_to_ts.gd_ard_to_ts,
            iterable=iter_list,
            fargs=([str(config_file), ])
    ):
        track, list_of_dims, out_files, out_vrt, product, error = \
            task.result()
        out_dict['track'].append(track)
        out_dict['list_of_dims'].append(list_of_dims)
        out_dict['out_files'].append(out_files)
        out_dict['out_vrt'].append(out_vrt)
        out_dict['product'].append(product)
        out_dict['error'].append(error)

    return pd.DataFrame.from_dict(out_dict)

def bursts_to_ards(burst_gdf, config_file):
    """Batch processing from single bursts to ARD format

    This function handles the burst processing based on an OST burst
    inventory file and an OST config file that contains all necessary
    information about the project (e.g. project directory) and the
    processing steps applied for the ARD generation based on the JSON
    ARD-type templates. Executor type and the number of parallel burst
    processing jobs are taken from the config file.

    :param burst_gdf: an OST burst inventory
    :type burst_gdf: GeoDataFrame
    :param config_file: (str/Path) path to the project config file
    :return:
    """

    print('--------------------------------------------------------------')
    logger.info('Processing all single bursts to ARD')
    print('--------------------------------------------------------------')

    logger.info('Preparing the processing pipeline. This may take a moment.')
    proc_inventory = prepare_burst_inventory(burst_gdf, config_file)

    with open(config_file, 'r') as file:
        config_dict = json.load(file)
        executor_type = config_dict['executor_type']
        max_workers = config_dict['max_workers']

    # we update max_workers in case snap_cpu_parallelism is set lower
    # than the number of available cpus
    if (max_workers == 1
            and config_dict['snap_cpu_parallelism'] < os.cpu_count()):
        max_workers = int(
            os.cpu_count() / config_dict['snap_cpu_parallelism']
        )

    # now we run with godale, which works also with 1 worker
    out_dict = {
        'burst': [],
        'acq_date': [],
        'out_bs': [],
        'out_ls': [],
        'out_pol': [],
        'out_coh': [],
        'error': []
    }
    executor = Executor(executor=executor_type, max_workers=max_workers)
    for task in executor.as_completed(
            func=burst_to_ard,
            iterable=proc_inventory.iterrows(),
            fargs=([str(config_file), ])
    ):
        burst, date, out_bs, out_ls, out_pol, out_coh, error = task.result()
        out_dict['burst'].append(burst)
        out_dict['acq_date'].append(date)
        out_dict['out_bs'].append(out_bs)
        out_dict['out_ls'].append(out_ls)
        out_dict['out_pol'].append(out_pol)
        out_dict['out_coh'].append(out_coh)
        out_dict['error'].append(error)

    return pd.DataFrame.from_dict(out_dict)

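# Usage sketch for the burst batch entry point (paths hypothetical). A
# possible end-to-end flow on top of an OST burst inventory:
#
#     ard_df = bursts_to_ards(burst_gdf, 'project/config.json')
#     # inspect bursts that reported an error
#     print(ard_df[ard_df.error.notnull()])
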
def timeseries_to_timescan(burst_gdf, config_file):
    """Function to create a timescan out of an OST timeseries.
    """

    print('--------------------------------------------------------------')
    logger.info('Processing all burst ARDs time-series to ARD timescans')
    print('--------------------------------------------------------------')

    # -------------------------------------
    # 1 load project config
    with open(config_file, 'r') as ard_file:
        config_dict = json.load(ard_file)
        processing_dir = config_dict['processing_dir']
        ard = config_dict['processing']['single_ARD']
        ard_mt = config_dict['processing']['time-series_ARD']
        ard_tscan = config_dict['processing']['time-scan_ARD']

    # get the db scaling right
    to_db = True if ard['to_db'] or ard_mt['to_db'] else False

    # get datatype right
    dtype_conversion = True if ard_mt['dtype_output'] != 'float32' else False

    # -------------------------------------
    # 2 create iterable for parallel processing
    iter_list, vrt_iter_list = [], []
    for burst in burst_gdf.bid.unique():

        # get relevant directories
        burst_dir = Path(processing_dir).joinpath(burst)
        timescan_dir = burst_dir.joinpath('Timescan')
        timescan_dir.mkdir(parents=True, exist_ok=True)

        for product in PRODUCT_LIST:

            # check if already processed
            if timescan_dir.joinpath(f'.{product}.processed').exists():
                logger.debug(
                    f'Timescans for burst {burst} already processed.'
                )
                continue

            # get respective timeseries
            timeseries = burst_dir.joinpath(
                f'Timeseries/Timeseries.{product}.vrt'
            )

            # check if this timeseries exists
            # (since we loop through all products)
            if not timeseries.exists():
                continue

            # datelist for harmonics
            scenelist = list(burst_dir.glob(f'Timeseries/*{product}*tif'))
            datelist = [
                file.name.split('.')[1][:6] for file in sorted(scenelist)
            ]

            # define timescan prefix
            timescan_prefix = timescan_dir.joinpath(product)

            # get rescaling and db right (backscatter vs. coh/pol)
            if 'bs.' in str(timescan_prefix):
                to_power, rescale = to_db, dtype_conversion
            else:
                to_power, rescale = False, False

            iter_list.append([
                timeseries, timescan_prefix, ard_tscan['metrics'],
                rescale, to_power, ard_tscan['remove_outliers'], datelist
            ])
            vrt_iter_list.append(timescan_dir)

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run timescan creation
    out_dict = {'burst': [], 'prefix': [], 'metrics': [], 'error': []}
    for task in executor.as_completed(
            func=timescan.gd_mt_metrics, iterable=iter_list
    ):
        burst, prefix, metrics, error = task.result()
        out_dict['burst'].append(burst)
        out_dict['prefix'].append(prefix)
        out_dict['metrics'].append(metrics)
        out_dict['error'].append(error)

    df = pd.DataFrame.from_dict(out_dict)

    # run vrt creation
    for task in executor.as_completed(
            func=ras.create_tscan_vrt,
            iterable=vrt_iter_list,
            fargs=([str(config_file), ])
    ):
        task.result()

    return df

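# Usage sketch (hypothetical config path), run after the time-series
# creation so that the Timeseries/*.vrt files exist:
#
#     tscan_df = timeseries_to_timescan(burst_gdf, 'project/config.json')
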
def mosaic_timeseries(burst_inventory, config_file):

    print(' -----------------------------------------------------------------')
    logger.info('Mosaicking time-series layers.')
    print(' -----------------------------------------------------------------')

    # -------------------------------------
    # 1 load project config
    with open(config_file, 'r') as ard_file:
        config_dict = json.load(ard_file)
        processing_dir = Path(config_dict['processing_dir'])

    # create output folder
    ts_dir = processing_dir.joinpath('Mosaic/Timeseries')
    ts_dir.mkdir(parents=True, exist_ok=True)

    temp_mosaic = processing_dir.joinpath('Mosaic/temp')
    temp_mosaic.mkdir(parents=True, exist_ok=True)

    # -------------------------------------
    # 2 create iterable
    # loop through each product
    iter_list, vrt_iter_list = [], []
    for product in PRODUCT_LIST:
        for track in burst_inventory.Track.unique():

            dates = [
                date[2:] for date in sorted(
                    burst_inventory.Date[
                        burst_inventory.Track == track].unique())
            ]

            for i, date in enumerate(dates):

                if 'coh' in product:
                    # we do the try, since for the last date
                    # there is no dates[i+1] for coherence
                    try:
                        temp_acq = temp_mosaic.joinpath(
                            f'{i}.{date}.{dates[i + 1]}.{track}.{product}.tif'
                        )
                    except IndexError:
                        temp_acq = None
                else:
                    temp_acq = temp_mosaic.joinpath(
                        f'{i}.{date}.{track}.{product}.tif'
                    )

                if temp_acq:
                    iter_list.append(
                        [track, date, product, temp_acq, config_file]
                    )

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run acquisition-level mosaicking
    for task in executor.as_completed(
            func=mosaic.gd_mosaic_slc_acquisition, iterable=iter_list
    ):
        task.result()

    # mosaic the acquisitions
    iter_list, vrt_iter_list = [], []
    for product in PRODUCT_LIST:

        outfiles = []
        for i in range(len(dates)):

            list_of_files = list(temp_mosaic.glob(f'{i}.*{product}.tif'))

            if not list_of_files:
                continue

            datelist = []
            for file in list_of_files:
                if 'coh' in product:
                    datelist.append(
                        f"{file.name.split('.')[2]}_{file.name.split('.')[1]}"
                    )
                else:
                    datelist.append(file.name.split('.')[1])

            # get start and end date of mosaic
            start, end = sorted(datelist)[0], sorted(datelist)[-1]
            list_of_files = ' '.join([str(file) for file in list_of_files])

            # create namespace for output file
            if start == end:
                outfile = ts_dir.joinpath(
                    f'{i + 1:02d}.{start}.{product}.tif'
                )

                # with the above join operation, the list turned into a
                # string, so we can pass list_of_files directly
                shutil.move(list_of_files, outfile)
                outfiles.append(outfile)
                continue
            else:
                outfile = ts_dir.joinpath(
                    f'{i + 1:02d}.{start}-{end}.{product}.tif'
                )

            # create namespace for check_file
            check_file = outfile.parent.joinpath(
                f'.{outfile.name[:-4]}.processed'
            )

            if check_file.exists():
                logger.info(f'Mosaic layer {outfile} already processed.')
                continue

            # append to list of outfiles for vrt creation
            outfiles.append(outfile)
            iter_list.append([list_of_files, outfile, config_file])

        vrt_iter_list.append([ts_dir, product, outfiles])

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run mosaicking
    for task in executor.as_completed(
            func=mosaic.gd_mosaic, iterable=iter_list
    ):
        task.result()

    # run mosaicking vrts
    for task in executor.as_completed(
            func=mosaic.create_timeseries_mosaic_vrt,
            iterable=vrt_iter_list
    ):
        task.result()

    # remove temp folder
    h.remove_folder_content(temp_mosaic)

def mosaic_timescan(burst_inventory, config_file):
    """Mosaicking of time-scan layers for the burst-based workflow

    :param burst_inventory:
    :param config_file:
    :return:
    """

    print(' -----------------------------------------------------------------')
    logger.info('Mosaicking time-scan layers.')
    print(' -----------------------------------------------------------------')

    # -------------------------------------
    # 1 load project config
    with open(config_file, 'r') as ard_file:
        config_dict = json.load(ard_file)
        processing_dir = Path(config_dict['processing_dir'])
        metrics = config_dict['processing']['time-scan_ARD']['metrics']

    if 'harmonics' in metrics:
        metrics.remove('harmonics')
        metrics.extend(['amplitude', 'phase', 'residuals'])

    if 'percentiles' in metrics:
        metrics.remove('percentiles')
        metrics.extend(['p95', 'p5'])

    # create output folder
    ts_dir = processing_dir.joinpath('Mosaic/Timescan')
    ts_dir.mkdir(parents=True, exist_ok=True)

    temp_mosaic = processing_dir.joinpath('Mosaic/temp')
    temp_mosaic.mkdir(parents=True, exist_ok=True)

    # -------------------------------------
    # 2 create iterable
    # loop through each product and metric
    iter_list = []
    for product, metric in itertools.product(PRODUCT_LIST, metrics):
        for track in burst_inventory.Track.unique():

            filelist = list(
                processing_dir.glob(
                    f'[A,D]{track}_IW*/Timescan/*{product}.{metric}.tif'
                )
            )

            if not len(filelist) >= 1:
                continue

            temp_acq = temp_mosaic.joinpath(
                f'{track}.{product}.{metric}.tif'
            )

            if temp_acq:
                iter_list.append(
                    [track, metric, product, temp_acq, config_file]
                )

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run track-level mosaicking
    for task in executor.as_completed(
            func=mosaic.gd_mosaic_slc_acquisition, iterable=iter_list
    ):
        task.result()

    iter_list = []
    for product, metric in itertools.product(PRODUCT_LIST, metrics):

        list_of_files = list(temp_mosaic.glob(f'*{product}.{metric}.tif'))

        if not list_of_files:
            continue

        # turn to OTB readable format
        list_of_files = ' '.join([str(file) for file in list_of_files])

        # create namespace for outfile
        outfile = ts_dir.joinpath(f'{product}.{metric}.tif')
        check_file = outfile.parent.joinpath(
            f'.{outfile.name[:-4]}.processed'
        )

        if check_file.exists():
            logger.info(f'Mosaic layer {outfile.name} already processed.')
            continue

        logger.info(f'Mosaicking layer {outfile.name}.')
        iter_list.append([list_of_files, outfile, config_file])

    # now we run with godale, which works also with 1 worker
    executor = Executor(executor=config_dict['executor_type'],
                        max_workers=config_dict['max_workers'])

    # run mosaicking
    for task in executor.as_completed(
            func=mosaic.gd_mosaic, iterable=iter_list
    ):
        task.result()

    ras.create_tscan_vrt(ts_dir, config_file)

    # remove temp folder
    h.remove_folder_content(temp_mosaic)

def combine_timeseries(processing_dir, config_dict, timescan=True):

    # namespaces for folders
    comb_dir = processing_dir.joinpath('combined')
    if comb_dir.exists():
        h.remove_folder_content(comb_dir)

    tseries_dir = comb_dir.joinpath('Timeseries')
    tseries_dir.mkdir(parents=True, exist_ok=True)

    PRODUCT_LIST = [
        'bs.HH', 'bs.VV', 'bs.HV', 'bs.VH',
        'coh.VV', 'coh.VH', 'coh.HH', 'coh.HV',
        'pol.Entropy', 'pol.Anisotropy', 'pol.Alpha'
    ]

    out_files, iter_list = [], []
    for product_type in PRODUCT_LIST:

        filelist = list(
            processing_dir.glob(f'*/Timeseries/*{product_type}.tif')
        )

        if len(filelist) > 1:
            datelist = sorted(
                [file.name.split('.')[1] for file in filelist]
            )

            for i, date in enumerate(datelist):
                file = list(
                    processing_dir.glob(
                        f'*/Timeseries/*{date}*{product_type}.tif'
                    )
                )
                outfile = tseries_dir.joinpath(
                    f'{i+1:02d}.{date}.{product_type}.tif'
                )
                shutil.copy(file[0], str(outfile))
                out_files.append(str(outfile))

            vrt_options = gdal.BuildVRTOptions(srcNodata=0, separate=True)
            out_vrt = str(
                tseries_dir.joinpath(f'Timeseries.{product_type}.vrt')
            )
            gdal.BuildVRT(str(out_vrt), out_files, options=vrt_options)

            if timescan:
                from ost.generic import timescan as ts
                ard = config_dict['processing']['single_ARD']
                ard_mt = config_dict['processing']['time-series_ARD']
                ard_tscan = config_dict['processing']['time-scan_ARD']

                # get the db scaling right
                to_db = True if ard['to_db'] or ard_mt['to_db'] else False

                dtype_conversion = (
                    True if ard_mt['dtype_output'] != 'float32' else False
                )

                tscan_dir = comb_dir.joinpath('Timescan')
                tscan_dir.mkdir(parents=True, exist_ok=True)

                # get timeseries vrt
                time_series = tseries_dir.joinpath(
                    f'Timeseries.{product_type}.vrt'
                )

                if not time_series.exists():
                    continue

                # get the list of scenes
                scene_list = [
                    str(file) for file in
                    list(tseries_dir.glob(f'*{product_type}.tif'))
                ]

                # create a datelist for harmonics calculation
                datelist = []
                for file in sorted(scene_list):
                    datelist.append(os.path.basename(file).split('.')[1])

                # define timescan prefix
                timescan_prefix = tscan_dir.joinpath(f'{product_type}')

                iter_list.append([
                    time_series, timescan_prefix, ard_tscan['metrics'],
                    dtype_conversion, to_db, ard_tscan['remove_outliers'],
                    datelist
                ])

    if timescan:
        # now we run with godale, which works also with 1 worker
        executor = Executor(executor=config_dict['executor_type'],
                            max_workers=config_dict['max_workers'])

        # run timescan creation
        out_dict = {'track': [], 'prefix': [], 'metrics': [], 'error': []}
        for task in executor.as_completed(
                func=ts.gd_mt_metrics, iterable=iter_list
        ):
            track, prefix, metrics, error = task.result()
            out_dict['track'].append(track)
            out_dict['prefix'].append(prefix)
            out_dict['metrics'].append(metrics)
            out_dict['error'].append(error)

        create_tscan_vrt(tscan_dir, config_dict)

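# Usage sketch for combine_timeseries (paths hypothetical; note that
# config_dict is the already parsed project configuration rather than a
# file path):
#
#     with open('project/config.json') as f:
#         config_dict = json.load(f)
#
#     combine_timeseries(
#         Path('project/processing'), config_dict, timescan=True
#     )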