Example #1
import os

import xarray as xr


def aggregate_netcdf_group(job_dir,
                           output_dir,
                           files,
                           group_name,
                           request_id=None):
    datasets = []
    accum_size = 0
    # Pre-scan every file so each variable's maximal shape, dtype, and
    # fill value are known before any datasets are combined.
    parameters = analyze_datasets(job_dir, files, request_id=request_id)
    for f in sorted(files):
        path = os.path.join(job_dir, f)
        size = os.stat(path).st_size
        accum_size += size
        # Flush the accumulated batch before it grows past the limit.
        if accum_size > MAX_AGGREGATION_SIZE:
            concatenate_and_write(datasets,
                                  output_dir,
                                  group_name,
                                  request_id=request_id)
            accum_size = size
            datasets = []

        with xr.open_dataset(path,
                             decode_times=False,
                             mask_and_scale=False,
                             decode_coords=False) as ds:
            # Load into memory so the data stays usable after the
            # file handle closes at the end of the with block.
            ds.load()
            # Pad and cast variables to the common shapes and dtypes
            # computed by analyze_datasets.
            shape_up(ds, parameters, request_id=request_id)
            datasets.append(ds)

    # Write whatever remains after the last file.
    if datasets:
        concatenate_and_write(datasets,
                              output_dir,
                              group_name,
                              request_id=request_id)
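
aggregate_netcdf_group depends on concatenate_and_write, which is not among these examples. The sketch below is a minimal, assumed implementation: it concatenates the batch along the obs record dimension and writes one NetCDF file per flushed batch. The output naming scheme is hypothetical.

import os
import uuid

import xarray as xr


def concatenate_and_write(datasets, output_dir, group_name, request_id=None):
    # Concatenate along the record dimension shared by every file in the batch.
    combined = xr.concat(datasets, dim='obs')
    # Hypothetical naming scheme: one uniquely named file per flushed batch.
    out_path = os.path.join(output_dir, '%s_%s.nc' % (group_name, uuid.uuid4().hex))
    combined.to_netcdf(out_path)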
Example #2
import os

import xarray as xr


def analyze_datasets(dir_path, files, request_id=None):
    parameters = {}
    for f in files:
        path = os.path.join(dir_path, f)
        with xr.open_dataset(path, decode_times=False, mask_and_scale=False, decode_coords=False) as ds:
            for var in ds.data_vars:
                # Drop the leading obs dimension; only the trailing
                # dimensions have to agree across files.
                shape = ds[var].shape[1:]
                dims = ds[var].dims[1:]
                dtype = ds[var].dtype

                # Prefer the fill value recorded in the file, falling
                # back to a default keyed by dtype.
                if dtype.kind == 'S':
                    fill = ds[var].attrs.get('_FillValue', FILL_VALUES.get('string'))
                else:
                    fill = ds[var].attrs.get('_FillValue', FILL_VALUES.get(dtype.name))

                if var not in parameters:
                    parameters[var] = {
                        'shape': shape,
                        'dtype': dtype,
                        'dims': dims,
                        'fill': fill
                    }
                else:
                    # Widen the recorded shape, dtype, and dimension
                    # names to cover every file seen so far.
                    parameters[var] = {
                        'shape': max_shape(shape, parameters[var]['shape']),
                        'dtype': max_dtype(dtype, parameters[var]['dtype']),
                        'dims': max_dimension_names(dims, parameters[var]['dims']),
                        'fill': fill
                    }
    return parameters
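
analyze_datasets widens shapes, dtypes, and dimension names through three helpers that do not appear in these examples. A plausible sketch under that assumption: numpy's promote_types picks a dtype wide enough for both inputs, and the shape and dimension-name helpers keep the larger of the two.

import numpy as np


def max_shape(a, b):
    # Hypothetical: elementwise maximum of two trailing shapes,
    # left-padding the shorter one with 1s so the lengths match.
    length = max(len(a), len(b))
    a = (1,) * (length - len(a)) + tuple(a)
    b = (1,) * (length - len(b)) + tuple(b)
    return tuple(max(x, y) for x, y in zip(a, b))


def max_dtype(a, b):
    # Smallest dtype that can safely hold values of both inputs.
    return np.promote_types(a, b)


def max_dimension_names(a, b):
    # Hypothetical: keep whichever dimension tuple covers more axes.
    return a if len(a) >= len(b) else b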
Example #3
import xarray as xr


def get_nc_info(file_name):
    with xr.open_dataset(file_name, decode_times=False, mask_and_scale=False, decode_coords=False) as ds:
        ret_val = {
            'size': ds.obs.size,
        }
        # Carry selected global attributes over to the aggregate.
        for i in ATTRIBUTE_CARRYOVER_MAP:
            if i in ds.attrs:
                ret_val[i] = ds.attrs[i]

        # First timestamp in the file.
        ret_val['file_start_time'] = ds.time.values[0]

    return ret_val
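
A short usage sketch for get_nc_info. The file name below is illustrative, and ATTRIBUTE_CARRYOVER_MAP is assumed to list the global attributes worth carrying into the aggregate.

# Hypothetical usage; the file name is illustrative.
info = get_nc_info('deployment0001_ctdbp.nc')
print(info['size'])             # number of observations in the file
print(info['file_start_time'])  # first timestamp, e.g. for ordering files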