Esempio n. 1
0
def EG_extract_region():
    """Subset a file to a depth range [depth_start, depth_end], in place.

    Reads ``test.nc``, trims the 'depth' dimension (and every variable that
    maps it) to the requested range, and writes the result to ``test_out.nc``.
    """
    # subset on dimensions using Coordinate concept.
    # NOTE: could be done in Iris, but then also requires CF compliance ??
    input_path = os.path.join(basedir, 'test.nc')
    output_path = os.path.join(basedir, 'test_out.nc')
    depth_start, depth_end = (50.0, 550.0)

    input = nc_files.read(input_path)
    depth_dim = input.dimensions['depth']
    depth_coord = input.variables['depth']

    # Work out indexing to the part we want.
    # NOTE: assumes depth_coord values increase monotonically -- TODO confirm.
    i_start = np.where(depth_coord[:] >= depth_start)[0][0]
    i_end_indices = np.where(depth_coord[:] >= depth_end)[0]
    # Use .size: truth-testing a multi-element numpy array raises ValueError.
    if i_end_indices.size:
        i_end = i_end_indices[0]
        n_depths = i_end - i_start + 1
        # Slice ends are exclusive, so add 1 to keep the i_end level
        # (matching the "+ 1" in n_depths above).
        depth_slice = slice(i_start, i_end + 1)
    else:
        # depth_end lies beyond the file: take everything from i_start on.
        # End of None, not -1 -- an end of -1 would drop the last level.
        n_depths = depth_dim.length - i_start
        depth_slice = slice(i_start, None)

    # Adjust the dimension definition.
    depth_dim.length = n_depths

    # Adjust all the referencing variables (N.B. includes depth coord).
    for var in ncg.all_variables(input):
        if depth_dim in var.dimensions:
            dim_index = var.dimensions.index(depth_dim)
            # Index with a tuple: indexing arrays with a list is deprecated.
            slices = tuple(depth_slice if index == dim_index else slice(None)
                           for index in range(len(var.dimensions)))
            var.data = var[slices]

    # Write result.  The function modified 'input' in place ('output' never
    # existed -- the original wrote an undefined name).
    nc_files.write(input, output_path)
Esempio n. 2
0
        def test_cdl__ncdump_match(self):
            """Check our stored CDL against ncdump's header output for the
            same structure, after whitespace normalisation."""
            expected_cdl = _complex_cdl[:]
            group = _make_complex_group()
            tmp_dir = tempfile.mkdtemp()
            try:
                nc_path = os.path.join(tmp_dir, 'temp.nc')
                # Write the test group out as a real netCDF file.
                with nc4.Dataset(nc_path, 'w') as ds:
                    ncf.write(ds, group)
                # Capture the header dump of that file.
                dump_output = subprocess.check_output(
                    'ncdump -h ' + nc_path, shell=True)
            finally:
                # Always remove the scratch directory, even if ncdump fails.
                shutil.rmtree(tmp_dir)

            self.assertEqual(strip_lines(dump_output),
                             strip_lines(expected_cdl))
Esempio n. 3
0
def EG_some_variables():
    """Copy only certain named variables from ``test.nc`` to ``test_out.nc``."""
    input_path = os.path.join(basedir, 'test.nc')
    output_path = os.path.join(basedir, 'test_out.nc')

    # Read input.
    input = nc_files.read(input_path)

    # Build an output containing only the selected variables.
    # (The original iterated an undefined 'input', tested the misspelled
    # 'var_names', added to an undefined 'output', and -- contrary to its
    # stated purpose -- kept the variables NOT in the selection.)
    output = Group()
    varnames = ('temp', 'depth')
    for var in ncg.all_variables(input):
        if var.name in varnames:
            output.variables.add(var)

    # Write out.
    nc_files.write(output, output_path)
Esempio n. 4
0
def EG_filter_variables():
    """Filter variables by name, removing some, and flatten structure.

    Keeps variables whose names contain any wanted substring, except those
    ending with an excluded suffix.
    """
    input_path = os.path.join(basedir, 'test.nc')
    output_path = os.path.join(basedir, 'test_out.nc')
    # Read input.
    input = nc_files.read(input_path)
    # Make blank output.
    output = Group()
    varname_parts_only = ('temp', 'depth')
    varname_ends_exclude = ('_QC', '_stats')
    # Copy selected variables.
    for var in ncg.all_variables(input):
        # 'in' reads better than .find(...) >= 0 for substring tests.
        wanted = any(part in var.name for part in varname_parts_only)
        # str.endswith accepts a tuple of suffixes directly.
        if wanted and not var.name.endswith(varname_ends_exclude):
            output.variables.add(var)
    # Write out.
    nc_files.write(output, output_path)
Esempio n. 5
0
def EG_flatten():
    """Copy certain information to an output file, losing existing structure."""
    input_path = os.path.join(basedir, 'test.nc')
    output_path = os.path.join(basedir, 'test_out.nc')

    # Read input.
    input = nc_files.read(input_path)

    # Make blank output.
    output = Group()

    # Partial copy.
    output.groups.add(input.groups['grid_dims'])
    output.variables.add_all(ncg.all_variables(input.groups['group_a']))
    output.variables.add_all(ncg.all_variables(input.groups['group_Q']))

    # Write out.
    # N.B. relevant top-level dimensions etc. will be re-created.
    # but this 'flattens' everything into the root group.
    # (Original wrote the undefined name 'nco'; the built structure is 'output'.)
    nc_files.write(output, output_path)
Esempio n. 6
0
 def test_simple_to_path(self):
     """Write a one-variable group to a file path, then read back and compare."""
     out_path = os.path.join(testdata_dirpath, 'test_simple.nc')
     arr = np.arange(4)
     var = ov('v1', dd=[od('x')], aa=[oa('v_att', 'this')], data=arr)
     group = og('', vv=[var])
     # Before writing, the group still has unresolved dimensions.
     self.assertFalse(ncg.has_no_missing_dims(group))
     try:
         write(out_path, group)
         # Writing completes the missing dimension information.
         self.assertTrue(ncg.has_no_missing_dims(group))
         # Read it back again and check.
         with netCDF4.Dataset(out_path) as ds:
             group_back = read(ds)
             self.assertEqual(group_back, group)
     finally:
         if not leave_output and os.path.exists(out_path):
             # Remove temporary file.
             os.remove(out_path)
Esempio n. 7
0
 def test_add_to_dataset(self):
     """Write a group into an already-open dataset that has a pre-set
     attribute, then read back and check both survive."""
     test_outfile_name = 'test_add.nc'
     test_outfile_path = os.path.join(testdata_dirpath, test_outfile_name)
     try:
         # Use a context manager so the dataset is closed even if the
         # write fails (the original leaked the open handle on error).
         with netCDF4.Dataset(test_outfile_path, 'w') as ds:
             ds.setncattr('extra', 4.5)
             g = og('', vv=[ov('v1', aa=[oa('var_attr', 'this_value')],
                               data=np.array(3.2))])
             write(ds, g)
         # Read it back again and check.
         with netCDF4.Dataset(test_outfile_path) as ds:
             g = read(ds)
             self.assertEqual(g.attributes['extra'].value, 4.5)
             self.assertEqual(list(g.dimensions), [])
             self.assertEqual(
                 g.variables['v1'].attributes['var_attr'].value,
                 'this_value')
     finally:
         if not leave_output:
             if os.path.exists(test_outfile_path):
                 os.remove(test_outfile_path)
Esempio n. 8
0
def check_file_cdl(file_path):
    # Load from the given file
    with netCDF4.Dataset(file_path) as ds:
        g = ncds.read(ds)
        # Re-save to a temporary file, to get an ncdump output in known order.
        ncds.write('temp.nc', g)
        # Rename so name matches temporary file name as seen by ncdump.
        g.rename('temp')
        # Generate cdl.
        g_cdl = cdl(g)
    try:
        # Run ncdump on the test file
        f_cdl = subprocess.check_output('ncdump -h temp.nc', shell=True)
    finally:
        os.remove('temp.nc')
    # Check that the two CDL strings are "essentially" the same.
    g_cdl_std = ncdl.comparable_cdl(g_cdl)
    f_cdl_std = ncdl.comparable_cdl(f_cdl)
    if g_cdl_std == f_cdl_std:
        print 'OK    (ncdump and ncobj output EQUAL)'
    else:
        print 'FAIL: ncdump and ncobj output differ..'
    print
Esempio n. 9
0
def EG_extract_combine():
    """Combine selected data, a whole month at a time, from several files.

    Input files (sorted by name) are assumed to hold consecutive time
    sections; output concatenates whole months of the desired variables.
    """
    input_paths = glob.glob(os.path.join(basedir, 'test_sequence_*.nc'))
    output_path = os.path.join(basedir, 'test_out.nc')
    desired_var_names = ('air_temperature', 'surface_pressure')

    # Create a suitable empty output structure.
    output = Group()
    output.dimensions.add(Dimension('time', length=0))
    output_time_dim = output.dimensions['time']
    output.variables.add(Variable('time', dimensions=output_time_dim))
    output_time_var = output.variables['time']
    # N.B. time 'units' attribute and data array added later, copied from
    # the first input file.

    # sort input filenames for correct data times ordering
    input_paths = sorted(input_paths)

    # get whole months from input files in name order and concat to output.
    first_file = True
    for input_path in input_paths:
        input = nc_files.read(input_path)

        # Get input file time information.
        time_var = input.variables['time']
        time_units = time_var.attributes['units'].value
        time_dts = iris.Unit(time_units).num2date(time_var[:])
        monthdays = np.array([dt.day for dt in time_dts])

        if first_file:
            first_file = False
            # First time through, establish time base at first month start.
            time_next = time_dts[np.where(monthdays == 1)[0][0]]

            # First time though, initialise time variable units + data (empty).
            output_time_var.attributes.add(
                Attribute(name='units', value=time_units))
            output_time_var.data = np.array([])

            # First time through, create output variables matching input.
            for varname in desired_var_names:
                output.variables[varname] = input.variables[varname]
                var = output.variables[varname]
                assert var.dimensions[0].name == 'time'
                # Change first dimension to output_time (current length=0).
                var.dimensions[0] = output_time_var
                # (Original read an undefined 'var_dims' here.)
                dim_lens = [dim.length for dim in var.dimensions]
                assert dim_lens[0] == 0
                # Assign a data array of correct dimensions (initially empty:
                # leading length is 0).  np.zeros(dim_lens), not
                # np.array(dim_lens), which would be an array OF the lengths.
                var.data = np.zeros(dim_lens)

        # Get index for the month start.
        month_start = np.where(time_dts == time_next)[0][0]
        # Calculate next start date (roll over December to January).
        is_december = time_next.month == 12
        time_next = datetime.datetime(
            year=time_next.year + 1 if is_december else time_next.year,
            month=1 if is_december else time_next.month + 1,
            day=1)
        # Get index for the month end, or None (end-of-data) if not in file.
        # N.B. .size: truth-testing a multi-element numpy array raises; and
        # an end index of -1 would silently drop the file's last record.
        next_months = np.where(time_dts == time_next)[0]
        month_end = next_months[0] if next_months.size else None

        # Slice a month from the desired variables and copy to the output.
        for varname in desired_var_names:
            var = input.variables[varname]
            assert var.dimensions[0].name == 'time'
            # Note: this array concatenation is crude + expensive in memory.
            # This is where we really need deferred data management.
            output.variables[varname].data = np.concatenate(
                (output.variables[varname][:], var[month_start:month_end]),
                axis=0)

        # Extend the time coord variable appropriately.
        # NOTE: with missing data, the time values sequence will show gaps.
        output_time_var.data = np.concatenate(
            (output_time_var[:], time_var[month_start:month_end]), axis=0)

    # Write the output file.
    nc_files.write(output, output_path)
Esempio n. 10
0
        oa('root_attr_vec', np.array([1.2, 3, 4]))],
    dd=[od('root_dim_x', 2)],
    vv=[ov('root_var_1',
           dd=[od('root_dim_x')],
           aa=[oa('root_var_attr_1', 11)],
           data=np.zeros((2))),
        ov('root_var_2_scalar', data=np.array(3.15, dtype=np.float32))],
    gg=[og('subgroup',
           aa=[oa('subgroup_attr', 'qq')],
           dd=[od('subgroup_dim_y', 3)],
           vv=[ov('subgroup_var',
                  dd=[od('root_dim_x'), od('subgroup_dim_y')],
                  aa=[oa('subgroup_var_attr', 57.31)],
                  data=np.zeros((2, 3)))],
           gg=[og('sub_sub_group',
                  aa=[oa('sub_sub_group_attr', 'this')],
                  vv=[ov('sub_sub_group_var',
                         dd=[od('subgroup_dim_y')],
                         data=np.zeros((3)))])]),
        og('sg_2_empty')])


# Resolve any dangling references (e.g. shared dimensions) in the group 'g'.
ncg.complete(g)
# Show the in-memory structure for visual comparison with ncdump below.
print g
print
# Write the group out to a netCDF4 file on disk.
file_path = './temp.nc'
with nc4.Dataset(file_path, 'w') as ds:
    ncf.write(ds, g)
print
# Dump the resulting file's header with ncdump, for manual comparison
# against the printout above.
os.system('ncdump -h temp.nc')