Example #1
def nca_equivalence(ncfile1, ncfile2, variable='ta'):
    """ Do these two files describe the same content?"""
    # Let's start by comparing a few important things

    x = s3Dataset(ncfile1)
    y = s3Dataset(ncfile2)

    # First let's just check a data record
    xx = x.variables[variable]
    yy = y.variables[variable]

    assert xx.shape == yy.shape, "CFA data arrays are not the same shape"

    assert len(xx.shape) == 4, "Unexpected variable shape for comparison"

    xx = xx[:, 0, 0, 0].flatten()
    yy = yy[:, 0, 0, 0].flatten()

    # We don't compare all of the data as that would take a long time
    assert (xx == yy).all(), "Data in arrays does not match"

    x.close()
    y.close()
    # now check file headers

    raise NotImplementedError(
        "This doesn't mean the test has failed, just the test code is not finished"
    )
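A minimal usage sketch for the check above, run with the same imports as the example; the two file names are placeholders for existing CFA master-array files:

# placeholder file names; substitute real .nca master-array files
file_a = "/data/cfa/air_temperature_a.nca"
file_b = "/data/cfa/air_temperature_b.nca"

try:
    nca_equivalence(file_a, file_b, variable="ta")
except AssertionError as err:
    print("Files differ: {}".format(err))
except NotImplementedError:
    # the header comparison is still a stub, so reaching this point means
    # the shape and data checks all passed
    print("Data checks passed; header comparison not yet implemented")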
Example #2
def create_partitions_from_files(out_dataset, files, axis,
                                 cfa_version, common_date):
    """Create the CFA partitions from a list of files."""
    # loop over the files and open as a regular netCDF4 Dataset
    for fname in files:
        in_dataset = s3Dataset(fname, "r")
        # get the global metadata
        in_dataset_attrs = {
            x: in_dataset.getncattr(x) for x in in_dataset.ncattrs()
        }
        # add the attributes to the s3Dataset by updating the dictionary
        out_dataset._cfa_dataset.metadata.update(in_dataset_attrs)
        # loop over the groups
        for grp in in_dataset.groups:
            in_group = in_dataset[grp]
            # create a group if one with this name does not exist
            if grp not in out_dataset.groups:
                out_group = out_dataset.createGroup(grp)
            else:
                out_group = out_dataset.groups[grp]
            # update the metadata
            in_group_attrs = {
                x: in_group.getncattr(x) for x in in_group.ncattrs()
            }
            out_group._cfa_grp.metadata.update(in_group_attrs)
            add_var_dims(in_group, out_group, axis, fname, common_date)

        # add the variables in the root group
        add_var_dims(in_dataset, out_dataset, axis, fname, common_date)
        in_dataset.close()
Example #3
def test_s3Dataset_read(path_stub, format="NETCDF4", cfa_version=None):
    """Test writing out a s3Dataset, for one of the various permutations of:
        1. file format (netCDF3 or netCDF4)
        2. whether it is a S3-netCDF / CFA file or a plain netCDF file
        3. the CFA version (0.4 or 0.5)
    """
    file_name = get_file_path(path_stub, format, cfa_version)
    if DEBUG:
        print("Test reading {}".format(file_name))
    # open the dataset
    dr = s3Dataset(file_name, mode='r')
    if DEBUG:
        print(dr.groups)

    if format == "NETCDF4" or format == "CFA4":
        grp = dr.groups["test_group"]
    else:
        grp = dr

    if DEBUG:
        print(grp.variables["tmp"])
        print(dr.variables["scl"])

    tmp_var = grp.variables["tmp"]
    # read a hyperslab to force the variable data to be fetched
    x = tmp_var[:, 0, 0, 0]
    dr.close()
    return True
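A sketch of driving this read test over several permutations; the path stub is a placeholder, the files are assumed to have been written already (see the write test in Example #7), and the exact set of valid format / CFA version combinations depends on the library:

path_stub = "/tmp/s3nc_test"   # placeholder; an s3:// URL could also be used

for fmt, cfa in [("NETCDF4", None),   # plain netCDF4, no CFA metadata
                 ("CFA4", "0.4"),     # CFA master-array backed by netCDF4
                 ("CFA4", "0.5"),
                 ("CFA3", "0.4")]:    # CFA master-array backed by netCDF3
    assert test_s3Dataset_read(path_stub, format=fmt, cfa_version=cfa)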
Example #4
    def read_s3nc_serial(self):
        # S3netcdf serial read method
        fp = os.path.join(self.config['path'], self.config['file'])
        total_bytes = 0
        for ir in range(self.config['repeats']):
            nc = s3Dataset(fp, 'r')
            var = nc.variables[self.config['var']]
            for i in range(var.shape[0]):
                logging.debug('Index: {}'.format(i))
                data = var[i, :, :, :]
                total_bytes += data.nbytes

            nc.close()

        return total_bytes
Example #5
    def read_s3nc_map(self):
        # s3netcdf map read
        fp = os.path.join(self.config['path'], self.config['file'])
        total_bytes = 0
        for ir in range(self.config['repeats']):
            nc = s3Dataset(fp, 'r')
            var = nc.variables[self.config['var']]
            rx = randint(0, var.shape[0] - 1)
            ry = randint(0, var.shape[1] - 1)
            logging.debug('Index: [{},{},:,:]'.format(rx, ry))
            data = var[rx, ry, :, :]
            total_bytes += data.nbytes

            nc.close()

        return total_bytes
Example #6
    def read_s3nc_timeseries(self):
        # s3netcdf time series read
        fp = os.path.join(self.config['path'], self.config['file'])
        total_bytes = 0
        for ir in range(self.config['repeats']):
            nc = s3Dataset(fp, 'r')
            var = nc.variables[self.config['var']]
            rx = randint(0, var.shape[1] - 1)
            ry = randint(0, var.shape[2] - 1)
            logging.debug('Index: [:,{},{},:]'.format(rx, ry))
            data = var[:, rx, ry, 0]
            total_bytes += data.nbytes
            logging.debug('shape of results = {}'.format(data.shape))

            nc.close()

        return total_bytes
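The three read benchmarks above (serial, map and time-series) assume an enclosing class holding a config dictionary; a minimal illustrative wrapper is sketched below. The class name and example values are assumptions, but the config keys ('path', 'file', 'var', 'repeats') are the ones the methods actually reference, and the module is assumed to import os, logging, random.randint and s3Dataset as the methods require:

class S3NCReadBenchmark:
    """Illustrative container for the read benchmark methods above."""
    def __init__(self, config):
        self.config = config
    # read_s3nc_serial, read_s3nc_map and read_s3nc_timeseries from the
    # examples above would be attached as methods of this class

config = {
    "path": "/data/benchmarks",     # placeholder directory or S3 prefix
    "file": "ta_master_array.nca",  # placeholder CFA master-array file
    "var": "ta",                    # placeholder 4D variable name
    "repeats": 3,
}
bench = S3NCReadBenchmark(config)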
Example #7
def test_s3Dataset_write(path_stub,
                         format="NETCDF4",
                         cfa_version="0.4",
                         resolution_degrees=1.5):
    """Test writing out a s3Dataset, for one of the various permutations of:
        1. file format (netCDF3 or netCDF4)
        2. whether it is a S3-netCDF / CFA file or a plain netCDF file
        3. the CFA version (0.4 or 0.5)
    """
    # build a file name from the path stub, the format and the cfa_version
    # don't use os.path.join as it doesn't handle URLs and paths
    file_name = get_file_path(path_stub, format, cfa_version)
    if DEBUG:
        print("Test writing {}".format(file_name))
    # open the dataset
    ds = s3Dataset(file_name,
                   format=format,
                   mode='w',
                   cfa_version=cfa_version,
                   diskless=False,
                   persist=False)
    # construct the shape:
    shape = [
        365, 1, int(180.0 / resolution_degrees + 1),
        int(360.0 / resolution_degrees)
    ]
    # create the data inside the dataset
    create_test_dataset(ds, format, cfa_version, shape)
    if DEBUG:
        print(ds.groups["test_group"].variables["tmp"])
        print(ds.variables["scl"])

    if format == "CFA4" or format == "NETCDF4":
        tmp_var = ds.groups["test_group"].variables["tmp"]
    else:
        tmp_var = ds.variables["tmp"]
    tmp_var[:, :, :, :] = 250.0
    vel_var = ds.variables["velocity"]
    vel_var[0] = 10.0
    ds.close()
    return True
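A short sketch chaining this write test with the read test from Example #3, assuming both functions live in the same test module; the path stub is a placeholder:

path_stub = "/tmp/s3nc_test"   # placeholder; an s3:// URL should also work

# write a CFA 0.4 file backed by netCDF4, then read it back in
assert test_s3Dataset_write(path_stub, format="CFA4", cfa_version="0.4")
assert test_s3Dataset_read(path_stub, format="CFA4", cfa_version="0.4")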
Example #8
def aggregate_into_CFA(output_master_array, path, axis,
                       cfa_version, common_date=None):
    """Aggregate the netCDF files in directory into a CFA master-array file"""
    # get the list of files first of all
    files = get_file_list(path)
    # create the s3Dataset
    # create the output master array file
    out_dataset = s3Dataset(
        output_master_array,
        mode='w',
        clobber=True,
        diskless=False,
        cfa_version=cfa_version
    )
    # create the partitions from the list - these will be created in the order
    # that the files are read in
    create_partitions_from_files(out_dataset, files, axis,
                                 cfa_version, common_date)
    # we need to sort the partition matrices for each variable - i.e. there is
    # one matrix per variable
    sort_partition_matrices(out_dataset, axis)
    # close the dataset to write / upload it
    out_dataset.close()
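How the aggregation above might be invoked, assuming the input directory holds a sequence of netCDF files to be joined along the time dimension; the paths and the axis name are placeholders:

aggregate_into_CFA(
    "/data/cfa/ta_all_years.nca",   # output CFA master-array file
    "/data/netcdf/ta_yearly/",      # directory containing the input files
    axis="time",                    # dimension to aggregate along
    cfa_version="0.5",
    common_date=None
)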
Example #9
def split_into_CFA(
    output_path,
    input_path,
    subarray_path="",
    subarray_shape=[],
    subarray_size=50 * 1024 * 1024,
    cfa_version="0.5",
):
    """Split a netCDF file into a number of subarray files and write the CFA
    master array file."""
    # if the subarray path is empty then get it from the output_path
    if subarray_path == "":
        if ".nca" in output_path:
            subarray_path = output_path[:-4]
        elif ".nc" in output_path:
            subarray_path = output_path[:-3]
        else:
            subarray_path = output_path
            output_path += ".nca"

    # open the input file
    nc_ds = Dataset(input_path, 'r')

    # get the output format for the new Dataset
    # if it's netCDF4 then the output is CFA4
    # if it's netCDF3 then the output is CFA3
    if nc_ds.file_format in ['NETCDF4', 'NETCDF4_CLASSIC']:
        s3_file_format = "CFA4"
    elif nc_ds.file_format == "NETCDF3_CLASSIC":
        s3_file_format = "CFA3"
    else:
        raise CFAError("Cannot split file with format: {}".format(
            nc_ds.file_format))

    # open the output file - copy the input from the input file to the output
    # file(s), whilst using the subarray settings to chunk the data
    s3_ds = s3Dataset(output_path,
                      'w',
                      format=s3_file_format,
                      cfa_version=cfa_version)

    # we now want to copy the information from the original dataset
    # netCDF files have:
    #   global metadata
    #   global dimensions
    #   global variables
    #       Each variable has
    #           metadata
    #           field data
    #
    #   global groups
    #       Each group has
    #           metadata
    #           dimensions
    #           variables
    #               Each variable has
    #                   metadata
    #                   field data

    # global metadata
    nc_md_keys = nc_ds.ncattrs()
    for k in nc_md_keys:
        s3_ds.setncattr(k, nc_ds.getncattr(k))

    # global dimensions
    copy_dims(nc_ds, s3_ds)

    # global variables
    copy_vars(nc_ds, s3_ds, subarray_size, subarray_shape)

    # now do the groups
    for grp in nc_ds.groups:
        nc_grp = nc_ds.groups[grp]
        # create s3 group in the s3 dataset
        s3_grp = s3_ds.createGroup(nc_grp.name)
        # copy group metadata
        nc_md_keys = nc_grp.ncattrs()
        for k in nc_md_keys:
            s3_grp.setncattr(k, nc_grp.getncattr(k))

        # copy group dimensions
        copy_dims(nc_grp, s3_grp)

        # copy group variables
        copy_vars(nc_grp, s3_grp, subarray_size, subarray_shape)

    # close the s3Dataset - super important as everything gets written on close
    s3_ds.close()
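A usage sketch for the split above; the paths and subarray size are placeholders, and leaving subarray_path empty lets the function derive it from the output path as shown at the top of the example:

split_into_CFA(
    "/data/cfa/ta_split.nca",         # output CFA master-array file
    "/data/netcdf/ta_big_file.nc",    # input netCDF file to be split
    subarray_size=100 * 1024 * 1024,  # aim for ~100 MiB per subarray file
    cfa_version="0.5"
)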
Example #10
    if args.variable:
        variable = args.variable
    else:
        variable = "all"

    if args.partition:
        # use the partition supplied on the command line
        partition = args.partition
    else:
        partition = "none"

    if args.metadata:
        metadata = True
    else:
        metadata = False

    if input_file:
        # Get the input file.
        path = os.path.expanduser(input_file)
        input_dataset = s3Dataset(path, mode='r')
        # Print the global dataset information
        print_dataset_info(
            input_dataset,
            group,
            variable,
            partition,
            metadata
        )
    #else:
Example #11
        metavar="<variable>",
        help=("Name of a variable to change file prefix for, or change all "
              "variables."
              "--variable=all|<variable_name>"))

    parser.add_argument(
        "--partition",
        action="store",
        default="all",
        metavar="<partition>",
        help=("Choose the partition to change the file location prefix for."
              "--partition=all<partition_index>"))

    parser.add_argument("--prefix",
                        action="store",
                        default="none",
                        required=True,
                        metavar="<prefix>",
                        help=("New file location prefix"))
    args = parser.parse_args()

    # get the input file
    input_path = os.path.expanduser(args.input)
    # open the input dataset in append mode
    input_dataset = s3Dataset(input_path, mode='a')
    # Update the prefix in the partitions
    update_file_in_partitions(input_dataset, args.prefix, args.group,
                              args.variable, args.partition)
    # close the file to save the changes
    input_dataset.close()
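The same prefix update can be sketched without the argument parser, assuming it runs with the same imports and helper functions as the script above (update_file_in_partitions, s3Dataset, os); every value below is a placeholder:

input_path = os.path.expanduser("~/data/ta_all_years.nca")  # placeholder
ds = s3Dataset(input_path, mode='a')
# rewrite the subarray file locations to point at a new prefix
update_file_in_partitions(ds,
                          "s3://new-bucket/ta_subarrays/",  # new prefix
                          "all",   # group
                          "all",   # variable
                          "all")   # partition
ds.close()   # the changes are written back when the dataset is closed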