Example #1
    def _best_coordinates_setup(self):
        """
        Set up object when coordinates attribute is found
        """
        coordinates_vars = []
        coordinates = self._attributes[0][self.BEST_COORDINATES_NAME.lower()]
        if coordinates is not None:
            coordinates_vars = coordinates.split()  # split on whitespace

        if len(coordinates_vars) != 4:
            raise InvalidVariableError(
                'The coordinate attribute does not have four entries. '
                'It should be space separated "longitude latitude altitude time"'
            )

        for coordinates_var in coordinates_vars:
            if coordinates_var not in self._variables:
                raise InvalidVariableError(
                    "There is no variable for the co-ordinate '{}'".format(
                        coordinates_var))

        self.longitude_variable_name, \
            self.latitude_variable_name, \
            self.altitude_variable_name, \
            self.time_variable_name \
            = coordinates_vars
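The core of this method is plain string splitting; a minimal stand-alone sketch of that step, using a made-up attribute value rather than one read from a real file:

# Minimal sketch of the split performed above; the attribute value is made up.
coordinates = "LON LAT ALT TIME"        # hypothetical 'Coordinates' attribute value
coordinates_vars = coordinates.split()  # split on whitespace

assert len(coordinates_vars) == 4       # anything else raises InvalidVariableError
longitude, latitude, altitude, time = coordinates_vars
print(longitude, latitude, altitude, time)  # LON LAT ALT TIME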
Example #2
    def _check_has_variables_and_attributes(self):
        """
        Check that netcdf file has variables and attributes
        """
        if self._variables is None or len(self._variables) == 0:
            raise InvalidVariableError("No variables in the file so the type of data is unknown")
        if self._attributes[0] is None or len(self._attributes[0]) == 0:
            raise InvalidVariableError("No attributes in the file so the type of data is unknown")
Example #3
    def __init__(self, attributes, variables):
        """
        Initialisation
        :param attributes: dictionary of attributes and their values (or list of dictionaries if multiple files read)
        :param variables: dictionary of variable names and NetCDF Variable objects
        (or list of dictionaries if multiple files read)
        :return: nothing
        """
        self.station = False
        self.station_latitude = None
        self.latitude_variable_name = None
        self.station_longitude = None
        self.longitude_variable_name = None
        self.altitude = None
        self.altitude_variable_name = None
        self.pressure_variable_name = None
        self.time_stamp_info = None
        self.time_dimensions = None

        self._attributes = [{k.lower(): v for k, v in list(attrs.items())} for attrs in listify(attributes)]
        if len(variables) == 0:
            raise InvalidVariableError("No variables in the file so the type of data is unknown")
        self._variables = list(variables[0].keys())
        self._variable_dimensions = [{name: var.dimensions for name, var in list(vars.items())}
                                     for vars in listify(variables)]
        self._check_has_variables_and_attributes()

        # Carry out these checks using the attributes from the first file as a 'master'
        if self.TIME_COORDINATE_NAME.lower() in self._attributes[0]:
            self.time_variable_name = self._get_coordinate_variable_name(self.TIME_COORDINATE_NAME, "time")

            if self.LATITUDE_COORDINATE_NAME.lower() in self._attributes[0]:
                self._lat_lon_var_specified_setup()
            elif self.STATION_LATITUDE_NAME.lower() in self._attributes[0]:
                self._stationary_setup()
            else:
                raise InvalidVariableError("No attributes indicating latitude, expecting '{}' or '{}'"
                                           .format(self.STATION_LATITUDE_NAME, self.LONGITUDE_COORDINATE_NAME))
        elif self.BEST_COORDINATES_NAME.lower() in self._attributes[0]:
            self._best_coordinates_setup()
        else:
            raise InvalidVariableError(
                "No attributes indicating time variable name, expecting either '{}' or 'Coordinates'"
                .format(self.TIME_COORDINATE_NAME))

        if self.CORRECTED_PRESSURE_VAR_NAME in self._variables:
            self.pressure_variable_name = self.CORRECTED_PRESSURE_VAR_NAME
        elif self.PRESSURE_VAR_NAME in self._variables:
            self.pressure_variable_name = self.PRESSURE_VAR_NAME
        else:
            self.pressure_variable_name = None

        if self.TIME_STAMP_INFO_NAME.lower() in self._attributes[0]:
            # Not all files will have the same timestamp -> Retrieve a list of timestamps for each file.
            self.time_stamp_info = [attrs[self.TIME_STAMP_INFO_NAME.lower()] for attrs in self._attributes]

        self.time_dimensions = self._variable_dimensions[0][self.time_variable_name]
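A minimal sketch of the attribute normalisation and dispatch above, with made-up attribute names loosely modelled on the class constants (the real constant values depend on the product class, so the keys here are assumptions):

# Made-up attributes; keys are lower-cased exactly as in __init__ above.
attributes = {"Coordinates": "LON LAT ALT TIME", "Time_Coordinate": "TIME"}
normalised = {k.lower(): v for k, v in attributes.items()}

# Dispatch mirrors the if/elif chain above.
if "time_coordinate" in normalised:
    print("time-coordinate route")
elif "coordinates" in normalised:
    print("best-coordinates route")
else:
    print("InvalidVariableError")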
Example #4
    def _create_cube(self, filenames, variable):
        """Creates a cube for the specified variable.
        :param filenames: List of filenames to read coordinates from
        :param variable: Optional variable to read while we're reading the coordinates; can be a string or a
        VariableConstraint object
        :return: If variable was specified this will return an UngriddedData object, otherwise a CoordList
        """
        import six
        from cis.exceptions import InvalidVariableError
        from cis.data_io.products.gridded_NetCDF import DisplayConstraint
        from cis.data_io.gridded_data import load_cube
        from iris.exceptions import CoordinateNotFoundError

        # Check if the files given actually exist.
        for filename in filenames:
            with open(filename) as f:
                pass

        variable_constraint = variable
        if isinstance(variable, six.string_types):
            # noinspection PyPep8
            variable_constraint = DisplayConstraint(
                cube_func=(lambda c: c.var_name == variable or c.standard_name
                           == variable or c.long_name == variable),
                display=variable)
        if len(filenames) == 1:
            callback_function = self.load_single_file_callback
        else:
            callback_function = self.load_multiple_files_callback

        try:
            cube = load_cube(filenames,
                             variable_constraint,
                             callback=callback_function)
        except ValueError as e:
            if variable is None:
                message = "File contains more than one cube variable name must be specified"
            elif e.args[0] == "No cubes found":
                message = "Variable not found: {} \nTo see a list of variables run: cis info {}" \
                    .format(str(variable), filenames[0])
            else:
                message = e.args[0]
            raise InvalidVariableError(message)

        try:
            hybrid_ht = cube.coord(name_or_coord='Hybrid height')
            hybrid_ht.attributes[
                'formula'] = 'z(k,j,i) = a(k) + b(k)*orog(j,i)'
            hybrid_ht.convert_units('m')
        except CoordinateNotFoundError:
            pass

        try:
            cube.coord(long_name='t').standard_name = 'time'
        except CoordinateNotFoundError:
            pass

        self._add_available_aux_coords(cube, filenames)

        return cube
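The DisplayConstraint above matches a cube when the requested name equals any of its three name attributes; a small stand-alone illustration with a stub object (the stub and its values are assumptions, not an iris cube):

class StubCube:
    # Stand-in for an iris cube's three name attributes.
    var_name = "tas"
    standard_name = "air_temperature"
    long_name = "Near-Surface Air Temperature"

variable = "air_temperature"
c = StubCube()
# Same predicate as the cube_func lambda above.
print(c.var_name == variable or c.standard_name == variable or c.long_name == variable)  # True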
Example #5
def read(filename, usr_variables):
    """
    Reads a Variable from a NetCDF file

    :param filename: The name (with path) of the NetCDF file to read.
    :param usr_variables: A variable (dataset) name to read from the files. The name must appear exactly as in the
      NetCDF file. Variable names may be fully qualified NetCDF4 Hierarchical group variables in the form
      ``<group1>/<group2....>/<variable_name>``, e.g. ``AVHRR/Ch4CentralWavenumber``.
    :return: A Variable instance constructed from the input file
    """
    from netCDF4 import Dataset

    usr_variables = listify(usr_variables)

    try:
        datafile = Dataset(filename)
    except RuntimeError as e:
        raise IOError(str(e))

    data = {}
    for full_variable in usr_variables:
        # Split the fully qualified variable (group/variable) into group and variable
        parts = full_variable.split("/")
        groups = parts[:-1]
        variable = parts[-1]
        current_group = datafile
        for group in groups:
            current_group = current_group.groups[group]
        try:
            data[full_variable] = current_group.variables[variable]
        except KeyError:
            raise InvalidVariableError(full_variable +
                                       ' could not be found in ' + filename)

    return data
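A hypothetical call, assuming a file flight.nc containing a TIME variable and the grouped AVHRR variable from the docstring (neither ships with the library):

variables = read("flight.nc", ["TIME", "AVHRR/Ch4CentralWavenumber"])
for name, var in variables.items():
    print(name, var.shape)  # each value is a netCDF4 Variable handle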
Example #6
def read_many_files(filenames, usr_variables, dim=None):
    """
    Reads a single Variable from many NetCDF files. This method uses the netCDF4 MFDataset class and so is NOT
    suitable for NetCDF4 datasets (only 'CLASSIC' netcdf).

    :param filenames: A list of NetCDF filenames to read, or a string with wildcards.
    :param usr_variables: A list of variable (dataset) names to read from the files.
      The names must appear exactly as in the NetCDF file.
    :param dim: The name of the dimension on which to aggregate the data. None is the default
      which tries to aggregate over the unlimited dimension
    :return: A list of variable instances constructed from all of the input files
    """
    from netCDF4 import MFDataset
    from cis.exceptions import InvalidVariableError

    usr_variables = listify(usr_variables)

    try:
        datafile = MFDataset(filenames, aggdim=dim)
    except RuntimeError as e:
        raise IOError(e)

    data = {}
    for variable in usr_variables:
        # Get data.
        try:
            data[variable] = datafile.variables[variable]
        except KeyError:
            raise InvalidVariableError(
                'Variable {} not found in file {}.'.format(
                    variable, filenames))

    return data
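A hypothetical call over a wildcard pattern; the filenames, variable name, and 'time' aggregation dimension are all assumptions:

data = read_many_files("model_output_*.nc", ["temperature"], dim="time")
print(data["temperature"].shape)  # aggregated across all matching files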
Example #7
    def find_auxiliary_coordinate(self, variable):
        """
        Find the variable name of an auxiliary coordinate for the given variable (if there is one).

        :param str variable: The data variable we're checking for any auxiliary coordinates
        :return str or None: The name of the variable holding the auxiliary coordinate or None
        """
        aux_coord_name = None
        dim_coord_names = [self.latitude_variable_name, self.longitude_variable_name,
                           self.altitude_variable_name, self.pressure_variable_name] + list(self.time_dimensions)
        # Find the *dimension* which corresponds to the auxiliary coordinate
        aux_coords = [dim for dim in self._variable_dimensions[0][variable] if dim not in dim_coord_names]
        if len(aux_coords) > 1:
            raise InvalidVariableError("CIS currently only supports reading data variables with one auxilliary "
                                       "coordinate")
        elif len(aux_coords) == 1:
            # If there is also a variable named after that dimension then this is the variable we're after
            if aux_coords[0] in self._variable_dimensions[0]:
                aux_coord_name = aux_coords[0]
            # Otherwise we need to look through all the variables and choose the first variable whose dimension is only
            #  the auxiliary dimension.
            else:
                for v, dims in self._variable_dimensions[0].items():
                    if dims[0] == aux_coords[0]:
                        aux_coord_name = v
                        break
        return aux_coord_name
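The dimension logic above can be illustrated with plain dictionaries (all names here are made up): 'obs' is the one dimension of 'backscatter' that is not a recognised coordinate, and since a variable of that name exists it would be returned.

variable_dimensions = {"backscatter": ("obs", "altitude"),
                       "obs": ("obs",),
                       "altitude": ("altitude",)}
dim_coord_names = ["latitude", "longitude", "altitude", "time"]
aux_coords = [d for d in variable_dimensions["backscatter"] if d not in dim_coord_names]
print(aux_coords)  # ['obs'] -> the variable 'obs' holds the auxiliary coordinate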
Example #8
    def create_data_object(self, filenames, variable):
        from cis.exceptions import InvalidVariableError, CISError
        import numpy as np

        try:
            data_obj = load_multiple_hysplit(filenames, [variable])
        except ValueError:
            raise InvalidVariableError(variable + " does not exist in " +
                                       str(filenames))
        except EOFError as e:
            raise CISError(e)
        except IOError as e:
            raise CISError(e)  # TODO

        coords = self._create_coord_list(filenames, data_obj)

        # WRITE STANDARD NAME GUESSER HERE
        if variable == "PRESSURE":
            variable = "air_pressure"
        elif variable == "RELHUMID":
            variable = "relative_humidity"

        objM = Metadata(name=variable,
                        standard_name=variable,
                        long_name=variable,
                        shape=(len(data_obj[variable]), ),
                        missing_value=-99999.0)

        return UngriddedData(data_obj[variable], objM, coords)
Example #9
    def _get_coordinate_variable_name(self, attribute_name, coordinate_display_name):
        """
        Reads an attribute value for a co-ordinate and returns the value. Checks that the value names a variable in
        the data. Throws InvalidVariableError if the attribute or variable does not exist.
        :param attribute_name: the name of the attribute to read
        :param coordinate_display_name: the display name of the attribute read
        :return: the variable name
        """
        if attribute_name.lower() in self._attributes[0]:
            variable_name = self._attributes[0][attribute_name.lower()]
            if variable_name not in self._variables:  # Just check the first file
                raise InvalidVariableError("There is no variable for the {} co-ordinate '{}'"
                                           .format(coordinate_display_name, variable_name))
            return variable_name

        raise InvalidVariableError(
            "No attributes indicating {} variable name, expecting '{}'"
            .format(coordinate_display_name, attribute_name))
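The two failure modes, sketched with hypothetical attribute and variable names:

attributes = {"time_coordinate": "TIME"}  # already lower-cased, as above
variables = ["LAT", "LON"]                # note: no 'TIME' variable
name = attributes.get("time_coordinate")
if name is None:
    print("missing attribute -> InvalidVariableError")
elif name not in variables:
    print("missing variable '{}' -> InvalidVariableError".format(name))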
Example #10
def load_aeronet(filename, variables=None):
    """
    Loads aeronet csv file.

    :param filename: data file name
    :param variables: A list of variables to return
    :return: A dictionary of variables names and numpy arrays containing the data for that variable
    """
    from cis.exceptions import InvalidVariableError
    from cis.time_util import cis_standard_time_unit
    from numpy.ma import masked_invalid
    from pandas import read_csv, to_datetime

    version = get_aeronet_version(filename)
    ordered_vars = get_aeronet_file_variables(filename, version)
    if len(ordered_vars) == 0:
        return {}

    # Load all available geolocation information and any requested variables
    cols = [var for var in ("date", "time", "latitude", "longitude", "altitude") if var in ordered_vars]
    if variables is not None:
        cols.extend(variables)

    dtypes = {var: "str" if var in ("date", "time") else "float" for var in cols}

    try:
        rawd = read_csv(filename, sep=",", header=AERONET_HEADER_LENGTH[version]-1, names=ordered_vars,
                        index_col=False, usecols=cols, na_values=AERONET_MISSING_VALUE[version], dtype=dtypes,
                        parse_dates={"datetime":["date", "time"]}, infer_datetime_format=True, dayfirst=True,
                        error_bad_lines=False, warn_bad_lines=True, #low_memory="All_Sites_Times_All_Points" in filename
        )
    except ValueError:
        raise InvalidVariableError("{} not available in {}".format(variables, filename))

    # Empty file
    if rawd.shape[0] == 0:
        return {"datetime":[], "latitude":[], "longitude":[], "altitude":[]}

    # Convert pandas Timestamps into CIS standard numbers
    rawd["datetime"] = [cis_standard_time_unit.date2num(timestamp.to_pydatetime())
                        for timestamp in to_datetime(rawd["datetime"], format='%d:%m:%Y %H:%M:%S')]

    # Add position metadata that isn't listed in every line for some formats
    if version.startswith("MAN"):
        rawd["altitude"] = 0.

    elif version.endswith("2"):
        metadata = get_file_metadata(filename)
        rawd["longitude"] = float(metadata.misc[2][1].split("=")[1])
        rawd["latitude"] = float(metadata.misc[2][2].split("=")[1])
        rawd["altitude"] = float(metadata.misc[2][3].split("=")[1])

    return {var : masked_invalid(arr) for var, arr in rawd.items()}
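A hypothetical call; the filename and the AOT_500 column are assumptions about a typical version-2 AERONET site file:

data = load_aeronet("19930101_20231231_Example_Site.lev20", variables=["AOT_500"])
print(data["datetime"][:5])  # CIS standard time numbers
print(data["AOT_500"][:5])   # masked array, invalid values masked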
Example #11
    def __get_data_scale(self, filename, variable):
        from cis.exceptions import InvalidVariableError
        from pyhdf.SD import SD

        try:
            meta = SD(filename).datasets()[variable][0][0]
        except KeyError:
            raise InvalidVariableError("Variable " + variable + " not found")

        for scaling in self.modis_scaling:
            if scaling in meta:
                return scaling
        return None
Example #12
    def __get_data_scale(self, filename, variable):
        # Note this is only here because it doesn't get inherited...
        from cis.exceptions import InvalidVariableError
        from pyhdf.SD import SD

        try:
            meta = SD(filename).datasets()[variable][0][0]
        except KeyError:
            raise InvalidVariableError("Variable " + variable + " not found")

        for scaling in self.modis_scaling:
            if scaling in meta:
                return scaling
        return None
Example #13
    def _stationary_setup(self):
        """
        Set up object when latitude and longitude are fixed
        """
        from cis.exceptions import InvalidVariableError
        if self.STATION_LATITUDE_NAME.lower() not in self._attributes[0]:
            raise InvalidVariableError(
                "No attributes indicating latitude, expecting '{}'".format(
                    self.STATION_LATITUDE_NAME))
        # We need a bunch of different latitudes for different files
        self.station_latitude = [
            self._parse_station_lat_lon(
                attr[self.STATION_LATITUDE_NAME.lower()])
            for attr in self._attributes
        ]

        if self.STATION_LONGITUDE_NAME.lower() not in self._attributes[0]:
            raise InvalidVariableError(
                "No attributes indicating longitude, expecting '{}'".format(
                    self.STATION_LONGITUDE_NAME))
        self.station_longitude = [
            self._parse_station_lat_lon(
                attr[self.STATION_LONGITUDE_NAME.lower()])
            for attr in self._attributes
        ]
        self.station = True

        if self.STATION_ALTITUDE_NAME.lower() in self._attributes[0]:
            self.altitude = [
                self._parse_station_altitude(
                    attr[self.STATION_ALTITUDE_NAME.lower()])
                for attr in self._attributes
            ]
        else:
            self.altitude = [
                self.DEFAULT_ALTITUDE for attr in self._attributes
            ]
Example #14
    @staticmethod
    def _parse_station_lat_lon(lat_lon_string):
        """
        Parse a station's latitude or longitude string. Will try to read it directly as a float, otherwise will try to
        read the first whitespace-separated part of the string (e.g. '80 degrees north' -> float(80)).
        :param lat_lon_string: the raw attribute string
        :return: the parsed value as a float
        """
        from cis.exceptions import InvalidVariableError
        try:
            return float(lat_lon_string)
        except ValueError:
            try:
                return float(lat_lon_string.split()[0])
            except ValueError:
                raise InvalidVariableError("Couldn't parse station attribute '{}'".format(lat_lon_string))
Example #15
    def _create_cube(self, filenames, variable):
        """Creates a cube for the specified variable.
        :param filenames: List of filenames to read coordinates from
        :param variable: Optional variable to read while we're reading the coordinates; can be a string or a
        VariableConstraint object
        :return: If variable was specified this will return an UngriddedData object, otherwise a CoordList
        """
        from cis.exceptions import InvalidVariableError
        from cis.data_io import gridded_data
        import iris

        # Check if the files given actually exist.
        for filename in filenames:
            with open(filename) as f:
                pass

        variable_constraint = variable
        if isinstance(variable, str):
            variable_constraint = DisplayConstraint(cube_func=(lambda c: c.var_name == variable or
                                                                c.standard_name == variable or
                                                                c.long_name == variable), display=variable,
                                                    coord_values={'hybrid level at layer midpoints':
                                                                      (lambda lev: lev == 31)})
        if len(filenames) == 1:
            callback_function = self.load_single_file_callback
        else:
            callback_function = self.load_multiple_files_callback

        try:
            cube = gridded_data.load_cube(filenames, variable_constraint, callback=callback_function)
        except iris.exceptions.ConstraintMismatchError as e:
            if variable is None:
                message = "File contains more than one cube variable name must be specified"
            elif e.message == "no cubes found":
                message = "Variable not found: {} \nTo see a list of variables run: cis info {}" \
                    .format(str(variable), filenames[0])
            else:
                message = e.message
            raise InvalidVariableError(message)
        except ValueError as e:
            raise IOError(str(e))

        self._add_available_aux_coords(cube, filenames)

        return cube
Example #16
def _read_hdf4(filename, variables):
    """
        A wrapper method for reading raw data from hdf4 files. This returns a pair of dictionaries of I/O handles,
         one each for the VD and SD data types.

        :param filename:     A name of a file to read
        :param variables:    List of variables to read from the files

        :return: (sds_dict, vds_dict) A tuple of dictionaries, one for sds objects and another for vds
    """
    from cis.exceptions import InvalidVariableError
    from pyhdf.error import HDF4Error

    variables = utils.listify(variables)

    # I'd rather not have to make this check but for pyhdf 0.9.0 and hdf 4.2.9 on OS X the c-level read routine will at
    # some point call exit(138) when reading valid netcdf files (rather than returning a negative status).
    if not filename.endswith('.hdf'):
        raise IOError("Tried to read non HDF file: {}".format(filename))

    try:
        sds_dict = hdf_sd.read(filename, variables)

        # remove the variables identified as SD (i.e. the keys in sds_dict)
        # no need to try looking for them as VD variable
        # AND this can cause a crash in some version/implementations of the core HDF4 libraries!

        # First create a copy of the list in order for the original list to be left intact when elements are removed
        # from it, this enables the original list to be used when many files are read
        vdvariables = list(variables)
        for sds_dict_key in sds_dict:
            vdvariables.remove(sds_dict_key)

        vds_dict = hdf_vd.read(filename, vdvariables)
    except HDF4Error as e:
        raise IOError(str(e))

    for variable in variables:
        if variable not in sds_dict and variable not in vds_dict:
            raise InvalidVariableError("Could not find " + variable +
                                       " in file: " + filename)

    return sds_dict, vds_dict
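A hypothetical call; granule.hdf and the variable names are assumptions:

sds_dict, vds_dict = _read_hdf4("granule.hdf", ["Latitude", "Cloud_Top_Temperature"])
print(sorted(sds_dict), sorted(vds_dict))  # each variable ends up in exactly one dict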
Example #17
    def create_coords(self, filenames, variable=None):
        from cis.data_io.ungridded_data import Metadata
        from numpy import genfromtxt, NaN
        from cis.exceptions import InvalidVariableError
        from cis.time_util import convert_datetime_to_std_time
        import dateutil.parser as du

        array_list = []

        for filename in filenames:
            try:
                array_list.append(genfromtxt(filename, dtype="f8,f8,f8,O,f8",
                                             names=['latitude', 'longitude', 'altitude', 'time', 'value'],
                                             delimiter=',', missing_values='', usemask=True, invalid_raise=True,
                                             converters={"time": du.parse}))
            except Exception:
                raise IOError('Unable to read file ' + filename)

        data_array = utils.concatenate(array_list)
        n_elements = len(data_array['latitude'])

        coords = CoordList()
        coords.append(Coord(data_array["latitude"],
                            Metadata(standard_name="latitude", shape=(n_elements,), units="degrees_north")))
        coords.append(Coord(data_array["longitude"],
                            Metadata(standard_name="longitude", shape=(n_elements,), units="degrees_east")))
        coords.append(
            Coord(data_array["altitude"], Metadata(standard_name="altitude", shape=(n_elements,), units="meters")))

        time_arr = convert_datetime_to_std_time(data_array["time"])
        time = Coord(time_arr,
                     Metadata(standard_name="time", shape=(n_elements,), units="days since 1600-01-01 00:00:00"))
        coords.append(time)

        if variable:
            try:
                data = UngriddedData(data_array['value'], Metadata(name="value", shape=(n_elements,), units="unknown",
                                                                   missing_value=NaN), coords)
            except Exception:
                raise InvalidVariableError("Value column does not exist in file " + str(filenames))
            return data
        else:
            return UngriddedCoordinates(coords)
Example #18
def load_aeronet(fname, variables=None):
    """
    Loads an AERONET lev 2.0 csv file.

        Originally from http://code.google.com/p/metamet/
        License: GNU GPL v3

    :param fname: data file name
    :param variables: A list of variables to return
    :return: A dictionary of variables names and numpy arrays containing the data for that variable
    """
    import numpy as np
    from numpy import ma
    from datetime import datetime, timedelta
    from cis.time_util import cis_standard_time_unit
    from cis.exceptions import InvalidVariableError

    std_day = cis_standard_time_unit.num2date(0)

    ordered_vars = get_aeronet_file_variables(fname)

    def date2daynum(datestr):
        the_day = datetime(int(datestr[-4:]), int(datestr[3:5]),
                           int(datestr[:2]))
        return float((the_day - std_day).days)

    def time2fractionalday(timestr):
        td = timedelta(hours=int(timestr[:2]),
                       minutes=int(timestr[3:5]),
                       seconds=int(timestr[6:8]))
        return td.total_seconds() / (24.0 * 60.0 * 60.0)

    try:
        rawd = np.genfromtxt(fname,
                             skip_header=5,
                             delimiter=',',
                             names=ordered_vars,
                             converters={
                                 0: date2daynum,
                                 1: time2fractionalday,
                                 'Last_Processing_Date': date2daynum
                             },
                             dtype=np.float64,
                             missing_values='N/A',
                             usemask=True)
    except (StopIteration, IndexError) as e:
        raise IOError(e)

    lend = len(rawd)
    # The date and time column are already in days since cis standard time, and fractional days respectively, so we can
    # just add them together
    # Find the columns by number rather than name as some older versions of numpy mangle the special characters
    datetimes = rawd[rawd.dtype.names[0]] + rawd[rawd.dtype.names[1]]

    metadata = get_file_metadata(fname)
    lon = np.zeros(lend) + float(metadata.misc[2][1].split("=")[1])
    lat = np.zeros(lend) + float(metadata.misc[2][2].split("=")[1])
    alt = np.zeros(lend) + float(metadata.misc[2][3].split("=")[1])

    data_dict = {}
    if variables is not None:
        for key in variables:
            try:
                # Again, we can't trust the numpy names so we have to use our pre-read names to index the right column
                data_dict[key] = rawd[rawd.dtype.names[ordered_vars.index(
                    key)]]
            except ValueError:
                raise InvalidVariableError(key + " does not exist in " + fname)

    data_dict["datetime"] = ma.array(datetimes)
    data_dict["longitude"] = ma.array(lon)
    data_dict["latitude"] = ma.array(lat)
    data_dict["altitude"] = ma.array(alt)

    return data_dict
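The two converters above can be exercised stand-alone; this sketch reproduces them for made-up 'dd:mm:yyyy' and 'hh:mm:ss' strings, using the days-since-1600-01-01 epoch noted elsewhere in these examples:

from datetime import datetime, timedelta

std_day = datetime(1600, 1, 1)  # CIS standard epoch: days since 1600-01-01
datestr, timestr = "15:06:2010", "12:30:00"

the_day = datetime(int(datestr[-4:]), int(datestr[3:5]), int(datestr[:2]))
fraction = timedelta(hours=int(timestr[:2]), minutes=int(timestr[3:5]),
                     seconds=int(timestr[6:8])).total_seconds() / (24.0 * 60.0 * 60.0)
print((the_day - std_day).days + fraction)  # the datetime column value for this row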