def test_aggregating_over_time_with_default_times(self):
     from datetime import datetime, timedelta
     from cis.time_util import cis_standard_time_unit as tunit
     data = make_regular_2d_with_time_ungridded_data()
     data.time.convert_to_std_time()
     output = data.aggregate(t=slice(None, None, timedelta(days=30)))
     expected_t_bounds = [[tunit.date2num(datetime(1984, 8, 27)), tunit.date2num(datetime(1984, 9, 10))]]
     assert_arrays_equal(output[0].coord('time').bounds, expected_t_bounds)
     assert_arrays_equal(output[0].data, [[[7.5]]])
 def test_aggregating_over_time_with_partial_datetime(self):
     from cis.time_util import PartialDateTime, cis_standard_time_unit as tunit
     from datetime import datetime, timedelta
     data = make_regular_2d_with_time_ungridded_data()
     data.time.convert_to_std_time()
     output = data.aggregate(t=[PartialDateTime(1984,9), timedelta(days=30)])
     expected_t_bounds = [[tunit.date2num(datetime(1984, 9, 1)), tunit.date2num(datetime(1984, 10, 1))]]
     assert_arrays_almost_equal(output[0].coord('time').bounds, expected_t_bounds)
     assert_arrays_almost_equal(output[0].data, [[[10.5]]])
Example #3
    def test_aeronet_time_parsing(self):
        # 1.8s
        from datetime import datetime
        from cis.time_util import cis_standard_time_unit as ct

        aeronet_data = load_aeronet(valid_aeronet_filename, [valid_aeronet_variable])

        assert_almost_equal(aeronet_data['datetime'][0], ct.date2num(datetime(2003, 9, 25, 6, 47, 9)))
        assert_almost_equal(aeronet_data['datetime'][5], ct.date2num(datetime(2003, 9, 25, 7, 10, 37)))
        assert_almost_equal(aeronet_data['datetime'][76], ct.date2num(datetime(2003, 9, 27, 13, 28, 2)))
Example #4
 def test_aggregating_over_time_with_default_times(self):
     from datetime import datetime, timedelta
     from cis.time_util import cis_standard_time_unit as tunit
     data = make_regular_2d_with_time_ungridded_data()
     data.time.convert_to_std_time()
     output = data.aggregate(t=slice(None, None, timedelta(days=30)))
     expected_t_bounds = [[
         tunit.date2num(datetime(1984, 8, 27)),
         tunit.date2num(datetime(1984, 9, 10))
     ]]
     assert_arrays_equal(output[0].coord('time').bounds, expected_t_bounds)
     assert_arrays_equal(output[0].data, [[[7.5]]])
Example #5
 def test_aggregating_over_time_with_partial_datetime(self):
     from cis.time_util import PartialDateTime, cis_standard_time_unit as tunit
     from datetime import datetime, timedelta
     data = make_regular_2d_with_time_ungridded_data()
     data.time.convert_to_std_time()
     output = data.aggregate(
         t=[PartialDateTime(1984, 9),
            timedelta(days=30)])
     expected_t_bounds = [[
         tunit.date2num(datetime(1984, 9, 1)),
         tunit.date2num(datetime(1984, 10, 1))
     ]]
     assert_arrays_almost_equal(output[0].coord('time').bounds,
                                expected_t_bounds)
     assert_arrays_almost_equal(output[0].data, [[[10.5]]])
Example #6
    def test_pressure_constraint_in_4d(self):
        from cis.collocation.col_implementations import SepConstraintKdtree
        import datetime as dt
        import numpy as np

        ug_data = mock.make_regular_4d_ungridded_data()
        ug_data_points = ug_data.as_data_frame(time_index=False,
                                               name='vals').dropna(axis=1)
        sample_point = pd.Series({
            'longitude': [0.0],
            'latitude': [0.0],
            'altitude': [50.0],
            'air_pressure': [24.0],
            'time':
            [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]
        })
        constraint = SepConstraintKdtree(p_sep=2)

        # This should leave us with 20 points:  [  6.   7.   8.   9.  10.]
        #                                       [ 11.  12.  13.  14.  15.]
        #                                       [ 16.  17.  18.  19.  20.]
        #                                       [ 21.  22.  23.  24.  25.]
        ref_vals = np.array([
            6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19.,
            20., 21., 22., 23., 24., 25.
        ])

        new_points = constraint.constrain_points(sample_point, ug_data_points)
        new_vals = new_points.vals

        eq_(ref_vals.size, new_vals.size)
        assert (np.equal(ref_vals, new_vals).all())
Example #7
    def test_time_constraint_in_4d(self):
        from cis.collocation.col_implementations import SepConstraintKdtree
        import datetime as dt
        import numpy as np

        ug_data = mock.make_regular_4d_ungridded_data()
        ug_data_points = ug_data.as_data_frame(time_index=False,
                                               name='vals').dropna(axis=1)
        sample_point = pd.Series({
            'longitude': [0.0],
            'latitude': [0.0],
            'altitude': [50.0],
            'time':
            [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]
        })

        # 1 day (and a little bit) time separation
        constraint = SepConstraintKdtree(t_sep='P1dT1M')

        # This should leave us with 30 points
        ref_vals = np.reshape(np.arange(50) + 1.0, (10, 5))[:, 1:4].flatten()

        new_points = constraint.constrain_points(sample_point, ug_data_points)
        new_vals = new_points.vals

        eq_(ref_vals.size, new_vals.size)
        assert (np.equal(ref_vals, new_vals).all())
Example #8
    def test_alt_constraint_in_4d(self):
        from cis.collocation.col_implementations import SepConstraintKdtree
        import datetime as dt
        import numpy as np

        ug_data = mock.make_regular_4d_ungridded_data()
        ug_data_points = ug_data.as_data_frame(time_index=False,
                                               name='vals').dropna(axis=1)
        sample_point = pd.Series({
            'longitude': [0.0],
            'latitude': [0.0],
            'altitude': [50.0],
            'time':
            [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]
        })
        # 15m altitude separation
        a_sep = 15

        constraint = SepConstraintKdtree(a_sep=a_sep)

        # This should leave us with 15 points:  [ 21.  22.  23.  24.  25.]
        #                                       [ 26.  27.  28.  29.  30.]
        #                                       [ 31.  32.  33.  34.  35.]
        ref_vals = np.array([
            21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33.,
            34., 35.
        ])

        new_points = constraint.constrain_points(sample_point, ug_data_points)
        new_vals = new_points.vals

        eq_(ref_vals.size, new_vals.size)
        assert (np.equal(ref_vals, new_vals).all())
Example #9
    def test_all_constraints_in_4d(self):
        ug_data = mock.make_regular_4d_ungridded_data()
        ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)
        sample_point = pd.DataFrame(data={'longitude': [0.0], 'latitude': [0.0], 'altitude': [50.0],
                                          'air_pressure': [50.0],
                                          'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]}).iloc[0]
        # One degree near (0, 0) is about 110 km in latitude and longitude, so h_sep = 1000 km keeps us to within
        #  roughly 9 degrees in each direction
        h_sep = 1000
        # 15m altitude separation
        a_sep = 15
        # 1 day (and a little bit) time separation
        t_sep = 'P1dT1M'
        # Pressure constraint: choose 60/50 < p_sep < 50/40, i.e. 1.2 < p_sep < 1.25
        p_sep = 1.22

        constraint = SepConstraintKdtree(h_sep=h_sep, a_sep=a_sep, p_sep=p_sep, t_sep=t_sep)

        index = HaversineDistanceKDTreeIndex()
        index.index_data(None, ug_data_points, None)
        constraint.haversine_distance_kd_tree_index = index

        # This should leave us with 6 points: [[ 27, 28, 29]
        #                                      [ 32, 33, 34]]
        ref_vals = np.array([27., 28., 29., 32., 33., 34.])

        new_points = constraint.constrain_points(sample_point, ug_data_points)
        new_vals = np.sort(new_points.vals)

        eq_(ref_vals.size, new_vals.size)
        assert (np.equal(ref_vals, new_vals).all())
Example #10
def parse_as_number_or_standard_time(string):
    """
    Parse a string as a number from the command line or, if that fails, as a datetime converted to CIS standard time units.

    :param string: String to parse
    :return: int or float (possibly representing a time in CIS standard time units)
    """
    from datetime import datetime
    res = parse_as_number_or_datetime(string)
    if isinstance(res, datetime):
        res = cis_standard_time_unit.date2num(res)
    return res
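
A minimal self-contained sketch of the same idea, runnable outside the CIS command line (the helper name below is hypothetical; only cis_standard_time_unit and dateutil, both used elsewhere in these examples, are assumed):

from cis.time_util import cis_standard_time_unit
import dateutil.parser as du

def number_or_std_time(s):
    # Hypothetical stand-in for parse_as_number_or_standard_time: try int,
    # then float, then fall back to a datetime converted to CIS standard time.
    for cast in (int, float):
        try:
            return cast(s)
        except ValueError:
            pass
    return cis_standard_time_unit.date2num(du.parse(s))

print(number_or_std_time('42'))          # -> 42
print(number_or_std_time('1.5'))         # -> 1.5
print(number_or_std_time('2010-07-01'))  # -> float in CIS standard time units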
Example #12
def load_aeronet(filename, variables=None):
    """
    Loads aeronet csv file.

    :param filename: data file name
    :param variables: A list of variables to return
    :return: A dictionary of variables names and numpy arrays containing the data for that variable
    """
    from cis.exceptions import InvalidVariableError
    from cis.time_util import cis_standard_time_unit
    from numpy.ma import masked_invalid
    from pandas import read_csv, to_datetime

    version = get_aeronet_version(filename)
    ordered_vars = get_aeronet_file_variables(filename, version)
    if len(ordered_vars) == 0:
        return {}

    # Load all available geolocation information and any requested variables
    cols = [var for var in ("date", "time", "latitude", "longitude", "altitude") if var in ordered_vars]
    if cols is not None and variables is not None:
        cols.extend(variables)

    dtypes = {var: "str" if var in ("date", "time") else "float" for var in cols}

    try:
        rawd = read_csv(filename, sep=",", header=AERONET_HEADER_LENGTH[version]-1, names=ordered_vars,
                        index_col=False, usecols=cols, na_values=AERONET_MISSING_VALUE[version], dtype=dtypes,
                        parse_dates={"datetime":["date", "time"]}, infer_datetime_format=True, dayfirst=True,
                        error_bad_lines=False, warn_bad_lines=True, #low_memory="All_Sites_Times_All_Points" in filename
        )
    except ValueError:
        raise InvalidVariableError("{} not available in {}".format(variables, filename))

    # Empty file
    if rawd.shape[0] == 0:
        return {"datetime":[], "latitude":[], "longitude":[], "altitude":[]}

    # Convert pandas Timestamps into CIS standard numbers
    rawd["datetime"] = [cis_standard_time_unit.date2num(timestamp.to_pydatetime())
                        for timestamp in to_datetime(rawd["datetime"], format='%d:%m:%Y %H:%M:%S')]

    # Add position metadata that isn't listed in every line for some formats
    if version.startswith("MAN"):
        rawd["altitude"] = 0.

    elif version.endswith("2"):
        metadata = get_file_metadata(filename)
        rawd["longitude"] = float(metadata.misc[2][1].split("=")[1])
        rawd["latitude"] = float(metadata.misc[2][2].split("=")[1])
        rawd["altitude"] = float(metadata.misc[2][3].split("=")[1])

    return {var : masked_invalid(arr) for var, arr in rawd.items()}
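
A hedged usage sketch for load_aeronet; the file and variable names below are placeholders, not a real AERONET data set:

from cis.time_util import cis_standard_time_unit

# Placeholder names: substitute a real level-2.0 AERONET file and variable.
data = load_aeronet("example_site.lev20", variables=["AOT_440"])
if len(data["datetime"]) > 0:
    # 'datetime' holds floats in CIS standard time units; num2date inverts date2num
    print(cis_standard_time_unit.num2date(data["datetime"][0]))
    print(data["AOT_440"].mean())  # masked array, invalid values masked out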
Example #13
def make_regular_2d_with_time_ungridded_data():
    """
        Makes a well-defined ungridded data object of shape 5x3 with data as follows:
        array([[1,2,3],
               [4,5,6],
               [7,8,9],
               [10,11,12],
               [13,14,15]])
        and coordinates in latitude:
        array([[-10,-10,-10],
               [-5,-5,-5],
               [0,0,0],
               [5,5,5],
               [10,10,10]])
        longitude:
        array([[-5,0,5],
               [-5,0,5],
               [-5,0,5],
               [-5,0,5],
               [-5,0,5]])
        time: 15 consecutive days (one-day steps) starting 27th August 1984
        The axes have different lengths (5 and 3) to make them easier to distinguish. Note that latitude
        increases as you step through the array in order, i.e. downwards as it's written above.
    """
    import numpy as np
    from cis.data_io.Coord import CoordList, Coord
    from cis.data_io.ungridded_data import UngriddedData, Metadata
    import datetime
    from cis.time_util import cis_standard_time_unit

    x_points = np.arange(-10, 11, 5)
    y_points = np.arange(-5, 6, 5)
    y, x = np.meshgrid(y_points, x_points)

    t0 = datetime.datetime(1984, 8, 27)
    times = np.reshape(np.array([t0 + datetime.timedelta(days=d) for d in range(15)]), (5, 3))

    x = Coord(x, Metadata(standard_name='latitude', units='degrees'))
    y = Coord(y, Metadata(standard_name='longitude', units='degrees'))
    t = Coord(cis_standard_time_unit.date2num(times), Metadata(standard_name='time', units=cis_standard_time_unit))

    data = np.reshape(np.arange(15) + 1.0, (5, 3))

    coords = CoordList([x, y, t])
    return UngriddedData(data, Metadata(name='rain', standard_name='rainfall_flux', long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                                        units="kg m-2 s-1", missing_value=-999), coords)
Example #14
    def test_all_constraints_in_4d(self):
        ug_data = mock.make_regular_4d_ungridded_data()
        ug_data_points = ug_data.as_data_frame(time_index=False,
                                               name='vals').dropna(axis=1)
        sample_point = pd.DataFrame(
            data={
                'longitude': [0.0],
                'latitude': [0.0],
                'altitude': [50.0],
                'air_pressure': [50.0],
                'time':
                [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]
            }).iloc[0]
        # One degree near (0, 0) is about 110 km in latitude and longitude, so h_sep = 1000 km keeps us to within
        #  roughly 9 degrees in each direction
        h_sep = 1000
        # 15m altitude separation
        a_sep = 15
        # 1 day (and a little bit) time separation
        t_sep = 'P1dT1M'
        # Pressure constraint: choose 60/50 < p_sep < 50/40, i.e. 1.2 < p_sep < 1.25
        p_sep = 1.22

        constraint = SepConstraintKdtree(h_sep=h_sep,
                                         a_sep=a_sep,
                                         p_sep=p_sep,
                                         t_sep=t_sep)

        index = HaversineDistanceKDTreeIndex()
        index.index_data(None, ug_data_points, None)
        constraint.haversine_distance_kd_tree_index = index

        # This should leave us with 6 points: [[ 27, 28, 29]
        #                                      [ 32, 33, 34]]
        ref_vals = np.array([27., 28., 29., 32., 33., 34.])

        new_points = constraint.constrain_points(sample_point, ug_data_points)
        new_vals = np.sort(new_points.vals)

        eq_(ref_vals.size, new_vals.size)
        assert (np.equal(ref_vals, new_vals).all())
Example #15
    def test_time_constraint_in_4d(self):
        from cis.collocation.col_implementations import SepConstraintKdtree
        import datetime as dt
        import numpy as np

        ug_data = mock.make_regular_4d_ungridded_data()
        ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)
        sample_point = pd.Series({'longitude': [0.0], 'latitude': [0.0], 'altitude':[50.0],
                                  'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]})

        # 1 day (and a little bit) time separation
        constraint = SepConstraintKdtree(t_sep='P1dT1M')

        # This should leave us with 30 points
        ref_vals = np.reshape(np.arange(50) + 1.0, (10, 5))[:, 1:4].flatten()

        new_points = constraint.constrain_points(sample_point, ug_data_points)
        new_vals = new_points.vals

        eq_(ref_vals.size, new_vals.size)
        assert (np.equal(ref_vals, new_vals).all())
Example #16
def make_dummy_ungridded_data_time_series(len=10):
    """
    Create a time series of ungridded data of length len, with a single lat/lon coordinate (65.2, -12.1)
    :param len: length of the time series and associated data
    :return: An UngriddedData object containing the time series
    """
    from datetime import datetime, timedelta
    from cis.time_util import cis_standard_time_unit
    from cis.data_io.Coord import Coord, CoordList
    from cis.data_io.ungridded_data import UngriddedData, Metadata

    t0 = datetime(1984, 8, 27)
    times = np.array([t0 + timedelta(days=d) for d in range(len)])

    x = Coord(np.zeros(len) + 65.2, Metadata(standard_name='latitude', units='degrees'))
    y = Coord(np.zeros(len) - 12.1, Metadata(standard_name='longitude', units='degrees'))
    t = Coord(cis_standard_time_unit.date2num(times),
              Metadata(standard_name='time', units=cis_standard_time_unit), axis='x')
    data = np.arange(len) + 1.0

    return UngriddedData(data, Metadata(standard_name='rainfall_flux', long_name="TOTAL RAINFALL RATE: LS+CONV KG/M2/S",
                                        units="kg m-2 s-1", missing_value=-999), CoordList([x, y, t]))
Example #17
    def test_pressure_constraint_in_4d(self):
        from cis.collocation.col_implementations import SepConstraintKdtree
        import datetime as dt
        import numpy as np

        ug_data = mock.make_regular_4d_ungridded_data()
        ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)
        sample_point = pd.Series({'longitude': [0.0], 'latitude': [0.0], 'altitude':[50.0], 'air_pressure': [24.0],
                                  'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]})
        constraint = SepConstraintKdtree(p_sep=2)

        # This should leave us with 20 points:  [  6.   7.   8.   9.  10.]
        #                                       [ 11.  12.  13.  14.  15.]
        #                                       [ 16.  17.  18.  19.  20.]
        #                                       [ 21.  22.  23.  24.  25.]
        ref_vals = np.array([6., 7., 8., 9., 10., 11., 12., 13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23.,
                             24., 25.])

        new_points = constraint.constrain_points(sample_point, ug_data_points)
        new_vals = new_points.vals

        eq_(ref_vals.size, new_vals.size)
        assert (np.equal(ref_vals, new_vals).all())
Example #18
    def test_alt_constraint_in_4d(self):
        from cis.collocation.col_implementations import SepConstraintKdtree
        import datetime as dt
        import numpy as np

        ug_data = mock.make_regular_4d_ungridded_data()
        ug_data_points = ug_data.as_data_frame(time_index=False, name='vals').dropna(axis=1)
        sample_point = pd.Series({'longitude': [0.0], 'latitude': [0.0], 'altitude':[50.0],
                                  'time': [cis_standard_time_unit.date2num(dt.datetime(1984, 8, 29))]})
        # 15m altitude separation
        a_sep = 15

        constraint = SepConstraintKdtree(a_sep=a_sep)

        # This should leave us with 15 points:  [ 21.  22.  23.  24.  25.]
        #                                       [ 26.  27.  28.  29.  30.]
        #                                       [ 31.  32.  33.  34.  35.]
        ref_vals = np.array([21., 22., 23., 24., 25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35.])

        new_points = constraint.constrain_points(sample_point, ug_data_points)
        new_vals = new_points.vals

        eq_(ref_vals.size, new_vals.size)
        assert (np.equal(ref_vals, new_vals).all())
Example #19
def parse_as_number_or_datetime(in_string, name, parser):
    """Parse a string as a number from the command line, or if that fails, as a datetime, reporting parse errors.

    The string should be in an ISO 8601 format except that the date and time
    parts may be separated by a space or colon instead of T.
    :param in_string: String to parse
    :param name:      A description of the argument used for error messages
    :param parser:    The parser used to report errors
    :return: int or float value (a float in CIS standard time units if the string parsed as a datetime)
    """
    import dateutil.parser as du

    try:
        ret = int(in_string)
    except ValueError:
        try:
            ret = float(in_string)
        except ValueError:
            try:
                ret = cis_standard_time_unit.date2num(du.parse(in_string))
            except ValueError:
                parser.error("'" + in_string + "' is not a valid " + name)
                ret = None
    return ret
Example #20
    def create_data_object(self, filenames, variable, index_offset=1):
        from cis.data_io.hdf_vd import get_data
        from cis.data_io.hdf_vd import VDS
        from pyhdf.error import HDF4Error
        from cis.data_io import hdf_sd
        from iris.coords import DimCoord, AuxCoord
        from iris.cube import Cube, CubeList
        from cis.data_io.gridded_data import GriddedData
        from cis.time_util import cis_standard_time_unit
        from datetime import datetime
        from iris.util import new_axis
        import numpy as np

        logging.debug("Creating data object for variable " + variable)

        variables = ["Pressure_Mean"]
        logging.info("Listing coordinates: " + str(variables))

        variables.append(variable)

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
        alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
        alt_coord.convert_units('m')

        lat_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
        lat_coord = DimCoord(lat_data, standard_name='latitude', units='degrees_north')

        lon_data = self._get_calipso_data(hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
        lon_coord = DimCoord(lon_data, standard_name='longitude', units='degrees_east')

        cubes = CubeList()
        for f in filenames:
            t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
            time_data = cis_standard_time_unit.date2num(datetime(int(t[0:4]), int(t[4:6]), 15))
            time_coord = AuxCoord(time_data, long_name='Profile_Time', standard_name='time',
                                  units=cis_standard_time_unit)

            # retrieve data + its metadata
            var = sdata[variable]
            metadata = hdf.read_metadata(var, "SD")

            data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

            pres_data = self._get_calipso_data(hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
            pres_coord = AuxCoord(pres_data, standard_name='air_pressure', units='hPa')

            if data.ndim == 2:
                # pres_coord = new_axis()
                cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                cubes.append(new_cube)
            elif data.ndim == 3:
                # pres_coord = new_axis()
                cube = Cube(data, long_name=metadata.long_name or variable, units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1), (alt_coord, 2)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                # Then add the (extended) pressure coord so that it is explicitly a function of time
                new_cube.add_aux_coord(pres_coord[np.newaxis, ...], (0, 1, 2, 3))
                cubes.append(new_cube)
            else:
                raise ValueError("Unexpected number of dimensions for CALIOP data: {}".format(data.ndim))


        # Concatenate the cubes from each file into a single GriddedData object
        gd = GriddedData.make_from_cube(cubes.concatenate_cube())
        return gd
Example #21
def parse_datetimestr_to_std_time(s):
    import dateutil.parser as du
    return cis_standard_time_unit.date2num(du.parse(s))
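
A round-trip sketch for the one-liner above; num2date is the cf_units inverse of the date2num call used throughout these examples:

num = parse_datetimestr_to_std_time('2010-07-01 13:27:03')
print(num)                                   # float in CIS standard time units
print(cis_standard_time_unit.num2date(num))  # 2010-07-01 13:27:03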
Example #22
    def create_data_object(self, filenames, variable):
        from netCDF4 import Dataset
        from biggus import OrthoArrayAdapter
        from iris.cube import Cube, CubeList
        from iris.coords import DimCoord
        from iris.fileformats.netcdf import NetCDFDataProxy
        from datetime import datetime
        from os.path import basename
        from cis.time_util import cis_standard_time_unit
        from cis.data_io.gridded_data import make_from_cube
        import numpy as np

        cubes = CubeList()

        for f in filenames:
            # Open the file
            ds = Dataset(f)
            # E.g. 'NO2.COLUMN.VERTICAL.TROPOSPHERIC.CS30_BACKSCATTER.SOLAR'
            v = ds.variables[variable]
            # Get the coords
            lat = ds.variables['LATITUDE']
            lon = ds.variables['LONGITUDE']

            # Create a biggus adaptor over the data
            scale_factor = getattr(v, 'scale_factor', None)
            add_offset = getattr(v, 'add_offset', None)
            if scale_factor is None and add_offset is None:
                v_dtype = v.datatype
            elif scale_factor is not None:
                v_dtype = scale_factor.dtype
            else:
                v_dtype = add_offset.dtype
            # proxy = NetCDFDataProxy(v.shape, v_dtype, f, variable, float(v.VAR_FILL_VALUE))
            # a = OrthoArrayAdapter(proxy)
            # Mask out all invalid values (NaN, Inf, etc)
            a = np.ma.masked_invalid(v[:])
            # Set everything negative to NaN
            a = np.ma.masked_less(a, 0.0)

            # Just read the lat and lon in directly
            lat_coord = DimCoord(lat[:], standard_name='latitude', units='degrees', long_name=lat.VAR_DESCRIPTION)
            lon_coord = DimCoord(lon[:], standard_name='longitude', units='degrees', long_name=lon.VAR_DESCRIPTION)

            # Pull the date out of the filename
            fname = basename(f)
            dt = datetime.strptime(fname[:10], "%Y_%m_%d")
            t_coord = DimCoord(cis_standard_time_unit.date2num(dt), standard_name='time', units=cis_standard_time_unit)

            c = Cube(a, long_name=getattr(v, "VAR_DESCRIPTION", None), units=getattr(v, "VAR_UNITS", None),
                     dim_coords_and_dims=[(lat_coord, 0), (lon_coord, 1)])

            c.add_aux_coord(t_coord)

            # Close the file
            ds.close()

            cubes.append(c)

        # We have a scalar time coord and no conflicting metadata so this should just create one cube...
        merged = cubes.merge_cube()

        # Return as a CIS GriddedData object
        return make_from_cube(merged)
Example #23
    def create_data_object(self, filenames, variable, index_offset=1):
        from cis.data_io.hdf_vd import get_data
        from cis.data_io.hdf_vd import VDS
        from pyhdf.error import HDF4Error
        from cis.data_io import hdf_sd
        from iris.coords import DimCoord, AuxCoord
        from iris.cube import Cube, CubeList
        from cis.data_io.gridded_data import GriddedData
        from cis.time_util import cis_standard_time_unit
        from datetime import datetime
        from iris.util import new_axis
        import numpy as np

        logging.debug("Creating data object for variable " + variable)

        variables = ["Pressure_Mean"]
        logging.info("Listing coordinates: " + str(variables))

        variables.append(variable)

        # reading data from files
        sdata = {}
        for filename in filenames:
            try:
                sds_dict = hdf_sd.read(filename, variables)
            except HDF4Error as e:
                raise IOError(str(e))

            for var in list(sds_dict.keys()):
                utils.add_element_to_list_in_dict(sdata, var, sds_dict[var])

        # work out size of data arrays
        # the coordinate variables will be reshaped to match that.
        # NOTE: This assumes that all Caliop_L1 files have the same altitudes.
        #       If this is not the case, then the following line will need to be changed
        #       to concatenate the data from all the files and not just arbitrarily pick
        #       the altitudes from the first file.
        alt_data = self._get_calipso_data(
            hdf_sd.HDF_SDS(filenames[0], 'Altitude_Midpoint'))[0, :]
        alt_coord = DimCoord(alt_data, standard_name='altitude', units='km')
        alt_coord.convert_units('m')

        lat_data = self._get_calipso_data(
            hdf_sd.HDF_SDS(filenames[0], 'Latitude_Midpoint'))[0, :]
        lat_coord = DimCoord(lat_data,
                             standard_name='latitude',
                             units='degrees_north')

        lon_data = self._get_calipso_data(
            hdf_sd.HDF_SDS(filenames[0], 'Longitude_Midpoint'))[0, :]
        lon_coord = DimCoord(lon_data,
                             standard_name='longitude',
                             units='degrees_east')

        cubes = CubeList()
        for f in filenames:
            t = get_data(VDS(f, "Nominal_Year_Month"), True)[0]
            time_data = cis_standard_time_unit.date2num(
                datetime(int(t[0:4]), int(t[4:6]), 15))
            time_coord = AuxCoord(time_data,
                                  long_name='Profile_Time',
                                  standard_name='time',
                                  units=cis_standard_time_unit)

            # retrieve data + its metadata
            var = sdata[variable]
            metadata = hdf.read_metadata(var, "SD")

            data = self._get_calipso_data(hdf_sd.HDF_SDS(f, variable))

            pres_data = self._get_calipso_data(
                hdf_sd.HDF_SDS(f, 'Pressure_Mean'))
            pres_coord = AuxCoord(pres_data,
                                  standard_name='air_pressure',
                                  units='hPa')

            if data.ndim == 2:
                # pres_coord = new_axis()
                cube = Cube(data,
                            long_name=metadata.long_name or variable,
                            units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0),
                                                 (lon_coord, 1)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                cubes.append(new_cube)
            elif data.ndim == 3:
                # pres_coord = new_axis()
                cube = Cube(data,
                            long_name=metadata.long_name or variable,
                            units=self.clean_units(metadata.units),
                            dim_coords_and_dims=[(lat_coord, 0),
                                                 (lon_coord, 1),
                                                 (alt_coord, 2)],
                            aux_coords_and_dims=[(time_coord, ())])
                # Promote the time scalar coord to a length one dimension
                new_cube = new_axis(cube, 'time')
                # Then add the (extended) pressure coord so that it is explicitly a function of time
                new_cube.add_aux_coord(pres_coord[np.newaxis, ...],
                                       (0, 1, 2, 3))
                cubes.append(new_cube)
            else:
                raise ValueError(
                    "Unexpected number of dimensions for CALIOP data: {}".
                    format(data.ndim))

        # Concatenate the cubes from each file into a single GriddedData object
        gd = GriddedData.make_from_cube(cubes.concatenate_cube())
        return gd
Example #25
def parse_as_number_or_datetime_can_parse_date_as_datetime():
    from datetime import datetime
    from cis.time_util import cis_standard_time_unit
    parser = MockParser()
    dt = parse_as_number_or_datetime('2010-07-01', 'date/time arg', parser)
    assert (dt == cis_standard_time_unit.date2num(datetime(2010, 7, 1)))