def test_returns_correct_data(self):
    """
    See if get_data returns the expected amount of data for each data type
    :return: Nothing
    """
    print("Testing that get_data returns the expected amount of data")

    wmo_boxes = np.array([3505, 1, 1, 1])

    data_ctd = get_data(wmo_boxes, 1, self.config, self.float_name)
    self.assertTrue(data_ctd['long'].shape[1] == 10,
                    "should return some data when fetching CTD")

    data_bot = get_data(wmo_boxes, 2, self.config, self.float_name)
    self.assertTrue(data_bot['long'].shape[1] == 33,
                    "should return some data when fetching bottle")

    data_argo = get_data(wmo_boxes, 3, self.config, self.float_name)
    self.assertTrue(len(data_argo['long'][0]) == 787,
                    "should return some data when fetching argo")
def test_returns_no_data(self):
    """
    See if get_data returns no data when every data-type flag is 0
    :return: Nothing
    """
    print("Testing that get_data will return no data if box is all 0's")

    wmo_boxes = np.array([3505, 0, 0, 0])
    data = get_data(wmo_boxes, 1, self.config, self.float_name)
    self.assertTrue(len(data) == 0, "should return no data")
def test_removes_argo_float(self):
    """
    See if get_data removes the argo float currently being analysed
    :return: Nothing
    """
    print("Testing that get_data will remove the argo being analysed")

    wmo_boxes = np.array([3505, 0, 0, 1])
    float_removed = "1900479"

    data_normal = get_data(wmo_boxes, 3, self.config, self.float_name)
    data_removed = get_data(wmo_boxes, 3, self.config, float_removed)

    # 7 pieces of historical data in the selected box come from float 1900479,
    # whereas 0 come from float 3901960, so data_removed should hold 7 fewer points
    self.assertTrue(
        len(data_normal['long'][0]) - len(data_removed['long'][0]) == 7,
        "Should have removed data associated with the float being processed")
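# Note on the fixtures used above (a summary inferred from the test calls, not
# from get_data itself): each wmo_boxes row has the form
# [wmo_box_number, ctd_flag, bottle_flag, argo_flag], where a flag of 1 means
# "use this data type" and 0 means "skip it", and the data_type argument to
# get_data (1=CTD, 2=bottle, 3=argo) selects which flag/file is consulted.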
def get_region_hist_locations(pa_wmo_numbers, pa_float_name, config):
    """
    Uses the WMO boxes to return all of the historical data in the given area,
    excluding the float that is currently being analysed.
    :param pa_wmo_numbers: 2D array containing the name of the WMO boxes that cover
    the area of interest, and flags for whether we want to use argo, bottle, and/or CTD data
    :param pa_float_name: string of the name of the float currently being processed
    :param config: dictionary containing configuration settings, used to find the
    locations of the folders and files containing the data
    :return: the latitude, longitude, and age of each data point we want
    """
    # set up matrices to hold the data
    grid_lat = []
    grid_long = []
    grid_dates = []

    # go through each of the WMO boxes
    for wmo_box in pa_wmo_numbers:

        # go through each of the columns denoting whether we should use CTD,
        # bottle, and/or argo data
        for data_type in range(1, 4):

            # get the data
            try:
                data = get_data(wmo_box, data_type, config, pa_float_name)

                # if we have data, combine it with the other data, then reset it
                if data:
                    grid_lat = np.concatenate([grid_lat, data['lat'][0]])
                    grid_long = np.concatenate([grid_long, data['long'][0]])
                    grid_dates = np.concatenate([grid_dates, data['dates'][0]])
                    data = []

            except Exception:
                pass

    if len(grid_lat) == 0:
        raise ValueError("get_region_hist_locations found no data for your specification. "
                         "Are your wmo_boxes files set up correctly?")

    grid_long = wrap_longitude(grid_long)

    # decimalise dates
    grid_dates = change_dates(grid_dates)

    return grid_lat, grid_long, grid_dates
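# A minimal usage sketch (illustrative only, not part of the library). It assumes
# `config` is a configuration dictionary of the kind the tests load, and that WMO
# box 3505 exists in the folders `config` points at; the box number and float name
# are taken from the tests above.
def _example_region_hist_locations(config):
    # one row per WMO box: [box_number, ctd_flag, bottle_flag, argo_flag]
    pa_wmo_numbers = np.array([[3505, 1, 1, 1]])

    # gather every historical CTD/bottle/argo location in the box,
    # excluding float 3901960
    grid_lat, grid_long, grid_dates = get_region_hist_locations(
        pa_wmo_numbers, "3901960", config)

    print(grid_lat.shape, grid_long.shape, grid_dates.shape)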
def test_returns_correct_shape(self):
    """
    See if the returned data has the expected shape
    :return: Nothing
    """
    print("Testing that get_data returns data with the expected shape")

    wmo_boxes = np.array([3505, 1, 0, 0])
    data = get_data(wmo_boxes, 1, self.config, self.float_name)

    self.assertTrue(data['sal'].shape == data['ptmp'].shape == data['temp'].shape,
                    "Ocean characteristic data should be the same shape")
    self.assertTrue(data['long'].shape == data['lat'].shape == data['dates'].shape,
                    "Spatial/temporal data should be the same shape")
    self.assertTrue(data['sal'].shape[1] == data['long'].shape[1],
                    "Should be a profile for every location")
def get_region_data(pa_wmo_numbers, pa_float_name, config, index, pa_float_pres):
    """
    Get the historical pressure, salinity, and temperature of selected casts
    :param pa_wmo_numbers: 2D array containing the name of the WMO boxes that cover
    the area of interest, and flags for whether we want to use argo, bottle, and/or CTD data
    :param pa_float_name: string of the name of the float currently being processed
    :param config: dictionary containing configuration settings, used to find the
    locations of the folders and files containing the data
    :param index: array of indices of selected historical casts
    :param pa_float_pres: array of pressures for the float being processed
    :return: the salinity, potential temperature, pressure, latitude, longitude,
    and age of each historical cast selected for use
    """
    # maximum depth to retrieve data from (deepest float measurement + MAP_P_DELTA)
    max_pres = np.nanmax(pa_float_pres) + config["MAP_P_DELTA"]

    # set up empty arrays to hold the data to return
    grid_sal = []
    grid_ptmp = []
    grid_pres = []
    grid_lat = []
    grid_long = []
    grid_dates = []
    data = []

    # set up the current maximum depth and number of columns
    max_depth = 0
    how_many_cols = 0

    # set up variable to save the beginning index for each set of data
    starting_index = 0

    # go through each of the WMO boxes
    for wmo_box in pa_wmo_numbers:

        # go through each of the columns denoting whether we should use CTD,
        # bottle, and/or argo data
        for data_type in range(1, 4):

            # get the data
            try:
                data = get_data(wmo_box, data_type, config, pa_float_name)

                if data:
                    # sometimes the data comes in wrapped in a 3D array, so convert to 2D
                    if len(data['pres']) == 1:
                        data['pres'] = data['pres'][0]
                        data['sal'] = data['sal'][0]
                        data['ptmp'] = data['ptmp'][0]
                        data['lat'] = data['lat'][0].reshape(-1, 1)
                        data['long'] = data['long'][0].reshape(-1, 1)
                        data['dates'] = data['dates'][0].reshape(-1, 1)

                    # check the index of each station to see if it should be loaded
                    data_length = len(data['lat'][0])
                    data_indices = np.arange(0, data_length) + starting_index

                    # remember the location of the last entry
                    starting_index = starting_index + data_length

                    # load each station
                    for i in range(0, data_length):
                        good_indices = np.argwhere(index == data_indices[i])

                        if len(good_indices) > 0:
                            # only use non-NaN values
                            not_nan = np.argwhere(np.isnan(data['pres'][:, i]) == 0)

                            # get the non-NaN values
                            pres = data['pres'][not_nan, i]
                            sal = data['sal'][not_nan, i]
                            ptmp = data['ptmp'][not_nan, i]

                            # remove values where the pressure exceeds the maximum we want
                            too_deep = np.argwhere(pres > max_pres)
                            pres = np.delete(pres, too_deep[:, 0])
                            sal = np.delete(sal, too_deep[:, 0])
                            ptmp = np.delete(ptmp, too_deep[:, 0])

                            new_depth = len(pres)
                            how_many_rows = np.max([new_depth, max_depth])

                            # if the new data we are adding is longer than our columns,
                            # we need to fill in NaNs in the other columns
                            if new_depth > max_depth != 0:
                                grid_pres = np.append(
                                    grid_pres,
                                    np.ones((how_many_cols, new_depth - max_depth)) * np.nan,
                                    axis=1).reshape((how_many_cols, how_many_rows))
                                grid_ptmp = np.append(
                                    grid_ptmp,
                                    np.ones((how_many_cols, new_depth - max_depth)) * np.nan,
                                    axis=1).reshape((how_many_cols, how_many_rows))
                                grid_sal = np.append(
                                    grid_sal,
                                    np.ones((how_many_cols, new_depth - max_depth)) * np.nan,
                                    axis=1).reshape((how_many_cols, how_many_rows))

                            # if the new data we are adding is shorter than our columns,
                            # fill in the rest with NaNs so it is the same length
                            elif new_depth < max_depth:
                                pres = np.append(pres,
                                                 np.ones((max_depth - new_depth, 1)) * np.nan)
                                ptmp = np.append(ptmp,
                                                 np.ones((max_depth - new_depth, 1)) * np.nan)
                                sal = np.append(sal,
                                                np.ones((max_depth - new_depth, 1)) * np.nan)

                            # if we don't have any data saved yet, create the grid
                            # matrices with the first data set
                            if len(grid_pres) == 0:
                                grid_pres = pres.reshape((1, len(pres)))
                                grid_ptmp = ptmp.reshape((1, len(pres)))
                                grid_sal = sal.reshape((1, len(pres)))

                            # if we already have data saved, add the new data to it
                            else:
                                grid_pres = np.append(grid_pres, pres).reshape(
                                    how_many_cols + 1, how_many_rows)
                                grid_ptmp = np.append(grid_ptmp, ptmp).reshape(
                                    how_many_cols + 1, how_many_rows)
                                grid_sal = np.append(grid_sal, sal).reshape(
                                    how_many_cols + 1, how_many_rows)

                            # save the latitude, longitude, and date of the new data
                            grid_lat = np.append(grid_lat, data['lat'][0, i])
                            grid_long = np.append(grid_long, data['long'][0, i])
                            grid_dates = np.append(grid_dates, data['dates'][0, i])

                            # readjust our values so we know which column to add the new
                            # data to, and what shape we should expect the data to be
                            max_depth = grid_pres.shape[1]
                            how_many_cols = grid_pres.shape[0]

            except Exception:
                pass

    # convert longitude to 0 to 360 degrees
    try:
        grid_long = wrap_longitude(grid_long)

        # make sure salinity, pressure, and potential temperature data
        # all have the same NaNs
        sal_nans = np.argwhere(np.isnan(grid_sal))
        for nan in sal_nans:
            grid_pres[nan[0], nan[1]] = np.nan
            grid_ptmp[nan[0], nan[1]] = np.nan

        pres_nans = np.argwhere(np.isnan(grid_pres))
        for nan in pres_nans:
            grid_sal[nan[0], nan[1]] = np.nan
            grid_ptmp[nan[0], nan[1]] = np.nan

        ptmp_nans = np.argwhere(np.isnan(grid_ptmp))
        for nan in ptmp_nans:
            grid_sal[nan[0], nan[1]] = np.nan
            grid_pres[nan[0], nan[1]] = np.nan

        grid_dates = change_dates(grid_dates)

        # transpose the data
        grid_sal = grid_sal.T
        grid_pres = grid_pres.T
        grid_ptmp = grid_ptmp.T

    except Exception:
        raise ValueError("get_region_data found no data for your specification")

    # we have encountered a problem where some of the data coming in is all NaN;
    # these columns need to be removed from the data set
    nans = 0
    for column in range(grid_sal.shape[1]):

        if np.all(np.isnan(grid_sal[:, column - nans])) or \
                np.all(np.isnan(grid_pres[:, column - nans])):
            grid_sal = np.delete(grid_sal, column - nans, 1)
            grid_ptmp = np.delete(grid_ptmp, column - nans, 1)
            grid_pres = np.delete(grid_pres, column - nans, 1)
            grid_lat = np.delete(grid_lat, column - nans)
            grid_long = np.delete(grid_long, column - nans)
            grid_dates = np.delete(grid_dates, column - nans)
            nans += 1

    if nans > 0:
        print("Warning: found ", nans,
              " all-NaN water columns in your dataset. These columns have been removed")

    return grid_sal, grid_ptmp, grid_pres, grid_lat, grid_long, grid_dates
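# A minimal usage sketch (illustrative only, not part of the library). The `index`
# array is a stand-in for the output of whatever cast-selection step precedes this
# call (here we simply take the first 50 casts), and `pa_float_pres` is a made-up
# set of float pressures; `config` must contain a "MAP_P_DELTA" entry, as used above.
def _example_region_data(config):
    # one row per WMO box: [box_number, ctd_flag, bottle_flag, argo_flag]
    pa_wmo_numbers = np.array([[3505, 1, 1, 1]])

    index = np.arange(50)  # indices of the historical casts we want to load
    pa_float_pres = np.array([10.0, 500.0, 1000.0, 2000.0])  # float pressures, dbar

    grid_sal, grid_ptmp, grid_pres, grid_lat, grid_long, grid_dates = \
        get_region_data(pa_wmo_numbers, "3901960", config, index, pa_float_pres)

    # one column per selected cast, one row per depth level
    print(grid_sal.shape, grid_ptmp.shape, grid_pres.shape)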