def test_returns_correct_data(self):
    """
    See if get_data returns the expected amount of data for each data type
    :return: Nothing
    """
    print("Testing that get_data returns the expected amount of data")

    wmo_boxes = np.array([3505, 1, 1, 1])

    data_ctd = get_data(wmo_boxes, 1, self.config, self.float_name)
    self.assertTrue(data_ctd['long'].shape[1] == 10,
                    "should return some data when fetching CTD")

    data_bot = get_data(wmo_boxes, 2, self.config, self.float_name)
    self.assertTrue(data_bot['long'].shape[1] == 33,
                    "should return some data when fetching bottle")

    data_argo = get_data(wmo_boxes, 3, self.config, self.float_name)
    self.assertTrue(len(data_argo['long'][0]) == 787,
                    "should return some data when fetching argo")
def test_returns_no_data(self):
    """
    See if get_data returns no data when every data-type flag is 0
    :return: Nothing
    """
    print("Testing that get_data will return no data if box is all 0's")

    wmo_boxes = np.array([3505, 0, 0, 0])
    data = get_data(wmo_boxes, 1, self.config, self.float_name)
    self.assertTrue(len(data) == 0, "should return no data")
def test_removes_argo_float(self):
    """
    See if get_data removes the argo float currently being analysed
    :return: Nothing
    """
    print("Testing that get_data will remove the argo being analysed")

    wmo_boxes = np.array([3505, 0, 0, 1])
    float_removed = "1900479"

    data_normal = get_data(wmo_boxes, 3, self.config, self.float_name)
    data_removed = get_data(wmo_boxes, 3, self.config, float_removed)

    # 7 pieces of historical data in the selected box come from float 1900479,
    # whereas 0 come from float 3901960, so data_removed should hold 7 fewer points
    self.assertTrue(
        len(data_normal['long'][0]) - len(data_removed['long'][0]) == 7,
        "Should have removed data associated with the float being processed")
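# Note on the fixtures used above (a summary inferred from the test calls, not
# from get_data itself): each wmo_boxes row has the form
# [wmo_box_number, ctd_flag, bottle_flag, argo_flag], where a flag of 1 means
# "use this data type" and 0 means "skip it", and the data_type argument to
# get_data (1=CTD, 2=bottle, 3=argo) selects which flag/file is consulted.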
def get_region_hist_locations(pa_wmo_numbers, pa_float_name, config):
    """
    Uses the WMO boxes to return all of the historical data in the given area,
    excluding the float that is currently being analysed.
    :param pa_wmo_numbers: 2D array containing the name of the WMO boxes that cover
    the area of interest, and flags for whether we want to use argo, bottle, and/or CTD data
    :param pa_float_name: string of the name of the float currently being processed
    :param config: dictionary containing configuration settings, used to find the
    locations of the folders and files containing the data
    :return: the latitude, longitude, and age of each data point we want
    """
    # set up matrices to hold the data
    grid_lat = []
    grid_long = []
    grid_dates = []

    # go through each of the WMO boxes
    for wmo_box in pa_wmo_numbers:

        # go through each of the columns denoting whether we should use CTD,
        # bottle, and/or argo data
        for data_type in range(1, 4):

            # get the data
            try:
                data = get_data(wmo_box, data_type, config, pa_float_name)

                # if we have data, combine it with the other data, then reset it
                if data:
                    grid_lat = np.concatenate([grid_lat, data['lat'][0]])
                    grid_long = np.concatenate([grid_long, data['long'][0]])
                    grid_dates = np.concatenate([grid_dates, data['dates'][0]])
                    data = []

            except Exception:
                pass

    if len(grid_lat) == 0:
        raise ValueError("get_region_hist_locations found no data for your specification. "
                         "Are your wmo_boxes files set up correctly?")

    grid_long = wrap_longitude(grid_long)

    # decimalise dates
    grid_dates = change_dates(grid_dates)

    return grid_lat, grid_long, grid_dates
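# A minimal usage sketch (illustrative only, not part of the library). It assumes
# `config` is a configuration dictionary of the kind the tests load, and that WMO
# box 3505 exists in the folders `config` points at; the box number and float name
# are taken from the tests above.
def _example_region_hist_locations(config):
    # one row per WMO box: [box_number, ctd_flag, bottle_flag, argo_flag]
    pa_wmo_numbers = np.array([[3505, 1, 1, 1]])

    # gather every historical CTD/bottle/argo location in the box,
    # excluding float 3901960
    grid_lat, grid_long, grid_dates = get_region_hist_locations(
        pa_wmo_numbers, "3901960", config)

    print(grid_lat.shape, grid_long.shape, grid_dates.shape)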
def test_returns_correct_shape(self):
    """
    See if the returned data has the expected shape
    :return: Nothing
    """
    print("Testing that get_data returns data with the expected shape")

    wmo_boxes = np.array([3505, 1, 0, 0])
    data = get_data(wmo_boxes, 1, self.config, self.float_name)

    self.assertTrue(data['sal'].shape == data['ptmp'].shape == data['temp'].shape,
                    "Ocean characteristic data should be the same shape")
    self.assertTrue(data['long'].shape == data['lat'].shape == data['dates'].shape,
                    "Spatial/temporal data should be the same shape")
    self.assertTrue(data['sal'].shape[1] == data['long'].shape[1],
                    "Should be a profile for every location")
def get_region_data(pa_wmo_numbers, pa_float_name, config, index, pa_float_pres):
    """
    Get the historical pressure, salinity, and temperature of selected casts
    :param pa_wmo_numbers: 2D array containing the name of the WMO boxes that cover
    the area of interest, and flags for whether we want to use argo, bottle, and/or CTD data
    :param pa_float_name: string of the name of the float currently being processed
    :param config: dictionary containing configuration settings, used to find the
    locations of the folders and files containing the data
    :param index: array of indices of selected historical casts
    :param pa_float_pres: array of pressures for the float being processed
    :return: the salinity, potential temperature, pressure, latitude, longitude,
    and age of each historical cast selected for use
    """
    # maximum depth to retrieve data from (deepest float measurement + MAP_P_DELTA)
    max_pres = np.nanmax(pa_float_pres) + config["MAP_P_DELTA"]

    # set up empty arrays to hold the data to return
    grid_sal = []
    grid_ptmp = []
    grid_pres = []
    grid_lat = []
    grid_long = []
    grid_dates = []
    data = []

    # set up the current maximum depth and number of columns
    max_depth = 0
    how_many_cols = 0

    # set up variable to save the beginning index for each set of data
    starting_index = 0

    # go through each of the WMO boxes
    for wmo_box in pa_wmo_numbers:

        # go through each of the columns denoting whether we should use CTD,
        # bottle, and/or argo data
        for data_type in range(1, 4):

            # get the data
            try:
                data = get_data(wmo_box, data_type, config, pa_float_name)

                if data:
                    # sometimes the data comes in wrapped in a 3D array, so convert to 2D
                    if len(data['pres']) == 1:
                        data['pres'] = data['pres'][0]
                        data['sal'] = data['sal'][0]
                        data['ptmp'] = data['ptmp'][0]
                        data['lat'] = data['lat'][0].reshape(-1, 1)
                        data['long'] = data['long'][0].reshape(-1, 1)
                        data['dates'] = data['dates'][0].reshape(-1, 1)

                    # check the index of each station to see if it should be loaded
                    data_length = len(data['lat'][0])
                    data_indices = np.arange(0, data_length) + starting_index

                    # remember the location of the last entry
                    starting_index = starting_index + data_length

                    # load each station
                    for i in range(0, data_length):
                        good_indices = np.argwhere(index == data_indices[i])

                        if len(good_indices) > 0:
                            # only use non-NaN values
                            not_nan = np.argwhere(np.isnan(data['pres'][:, i]) == 0)

                            # get the non-NaN values
                            pres = data['pres'][not_nan, i]
                            sal = data['sal'][not_nan, i]
                            ptmp = data['ptmp'][not_nan, i]

                            # remove values where the pressure exceeds the maximum we want
                            too_deep = np.argwhere(pres > max_pres)
                            pres = np.delete(pres, too_deep[:, 0])
                            sal = np.delete(sal, too_deep[:, 0])
                            ptmp = np.delete(ptmp, too_deep[:, 0])

                            new_depth = len(pres)
                            how_many_rows = np.max([new_depth, max_depth])

                            # if the new data we are adding is longer than our columns,
                            # we need to fill in NaNs in the other columns
                            if new_depth > max_depth != 0:
                                grid_pres = np.append(
                                    grid_pres,
                                    np.ones((how_many_cols, new_depth - max_depth)) * np.nan,
                                    axis=1).reshape((how_many_cols, how_many_rows))
                                grid_ptmp = np.append(
                                    grid_ptmp,
                                    np.ones((how_many_cols, new_depth - max_depth)) * np.nan,
                                    axis=1).reshape((how_many_cols, how_many_rows))
                                grid_sal = np.append(
                                    grid_sal,
                                    np.ones((how_many_cols, new_depth - max_depth)) * np.nan,
                                    axis=1).reshape((how_many_cols, how_many_rows))

                            # if the new data we are adding is shorter than our columns,
                            # fill in the rest with NaNs so it is the same length
                            elif new_depth < max_depth:
                                pres = np.append(pres,
                                                 np.ones((max_depth - new_depth, 1)) * np.nan)
                                ptmp = np.append(ptmp,
                                                 np.ones((max_depth - new_depth, 1)) * np.nan)
                                sal = np.append(sal,
                                                np.ones((max_depth - new_depth, 1)) * np.nan)

                            # if we don't have any data saved yet, create the grid
                            # matrices with the first data set
                            if len(grid_pres) == 0:
                                grid_pres = pres.reshape((1, len(pres)))
                                grid_ptmp = ptmp.reshape((1, len(pres)))
                                grid_sal = sal.reshape((1, len(pres)))

                            # if we already have data saved, add the new data to it
                            else:
                                grid_pres = np.append(grid_pres, pres).reshape(
                                    how_many_cols + 1, how_many_rows)
                                grid_ptmp = np.append(grid_ptmp, ptmp).reshape(
                                    how_many_cols + 1, how_many_rows)
                                grid_sal = np.append(grid_sal, sal).reshape(
                                    how_many_cols + 1, how_many_rows)

                            # save the latitude, longitude, and date of the new data
                            grid_lat = np.append(grid_lat, data['lat'][0, i])
                            grid_long = np.append(grid_long, data['long'][0, i])
                            grid_dates = np.append(grid_dates, data['dates'][0, i])

                            # readjust our values so we know which column to add the new
                            # data to, and what shape we should expect the data to be
                            max_depth = grid_pres.shape[1]
                            how_many_cols = grid_pres.shape[0]

            except Exception:
                pass

    # convert longitude to 0 to 360 degrees
    try:
        grid_long = wrap_longitude(grid_long)

        # make sure salinity, pressure, and potential temperature data
        # all have the same NaNs
        sal_nans = np.argwhere(np.isnan(grid_sal))
        for nan in sal_nans:
            grid_pres[nan[0], nan[1]] = np.nan
            grid_ptmp[nan[0], nan[1]] = np.nan

        pres_nans = np.argwhere(np.isnan(grid_pres))
        for nan in pres_nans:
            grid_sal[nan[0], nan[1]] = np.nan
            grid_ptmp[nan[0], nan[1]] = np.nan

        ptmp_nans = np.argwhere(np.isnan(grid_ptmp))
        for nan in ptmp_nans:
            grid_sal[nan[0], nan[1]] = np.nan
            grid_pres[nan[0], nan[1]] = np.nan

        grid_dates = change_dates(grid_dates)

        # transpose the data
        grid_sal = grid_sal.T
        grid_pres = grid_pres.T
        grid_ptmp = grid_ptmp.T

    except Exception:
        raise ValueError("get_region_data found no data for your specification")

    # we have encountered a problem where some of the data coming in is all NaN;
    # these columns need to be removed from the data set
    nans = 0
    for column in range(grid_sal.shape[1]):

        if np.all(np.isnan(grid_sal[:, column - nans])) or \
                np.all(np.isnan(grid_pres[:, column - nans])):
            grid_sal = np.delete(grid_sal, column - nans, 1)
            grid_ptmp = np.delete(grid_ptmp, column - nans, 1)
            grid_pres = np.delete(grid_pres, column - nans, 1)
            grid_lat = np.delete(grid_lat, column - nans)
            grid_long = np.delete(grid_long, column - nans)
            grid_dates = np.delete(grid_dates, column - nans)
            nans += 1

    if nans > 0:
        print("Warning: found ", nans,
              " all-NaN water columns in your dataset. These columns have been removed")

    return grid_sal, grid_ptmp, grid_pres, grid_lat, grid_long, grid_dates
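# A minimal usage sketch (illustrative only, not part of the library). The `index`
# array is a stand-in for the output of whatever cast-selection step precedes this
# call (here we simply take the first 50 casts), and `pa_float_pres` is a made-up
# set of float pressures; `config` must contain a "MAP_P_DELTA" entry, as used above.
def _example_region_data(config):
    # one row per WMO box: [box_number, ctd_flag, bottle_flag, argo_flag]
    pa_wmo_numbers = np.array([[3505, 1, 1, 1]])

    index = np.arange(50)  # indices of the historical casts we want to load
    pa_float_pres = np.array([10.0, 500.0, 1000.0, 2000.0])  # float pressures, dbar

    grid_sal, grid_ptmp, grid_pres, grid_lat, grid_long, grid_dates = \
        get_region_data(pa_wmo_numbers, "3901960", config, index, pa_float_pres)

    # one column per selected cast, one row per depth level
    print(grid_sal.shape, grid_ptmp.shape, grid_pres.shape)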