def match_tile_to_point_generator(tile_service, tile_id, m_tree, edge_results, search_domain_bounding_wkt, search_parameter, radius_tolerance, lonlat_proj, aeqd_proj):
    """
    Generator yielding (primary, matchup) DomsPoint pairs for a single tile.

    Loads the tile identified by ``tile_id``, masks it to the search-domain
    polygon, projects each valid measurement location from lon/lat into the
    AEQD plane, then queries the pre-built matchup KD-tree ``m_tree`` for all
    matchup points within ``radius_tolerance`` of each primary point
    (tolerance is in projected units, presumably meters -- TODO confirm).

    :param tile_service: tile service used to fetch and spatially mask tiles
    :param tile_id: id of the primary tile to match
    :param m_tree: scipy cKDTree built over the projected matchup points
    :param edge_results: sequence of edge records aligned with m_tree indices
    :param search_domain_bounding_wkt: WKT polygon bounding the search domain
    :param search_parameter: not referenced in this body -- NOTE(review):
        confirm it is intentionally unused (kept for the Spark call signature?)
    :param radius_tolerance: match radius passed to query_ball_tree
    :param lonlat_proj: source pyproj projection (lon/lat)
    :param aeqd_proj: target pyproj azimuthal equidistant projection
    """
    from nexustiles.model.nexusmodel import NexusPoint
    from webservice.algorithms_spark.Matchup import DomsPoint  # Must import DomsPoint or Spark complains

    # Load tile and clip it to the search domain polygon.
    try:
        the_time = datetime.now()
        tile = tile_service.mask_tiles_to_polygon(
            wkt.loads(search_domain_bounding_wkt),
            tile_service.find_tile_by_id(tile_id))[0]
        print("%s Time to load tile %s" % (str(datetime.now() - the_time), tile_id))
    except IndexError:
        # This should only happen if all measurements in a tile become masked
        # after applying the bounding polygon.
        print('Tile is empty after masking spatially. Skipping this tile.')
        return

    # Convert valid tile lat,lon tuples to projected x,y tuples (the target
    # projection is AEQD; an earlier comment here said "UTM").
    the_time = datetime.now()
    # Get list of indices of valid values; usage below shows each index is a
    # (time_idx, lat_idx, lon_idx) triple into the tile arrays.
    valid_indices = tile.get_indices()
    # NOTE(review): pyproj.transform is deprecated in pyproj >= 2.6; a
    # pyproj.Transformer would also allow one batched call instead of one
    # call per point -- confirm before changing.
    primary_points = np.array([
        pyproj.transform(p1=lonlat_proj, p2=aeqd_proj,
                         x=tile.longitudes[aslice[2]],
                         y=tile.latitudes[aslice[1]])
        for aslice in valid_indices])
    print("%s Time to convert primary points for tile %s" % (str(datetime.now() - the_time), tile_id))

    a_time = datetime.now()
    p_tree = spatial.cKDTree(primary_points, leafsize=30)
    print("%s Time to build primary tree" % (str(datetime.now() - a_time)))

    a_time = datetime.now()
    # For each primary point: the list of m_tree indices within the tolerance.
    matched_indexes = p_tree.query_ball_tree(m_tree, radius_tolerance)
    print("%s Time to query primary tree for tile %s" % (str(datetime.now() - a_time), tile_id))

    for i, point_matches in enumerate(matched_indexes):
        if len(point_matches) > 0:
            p_nexus_point = NexusPoint(
                latitude=tile.latitudes[valid_indices[i][1]],
                longitude=tile.longitudes[valid_indices[i][2]],
                depth=None,
                time=tile.times[valid_indices[i][0]],
                index=valid_indices[i],
                data_val=tile.data[tuple(valid_indices[i])])
            p_doms_point = DomsPoint.from_nexus_point(p_nexus_point, tile=tile)
            # One yielded pair per (primary, matchup) combination.
            for m_point_index in point_matches:
                m_doms_point = DomsPoint.from_edge_point(
                    edge_results[m_point_index])
                yield p_doms_point, m_doms_point
def test_doms_point_is_pickleable():
    """A DomsPoint built from an edge record must survive pickle serialization."""
    sample_edge_point = dict(
        id='argo-profiles-5903995(46, 0)',
        time='2012-10-15T14:24:04Z',
        point='-33.467 29.728',
        sea_water_temperature=24.5629997253,
        sea_water_temperature_depth=2.9796258642,
        wind_speed=None,
        sea_water_salinity=None,
        sea_water_salinity_depth=None,
        platform=4,
        device=3,
        fileurl='ftp://podaac-ftp.jpl.nasa.gov/allData/argo-profiles-5903995.nc',
    )
    assert pickle.dumps(DomsPoint.from_edge_point(sample_edge_point)) is not None
def from_edge_point(edge_point):
    """
    Build a DomsPoint from an in-situ "edge" record (a dict).

    The ``'point'`` field is normally WKT (e.g. ``'POINT (-33.467 29.728)'``);
    if WKT parsing fails, fall back to a bare ``"lon lat"`` string, and
    finally to a comma-separated string assumed to be in ``"lat,lon"`` order.
    All measurement fields are optional and default to None when absent.

    :param edge_point: dict of edge-service fields (see keys read below)
    :return: a populated DomsPoint
    """
    point = DomsPoint()

    try:
        x, y = wkt.loads(edge_point['point']).coords[0]
    except ReadingError:
        try:
            # Space-separated fallback: "lon lat"
            x, y = Point(
                *[float(c) for c in edge_point['point'].split(' ')]).coords[0]
        except ValueError:
            # Comma-separated fallback is assumed to be "lat,lon"
            y, x = Point(
                *[float(c) for c in edge_point['point'].split(',')]).coords[0]
    point.longitude = x
    point.latitude = y

    point.time = edge_point['time']

    point.wind_u = edge_point.get('eastward_wind')
    point.wind_v = edge_point.get('northward_wind')
    point.wind_direction = edge_point.get('wind_direction')
    point.wind_speed = edge_point.get('wind_speed')
    point.sst = edge_point.get('sea_water_temperature')
    point.sst_depth = edge_point.get('sea_water_temperature_depth')
    point.sss = edge_point.get('sea_water_salinity')
    point.sss_depth = edge_point.get('sea_water_salinity_depth')
    point.source = edge_point.get('source')
    point.platform = edge_point.get('platform')
    point.device = edge_point.get('device')
    point.file_url = edge_point.get('fileurl')

    try:
        # Fixed: 'unicode' does not exist in Python 3 and raised NameError
        # whenever the record had an 'id' key; str() is the py3 equivalent.
        point.data_id = str(edge_point['id'])
    except KeyError:
        # No stable id supplied; synthesize one from time and location.
        point.data_id = "%s:%s:%s" % (point.time, point.longitude, point.latitude)

    return point
def from_nexus_point(nexus_point, tile=None, parameter='sst'):
    """
    Build a DomsPoint from a satellite NexusPoint and its parent tile.

    The measured value is stored under the attribute selected by
    ``parameter`` ('sst', 'sss', or 'wind'); any other value raises
    NotImplementedError. For 'wind', auxiliary wind metadata is copied
    from the tile when present at this point's index.

    :param nexus_point: the satellite measurement point
    :param tile: the tile the point came from (required despite the default)
    :param parameter: which physical variable the data value represents
    :return: a populated DomsPoint
    """
    point = DomsPoint()
    point.data_id = "%s[%s]" % (tile.tile_id, nexus_point.index)

    # TODO Not an ideal solution; but it works for now.
    if parameter == 'sst':
        point.sst = nexus_point.data_val.item()
    elif parameter == 'sss':
        point.sss = nexus_point.data_val.item()
    elif parameter == 'wind':
        point.wind_u = nexus_point.data_val.item()
        # Optional wind metadata: skip any component missing from the tile.
        index_tuple = tuple(nexus_point.index)
        for attr_name, meta_key in (('wind_v', 'wind_v'),
                                    ('wind_direction', 'wind_dir'),
                                    ('wind_speed', 'wind_speed')):
            try:
                setattr(point, attr_name,
                        tile.meta_data[meta_key][index_tuple].item())
            except (KeyError, IndexError):
                pass
    else:
        raise NotImplementedError(
            '%s not supported. Only sst, sss, and wind parameters are supported.' % parameter)

    point.longitude = nexus_point.longitude.item()
    point.latitude = nexus_point.latitude.item()
    point.time = datetime.utcfromtimestamp(
        nexus_point.time).strftime('%Y-%m-%dT%H:%M:%SZ')

    try:
        point.depth = nexus_point.depth
    except KeyError:
        # No depth associated with this measurement
        pass

    point.sst_depth = 0
    point.source = tile.dataset
    point.file_url = tile.granule

    # TODO device should change based on the satellite making the observations.
    point.platform = 9
    point.device = 5
    return point
def test_calc(test_matchup_args):
    """
    Assert that the expected functions are called during the matchup calculation
    and that the results are formatted as expected.
    """
    # Mock anything that connects external dependence (Solr, Cassandra, ...)
    tile_service_factory = mock.MagicMock()
    tile_service = mock.MagicMock()
    tile_service_factory.return_value = tile_service
    # A real (local) SparkSession is used; only external services are mocked.
    spark = SparkSession.builder.appName('nexus-analysis').getOrCreate()
    spark_context = spark.sparkContext
    request = mock.MagicMock()
    request.get_argument.return_value = '1,2,3,4'

    # Patch in request arguments
    start_time = datetime.strptime(
        '2020-01-01T00:00:00', '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
    end_time = datetime.strptime(
        '2020-02-01T00:00:00', '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
    polygon_wkt = 'POLYGON((-34.98 29.54, -30.1 29.54, -30.1 31.00, -34.98 31.00, -34.98 29.54))'
    # The ordering of this dict must match the positional-argument order that
    # parse_arguments is stubbed to return below.
    args = {
        'bounding_polygon': wkt.loads(polygon_wkt),
        'primary_ds_name': 'primary-ds-name',
        'matchup_ds_names': 'matchup-ds-name',
        'parameter_s': 'sst',
        'start_time': start_time,
        'start_seconds_from_epoch': start_time.timestamp(),
        'end_time': end_time,
        'end_seconds_from_epoch': end_time.timestamp(),
        'depth_min': 1.0,
        'depth_max': 2.0,
        'time_tolerance': 3.0,
        'radius_tolerance': 4.0,
        'platforms': '1,2,3,4,5,6,7,8,9',
        'match_once': True,
        'result_size_limit': 10
    }

    # Helper: a minimal Tile carrying only the id the driver call needs.
    def generate_fake_tile(tile_id):
        tile = Tile()
        tile.tile_id = tile_id
        return tile

    # Mock tiles
    fake_tiles = [generate_fake_tile(idx) for idx in range(10)]
    tile_service.find_tiles_in_polygon.return_value = fake_tiles

    # Mock result
    # Format of 'spark_result': keys=domspoint,values=list of domspoint
    doms_point_args = {
        'longitude': -180,
        'latitude': -90,
        'time': '2020-01-15T00:00:00Z'
    }
    d1_sat = DomsPoint(**doms_point_args)
    d2_sat = DomsPoint(**doms_point_args)
    d1_ins = DomsPoint(**doms_point_args)
    d2_ins = DomsPoint(**doms_point_args)
    d1_sat.data = [
        DataPoint(variable_name='sea_surface_temperature', variable_value=10.0)
    ]
    d2_sat.data = [
        DataPoint(variable_name='sea_surface_temperature', variable_value=20.0)
    ]
    d1_ins.data = [
        DataPoint(variable_name='sea_surface_temperature', variable_value=30.0)
    ]
    d2_ins.data = [
        DataPoint(variable_name='sea_surface_temperature', variable_value=40.0)
    ]
    fake_spark_result = {
        d1_sat: [d1_ins, d2_ins],
        d2_sat: [d1_ins, d2_ins],
    }

    matchup_obj = Matchup(tile_service_factory=tile_service_factory, sc=spark_context)
    # Bypass real argument parsing; feed args positionally from the dict above.
    matchup_obj.parse_arguments = lambda _: [item for item in args.values()]

    with mock.patch('webservice.algorithms_spark.Matchup.ResultsStorage') as mock_rs, \
            mock.patch(
                'webservice.algorithms_spark.Matchup.spark_matchup_driver') as mock_matchup_driver:
        # NOTE(review): this sets insertExecution on the patched *class*, not on
        # the instance (mock_rs.return_value...). If ResultsStorage is used as
        # 'ResultsStorage(...) as storage' inside calc, this stub may never be
        # hit -- confirm against the Matchup implementation.
        mock_rs.insertExecution.return_value = 1
        mock_matchup_driver.return_value = fake_spark_result
        matchup_result = matchup_obj.calc(request)

        # Ensure the call to 'spark_matchup_driver' contains the expected params
        assert len(mock_matchup_driver.call_args_list) == 1
        matchup_driver_args = mock_matchup_driver.call_args_list[0].args
        matchup_driver_kwargs = mock_matchup_driver.call_args_list[0].kwargs
        assert matchup_driver_args[0] == [tile.tile_id for tile in fake_tiles]
        assert wkt.loads(matchup_driver_args[1]).equals(wkt.loads(polygon_wkt))
        assert matchup_driver_args[2] == args['primary_ds_name']
        assert matchup_driver_args[3] == args['matchup_ds_names']
        assert matchup_driver_args[4] == args['parameter_s']
        assert matchup_driver_args[5] == args['depth_min']
        assert matchup_driver_args[6] == args['depth_max']
        assert matchup_driver_args[7] == args['time_tolerance']
        assert matchup_driver_args[8] == args['radius_tolerance']
        assert matchup_driver_args[9] == args['platforms']
        assert matchup_driver_args[10] == args['match_once']
        assert matchup_driver_args[11] == tile_service_factory
        assert matchup_driver_kwargs['sc'] == spark_context

        # Ensure the result of the matchup calculation is as expected
        json_matchup_result = json.loads(matchup_result.toJson())
        assert len(json_matchup_result['data']) == 2
        assert len(json_matchup_result['data'][0]['matches']) == 2
        assert len(json_matchup_result['data'][1]['matches']) == 2

        # Coordinates are serialized as strings in the JSON output.
        for data in json_matchup_result['data']:
            assert data['x'] == '-180'
            assert data['y'] == '-90'
            for matches in data['matches']:
                assert matches['x'] == '-180'
                assert matches['y'] == '-90'

        # Primary (satellite) values and their in-situ matches round-trip intact.
        assert json_matchup_result['data'][0]['data'][0][
            'variable_value'] == 10.0
        assert json_matchup_result['data'][1]['data'][0][
            'variable_value'] == 20.0
        assert json_matchup_result['data'][0]['matches'][0]['data'][0][
            'variable_value'] == 30.0
        assert json_matchup_result['data'][0]['matches'][1]['data'][0][
            'variable_value'] == 40.0
        assert json_matchup_result['data'][1]['matches'][0]['data'][0][
            'variable_value'] == 30.0
        assert json_matchup_result['data'][1]['matches'][1]['data'][0][
            'variable_value'] == 40.0

        assert json_matchup_result['details']['numInSituMatched'] == 4
        assert json_matchup_result['details']['numGriddedMatched'] == 2
def from_nexus_point(nexus_point, tile=None):
    """
    Create a DomsPoint from a satellite NexusPoint and its parent tile.

    The measured value is stored generically under satellite_var_name /
    satellite_var_value (named after the tile's source NetCDF variable);
    auxiliary wind metadata is copied from the tile when present.

    :param nexus_point: the satellite measurement point
    :param tile: the tile the point came from (required despite the default)
    :return: a populated DomsPoint
    """
    point = DomsPoint()
    point.data_id = "%s[%s]" % (tile.tile_id, nexus_point.index)

    # Get the name of the satellite variable from the source NetCDF
    point.satellite_var_name = tile.var_name
    point.satellite_var_value = nexus_point.data_val.item()

    # Optional wind metadata: skip any component missing from the tile.
    index_tuple = tuple(nexus_point.index)
    for attr_name, meta_key in (('wind_v', 'wind_v'),
                                ('wind_direction', 'wind_dir'),
                                ('wind_speed', 'wind_speed')):
        try:
            setattr(point, attr_name,
                    tile.meta_data[meta_key][index_tuple].item())
        except (KeyError, IndexError):
            pass

    point.longitude = nexus_point.longitude.item()
    point.latitude = nexus_point.latitude.item()
    point.time = datetime.utcfromtimestamp(
        nexus_point.time).strftime('%Y-%m-%dT%H:%M:%SZ')

    try:
        point.depth = nexus_point.depth
    except KeyError:
        # No depth associated with this measurement
        pass

    point.sst_depth = 0
    point.source = tile.dataset
    point.file_url = tile.granule

    # TODO device should change based on the satellite making the observations.
    point.platform = 9
    point.device = 5
    return point