# Example #1
def dataset_value_generator(variable_name_list,
                            dataset_list,
                            bounding_box,
                            min_points=None,
                            max_points=None):
    '''
    Generator yielding (dataset, value_dict) pairs for each dataset in
    dataset_list which has points falling within bounding_box.

    @param variable_name_list: List of netCDF variable names to read per dataset
    @param dataset_list: List of netCDF dataset paths/URLs to inspect
    @param bounding_box: Spatial bounds passed to NetCDFPointUtils.get_spatial_mask
    @param min_points: Optional minimum in-bounds point count - datasets with fewer points are skipped
    @param max_points: Optional maximum in-bounds point count - datasets with more points are skipped
    @yield: (dataset, dataset_value_dict) where dataset_value_dict maps
        'coordinates' and each requested variable name to masked value arrays

    Datasets which cannot be read are reported and skipped rather than
    aborting the whole iteration.
    '''
    for dataset in dataset_list:
        nc_dataset = None
        try:
            nc_dataset = Dataset(
                dataset + '#fillmismatch'
            )  # Note work-around for bad _FillValue: https://github.com/Unidata/netcdf-c/issues/1299
            netcdf_point_utils = NetCDFPointUtils(nc_dataset)

            spatial_mask = netcdf_point_utils.get_spatial_mask(bounding_box)

            point_count = np.count_nonzero(spatial_mask)

            print('{}/{} points found in bounding box for {}'.format(
                point_count, netcdf_point_utils.point_count, dataset))

            if not point_count:
                continue

            # Enforce min/max point counts
            if min_points and point_count < min_points:
                print('Skipping dataset with < {} points'.format(min_points))
                continue
            if max_points and point_count > max_points:
                print('Skipping dataset with > {} points'.format(max_points))
                continue

            dataset_value_dict = {
                'coordinates': netcdf_point_utils.xycoords[spatial_mask]
            }

            # Read values for each requested variable; non-point-dimensioned
            # variables are treated as lookups and expanded to per-point values
            for variable_name in variable_name_list:
                variable = nc_dataset.variables[variable_name]
                if variable.dimensions[0] != 'point':
                    # Variable is NOT of point dimension - must be lookup
                    dataset_value_dict[variable_name] = \
                        netcdf_point_utils.expand_lookup_variable(
                            lookup_variable_name=variable_name,
                            mask=spatial_mask)
                else:  # 'point' is in variable.dimensions - "normal" variable
                    dataset_value_dict[variable_name] = variable[:][
                        spatial_mask]

            yield dataset, dataset_value_dict

        except Exception as e:
            print('Unable to read point dataset {}: {}'.format(dataset, e))
        finally:
            # BUGFIX: the original never closed the netCDF handle, leaking one
            # open file per dataset in the loop
            if nc_dataset is not None:
                try:
                    nc_dataset.close()
                except Exception:
                    pass  # best-effort close; nothing useful to do on failure
# Example #2
    def build_points(self,
                     dataset_metadata_dict,
                     bounding_box,
                     visibility=True):
        """
        Builds all points for a survey, including building the containing folder,
        setting time stamps, setting the style, and setting the description html
        to pop up when the point is selected.
        @param dataset_metadata_dict: Dict containing dataset metadata largely as returned by DatasetMetadataCache.search_dataset_distributions function
        @param bounding_box: Bounding box specified as [<xmin>, <ymin>, <xmax>, <ymax>] list
        @param visibility: Boolean flag indicating whether dataset geometry should be visible
        @return: Dataset folder under parent folder, or None if no points fall within bounding_box
        """
        # BUGFIX: regex must be a raw string - '\.' in a plain string literal is
        # an invalid escape sequence (SyntaxWarning from Python 3.12 onwards)
        cache_path = os.path.join(
            self.cache_dir,
            re.sub(r'\.nc$', '_cache.nc',
                   dataset_metadata_dict['netcdf_basename']))

        point_utils = NetCDFPointUtils(
            dataset_metadata_dict['netcdf_path'],
            enable_disk_cache=self.cache_coordinates,
            enable_memory_cache=True,
            cache_path=cache_path,
            debug=self.debug)

        # Bail out early if nothing is in view
        spatial_mask = point_utils.get_spatial_mask(bounding_box)
        #logger.debug('spatial_mask: {}'.format(spatial_mask))
        if not np.any(spatial_mask):
            logger.debug('No points in view')
            return

        dataset_folder_kml = self.dataset_type_folder.newfolder(
            name=dataset_metadata_dict['dataset_title'], visibility=visibility)

        dataset_folder_kml.style = self.point_style

        if self.timestamp_detail_view:
            # Enable timestamps on points
            self.set_timestamps(dataset_folder_kml, dataset_metadata_dict)

        # First item yielded by the generator is the attribute metadata;
        # subsequent items are the per-point value lists
        point_data_generator = point_utils.all_point_data_generator(
            self.point_field_list, spatial_mask)
        logger.debug(point_data_generator)
        variable_attributes = next(
            point_data_generator
        )  # Get point attribute names from first item returned
        logger.debug(variable_attributes)
        logger.debug("variable_attributes: " + str(variable_attributes))

        skip_points = 1  # set to limit the points displayed if required.
        visible_point_count = 0

        for point_data_list in point_data_generator:
            point_data = dict(
                zip(self.point_field_list,
                    point_data_list))  # Create dict for better readability
            logger.debug("POINT DATA: {}".format(point_data))
            visible_point_count += 1

            # ignore points between skip_points
            if visible_point_count % skip_points != 0:
                continue

            # add new points with netcdf file Obsno as title and long and lat as coordinates
            # point_field_list: ['obsno', 'latitude', 'longitude', 'grav', 'freeair', 'bouguer', 'stattype', 'reliab', 'gridflag']
            point_kml = dataset_folder_kml.newpoint(
                name="Observation no. " + str(point_data['obsno']),
                coords=[(point_data['longitude'], point_data['latitude'])],
                visibility=visibility)

            point_kml.style = dataset_folder_kml.style

            description_string = self.build_html_description_string(
                dataset_metadata_dict, variable_attributes, point_data)
            logger.debug(description_string)
            point_kml.description = description_string  # set description of point

            # Set point styling
            # Set the color for filtered points
            variant_point_style = None  # Assume point is unfiltered
            if self.point_filter:  # if there is a point_flag separate the points and color differently
                logger.debug('self.point_filter: {}'.format(self.point_filter))
                for key, value in self.point_filter.items():
                    if point_data[
                            key] == value:  # Point satisfies filter condition
                        variant_point_style = self.filtered_point_style
                        break

            if not variant_point_style:  # Point is not filtered
                if not self.point_color:  # Variable point color required
                    variant_point_color = self.value2colorhex(
                        point_data[self.point_color_field],
                        self.point_color_range)

                    variant_point_style = self.point_style_by_color.get(
                        variant_point_color)  # Check point style cache
                    if not variant_point_style:  # Point style doesn't already exist in cache - construct and cache it
                        variant_point_style = simplekml.Style()
                        variant_point_style.iconstyle.scale = self.point_icon_scale
                        variant_point_style.labelstyle.scale = self.point_labelstyle_scale
                        variant_point_style.iconstyle.icon.href = self.point_icon_href
                        variant_point_style.iconstyle.color = variant_point_color
                        self.point_style_by_color[
                            variant_point_color] = variant_point_style

            # Override default style if required
            if variant_point_style:
                point_kml.style = variant_point_style

        point_utils.close()  # Explicitly close netCDF file

        dataset_folder_kml.region = self.build_region(dataset_metadata_dict,
                                                      100, -1, 200, 800)

        if visible_point_count:
            dataset_folder_kml.name = dataset_folder_kml.name + ' ({} points in view)'.format(
                visible_point_count)

        return dataset_folder_kml
# Example #3
    def point_data_generator(self,
                             bounding_box,
                             keywords=None,
                             metadata_filter_function=None,
                             variable_names=None,
                             flight_lines_only=True):
        '''
        Generator yielding a point-data dict for each dataset found by
        self.dataset_metadata_generator within bounding_box.

        @param bounding_box: Spatial bounds passed to get_spatial_mask
        @param keywords: Optional list of keywords forwarded to the metadata search
        @param metadata_filter_function: Optional callable forwarded to the metadata search
        @param variable_names: Optional list restricting which point variables are read
        @param flight_lines_only: If True, exclude points belonging to tie-lines
        @yield: dict with keys 'metadata', 'coordinates' and 'values'

        Datasets which fail to read are logged and skipped rather than
        aborting the whole iteration.
        '''
        # BUGFIX: avoid mutable default argument ([]) - use None sentinel
        if keywords is None:
            keywords = []

        t0 = datetime.now()
        for metadata_dict in self.dataset_metadata_generator(
                bounding_box,
                keywords=keywords,
                metadata_filter_function=metadata_filter_function):
            nc_path = metadata_dict['file_path']

            # Fall back to the OPeNDAP path when the file is not locally accessible
            if not os.path.isfile(nc_path):
                nc_path = nc_path.replace(
                    GeophysPointFetcher.OPENDAP_PATH_MAP[0],
                    GeophysPointFetcher.OPENDAP_PATH_MAP[1])

            nc_dataset = None
            try:
                logger.info('Opening {}'.format(nc_path))
                nc_dataset = netCDF4.Dataset(nc_path, 'r')
                netcdf_line_utils = NetCDFPointUtils(nc_dataset)

                # Skip processing this dataset if it doesn't contain any of the required variables
                if variable_names and not (set(variable_names) & set(
                        netcdf_line_utils.point_variables)):
                    logger.debug(
                        'Skipping dataset containing none of the required variables'
                    )
                    continue

                if flight_lines_only:
                    logger.info('Excluding points in tie-lines')
                    # NOTE(review): assumes flag_linetype == 2 marks flight lines
                    # (not tie-lines) - confirm against the dataset convention
                    line_numbers = nc_dataset.variables['line'][
                        nc_dataset.variables['flag_linetype'][:] == 2]
                    line_mask = np.zeros(
                        shape=(netcdf_line_utils.point_count, ), dtype=bool)
                    for _line_number, single_line_mask in netcdf_line_utils.get_line_masks(
                            line_numbers):
                        line_mask = np.logical_or(line_mask, single_line_mask)
                else:
                    line_mask = np.ones(
                        shape=(netcdf_line_utils.point_count, ), dtype=bool)

                logger.info('Computing spatial subset mask')
                spatial_mask = netcdf_line_utils.get_spatial_mask(bounding_box)
                if not np.any(spatial_mask):
                    logger.warning(
                        'No points in bounding box {} for {}'.format(
                            tuple(bounding_box), nc_path))
                    continue

                # Points must satisfy both the spatial and the line-type masks
                point_indices = np.where(
                    np.logical_and(spatial_mask, line_mask))[0]

                if not len(point_indices):
                    logger.warning(
                        'No points in bounding box {} for {}'.format(
                            tuple(bounding_box), nc_path))
                    continue

                logger.info('{} points found in bounding box {} for {}'.format(
                    len(point_indices), tuple(bounding_box), nc_path))

                dataset_dict = {}

                dataset_dict['metadata'] = dict(metadata_dict)  # Copy metadata
                logger.debug('\tReading coordinates')
                dataset_dict['coordinates'] = netcdf_line_utils.xycoords[
                    point_indices]
                dataset_dict['values'] = {}
                for variable_name in netcdf_line_utils.point_variables:
                    if not variable_names or variable_name in variable_names:
                        logger.debug(
                            '\tReading values for {}'.format(variable_name))
                        dataset_dict['values'][
                            variable_name] = nc_dataset.variables[
                                variable_name][:][point_indices]

                yield dataset_dict
            except Exception as e:
                # BUGFIX: Python 3 exceptions have no .message attribute - the
                # original handler raised AttributeError instead of logging
                logger.error(
                    'Failed to retrieve point data from {}: {}'.format(
                        nc_path, e))
            finally:
                # BUGFIX: the original never closed the netCDF handle,
                # leaking one open file per dataset in the loop
                if nc_dataset is not None:
                    try:
                        nc_dataset.close()
                    except Exception:
                        pass  # best-effort close

        #=======================================================================
        # # Sort results by ascending distance for each point
        # for coordinate in bounding_box:
        #     point_result_dict[coordinate] = sorted(point_result_dict[coordinate], key=lambda d: d['distance'], reverse=False)
        #=======================================================================

        logger.debug('Elapsed time: {}'.format(datetime.now() - t0))