class TestMRMSGrid(unittest.TestCase):
    def setUp(self):
        self.path = "../../testdata/mrms_3km/"
        self.variable = "MESH_Max_60min_00.50"
        self.start_date = datetime(2015, 5, 1, 18, 0)
        self.end_date = datetime(2015, 5, 2, 15, 0)
        self.mrms = MRMSGrid(self.start_date, self.end_date, self.variable,
                             self.path)

    def test_constructor(self):
        self.assertEqual(self.mrms.all_dates.size, 22,
                          "Number of dates is wrong")
        self.assertIsNone(self.mrms.data, "Data already loaded")
        self.assertIsNone(self.mrms.valid_dates, "Valid dates already loaded")

    def test_loading(self):
        self.mrms.load_data()
        self.assertEqual(self.mrms.data.shape[0],
                          self.mrms.valid_dates.shape[0],
                          "Data and valid dates unequal length")
        self.assertEqual(self.mrms.all_dates.shape[0],
                          self.mrms.valid_dates.shape[0],
                          "All dates were not loaded")
Example #4
    def load_obs(self, mask_threshold=0.5):
        """
        Loads observations and masking grid (if needed).

        :param mask_threshold: Values greater than the threshold are kept, others are masked.
        """
        start_date = self.run_date + timedelta(hours=self.start_hour)
        end_date = self.run_date + timedelta(hours=self.end_hour)
        mrms_grid = MRMSGrid(start_date, end_date, self.mrms_variable, self.mrms_path)
        mrms_grid.load_data()
        if len(mrms_grid.data) > 0:
            self.raw_obs[self.mrms_variable] = np.where(mrms_grid.data > 100, 100, mrms_grid.data)
            self.window_obs[self.mrms_variable] = np.array([self.raw_obs[self.mrms_variable][sl].max(axis=0)
                                                            for sl in self.hour_windows])
            if self.obs_mask:
                mask_grid = MRMSGrid(start_date, end_date, self.mask_variable, self.mrms_path)
                mask_grid.load_data()
                self.raw_obs[self.mask_variable] = np.where(mask_grid.data >= mask_threshold, 1, 0)
                self.window_obs[self.mask_variable] = np.array([self.raw_obs[self.mask_variable][sl].max(axis=0)
                                                               for sl in self.hour_windows])
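
This variant reduces raw_obs over each entry of hour_windows, producing one 2D maximum grid per window. A small sketch of the pattern, assuming hour_windows is a list of slice objects over the leading (time) axis:

import numpy as np

raw = np.random.rand(24, 5, 5)                       # stand-in (hours, y, x) observation stack
hour_windows = [slice(h, h + 4) for h in range(21)]  # hypothetical 4-hour windows
window_max = np.array([raw[sl].max(axis=0) for sl in hour_windows])
print(window_max.shape)                              # (21, 5, 5)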
Example #5
    def load_obs(self, mask_threshold=0.5):
        """
        Loads observations and masking grid (if needed).

        Args:
            mask_threshold: Values greater than the threshold are kept, others are masked.
        """
        print("Loading obs ", self.run_date, self.model_name, self.forecast_variable)
        start_date = self.run_date + timedelta(hours=self.start_hour)
        end_date = self.run_date + timedelta(hours=self.end_hour)
        mrms_grid = MRMSGrid(start_date, end_date, self.mrms_variable, self.mrms_path)
        mrms_grid.load_data()
        if len(mrms_grid.data) > 0:
            self.raw_obs[self.mrms_variable] = np.where(mrms_grid.data > 100, 100, mrms_grid.data)
            self.period_obs[self.mrms_variable] = self.raw_obs[self.mrms_variable].max(axis=0)
            if self.obs_mask:
                mask_grid = MRMSGrid(start_date, end_date, self.mask_variable, self.mrms_path)
                mask_grid.load_data()
                self.raw_obs[self.mask_variable] = np.where(mask_grid.data >= mask_threshold, 1, 0)
                self.period_obs[self.mask_variable] = self.raw_obs[self.mask_variable].max(axis=0)
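
Both load_obs variants cap observations at 100 and turn the mask grid into ones and zeros with np.where. A tiny self-contained illustration of those two idioms:

import numpy as np

data = np.array([3.0, 150.0, 42.0])
capped = np.where(data > 100, 100, data)  # equivalent to np.minimum(data, 100)
mask = np.where(data >= 0.5, 1, 0)        # 1 where kept, 0 where masked
print(capped, mask)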
class TrackProcessor(object):
    """
    TrackProcessor identifies local maxima in a convection-allowing model run and links them in time to form
    storm tracks. A similar procedure is applied to the observations, and the two sets of tracks are matched.
    Storm and environmental attributes are extracted from within the identified track areas.
    
    Args:
        run_date: Datetime model run was initialized
        start_date: Datetime for the beginning of storm extraction.
        end_date: Datetime for the ending of storm extraction.
        ensemble_name: Name of the ensemble being used.
        ensemble_member: Name of the ensemble member being used.
        variable: Model variable being used for extraction.
        model_path: Path to the ensemble output.
        model_map_file: File containing model map projection information.
        model_watershed_params: Tuple of parameters used for segmentation.
        object_matcher_params: Tuple of parameters used for ObjectMatcher.
        track_matcher_params: Tuple of parameters for TrackMatcher or TrackStepMatcher.
        size_filter: Minimum size of model objects.
        gaussian_window: Standard deviation of the Gaussian smoothing filter, in grid points.
        segmentation_approach: Select the segmentation algorithm. "ew" for enhanced watershed (default), "ws" for
            regular watershed, and "hyst" for hysteresis.
        match_steps: If True, match individual steps in tracks instead of matching whole tracks.
        mrms_path: Path to MRMS netCDF files.
        mrms_variable: MRMS variable being used.
        mrms_watershed_params: Tuple of parameters for segmentation applied to MESH data. If None, then model
            segmentation parameters are used.
        single_step: Whether model timesteps are in separate files or aggregated into one file.
        mask_file: netCDF filename containing a mask of valid grid points on the model domain.
        patch_radius: Half-width in grid points of the square patches extracted around storm centers.
    """
    def __init__(self,
                 run_date,
                 start_date,
                 end_date,
                 ensemble_name,
                 ensemble_member,
                 variable,
                 model_path,
                 model_map_file,
                 model_watershed_params,
                 object_matcher_params,
                 track_matcher_params,
                 size_filter,
                 gaussian_window,
                 segmentation_approach="ew",
                 match_steps=True,
                 mrms_path=None,
                 mrms_variable=None,
                 mrms_watershed_params=None,
                 single_step=True,
                 mask_file=None,
                 patch_radius=32):
        self.run_date = run_date
        self.start_date = start_date
        self.end_date = end_date
        self.start_hour = int(
            (self.start_date - self.run_date).total_seconds()) // 3600
        self.end_hour = int(
            (self.end_date - self.run_date).total_seconds()) // 3600
        self.hours = np.arange(int(self.start_hour), int(self.end_hour) + 1)
        self.ensemble_name = ensemble_name
        self.ensemble_member = ensemble_member
        self.variable = variable
        self.segmentation_approach = segmentation_approach
        if self.segmentation_approach == "ws":
            self.model_ew = Watershed(*model_watershed_params)
        elif self.segmentation_approach == "hyst":
            self.model_ew = Hysteresis(*model_watershed_params)
        else:
            self.model_ew = EnhancedWatershed(*model_watershed_params)
        self.object_matcher = ObjectMatcher(*object_matcher_params)
        if match_steps:
            self.track_matcher = None
            self.track_step_matcher = TrackStepMatcher(*track_matcher_params)
        else:
            self.track_matcher = TrackMatcher(*track_matcher_params)
            self.track_step_matcher = None
        self.size_filter = size_filter
        self.gaussian_window = gaussian_window
        self.model_path = model_path
        self.model_map_file = model_map_file
        self.mrms_path = mrms_path
        self.single_step = single_step
        self.model_grid = ModelOutput(self.ensemble_name,
                                      self.ensemble_member,
                                      self.run_date,
                                      self.variable,
                                      self.start_date,
                                      self.end_date,
                                      self.model_path,
                                      self.model_map_file,
                                      single_step=self.single_step)
        self.model_grid.load_map_info(self.model_map_file)
        if self.mrms_path is not None:
            self.mrms_variable = mrms_variable
            self.mrms_grid = MRMSGrid(self.start_date, self.end_date,
                                      self.mrms_variable, self.mrms_path)
            if mrms_watershed_params is None:
                mrms_watershed_params = model_watershed_params
            if self.segmentation_approach == "ws":
                self.mrms_ew = Watershed(*mrms_watershed_params)
            elif self.segmentation_approach == "hyst":
                self.mrms_ew = Hysteresis(*mrms_watershed_params)
            else:
                self.mrms_ew = EnhancedWatershed(*mrms_watershed_params)
        else:
            self.mrms_grid = None
            self.mrms_ew = None
        self.mask_file = mask_file
        self.mask = None
        if self.mask_file is not None:
            mask_data = Dataset(self.mask_file)
            self.mask = mask_data.variables["usa_mask"][:]
            mask_data.close()
        self.patch_radius = patch_radius
        return

    def find_model_patch_tracks(self):
        """
        Identify storms in gridded model output and extract uniform sized patches around the storm centers of mass.

        Returns:
            List of tracked STObjects containing the extracted patches.
        """
        self.model_grid.load_data()
        tracked_model_objects = []
        model_objects = []
        if self.model_grid.data is None:
            print("No model output found")
            return tracked_model_objects
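        # For the enhanced watershed, temporarily move the thresholds onto the
        # 0-100 integer scale that the rescaled data will use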
        if self.segmentation_approach == "ew":
            min_orig = self.model_ew.min_intensity
            max_orig = self.model_ew.max_intensity
            data_increment_orig = self.model_ew.data_increment
            self.model_ew.min_intensity = 0
            self.model_ew.data_increment = 1
            self.model_ew.max_intensity = 100
        else:
            min_orig = 0
            max_orig = 1
            data_increment_orig = 1
        for h, hour in enumerate(self.hours):
            # Identify storms at each time step and apply size filter
            print("Finding {0} objects for run {1} Hour: {2:02d}".format(
                self.ensemble_member, self.run_date.strftime("%Y%m%d%H"),
                hour))
            if self.mask is not None:
                model_data = self.model_grid.data[h] * self.mask
            else:
                model_data = self.model_grid.data[h]
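            # Zero a patch_radius-wide border so extracted patches stay fully inside the model grid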
            model_data[:self.patch_radius] = 0
            model_data[-self.patch_radius:] = 0
            model_data[:, :self.patch_radius] = 0
            model_data[:, -self.patch_radius:] = 0
            if self.segmentation_approach == "ew":
                scaled_data = np.array(
                    rescale_data(model_data, min_orig, max_orig))
                hour_labels = label_storm_objects(
                    scaled_data,
                    self.segmentation_approach,
                    self.model_ew.min_intensity,
                    self.model_ew.max_intensity,
                    min_area=self.size_filter,
                    max_area=self.model_ew.max_size,
                    max_range=self.model_ew.delta,
                    increment=self.model_ew.data_increment,
                    gaussian_sd=self.gaussian_window)
                del scaled_data
            else:
                hour_labels = label_storm_objects(
                    model_data,
                    self.segmentation_approach,
                    self.model_ew.min_intensity,
                    self.model_ew.max_intensity,
                    min_area=self.size_filter,
                    gaussian_sd=self.gaussian_window)
            model_objects.extend(
                extract_storm_patches(hour_labels,
                                      model_data,
                                      self.model_grid.x,
                                      self.model_grid.y, [hour],
                                      dx=self.model_grid.dx,
                                      patch_radius=self.patch_radius))
            for model_obj in model_objects[-1]:
                slices = list(find_objects(model_obj.masks[-1]))
                if len(slices) > 0:
                    dims = (slices[0][0].stop - slices[0][0].start,
                            slices[0][1].stop - slices[0][1].start)
                    if h > 0:
                        model_obj.estimate_motion(hour,
                                                  self.model_grid.data[h - 1],
                                                  dims[1], dims[0])

            del model_data
            del hour_labels
        tracked_model_objects.extend(
            track_storms(model_objects, self.hours,
                         self.object_matcher.cost_function_components,
                         self.object_matcher.max_values,
                         self.object_matcher.weights))
        if self.segmentation_approach == "ew":
            self.model_ew.min_intensity = min_orig
            self.model_ew.max_intensity = max_orig
            self.model_ew.data_increment = data_increment_orig
        return tracked_model_objects

    def find_model_tracks(self):
        """
        Identify storms at each model time step and link them together with object matching.

        Returns:
            List of STObjects containing model track information.
        """
        self.model_grid.load_data()
        model_objects = []
        tracked_model_objects = []
        if self.model_grid.data is None:
            print("No model output found")
            return tracked_model_objects
        for h, hour in enumerate(self.hours):
            # Identify storms at each time step and apply size filter
            print("Finding {0} objects for run {1} Hour: {2:02d}".format(
                self.ensemble_member, self.run_date.strftime("%Y%m%d%H"),
                hour))
            if self.mask is not None:
                model_data = self.model_grid.data[h] * self.mask
            else:
                model_data = self.model_grid.data[h]

            # For the enhanced watershed, remember the original thresholds and
            # rescale the data to integers in 0-100.
            if self.segmentation_approach == "ew":
                min_orig = self.model_ew.min_intensity
                max_orig = self.model_ew.max_intensity
                data_increment_orig = self.model_ew.data_increment
                scaled_data = np.array(
                    rescale_data(self.model_grid.data[h], min_orig, max_orig))
                self.model_ew.min_intensity = 0
                self.model_ew.data_increment = 1
                self.model_ew.max_intensity = 100
            else:
                min_orig = 0
                max_orig = 1
                data_increment_orig = 1
                scaled_data = self.model_grid.data[h]
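            # Label the smoothed field, then discard labels where the raw data
            # falls below the minimum intensity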
            hour_labels = self.model_ew.label(
                gaussian_filter(scaled_data, self.gaussian_window))
            hour_labels[model_data < self.model_ew.min_intensity] = 0
            if self.size_filter > 1:
                hour_labels = self.model_ew.size_filter(
                    hour_labels, self.size_filter)
            # Return to orig values
            if self.segmentation_approach == "ew":
                self.model_ew.min_intensity = min_orig
                self.model_ew.max_intensity = max_orig
                self.model_ew.data_increment = data_increment_orig
            obj_slices = find_objects(hour_labels)

            num_slices = len(list(obj_slices))
            model_objects.append([])
            if num_slices > 0:
                for s, sl in enumerate(obj_slices):
                    model_objects[-1].append(
                        STObject(self.model_grid.data[h][sl],
                                 np.where(hour_labels[sl] == s + 1, 1, 0),
                                 self.model_grid.x[sl],
                                 self.model_grid.y[sl],
                                 self.model_grid.i[sl],
                                 self.model_grid.j[sl],
                                 hour,
                                 hour,
                                 dx=self.model_grid.dx))
                    if h > 0:
                        dims = model_objects[-1][-1].timesteps[0].shape
                        model_objects[-1][-1].estimate_motion(
                            hour, self.model_grid.data[h - 1], dims[1],
                            dims[0])
            del hour_labels
            del scaled_data
            del model_data
        for h, hour in enumerate(self.hours):
            past_time_objs = []
            for obj in tracked_model_objects:
                # Potential trackable objects are identified
                if obj.end_time == hour - 1:
                    past_time_objs.append(obj)
            # If no objects existed in the last time step, then consider objects in current time step all new
            if len(past_time_objs) == 0:
                tracked_model_objects.extend(model_objects[h])
            # Match from previous time step with current time step
            elif len(past_time_objs) > 0 and len(model_objects[h]) > 0:
                assignments = self.object_matcher.match_objects(
                    past_time_objs, model_objects[h], hour - 1, hour)
                unpaired = list(range(len(model_objects[h])))
                for pair in assignments:
                    past_time_objs[pair[0]].extend(model_objects[h][pair[1]])
                    unpaired.remove(pair[1])
                if len(unpaired) > 0:
                    for up in unpaired:
                        tracked_model_objects.append(model_objects[h][up])
            print("Tracked Model Objects: {0:03d} Hour: {1:02d}".format(
                len(tracked_model_objects), hour))

        return tracked_model_objects

    def load_model_tracks(self, json_path):
        model_track_files = sorted(
            glob(json_path +
                 "{0}/{1}/{2}_*.json".format(self.run_date.strftime(
                     "%Y%m%d"), self.ensemble_member, self.ensemble_name)))
        model_tracks = []
        for model_track_file in model_track_files:
            model_tracks.append(read_geojson(model_track_file))
        return model_tracks

    def load_mrms_tracks(self, json_path, mrms_name="mesh"):
        mrms_track_files = sorted(
            glob(json_path +
                 "{0}/{1}/{2}_*.json".format(self.run_date.strftime("%Y%m%d"),
                                             self.ensemble_member, mrms_name)))
        mrms_tracks = []
        for mrms_track_file in mrms_track_files:
            mrms_tracks.append(read_geojson(mrms_track_file))
        return mrms_tracks

    def find_mrms_tracks(self):
        """
        Identify objects from MRMS timesteps and link them together with object matching.

        Returns:
            List of STObjects containing MESH track information.
        """
        obs_objects = []
        tracked_obs_objects = []
        if self.mrms_ew is not None:
            self.mrms_grid.load_data()

            if len(self.mrms_grid.data) != len(self.hours):
                print('Observation data does not cover all requested hours')

                return tracked_obs_objects

            for h, hour in enumerate(self.hours):
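                # Copy the hourly MRMS field so clipping negative values
                # (typically missing-data flags) leaves the cached grid untouched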
                mrms_data = np.zeros(self.mrms_grid.data[h].shape)
                mrms_data[:] = np.array(self.mrms_grid.data[h])
                mrms_data[mrms_data < 0] = 0
                hour_labels = self.mrms_ew.size_filter(
                    self.mrms_ew.label(
                        gaussian_filter(mrms_data, self.gaussian_window)),
                    self.size_filter)
                hour_labels[mrms_data < self.mrms_ew.min_intensity] = 0
                obj_slices = find_objects(hour_labels)
                num_slices = len(list(obj_slices))
                obs_objects.append([])
                if num_slices > 0:
                    for sl in obj_slices:
                        obs_objects[-1].append(
                            STObject(mrms_data[sl],
                                     np.where(hour_labels[sl] > 0, 1, 0),
                                     self.model_grid.x[sl],
                                     self.model_grid.y[sl],
                                     self.model_grid.i[sl],
                                     self.model_grid.j[sl],
                                     hour,
                                     hour,
                                     dx=self.model_grid.dx))
                        if h > 0:
                            dims = obs_objects[-1][-1].timesteps[0].shape
                            obs_objects[-1][-1].estimate_motion(
                                hour, self.mrms_grid.data[h - 1], dims[1],
                                dims[0])

            for h, hour in enumerate(self.hours):
                past_time_objs = []
                for obj in tracked_obs_objects:
                    if obj.end_time == hour - 1:
                        past_time_objs.append(obj)
                if len(past_time_objs) == 0:
                    tracked_obs_objects.extend(obs_objects[h])
                elif len(past_time_objs) > 0 and len(obs_objects[h]) > 0:
                    assignments = self.object_matcher.match_objects(
                        past_time_objs, obs_objects[h], hour - 1, hour)
                    unpaired = list(range(len(obs_objects[h])))
                    for pair in assignments:
                        past_time_objs[pair[0]].extend(obs_objects[h][pair[1]])
                        unpaired.remove(pair[1])
                    if len(unpaired) > 0:
                        for up in unpaired:
                            tracked_obs_objects.append(obs_objects[h][up])
                print("Tracked Obs Objects: {0:03d} Hour: {1:02d}".format(
                    len(tracked_obs_objects), hour))

        return tracked_obs_objects

    def match_tracks(self,
                     model_tracks,
                     obs_tracks,
                     unique_matches=True,
                     closest_matches=False):
        """
        Match forecast and observed tracks.

        Args:
            model_tracks: List of model track STObjects.
            obs_tracks: List of observed track STObjects.
            unique_matches: If True, produce one-to-one track pairings; otherwise allow neighbor matches.
            closest_matches: Whether to keep only the closest match for each track (used with unique_matches).

        Returns:
            Track pairings produced by the matcher.
        """
        if unique_matches:
            pairings = self.track_matcher.match_tracks(
                model_tracks, obs_tracks, closest_matches=closest_matches)
        else:
            pairings = self.track_matcher.neighbor_matches(
                model_tracks, obs_tracks)
        return pairings

    def match_track_steps(self, model_tracks, obs_tracks):
        return self.track_step_matcher.match(model_tracks, obs_tracks)

    def extract_model_attributes(self,
                                 tracked_model_objects,
                                 storm_variables,
                                 potential_variables,
                                 tendency_variables=None,
                                 future_variables=None):
        """
        Extract model attribute data for each model track. Storm variables are those that describe the model storm
        directly, such as radar reflectivity or updraft helicity. Potential variables describe the surrounding
        environmental conditions of the storm, and should be extracted from the timestep before the storm arrives to
        reduce the chance of the storm contaminating the environmental values. Examples of potential variables include
        CAPE, shear, temperature, and dewpoint. Future variables are fields that occur in the hour after the extracted
        field.

        Args:
            tracked_model_objects: List of STObjects describing each forecasted storm
            storm_variables: List of storm variable names
            potential_variables: List of potential variable names.
            tendency_variables: List of tendency variable names.
            future_variables: List of variables extracted from the hour after each storm timestep.
        """
        if tendency_variables is None:
            tendency_variables = []
        if future_variables is None:
            future_variables = []
        model_grids = {}
        for l_var in ["lon", "lat"]:
            for model_obj in tracked_model_objects:
                model_obj.extract_attribute_array(
                    getattr(self.model_grid, l_var), l_var)
        for storm_var in storm_variables:
            print("Storm {0} {1} {2}".format(storm_var, self.ensemble_member,
                                             self.run_date.strftime("%Y%m%d")))
            model_grids[storm_var] = ModelOutput(
                self.ensemble_name, self.ensemble_member, self.run_date,
                storm_var, self.start_date - timedelta(hours=1),
                self.end_date + timedelta(hours=1), self.model_path,
                self.model_map_file, self.single_step)
            model_grids[storm_var].load_data()
            for model_obj in tracked_model_objects:
                model_obj.extract_attribute_grid(model_grids[storm_var])
            if storm_var not in potential_variables + tendency_variables + future_variables:
                del model_grids[storm_var]
        for potential_var in potential_variables:
            print("Potential {0} {1} {2}".format(
                potential_var, self.ensemble_member,
                self.run_date.strftime("%Y%m%d")))
            if potential_var not in model_grids.keys():
                model_grids[potential_var] = ModelOutput(
                    self.ensemble_name, self.ensemble_member, self.run_date,
                    potential_var, self.start_date - timedelta(hours=1),
                    self.end_date + timedelta(hours=1), self.model_path,
                    self.model_map_file, self.single_step)
                model_grids[potential_var].load_data()
            for model_obj in tracked_model_objects:
                model_obj.extract_attribute_grid(model_grids[potential_var],
                                                 potential=True)
            if potential_var not in tendency_variables + future_variables:
                del model_grids[potential_var]
        for future_var in future_variables:
            print("Future {0} {1} {2}".format(
                future_var, self.ensemble_member,
                self.run_date.strftime("%Y%m%d")))
            if future_var not in model_grids.keys():
                model_grids[future_var] = ModelOutput(
                    self.ensemble_name, self.ensemble_member, self.run_date,
                    future_var, self.start_date - timedelta(hours=1),
                    self.end_date + timedelta(hours=1), self.model_path,
                    self.model_map_file, self.single_step)
                model_grids[future_var].load_data()
            for model_obj in tracked_model_objects:
                model_obj.extract_attribute_grid(model_grids[future_var],
                                                 future=True)
            if future_var not in tendency_variables:
                del model_grids[future_var]
        for tendency_var in tendency_variables:
            print("Tendency {0} {1} {2}".format(
                tendency_var, self.ensemble_member,
                self.run_date.strftime("%Y%m%d")))
            if tendency_var not in model_grids.keys():
                model_grids[tendency_var] = ModelOutput(
                    self.ensemble_name, self.ensemble_member, self.run_date,
                    tendency_var, self.start_date - timedelta(hours=1),
                    self.end_date, self.model_path, self.model_map_file,
                    self.single_step)
            for model_obj in tracked_model_objects:
                model_obj.extract_tendency_grid(model_grids[tendency_var])
            del model_grids[tendency_var]

    @staticmethod
    def match_hail_sizes(model_tracks, obs_tracks, track_pairings):
        """
        Given forecast and observed track pairings, maximum hail sizes are associated with each paired forecast storm
        track timestep. If the duration of the forecast and observed tracks differ, then interpolation is used for the
        intermediate timesteps.

        Args:
            model_tracks: List of model track STObjects
            obs_tracks: List of observed STObjects
            track_pairings: list of tuples containing the indices of the paired (forecast, observed) tracks
        """
        unpaired = list(range(len(model_tracks)))
        for p, pair in enumerate(track_pairings):
            model_track = model_tracks[pair[0]]
            unpaired.remove(pair[0])
            obs_track = obs_tracks[pair[1]]
            obs_hail_sizes = np.array([
                step[obs_track.masks[t] == 1].max()
                for t, step in enumerate(obs_track.timesteps)
            ])
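            # Normalize both tracks' times to [0, 1] so hail sizes can be
            # interpolated between tracks of different durations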
            if obs_track.times.size > 1 and model_track.times.size > 1:
                normalized_obs_times = 1.0 / (obs_track.times.max() - obs_track.times.min()) \
                                       * (obs_track.times - obs_track.times.min())
                normalized_model_times = 1.0 / (model_track.times.max() - model_track.times.min()) \
                                         * (model_track.times - model_track.times.min())
                hail_interp = interp1d(normalized_obs_times,
                                       obs_hail_sizes,
                                       kind="nearest",
                                       bounds_error=False,
                                       fill_value=0)
                model_track.observations = hail_interp(normalized_model_times)
            elif obs_track.times.size == 1:
                model_track.observations = np.ones(
                    model_track.times.shape) * obs_hail_sizes[0]
            elif model_track.times.size == 1:
                model_track.observations = np.array([obs_hail_sizes.max()])
            print(pair[0], "obs", obs_hail_sizes)
            print(pair[0], "model", model_track.observations)
        for u in unpaired:
            model_tracks[u].observations = np.zeros(
                model_tracks[u].times.shape)

    def match_size_distributions(self, model_tracks, obs_tracks,
                                 track_pairings):
        def match_single_track_dist(model_track, obs_track):
            label_columns = ["Max_Hail_Size", "Shape", "Location", "Scale"]
            obs_hail_dists = pd.DataFrame(index=obs_track.times,
                                          columns=label_columns)
            model_hail_dists = pd.DataFrame(index=model_track.times,
                                            columns=label_columns)
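            # Fit a gamma distribution to the hail sizes at each observed step,
            # anchoring the location parameter just below the smallest size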
            for t, step in enumerate(obs_track.timesteps):
                step_vals = step[(obs_track.masks[t] == 1) & (
                    obs_track.timesteps[t] > self.mrms_ew.min_intensity)]
                min_hail = step_vals.min() - 0.1
                obs_hail_dists.loc[obs_track.times[t],
                                   ["Shape", "Location", "Scale"]] = gamma.fit(
                                       step_vals, floc=min_hail)
                obs_hail_dists.loc[obs_track.times[t],
                                   "Max_Hail_Size"] = step_vals.max()
            if obs_track.times.size > 1 and model_track.times.size > 1:
                normalized_obs_times = 1.0 / (obs_track.times.max() - obs_track.times.min()) \
                                       * (obs_track.times - obs_track.times.min())
                normalized_model_times = 1.0 / (model_track.times.max() - model_track.times.min()) \
                                         * (model_track.times - model_track.times.min())
                for col in label_columns:
                    interp_func = interp1d(normalized_obs_times,
                                           obs_hail_dists[col],
                                           kind="linear",
                                           bounds_error=False,
                                           fill_value=0)
                    model_hail_dists.loc[model_track.times, col] = interp_func(
                        normalized_model_times)
            else:
                for param in obs_hail_dists.columns:
                    model_hail_dists.loc[model_track.times,
                                         param] = obs_hail_dists.loc[
                                             obs_track.times[0], param]
            return model_hail_dists

        unpaired = list(range(len(model_tracks)))
        for p, pair in enumerate(track_pairings):
            unpaired.remove(pair[0])
            if type(pair[1]) in [int, np.int64, np.int32]:
                interp_hail_dists = match_single_track_dist(
                    model_tracks[pair[0]], obs_tracks[pair[1]])
                model_tracks[pair[0]].observations = interp_hail_dists
            else:
                model_tracks[pair[0]].observations = []
                for op in pair[1]:
                    interp_hail_dists = match_single_track_dist(
                        model_tracks[pair[0]], obs_tracks[op])
                    model_tracks[pair[0]].observations.append(
                        interp_hail_dists)
        return

    def match_hail_size_step_distributions(self, model_tracks, obs_tracks,
                                           track_pairings):
        """
        Given a matching set of observed tracks for each model track, combine the hail size values and create
        an observed hail size distribution.
        
        Args:
            model_tracks: List of STObjects
            obs_tracks: List of STObjects
            track_pairings: DataFrame of step pairings with "Matched" and "Pairings" columns.

        Returns:
            None. Observed hail size distributions are stored on each model track's observations attribute.
        """
        label_columns = [
            "Matched", "Max_Hail_Size", "Num_Matches", "Shape", "Location",
            "Scale"
        ]
        s = 0
        for m, model_track in enumerate(model_tracks):
            model_track.observations = pd.DataFrame(index=model_track.times,
                                                    columns=label_columns,
                                                    dtype=np.float64)
            model_track.observations.loc[:, :] = 0
            model_track.observations["Matched"] = model_track.observations[
                "Matched"].astype(np.int32)
            for t, time in enumerate(model_track.times):
                model_track.observations.loc[
                    time, "Matched"] = track_pairings.loc[s, "Matched"]
                if model_track.observations.loc[time, "Matched"] > 0:
                    all_hail_sizes = []
                    step_pairs = track_pairings.loc[s, "Pairings"]
                    for step_pair in step_pairs:
                        obs_step = obs_tracks[step_pair[0]].timesteps[
                            step_pair[1]].ravel()
                        obs_mask = obs_tracks[step_pair[0]].masks[
                            step_pair[1]].ravel()
                        all_hail_sizes.append(obs_step[(obs_mask == 1) & (
                            obs_step >= self.mrms_ew.min_intensity)])
                    combined_hail_sizes = np.concatenate(all_hail_sizes)
                    min_hail = combined_hail_sizes.min() - 0.1
                    model_track.observations.loc[
                        time, "Max_Hail_Size"] = combined_hail_sizes.max()
                    model_track.observations.loc[
                        time, "Num_Matches"] = step_pairs.shape[0]
                    model_track.observations.loc[
                        time, ["Shape", "Location", "Scale"]] = gamma.fit(
                            combined_hail_sizes, floc=min_hail)
                s += 1

    @staticmethod
    def calc_track_errors(model_tracks, obs_tracks, track_pairings):
        """
        Calculates spatial and temporal translation errors between matched
        forecast and observed tracks.

        Args:
            model_tracks: List of model track STObjects
            obs_tracks: List of observed track STObjects
            track_pairings: List of tuples pairing forecast and observed tracks.

        Returns:
            pandas DataFrame containing different track errors
        """
        columns = [
            'obs_track_id',
            'translation_error_x',
            'translation_error_y',
            'start_time_difference',
            'end_time_difference',
        ]
        track_errors = pd.DataFrame(index=list(range(len(model_tracks))),
                                    columns=columns)
        for p, pair in enumerate(track_pairings):
            model_track = model_tracks[pair[0]]
            if type(pair[1]) in [int, np.int64]:
                obs_track = obs_tracks[pair[1]]
            else:
                obs_track = obs_tracks[pair[1][0]]
            model_com = model_track.center_of_mass(model_track.start_time)
            obs_com = obs_track.center_of_mass(obs_track.start_time)
            track_errors.loc[pair[0], 'obs_track_id'] = pair[1] if type(
                pair[1]) in [int, np.int64] else pair[1][0]
            track_errors.loc[pair[0],
                             'translation_error_x'] = model_com[0] - obs_com[0]
            track_errors.loc[pair[0],
                             'translation_error_y'] = model_com[1] - obs_com[1]
            track_errors.loc[
                pair[0],
                'start_time_difference'] = model_track.start_time - obs_track.start_time
            track_errors.loc[
                pair[0],
                'end_time_difference'] = model_track.end_time - obs_track.end_time
        return track_errors
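
The time matching inside match_hail_sizes rescales both tracks' hours onto [0, 1] and uses nearest-neighbor interpolation to assign an observed hail size to every model timestep. A minimal, self-contained sketch of that step (all sizes and hours below are made up):

import numpy as np
from scipy.interpolate import interp1d

obs_times = np.array([20, 21, 22, 23])            # hypothetical observed track hours
obs_sizes = np.array([25.0, 32.0, 40.0, 29.0])    # hypothetical max hail sizes
model_times = np.arange(19, 26)                   # hypothetical model track hours

def normalize(times):
    return (times - times.min()) / (times.max() - times.min())

hail_interp = interp1d(normalize(obs_times), obs_sizes, kind="nearest",
                       bounds_error=False, fill_value=0)
print(hail_interp(normalize(model_times)))        # one observed size per model hour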
Example #8
    def __init__(self,
                 run_date,
                 start_date,
                 end_date,
                 ensemble_name,
                 ensemble_member,
                 variable,
                 model_path,
                 model_map_file,
                 model_watershed_params,
                 object_matcher_params,
                 track_matcher_params,
                 size_filter,
                 gaussian_window,
                 sector_ind_path,
                 match_steps=True,
                 mrms_path=None,
                 mrms_variable=None,
                 mrms_watershed_params=None,
                 single_step=True,
                 mask_file=None,
                 patch_radius=32):
        self.run_date = run_date
        self.start_date = start_date
        self.end_date = end_date
        self.start_hour = int((self.start_date - self.run_date).total_seconds()) // 3600
        self.end_hour = int((self.end_date - self.run_date).total_seconds()) // 3600
        self.hours = np.arange(int(self.start_hour), int(self.end_hour) + 1)
        self.ensemble_name = ensemble_name
        self.ensemble_member = ensemble_member
        self.variable = variable
        self.model_ew = EnhancedWatershed(*model_watershed_params)
        self.object_matcher = ObjectMatcher(*object_matcher_params)
        if match_steps:
            self.track_matcher = None
            self.track_step_matcher = TrackStepMatcher(*track_matcher_params)
        else:
            self.track_matcher = TrackMatcher(*track_matcher_params)
            self.track_step_matcher = None
        self.size_filter = size_filter
        self.gaussian_window = gaussian_window
        self.sector_ind_path = sector_ind_path
        self.model_path = model_path
        self.model_map_file = model_map_file
        self.mrms_path = mrms_path
        self.single_step = single_step
        self.model_grid = ModelOutput(self.ensemble_name, self.ensemble_member, self.run_date, self.variable,
                                      self.start_date, self.end_date, self.model_path, self.model_map_file,
                                      self.sector_ind_path, single_step=self.single_step)
        self.model_grid.load_map_info(self.model_map_file)
        if self.mrms_path is not None:
            self.mrms_variable = mrms_variable
            self.mrms_grid = MRMSGrid(self.start_date, self.end_date, self.mrms_variable, self.mrms_path)
            self.mrms_ew = EnhancedWatershed(*mrms_watershed_params)
        else:
            self.mrms_grid = None
            self.mrms_ew = None
        self.mask_file = mask_file
        self.mask = None
        if self.mask_file is not None:
            mask_data = Dataset(self.mask_file)
            self.mask = mask_data.variables["usa_mask"][:]
            mask_data.close()
        self.patch_radius = patch_radius
        return
Example #9
class TrackProcessor(object):
    """
    TrackProcessor identifies local maxima in a convection-allowing model run and links them in time to form
    storm tracks. A similar procedure is applied to the observations, and the two sets of tracks are matched.
    Storm and environmental attributes are extracted from within the identified track areas.
    
    Args:
        run_date: Datetime model run was initialized
        start_date: Datetime for the beginning of storm extraction.
        end_date: Datetime for the ending of storm extraction.
        ensemble_name: Name of the ensemble being used.
        ensemble_member: Name of the ensemble member being used.
        variable: Model variable being used for extraction.
        model_path: Path to the ensemble output.
        model_map_file: File containing model map projection information.
        model_watershed_params: Tuple of parameters used for EnhancedWatershed.
        object_matcher_params: Tuple of parameters used for ObjectMatcher.
        track_matcher_params: Tuple of parameters for TrackMatcher or TrackStepMatcher.
        size_filter: Minimum size of model objects.
        gaussian_window: Standard deviation of the Gaussian smoothing filter, in grid points.
        sector_ind_path: Path to sector index information passed through to ModelOutput.
        match_steps: If True, match individual steps in tracks instead of matching whole tracks.
        mrms_path: Path to MRMS netCDF files.
        mrms_variable: MRMS variable being used.
        mrms_watershed_params: Tuple of parameters for Enhanced Watershed applied to MESH data.
        single_step: Whether model timesteps are in separate files or aggregated into one file.
        mask_file: netCDF filename containing a mask of valid grid points on the model domain.
        patch_radius: Half-width in grid points of the square patches extracted around storm centers.
    """
    def __init__(self,
                 run_date,
                 start_date,
                 end_date,
                 ensemble_name,
                 ensemble_member,
                 variable,
                 model_path,
                 model_map_file,
                 model_watershed_params,
                 object_matcher_params,
                 track_matcher_params,
                 size_filter,
                 gaussian_window,
                 sector_ind_path,
                 match_steps=True,
                 mrms_path=None,
                 mrms_variable=None,
                 mrms_watershed_params=None,
                 single_step=True,
                 mask_file=None,
                 patch_radius=32):
        self.run_date = run_date
        self.start_date = start_date
        self.end_date = end_date
        self.start_hour = int((self.start_date - self.run_date).total_seconds()) // 3600
        self.end_hour = int((self.end_date - self.run_date).total_seconds()) // 3600
        self.hours = np.arange(int(self.start_hour), int(self.end_hour) + 1)
        self.ensemble_name = ensemble_name
        self.ensemble_member = ensemble_member
        self.variable = variable
        self.model_ew = EnhancedWatershed(*model_watershed_params)
        self.object_matcher = ObjectMatcher(*object_matcher_params)
        if match_steps:
            self.track_matcher = None
            self.track_step_matcher = TrackStepMatcher(*track_matcher_params)
        else:
            self.track_matcher = TrackMatcher(*track_matcher_params)
            self.track_step_matcher = None
        self.size_filter = size_filter
        self.gaussian_window = gaussian_window
        self.sector_ind_path = sector_ind_path
        self.model_path = model_path
        self.model_map_file = model_map_file
        self.mrms_path = mrms_path
        self.single_step = single_step
        self.model_grid = ModelOutput(self.ensemble_name, self.ensemble_member, self.run_date, self.variable,
                                      self.start_date, self.end_date, self.model_path, self.model_map_file,
                                      self.sector_ind_path, single_step=self.single_step)
        self.model_grid.load_map_info(self.model_map_file)
        if self.mrms_path is not None:
            self.mrms_variable = mrms_variable
            self.mrms_grid = MRMSGrid(self.start_date, self.end_date, self.mrms_variable, self.mrms_path)
            self.mrms_ew = EnhancedWatershed(*mrms_watershed_params)
        else:
            self.mrms_grid = None
            self.mrms_ew = None
        self.mask_file = mask_file
        self.mask = None
        if self.mask_file is not None:
            mask_data = Dataset(self.mask_file)
            self.mask = mask_data.variables["usa_mask"][:]
            mask_data.close()
        self.patch_radius = patch_radius
        return

    def find_model_patch_tracks(self):
        """
        Identify storms in gridded model output and extract uniform sized patches around the storm centers of mass.

        Returns:
            List of tracked STObjects containing the extracted patches.
        """
        self.model_grid.load_data()
        tracked_model_objects = []
        model_objects = []
        if self.model_grid.data is None:
            print("No model output found")
            return tracked_model_objects
        min_orig = self.model_ew.min_thresh
        max_orig = self.model_ew.max_thresh
        data_increment_orig = self.model_ew.data_increment
        self.model_ew.min_thresh = 0
        self.model_ew.data_increment = 1
        self.model_ew.max_thresh = 100
        for h, hour in enumerate(self.hours):
            # Identify storms at each time step and apply size filter
            print("Finding {0} objects for run {1} Hour: {2:02d}".format(self.ensemble_member,
                                                                         self.run_date.strftime("%Y%m%d%H"), hour))
            if self.mask is not None:
                model_data = self.model_grid.data[h] * self.mask
            else:
                model_data = self.model_grid.data[h]
            model_data[:self.patch_radius] = 0
            model_data[-self.patch_radius:] = 0
            model_data[:, :self.patch_radius] = 0
            model_data[:, -self.patch_radius:] = 0
            scaled_data = np.array(rescale_data(model_data, min_orig, max_orig))
            hour_labels = label_storm_objects(scaled_data, "ew",
                                              self.model_ew.min_thresh, self.model_ew.max_thresh,
                                              min_area=self.size_filter, max_area=self.model_ew.max_size,
                                              max_range=self.model_ew.delta, increment=self.model_ew.data_increment,
                                              gaussian_sd=self.gaussian_window)
            model_objects.extend(extract_storm_patches(hour_labels, model_data, self.model_grid.x,
                                                       self.model_grid.y, [hour],
                                                       dx=self.model_grid.dx,
                                                       patch_radius=self.patch_radius))
            for model_obj in model_objects[-1]:
                dims = model_obj.timesteps[-1].shape
                if h > 0:
                    model_obj.estimate_motion(hour, self.model_grid.data[h-1], dims[1], dims[0])
            del scaled_data
            del model_data
            del hour_labels
        tracked_model_objects.extend(track_storms(model_objects, self.hours,
                                                  self.object_matcher.cost_function_components,
                                                  self.object_matcher.max_values,
                                                  self.object_matcher.weights))
        self.model_ew.min_thresh = min_orig
        self.model_ew.max_thresh = max_orig
        self.model_ew.data_increment = data_increment_orig
        return tracked_model_objects

    def find_model_tracks(self):
        """
        Identify storms at each model time step and link them together with object matching.

        Returns:
            List of STObjects containing model track information.
        """
        self.model_grid.load_data()
        model_objects = []
        tracked_model_objects = []
        if self.model_grid.data is None:
            print("No model output found")
            return tracked_model_objects
        for h, hour in enumerate(self.hours):
            # Identify storms at each time step and apply size filter
            print("Finding {0} objects for run {1} Hour: {2:02d}".format(self.ensemble_member,
                                                                         self.run_date.strftime("%Y%m%d%H"), hour))
            if self.mask is not None:
                model_data = self.model_grid.data[h] * self.mask
            else:
                model_data = self.model_grid.data[h]

            # remember orig values
            min_orig = self.model_ew.min_thresh
            max_orig = self.model_ew.max_thresh
            data_increment_orig = self.model_ew.data_increment
            # scale to int 0-100.
            scaled_data = np.array(rescale_data(self.model_grid.data[h], min_orig, max_orig))
            self.model_ew.min_thresh = 0
            self.model_ew.data_increment = 1
            self.model_ew.max_thresh = 100
            hour_labels = self.model_ew.label(gaussian_filter(scaled_data, self.gaussian_window))
            hour_labels[model_data < self.model_ew.min_thresh] = 0
            hour_labels = self.model_ew.size_filter(hour_labels, self.size_filter)
            # Return to orig values
            self.model_ew.min_thresh = min_orig
            self.model_ew.max_thresh = max_orig
            self.model_ew.data_increment = data_increment_orig
            obj_slices = find_objects(hour_labels)

            num_slices = len(obj_slices)
            model_objects.append([])
            if num_slices > 0:
                for s, sl in enumerate(obj_slices):
                    model_objects[-1].append(STObject(self.model_grid.data[h][sl],
                                                      np.where(hour_labels[sl] == s + 1, 1, 0),
                                                      self.model_grid.x[sl], 
                                                      self.model_grid.y[sl], 
                                                      self.model_grid.i[sl], 
                                                      self.model_grid.j[sl],
                                                      hour,
                                                      hour,
                                                      dx=self.model_grid.dx))
                    if h > 0:
                        dims = model_objects[-1][-1].timesteps[0].shape
                        model_objects[-1][-1].estimate_motion(hour, self.model_grid.data[h-1], dims[1], dims[0])
            del hour_labels
            del scaled_data
            del model_data
        for h, hour in enumerate(self.hours):
            past_time_objs = []
            for obj in tracked_model_objects:
                # Potential trackable objects are identified
                if obj.end_time == hour - 1:
                    past_time_objs.append(obj)
            # If no objects existed in the last time step, then consider objects in current time step all new
            if len(past_time_objs) == 0:
                tracked_model_objects.extend(model_objects[h])
            # Match from previous time step with current time step
            elif len(model_objects[h]) > 0:
                assignments = self.object_matcher.match_objects(past_time_objs, model_objects[h], hour - 1, hour)
                unpaired = list(range(len(model_objects[h])))
                for pair in assignments:
                    past_time_objs[pair[0]].extend(model_objects[h][pair[1]])
                    unpaired.remove(pair[1])
                if len(unpaired) > 0:
                    for up in unpaired:
                        tracked_model_objects.append(model_objects[h][up])
            print("Tracked Model Objects: {0:03d} Hour: {1:02d}".format(len(tracked_model_objects), hour))

        return tracked_model_objects

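# The loop above temporarily rescales the raw field to integers on a fixed
# 0-100 scale before watershed labeling. A minimal sketch of what such a
# rescaling helper could look like, assuming a linear map with clipping
# (the actual rescale_data implementation may differ):
import numpy as np

def rescale_data_sketch(data, data_min, data_max, out_min=0, out_max=100):
    # Linearly map [data_min, data_max] onto [out_min, out_max], clip the
    # overshoot, and cast to int so every field shares one label scale.
    span = float(data_max - data_min)
    scaled = (data - data_min) / span * (out_max - out_min) + out_min
    return np.clip(scaled, out_min, out_max).astype(int)

print(rescale_data_sketch(np.array([10.0, 45.0, 90.0]), 25.0, 80.0))  # [  0  36 100]
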
    def load_model_tracks(self, json_path):
        model_track_files = sorted(glob(json_path + "{0}/{1}/{2}_*.json".format(self.run_date.strftime("%Y%m%d"),
                                                                                self.ensemble_member,
                                                                                self.ensemble_name)))
        model_tracks = []
        for model_track_file in model_track_files:
            model_tracks.append(read_geojson(model_track_file))
        return model_tracks

    def load_mrms_tracks(self, json_path, mrms_name="mesh"):
        mrms_track_files = sorted(glob(json_path + "{0}/{1}/{2}_*.json".format(self.run_date.strftime("%Y%m%d"),
                                                                               self.ensemble_member,
                                                                               mrms_name)))
        mrms_tracks = []
        for mrms_track_file in mrms_track_files:
            mrms_tracks.append(read_geojson(mrms_track_file))
        return mrms_tracks

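# Both loaders assume tracks were previously written to GeoJSON files in
# per-date, per-member directories; the layout below is inferred from the
# glob patterns, and the path is purely illustrative:
from glob import glob

#   <json_path>/20150501/mem01/<ensemble_name>_*.json   (model tracks)
#   <json_path>/20150501/mem01/mesh_*.json              (MRMS tracks)
mrms_track_files = sorted(glob("/data/tracks/20150501/mem01/mesh_*.json"))
print(len(mrms_track_files))  # 0 if the illustrative path does not exist
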
    def find_mrms_tracks(self):
        """
        Identify objects from MRMS timesteps and link them together with object matching.

        Returns:
            List of STObjects containing MESH track information.
        """
        obs_objects = []
        tracked_obs_objects = []
        if self.mrms_ew is not None:
            self.mrms_grid.load_data()
            
            if len(self.mrms_grid.data) != len(self.hours):
                print("Observation data does not cover all forecast hours")
                return tracked_obs_objects

            for h, hour in enumerate(self.hours):
                mrms_data = np.array(self.mrms_grid.data[h], copy=True)
                mrms_data[mrms_data < 0] = 0
                hour_labels = self.mrms_ew.size_filter(self.mrms_ew.label(gaussian_filter(mrms_data,
                                                                                          self.gaussian_window)),
                                                       self.size_filter)
                hour_labels[mrms_data < self.mrms_ew.min_thresh] = 0
                obj_slices = find_objects(hour_labels)
                num_slices = len(obj_slices)
                obs_objects.append([])
                if num_slices > 0:
                    # Enumerate slices so each mask keeps only this object's own
                    # labeled pixels, matching the model-object loop above
                    for s, sl in enumerate(obj_slices):
                        obs_objects[-1].append(STObject(mrms_data[sl],
                                                        np.where(hour_labels[sl] == s + 1, 1, 0),
                                                        self.model_grid.x[sl],
                                                        self.model_grid.y[sl],
                                                        self.model_grid.i[sl],
                                                        self.model_grid.j[sl],
                                                        hour,
                                                        hour,
                                                        dx=self.model_grid.dx))
                        if h > 0:
                            dims = obs_objects[-1][-1].timesteps[0].shape
                            obs_objects[-1][-1].estimate_motion(hour, self.mrms_grid.data[h-1], dims[1], dims[0])
        
            for h, hour in enumerate(self.hours):
                past_time_objs = []
                for obj in tracked_obs_objects:
                    if obj.end_time == hour - 1:
                        past_time_objs.append(obj)
                if len(past_time_objs) == 0:
                    tracked_obs_objects.extend(obs_objects[h])
                elif len(obs_objects[h]) > 0:
                    assignments = self.object_matcher.match_objects(past_time_objs, obs_objects[h], hour - 1, hour)
                    unpaired = list(range(len(obs_objects[h])))
                    for pair in assignments:
                        past_time_objs[pair[0]].extend(obs_objects[h][pair[1]])
                        unpaired.remove(pair[1])
                    if len(unpaired) > 0:
                        for up in unpaired:
                            tracked_obs_objects.append(obs_objects[h][up])
                print("Tracked Obs Objects: {0:03d} Hour: {1:02d}".format(len(tracked_obs_objects), hour))
        
        return tracked_obs_objects

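# Both tracking loops share the same bookkeeping: match_objects returns
# (past_index, current_index) pairs, matched current objects extend existing
# tracks, and anything left in unpaired starts a new track. A standalone
# illustration with made-up indices:
assignments = [(0, 2), (1, 0)]      # hypothetical matcher output
unpaired = list(range(3))           # three objects at the current hour
for past_idx, cur_idx in assignments:
    unpaired.remove(cur_idx)        # object cur_idx extends track past_idx
print(unpaired)  # [1] -> object 1 starts a new track
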
    def match_tracks(self, model_tracks, obs_tracks, unique_matches=True, closest_matches=False):
        """
        Match forecast and observed tracks.

        Args:
            model_tracks: List of model track STObjects
            obs_tracks: List of observed track STObjects
            unique_matches: If True, use the track matcher to produce unique track pairings;
                otherwise all neighboring tracks are matched.
            closest_matches: Passed to the track matcher; if True, restrict pairings to
                closest matches.

        Returns:
            List of track pairings
        """
        if unique_matches:
            pairings = self.track_matcher.match_tracks(model_tracks, obs_tracks, closest_matches=closest_matches)
        else:
            pairings = self.track_matcher.neighbor_matches(model_tracks, obs_tracks)
        return pairings

    def match_track_steps(self, model_tracks, obs_tracks):
        return self.track_step_matcher.match(model_tracks, obs_tracks)

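# A hedged usage sketch of the two matching entry points above, assuming
# `processor` is an instance of this class and the track lists come from
# find_model_tracks and find_mrms_tracks:
#
# pairings = processor.match_tracks(model_tracks, mrms_tracks,
#                                   unique_matches=True)   # one-to-one pairs
# neighbor_pairings = processor.match_tracks(model_tracks, mrms_tracks,
#                                            unique_matches=False)  # all neighbors
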
    def extract_model_attributes(self, tracked_model_objects, storm_variables, potential_variables,
                                 tendency_variables=None, future_variables=None):
        """
        Extract model attribute data for each model track. Storm variables are those that describe the model storm
        directly, such as radar reflectivity or updraft helicity. Potential variables describe the surrounding
        environmental conditions of the storm, and should be extracted from the timestep before the storm arrives to
        reduce the chance of the storm contaminating the environmental values. Examples of potential variables include
        CAPE, shear, temperature, and dewpoint. Future variables are extracted from the hour after each storm
        timestep, and tendency variables capture the change in a field from the previous hour to the current one.

        Args:
            tracked_model_objects: List of STObjects describing each forecasted storm
            storm_variables: List of storm variable names
            potential_variables: List of potential variable names
            tendency_variables: List of tendency variable names
            future_variables: List of future variable names
        """
        if tendency_variables is None:
            tendency_variables = []
        if future_variables is None:
            future_variables = []
        model_grids = {}
        for l_var in ["lon", "lat"]:
            for model_obj in tracked_model_objects:
                model_obj.extract_attribute_array(getattr(self.model_grid, l_var), l_var)
        for storm_var in storm_variables:
            print("Storm {0} {1} {2}".format(storm_var,self.ensemble_member, self.run_date.strftime("%Y%m%d")))
            model_grids[storm_var] = ModelOutput(self.ensemble_name, self.ensemble_member,
                                                 self.run_date, storm_var, self.start_date - timedelta(hours=1),
                                                 self.end_date + timedelta(hours=1),
                                                 self.model_path,self.model_map_file, 
                                                 self.sector_ind_path,self.single_step)
            model_grids[storm_var].load_data()
            for model_obj in tracked_model_objects:
                model_obj.extract_attribute_grid(model_grids[storm_var])
            if storm_var not in potential_variables + tendency_variables + future_variables:
                del model_grids[storm_var]
        for potential_var in potential_variables:
            print("Potential {0} {1} {2}".format(potential_var,self.ensemble_member, self.run_date.strftime("%Y%m%d")))
            if potential_var not in model_grids.keys():
                model_grids[potential_var] = ModelOutput(self.ensemble_name, self.ensemble_member,
                                                         self.run_date, potential_var,
                                                         self.start_date - timedelta(hours=1),
                                                         self.end_date + timedelta(hours=1),
                                                         self.model_path, self.model_map_file, 
                                                         self.sector_ind_path, self.single_step)
                model_grids[potential_var].load_data()
            for model_obj in tracked_model_objects:
                model_obj.extract_attribute_grid(model_grids[potential_var], potential=True)
            if potential_var not in tendency_variables + future_variables:
                del model_grids[potential_var]
        for future_var in future_variables:
            print("Future {0} {1} {2}".format(future_var, self.ensemble_member, self.run_date.strftime("%Y%m%d")))
            if future_var not in model_grids.keys():
                model_grids[future_var] = ModelOutput(self.ensemble_name, self.ensemble_member,
                                                      self.run_date, future_var,
                                                      self.start_date - timedelta(hours=1),
                                                      self.end_date + timedelta(hours=1),
                                                      self.model_path, self.model_map_file,
                                                      self.sector_ind_path, self.single_step)
                model_grids[future_var].load_data()
            for model_obj in tracked_model_objects:
                model_obj.extract_attribute_grid(model_grids[future_var], future=True)
            if future_var not in tendency_variables:
                del model_grids[future_var]
        for tendency_var in tendency_variables:
            print("Tendency {0} {1} {2}".format(tendency_var, self.ensemble_member, self.run_date.strftime("%Y%m%d")))
            if tendency_var not in model_grids.keys():
                model_grids[tendency_var] = ModelOutput(self.ensemble_name, self.ensemble_member,
                                                        self.run_date, tendency_var,
                                                        self.start_date - timedelta(hours=1),
                                                        self.end_date,
                                                        self.model_path, self.model_map_file,
                                                        self.sector_ind_path, self.single_step)
                # Load the grid before extracting tendencies, as in the other branches
                model_grids[tendency_var].load_data()
            for model_obj in tracked_model_objects:
                model_obj.extract_tendency_grid(model_grids[tendency_var])
            del model_grids[tendency_var]

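# A hedged call sketch for the attribute extraction above; the variable
# names are illustrative, not a fixed list:
#
# processor.extract_model_attributes(
#     tracked_model_objects,
#     storm_variables=["REFL_1KM_AGL", "UP_HELI_MAX"],   # the storm itself
#     potential_variables=["MLCAPE", "SHEAR_U_0_6KM"],   # pre-storm environment
#     tendency_variables=None,
#     future_variables=None)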

    @staticmethod
    def match_hail_sizes(model_tracks, obs_tracks, track_pairings):
        """
        Given forecast and observed track pairings, maximum hail sizes are associated with each paired forecast storm
        track timestep. If the duration of the forecast and observed tracks differ, then interpolation is used for the
        intermediate timesteps.

        Args:
            model_tracks: List of model track STObjects
            obs_tracks: List of observed STObjects
            track_pairings: list of tuples containing the indices of the paired (forecast, observed) tracks
        """
        unpaired = list(range(len(model_tracks)))
        for p, pair in enumerate(track_pairings):
            model_track = model_tracks[pair[0]]
            unpaired.remove(pair[0])
            obs_track = obs_tracks[pair[1]]
            obs_hail_sizes = np.array([step[obs_track.masks[t] == 1].max()
                                       for t, step in enumerate(obs_track.timesteps)])
            if obs_track.times.size > 1 and model_track.times.size > 1:
                normalized_obs_times = 1.0 / (obs_track.times.max() - obs_track.times.min())\
                    * (obs_track.times - obs_track.times.min())
                normalized_model_times = 1.0 / (model_track.times.max() - model_track.times.min())\
                    * (model_track.times - model_track.times.min())
                hail_interp = interp1d(normalized_obs_times, obs_hail_sizes, kind="nearest",
                                       bounds_error=False, fill_value=0)
                model_track.observations = hail_interp(normalized_model_times)
            elif obs_track.times.size == 1:
                model_track.observations = np.ones(model_track.times.shape) * obs_hail_sizes[0]
            elif model_track.times.size == 1:
                model_track.observations = np.array([obs_hail_sizes.max()])
            print(pair[0], "obs",  obs_hail_sizes)
            print(pair[0], "model", model_track.observations)
        for u in unpaired:
            model_tracks[u].observations = np.zeros(model_tracks[u].times.shape)

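# The normalized-time trick in match_hail_sizes can be shown standalone:
# both tracks' times are mapped onto [0, 1] so tracks of different length
# line up, then observed sizes are sampled at the model track's normalized
# times. All numbers below are made up:
import numpy as np
from scipy.interpolate import interp1d

obs_times = np.array([18.0, 19.0, 20.0, 21.0])
obs_sizes = np.array([25.0, 32.0, 40.0, 28.0])   # max hail per step (mm)
model_times = np.array([18.0, 20.0, 21.0])       # shorter forecast track

def normalize(times):
    # Map a track's times onto [0, 1] so tracks of different length align
    return (times - times.min()) / (times.max() - times.min())

hail_interp = interp1d(normalize(obs_times), obs_sizes, kind="nearest",
                       bounds_error=False, fill_value=0)
print(hail_interp(normalize(model_times)))  # [25. 40. 28.]
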
    def match_size_distributions(self, model_tracks, obs_tracks, track_pairings):
        def match_single_track_dist(model_track, obs_track):
            label_columns = ["Max_Hail_Size", "Shape", "Location", "Scale"]
            obs_hail_dists = pd.DataFrame(index=obs_track.times,
                                          columns=label_columns)
            model_hail_dists = pd.DataFrame(index=model_track.times,
                                            columns=label_columns)
            for t, step in enumerate(obs_track.timesteps):
                step_vals = step[(obs_track.masks[t] == 1) & (obs_track.timesteps[t] > self.mrms_ew.min_thresh)]
                min_hail = step_vals.min() - 0.1
                obs_hail_dists.loc[obs_track.times[t], ["Shape", "Location", "Scale"]] = gamma.fit(step_vals,
                                                                                                   floc=min_hail)
                obs_hail_dists.loc[obs_track.times[t], "Max_Hail_Size"] = step_vals.max()
            if obs_track.times.size > 1 and model_track.times.size > 1:
                normalized_obs_times = 1.0 / (obs_track.times.max() - obs_track.times.min()) \
                                       * (obs_track.times - obs_track.times.min())
                normalized_model_times = 1.0 / (model_track.times.max() - model_track.times.min()) \
                                         * (model_track.times - model_track.times.min())
                for col in label_columns:
                    interp_func = interp1d(normalized_obs_times, obs_hail_dists[col], kind="linear",
                                           bounds_error=False, fill_value=0)
                    model_hail_dists.loc[model_track.times, col] = interp_func(normalized_model_times)
            else:
                for param in obs_hail_dists.columns:
                    model_hail_dists.loc[model_track.times, param] = obs_hail_dists.loc[obs_track.times[0], param]
            return model_hail_dists
        unpaired = list(range(len(model_tracks)))
        for p, pair in enumerate(track_pairings):
            unpaired.remove(pair[0])
            if isinstance(pair[1], (int, np.integer)):
                interp_hail_dists = match_single_track_dist(model_tracks[pair[0]], obs_tracks[pair[1]])
                model_tracks[pair[0]].observations = interp_hail_dists
            else:
                model_tracks[pair[0]].observations = []
                for op in pair[1]:
                    interp_hail_dists = match_single_track_dist(model_tracks[pair[0]], obs_tracks[op])
                    model_tracks[pair[0]].observations.append(interp_hail_dists)
        return

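# The gamma fits above pin the location parameter just below the smallest
# observed size so every observation lies inside the distribution's support.
# A standalone sketch with made-up MESH values (mm):
import numpy as np
from scipy.stats import gamma

sizes = np.array([21.0, 24.5, 27.2, 30.0, 35.1])
# floc fixes the location, so gamma.fit only estimates shape and scale
shape, loc, scale = gamma.fit(sizes, floc=sizes.min() - 0.1)
print(shape, loc, scale)
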
    def match_hail_size_step_distributions(self, model_tracks, obs_tracks, track_pairings):
        """
        Given a set of observed track timesteps matched to each model track timestep, fit a gamma
        distribution to the combined observed hail sizes at each matched step and attach the fitted
        parameters to the model track.

        Args:
            model_tracks: List of model track STObjects
            obs_tracks: List of observed track STObjects
            track_pairings: DataFrame of per-timestep pairings with "Matched" and "Pairings" columns
        """
        label_columns = ["Matched", "Max_Hail_Size", "Num_Matches", "Shape", "Location", "Scale"]
        s = 0
        for m, model_track in enumerate(model_tracks):
            model_track.observations = pd.DataFrame(index=model_track.times, columns=label_columns, dtype=np.float64)
            model_track.observations.loc[:, :] = 0
            model_track.observations["Matched"] = model_track.observations["Matched"].astype(np.int32)
            for t, time in enumerate(model_track.times):
                model_track.observations.loc[time, "Matched"] = track_pairings.loc[s, "Matched"]
                if model_track.observations.loc[time, "Matched"] > 0:
                    all_hail_sizes = []
                    step_pairs = track_pairings.loc[s, "Pairings"]
                    for step_pair in step_pairs:
                        obs_step = obs_tracks[step_pair[0]].timesteps[step_pair[1]].ravel()
                        obs_mask = obs_tracks[step_pair[0]].masks[step_pair[1]].ravel()
                        all_hail_sizes.append(obs_step[(obs_mask == 1) & (obs_step >= self.mrms_ew.min_thresh)])
                    combined_hail_sizes = np.concatenate(all_hail_sizes)
                    min_hail = combined_hail_sizes.min() - 0.1
                    model_track.observations.loc[time, "Max_Hail_Size"] = combined_hail_sizes.max()
                    model_track.observations.loc[time, "Num_Matches"] = step_pairs.shape[0]
                    model_track.observations.loc[time, ["Shape", "Location", "Scale"]] = gamma.fit(combined_hail_sizes,
                                                                                                   floc=min_hail)
                s += 1

    @staticmethod
    def calc_track_errors(model_tracks, obs_tracks, track_pairings):
        """
        Calculates spatial and temporal translation errors between matched
        forecast and observed tracks.

        Args:
            model_tracks: List of model track STObjects
            obs_tracks: List of observed track STObjects
            track_pairings: List of tuples pairing forecast and observed tracks.

        Returns:
            pandas DataFrame containing different track errors
        """
        columns = ['obs_track_id',
                   'translation_error_x',
                   'translation_error_y',
                   'start_time_difference',
                   'end_time_difference',
                   ]
        track_errors = pd.DataFrame(index=list(range(len(model_tracks))),
                                    columns=columns)
        for p, pair in enumerate(track_pairings):
            model_track = model_tracks[pair[0]]
            if isinstance(pair[1], (int, np.integer)):
                obs_track = obs_tracks[pair[1]]
            else:
                obs_track = obs_tracks[pair[1][0]]
            model_com = model_track.center_of_mass(model_track.start_time)
            obs_com = obs_track.center_of_mass(obs_track.start_time)
            track_errors.loc[pair[0], 'obs_track_id'] = pair[1] if isinstance(pair[1], (int, np.integer)) else pair[1][0]
            track_errors.loc[pair[0], 'translation_error_x'] = model_com[0] - obs_com[0]
            track_errors.loc[pair[0], 'translation_error_y'] = model_com[1] - obs_com[1]
            track_errors.loc[pair[0], 'start_time_difference'] = model_track.start_time - obs_track.start_time
            track_errors.loc[pair[0], 'end_time_difference'] = model_track.end_time - obs_track.end_time 
        return track_errors
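
# A hedged usage sketch: the error table is a pandas DataFrame, so matched
# track errors can be summarized directly (assuming `pairings` came from
# match_tracks and `processor` is an instance of this class):
#
# errors = processor.calc_track_errors(model_tracks, mrms_tracks, pairings)
# print(errors[["translation_error_x",
#               "translation_error_y"]].astype(float).mean())
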
Example #10
    def __init__(self,
                 run_date,
                 start_date,
                 end_date,
                 ensemble_name,
                 ensemble_member,
                 variable,
                 model_path,
                 model_map_file,
                 model_watershed_params,
                 object_matcher_params,
                 track_matcher_params,
                 size_filter,
                 gaussian_window,
                 mrms_path=None,
                 mrms_variable=None,
                 mrms_watershed_params=None,
                 single_step=True,
                 mask_file=None):
        self.run_date = run_date
        self.start_date = start_date
        self.end_date = end_date
        # Convert to int after dividing so the hours are usable by range()
        self.start_hour = int(
            (self.start_date - self.run_date).total_seconds() / 3600)
        self.end_hour = int(
            (self.end_date - self.run_date).total_seconds() / 3600)
        self.hours = range(self.start_hour, self.end_hour + 1)
        self.ensemble_name = ensemble_name
        self.ensemble_member = ensemble_member
        self.variable = variable
        self.model_ew = EnhancedWatershed(*model_watershed_params)
        self.object_matcher = ObjectMatcher(*object_matcher_params)
        self.track_matcher = TrackMatcher(*track_matcher_params)
        self.size_filter = size_filter
        self.gaussian_window = gaussian_window
        self.model_path = model_path
        self.mrms_path = mrms_path
        self.single_step = single_step
        self.model_grid = ModelOutput(self.ensemble_name,
                                      self.ensemble_member,
                                      self.run_date,
                                      self.variable,
                                      self.start_date,
                                      self.end_date,
                                      self.model_path,
                                      single_step=self.single_step)
        self.model_grid.load_map_info(model_map_file)
        if self.mrms_path is not None:
            self.mrms_variable = mrms_variable
            self.mrms_grid = MRMSGrid(self.start_date, self.end_date,
                                      self.mrms_variable, self.mrms_path)
            self.mrms_ew = EnhancedWatershed(*mrms_watershed_params)
        else:
            self.mrms_grid = None
            self.mrms_ew = None
        self.mask_file = mask_file
        self.mask = None
        if self.mask_file is not None:
            mask_data = Dataset(self.mask_file)
            self.mask = mask_data.variables["usa_mask"][:]
            mask_data.close()
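
# For reference, the hour arithmetic above must yield ints for range() to
# accept them under Python 3; a quick check:
from datetime import datetime

run_date = datetime(2015, 5, 1, 0)
start_date = datetime(2015, 5, 1, 18)
# total_seconds() returns a float, so convert to int after dividing by 3600
print(int((start_date - run_date).total_seconds() / 3600))  # 18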