예제 #1
0
    def __init__(self,
                 incidents,
                 deployments,
                 vehicle_types,
                 location_ids,
                 start_time=None,
                 end_time=None,
                 predictor="basic",
                 fc_dir="/data",
                 verbose=True):
        """Initialize all properties by extracting probabilities from the data.

        Parameters
        ----------
        incidents: pd.DataFrame
            The incident data. Only rows whose "hub_vak_bk" value (missing
            values replaced by 0, then cast to int and to str) occurs in
            `location_ids` are kept in `self.incidents`.
        deployments:
            The deployment data; stored as `self.deployments`.
        vehicle_types:
            The vehicle types; stored as `self.vehicle_types`.
        location_ids: sequence of str
            The identifiers of the locations to keep.
        start_time, end_time: optional (default: None)
            Forwarded to `self._set_sampling_dict()`.
        predictor: str, optional (default: "basic")
            Forwarded to `self._assign_predictor()`.
        fc_dir: str, optional (default: "/data")
            Forwarded to `self._assign_predictor()`.
        verbose: boolean, optional (default: True)
            Passed as `verbose` to the `progress()` calls.

        Raises
        ------
        ValueError
            If `location_ids` is empty or its first element is not a string.
        """
        # Validate the ids *before* they are used to filter the incidents, so
        # that bad input fails fast with a clear message (an empty sequence
        # used to surface as an IndexError after the filtering was done).
        if len(location_ids) == 0 or not isinstance(location_ids[0], str):
            raise ValueError('locations must be an iterable of strings')

        # The location column is compared as integer-formatted strings.
        incident_location_ids = (
            incidents["hub_vak_bk"].fillna(0).astype(int).astype(str))
        self.incidents = incidents[np.in1d(incident_location_ids,
                                           location_ids)]
        self.deployments = deployments
        self.vehicle_types = vehicle_types
        self.location_ids = location_ids
        self.verbose = verbose

        self.types = self._infer_incident_types()

        self._assign_predictor(predictor, fc_dir)
        self._set_sampling_dict(start_time,
                                end_time,
                                incident_types=self.types)
        self._create_incident_types()
        self._create_demand_locations()

        self.reset_time()

        progress("IncidentSampler ready for simulation.", verbose=self.verbose)
예제 #2
0
    def _assign_predictor(self, predictor, fc_dir):
        """ Initialize incident rate predictor and assign to property.

        The predictor is constructed with `load_forecast=True` and stored
        under `self.predictor`.

        Parameters
        ----------
        predictor: str, one of ['prophet', 'basic']
            The predictor to use to forecast the incident rates. 'prophet'
            selects ProphetIncidentPredictor, 'basic' selects
            BasicLambdaForecaster.
        fc_dir: str
            Passed to the predictor's constructor as `fc_dir`
            (presumably the directory that holds the saved forecast -
            confirm against the predictor classes).

        Raises
        ------
        ValueError
            If `predictor` is not one of the supported names.
        """
        # Kept local so the error message never depends on a name defined
        # outside this method.
        supported = ["prophet", "basic"]

        if predictor == "prophet":
            progress("Initializing ProphetIncidentPredictor...",
                     verbose=self.verbose)
            predictor_cls = ProphetIncidentPredictor

        elif predictor == "basic":
            progress("Initializing BasicLambdaForecaster...",
                     verbose=self.verbose)
            predictor_cls = BasicLambdaForecaster

        else:
            raise ValueError(
                "'predictor' must be one of {}.".format(supported))

        self.predictor = predictor_cls(load_forecast=True,
                                       fc_dir=fc_dir,
                                       verbose=self.verbose)
예제 #3
0
    def _prep_data_for_fitting(self, incidents, deployments, stations,
                               vehicles, osrm_host, save):
        """Merge the raw datasets and add OSRM distance and duration estimates.

        The result is stored under `self.data`; nothing is returned.

        Parameters
        ----------
        incidents: pd.DataFrame
            The incident data.
        deployments: pd.DataFrame
            The deployment data.
        stations: pd.DataFrame
            The station information including coordinates and station names.
        vehicles: array-like of strings
            The types of vehicles to use.
        osrm_host: str
            The url to the OSRM API.
        save: boolean
            Whether to write the prepared data to a csv file, named
            `self.file_name`, inside `self.data_dir`.
        """
        progress("Preprocessing and merging datasets.", verbose=self.verbose)
        merged = prepare_data_for_response_time_analysis(
            incidents, deployments, stations, vehicles)

        progress("Adding OSRM distance and duration.", verbose=self.verbose)
        enriched = add_osrm_distance_and_duration(merged, osrm_host=osrm_host)
        self.data = enriched

        if save:
            progress("Saving file.", verbose=self.verbose)
            target_path = os.path.join(self.data_dir, self.file_name)
            self.data.to_csv(target_path, index=False)

        progress("Data prepared for fitting.", verbose=self.verbose)
예제 #4
0
    def fit(self, data, types=None):
        """ Perform time series decomposition using Prophet.

        This function first prepares the data and saves the prepared data
        as 'self.incidents'. Then it creates a dictionary of Prophet() objects,
        where the keys equal the incident types and the corresponding model
        is fitted to the data of that type. The dictionary of models is stored
        as 'self.models_dict' and used when predict is called.

        Notes
        -----
        This function does not return anything. It sets 'self.types',
        'self.incidents', 'self.time_index', 'self.models_dict' and
        'self.fitted'.

        Parameters
        ----------
        data: pd.DataFrame
            The incidents to train the models on.
        types: Sequence(str)
            The incident types to fit models for. If None, uses
            all incident types in the data, except missing values, 'nan'
            and 'NVT'. Defaults to None.
        """

        if types is not None:
            self.types = types
        else:
            progress("No incident types given, using all types in data.",
                     verbose=self.verbose)

            # NaN never compares equal to itself, so a membership test against
            # a list containing np.nan does not reliably exclude missing
            # values; test for them explicitly instead.
            self.types = [
                t for t in data["dim_incident_incident_type"].unique()
                if pd.notnull(t) and t not in ("nan", "NVT")
            ]

        progress("Preparing incident data for analysis...",
                 verbose=self.verbose)

        self.incidents = self._prep_data_for_prediction(data)
        self.incidents["hourly_datetime"] = self._create_date_hour_column(
            self.incidents, datetime_col="dim_incident_start_datumtijd")

        # the models are fitted on the range between the first and the last
        # hourly timestamp in the data
        start = self.incidents["hourly_datetime"].min()
        end = self.incidents["hourly_datetime"].max()
        self.time_index = self._create_complete_hourly_index(start,
                                                             end_datetime=end)

        # one Prophet model per incident type
        self.models_dict = {}
        for type_ in self.types:
            progress("Fitting model for type {}...".format(type_),
                     verbose=self.verbose)

            m = Prophet()
            dfprophet = self._create_prophet_data(self.incidents,
                                                  self.time_index,
                                                  type_=type_)
            m.fit(dfprophet)
            self.models_dict[type_] = m

        self.fitted = True
        progress("Models fitted.", verbose=self.verbose)
예제 #5
0
    def _apply_filters(self, log, metric_set):
        """Applies all the filtering specified in a metric set and returns the resulting
        observations in the simulation log. Also adds relevant performance measures.

        The input log is copied first, so it is not modified.

        Parameters
        ----------
        log: pd.DataFrame
            The simulation log
        metric_set: dict
            The metric set as created by this class.

        Returns
        -------
        data: pd.Dataframe
            The filtered log.
        y_col: str
            The name of the column that describes the measure of the metric set.

        Raises
        ------
        ValueError
            If metric_set["measure"] is not one of "response_time", "on_time"
            or "delay".
        """
        data = log.copy()
        # apply filters; a filter whose value is None is skipped
        for f in self.filters:
            if metric_set[f] is not None:
                progress("Filtering on {}.".format(f), verbose=self.verbose)
                data = self._filter_data(data, self.filter_column_map[f],
                                         metric_set[f])

        if metric_set["first_only"]:
            progress("Keeping only first vehicle per incident.",
                     verbose=self.verbose)
            # sort on response time so that drop_duplicates (which keeps the
            # first occurrence) retains the fastest vehicle per run/incident
            data.sort_values(self.response_time_col, inplace=True)
            data.drop_duplicates(subset=[self.run_col, self.incident_id_col],
                                 inplace=True)
            data.sort_values([self.run_col, self.incident_id_col],
                             inplace=True)

        # add relevant performance measures to data
        measure = metric_set["measure"]
        if measure == "response_time":
            y_col = self.response_time_col
        elif measure == "on_time":
            data["on_time"] = (data[self.response_time_col] <=
                               data[self.target_col])
            y_col = "on_time"
        elif measure == "delay":
            data["delay"] = data[self.response_time_col] - data[
                self.target_col]
            y_col = "delay"
        else:
            # previously an unknown measure left y_col unbound and surfaced as
            # an UnboundLocalError at the return statement
            raise ValueError(
                "Unknown measure '{}'; expected one of 'response_time', "
                "'on_time' or 'delay'.".format(measure))

        return data, y_col
예제 #6
0
    def _create_demand_locations(self):
        """Build the demand locations with their building function distributions.

        Stores a dictionary under `self.locations` that maps every location
        returned by `get_building_function_probabilities` to a DemandLocation
        object constructed from the location and its probabilities.
        """
        progress("Getting building function probabilities.",
                 verbose=self.verbose)
        probs_per_location = get_building_function_probabilities(
            self.incidents, locations=self.location_ids)

        progress("Initializing demand locations", verbose=self.verbose)
        demand_locations = {}
        for loc_id in probs_per_location.keys():
            demand_locations[loc_id] = DemandLocation(
                loc_id, probs_per_location[loc_id])
        self.locations = demand_locations
예제 #7
0
    def _get_travel_durations(self):
        """Request the travel durations between all locations from OSRM.

        Demand locations and stations are combined (demand locations first)
        and used as both origins and destinations.

        Returns
        -------
        time_matrix:
            The first of the three values returned by `osrm.table`, requested
            with output='dataframe'.
        """
        progress("Creating matrix of travel times...", verbose=self.verbose)

        # demand locations first, then stations; ids and coordinates are
        # collected in the same order so they stay aligned
        all_coords, all_ids = [], []
        for mapping in (self.demand_locs, self.station_locs):
            all_coords.extend(mapping.values())
            all_ids.extend(mapping.keys())

        durations, _, _ = osrm.table(all_coords,
                                     coords_dest=all_coords,
                                     ids_origin=all_ids,
                                     ids_dest=all_ids,
                                     output='dataframe',
                                     url_config=self.osrm_config)
        return durations
예제 #8
0
    def predict(self, periods=365 * 24, freq="H", save=False, future=None):
        """Forecast the incident rate per type with the fitted Prophet models.

        Notes
        -----
        Can only be called after calling '.fit()'; raises an AssertionError
        otherwise. Nothing is returned: the forecast is stored under
        'self.forecast' as a DataFrame with a 'ds' column plus one column per
        incident type. Negative predictions are clipped to zero.

        Parameters
        ----------
        periods: int
            The number of periods to forecast. Only used when 'future' is None.
        freq: str,
            The frequency to predict the incident rates at, such as 'H'
            (default), 'D', or 'M'. Only used when 'future' is None.
        save: boolean
            Whether to call 'self.save_forecast()' afterwards. Optional,
            defaults to False.
        future: pd.DataFrame, optional (default: None)
            The frame to predict for; must have a 'ds' column. If None, it is
            created with 'make_future_dataframe' of the model of the first
            incident type, excluding the history.
        """
        assert self.fitted, "First use 'fit()' to fit a model before predicting."

        if future is None:
            first_model = self.models_dict[self.types[0]]
            future = first_model.make_future_dataframe(periods=periods,
                                                       freq=freq,
                                                       include_history=False)

        columns = {"ds": future["ds"].tolist()}

        for type_ in self.types:
            progress("Predicting incident rates for {}".format(type_),
                     verbose=self.verbose)

            predicted = self.models_dict[type_].predict(future)
            # rates cannot be negative
            columns[type_] = np.maximum(0.0, predicted["yhat"].tolist())

        self.forecast = pd.DataFrame(columns)

        progress("Forecast made.", verbose=self.verbose)
        if save:
            self.save_forecast()
예제 #9
0
    def _filter_data(self, data, remove_unfinished_month=True, last_n_years=5):
        """Restrict the data to a period suitable for analysis.

        The date column (`self.date_col`) is parsed to datetimes (day first).
        The input DataFrame itself is left untouched; a new DataFrame is
        returned.

        Parameters
        ----------
        data: pd.DataFrame
            The data to filter. Must contain the column `self.date_col`.
        remove_unfinished_month: boolean, optional (default: True)
            Whether to drop everything from the first day of the month of the
            latest date onwards.
        last_n_years: int, optional (default: 5)
            Keep only the data from this many years before the cutoff onwards.
            A falsy value (0 or None) disables this filter.

        Returns
        -------
        data: pd.DataFrame
            The filtered data with a parsed date column.
        """
        # work on a copy so the caller's DataFrame is not modified in place
        data = data.copy()
        data[self.date_col] = pd.to_datetime(data[self.date_col],
                                             dayfirst=True)
        end = data[self.date_col].max()

        if remove_unfinished_month:
            cutoff = pd.Timestamp(year=end.year,
                                  month=end.month,
                                  day=1,
                                  hour=0)
            progress("Cutting off at {}.".format(cutoff),
                     verbose=self.verbose)
            data = data[data[self.date_col] < cutoff].copy()
        else:
            cutoff = end

        if last_n_years:
            # Truncate to the whole hour and step back in calendar years.
            # DateOffset maps Feb 29 to Feb 28 when the target year is not a
            # leap year, where constructing the Timestamp by hand would raise.
            start = (cutoff.replace(
                minute=0, second=0, microsecond=0, nanosecond=0) -
                     pd.DateOffset(years=last_n_years))
            progress("Using incidents after {}.".format(start),
                     verbose=self.verbose)
            data = data[data[self.date_col] >= start].copy()

        progress("Data filtered.", verbose=self.verbose)
        return data
예제 #10
0
    def _evaluate_metric_set(self, log, metric_set):
        """Calculate the descriptors of one metric set on a simulation log.

        The log is first filtered with `self._apply_filters`. Depending on
        `self.by_run`, the descriptors are then calculated per run (with the
        run column dropped from the result) or over all observations at once.

        Parameters
        ----------
        log: pd.DataFrame
            The log of simulation outputs.
        metric_set: dict
            The description of the metrics to calculate as created in :code:`.add_metric()`.

        Returns
        -------
        result: pd.DataFrame
            The calculated metrics.
        """
        data, y_col = self._apply_filters(log, metric_set)

        # calculate metrics
        progress("Calculating requested metrics.", verbose=self.verbose)
        per_run = self.by_run

        # both calculation routines take the same descriptor switches
        descriptor_options = {
            key: metric_set[key]
            for key in ("count", "mean", "std", "missing", "quantiles")
        }

        if not per_run:
            return self._calculate_descriptors(data[y_col],
                                               **descriptor_options)

        run_results = self._calculate_descriptors_by_run(
            data, y_col=y_col, **descriptor_options)
        run_results.drop(self.run_col, axis=1, inplace=True)
        return run_results
예제 #11
0
    def __init__(self,
                 demand_locs=None,
                 station_locs=None,
                 osrm_host="http://192.168.56.101:5000",
                 load_matrix=True,
                 save_matrix=False,
                 data_dir="data",
                 verbose=True):
        """ Create the matrix of travel durations with OSRM.

        Parameters
        ----------
        demand_locs, station_locs: optional (default: None)
            The demand locations and stations; stored as `self.demand_locs`
            and `self.station_locs` (presumably mappings of ids to
            coordinates - confirm against `_get_travel_durations`).
        osrm_host: str
            The url to the OSRM API. Only used when `load_matrix=False`.
        load_matrix: boolean, optional (default: True)
            Whether to load the travel time matrix from file (True) or to
            calculate it with OSRM (False).
        save_matrix: boolean, optional (default: False)
            Whether to save the travel time matrix to file.
        data_dir: str, optional (default: "data")
            The directory of the "time_matrix.csv" file that is loaded from
            and/or saved to.
        verbose: boolean, optional (default: True)
            Passed as `verbose` to the `progress()` calls.

        Raises
        ------
        ImportError
            If `load_matrix=False` and the osrm package cannot be imported.
        """
        self.osrm_host = osrm_host
        self.demand_locs = demand_locs
        self.station_locs = station_locs
        self.verbose = verbose
        self.path = os.path.join(data_dir, "time_matrix.csv")

        if load_matrix:
            self.time_matrix_df = self.load_time_matrix(self.path)
        else:
            # osrm is imported lazily and bound at module level, so it is only
            # required when the matrix actually has to be calculated
            global osrm
            # Only the import itself is guarded: an ImportError raised while
            # calculating the matrix must not be reported as "osrm missing".
            try:
                import osrm
            except ImportError:
                raise ImportError(
                    "If load_matrix=False, OSRM is required to calculate the "
                    "travel durations. Either use load_matrix=True or install"
                    " the osrm Python package.")

            osrm.RequestConfig.host = self.osrm_host
            self.osrm_config = osrm.RequestConfig
            self.time_matrix_df = self._get_travel_durations()

        self._prepare_dispatch_information()

        if save_matrix:
            self.save_time_matrix(self.path)

        progress("Dispatcher ready to go.", verbose=self.verbose)
예제 #12
0
    def _create_incident_types(self):
        """Build the incident types with their type-specific distributions.

        Stores a dictionary under `self.incident_types` that maps every type
        in `self.types` to an IncidentType object, constructed from that
        type's priority, vehicle requirement and location probabilities.
        """
        progress("Getting priority probabilities.", verbose=self.verbose)
        priorities = get_prio_probabilities_per_type(self.incidents)

        progress("Getting vehicle requirement probabilities.",
                 verbose=self.verbose)
        vehicle_requirements = get_vehicle_requirements_probabilities(
            self.incidents, self.deployments, self.vehicle_types)

        progress("Getting spatial distributions.", verbose=self.verbose)
        spatial = get_spatial_distribution_per_type(
            self.incidents, locations=self.location_ids)

        progress("Initializing incident types.", verbose=self.verbose)
        incident_types = {}
        for type_name in self.types:
            incident_types[type_name] = IncidentType(
                priorities[type_name],
                vehicle_requirements[type_name],
                spatial[type_name])
        self.incident_types = incident_types
예제 #13
0
    def evaluate(self, log):
        """Evaluate a simulation output on every registered set of metrics.

        Parameters
        ----------
        log: pd.DataFrame
            The raw simulation output/log.

        Returns
        -------
        metrics: dict
            Maps the name of every metric set in `self.metric_set_names` to
            the result of `self._evaluate_metric_set` for that set.
        """
        n_sets = len(self.metric_set_names)
        progress("Evaluating {} sets of metrics.".format(n_sets),
                 verbose=self.verbose)

        results = {}
        for set_name in self.metric_set_names:
            progress("Evaluating {}.".format(set_name), verbose=self.verbose)
            definition = self.metric_sets[set_name]
            results[set_name] = self._evaluate_metric_set(log, definition)

        progress("Evaluation completed.", verbose=self.verbose)
        return results
예제 #14
0
    def predict(self, start, end, predict_nye=True, save=False):
        """Forecast arrival rates for a given future period and save it under 'self.forecast'.

        The forecast is a DataFrame with a 'ds' column holding the hourly
        timestamps and one column per incident type (the columns of
        'self.lambdas').

        Parameters
        ----------
        start, end: datetime object,
            The start and end dates and times (rounded to the whole hour) for the period
            to forecast.
        predict_nye: boolean, optional (default: True),
            Whether to predict NYE with high activity like in reality (True) or ignore it
            and forecast a regular day instead (False).
        save: boolean, optional (default: False),
            Whether to call 'self.save_forecast()' after the forecast is created.

        Raises
        ------
        AssertionError
            If the forecaster has not been fitted yet.
        """
        assert self.fitted, "First use the 'fit' method before making predictions."

        def replace_with_other(df1, df2, match_cols, fill_cols):
            """Fill one dataframe with values from another, based on specified columns."""
            assert len(match_cols
                       ) == 3, "This function needs three columns to match on."
            col_a, col_b, col_c = match_cols
            # extract the lookup keys and fill values once instead of
            # re-indexing df2 for every row
            keys = zip(df2[col_a].values, df2[col_b].values,
                       df2[col_c].values)
            fill_values = df2[fill_cols].values
            for (key_a, key_b, key_c), values in zip(keys, fill_values):
                mask = ((df1[col_a] == key_a) & (df1[col_b] == key_b) &
                        (df1[col_c] == key_c))
                df1.loc[mask, fill_cols] = values

            return df1

        # create dataframe with requested date range
        indx = pd.date_range(start=start, end=end, freq="H")
        df = pd.DataFrame({"ds": pd.Series(indx)})
        df[self.month_col] = df["ds"].dt.month
        # ISO weekday numbering: Monday = 1, ..., Sunday = 7
        df[self.day_col] = df["ds"].dt.dayofweek + 1
        df[self.month_day_col] = df["ds"].dt.day
        df[self.hour_col] = df["ds"].dt.hour

        types = self.lambdas.columns
        for type_ in types:
            df[type_] = np.nan

        lambdas = self.lambdas.copy()
        lambdas.reset_index(drop=False, inplace=True)

        # fill with the overall patterns/lambdas
        progress("Filling future DataFrame..", verbose=self.verbose)
        cols = [self.month_col, self.day_col, self.hour_col]
        df = replace_with_other(df, lambdas, cols, types)
        progress("DataFrame filled with general patterns (shape: {}).".format(
            df.shape),
                 verbose=self.verbose)

        # fill NYEs with high activity if requested
        if predict_nye:
            progress("Filling future New Year's Eves", verbose=self.verbose)
            # NYE is matched on the day of the month instead of the weekday
            cols = [self.month_col, self.month_day_col, self.hour_col]
            nye = self.nye_lambdas.copy()
            nye.reset_index(drop=False, inplace=True)
            df = replace_with_other(df, nye, cols, types)

            msg = "New Year's Eve forecasts added to DataFrame (shape: {})".format(
                df.shape)
            progress(msg, verbose=self.verbose)

        # remove added columns
        df.drop(
            [self.month_col, self.day_col, self.month_day_col, self.hour_col],
            axis=1,
            inplace=True)
        self.forecast = df
        progress("Forecast created.", verbose=self.verbose)

        if save:
            self.save_forecast()
예제 #15
0
    def fit(self, data, last_n_years=8, fit_nye=True):
        """Estimate arrival rates from the incident data.

        Arrival rates are obtained per incident type, month, day of the week, and
        hour of the day and stored under self.lambdas. If fit_nye is True, separate
        rates for New Year's Eve are stored under self.nye_lambdas. Sets
        self.fitted = True once the procedure is completed.

        Parameters
        ----------
        data: pd.DataFrame,
            The incident data.
        last_n_years: int, optional (default: 8),
            How many years to use to estimate the arrival rates. It uses the latest
            'last_n_years' years.
        fit_nye: boolean, optional (default: True),
            Whether to fit New Year's Eve separately (True) or to treat it as a regular day.
        """
        progress("Start fitting arrival rates.", verbose=self.verbose)

        # Dutch day names mapped to 1 (Monday) through 7 (Sunday)
        day_names = [
            "Maandag", "Dinsdag", "Woensdag", "Donderdag", "Vrijdag",
            "Zaterdag", "Zondag"
        ]
        day_numbers = {
            day_name: number
            for number, day_name in enumerate(day_names, start=1)
        }

        # prepare data
        data = self._filter_data(data, last_n_years=last_n_years)
        data[self.day_col] = data[self.day_name_col].map(day_numbers)
        integer_cols = [
            self.month_col, self.day_col, self.hour_col, self.month_day_col
        ]
        for col in integer_cols:
            data[col] = data[col].astype(float).astype(int)

        # obtain lambdas
        progress("Obtaining lambdas..", verbose=self.verbose)

        def rates_for_group(group):
            # group.name is the (type, month) key of the group
            return self._get_incidents_per_hour_of_week(group, group.name[1])

        per_type_and_month = data.groupby([self.type_col, self.month_col])
        rates = per_type_and_month.apply(rates_for_group)

        # reindex on a complete set of types, months, and weekdays
        all_types = data[self.type_col].unique()
        months = np.arange(1, 13)
        weekdays = np.arange(1, 8)
        complete_index = pd.MultiIndex.from_product(
            [all_types, months, weekdays],
            names=[self.type_col, self.month_col, self.day_col])
        rates = rates.reindex(complete_index, fill_value=0)

        # stack the hour columns and use types as columns instead
        self.lambdas = rates.stack().unstack(self.type_col, fill_value=0)
        progress("Lambdas obtained.", verbose=self.verbose)

        if fit_nye:
            progress("Fitting New Year's Eve.", verbose=self.verbose)
            self.nye_lambdas = self._get_incidents_at_nye(data)
            progress("New Year's Eve arrival rates fitted.",
                     verbose=self.verbose)

        progress("Fit completed.", verbose=self.verbose)
        self.fitted = True
예제 #16
0
    def fit(self,
            incidents=None,
            deployments=None,
            stations=None,
            loc_coords=None,
            vehicle_types=["TS", "RV", "HV", "WO"],
            osrm_host="http://192.168.56.101:5000",
            save_prepared_data=False,
            location_col="hub_vak_bk",
            volunteer_stations=[
                "DRIEMOND", "DUIVENDRECHT", "AMSTELVEEN VRIJWILLIG"
            ]):
        """ Fit random variables related to response time.

        Parameters
        ----------
        incidents: pd.DataFrame
            The incident data. Only required when no prepared data is loaded.
        deployments: pd.DataFrame (optional)
            The deployment data. Only required when no prepared data is loaded.
        stations: pd.DataFrame (optional)
            The station information including coordinates and station names.
            Only required when no prepared data is loaded.
        loc_coords: optional (default: None)
            The coordinates of the demand locations. If provided, they are
            stored as-is under 'self.location_coords' and only the station
            coordinates are extracted from the data. If None, both the
            location and the station coordinates are extracted from the data.
        vehicle_types: array-like of strings
            The types of vehicles to use. Defaults to ["TS", "RV", "HV", "WO"].
        osrm_host: str
            The url to the OSRM API, required when object is initialized with
            load_data=False or when no prepared data was found.
        save_prepared_data: boolean
            Whether to write the preprocessed data to a csv file so that it can
            be loaded the next time. Defaults to False.
        location_col: str
            The name of the column that specifies the demand locations, defaults
            to "hub_vak_bk".
        volunteer_stations: array-like of str, optional
            The names of the stations that are run by volunteers. Turn-out times
            are fitted separately for these stations, since volunteers have to travel
            to the station first. Defaults to
            ["DRIEMOND", "DUIVENDRECHT", "AMSTELVEEN VRIJWILLIG"].

        Raises
        ------
        ValueError
            If no prepared data is loaded and not all of 'incidents',
            'deployments' and 'stations' are provided.

        Notes
        -----
        Performs the following steps:
            - Prepares data (merges and adds OSRM distance and duration per
              deployment)
            - Fits lognormal random variables to dispatch times per incident type.
            - Fits Gamma random variables to turnout time per station and type.
            - Models the travel time as :math:`\\alpha + \\beta * \\gamma (\\theta, k) * \\hat{t}`,
              per vehicle type. Here :math:`\\hat{t}` represents the OSRM estimate of the
              travel time and :math:`\\gamma` is a random noise factor.
            - Saves the station and demand location coordinates in dictionaries.
        """
        self.location_col = location_col

        if self.data is None:
            if incidents is not None and deployments is not None and stations is not None:
                progress("No data loaded, preprocess with OSRM.",
                         verbose=self.verbose)
                self._prep_data_for_fitting(incidents=incidents,
                                            deployments=deployments,
                                            stations=stations,
                                            vehicles=vehicle_types,
                                            osrm_host=osrm_host,
                                            save=save_prepared_data)
            else:
                raise ValueError(
                    "No prepared data loaded and not all data fed to 'fit()'.")

        # The check used to be inverted ('is None'), which discarded provided
        # coordinates and stored None when no coordinates were given.
        if loc_coords is not None:
            progress(
                "Location coordinates provided. Extracting station coordinates",
                verbose=self.verbose)
            self.location_coords = loc_coords
            _, self.station_coords = get_coordinates_locations_stations(
                self.data, location_col=location_col)
        else:
            progress("Extracting station and location coordinates.",
                     verbose=self.verbose)
            self.location_coords, self.station_coords = \
                get_coordinates_locations_stations(self.data, location_col=location_col)

        progress('Fitting random variables on response time...',
                 verbose=self.verbose)
        # dispatch and travel times are fitted on priority 1 deployments with
        # "inzet_terplaatse_volgnummer" equal to 1 only
        self.high_prio_data = (self.data[
            (self.data["dim_prioriteit_prio"] == 1)
            & (self.data["inzet_terplaatse_volgnummer"] == 1)].copy())
        self.dispatch_rv_dict = fit_dispatch_times(self.high_prio_data)
        self.turnout_time_rv_dict = fit_turnout_times(
            self.data,
            vehicle_types=vehicle_types,
            volunteer_stations=volunteer_stations)
        self.travel_time_dict = model_travel_time_per_vehicle(
            self.high_prio_data)
        self.onscene_time_rv_dict = fit_onscene_times(self.data)

        progress("Creating response time generators.", verbose=self.verbose)
        self._create_response_time_generators()

        progress("Response time variables fitted.", verbose=self.verbose)
        self.fitted = True
예제 #17
0
 def save_forecast(self):
     """Write the forecast in `self.forecast` to a csv file.

     The file is named `self.file_name` and written to the directory
     `self.fc_dir`, without the index.
     """
     target_path = os.path.join(self.fc_dir, self.file_name)
     message = "Forecast saved to {}.".format(target_path)
     self.forecast.to_csv(target_path, index=False)
     progress(message, verbose=self.verbose)
예제 #18
0
    def create_sampling_dict(self,
                             start_time=None,
                             end_time=None,
                             incident_types=None):
        """Turn the forecast into a dictionary that is convenient for
        sampling random incidents.

        Parameters
        ----------
        start_time: Timestamp or str convertible to Timestamp
            The earliest time that should be included in the dictionary.
            If None, the earliest time in the forecast is used.
        end_time: Timestamp or str convertible to Timestamp
            The latest time that should be included in the dictionary.
            If None, the latest time in the forecast is used.
        incident_types: array-like of strings
            The incident types to forecast for. Defaults to None. If None,
            uses all incident types in the forecast.

        Returns
        -------
        sampling_dict: dict,
            The sampling dictionary as described below.

        Notes
        -----
        Stores three results:
            -self.sampling_dict, a dictionary like:
             `{t -> {'type_distribution' -> rates divided by their sum,
             'beta' -> 60 divided by the sum of the rates,
             'lambda' -> the sum of the rates,
             'time' -> the timestamp of the t-th row in the selected range}}`
             where t is an integer counting the rows since the start_time.
            -self.sampling_start_time, the start_time that was used.
            -self.sampling_end_time, the end_time that was used.

        Raises an AssertionError when no forecast is available.
        """
        assert self.forecast is not None, \
            ("No forecast available, initiate with load_forecast=True "
             "or use .fit() and .predict() to create one.")

        # select the requested incident types
        if incident_types is None:
            fc = self.forecast.copy()
        else:
            selected_columns = ["ds"] + list(incident_types)
            fc = self.forecast[selected_columns].copy()

        # fall back on the full range of the forecast
        fc["ds"] = pd.to_datetime(fc["ds"], dayfirst=True)
        if start_time is None:
            start_time = fc["ds"].min()
        if end_time is None:
            end_time = fc["ds"].max()

        progress(
            "Creating a sampling dictionary from {} to {}.".format(
                start_time, end_time),
            verbose=self.verbose)

        # keep the requested period and split timestamps from the rates
        in_period = (fc["ds"] >= start_time) & (fc["ds"] <= end_time)
        fc = fc[in_period]
        timestamps = fc["ds"].copy()
        del fc["ds"]

        # one entry per row, keyed on the row's position in the period
        rates_per_step = fc.reset_index(drop=True).T.to_dict(orient="list")
        self.sampling_dict = {}
        for step, rates in rates_per_step.items():
            total_rate = np.sum(rates)
            self.sampling_dict[step] = {
                "type_distribution": np.array(rates) / total_rate,
                "beta": 1 / total_rate * 60,
                "lambda": total_rate,
                "time": timestamps.iloc[step]
            }

        # save start and end time for future reference
        self.sampling_start_time = start_time
        self.sampling_end_time = end_time

        progress("Sampling dictionary created.", verbose=self.verbose)
        return self.sampling_dict
예제 #19
0
    def add_metric(self,
                   measure,
                   name=None,
                   description=None,
                   count=True,
                   mean=True,
                   std=True,
                   missing=True,
                   quantiles=[0.5, 0.75, 0.90, 0.95, 0.98, 0.99],
                   prios=None,
                   locations=None,
                   vehicles=None,
                   incident_types=None,
                   objects=None,
                   hours=None,
                   days_of_week=None,
                   first_only=False):
        """Add metrics that should be evaluated.

        Parameters
        ----------
        measure: str, one of ["response_time", "on_time", "delay"]
            The measure to evaluate.
        name: str, optional, default=None
            How to name the set of metrics for reference in outputs. If None, a standard name
            is given (i.e., 'metric_set_1', 'metric_set_2'), numbered after the highest
            standard name already in use. Adding a set under an existing name replaces
            that set.
        description: str, optional, default=None
            A description of the set of evaluation metrics. This can be used to explain, e.g.,
            the applied filtering in a more elaborate way, whereas the 'name' property should
            be kept concise.
        count, mean, std, missing: boolean, optional, default=True
            Whether to describe the measure by its count, mean, standard deviation and
            proportion of missing (NaN) values. Note that a missing response time means the
            response was carried out by an external vehicle.
        quantiles: array(float), optional, default=[0.5, 0.75, 0.90, 0.95, 0.98, 0.99])
            Which quantiles to describe the measure with. Set to None to not use any quantiles.
        prios: int or array-like of ints, optional, default=None
            Which priority levels to include during evaluation. If None, uses all levels.
        locations, vehicles, incident_types, objects: array(str), optional (default: None),
            Which locations, vehicles types, incident types and object functions to include
            during evaluation. If None, uses all values. Locations are converted to an
            array of strings.
        hours: array-like of ints or None, optional, default=None
            Which hours of day to incorporate during evaluation. Values must be integers in
            [0, 23].
        days_of_week: array-like of ints or None, optional, default=None
            Which days of the week to incorporate during evaluation. Monday = 0, ..., Sunday = 6.
        first_only: boolean, optional, default=False
            Whether to calculate the metrics for only the first arriving vehicle per incident
            (True) or to evaluate all vehicles (False).

        Raises
        ------
        AssertionError
            If 'measure' is not one of the supported measures.
        """
        if name is None:
            # Number after the highest auto-generated name in use. The whole
            # numeric suffix is parsed (not just the last character), so
            # numbering keeps working beyond nine sets and custom names that
            # do not follow the pattern are ignored.
            prefix = "metric_set_"
            used_numbers = [
                int(str(n)[len(prefix):]) for n in self.metric_set_names
                if str(n).startswith(prefix)
                and str(n)[len(prefix):].isdigit()
            ]
            i = max(used_numbers) + 1 if used_numbers else 1
            name = "{}{}".format(prefix, i)

        assert measure in self.measures, "'measure' must be one of {}. Received {}" \
            .format(self.measures, measure)
        self.metric_set_measures[name] = measure

        if locations is not None:
            locations = np.array(locations, dtype=str)

        # store a copy of a list of quantiles, so that the shared default list
        # of this method cannot be modified through a stored metric set
        if isinstance(quantiles, list):
            quantiles = list(quantiles)

        self.metric_sets[name] = {
            "count": count,
            "mean": mean,
            "std": std,
            "missing": missing,
            "quantiles": quantiles,
            "locations": locations,
            "prios": prios,
            "vehicles": vehicles,
            "incident_types": incident_types,
            "objects": objects,
            "hours": hours,
            "days_of_week": days_of_week,
            "first_only": first_only,
            "description": description,
            "measure": measure
        }

        # a re-used name replaces the existing set; do not list it twice
        if name not in self.metric_set_names:
            self.metric_set_names.append(name)
        progress("Set of metrics '{}' added.".format(name),
                 verbose=self.verbose)