Example #1
def test_teaser_near_classification_points():
    """Test of TEASER with incremental time stamps outside defined class points."""
    X_train, y_train, X_test, y_test, indices = load_unit_data()

    # train TEASER
    teaser = TEASER(
        random_state=0,
        classification_points=[6, 10, 14, 18, 24],
        estimator=TimeSeriesForestClassifier(n_estimators=10, random_state=0),
    )
    teaser.fit(X_train, y_train)

    # use test points that are not in the list above
    test_points = [7, 11, 19, 20]

    X_test = from_nested_to_3d_numpy(X_test)
    states = None
    for i in test_points:
        X = X_test[indices, :, :i]
        if i == 20:
            with pytest.raises(ValueError):
                probas, decisions, states = teaser.predict_proba(X, state_info=states)
        else:
            probas, decisions, states = teaser.predict_proba(X, state_info=states)
Example #2
    def _transform_words(self, X):
        if self.use_first_order_differences:
            X = self._add_first_order_differences(X)

        bag_all_words = [dict() for _ in range(len(X))]

        # On each dimension, perform SFA
        for ind, column in enumerate(self.col_names):
            X_dim = X[[column]]
            X_dim = from_nested_to_3d_numpy(X_dim)

            for i, window_size in enumerate(self.window_sizes[ind]):

                # SFA transform
                sfa_words = self.SFA_transformers[ind][i].transform(X_dim)
                bag = sfa_words[0]

                # merging bag-of-patterns of different window_sizes
                # to single bag-of-patterns with prefix indicating
                # the used window-length
                highest = np.int32(self.highest_bits[ind])
                for j in range(len(bag)):
                    for (key, value) in bag[j].items():
                        # append the prefixes to the words to distinguish
                        # between window-sizes
                        word = MUSE._shift_left(key, highest, ind,
                                                self.highest_dim_bit,
                                                window_size)
                        bag_all_words[j][word] = value

        return bag_all_words
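
The merging step above works because MUSE._shift_left packs the dimension index and the window size into the high bits of each SFA word, so identical words coming from different windows or dimensions map to distinct bag keys. A minimal illustrative sketch of that idea (a hypothetical stand-in; the exact bit layout of the real _shift_left may differ):

def shift_left_sketch(key, highest_bits, dim, highest_dim_bit, window_size):
    # push the raw SFA word up, then pack the dimension index and the
    # window size into the freed bit positions
    return ((key << highest_dim_bit | dim) << highest_bits) | window_size

w1 = shift_left_sketch(0b1011, 6, dim=0, highest_dim_bit=2, window_size=12)
w2 = shift_left_sketch(0b1011, 6, dim=0, highest_dim_bit=2, window_size=16)
assert w1 != w2  # same word, different window sizes -> distinct bag keys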
Example #3
        def wrapper(self, data, labels=None, **kwargs):
            # Check if pandas so we can convert back
            is_pandas = isinstance(data, pd.DataFrame)
            pd_idx = data.index if is_pandas else None

            # Fit checks
            if check_fitted:
                self.check_is_fitted()

            # First convert to pandas so everything is the same format
            if labels is None:
                data = check_X(data, coerce_to_pandas=True)
            else:
                data, labels = check_X_y(data, labels, coerce_to_pandas=True)

            # Now convert it to a numpy array
            # Note sktime uses [N, C, L] whereas signature code uses shape
            # [N, L, C] (C being channels) so we must transpose.
            data = np.transpose(from_nested_to_3d_numpy(data), [0, 2, 1])

            # Apply the function to the transposed array
            if labels is None:
                output = func(self, data, **kwargs)
            else:
                output = func(self, data, labels, **kwargs)

            # Convert back
            if is_pandas and isinstance(output, np.ndarray) and not force_numpy:
                output = pd.DataFrame(index=pd_idx, data=output)

            return output
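
The transpose in the wrapper is worth seeing concretely: sktime panels are laid out [N, C, L] (instances, channels, length), while the signature code expects [N, L, C]. A quick check in plain numpy:

import numpy as np

panel = np.zeros((32, 3, 100))               # sktime layout: [N, C, L]
transposed = np.transpose(panel, [0, 2, 1])
print(transposed.shape)                      # (32, 100, 3) -> [N, L, C]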
Example #4
def test_prob_threshold_on_unit_test_data():
    """Test of ProbabilityThresholdEarlyClassifier on unit test data."""
    # load unit test data
    X_train, y_train = load_unit_test(split="train", return_X_y=True)
    X_test, y_test = load_unit_test(split="test", return_X_y=True)
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    # train probability threshold
    pt = ProbabilityThresholdEarlyClassifier(
        random_state=0,
        classification_points=[6, 16, 24],
        probability_threshold=1,
        estimator=TimeSeriesForestClassifier(n_estimators=10, random_state=0),
    )
    pt.fit(X_train, y_train)

    final_probas = np.zeros((10, 2))
    final_decisions = np.zeros(10)

    X_test = from_nested_to_3d_numpy(X_test)
    states = None
    for i in pt.classification_points:
        X = X_test[indices, :, :i]
        probas = pt.predict_proba(X)
        decisions, states = pt.decide_prediction_safety(X, probas, states)

        for n in range(10):
            if decisions[n] and final_decisions[n] == 0:
                final_probas[n] = probas[n]
                final_decisions[n] = i

    testing.assert_array_equal(final_probas, pt_unit_test_probas)
Example #5
def test_teaser_with_different_decision_maker():
    """Test of TEASER with different One-Class-Classifier."""
    X_train, y_train, X_test, y_test, indices = load_unit_data()

    # train TEASER with an IsolationForest one-class classifier
    teaser = TEASER(
        random_state=0,
        classification_points=[6, 10, 16, 24],
        estimator=TimeSeriesForestClassifier(n_estimators=10, random_state=0),
        one_class_classifier=IsolationForest(n_estimators=5),
        one_class_param_grid={"bootstrap": [True, False]},
    )
    teaser.fit(X_train, y_train)

    final_probas = np.zeros((10, 2))
    final_decisions = np.zeros(10)

    X_test = from_nested_to_3d_numpy(X_test)
    states = None
    for i in teaser.classification_points:
        X = X_test[indices, :, :i]
        probas, decisions, states = teaser.predict_proba(X, state_info=states)

        for n in range(10):
            if decisions[n] and final_decisions[n] == 0:
                final_probas[n] = probas[n]
                final_decisions[n] = i

    testing.assert_array_equal(final_probas, teaser_if_unit_test_probas)
Example #6
def test_teaser_on_unit_test_data():
    """Test of TEASER on unit test data."""
    X_train, y_train, X_test, y_test, indices = load_unit_data()

    # train TEASER
    teaser = TEASER(
        random_state=0,
        classification_points=[6, 10, 16, 24],
        estimator=TimeSeriesForestClassifier(n_estimators=10, random_state=0),
    )
    teaser.fit(X_train, y_train)

    final_probas = np.zeros((10, 2))
    final_decisions = np.zeros(10)

    X_test = from_nested_to_3d_numpy(X_test)
    states = None
    for i in teaser.classification_points:
        X = X_test[indices, :, :i]
        probas, decisions, states = teaser.predict_proba(X, state_info=states)

        for n in range(10):
            if decisions[n] and final_decisions[n] == 0:
                final_probas[n] = probas[n]
                final_decisions[n] = i

    testing.assert_array_equal(final_probas, teaser_unit_test_probas)
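
Continuing the example above: final_decisions now holds the classification point at which each case was first accepted, so accuracy and earliness fall out directly. A hedged follow-up sketch (it assumes the fitted classifier exposes an sklearn-style classes_ attribute and uses the full series length of 24 from this dataset; the scoring is illustrative, not TEASER's own formula):

preds = teaser.classes_[np.argmax(final_probas, axis=1)]
accuracy = np.mean(preds == y_test[indices])
earliness = np.mean(final_decisions) / 24  # fraction of each series seen
print(accuracy, earliness)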
Example #7
def test_from_nested_to_3d_numpy(n_instances, n_columns, n_timepoints):
    """Test from_nested_to_3d_numpy for correctness."""
    nested, _ = make_classification_problem(n_instances, n_columns, n_timepoints)
    array = from_nested_to_3d_numpy(nested)

    # check types and shapes
    assert isinstance(array, np.ndarray)
    assert array.shape == (n_instances, n_columns, n_timepoints)

    # check values of one sample series
    np.testing.assert_array_equal(nested.iloc[1, 0], array[1, 0, :])
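
The conversion is also easy to exercise by hand, without make_classification_problem. A minimal sketch (note the import path of from_nested_to_3d_numpy has moved between sktime versions; sktime.datatypes._panel._convert is one location):

import numpy as np
import pandas as pd
from sktime.datatypes._panel._convert import from_nested_to_3d_numpy

# two instances, one column, each cell holding a length-4 pd.Series
nested = pd.DataFrame({"dim_0": [pd.Series([1.0, 2.0, 3.0, 4.0]),
                                 pd.Series([5.0, 6.0, 7.0, 8.0])]})
array = from_nested_to_3d_numpy(nested)
print(array.shape)  # (2, 1, 4)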
Example #8
    def _fit_local(self,
                   local_X: pd.DataFrame,
                   local_y: np.ndarray,
                   clone: bool = False) -> None:
        """Fit the local classifier.

        Args:
            local_X (pd.DataFrame): Training data (sensor windows, in sktime 
                nested DataFrame format).
            local_y (np.ndarray): Training labels (0s and 1s).
            clone (bool, optional): Clone flag. True for internal cross 
                validation. Defaults to False.
        """
        if len(local_X.columns) > 1:
            local_X = convert.from_nested_to_3d_numpy(local_X)

        clf = self.__local_clf2 if clone else self.local_clf
        clf.fit(local_X, local_y)
Example #9
def _reproduce_early_classification_unit_test(estimator):
    X_train, y_train = load_unit_test(split="train")
    X_test, y_test = load_unit_test(split="test")
    indices = np.random.RandomState(0).choice(len(y_train), 10, replace=False)

    estimator.fit(X_train, y_train)

    final_probas = np.zeros((10, 2))
    final_decisions = np.zeros(10)

    X_test = from_nested_to_3d_numpy(X_test)
    states = None
    for i in estimator.classification_points:
        X = X_test[indices, :, :i]
        probas, decisions, states = estimator.predict_proba(X, state_info=states)

        for n in range(10):
            if decisions[n] and final_decisions[n] == 0:
                final_probas[n] = probas[n]
                final_decisions[n] = i

    return final_probas
Example #10
    def transform_single_feature(self, X, feature, case_id=None):
        """Transform data into a specified catch22 feature.

        Parameters
        ----------
        X : pd.DataFrame
            Input time series. Currently univariate only.
        feature : int or str
            Catch22 feature ID (int) or catch22 feature name (str).
        case_id : int, optional
            Identifier for the current set of cases. If case_id is not None
            and matches the previously used case_id, calculations from
            previous features will be reused.

        Returns
        -------
        Numpy array containing a catch22 feature for each input series.
        """
        if isinstance(feature, (int, np.integer, float, np.floating)):
            if feature > 21 or feature < 0:
                raise ValueError("Invalid catch22 feature ID")
        elif isinstance(feature, str):
            if feature in feature_names:
                feature = feature_names.index(feature)
            else:
                raise ValueError("Invalid catch22 feature name")
        else:
            raise ValueError("catch22 feature name or ID required")

        if isinstance(X, pd.DataFrame):
            X = from_nested_to_3d_numpy(X)

        if len(X.shape) > 2:
            n_instances, n_dims, series_length = X.shape

            if n_dims > 1:
                raise ValueError(
                    "transform_single_feature can only handle univariate series "
                    "currently."
                )

            X = np.reshape(X, (n_instances, -1))
        else:
            n_instances, series_length = X.shape

        if case_id is not None:
            if case_id != self._case_id:
                self._case_id = case_id
                self._st_n_instances = n_instances
                self._st_series_length = series_length
                self._outlier_series = [None] * n_instances
                self._smin = [None] * n_instances
                self._smax = [None] * n_instances
                self._smean = [None] * n_instances
                self._fft = [None] * n_instances
                self._ac = [None] * n_instances
                self._acfz = [None] * n_instances
            else:
                if (
                    n_instances != self._st_n_instances
                    or series_length != self._st_series_length
                ):
                    raise ValueError(
                        "Catch22: case_id is the same, but n_instances and "
                        "series_length do not match the last seen values for "
                        "the single feature transform."
                    )

        c22_list = Parallel(n_jobs=self.n_jobs)(
            delayed(self._transform_case_single)(
                X[i],
                feature,
                case_id,
                i,
            )
            for i in range(n_instances)
        )

        if self.replace_nans:
            c22_list = np.nan_to_num(c22_list, copy=False, nan=0, posinf=0, neginf=0)

        return np.asarray(c22_list)
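
A hedged usage sketch of the case_id caching described in the docstring: passing the same case_id twice lets the second call reuse the per-series intermediates (outlier series, FFT, autocorrelation) computed by the first. It assumes sktime's Catch22 transformer and the canonical catch22 feature names; depending on the sktime version, a prior fit call may also be required:

from sktime.datasets import load_unit_test
from sktime.transformations.panel.catch22 import Catch22

X, _ = load_unit_test(split="train", return_X_y=True)
c22 = Catch22(replace_nans=True)
# same case_id -> cached intermediates are reused by the second feature
f1 = c22.transform_single_feature(X, "DN_HistogramMode_5", case_id=1)
f2 = c22.transform_single_feature(X, "DN_HistogramMode_10", case_id=1)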
Example #11
    def _fit(self, X, y):
        """Build a WEASEL+MUSE classifiers from the training set (X, y).

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_instances, n_dims]
            Nested dataframe with (potentially multivariate) time series in
            cells.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self : object
        """
        y = np.asarray(y)
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        # add first order differences in each dimension to TS
        if self.use_first_order_differences:
            X = self._add_first_order_differences(X)

        # Window length parameter space dependent on series length
        self.col_names = X.columns

        rng = check_random_state(self.random_state)

        self.n_dims = len(self.col_names)
        self.highest_dim_bit = (math.ceil(math.log2(self.n_dims))) + 1
        self.highest_bits = np.zeros(self.n_dims)

        self.SFA_transformers = [[] for _ in range(self.n_dims)]

        # the words of all dimensions and all time series
        all_words = [dict() for _ in range(X.shape[0])]

        # On each dimension, perform SFA
        for ind, column in enumerate(self.col_names):
            X_dim = X[[column]]
            X_dim = from_nested_to_3d_numpy(X_dim)
            # TODO compute minimum over all ts?
            series_length = X_dim.shape[-1]

            # increment window size in steps of 'win_inc'
            win_inc = self._compute_window_inc(series_length)

            self.max_window = int(min(series_length, self.max_window))
            if self.min_window > self.max_window:
                raise ValueError(
                    f"Error in MUSE, min_window = {self.min_window} is larger"
                    f" than max_window = {self.max_window} (series length is"
                    f" {series_length}). Try setting min_window to be smaller"
                    f" than the series length in the constructor, but note"
                    f" that the classifier may not work at all with very"
                    f" short series.")
            self.window_sizes.append(
                list(range(self.min_window, self.max_window, win_inc)))

            self.highest_bits[ind] = math.ceil(math.log2(self.max_window)) + 1

            for window_size in self.window_sizes[ind]:

                transformer = SFA(
                    word_length=rng.choice(self.word_lengths),
                    alphabet_size=self.alphabet_size,
                    window_size=window_size,
                    norm=rng.choice(self.norm_options),
                    anova=self.anova,
                    binning_method=rng.choice(self.binning_strategies),
                    bigrams=self.bigrams,
                    remove_repeat_words=False,
                    lower_bounding=False,
                    save_words=False,
                )

                sfa_words = transformer.fit_transform(X_dim, y)

                self.SFA_transformers[ind].append(transformer)
                bag = sfa_words[0]

                # chi-squared test to keep only relevant features
                relevant_features = {}
                apply_chi_squared = self.p_threshold < 1
                if apply_chi_squared:
                    vectorizer = DictVectorizer(sparse=True,
                                                dtype=np.int32,
                                                sort=False)
                    bag_vec = vectorizer.fit_transform(bag)

                    chi2_statistics, p = chi2(bag_vec, y)
                    relevant_features_idx = np.where(p <= self.p_threshold)[0]
                    relevant_features = set(
                        np.array(
                            vectorizer.feature_names_)[relevant_features_idx])

                # merging bag-of-patterns of different window_sizes
                # to single bag-of-patterns with prefix indicating
                # the used window-length
                highest = np.int32(self.highest_bits[ind])
                for j in range(len(bag)):
                    for (key, value) in bag[j].items():
                        # chi-squared test
                        if not apply_chi_squared or key in relevant_features:
                            # append the prefixes to the words to
                            # distinguish between window-sizes
                            word = MUSE._shift_left(key, highest, ind,
                                                    self.highest_dim_bit,
                                                    window_size)
                            all_words[j][word] = value

        self.clf = make_pipeline(
            DictVectorizer(sparse=True, sort=False),
            # StandardScaler(with_mean=True, copy=False),
            LogisticRegression(
                max_iter=5000,
                solver="liblinear",
                dual=True,
                # class_weight="balanced",
                penalty="l2",
                random_state=self.random_state,
            ),
        )

        self.clf.fit(all_words, y)
        return self
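
The chi-squared filtering inside _fit is self-contained enough to demonstrate on toy data: vectorize the bag-of-words dicts, score each feature against the labels, and keep only the keys whose p-value clears the threshold:

import numpy as np
from sklearn.feature_extraction import DictVectorizer
from sklearn.feature_selection import chi2

bag = [{"aa": 3, "ab": 1}, {"aa": 2, "ba": 4}, {"ab": 1, "ba": 5}]
y = np.array([0, 0, 1])

vectorizer = DictVectorizer(sparse=True, dtype=np.int32, sort=False)
bag_vec = vectorizer.fit_transform(bag)
_, p = chi2(bag_vec, y)
relevant = set(np.array(vectorizer.feature_names_)[p <= 0.5])
print(relevant)  # only the words whose counts separate the classes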
Example #12
def load_from_tsfile(
    full_file_path_and_name,
    replace_missing_vals_with="NaN",
    return_y=True,
):
    """Load time series data into X and (optionally) y.

    Data from a .ts file is loaded into a 2D (univariate) or 3D
    (multivariate) numpy array if the series are of equal length, or into a
    nested pandas DataFrame if they are of unequal length. If present, y is
    loaded into a 1D array.

    Parameters
    ----------
    full_file_path_and_name: str
        The full pathname of the .ts file to read.
    replace_missing_vals_with: str, default NaN
       The value that missing values in the text file should be replaced
       with prior to parsing.
    return_y: bool, default=True
       Whether to return the y variable, if it is present.

    Returns
    -------
    X: DataFrame or ndarray
    y (optional): ndarray.
    """
    # Initialize flags and variables used when parsing the file
    is_first_case = True
    instance_list = []
    class_val_list = []
    line_num = 0
    num_dimensions = 0
    num_cases = 0
    with open(full_file_path_and_name, "r", encoding="utf-8") as file:
        _meta_data = _read_header(file, full_file_path_and_name)
        for line in file:
            num_cases += 1
            line = line.replace("?", replace_missing_vals_with)
            dimensions = line.split(":")
            # If first instance then note the number of dimensions.
            # This must be the same for all cases.
            if is_first_case:
                num_dimensions = len(dimensions)
                if _meta_data["has_class_labels"]:
                    num_dimensions -= 1
                instance_list = [[] for _ in range(num_dimensions)]
                is_first_case = False
                _meta_data["num_dimensions"] = num_dimensions
            # See how many dimensions a case has
            this_line_num_dim = len(dimensions)
            if _meta_data["has_class_labels"]:
                this_line_num_dim -= 1
            if this_line_num_dim != _meta_data["num_dimensions"]:
                raise IOError(
                    f"Error in input {full_file_path_and_name}: all cases"
                    f" must have the same number of dimensions"
                    f" ({num_dimensions}). Case {num_cases} has"
                    f" {this_line_num_dim}.")
            # Process the data for each dimension
            for dim in range(0, _meta_data["num_dimensions"]):
                dimension = dimensions[dim].strip()
                if dimension:
                    data_series = dimension.split(",")
                    data_series = [float(i) for i in data_series]
                    instance_list[dim].append(pd.Series(data_series))
                else:
                    instance_list[dim].append(pd.Series(dtype="object"))
            if _meta_data["has_class_labels"]:
                class_val_list.append(
                    dimensions[_meta_data["num_dimensions"]].strip())
                line_num += 1
    # Check that the file was not empty
    if line_num:
        # Create a DataFrame from the data parsed
        data = pd.DataFrame(dtype=np.float32)
        for dim in range(0, _meta_data["num_dimensions"]):
            data["dim_" + str(dim)] = instance_list[dim]
        if not _meta_data["has_timestamps"] and _meta_data["is_equal_length"]:
            if _meta_data["is_univariate"]:
                data = from_nested_to_2d_np_array(data)
            else:
                data = from_nested_to_3d_numpy(data)
        if return_y and not _meta_data["has_class_labels"]:
            raise IOError(f"class labels have been requested, but they "
                          f"are not present in the file "
                          f"{full_file_path_and_name}")
        if _meta_data["has_class_labels"] and return_y:
            return data, np.asarray(class_val_list)
        else:
            return data
    else:
        raise IOError(
            f"Empty file {full_file_path_and_name} with header info but no "
            f"cases")
Example #13
def check_X(
    X,
    enforce_univariate=False,
    enforce_min_instances=1,
    enforce_min_columns=1,
    coerce_to_numpy=False,
    coerce_to_pandas=False,
):
    """Validate input data.

    Parameters
    ----------
    X : pd.DataFrame or np.array
        Input data
    enforce_univariate : bool, optional (default=False)
        Enforce that X is univariate.
    enforce_min_instances : int, optional (default=1)
        Enforce minimum number of instances.
    enforce_min_columns : int, optional (default=1)
        Enforce minimum number of columns (or time-series variables).
    coerce_to_numpy : bool, optional (default=False)
        If True, X will be coerced to a 3-dimensional numpy array.
    coerce_to_pandas : bool, optional (default=False)
        If True, X will be coerced to a nested pandas DataFrame.

    Returns
    -------
    X : pd.DataFrame or np.array
        Checked and possibly converted input data

    Raises
    ------
    ValueError
        If X is invalid input data
    """
    # check input type
    if coerce_to_pandas and coerce_to_numpy:
        raise ValueError(
            "`coerce_to_pandas` and `coerce_to_numpy` cannot both be set to True"
        )

    if not isinstance(X, VALID_X_TYPES):
        raise ValueError(f"X must be a pd.DataFrame or a np.array, "
                         f"but found: {type(X)}")

    # check np.array
    # check first if we have the right number of dimensions, otherwise we
    # may not be able to get the shape of the second dimension below
    if isinstance(X, np.ndarray):
        if not X.ndim == 3:
            raise ValueError(
                f"If passed as a np.array, X must be a 3-dimensional "
                f"array, but found shape: {X.shape}")
        if coerce_to_pandas:
            X = from_3d_numpy_to_nested(X)

    # enforce minimum number of columns
    n_columns = X.shape[1]
    if n_columns < enforce_min_columns:
        raise ValueError(
            f"X must contain at least: {enforce_min_columns} columns, "
            f"but found only: {n_columns}.")

    # enforce univariate data
    if enforce_univariate and n_columns > 1:
        raise ValueError(
            f"X must be univariate with X.shape[1] == 1, but found: "
            f"X.shape[1] == {n_columns}.")

    # enforce minimum number of instances
    if enforce_min_instances > 0:
        _enforce_min_instances(X, min_instances=enforce_min_instances)

    # check pd.DataFrame
    if isinstance(X, pd.DataFrame):
        if not is_nested_dataframe(X):
            raise ValueError(
                "If passed as a pd.DataFrame, X must be a nested "
                "pd.DataFrame, with pd.Series or np.arrays inside cells.")
        # convert pd.DataFrame
        if coerce_to_numpy:
            X = from_nested_to_3d_numpy(X)

    return X
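
A quick sketch of the two coercion flags (which the first check makes mutually exclusive), using the check_X defined above with its helper imports assumed in scope:

import numpy as np

X3 = np.random.random((10, 2, 50))               # valid 3D panel
X_nested = check_X(X3, coerce_to_pandas=True)    # -> nested pd.DataFrame
X_back = check_X(X_nested, coerce_to_numpy=True)
print(X_back.shape)                              # (10, 2, 50)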
Example #14
    def _fit(self, X, y):
        """Build an ensemble of 1-NN classifiers from the training set (X, y).

        Parameters
        ----------
        X : array-like or sparse matrix of shape = [n_instances, n_columns]
            The training input samples. If a pandas DataFrame is passed,
            it must have a single column; ElasticEnsemble is not configured
            to handle multivariate data.
        y : array-like, shape = [n_instances]
            The class labels.

        Returns
        -------
        self : object
        """
        # Derivative DTW (DDTW) uses the regular DTW algorithm on data that
        # have been transformed into derivatives. To increase the efficiency
        # of DDTW we can pre-transform the data into derivatives once, and
        # then call the standard DTW algorithm on them, rather than
        # transforming each series on every distance calculation. Note that
        # using DDTW elsewhere will not benefit from this speed enhancement.
        if "ddtw" in self.distance_measures or "wddtw" in self.distance_measures:
            der_X = DerivativeSlopeTransformer().fit_transform(X)
            # convert back to numpy
            if isinstance(der_X, pd.DataFrame):
                der_X = from_nested_to_3d_numpy(der_X)
        else:
            der_X = None

        self.train_accs_by_classifier = np.zeros(len(self.distance_measures))
        self.estimators_ = [None] * len(self.distance_measures)
        rand = np.random.RandomState(self.random_state)

        # The default EE uses all training instances for setting parameters,
        # and 100 parameter options per elastic measure. The
        # proportion_train_in_param_finding and proportion_of_param_options
        # attributes of this class can be used to control this, using fewer
        # cases to optimise parameters on the training data and/or fewer
        # parameter options.
        #
        # For using fewer training instances the appropriate number of cases must be
        # sampled from the data. This is achieved through the use of a deterministic
        # StratifiedShuffleSplit
        #
        # For using fewer parameter options a RandomizedSearchCV is used in
        # place of a GridSearchCV

        param_train_x = None
        der_param_train_x = None
        param_train_y = None

        # If using less cases for parameter optimisation, use the
        # StratifiedShuffleSplit:
        if self.proportion_train_in_param_finding < 1:
            if self.verbose > 0:
                print(  # noqa: T001
                    "Restricting training cases for parameter optimisation: ",
                    end="")
            sss = StratifiedShuffleSplit(
                n_splits=1,
                test_size=1 - self.proportion_train_in_param_finding,
                random_state=rand,
            )
            for train_index, _ in sss.split(X, y):
                param_train_x = X[train_index, :]
                param_train_y = y[train_index]
                if der_X is not None:
                    der_param_train_x = der_X[train_index, :]
                if self.verbose > 0:
                    print(  # noqa: T001
                        "using " + str(len(param_train_x)) +
                        " training cases instead of " + str(len(X)) +
                        " for parameter optimisation")
        # else, use the full training data for optimising parameters
        else:
            if self.verbose > 0:
                print(  # noqa: T001
                    "Using all training cases for parameter optimisation")
            param_train_x = X
            param_train_y = y
            if der_X is not None:
                der_param_train_x = der_X

        self.constituent_build_times = []

        if self.verbose > 0:
            print(  # noqa: T001
                "Using " + str(100 * self.proportion_of_param_options) +
                " parameter "
                "options per "
                "measure")
        for dm in range(0, len(self.distance_measures)):
            this_measure = self.distance_measures[dm]

            # uses the appropriate training data as required (either full or
            # smaller sample as per the StratifiedShuffleSplit)
            param_train_to_use = param_train_x
            full_train_to_use = X
            if this_measure == "ddtw" or this_measure == "wddtw":
                param_train_to_use = der_param_train_x
                full_train_to_use = der_X
                if this_measure == "ddtw":
                    this_measure = "dtw"
                elif this_measure == "wddtw":
                    this_measure = "wdtw"

            start_build_time = time.time()
            if self.verbose > 0:
                if (self.distance_measures[dm] == "ddtw"
                        or self.distance_measures[dm] == "wddtw"):
                    print(  # noqa: T001
                        "Currently evaluating "
                        + str(self.distance_measures[dm])
                        + " (implemented as " + str(this_measure)
                        + " with pre-transformed derivative data)")
                else:
                    print(  # noqa: T001
                        "Currently evaluating "
                        + str(self.distance_measures[dm]))

            # If 100 parameter options are being considered per measure,
            # use a GridSearchCV
            if self.proportion_of_param_options == 1:

                grid = GridSearchCV(
                    estimator=KNeighborsTimeSeriesClassifier(
                        distance=this_measure, n_neighbors=1),
                    param_grid=ElasticEnsemble._get_100_param_options(
                        self.distance_measures[dm], X),
                    cv=LeaveOneOut(),
                    scoring="accuracy",
                    n_jobs=self._threads_to_use,
                    verbose=self.verbose,
                )
                grid.fit(param_train_to_use, param_train_y)

            # Else, use RandomizedSearchCV to randomly sample parameter
            # options for each measure
            else:
                grid = RandomizedSearchCV(
                    estimator=KNeighborsTimeSeriesClassifier(
                        distance=this_measure, n_neighbors=1),
                    param_distributions=ElasticEnsemble._get_100_param_options(
                        self.distance_measures[dm], X),
                    n_iter=int(100 * self.proportion_of_param_options),
                    cv=LeaveOneOut(),
                    scoring="accuracy",
                    n_jobs=self._threads_to_use,
                    random_state=rand,
                    verbose=self.verbose,
                )
                grid.fit(param_train_to_use, param_train_y)

            if self.majority_vote:
                acc = 1
            # once the best parameter option has been estimated on the
            # training data, perform a final pass with this parameter option
            # to get the individual predictions with cross_val_predict
            # (Note: optimisation potentially possible here if a GridSearchCV
            # was used previously. TO-DO: determine how to extract
            # predictions for the best param option from GridSearchCV)
            else:
                best_model = KNeighborsTimeSeriesClassifier(
                    n_neighbors=1,
                    distance=this_measure,
                    distance_params=grid.best_params_["distance_params"],
                    n_jobs=self._threads_to_use,
                )
                preds = cross_val_predict(best_model,
                                          full_train_to_use,
                                          y,
                                          cv=LeaveOneOut())
                acc = accuracy_score(y, preds)

            if self.verbose > 0:
                print(  # noqa: T001
                    "Training accuracy for "
                    + str(self.distance_measures[dm]) + ": " + str(acc)
                    + " (with parameter setting: "
                    + str(grid.best_params_["distance_params"]) + ")")

            # Finally, reset the classifier for this measure and parameter
            # option, ready to be called for test classification
            best_model = KNeighborsTimeSeriesClassifier(
                n_neighbors=1,
                distance=this_measure,
                distance_params=grid.best_params_["distance_params"],
            )
            best_model.fit(full_train_to_use, y)
            end_build_time = time.time()

            self.constituent_build_times.append(
                str(end_build_time - start_build_time))
            self.estimators_[dm] = best_model
            self.train_accs_by_classifier[dm] = acc
        return self
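
The DDTW pre-transform described in the comments at the top of _fit can also be used on its own: compute the derivatives once, then run plain "dtw" on them, which is equivalent to "ddtw" on the raw series. A hedged sketch, given training data X_train, y_train and assuming sktime's DerivativeSlopeTransformer and KNeighborsTimeSeriesClassifier (both already imported by this module):

der_X = DerivativeSlopeTransformer().fit_transform(X_train)
if isinstance(der_X, pd.DataFrame):
    der_X = from_nested_to_3d_numpy(der_X)

# plain DTW on the pre-computed derivatives: the derivative is taken once
# here instead of inside every pairwise distance call
knn = KNeighborsTimeSeriesClassifier(n_neighbors=1, distance="dtw")
knn.fit(der_X, y_train)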