Ejemplo n.º 1
0
    def _fit(self, X, y):
        """Fit the ensemble of interval-based estimators to the training data.

        Resolves the base estimator and the interval settings, then fits
        ``n_estimators`` estimators through ``_fit_estimator`` using joblib.

        Parameters
        ----------
        X : np.ndarray
            3D array; its shape is unpacked as
            (n_instances, n_dims, series_length).
        y : array-like
            Target values for the instances in ``X``.

        Raises
        ------
        ValueError
            If ``base_estimator`` is not "DTC", "CIT" or a ``BaseEstimator``
            instance.
        """
        self._n_jobs = check_n_jobs(self.n_jobs)

        self.n_instances, self.n_dims, self.series_length = X.shape
        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        if self.base_estimator == "DTC":
            self._base_estimator = DecisionTreeClassifier(criterion="entropy")
        elif self.base_estimator == "CIT":
            self._base_estimator = ContinuousIntervalTree()
        elif isinstance(self.base_estimator, BaseEstimator):
            self._base_estimator = self.base_estimator
        else:
            raise ValueError("DrCIF invalid base estimator given.")

        # Every working copy below is rebuilt from its public parameter on each
        # call, so a value clamped during a previous fit (for example on
        # shorter series) never carries over into this one.
        self._n_intervals = self.n_intervals
        if self._n_intervals is None:
            self._n_intervals = int(
                math.sqrt(self.series_length) * math.sqrt(self.n_dims)
            )
        if self._n_intervals <= 0:
            self._n_intervals = 1

        # The attribute subsample is capped at 25.
        self._att_subsample_size = min(self.att_subsample_size, 25)

        # An interval cannot be longer than the series; otherwise the minimum
        # length is raised to at least 3.
        if self.series_length < self.min_interval:
            self._min_interval = self.series_length
        elif self.min_interval < 3:
            self._min_interval = 3
        else:
            self._min_interval = self.min_interval

        # Default maximum is half the series length (true division, so this
        # may be a float); it is never allowed below the minimum.
        if self.max_interval is None:
            self._max_interval = self.series_length / 2
        else:
            self._max_interval = self.max_interval
        if self._max_interval < self._min_interval:
            self._max_interval = self._min_interval

        fit = Parallel(n_jobs=self._n_jobs)(
            delayed(self._fit_estimator)(X, y, i)
            for i in range(self.n_estimators)
        )

        # Each job returns a 4-tuple; transpose into per-field tuples.
        self.estimators_, self.intervals, self.dims, self.atts = zip(*fit)
Ejemplo n.º 2
0
    def _fit(self, X, y):
        """Fit the ensemble of interval-based estimators to the training data.

        Resolves the base estimator and the interval settings, then fits
        ``n_estimators`` estimators through ``_fit_estimator`` using joblib.

        Parameters
        ----------
        X : np.ndarray
            3D array; its shape is unpacked as
            (n_instances_, n_dims_, series_length_).
        y : array-like
            Target values, forwarded unchanged to ``_fit_estimator``.

        Returns
        -------
        self
            The fitted object.

        Raises
        ------
        ValueError
            If ``base_estimator`` is neither the string "dtc"/"cit" (in any
            letter case) nor a ``BaseEstimator`` instance.
        """
        self.n_instances_, self.n_dims_, self.series_length_ = X.shape

        # Only strings have ``.lower()``; calling it on an estimator instance
        # would raise AttributeError before the isinstance branch is reached.
        base = self.base_estimator
        base_name = base.lower() if isinstance(base, str) else None
        if base_name == "dtc":
            self._base_estimator = DecisionTreeClassifier(criterion="entropy")
        elif base_name == "cit":
            self._base_estimator = ContinuousIntervalTree()
        elif isinstance(base, BaseEstimator):
            self._base_estimator = base
        else:
            raise ValueError("DrCIF invalid base estimator given.")

        # Every working copy below is rebuilt from its public parameter on each
        # call, so a value clamped during a previous fit never carries over.
        self._n_intervals = self.n_intervals
        if self._n_intervals is None:
            self._n_intervals = int(
                math.sqrt(self.series_length_) * math.sqrt(self.n_dims_)
            )
        if self._n_intervals <= 0:
            self._n_intervals = 1

        # The attribute subsample is capped at 25.
        self._att_subsample_size = min(self.att_subsample_size, 25)

        # An interval cannot be longer than the series; otherwise the minimum
        # length is raised to at least 3.
        if self.series_length_ < self.min_interval:
            self._min_interval = self.series_length_
        elif self.min_interval < 3:
            self._min_interval = 3
        else:
            self._min_interval = self.min_interval

        # Default maximum is half the series length (true division, so this
        # may be a float); it is never allowed below the minimum.
        if self.max_interval is None:
            self._max_interval = self.series_length_ / 2
        else:
            self._max_interval = self.max_interval
        if self._max_interval < self._min_interval:
            self._max_interval = self._min_interval

        fit = Parallel(n_jobs=self._threads_to_use)(
            delayed(self._fit_estimator)(X, y, i)
            for i in range(self.n_estimators)
        )

        # Each job returns a 4-tuple; transpose into per-field tuples.
        self.estimators_, self.intervals_, self.dims_, self.atts_ = zip(*fit)

        return self
Ejemplo n.º 3
0
    def _fit(self, X, y):
        """Fit the ensemble on three representations of the training series.

        Derives two extra representations from ``X``: the magnitude of the
        FFT of the zero-padded series (first half of the spectrum), and the
        first-order differences along the last axis. It then resolves the
        per-representation interval settings and fits estimators through
        ``_fit_estimator``, either in batches until a time contract expires
        or for a fixed number of estimators.

        Parameters
        ----------
        X : np.ndarray
            3D array; its shape is unpacked as
            (n_instances, n_dims, series_length).
        y : array-like
            Target values for the instances in ``X``.

        Raises
        ------
        ValueError
            If ``base_estimator`` is not "DTC", "CIT" or a ``BaseEstimator``
            instance, or if ``n_intervals``, ``min_interval`` or
            ``max_interval`` is not of an accepted type / list length.
        """
        self._n_jobs = check_n_jobs(self.n_jobs)

        self.n_instances, self.n_dims, self.series_length = X.shape
        self.n_classes = np.unique(y).shape[0]
        self.classes_ = class_distribution(np.asarray(y).reshape(-1, 1))[0][0]

        time_limit = self.time_limit_in_minutes * 60
        start_time = time.time()
        train_time = 0

        if self.base_estimator == "DTC":
            self._base_estimator = DecisionTreeClassifier(criterion="entropy")
        elif self.base_estimator == "CIT":
            self._base_estimator = ContinuousIntervalTree()
        elif isinstance(self.base_estimator, BaseEstimator):
            self._base_estimator = self.base_estimator
        else:
            raise ValueError("DrCIF invalid base estimator given.")

        # Zero-pad the last axis up to the next power of two, then keep the
        # magnitude of the first half of the FFT output.
        padded_length = int(math.pow(2, math.ceil(math.log(self.series_length, 2))))
        padding = np.zeros(
            (self.n_instances, self.n_dims, padded_length - self.series_length)
        )
        X_p = np.concatenate((X, padding), axis=2)
        X_p = np.abs(np.fft.fft(X_p)[:, :, : int(X_p.shape[2] / 2)])

        # First-order differences along the last axis.
        X_d = np.diff(X, 1)

        # Lengths of the three representations, in the order used everywhere
        # below: original series, spectrum, differences.
        rep_lengths = (self.series_length, X_p.shape[2], X_d.shape[2])

        if self.n_intervals is None:
            self._n_intervals = [
                4 + int((math.sqrt(length) * math.sqrt(self.n_dims)) / 3)
                for length in rep_lengths
            ]
        elif isinstance(self.n_intervals, int):
            self._n_intervals = [self.n_intervals] * 3
        elif isinstance(self.n_intervals, list) and len(self.n_intervals) == 3:
            # Copy: the list is clamped in place below and the caller's
            # parameter must not be modified.
            self._n_intervals = list(self.n_intervals)
        else:
            raise ValueError("DrCIF n_intervals must be an int or list of length 3.")
        for i, n in enumerate(self._n_intervals):
            if n <= 0:
                self._n_intervals[i] = 1

        # Always assigned, so a refit never reuses an earlier value; capped at 25.
        self._att_subsample_size = min(self.att_subsample_size, 25)

        if isinstance(self.min_interval, int):
            self._min_interval = [self.min_interval] * 3
        elif isinstance(self.min_interval, list) and len(self.min_interval) == 3:
            # Copy for the same reason as n_intervals.
            self._min_interval = list(self.min_interval)
        else:
            raise ValueError("DrCIF min_interval must be an int or list of length 3.")
        # A minimum interval cannot exceed the length of its representation.
        for i, length in enumerate(rep_lengths):
            if length < self._min_interval[i]:
                self._min_interval[i] = length

        if self.max_interval is None:
            # True division: these defaults may be floats.
            self._max_interval = [length / 2 for length in rep_lengths]
        elif isinstance(self.max_interval, int):
            self._max_interval = [self.max_interval] * 3
        elif isinstance(self.max_interval, list) and len(self.max_interval) == 3:
            # Copy for the same reason as n_intervals.
            self._max_interval = list(self.max_interval)
        else:
            raise ValueError("DrCIF max_interval must be an int or list of length 3.")
        for i, n in enumerate(self._max_interval):
            if n < self._min_interval[i]:
                self._max_interval[i] = self._min_interval[i]

        self.total_intervals = sum(self._n_intervals)

        if time_limit > 0:
            # Contracted fit: build estimators in batches of ``_n_jobs`` until
            # the time limit or the estimator cap is reached.
            self._n_estimators = 0
            self.estimators_ = []
            self.intervals = []
            self.atts = []
            self.dims = []
            self.transformed_data = []

            while (
                train_time < time_limit
                and self._n_estimators < self.contract_max_n_estimators
            ):
                # NOTE(review): every batch passes the same indices
                # 0.._n_jobs-1; if _fit_estimator derives its random seed
                # from this index, batches would repeat - confirm.
                fit = Parallel(n_jobs=self._n_jobs)(
                    delayed(self._fit_estimator)(X, X_p, X_d, y, i)
                    for i in range(self._n_jobs)
                )

                estimators, intervals, dims, atts, transformed_data = zip(*fit)

                self.estimators_ += estimators
                self.intervals += intervals
                self.atts += atts
                self.dims += dims
                self.transformed_data += transformed_data

                self._n_estimators += self._n_jobs
                train_time = time.time() - start_time
        else:
            # Fixed-size fit. ``_n_estimators`` is not set in this branch, so
            # it must already hold the requested estimator count.
            fit = Parallel(n_jobs=self._n_jobs)(
                delayed(self._fit_estimator)(X, X_p, X_d, y, i)
                for i in range(self._n_estimators)
            )

            # Each job returns a 5-tuple; transpose into per-field tuples.
            (
                self.estimators_,
                self.intervals,
                self.dims,
                self.atts,
                self.transformed_data,
            ) = zip(*fit)
Ejemplo n.º 4
0
    def _fit(self, X, y):
        """Fit the ensemble on three representations of the training series.

        Derives two extra representations from ``X``: the magnitude of the
        FFT of the zero-padded series (first half of the spectrum), and the
        first-order differences along the last axis. It then resolves the
        per-representation interval settings and fits estimators through
        ``_fit_estimator``, either in batches until a time contract expires
        or for a fixed number of estimators.

        Parameters
        ----------
        X : np.ndarray
            3D array; its shape is unpacked as
            (n_instances_, n_dims_, series_length_).
        y : array-like
            Target values, forwarded unchanged to ``_fit_estimator``.

        Returns
        -------
        self
            The fitted object.

        Raises
        ------
        ValueError
            If ``base_estimator`` is neither the string "dtc"/"cit" (in any
            letter case) nor a ``BaseEstimator`` instance, or if
            ``n_intervals``, ``min_interval`` or ``max_interval`` is not of an
            accepted type / list length.
        """
        self.n_instances_, self.n_dims_, self.series_length_ = X.shape

        time_limit = self.time_limit_in_minutes * 60
        start_time = time.time()
        train_time = 0

        # Only strings have ``.lower()``; calling it on an estimator instance
        # would raise AttributeError before the isinstance branch is reached.
        base = self.base_estimator
        base_name = base.lower() if isinstance(base, str) else None
        if base_name == "dtc":
            self._base_estimator = DecisionTreeClassifier(criterion="entropy")
        elif base_name == "cit":
            self._base_estimator = ContinuousIntervalTree()
        elif isinstance(base, BaseEstimator):
            self._base_estimator = base
        else:
            raise ValueError("DrCIF invalid base estimator given.")

        # Zero-pad the last axis up to the next power of two, then keep the
        # magnitude of the first half of the FFT output.
        padded_length = int(
            math.pow(2, math.ceil(math.log(self.series_length_, 2)))
        )
        padding = np.zeros((
            self.n_instances_,
            self.n_dims_,
            padded_length - self.series_length_,
        ))
        X_p = np.concatenate((X, padding), axis=2)
        X_p = np.abs(np.fft.fft(X_p)[:, :, :int(X_p.shape[2] / 2)])

        # First-order differences along the last axis.
        X_d = np.diff(X, 1)

        # Lengths of the three representations, in the order used everywhere
        # below: original series, spectrum, differences.
        rep_lengths = (self.series_length_, X_p.shape[2], X_d.shape[2])

        if self.n_intervals is None:
            self._n_intervals = [
                4 + int((math.sqrt(length) * math.sqrt(self.n_dims_)) / 3)
                for length in rep_lengths
            ]
        elif isinstance(self.n_intervals, int):
            self._n_intervals = [self.n_intervals] * 3
        elif isinstance(self.n_intervals, list) and len(self.n_intervals) == 3:
            # Copy: the list is clamped in place below and the caller's
            # parameter must not be modified.
            self._n_intervals = list(self.n_intervals)
        else:
            raise ValueError(
                "DrCIF n_intervals must be an int or list of length 3.")
        for i, n in enumerate(self._n_intervals):
            if n <= 0:
                self._n_intervals[i] = 1

        # Always assigned, so a refit never reuses an earlier value; capped at 29.
        self._att_subsample_size = min(self.att_subsample_size, 29)

        if isinstance(self.min_interval, int):
            self._min_interval = [self.min_interval] * 3
        elif isinstance(self.min_interval, list) and len(
                self.min_interval) == 3:
            # Copy for the same reason as n_intervals.
            self._min_interval = list(self.min_interval)
        else:
            raise ValueError(
                "DrCIF min_interval must be an int or list of length 3.")
        # A minimum interval cannot exceed the length of its representation.
        for i, length in enumerate(rep_lengths):
            if length < self._min_interval[i]:
                self._min_interval[i] = length

        if self.max_interval is None:
            # Half of each representation's length, truncated to an int.
            self._max_interval = [int(length / 2) for length in rep_lengths]
        elif isinstance(self.max_interval, int):
            self._max_interval = [self.max_interval] * 3
        elif isinstance(self.max_interval, list) and len(
                self.max_interval) == 3:
            # Copy for the same reason as n_intervals.
            self._max_interval = list(self.max_interval)
        else:
            raise ValueError(
                "DrCIF max_interval must be an int or list of length 3.")
        for i, n in enumerate(self._max_interval):
            if n < self._min_interval[i]:
                self._max_interval[i] = self._min_interval[i]

        self.total_intervals_ = sum(self._n_intervals)

        if time_limit > 0:
            # Contracted fit: build estimators in batches of
            # ``_threads_to_use`` until the time limit or the estimator cap
            # is reached.
            self._n_estimators = 0
            self.estimators_ = []
            self.intervals_ = []
            self.atts_ = []
            self.dims_ = []
            self.transformed_data_ = []

            while (train_time < time_limit
                   and self._n_estimators < self.contract_max_n_estimators):
                # NOTE(review): every batch passes the same indices
                # 0.._threads_to_use-1; if _fit_estimator derives its random
                # seed from this index, batches would repeat - confirm.
                fit = Parallel(n_jobs=self._threads_to_use)(
                    delayed(self._fit_estimator)(X, X_p, X_d, y, i)
                    for i in range(self._threads_to_use))

                estimators, intervals, dims, atts, transformed_data = zip(*fit)

                self.estimators_ += estimators
                self.intervals_ += intervals
                self.atts_ += atts
                self.dims_ += dims
                self.transformed_data_ += transformed_data

                self._n_estimators += self._threads_to_use
                train_time = time.time() - start_time
        else:
            # Fixed-size fit. ``_n_estimators`` is not set in this branch, so
            # it must already hold the requested estimator count.
            fit = Parallel(n_jobs=self._threads_to_use)(
                delayed(self._fit_estimator)(X, X_p, X_d, y, i)
                for i in range(self._n_estimators))

            # Each job returns a 5-tuple; transpose into per-field tuples.
            (
                self.estimators_,
                self.intervals_,
                self.dims_,
                self.atts_,
                self.transformed_data_,
            ) = zip(*fit)

        return self