Example no. 1
0
File: sast.py — Project: frankl1/sast
class SAST(BaseEstimator, ClassifierMixin):
    """Scalable and Accurate Subsequence Transform (SAST) classifier.

    Randomly picks a few reference time series per class, extracts candidate
    subsequences ("kernels") of the requested lengths from them, transforms
    every input series into its vector of distances to those kernels
    (via ``apply_kernels``), and fits a downstream classifier on the result.

    Parameters
    ----------
    cand_length_list : sequence of int
        Candidate subsequence lengths; lengths longer than the series are
        dropped at fit time.
    shp_step : int, default=1
        Stride between consecutive candidate start positions.
    nb_inst_per_class : int, default=1
        Number of reference series drawn per class (capped by class size).
    random_state : int, None or numpy.random.RandomState, default=None
        Seed or generator used to pick reference series.
    classifier : estimator or None, default=None
        Classifier fitted on the transformed data; defaults to a
        RandomForestClassifier when None.
    """

    def __init__(self, cand_length_list, shp_step = 1, nb_inst_per_class = 1, random_state = None, classifier = None):
        super(SAST, self).__init__()
        self.cand_length_list = cand_length_list
        self.shp_step = shp_step
        self.nb_inst_per_class = nb_inst_per_class
        self.kernels_ = None       # z-normalized kernels, NaN-padded to max length
        self.kernel_orig_ = None   # not z-normalized kernels
        self.kernels_generators_ = {}  # class label -> reference series used
        # Normalize random_state to a RandomState instance up front so the
        # rest of the code can call its methods unconditionally.
        self.random_state = np.random.RandomState(random_state) if not isinstance(random_state, np.random.RandomState) else random_state

        self.classifier = classifier

    def get_params(self, deep=True):
        """Return constructor parameters (sklearn estimator API)."""
        return {
            'cand_length_list': self.cand_length_list,
            'shp_step': self.shp_step,
            'nb_inst_per_class': self.nb_inst_per_class,
            # BUGFIX: random_state was missing, so sklearn.clone() silently
            # dropped the seed. __init__ accepts a RandomState instance back.
            'random_state': self.random_state,
            'classifier': self.classifier
        }

    def init_sast(self, X, y):
        """Pick reference series per class and build the kernel bank.

        Populates ``kernels_`` (z-normalized, NaN-padded float32 matrix),
        ``kernel_orig_`` (raw subsequences) and ``kernels_generators_``.
        """
        self.cand_length_list = np.array(sorted(self.cand_length_list))

        assert self.cand_length_list.ndim == 1, 'Invalid shapelet length list: required list or tuple, or a 1d numpy array'

        if self.classifier is None:
            self.classifier = RandomForestClassifier(min_impurity_decrease=0.05, max_features=None)

        classes = np.unique(y)
        self.num_classes = classes.shape[0]

        candidates_ts = []
        for c in classes:
            X_c = X[y==c]

            # int() because nb_inst_per_class may be a float, which would make
            # np.min() return a float and break the slice below.
            cnt = int(np.min([self.nb_inst_per_class, X_c.shape[0]]))
            chosen = self.random_state.permutation(X_c.shape[0])[:cnt]
            candidates_ts.append(X_c[chosen])
            self.kernels_generators_[c] = X_c[chosen]

        candidates_ts = np.concatenate(candidates_ts, axis=0)

        # Drop candidate lengths longer than the series themselves.
        self.cand_length_list = self.cand_length_list[self.cand_length_list <= X.shape[1]]

        max_shp_length = max(self.cand_length_list)

        n, m = candidates_ts.shape

        # BUGFIX: the original counted (m - l + 1) start positions per length,
        # ignoring shp_step; with shp_step > 1 that over-allocated kernels_
        # and left trailing all-NaN rows. Count the strided positions instead
        # so the allocation matches exactly what the fill loop produces.
        n_kernels = n * int(np.sum([len(range(0, m - l + 1, self.shp_step)) for l in self.cand_length_list]))

        # Shorter kernels are right-padded with NaN up to the longest length.
        self.kernels_ = np.full((n_kernels, max_shp_length), dtype=np.float32, fill_value=np.nan)
        self.kernel_orig_ = []

        k = 0
        for shp_length in self.cand_length_list:
            for i in range(candidates_ts.shape[0]):
                for j in range(0, candidates_ts.shape[1] - shp_length + 1, self.shp_step):
                    end = j + shp_length
                    can = np.squeeze(candidates_ts[i][j : end])
                    self.kernel_orig_.append(can)
                    self.kernels_[k, :shp_length] = znormalize_array(can)
                    k += 1

    def fit(self, X, y):
        """Build the kernel bank, transform X, and fit the classifier."""
        X, y = check_X_y(X, y) # check the shape of the data

        self.init_sast(X, y) # randomly choose reference time series and generate kernels

        X_transformed = apply_kernels(X, self.kernels_) # subsequence transform of X

        self.classifier.fit(X_transformed, y) # fit the classifier

        return self

    def predict(self, X):
        """Predict class labels for X after the subsequence transform."""
        check_is_fitted(self) # make sure the classifier is fitted

        X = check_array(X) # validate the shape of X

        X_transformed = apply_kernels(X, self.kernels_) # subsequence transform of X

        return self.classifier.predict(X_transformed)

    def predict_proba(self, X):
        """Predict class probabilities for X after the subsequence transform."""
        check_is_fitted(self) # make sure the classifier is fitted

        X = check_array(X) # validate the shape of X

        X_transformed = apply_kernels(X, self.kernels_) # subsequence transform of X

        # Linear models without a native predict_proba expose _predict_proba_lr.
        if isinstance(self.classifier, LinearClassifierMixin):
            return self.classifier._predict_proba_lr(X_transformed)
        return self.classifier.predict_proba(X_transformed)