Exemple #1
0
    def fit(self, data):
        if data.kfold > 1:
            cv_eval = {}
            for k, cv_fold in enumerate(data.Xy_train.keys()):
#                 print('    cv_fold: ', cv_fold)
                [(X_train, y_train), (X_val, y_val)] = data.Xy_train[cv_fold]
                X_train, X_val = from_2d_array_to_nested(X_train), from_2d_array_to_nested(X_val)
                knn = KNeighborsTimeSeriesClassifier(n_neighbors=5, distance="dtw", n_jobs=-1)
                knn.fit(X_train, y_train)
                eval_metrics = weareval.eval_output(knn.predict(X_val), y_val, tasktype=data.tasktype)
                cv_eval[cv_fold] = {'model': knn, 
                                    # 'data': [(X_train, y_train), (X_val, y_val)], # store just IDs?
                                    'metric': eval_metrics['mae'] if data.tasktype=='regression' else eval_metrics['balanced_acc_adj'],
                                    'metrics': eval_metrics}
            # retain only best model
            tmp = {cv_fold:cv_eval[cv_fold]['metric'] for cv_fold in cv_eval.keys()}
            bst_fold = min(tmp, key=tmp.get) if data.tasktype=='regression' else max(tmp, key=tmp.get)
            self.knn = cv_eval[bst_fold]['model']
            return {'model': self.knn, 'metrics': cv_eval[bst_fold]['metrics']}
        else:
            X_train, y_train = data.Xy_train
            X_val, y_val = data.Xy_val
            X_train, X_val = from_2d_array_to_nested(X_train), from_2d_array_to_nested(X_val)
            self.knn = knn = KNeighborsTimeSeriesClassifier(n_neighbors=5, distance="dtw", n_jobs=-1)
            self.knn.fit(X_train, y_train)
            eval_metrics = weareval.eval_output(self.knn.predict(X_val), y_val, tasktype=data.tasktype)
            return {'model': self.knn, 'metrics': eval_metrics}
Exemple #2
0
 def fit(self, data):
     if data.kfold > 1:
         cv_eval = {}
         for k, cv_fold in enumerate(data.Xy_train.keys()):
             [(X_train, y_train), (X_val, y_val)] = data.Xy_train[cv_fold]
             X_train, X_val = from_2d_array_to_nested(X_train), from_2d_array_to_nested(X_val)
             tsf = ComposableTimeSeriesForestRegressor(
                 n_jobs=-1) if data.tasktype=='regression' else ComposableTimeSeriesForestClassifier(
                 n_jobs=-1)
             tsf.fit(X_train, y_train)
             eval_metrics = weareval.eval_output(tsf.predict(X_val), y_val, tasktype=data.tasktype)
             cv_eval[cv_fold] = {'model': tsf, 
                                 # 'data': [(X_train, y_train), (X_val, y_val)], # store just IDs?
                                 'metric': eval_metrics['mae'] if data.tasktype=='regression' else eval_metrics['balanced_acc_adj'],
                                 'metrics': eval_metrics}
         # retain only best model
         tmp = {cv_fold:cv_eval[cv_fold]['metric'] for cv_fold in cv_eval.keys()}
         bst_fold = min(tmp, key=tmp.get) if data.tasktype=='regression' else max(tmp, key=tmp.get)
         self.tsf = cv_eval[bst_fold]['model']
         return {'model': self.tsf, 'metrics': cv_eval[bst_fold]['metrics']}
     else:
         X_train, y_train = data.Xy_train
         X_val, y_val = data.Xy_val
         X_train, X_val = from_2d_array_to_nested(X_train), from_2d_array_to_nested(X_val)
         self.tsf = ComposableTimeSeriesForestRegressor(
             n_jobs=-1) if data.tasktype=='regression' else ComposableTimeSeriesForestClassifier(
             n_jobs=-1)
         self.tsf.fit(X_train, y_train)
         eval_metrics = weareval.eval_output(self.tsf.predict(X_val), y_val, tasktype=data.tasktype)
         return {'model': self.tsf, 'metrics': eval_metrics}
Exemple #3
0
    def transform(self, X, y=None):
        """
        Transform X, transforms univariate time-series using sklearn's PCA
        class

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, 1]
            Nested dataframe with univariate time-series in cells.

        Returns
        -------
        Xt : pandas DataFrame
          Transformed pandas DataFrame with the same number of rows and the
          (potentially reduced) PCA transformed
          column. Time indices of the original column are replaced with 0:(
          n_components - 1).
        """
        self.check_is_fitted()
        X = check_X(X, enforce_univariate=True, coerce_to_numpy=True)
        X = X.squeeze(1)

        # Transform X using the fitted PCA
        Xpca = pd.DataFrame(data=self.pca.transform(X))

        # Back-transform into time series data format
        Xt = from_2d_array_to_nested(Xpca)
        return Xt
Exemple #4
0
 def transform(self, X, y=None):
     X = self._prepare(X)
     xts = list()
     for i in range(X.shape[0]):
         xt = self.transformer_[i].fit_transform(X[i].T)
         xts.append(from_2d_array_to_nested(xt.T).T)
     return pd.concat(xts, axis=0)
Exemple #5
0
    def transform(self, X, y=None):
        """Concatenate multivariate time series/panel data into long
        univariate time series/panel
        data by simply concatenating times series in time.

        Parameters
        ----------
        X : nested pandas DataFrame of shape [n_samples, n_features]
            Nested dataframe with time-series in cells.

        Returns
        -------
        Xt : pandas DataFrame
          Transformed pandas DataFrame with same number of rows and single
          column
        """
        self.check_is_fitted()
        X = check_X(X)

        # We concatenate by tabularizing all columns and then detabularizing
        # them into a single column
        if isinstance(X, pd.DataFrame):
            Xt = from_nested_to_2d_array(X)
        else:
            Xt = from_3d_numpy_to_2d_array(X)
        return from_2d_array_to_nested(Xt)
Exemple #6
0
def test_from_2d_array_to_nested(n_instances, n_columns, n_timepoints):
    """Test from_2d_array_to_nested for correctness."""
    rng = np.random.default_rng()
    X_2d = rng.standard_normal((n_instances, n_timepoints))
    nested_df = from_2d_array_to_nested(X_2d)

    assert is_nested_dataframe(nested_df)
    assert nested_df.shape == (n_instances, 1)
def test_pca_results(n_components):
    np.random.seed(42)

    # sklearn
    X = pd.DataFrame(data=np.random.randn(10, 5))
    pca = PCA(n_components=n_components)
    Xt1 = pca.fit_transform(X)

    # sktime
    Xs = from_2d_array_to_nested(X)
    pca_transform = PCATransformer(n_components=n_components)
    Xt2 = pca_transform.fit_transform(Xs)

    assert np.allclose(np.asarray(Xt1),
                       np.asarray(from_nested_to_2d_array(Xt2)))
def test_output_format_dim(len_series, n_instances, n_components):
    np.random.seed(42)
    X = from_2d_array_to_nested(
        pd.DataFrame(data=np.random.randn(n_instances, len_series)))

    trans = PCATransformer(n_components=n_components)
    Xt = trans.fit_transform(X)

    # Check number of rows and output type.
    assert isinstance(Xt, pd.DataFrame)
    assert Xt.shape[0] == X.shape[0]

    # Check number of principal components in the output.
    assert from_nested_to_2d_array(Xt).shape[1] == min(
        n_components,
        from_nested_to_2d_array(X).shape[1])
Exemple #9
0
    def inverse_transform(self, X, y=None):
        """Transform tabular pandas dataframe into nested dataframe.

        Parameters
        ----------
        X : pandas DataFrame
            Tabular dataframe with primitives in cells.
        y : array-like, optional (default=None)

        Returns
        -------
        Xt : pandas DataFrame
            Transformed dataframe with series in cells.
        """
        self.check_is_fitted()
        # We expect a tabular pd.DataFrame or np.array here, hence we use
        # scikit-learn's input validation function.
        X = check_array(X)
        return from_2d_array_to_nested(X)
    def StartTrain(self): 
        train_files = glob.glob(self.lineEdit.text() + "\\*.csv")
        test_files = glob.glob(self.lineEdit_2.text() + "\\*.csv")
        train_li = []
        for filename in train_files:                
            df = pd.read_csv(filename, index_col=None, header=None,usecols=[2])
            train_li.append(df)
        X_df = pd.concat(train_li, axis=1, ignore_index=True)
        X_df = X_df.T  

        test_li = []        
        for filename in test_files:
            df = pd.read_csv(filename, index_col=None, header=None,usecols=[2])
            test_li.append(df)
        X_df_ng = pd.concat(test_li, axis=1, ignore_index=True)
        X_df_ng = X_df_ng.T

        X_df = X_df.append(X_df_ng)           
        X_df_tab = from_2d_array_to_nested(X_df)

        Y_df_ok = np.zeros(len(test_li), dtype="int32")
        Y_df_ng = np.ones(len(train_li), dtype="int32")
        Y_df = np.concatenate([Y_df_ok, Y_df_ng], 0)
        
        X_train, X_test, y_train, y_test = train_test_split(X_df_tab, Y_df, test_size= (100 - self.horizontalSlider.value()) / 100)
        self.tableWidget.setRowCount(0)
        selectedModel = self.comboBox.currentText()
        if(selectedModel == "RandomForestClassifier"):
            classifier = make_pipeline(Tabularizer(), RandomForestClassifier())
            classifier.fit(X_train, y_train)
            self.lineEdit_5.setText(str(classifier.score(X_train, y_train)))  
            self.lineEdit_6.setText(str(classifier.score(X_test, y_test)))          
            for i in range(len(X_test)): 
                row = self.tableWidget.rowCount()
                self.tableWidget.setRowCount(row)                
                classifier_preds = classifier.predict(X_test.iloc[i].to_frame())                
                self.addTableRow(self.tableWidget, [str(i),str(y_test[i]), str(classifier_preds)])   
            
        elif(selectedModel == "RocketClassifier"):
            rocket = RocketClassifier()
            rocket.fit(X_train, y_train)
            self.lineEdit_5.setText(str(rocket.score(X_train, y_train)))  
            self.lineEdit_6.setText(str(rocket.score(X_test, y_test))) 
            for i in range(len(X_test)): 
                row = self.tableWidget.rowCount()
                self.tableWidget.setRowCount(row)                
                rocket_preds = rocket.predict(X_test.iloc[i].to_frame())                
                self.addTableRow(self.tableWidget, [str(i),str(y_test[i]), str(rocket_preds)]) 
        
        elif(selectedModel == "TimeSeriesForestClassifier"):
            tsf = TimeSeriesForestClassifier(n_estimators=50, random_state=47)
            tsf.fit(X_train, y_train)
            self.lineEdit_5.setText(str(tsf.score(X_train, y_train)))  
            self.lineEdit_6.setText(str(tsf.score(X_test, y_test))) 
            for i in range(len(X_test)): 
                row = self.tableWidget.rowCount()
                self.tableWidget.setRowCount(row)                
                tsf_preds = tsf.predict(X_test.iloc[i].to_frame())                
                self.addTableRow(self.tableWidget, [str(i),str(y_test[i]), str(tsf_preds)]) 

        elif(selectedModel == "RandomIntervalSpectralEnsemble"):
            rise = RandomIntervalSpectralEnsemble(n_estimators=50, random_state=47)
            rise.fit(X_train, y_train)
            self.lineEdit_5.setText(str(rise.score(X_train, y_train)))  
            self.lineEdit_6.setText(str(rise.score(X_test, y_test))) 
            for i in range(len(X_test)): 
                row = self.tableWidget.rowCount()
                self.tableWidget.setRowCount(row)                
                rise_preds = rise.predict(X_test.iloc[i].to_frame())                
                self.addTableRow(self.tableWidget, [str(i),str(y_test[i]), str(rise_preds)]) 

        elif(selectedModel == "SupervisedTimeSeriesForest"):
            stsf = SupervisedTimeSeriesForest(n_estimators=50, random_state=47)
            stsf.fit(X_train, y_train)
            self.lineEdit_5.setText(str(stsf.score(X_train, y_train)))  
            self.lineEdit_6.setText(str(stsf.score(X_test, y_test))) 
            for i in range(len(X_test)): 
                row = self.tableWidget.rowCount()
                self.tableWidget.setRowCount(row)                
                stsf_preds = rise.predict(X_test.iloc[i].to_frame())                
                self.addTableRow(self.tableWidget, [str(i),str(y_test[i]), str(stsf_preds)]) 
        else:
            print("None")
Exemple #11
0
 def eval_test(self, data):
     X_test, y_test = data.Xy_test
     X_test = from_2d_array_to_nested(X_test)
     eval_metrics = weareval.eval_output(self.knn.predict(X_test), y_test, tasktype=data.tasktype)
     return eval_metrics
def test_pca_kwargs(kwargs):
    np.random.seed(42)
    X = from_2d_array_to_nested(pd.DataFrame(data=np.random.randn(10, 5)))
    pca = PCATransformer(n_components=1, **kwargs)
    pca.fit_transform(X)