def select(self):
    """Choose a model by 2-fold cross-validation over the candidate state counts.

    Every state count in ``[self.min_n_components, self.max_n_components]``
    is evaluated on the same two folds of ``self.sequences``: a model is
    trained on each training fold with ``self.baseSetModel`` and scored on
    the matching held-out fold.  The state count with the highest mean
    held-out score wins.

    NOTE(review): "highest is best" assumes ``model.score(X, lengths)``
    returns a log-likelihood, as hmmlearn models do -- confirm against
    ``baseSetModel``.

    Returns:
        The fold model with the best held-out score for the winning state
        count, or ``None`` when no candidate could be trained and scored
        (which includes having fewer than two sequences to split).
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # A train/test split needs at least one sequence on each side.
    if len(self.sequences) < 2:
        return None

    # KFold without shuffling is deterministic, so the combined train/test
    # data is built once and reused for every candidate state count.
    fold_data = [
        (combine_sequences(train_idx, self.sequences),
         combine_sequences(test_idx, self.sequences))
        for train_idx, test_idx in KFold(n_splits=2).split(self.sequences)
    ]

    best_model = None
    best_mean = None
    for n_states in range(self.min_n_components, self.max_n_components + 1):
        scored = []  # (held-out score, model) for each fold that succeeded
        for train, test in fold_data:
            model = self.baseSetModel(n_states, train[0], train[1])
            if not model:
                continue
            try:
                score = model.score(test[0], test[1])
            except Exception:
                # Best effort: a fold that cannot be scored is skipped, but
                # KeyboardInterrupt/SystemExit are no longer swallowed.
                continue
            scored.append((score, model))

        if not scored:
            continue

        mean_score = sum(score for score, _ in scored) / len(scored)
        if best_mean is None or mean_score > best_mean:
            best_mean = mean_score
            best_model = max(scored, key=lambda pair: pair[0])[1]

    return best_model
Ejemplo n.º 2
0
    def _split_speakers(self, dataset):
        """Yield K-fold splits of ``dataset`` that never share a speaker.

        The speaker of each entry in ``dataset.indices`` is taken to be the
        first element returned by ``self.split_uttid``.  The unique speakers
        are split with a shuffled ``KFold`` seeded by ``self.RND``.

        If there are fewer speakers than ``self.k``, a warning is logged and
        ``self.k`` is lowered to the number of speakers.

        Yields:
            One tuple per fold, holding one array per subset returned by
            ``KFold.split``.  Each array lists the positions in
            ``dataset.indices`` whose speaker belongs to that subset.
        """
        # Assume index of the form spkr_uttid
        indices = dataset.indices
        # Resolve each utterance's speaker once instead of once per fold.
        utt_speakers = [self.split_uttid(idx)[0] for idx in indices]
        # Sorted so the speaker order (and therefore the seeded shuffle) is
        # the same on every run; plain set iteration order is not stable.
        speakers = np.array(sorted(set(utt_speakers)))

        kfold = KFold(n_splits=self.k, shuffle=True, random_state=self.RND)
        if len(speakers) < self.k:
            logger.warning(
                f"Set number of fold equal to number of speakers ({len(speakers)})"
            )
            self.k = len(speakers)
            kfold.n_splits = self.k

        for split in kfold.split(speakers):
            subsets = []
            for subset in split:
                fold_speakers = set(speakers[subset].tolist())
                subsets.append(
                    np.array([
                        i for i, spk in enumerate(utt_speakers)
                        if spk in fold_speakers
                    ]))
            yield tuple(subsets)
Ejemplo n.º 3
0
@author: titas
"""
import pandas as pd

from sklearn.model_selection import KFold
from sklearn import linear_model
from sklearn import metrics

# Fetch the ISLR "Auto" data set and regress mpg on horsepower.
Data = pd.read_csv(
    "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/ISLR/Auto.csv"
)
# Double-bracket selection keeps each column as a 2-D (n_rows, 1) array.
X = Data[["horsepower"]].values
y = Data[["mpg"]].values
kf = KFold(n_splits=10)

# Held-out targets and predictions accumulated across all folds so far.
ytests, ypreds = [], []

for train_index, test_index in kf.split(Data):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    model = linear_model.LinearRegression().fit(X_train, y_train)
    y_pred = model.predict(X_test)

    ypreds.extend(y_pred)
    ytests.extend(y_test)

    # Cumulative MSE over every fold evaluated up to this point, not the
    # error of the current fold alone.
    ms_error = metrics.mean_squared_error(ytests, ypreds)
    print(ms_error)