def select(self):
    """Select a model by K-fold cross-validation over the candidate state counts.

    Every state count in ``[self.min_n_components, self.max_n_components]``
    is trained on each training fold via ``self.baseSetModel`` and scored on
    the matching test fold via ``model.score``. The state count with the
    highest mean test score wins.

    NOTE(review): "highest wins" assumes ``model.score`` returns a
    log-likelihood (higher is better), as hmmlearn's ``score`` does --
    confirm against ``baseSetModel``.

    Returns:
        The best-scoring fold model of the winning state count, or ``None``
        when no candidate could be both trained and scored.
    """
    warnings.filterwarnings("ignore", category=DeprecationWarning)

    # Candidate numbers of hidden states to evaluate.
    state_counts = range(self.min_n_components, self.max_n_components + 1)

    # Two folds (train / test) when there is more than one sequence, else one.
    # Assigned after construction so that a value of 1 is accepted; KFold's
    # constructor requires at least 2 splits.
    # NOTE(review): with a single sequence the lone fold presumably has an
    # empty training set, so no model is likely to be produced -- confirm
    # whether a fallback is wanted.
    folds = KFold()
    folds.n_splits = 2 if len(self.sequences) > 1 else 1

    # Build each fold's train / test data once; it is the same for every
    # candidate state count.
    fold_data = [
        (
            combine_sequences(train_idx, self.sequences),
            combine_sequences(test_idx, self.sequences),
        )
        for train_idx, test_idx in folds.split(self.sequences)
    ]

    best_model = None
    best_mean = float("-inf")
    for n_states in state_counts:
        scored = []  # (test score, model) for every fold that succeeded
        for train, test in fold_data:
            model = self.baseSetModel(n_states, train[0], train[1])
            if not model:
                continue
            try:
                score = model.score(test[0], test[1])
            except Exception:
                # Best effort: a fold that cannot be scored is skipped, but
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                continue
            scored.append((score, model))

        if not scored:
            continue

        mean_score = sum(pair[0] for pair in scored) / len(scored)
        if best_model is None or mean_score > best_mean:
            best_mean = mean_score
            best_model = max(scored, key=lambda pair: pair[0])[1]

    return best_model
def _split_speakers(self, dataset):
    """Yield K-fold splits of ``dataset.indices`` grouped by speaker.

    Each entry of ``dataset.indices`` is passed to ``self.split_uttid`` and
    the first element of the result is taken as its speaker (indices are
    assumed to have the form ``spkr_uttid``). The distinct speakers are
    split with a shuffled ``KFold``, so every utterance of a speaker lands
    in the same subset.

    Side effect: when there are fewer speakers than ``self.k``, a warning is
    logged and ``self.k`` is lowered to the number of speakers.

    Yields:
        One tuple per fold, holding one integer array per subset returned by
        ``KFold.split`` (train first, then test). Each array contains the
        positions within ``dataset.indices`` whose speaker is in that subset.
    """
    indices = dataset.indices
    kfold = KFold(n_splits=self.k, shuffle=True, random_state=self.RND)

    # Resolve every utterance's speaker once rather than once per fold.
    index_speakers = [self.split_uttid(idx)[0] for idx in indices]

    # Sorted so the fold assignment depends only on the data and the random
    # state, not on the iteration order of a set.
    speakers = np.array(sorted(set(index_speakers)))

    if len(speakers) < self.k:
        logger.warning(
            f"Set number of fold equal to number of speakers ({len(speakers)})"
        )
        self.k = len(speakers)
        kfold.n_splits = self.k

    for split in kfold.split(speakers):
        # A set per subset gives constant-time membership tests.
        speaker_groups = [set(speakers[subset].tolist()) for subset in split]
        yield tuple(
            np.array(
                [i for i, spk in enumerate(index_speakers) if spk in group],
                dtype=int,  # keep an integer dtype even when a subset is empty
            )
            for group in speaker_groups
        )
@author: titas """ import pandas as pd from sklearn.model_selection import KFold from sklearn import linear_model from sklearn import metrics Data = pd.read_csv( "https://raw.githubusercontent.com/vincentarelbundock/Rdatasets/master/csv/ISLR/Auto.csv" ) X = Data["horsepower"].values.reshape(-1, 1) # y = Data["mpg"].values.reshape(-1, 1) kf = KFold() kf.n_splits = 10 ytests = [] ypreds = [] for train_index, test_index in kf.split(Data): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] model = linear_model.LinearRegression() model.fit(X=X_train, y=y_train) y_pred = model.predict(X_test) ypreds += list(y_pred) ytests += list(y_test) ms_error = metrics.mean_squared_error(ytests, ypreds) print(ms_error)