def test_upsample(self):
    """Print the shapes and label counts of the upsampled MFCC training data.

    Loads the MFCC baseline features, passes the training partition through
    ``upsample`` and prints the shapes of the returned arrays followed by a
    ``Counter`` of the returned labels. Nothing is asserted; the output is
    meant to be inspected by hand.
    """
    from collections import Counter

    # The dev partition is loaded alongside the training one but not needed.
    features, labels, instances, _, _, _ = load_proc_baseline_feature(
        'MFCC', verbose=True)
    features, labels, instances = upsample(
        features, labels, instances, verbose=True)

    shapes = (features.shape, labels.shape, instances.shape)
    print(*shapes)
    print(Counter(labels))
def run_MFCC(self):
    """Run a baseline classifier on MFCC features (single modality).

    The MFCC baseline features are loaded with
    ``load_proc_baseline_feature`` and the work is dispatched on
    ``self.model_name``:

    * ``'RF_cv'`` -- the train and dev partitions are stacked, a
      ``RandomForest`` is fitted on every ``(train ids, dev ids)`` pair
      produced by ``k_fold_cv``, the per-fold session results returned by
      ``get_UAR`` are stored with ``save_cv_results`` and the method returns.
    * ``'SVM'`` / ``'RF'`` -- the training partition is upsampled, both
      feature matrices are converted to CSR, the model is fitted and
      evaluated, UAR is reported for the train and dev partitions with
      ``get_UAR`` and, when ``self.test`` is falsy,
      ``get_post_probability`` is called on the dev predictions.

    Raises:
        ValueError: if ``self.model_name`` is not one of ``'SVM'``, ``'RF'``
            or ``'RF_cv'``.
    """
    # Fail before the data is loaded; otherwise an unsupported model name
    # would only surface later as an unbound-local error on the predictions.
    if self.model_name not in ('SVM', 'RF', 'RF_cv'):
        raise ValueError(
            "unsupported model_name for MFCC features: %r "
            "(expected 'SVM', 'RF' or 'RF_cv')" % (self.model_name,))

    print("\nbuilding a classifier on MFCC features (both frame-level and session-level)")
    X_train, y_train, train_inst, X_dev, y_dev, dev_inst = load_proc_baseline_feature('MFCC', verbose=True)

    if self.model_name == 'RF_cv':
        # Stack train and dev so the folds are drawn from both partitions.
        X = np.vstack((X_train, X_dev))
        y = np.hstack((np.ravel(y_train), np.ravel(y_dev)))
        inst = np.hstack((np.ravel(train_inst), np.ravel(dev_inst)))
        assert len(X) == len(y) == len(inst)

        cv_res = []
        for ids_train, ids_dev in k_fold_cv(len(X)):
            fold_model = RandomForest(self.feature_name, X[ids_train], y[ids_train],
                                      X[ids_dev], y[ids_dev],
                                      baseline=True, test=self.test)
            fold_model.run()
            # Only the held-out predictions are scored during cross-validation.
            _, fold_pred_dev = fold_model.evaluate()
            _, session_res = get_UAR(fold_pred_dev, y[ids_dev], inst[ids_dev],
                                     self.model_name, self.feature_name, 'baseline',
                                     baseline=True, test=True)
            cv_res.append(session_res)
        save_cv_results(cv_res, self.model_name, self.feature_name, 'baseline')
        # Cross-validation is a complete run. Continuing into the single-split
        # pipeline below would pair the last fold's data with the full
        # instance ids and score stale predictions against upsampled labels.
        return

    print("\nupsampling training data to address class imbalance")
    X_train, y_train, train_inst = upsample(X_train, y_train, train_inst)
    print("\nobtaining sparse matrix for better classification")
    X_train, X_dev = sp.csr_matrix(X_train), sp.csr_matrix(X_dev)

    # Both classifiers are constructed and driven identically, so only the
    # class differs between the two branches.
    model_cls = LinearSVM if self.model_name == 'SVM' else RandomForest
    model = model_cls(self.feature_name, X_train, y_train, X_dev, y_dev,
                      baseline=True, test=self.test)
    model.run()
    y_pred_train, y_pred_dev = model.evaluate()

    get_UAR(y_pred_train, y_train, train_inst, self.model_name, self.feature_name, 'baseline',
            baseline=True, train_set=True, test=self.test)
    get_UAR(y_pred_dev, y_dev, dev_inst, self.model_name, self.feature_name, 'baseline',
            baseline=True, test=self.test)
    if not self.test:
        # The empty array fills the positional slot between the instance ids
        # and the model name; no session-level probabilities are passed here.
        get_post_probability(y_pred_dev, y_dev, dev_inst, np.array([]),
                             self.model_name, self.feature_name)
def run_AU(self): """run classifier on AU feature (single modality) """ print("\nbuilding a classifier on AU features (already session-level)") X_train, y_train, _, X_dev, y_dev, _ = load_proc_baseline_feature('AU', verbose=True) if self.model_name == 'RF_cv': X = np.vstack((X_train, X_dev)) y = np.hstack((y_train, y_dev)) assert len(X) == len(y) cv_ids = k_fold_cv(len(X)) cv_res = [] for (ids_train, ids_dev) in cv_ids: X_train = X[ids_train] y_train = y[ids_train] X_dev = X[ids_dev] y_dev = y[ids_dev] RF_MFCC = RandomForest(self.feature_name, X_train, y_train, X_dev, y_dev, baseline=True, test=self.test) RF_MFCC.run() y_pred_train, y_pred_dev = RF_MFCC.evaluate() _, session_res = get_UAR(y_pred_dev, y_dev, np.array([]), self.model_name, self.feature_name, 'baseline', baseline=True, test=True) cv_res.append(session_res) save_cv_results(cv_res, self.model_name, self.feature_name, 'baseline') print("\nupsampling training data to address class imbalance") X_train, y_train, _ = upsample(X_train, y_train, np.array([])) print("\nobtaining sparse matrix for better classification") # X_train = sp.csr_matrix(np.vstack((X_train, X_dev))) # X_dev = sp.csr_matrix(X_dev) # y_train = np.hstack((y_train, y_dev)) X_train, X_dev = sp.csr_matrix(X_train), sp.csr_matrix(X_dev) if self.model_name == 'SVM': SVM_AU = LinearSVM(self.feature_name, X_train, y_train, X_dev, y_dev, baseline=True, test=self.test) SVM_AU.run() y_pred_train, y_pred_dev = SVM_AU.evaluate() session_prob = SVM_AU.get_session_probability() elif self.model_name == 'RF': RF_AU = RandomForest(self.feature_name, X_train, y_train, X_dev, y_dev, baseline=True, test=self.test) RF_AU.run() y_pred_train, y_pred_dev = RF_AU.evaluate() session_prob = RF_AU.get_session_probability() get_UAR(y_pred_train, y_train, np.array([]), self.model_name, self.feature_name, 'baseline', baseline=True, train_set=True, test=self.test) get_UAR(y_pred_dev, y_dev, np.array([]), self.model_name, self.feature_name, 'baseline', baseline=True, 
test=self.test)