def phoneAccelerometerISVM():
    """Train and evaluate an SVM classifying accelerometer windows as 'sit' vs not.

    Reads ./Train_Phone-Acc-nexus4_1-a.csv, slices the y-axis signal into
    non-overlapping windows of 500 samples labelled +1 ('sit') or -1 (other),
    fits an SVC on all windows, then reports error/accuracy/F1 on a
    stratified 10% held-out split.
    """
    print("Loading data...")
    data = pd.read_csv("./Train_Phone-Acc-nexus4_1-a.csv")
    print("Done!")

    # Slice the recording into non-overlapping windows; each window is
    # labelled by the ground-truth ('gt') class of its first sample.
    print("Finding time series windows indexes for each class kind...")
    pos = 0
    y = []
    X = []
    window = 500
    # BUG FIX: the original condition (pos < data.shape[0]) admitted a final
    # partial window, producing a ragged X that np.array/SVC.fit cannot
    # handle. Only complete windows are taken now.
    while pos + window <= data.shape[0]:
        # Make y label: +1 for 'sit', -1 for everything else.
        if str(data.iloc[pos]['gt']) == 'sit':
            y.append(1)
        else:
            y.append(-1)
        # Make X row: the next `window` y-axis accelerometer readings.
        X.append(data.iloc[pos:pos + window]['y'])
        # Move to the next window.
        pos += window
    print("Done!")

    # Build and fit an SVM on the full data set (resubstitution sanity run).
    print("Training SVM on all data accelerometer data...")
    X = np.array(X)
    y = np.array(y)
    clfs = SVC()
    clfs.fit(X, y)
    print("Done!")

    # Cross validation: stratified 10% test split.
    print("Training SVM on accelerometer training only data...")
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.1)
    clfs = SVC()
    clfs.fit(X_train, y_train)
    yhat = clfs.predict(X_test)
    print("Abs Error = %f" % (calculateTotalAbsoluteError(yhat, y_test) / len(yhat)))
    print("Test data mean accuracy SVM score: %f" % clfs.score(X_test, y_test))
    f1_c0 = f1_score(y_test, clfs.predict(X_test), pos_label=1, average='binary')
    print("Test data f1 score for class +1: %f" % (f1_c0))
    print("Done!")
class SVMClassifier(ClassifierI):
    """Wrapper for scikit-learn svm classifier."""

    def __init__(self, C=1.0, kernel='rbf', degree=3, gamma='auto', coef0=0.0,
                 shrinking=True, probability=False, tol=1e-3, cache_size=200,
                 class_weight=None, verbose=False, max_iter=-1,
                 decision_function_shape=None, random_state=None):
        """Init. See scikit-learn SVC for parameter semantics."""
        # BUG FIX: C was hard-coded as C=1, silently discarding the caller's
        # value; forward the parameter instead.
        self._clf = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma,
                        coef0=coef0, shrinking=shrinking,
                        probability=probability, tol=tol,
                        cache_size=cache_size, class_weight=class_weight,
                        verbose=verbose, max_iter=max_iter,
                        decision_function_shape=decision_function_shape,
                        random_state=random_state)
        # Original-space class labels; populated by train().
        self.classes_ = None

    def __repr__(self):
        return "<SVMClassifier(%r)>" % self._clf

    def classify_many(self, vectors):
        """Classify a batch of verbs.

        :param vectors: An doc term array of vectors
        :return: The predicted class label for each input sample.
        :rtype: list
        """
        # SVC was fit on label indexes (see train); map back to labels.
        classes = self.classes_
        return [classes[i] for i in self._clf.predict(vectors)]

    def prob_classify_many(self, vectors):
        """Compute per-class probabilities for a batch of samples.

        :param vectors: A doc term array of vectors
        :rtype: list of ``ProbDistI``
        """
        # NOTE(review): predict_proba requires the SVC to have been created
        # with probability=True — confirm callers set it.
        y_proba_list = self._clf.predict_proba(vectors)
        return [self._make_probdist(y_proba) for y_proba in y_proba_list]

    def labels(self):
        """The class labels learned by this classifier.

        :rtype: list
        """
        return list(self.classes_)

    def train(self, vectors, labels):
        """
        Train (fit) the scikit-learn svm classifier.

        :param vectors: a doc-term array of vectors to learn from
        :param labels: a list of labels corresponding to the rows of the
            doc term array.
        """
        # Encode labels as indexes so predict() output can index classes_.
        self.classes_, labels = np.unique(labels, return_inverse=True)
        self._clf.fit(vectors, labels)
        return self

    def _make_probdist(self, y_proba):
        # Map probability-vector positions back to original class labels.
        classes = self.classes_
        return dict((classes[i], p) for i, p in enumerate(y_proba))
class SVCImpl():
    """Thin lazy-fitting wrapper around the project's SKLModel (an SVC).

    Constructor arguments are captured verbatim in ``_hyperparams``; the
    underlying model is only instantiated when ``fit`` is called.
    """

    def __init__(self, C=1.0, kernel='rbf', degree=3, gamma='auto_deprecated',
                 coef0=0.0, shrinking=True, probability=False, tol=0.001,
                 cache_size=200, class_weight='balanced', verbose=False,
                 max_iter=(-1), decision_function_shape='ovr',
                 random_state=None):
        # Record every hyper-parameter for the deferred SKLModel construction.
        self._hyperparams = dict(
            C=C,
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            shrinking=shrinking,
            probability=probability,
            tol=tol,
            cache_size=cache_size,
            class_weight=class_weight,
            verbose=verbose,
            max_iter=max_iter,
            decision_function_shape=decision_function_shape,
            random_state=random_state,
        )

    def fit(self, X, y=None):
        """Build the underlying SKLModel and fit it; returns self."""
        self._sklearn_model = SKLModel(**self._hyperparams)
        if y is None:
            self._sklearn_model.fit(X)
        else:
            self._sklearn_model.fit(X, y)
        return self

    def predict(self, X):
        """Delegate prediction to the fitted model."""
        return self._sklearn_model.predict(X)

    def predict_proba(self, X):
        """Delegate probability prediction to the fitted model."""
        return self._sklearn_model.predict_proba(X)
def svm_train(X, y, model_path):
    """Fit an SVC on (X, y), print training-set metrics, and persist it.

    :param X: training feature matrix
    :param y: training labels
    :param model_path: destination path for the serialized model
    """
    model = SVC()
    model.fit(X, y)
    # Resubstitution evaluation: score the model on its own training data.
    y_true, y_pred = y, model.predict(X)
    print(metrics.classification_report(y_true, y_pred))
    print(metrics.confusion_matrix(y_true, y_pred))
    # Serialize the fitted model to disk.
    joblib.dump(model, model_path)
def classifier_panchenko2016(X_train, y_train, X_test, y_test,
                             separateClassifier=False):
    """Panchenko et al. (2016)-style RBF-SVM pipeline.

    Runs outlier removal over the combined splits, extracts 100 features per
    split, min-max scales using training statistics, then fits and predicts.

    :return: (y_test, y_predictions) after preprocessing.
    """
    # Tag each sample with its split so outlier removal can work on the
    # combined data and hand the two splits back afterwards.
    split_tags = ["train"] * len(y_train) + ["test"] * len(y_test)
    y_train, X_train, y_test, X_test = outlier_removal(
        split_tags, X_train + X_test, y_train + y_test)

    y_train, X_train = features_extraction(
        y_train, X_train, separateClassifier=separateClassifier,
        featuresCount=100)
    y_test, X_test = features_extraction(
        y_test, X_test, separateClassifier=separateClassifier,
        featuresCount=100)

    # Scale features to [0, 1]; statistics come from the training split only.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Hyper-parameters as published (very large C, fixed gamma).
    classifier = SVC(kernel="rbf", C=2e11, gamma=2e-1, max_iter=5000,
                     class_weight="balanced", verbose=1)
    print("fitting")
    classifier.fit(X_train, y_train)
    print("testing")
    y_predictions = classifier.predict(X_test)
    return y_test, y_predictions
# Load the held-out test set and encode it with the same item dictionaries
# built from the training data, then evaluate an SVM and a random forest.
test_data_set = DataSet()
test_data_set.load(config.get_value('test'), class_index, has_header=False)
Xtest, Ytest = test_data_set.convert_2_binary_format_with(
    X_train.item_dict, Y_train.item_dict)
Ytest = Ytest.flatten()
class_count = train_data_set.number_of_classes()

# Load association rules and keep only the "unexpected" ones.
unexpected_rules = IOHelper.load_json_object(config.get_value('rules'))
refined_unexpected_rules = filter_association_rules(unexpected_rules)

print('svm testing...')
# Polynomial-kernel SVM baseline.
svc_model = SVC(kernel='poly', degree=3, coef0=0.1, random_state=1)
svc_model.fit(X_train.relation_matrix, Y_train.values.flatten())
svc_y_pred = svc_model.predict(Xtest)
print(f1_score(Ytest, svc_y_pred, average=None))
if (class_count <= 2):
    # ROC/AUC only computed for the binary case.
    fpr, tpr, _ = roc_curve(Ytest, svc_y_pred.flatten())
    print(auc(fpr, tpr))
# Post-process the SVM predictions with the unexpected rules.
refine_with_unexpectedness(test_data_set, Y_train.item_dict, svc_y_pred,
                           Ytest, refined_unexpected_rules)

print('Random forest testing...')
rf_model = RandomForestClassifier(n_estimators=20, random_state=1)
rf_model.fit(X_train.relation_matrix, Y_train.values.flatten())
rf_y_pred = rf_model.predict(Xtest)
print(f1_score(Ytest, rf_y_pred, average=None))
# NOTE(review): fragment is truncated here in this view — the body of the
# following `if` is not visible.
if (class_count <= 2):
]
# NOTE(review): fragment starts mid-expression above (closing bracket of a
# list whose opening is not visible here).
print("train: ", len(tweets_training))
print("test: ", len(tweets_test))

# Build a shared feature space over training and test tweets.
X, X_test, feature_name, feature_index = feature_manager.create_feature_space(
    tweets_training, feature_type, tweets_test)
print(feature_name)
print("feature space dimension X:", X.shape)
print("feature space dimension X_test:", X_test.shape)

# Linear-kernel SVM trained on the full training split.
clf = SVC(kernel="linear")
clf.fit(X, labels_training)
test_predict = clf.predict(X_test)

"""prec, recall, f, support = precision_recall_fscore_support(
    labels_test, test_predict, beta=1)
accuracy = accuracy_score( test_predict, labels_test )
print(prec, recall, f, support )
print(accuracy)"""

# Open one CSV per test tweet's language.
# NOTE(review): loop body is truncated in this view; the opened file handle
# is not visibly closed here.
for i in range(0, len(tweets_test)):
    csvfile = open('ATC_' + tweets_test[i].language + '.csv', 'w', newline='')
# print (X_trn, X_val) # print (trn_embedding, val_embedding) # print (triplet_model.get_weights()) clf = SVC( # class_weight='balanced', probability=True, # tol=1e-4, ) clf.fit(trn_embedding, Y_trn) print(clf.score(val_embedding, Y_val)) print(clf.predict_proba(val_embedding)) print(roc_auc_score(Y_val, clf.predict(val_embedding))) print(classification_report(Y_val, clf.predict(val_embedding), digits=4)) all_files = [x[:-8] for x in os.listdir(ALL_FILES)] X = [ pickle.load(open(os.path.join(FEATURE_PATH, x + ".fkmeans"), "rb"), encoding='latin1') for x in all_files ] # Y = [ranks[x.split()[0].strip()] for x in all_files] proba = clf.predict_proba(embed(np.array(X), triplet_model)) # print(len(proba), len(proba[0]), proba[0]) wf = open("proba_audio_wo_pretrain.txt", "w") for i, [_, prob] in enumerate(proba):
    key=numericalSort)
# NOTE(review): fragment starts mid-call above (tail of a sorted(...) call
# whose beginning is not visible here).

# Read feature vectors of test images
testSamples = readVoxels(testFiles)
print(len(testSamples))

# 2D array to report final prediction in format (ID, Prediction)
final = [[0 for j in range(2)] for i in range(139)]
final[0][0] = 'ID'
final[0][1] = 'Prediction'
id = 1
# Predict age of test image using each of the 4 models trained above
for item in testSamples:
    predictionL = regrL.predict(item)
    predictionR = regrR.predict(item)
    predictionS = regrS.predict(item)
    predictionRfc = rfc.predict(item)
    final[id][0] = id
    # Taking the average of each of the model predictions as final age
    # prediction; // floors the ensemble mean to an integer age.
    final[id][1] = (predictionL[0] + predictionR[0] + predictionS[0] +
                    predictionRfc[0]) // 4
    id = id + 1
# Save csv file in the output directory provided as argument with name
# Dota2Prediction.csv (Windows-style path join).
np.savetxt(outputDir + "\\Dota2Prediction.csv", final, delimiter=',', fmt='%s')
print("Finished!")
def train_and_predict(samples, labels, feature_selector, inputDir, outputDir):
    """Train three classifiers per target (gender/age/health) and write votes.

    For each of the three targets, fits logistic regression, a linear SVM and
    a random forest on the given training data, majority-votes their
    predictions on the test images, and writes an interleaved CSV
    (ID, Sample, Label, Predicted) to ``outputDir\\final_sub.csv``.

    :param samples: per-target list of training feature matrices
    :param labels: per-target list of training label vectors
    :param feature_selector: per-target fitted selectors (used for targets 1-2)
    :param inputDir: directory containing the ``set_test`` folder
    :param outputDir: directory receiving final_sub.csv
    """
    # test set
    testDir = inputDir + "\\set_test"
    testFiles = sorted([
        join(testDir, f) for f in listdir(testDir) if isfile(join(testDir, f))
    ], key=numericalSort)
    # Different features for gender
    testSamples_gender = cubeVoxelsVar_gender(testFiles)
    # Same features for age and health
    testSamples_age = cubeVoxelsVar_age(testFiles)
    testSamples_health = testSamples_age
    testSamples = [testSamples_gender, testSamples_age, testSamples_health]

    # 2D array to report final prediction in format (ID,Sample,Label,Predicted);
    # rows for the three targets are interleaved (stride 3).
    final = [[0 for j in range(4)] for i in range(1 + 138 * 3)]
    final[0][0] = 'ID'
    final[0][1] = 'Sample'
    final[0][2] = 'Label'
    final[0][3] = 'Predicted'
    total_labels = ['gender', 'age', 'health']
    for label in range(3):
        # NOTE(review): message always says "label 1" regardless of the
        # current target — presumably should report `label`.
        print("Prediction label 1 started!")
        id_count = label
        # Training logistic regression
        logRegrL1 = linear_model.LogisticRegression()
        logRegrL1.fit(samples[label], labels[label])
        # Training SVM with linear kernel
        svmLin = SVC(kernel='linear')
        svmLin.fit(samples[label], labels[label])
        # Training Random Forest Classifier
        rfc = RandomForestClassifier(n_estimators=100)
        rfc.fit(samples[label], labels[label])
        print("Training complete!")
        # Do feature selection only for age and health
        if label == 0:
            testSamples_curr = testSamples[label]
        else:
            testSamples_curr = feature_selector[label].transform(
                testSamples[label])
        print(len(testSamples_curr))
        id = label + 1
        # Predict gender, age and health status of test image using each of
        # the 3 models trained above
        for sampleNum, sample in enumerate(testSamples_curr):
            predictionL1 = logRegrL1.predict(sample)
            predictionSvmLin = svmLin.predict(sample)
            predictionRfc = rfc.predict(sample)
            final[id][0] = id_count
            final[id][1] = sampleNum
            final[id][2] = total_labels[label]
            # Majority vote over the three models; assumes binary 0/1 labels
            # so a sum >= 2 means at least two models voted positive —
            # TODO confirm labels are 0/1.
            votes = predictionL1[0] + predictionSvmLin[0] + predictionRfc[0]
            final[id][3] = 'TRUE' if votes >= 2.0 else 'FALSE'
            # Stride 3: rows of the three targets are interleaved.
            id = id + 3
            id_count = id_count + 3
        print('Prediction done!')
    # Save csv file in the output directory with name final_sub.csv
    np.savetxt(outputDir + "\\final_sub.csv", final, delimiter=',', fmt='%s')
# Exhaustively evaluate every proper subset (size 1..N-1) of the feature
# types beyond index 0, retraining a linear SVM per combination.
N = len(feature_types)
for K in range(1, N):
    for subset in combinations(range(1, N), K):
        # Map the chosen feature-type names back to their type indexes, then
        # expand those to the concrete feature-column indexes.
        feature_index_filtered = numpy.array([
            list(feature_types).index(f) for f in feature_types[list(subset)]
        ])
        feature_index_filtered = numpy.concatenate(
            feature_type_indexes[list(feature_index_filtered)])
        # extract the column of the features considered in the current
        # combination; the feature space is reduced
        X_filter = X[:, feature_index_filtered]
        X_test_filter = X_test[:, feature_index_filtered]
        clf = SVC(kernel='linear')
        clf.fit(X_filter, labels_training)
        test_predict = clf.predict(X_test_filter)
        prec, recall, f, support = precision_recall_fscore_support(
            labels_test, test_predict, beta=1)
        accuracy = accuracy_score(test_predict, labels_test)
        print(feature_types[list(subset)])
        print("feature space dimention X:", X_filter.shape)
        print("feature space dimention X_Test:", X_test_filter.shape)
        print(prec, recall, f, support)
        print(accuracy)
    columns=X_train.columns)
# NOTE(review): fragment starts mid-call above (tail of a DataFrame
# construction whose beginning is not visible here).

#:# model
params = {'gamma': 5, 'kernel': 'sigmoid', 'probability': True}
classifier = SVC(**params)
# NOTE(review): X_train is fit directly while X_test goes through
# transform_pipeline below — presumably X_train was already transformed
# upstream; confirm.
classifier.fit(X_train, y_train)

#:# hash
#:# aad366f6d5961bc98783c2ad9fb3918d
# Fingerprint the configured estimator (its repr) for audit tracking.
md5 = hashlib.md5(str(classifier).encode('utf-8')).hexdigest()
print(f'md5: {md5}')

#:# audit
y_pred = classifier.predict(transform_pipeline.transform(X_test))
y_pred_proba = classifier.predict_proba(
    transform_pipeline.transform(X_test))[:, 1]
tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()
print(f'acc: {accuracy_score(y_test, y_pred)}')
print(f'auc: {roc_auc_score(y_test, y_pred_proba)}')
print(f'precision: {precision_score(y_test, y_pred)}')
print(f'recall: {recall_score(y_test, y_pred)}')
print(f'specificity: {tn/(tn+fp)}')
print(f'f1: {f1_score(y_test, y_pred)}')

#:# session info
# Add the Python version to session info
# Dump the gradient-boosting CV grid results, then train and evaluate the
# final RBF SVM on standardized features.
print("_GBC_MEAN \t" + "_GBC_STD \t" + "_GBC_PARAM")
for _GBC_MEAN, _GBC_STD, _GBC_PARAM in _GBC_CV_Results:
    _Result_File.write(("%f \t" + "%f \t" + "%r") % (_GBC_MEAN, _GBC_STD, _GBC_PARAM) + "\n")
    print(("%f \t" + "%f \t" + "%r") % (_GBC_MEAN, _GBC_STD, _GBC_PARAM))
_Result_File.write("\n")
print()

# Final trial - final adjudication
# Final model - finalize the model: scale with training statistics, fit the
# SVC, then apply the same scaler to the validation split.
_FINAL_Scaler = StandardScaler().fit(_X_Train)
_FINAL_Rescaled_X = _FINAL_Scaler.transform(_X_Train)
_FINAL_Model = SVC(C=1.5, kernel="rbf")
_FINAL_Model.fit(X=_FINAL_Rescaled_X, y=_Y_Train)
_FINAL_Rescaled_X_Val = _FINAL_Scaler.transform(_X_Val)

# Kick off the final model evaluation.
# NOTE(review): sklearn.metrics.classification is a private module path that
# was removed in sklearn 0.24 — confirm the pinned sklearn version.
_FINAL_Predictions = _FINAL_Model.predict(_FINAL_Rescaled_X_Val)
_Result_File.write("终结性裁判启动...:\n\n")
_Result_File.write("Accuracy分数...:\n")
_Result_File.write(
    str(
        sklearn.metrics.classification.accuracy_score(
            y_true=_Y_Val, y_pred=_FINAL_Predictions)) + "\n\n")
_Result_File.write("冲突矩阵...:\n")
_Result_File.write(
    str(
        sklearn.metrics.classification.confusion_matrix(
            y_true=_Y_Val, y_pred=_FINAL_Predictions)) + "\n\n")
_Result_File.write("分类报告...:\n")
_Result_File.write("\t\t精确率\t召回率\tF1数值\t支持情况\n")
# NOTE(review): fragment is truncated here in this view (mid-expression).
_Result_File.write(
    str(
# Tail of the train/test split loop: the `if` header is not visible in this
# view. Emails route to the training arrays or (in the else branch) to the
# held-out test arrays, keyed by their doc2vec vector.
        train_arrays.append(model.docvecs[prefix_train_pos])
        train_labels.append(int(email.label))
    else:
        test_arrays.append(model.docvecs[prefix_train_pos])
        test_labels.append(int(email.label))

# Train an SVC on the doc2vec embeddings and score it on the held-out set.
classifier = SVC()
classifier.fit(numpy.array(train_arrays), numpy.array(train_labels))
print("Overall score is %f." % classifier.score(numpy.array(test_arrays),
                                                numpy.array(test_labels)))

corrects = []
wrongs = []
for email in emails:
    email_id = email.id
    prefix_train_pos = 'email_' + str(email_id)
    # Every 5th email (by id) belongs to the held-out set (matches the split
    # rule above — confirm against the loop header not visible here).
    if email_id % 5 == 0:
        prediction = classifier.predict([model.docvecs[prefix_train_pos]])[0]
        actual = int(email.label)
        if prediction != actual:
            wrongs.append((email.id, prediction, actual))
        else:
            # print(max(classifier.predict_proba([model.docvecs[prefix_train_pos]])[0]), actual)
            corrects.append(email.id)
print("%i are wrong, %i are correct." % (len(wrongs), len(corrects)))
print(wrongs)
# print("EmailID\t\tPredicted\tActual")
# for w in wrongs:
#     print("%s\t\t%s\t\t%s" % w)
metrics.confusion_matrix(y_true=_Y_VAL, y_pred=_KNC_PREDICTIONS), "\n", # " " * 4, "CLASSIFICATION_REPORT:\n", metrics.classification_report(y_true=_Y_VAL, y_pred=_KNC_PREDICTIONS), "\n", # sep="", end="\n") print() ############################################################ # 资瓷矢量机预测 _SVC_MODEL = SVC() _SVC_MODEL.fit(X=_X_TRAIN, y=_Y_TRAIN) _SVC_PREDICTIONS = _SVC_MODEL.predict(X=_X_VAL) print( "SVC-资瓷矢量机预测结果:\n", # " " * 4, "ACCURACY_SCORE:\n", " " * 8, metrics.accuracy_score(y_true=_Y_VAL, y_pred=_SVC_PREDICTIONS), "\n", # " " * 4, "CONFUSION_MATRIX:\n", metrics.confusion_matrix(y_true=_Y_VAL, y_pred=_SVC_PREDICTIONS), "\n", # " " * 4,
# Tail of an accumulation loop whose header is not visible in this view.
    l.append(mean)

testSamples = []
# Collapse each item (indexed over 208 rows of length-176 vectors) into a
# single 176-dim feature vector by averaging the rows.
for item in l:
    mean = np.zeros(shape=(176))
    for row in range(208):
        mean = np.add(mean, item[row])
    mean = (1/208) * mean
    testSamples.append(mean)
print(len(testSamples))
testSamples = np.vstack(testSamples)

# PCA on test samples: keep the first projection (a); b and c unused here.
(a, b, c) = PCA(testSamples, 100)
testSamples = a

final = []
for item in testSamples:
    # NOTE(review): predicting on a 1-D row; newer sklearn requires
    # item.reshape(1, -1) — confirm the pinned sklearn version accepts this.
    prediction = svc.predict(item)
    final.append(prediction)
np.savetxt('mydata.csv', final, delimiter=',', fmt='%3i', header='Results')
def create_svm(self, best_kernel, best_c):
    """Fit an SVC with the selected kernel and C on the training split,
    predict on the test split, and report statistics via print_stats."""
    model = SVC(gamma='scale', kernel=best_kernel, C=best_c)
    model.fit(self.X_train, self.Y_train)
    y_hat = model.predict(self.X_test)
    self.print_stats(y_hat, "svm")
class BBNSVC(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
    """
    D3M primitive wrapping sklearn.svm.SVC.
    """
    __author__ = "JPL MARVIN"
    # NOTE(review): algorithm_types reads 'ADABOOST' although the wrapped
    # estimator is an SVC — confirm against the primitive registry.
    metadata = metadata_module.PrimitiveMetadata({
        "algorithm_types": ['ADABOOST'],
        "name": "sklearn.svm.classes.SVC",
        "primitive_family": "CLASSIFICATION",
        "python_path": "d3m.primitives.bbn.time_series.BBNSVC",
        "source": {'name': 'JPL'},
        "version": "0.1.0",
        "id": "a2ee7b2b-99c6-4326-b2e7-e081cd292d78",
        'installation': [{'type': metadata_module.PrimitiveInstallationType.PIP,
                          'package_uri': 'git+https://gitlab.datadrivendiscovery.org/jpl/d3m_sklearn_wrap.git@{git_commit}'.format(
                              git_commit=utils.current_git_commit(os.path.dirname(__file__)),
                          ),
                          }]
    })

    def __init__(self, *, hyperparams: Hyperparams, random_seed: int = 0,
                 docker_containers: Dict[str, str] = None, _verbose: int = 0) -> None:
        # Build the underlying SVC from the supplied hyperparams; the
        # primitive's random seed becomes the estimator's random_state.
        super().__init__(hyperparams=hyperparams, random_seed=random_seed,
                         docker_containers=docker_containers)
        self._clf = SVC(
            C=self.hyperparams['C'],
            kernel=self.hyperparams['kernel'],
            degree=self.hyperparams['degree'],
            gamma=self.hyperparams['gamma'],
            coef0=self.hyperparams['coef0'],
            probability=self.hyperparams['probability'],
            shrinking=self.hyperparams['shrinking'],
            tol=self.hyperparams['tol'],
            class_weight=self.hyperparams['class_weight'],
            max_iter=self.hyperparams['max_iter'],
            decision_function_shape=self.hyperparams['decision_function_shape'],
            verbose=_verbose,
            random_state=self.random_seed,
        )
        # Training data buffers; populated by set_training_data().
        self._training_inputs = None
        self._training_outputs = None
        self._fitted = False

    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
        """Store training data and mark the model as needing a refit."""
        self._training_inputs = inputs
        self._training_outputs = outputs
        self._fitted = False

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        """Fit the wrapped SVC on the stored training data (idempotent)."""
        if self._fitted:
            return CallResult(None)
        if self._training_inputs is None or self._training_outputs is None:
            raise ValueError("Missing training data.")
        self._clf.fit(self._training_inputs, self._training_outputs)
        self._fitted = True
        return CallResult(None)

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """Predict class labels for the given inputs."""
        return CallResult(self._clf.predict(inputs))

    def produce_log_proba(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        # NOTE(review): predict_log_proba requires the SVC to be constructed
        # with probability=True — confirm the 'probability' hyperparam default.
        return CallResult(self._clf.predict_log_proba(inputs))

    def get_params(self) -> Params:
        """Export the fitted estimator's learned attributes."""
        return Params(
            support=self._clf.support_,
            support_vectors=self._clf.support_vectors_,
            n_support=self._clf.n_support_,
            dual_coef=self._clf.dual_coef_,
            coef=self._clf.coef_,
            intercept=self._clf.intercept_,
        )

    def set_params(self, *, params: Params) -> None:
        # NOTE(review): params.coef is not restored here (for linear kernels
        # sklearn derives coef_ from dual_coef_) — confirm this is deliberate.
        self._clf.support_ = params.support
        self._clf.support_vectors_ = params.support_vectors
        self._clf.n_support_ = params.n_support
        self._clf.dual_coef_ = params.dual_coef
        self._clf.intercept_ = params.intercept
# create the feature space with all available features X, feature_names, feature_type_indexes = feature_manager.create_feature_space( tweets, feature_types) print("features:", feature_types) print("feature space dimension:", X.shape) golden = [] predict = [] kf = KFold(n_splits=5, random_state=True) for index_train, index_test in kf.split(X): clf = SVC(kernel="linear") clf.fit(X[index_train], labels[index_train]) test_predict = clf.predict(X[index_test]) golden = numpy.concatenate((golden, labels[index_test]), axis=0) predict = numpy.concatenate((predict, test_predict), axis=0) prec, recall, f, support = precision_recall_fscore_support(golden, predict, beta=1) accuracy = accuracy_score(golden, predict) print(prec) print(recall) print(f) print(support) print(accuracy)
# NOTE: Python 2 source (print statement and cPickle below). Tail of a
# data-reading loop whose header is not visible in this view.
# NOTE(review): random.random() > 0 is effectively always True, so this
# condition admits every row regardless of Y_label — confirm intended.
        if Y_label != 'NULL' or random.random() > 0:
            # Binary target: 1 iff this sample's label matches event_name.
            if Y_label == event_name:
                Y = 1
            else:
                Y = 0
            if i == 0:
                # First accepted sample initializes the accumulators.
                X_all = X
                Y_all = Y
                i = 1
            else:
                X_all = np.vstack((X_all, X))
                Y_all = np.append(Y_all, Y)
                i += 1
# print (i)
# print (np.sum(X_all, axis = 1))
# print(X_all, Y_all)

# Chi-squared-kernel SVM trained (and resubstitution-scored) on all samples,
# then serialized for this event.
clf = SVC(kernel=chi2_kernel)
# clf = SVC()
clf.fit(X_all, Y_all)
print(clf.score(X_all, Y_all))
print(clf.predict(X_all))
fread.close()
cPickle.dump(clf, open(output_file, "wb"))
print 'SVM trained successfully for event %s!' % (event_name)
class EEG_model:
    '''
    This class allows the EEG model to act as an independent model (like the
    facial expression model) rather than two separate models.

    Attributes:
        valence_model: model for classifying valence
        arousal_model: model for classifying arousal
        X: the list that saves all EEGs features
        y_valence: the valence label list, ground truth
        y_arousal: the arousal label list, ground truth
    '''
    # Class-level defaults; each is shadowed by an instance attribute in
    # __init__.
    valence_model = None
    arousal_model = None
    X = None
    y_valence = None
    y_arousal = None

    def __init__(self):
        # One SVC per target dimension; C=15 is the project's chosen
        # regularization strength.
        self.valence_model = SVC(C=15)
        self.arousal_model = SVC(C=15)
        self.X = []
        self.y_valence = []
        self.y_arousal = []

    def train(self):
        '''
        train valence_model and arousal_model using EEG data
        '''
        self.valence_model.fit(self.X, self.y_valence)
        self.arousal_model.fit(self.X, self.y_arousal)

    def add_one_trial_data(self, trial_path, preprocessed=False):
        '''
        read one-trial data from trial_path and put them into X, valence_y,
        arousal_y

        Parameter:
            trial_path: the file path of the trial
            preprocessed: whether the EEG data is preprocessed
        '''
        # load EEG data: raw .fif needs feature extraction, otherwise load
        # precomputed features from .npy
        if preprocessed is False:
            raw_EEG_obj = mne.io.read_raw_fif(trial_path + 'EEG.raw.fif',
                                              preload=True,
                                              verbose='ERROR')
            EEGs = extract_EEG_feature(raw_EEG_obj)
        else:
            EEGs = np.load(trial_path + 'EEG.npy')
        label = pd.read_csv(trial_path + 'label.csv')
        for EEG in EEGs:
            self.X.append(EEG)
            # Binarize ratings: above 5 counts as "high" valence/arousal.
            # NOTE(review): int() on the comparison presumably assumes
            # label.csv holds a single row — confirm.
            self.y_valence.append(int(label['valence'] > 5))
            self.y_arousal.append(int(label['arousal'] > 5))

    def predict_one_trial(self, trial_path, preprocessed=False):
        '''
        use model to predict one trial

        Parameter:
            trial_path: the trial's path
            preprocessed: whether the EEG data is preprocessed
        Return:
            A: whether the valence was correctly predicted (1 stands for
               correct, 0 otherwise)
            B: whether the arousal was correctly predicted (1 stands for
               correct, 0 otherwise)
        '''
        # load trial data
        if preprocessed is False:
            raw_EEG_obj = mne.io.read_raw_fif(trial_path + 'EEG.raw.fif',
                                              preload=True,
                                              verbose='ERROR')
            EEGs = extract_EEG_feature(raw_EEG_obj)
        else:
            EEGs = np.load(trial_path + 'EEG.npy')
        label = pd.read_csv(trial_path + 'label.csv')
        predict_valences, predict_arousals = self.valence_model.predict(
            EEGs), self.arousal_model.predict(EEGs)
        # Majority vote over the per-sample predictions of this trial.
        predict_valence = np.sum(predict_valences) / float(
            len(predict_valences)) > 0.5
        predict_arousal = np.sum(predict_arousals) / float(
            len(predict_arousals)) > 0.5
        ground_true_valence = int(label['valence']) > 5
        ground_true_arousal = int(label['arousal']) > 5
        return (predict_valence == ground_true_valence), (
            predict_arousal == ground_true_arousal)

    def predict_one_trial_scores(self, trial_path, preprocessed=False):
        '''
        use model to predict one trial

        Parameter:
            trial_path: the trial's path
            preprocessed: whether the EEG data is preprocessed
        Return:
            score_valence: the scores of valence predicted by face model
            score_arousal: the scores of arousal predicted by EEG model
        '''
        # load trial data
        if preprocessed is False:
            raw_EEG_obj = mne.io.read_raw_fif(trial_path + 'EEG.raw.fif',
                                              preload=True,
                                              verbose='ERROR')
            EEGs = extract_EEG_feature(raw_EEG_obj)
        else:
            EEGs = np.load(trial_path + 'EEG.npy')
        predict_valences, predict_arousals = self.valence_model.predict(
            EEGs), self.arousal_model.predict(EEGs)
        # Score = fraction of per-sample predictions voting "high".
        score_valence = np.sum(predict_valences) / float(len(predict_valences))
        score_arousal = np.sum(predict_arousals) / float(len(predict_arousals))
        return score_valence, score_arousal

    def predict_one_trial_results(self, trial_path, preprocessed=False):
        '''
        use model to predict one trial

        Parameter:
            trial_path: the trial's path
            preprocessed: whether the EEG data is preprocessed
        Return:
            result_valence: the results of valence predicted by face model
            result_arousal: the results of arousal predicted by EEG model
        '''
        # Threshold the vote fractions at 0.5 to obtain boolean results.
        score_valence, score_arousal = self.predict_one_trial_scores(
            trial_path, preprocessed)
        result_valence = score_valence > 0.5
        result_arousal = score_arousal > 0.5
        return result_valence, result_arousal