Beispiel #1
0
def phoneAccelerometerISVM():
    """Train and cross-validate an SVM classifying accelerometer windows.

    Reads ./Train_Phone-Acc-nexus4_1-a.csv (columns used: 'y' signal,
    'gt' ground-truth activity label), slices the 'y' series into
    non-overlapping 500-sample windows labelled +1 for 'sit' and -1
    otherwise, fits an SVC on all windows, then reports hold-out error,
    accuracy and F1 on a 10% stratified split.
    """
    print("Loading data...")
    data = pd.read_csv("./Train_Phone-Acc-nexus4_1-a.csv")
    print("Done!")

    # Parse data and make sit vs not-sit classification using an SVM.
    # Note: window width of 500 samples, non-overlapping.
    # NOTE(review): the original read data.get_value(...) into an unused
    # variable; get_value was removed from pandas, so that line is dropped.
    print("Finding time series windows indexes for each class kind...")
    window = 500
    y = []
    X = []
    pos = 0
    # Stop before a trailing partial window: a shorter last slice would make
    # X ragged and SVC.fit would fail on the non-rectangular array.
    while pos + window <= data.shape[0]:
        # Label the window by the ground truth of its first sample.
        y.append(1 if str(data.iloc[pos]['gt']) == 'sit' else -1)

        # Feature vector: the raw accelerometer 'y' samples of the window.
        X.append(data.iloc[pos:pos + window]['y'])

        # Move to the next window.
        pos += window
    print("Done!")

    # Build and fit the SVM on every window.
    print("Training SVM on all data accelerometer data...")
    X = np.array(X)
    y = np.array(y)
    clfs = SVC()
    clfs.fit(X, y)
    print("Done!")

    # Cross validation: 10% stratified hold-out.
    print("Training SVM on accelerometer training only data...")
    X_train, X_test, y_train, y_test = train_test_split(X, y, stratify=y, test_size=0.1)
    clfs = SVC()
    clfs.fit(X_train, y_train)
    yhat = clfs.predict(X_test)
    print("Abs Error = %f"%( calculateTotalAbsoluteError(yhat, y_test)/len(yhat)))
    print("Test data mean accuracy SVM score: %f"%clfs.score(X_test, y_test))
    # Reuse yhat instead of calling predict a second time.
    f1_c0 = f1_score(y_test, yhat, pos_label=1, average='binary')
    print("Test data f1 score for class +1: %f" % (f1_c0))
    print("Done!")
Beispiel #2
0
Datei: svm.py Projekt: revotus/Va
class SVMClassifier(ClassifierI):

    """Wrapper exposing scikit-learn's SVC through the ClassifierI API."""

    def __init__(self, C=1.0, kernel='rbf', degree=3, gamma='auto', coef0=0.0,
                 shrinking=True, probability=False, tol=1e-3, cache_size=200,
                 class_weight=None, verbose=False, max_iter=-1,
                 decision_function_shape=None, random_state=None):
        """Init. All parameters are forwarded to sklearn.svm.SVC; see the
        scikit-learn documentation for their meaning."""
        # BUG FIX: C was hard-coded to 1, silently ignoring the caller's C.
        self._clf = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma,
                        coef0=coef0, shrinking=shrinking,
                        probability=probability, tol=tol, cache_size=cache_size,
                        class_weight=class_weight, verbose=verbose,
                        max_iter=max_iter,
                        decision_function_shape=decision_function_shape,
                        random_state=random_state)
        # Maps the integer class indices the SVC predicts back to the
        # original labels; populated by train().
        self.classes_ = None

    def __repr__(self):
        return "<SVMClassifier(%r)>" % self._clf

    def classify_many(self, vectors):
        """Classify a batch of samples.

        :param vectors: A doc-term array of vectors
        :return: The predicted class label for each input sample.
        :rtype: list
        """
        classes = self.classes_
        return [classes[i] for i in self._clf.predict(vectors)]

    def prob_classify_many(self, vectors):
        """Compute per-class probabilities for a batch of samples.

        NOTE(review): requires the SVC to be constructed with
        probability=True (default here is False), otherwise predict_proba
        raises. Each result is a dict mapping label -> probability (the
        original docstring claimed ``ProbDistI`` — confirm callers).

        :param vectors: A doc-term array of vectors
        :rtype: list of dict
        """
        y_proba_list = self._clf.predict_proba(vectors)
        return [self._make_probdist(y_proba) for y_proba in y_proba_list]

    def labels(self):
        """The class labels learned by this classifier.

        :rtype: list
        """
        return list(self.classes_)

    def train(self, vectors, labels):
        """Train (fit) the scikit-learn svm classifier.

        :param vectors: a doc-term array of vectors to learn from
        :param labels: a list of labels corresponding to the rows
            of the doc-term array.
        :return: self, to allow chaining.
        """
        # np.unique with return_inverse yields both the sorted unique labels
        # and the integer-encoded targets the SVC is actually fit on.
        self.classes_, labels = np.unique(labels, return_inverse=True)
        self._clf.fit(vectors, labels)

        return self

    def _make_probdist(self, y_proba):
        # Map each probability back to its original (decoded) class label.
        classes = self.classes_
        return dict((classes[i], p) for i, p in enumerate(y_proba))
Beispiel #3
0
class SVCImpl():
    """Thin wrapper that stores SVC hyper-parameters at construction time
    and defers building the sklearn model until fit(); predict and
    predict_proba delegate to the fitted model."""

    def __init__(self, C=1.0, kernel='rbf', degree=3, gamma='auto_deprecated', coef0=0.0, shrinking=True, probability=False, tol=0.001, cache_size=200, class_weight='balanced', verbose=False, max_iter=-1, decision_function_shape='ovr', random_state=None):
        # Remember every hyper-parameter; the model itself is built lazily.
        self._hyperparams = dict(
            C=C,
            kernel=kernel,
            degree=degree,
            gamma=gamma,
            coef0=coef0,
            shrinking=shrinking,
            probability=probability,
            tol=tol,
            cache_size=cache_size,
            class_weight=class_weight,
            verbose=verbose,
            max_iter=max_iter,
            decision_function_shape=decision_function_shape,
            random_state=random_state,
        )

    def fit(self, X, y=None):
        """Build the underlying sklearn model and fit it, with targets when
        provided. Returns self for chaining."""
        self._sklearn_model = SKLModel(**self._hyperparams)
        fit_args = (X,) if y is None else (X, y)
        self._sklearn_model.fit(*fit_args)
        return self

    def predict(self, X):
        """Delegate class prediction to the fitted sklearn model."""
        return self._sklearn_model.predict(X)

    def predict_proba(self, X):
        """Delegate probability prediction to the fitted sklearn model."""
        return self._sklearn_model.predict_proba(X)
Beispiel #4
0
def svm_train(X, y, model_path):
    """Fit an SVC on (X, y), print a classification report and confusion
    matrix evaluated on the training data itself, and persist the fitted
    model to model_path via joblib."""
    clf = SVC()
    clf.fit(X, y)
    y_true, y_pred = y, clf.predict(X)
    print(metrics.classification_report(y_true, y_pred))
    print(metrics.confusion_matrix(y_true, y_pred))
    joblib.dump(clf, model_path)
Beispiel #5
0
def classifier_panchenko2016(X_train,
                             y_train,
                             X_test,
                             y_test,
                             separateClassifier=False):
    """Panchenko-et-al.-2016-style pipeline: outlier removal on the combined
    splits, 100-feature extraction per split, min-max scaling, then an
    RBF-kernel SVM. Returns the (filtered) test labels and predictions."""
    # Tag each sample with its split so outlier_removal can hand back
    # separate train/test partitions after working on the combined data.
    split_tags = ["train"] * len(y_train) + ["test"] * len(y_test)
    y_train, X_train, y_test, X_test = outlier_removal(
        split_tags, X_train + X_test, y_train + y_test)

    # Extract the 100-feature representation for each split.
    y_train, X_train = features_extraction(
        y_train, X_train,
        separateClassifier=separateClassifier, featuresCount=100)
    y_test, X_test = features_extraction(
        y_test, X_test,
        separateClassifier=separateClassifier, featuresCount=100)

    # Scale features to [0, 1] using the training range only.
    scaler = MinMaxScaler()
    X_train = scaler.fit_transform(X_train)
    X_test = scaler.transform(X_test)

    # Hyper-parameters: very large C with an RBF kernel, balanced classes.
    classifier = SVC(kernel="rbf",
                     C=2e11,
                     gamma=2e-1,
                     max_iter=5000,
                     class_weight="balanced",
                     verbose=1)

    print("fitting")
    classifier.fit(X_train, y_train)

    print("testing")
    y_predictions = classifier.predict(X_test)

    return y_test, y_predictions
    test_data_set = DataSet()
    test_data_set.load(config.get_value('test'), class_index, has_header=False)
    Xtest, Ytest = test_data_set.convert_2_binary_format_with(
        X_train.item_dict, Y_train.item_dict)
    Ytest = Ytest.flatten()

    class_count = train_data_set.number_of_classes()

    unexpected_rules = IOHelper.load_json_object(config.get_value('rules'))
    refined_unexpected_rules = filter_association_rules(unexpected_rules)

    print('svm testing...')
    svc_model = SVC(kernel='poly', degree=3, coef0=0.1, random_state=1)
    svc_model.fit(X_train.relation_matrix, Y_train.values.flatten())

    svc_y_pred = svc_model.predict(Xtest)
    print(f1_score(Ytest, svc_y_pred, average=None))
    if (class_count <= 2):
        fpr, tpr, _ = roc_curve(Ytest, svc_y_pred.flatten())
        print(auc(fpr, tpr))

    refine_with_unexpectedness(test_data_set, Y_train.item_dict, svc_y_pred,
                               Ytest, refined_unexpected_rules)

    print('Random forest testing...')
    rf_model = RandomForestClassifier(n_estimators=20, random_state=1)
    rf_model.fit(X_train.relation_matrix, Y_train.values.flatten())

    rf_y_pred = rf_model.predict(Xtest)
    print(f1_score(Ytest, rf_y_pred, average=None))
    if (class_count <= 2):
]

# NOTE(review): script fragment — tweets_training, tweets_test,
# feature_manager, feature_type and labels_training are defined earlier,
# outside this excerpt.
print("train: ", len(tweets_training))
print("test: ", len(tweets_test))

# Build one shared feature space covering both train and test tweets.
X, X_test, feature_name, feature_index = feature_manager.create_feature_space(
    tweets_training, feature_type, tweets_test)

print(feature_name)
print("feature space dimension X:", X.shape)
print("feature space dimension X_test:", X_test.shape)

# Linear-kernel SVM trained on the full feature space.
clf = SVC(kernel="linear")

clf.fit(X, labels_training)
test_predict = clf.predict(X_test)
# The triple-quoted block below is disabled evaluation code (precision/
# recall/F1 and accuracy on the test split), kept as-is.
"""prec, recall, f, support = precision_recall_fscore_support(
labels_test,
test_predict,
beta=1)

accuracy = accuracy_score(
test_predict,
labels_test
)

print(prec, recall, f, support )
print(accuracy)"""

# Open one output CSV per test tweet's language (loop body continues
# beyond this excerpt).
for i in range(0, len(tweets_test)):
    csvfile = open('ATC_' + tweets_test[i].language + '.csv', 'w', newline='')
Beispiel #8
0
    # print (X_trn, X_val)
    # print (trn_embedding, val_embedding)
    # print (triplet_model.get_weights())

    clf = SVC(
        # class_weight='balanced',
        probability=True,
        # tol=1e-4,
    )

    clf.fit(trn_embedding, Y_trn)

    print(clf.score(val_embedding, Y_val))
    print(clf.predict_proba(val_embedding))

    print(roc_auc_score(Y_val, clf.predict(val_embedding)))
    print(classification_report(Y_val, clf.predict(val_embedding), digits=4))

    all_files = [x[:-8] for x in os.listdir(ALL_FILES)]
    X = [
        pickle.load(open(os.path.join(FEATURE_PATH, x + ".fkmeans"), "rb"),
                    encoding='latin1') for x in all_files
    ]
    # Y = [ranks[x.split()[0].strip()] for x in all_files]

    proba = clf.predict_proba(embed(np.array(X), triplet_model))

    # print(len(proba), len(proba[0]), proba[0])

    wf = open("proba_audio_wo_pretrain.txt", "w")
    for i, [_, prob] in enumerate(proba):
                       key=numericalSort)

    #Read feature vectors of test images
    testSamples = readVoxels(testFiles)
    print(len(testSamples))

    #2D array to report final prediction in format (ID,Prediction)
    final = [[0 for j in range(2)] for i in range(139)]
    final[0][0] = 'ID'
    final[0][1] = 'Prediction'
    id = 1

    #Predict age of test image using each of the 4 models trained above
    for item in testSamples:
        predictionL = regrL.predict(item)
        predictionR = regrR.predict(item)
        predictionS = regrS.predict(item)
        predictionRfc = rfc.predict(item)

        final[id][0] = id
        #Taking the average of each of the model predictions as final age prediction
        final[id][1] = (predictionL[0] + predictionR[0] + predictionS[0] +
                        predictionRfc[0]) // 4
        id = id + 1

    #Save csv file in the output directory provided as argument with name Dota2Prediction.csv
    np.savetxt(outputDir + "\\Dota2Prediction.csv",
               final,
               delimiter=',',
               fmt='%s')
    print("Finished!")
Beispiel #10
0
def train_and_predict(samples, labels, feature_selector, inputDir, outputDir):
    """Train three classifiers per target label (gender, age, health),
    majority-vote their predictions on the test set, and write the result
    table to <outputDir>\\final_sub.csv.

    :param samples: per-label list of training feature matrices
    :param labels: per-label list of training target vectors
    :param feature_selector: per-label fitted selectors (applied only for
        age and health; gender uses raw features)
    :param inputDir: directory containing the "set_test" folder
    :param outputDir: directory receiving final_sub.csv
    """
    # Test set: files sorted numerically so sample IDs line up with rows.
    testDir = inputDir + "\\set_test"
    testFiles = sorted([
        join(testDir, f) for f in listdir(testDir) if isfile(join(testDir, f))
    ],
                       key=numericalSort)

    # Different features for gender
    testSamples_gender = cubeVoxelsVar_gender(testFiles)

    # Same features for age and health
    testSamples_age = cubeVoxelsVar_age(testFiles)
    testSamples_health = testSamples_age

    testSamples = [testSamples_gender, testSamples_age, testSamples_health]

    # 2D report table in format (ID, Sample, Label, Predicted):
    # header row + 138 samples x 3 labels, rows interleaved per label.
    final = [[0 for j in range(4)] for i in range(1 + 138 * 3)]
    final[0][0] = 'ID'
    final[0][1] = 'Sample'
    final[0][2] = 'Label'
    final[0][3] = 'Predicted'

    total_labels = ['gender', 'age', 'health']

    for label in range(3):
        # BUG FIX: this message previously always said "label 1" for all
        # three labels.
        print("Prediction label %s started!" % total_labels[label])
        id_count = label
        # Training logistic regression
        logRegrL1 = linear_model.LogisticRegression()
        logRegrL1.fit(samples[label], labels[label])

        # Training SVM with linear kernel
        svmLin = SVC(kernel='linear')
        svmLin.fit(samples[label], labels[label])

        # Training Random Forest Classifier
        rfc = RandomForestClassifier(n_estimators=100)
        rfc.fit(samples[label], labels[label])

        print("Training complete!")

        # Feature selection applies only to age (1) and health (2);
        # gender (0) keeps its raw feature set.
        if label == 0:
            testSamples_curr = testSamples[label]
        else:
            testSamples_curr = feature_selector[label].transform(
                testSamples[label])
        print(len(testSamples_curr))

        # Rows for label L occupy positions L+1, L+4, L+7, ...
        # (renamed from `id` to avoid shadowing the builtin).
        row = label + 1

        # Predict each test sample with all three models and majority-vote.
        for sampleNum, sample in enumerate(testSamples_curr):
            predictionL1 = logRegrL1.predict(sample)
            predictionSvmLin = svmLin.predict(sample)
            predictionRfc = rfc.predict(sample)

            final[row][0] = id_count
            final[row][1] = sampleNum
            final[row][2] = total_labels[label]

            # Assumes binary 0/1 predictions: >= 2 positive votes wins.
            votes = predictionL1[0] + predictionSvmLin[0] + predictionRfc[0]

            final[row][3] = 'TRUE' if votes >= 2.0 else 'FALSE'
            row = row + 3
            id_count = id_count + 3
        print('Prediction done!')

    # Save csv file in the output directory with name final_sub.csv
    np.savetxt(outputDir + "\\final_sub.csv", final, delimiter=',', fmt='%s')
# Feature-ablation sweep: for every non-empty proper subset of feature
# types (indices 1..N-1 — index 0 is deliberately never included here;
# NOTE(review): confirm that exclusion is intentional), train a linear
# SVM on just those feature columns and report precision/recall/F1 and
# accuracy on the test split. X, X_test, feature_types,
# feature_type_indexes, labels_training and labels_test come from
# earlier, outside this excerpt.
N = len(feature_types)
for K in range(1, N):
    for subset in combinations(range(1, N), K):

        # Map the chosen feature-type names to their column index groups,
        # then flatten into one array of column indices.
        feature_index_filtered = numpy.array([
            list(feature_types).index(f) for f in feature_types[list(subset)]
        ])
        feature_index_filtered = numpy.concatenate(
            feature_type_indexes[list(feature_index_filtered)])

        # extract the column of the features considered in the current combination
        # the feature space is reduced
        X_filter = X[:, feature_index_filtered]
        X_test_filter = X_test[:, feature_index_filtered]

        clf = SVC(kernel='linear')

        clf.fit(X_filter, labels_training)
        test_predict = clf.predict(X_test_filter)

        prec, recall, f, support = precision_recall_fscore_support(
            labels_test, test_predict, beta=1)

        accuracy = accuracy_score(test_predict, labels_test)

        print(feature_types[list(subset)])
        print("feature space dimention X:", X_filter.shape)
        print("feature space dimention X_Test:", X_test_filter.shape)
        print(prec, recall, f, support)
        print(accuracy)
Beispiel #12
0
                       columns=X_train.columns)

#:# model

# Fixed hyper-parameters; probability=True is required for predict_proba
# in the audit section below.
params = {'gamma': 5, 'kernel': 'sigmoid', 'probability': True}

classifier = SVC(**params)
# NOTE(review): X_train is fit directly while X_test goes through
# transform_pipeline below — presumably X_train was already transformed
# upstream (outside this excerpt); verify.
classifier.fit(X_train, y_train)

#:# hash
#:# aad366f6d5961bc98783c2ad9fb3918d
# Fingerprint of the configured estimator's repr, for audit tracking.
md5 = hashlib.md5(str(classifier).encode('utf-8')).hexdigest()
print(f'md5: {md5}')

#:# audit
y_pred = classifier.predict(transform_pipeline.transform(X_test))
# Positive-class probabilities (column 1) for ROC-AUC.
y_pred_proba = classifier.predict_proba(
    transform_pipeline.transform(X_test))[:, 1]

tn, fp, fn, tp = confusion_matrix(y_test, y_pred).ravel()

print(f'acc: {accuracy_score(y_test, y_pred)}')
print(f'auc: {roc_auc_score(y_test, y_pred_proba)}')
print(f'precision: {precision_score(y_test, y_pred)}')
print(f'recall: {recall_score(y_test, y_pred)}')
print(f'specificity: {tn/(tn+fp)}')
print(f'f1: {f1_score(y_test, y_pred)}')

#:# session info

# TODO: add the Python version to the session info
Beispiel #13
0
print("_GBC_MEAN \t" + "_GBC_STD \t" + "_GBC_PARAM")
for _GBC_MEAN, _GBC_STD, _GBC_PARAM in _GBC_CV_Results:
    _Result_File.write(("%f \t" + "%f \t" + "%r") %
                       (_GBC_MEAN, _GBC_STD, _GBC_PARAM) + "\n")
    print(("%f \t" + "%f \t" + "%r") % (_GBC_MEAN, _GBC_STD, _GBC_PARAM))
_Result_File.write("\n")
print()
# Final Trail - 终结性裁判
# Final Model - 模型终结化
_FINAL_Scaler = StandardScaler().fit(_X_Train)
_FINAL_Rescaled_X = _FINAL_Scaler.transform(_X_Train)
_FINAL_Model = SVC(C=1.5, kernel="rbf")
_FINAL_Model.fit(X=_FINAL_Rescaled_X, y=_Y_Train)
_FINAL_Rescaled_X_Val = _FINAL_Scaler.transform(_X_Val)
# 终结性模型评估启动
_FINAL_Predictions = _FINAL_Model.predict(_FINAL_Rescaled_X_Val)
_Result_File.write("终结性裁判启动...:\n\n")
_Result_File.write("Accuracy分数...:\n")
_Result_File.write(
    str(
        sklearn.metrics.classification.accuracy_score(
            y_true=_Y_Val, y_pred=_FINAL_Predictions)) + "\n\n")
_Result_File.write("冲突矩阵...:\n")
_Result_File.write(
    str(
        sklearn.metrics.classification.confusion_matrix(
            y_true=_Y_Val, y_pred=_FINAL_Predictions)) + "\n\n")
_Result_File.write("分类报告...:\n")
_Result_File.write("\t\t精确率\t召回率\tF1数值\t支持情况\n")
_Result_File.write(
    str(
        train_arrays.append(model.docvecs[prefix_train_pos])
        train_labels.append(int(email.label))
    else:
        test_arrays.append(model.docvecs[prefix_train_pos])
        test_labels.append(int(email.label))
        
# Train an SVC on doc2vec email vectors and report hold-out accuracy.
# train_arrays/train_labels/test_arrays/test_labels, emails and the
# doc2vec `model` come from earlier, outside this excerpt.
classifier = SVC()
classifier.fit(numpy.array(train_arrays), numpy.array(train_labels))

print("Overall score is %f." % classifier.score(numpy.array(test_arrays), numpy.array(test_labels)))

# Re-classify the held-out emails (presumably every 5th email by id was
# the test split — TODO confirm against the split code above) and collect
# which ones the classifier got right or wrong.
corrects = []
wrongs = []
for email in emails:
    email_id = email.id
    prefix_train_pos = 'email_' + str(email_id)
    if email_id % 5 == 0:
        prediction = classifier.predict([model.docvecs[prefix_train_pos]])[0]
        actual = int(email.label)
        if prediction != actual:
            wrongs.append((email.id, prediction, actual))
        else:
#             print(max(classifier.predict_proba([model.docvecs[prefix_train_pos]])[0]), actual)
            corrects.append(email.id)

print("%i are wrong, %i are correct." % (len(wrongs), len(corrects)))
print(wrongs)
# print("EmailID\t\tPredicted\tActual")
# for w in wrongs:
#     print("%s\t\t%s\t\t%s" % w)
Beispiel #15
0
    metrics.confusion_matrix(y_true=_Y_VAL, y_pred=_KNC_PREDICTIONS),
    "\n",
    #
    " " * 4,
    "CLASSIFICATION_REPORT:\n",
    metrics.classification_report(y_true=_Y_VAL, y_pred=_KNC_PREDICTIONS),
    "\n",
    #
    sep="",
    end="\n")
print()
############################################################
# 资瓷矢量机预测
_SVC_MODEL = SVC()
_SVC_MODEL.fit(X=_X_TRAIN, y=_Y_TRAIN)
_SVC_PREDICTIONS = _SVC_MODEL.predict(X=_X_VAL)
print(
    "SVC-资瓷矢量机预测结果:\n",
    #
    " " * 4,
    "ACCURACY_SCORE:\n",
    " " * 8,
    metrics.accuracy_score(y_true=_Y_VAL, y_pred=_SVC_PREDICTIONS),
    "\n",
    #
    " " * 4,
    "CONFUSION_MATRIX:\n",
    metrics.confusion_matrix(y_true=_Y_VAL, y_pred=_SVC_PREDICTIONS),
    "\n",
    #
    " " * 4,
        l.append(mean)

    testSamples = []
    for item in l:
        mean = np.zeros(shape=(176))
        for row in range(208):
            mean = np.add(mean, item[row])
        mean = (1/208) * mean
        testSamples.append(mean)

    print(len(testSamples))

    testSamples = np.vstack(testSamples)

    #PCA on test samples
    (a,b,c) = PCA(testSamples, 100)
    testSamples = a


    final = []
    for item in testSamples:
        prediction = svc.predict(item)
        final.append(prediction)

    np.savetxt('mydata.csv', 
           final, 
           delimiter=',', 
           fmt='%3i', 
           header='Results')

    
 def create_svm(self, best_kernel, best_c):
     """Fit an SVC with the selected kernel and C on the training split,
     then report prediction quality on the test split via print_stats."""
     model = SVC(gamma='scale', kernel=best_kernel, C=best_c)
     model.fit(self.X_train, self.Y_train)
     y_hat = model.predict(self.X_test)
     self.print_stats(y_hat, "svm")
Beispiel #18
0
class BBNSVC(SupervisedLearnerPrimitiveBase[Inputs, Outputs, Params, Hyperparams]):
    """
    D3M primitive wrapping sklearn.svm.SVC.

    (The original docstring said sklearn.ensemble.AdaBoostClassifier, but
    the estimator constructed in __init__ below is an SVC.)
    """

    __author__ = "JPL MARVIN"
    # NOTE(review): 'algorithm_types': ['ADABOOST'] looks like a copy-paste
    # leftover from an AdaBoost primitive — the wrapped model is an SVC.
    # Metadata is runtime data, so it is left unchanged here; confirm.
    metadata = metadata_module.PrimitiveMetadata({ 
         "algorithm_types": ['ADABOOST'],
         "name": "sklearn.svm.classes.SVC",
         "primitive_family": "CLASSIFICATION",
         "python_path": "d3m.primitives.bbn.time_series.BBNSVC",
         "source": {'name': 'JPL'},
         "version": "0.1.0",
         "id": "a2ee7b2b-99c6-4326-b2e7-e081cd292d78",
         'installation': [{'type': metadata_module.PrimitiveInstallationType.PIP,
                           'package_uri': 'git+https://gitlab.datadrivendiscovery.org/jpl/d3m_sklearn_wrap.git@{git_commit}'.format(
                               git_commit=utils.current_git_commit(os.path.dirname(__file__)),
                            ),
                         }]
    })

    def __init__(self, *,
                 hyperparams: Hyperparams,
                 random_seed: int = 0,
                 docker_containers: Dict[str, str] = None,
                 _verbose: int = 0) -> None:
        # Build the wrapped SVC from the supplied hyperparams; the D3M
        # random_seed doubles as the SVC's random_state.
        super().__init__(hyperparams=hyperparams, random_seed=random_seed, docker_containers=docker_containers)

        self._clf = SVC(
            C=self.hyperparams['C'],
            kernel=self.hyperparams['kernel'],
            degree=self.hyperparams['degree'],
            gamma=self.hyperparams['gamma'],
            coef0=self.hyperparams['coef0'],
            probability=self.hyperparams['probability'],
            shrinking=self.hyperparams['shrinking'],
            tol=self.hyperparams['tol'],
            class_weight=self.hyperparams['class_weight'],
            max_iter=self.hyperparams['max_iter'],
            decision_function_shape=self.hyperparams['decision_function_shape'],
            verbose=_verbose,
            random_state=self.random_seed,
        )
        # Training data is provided later via set_training_data().
        self._training_inputs = None
        self._training_outputs = None
        self._fitted = False

    def set_training_data(self, *, inputs: Inputs, outputs: Outputs) -> None:
        """Store training data and mark the primitive as needing a refit."""
        self._training_inputs = inputs
        self._training_outputs = outputs
        self._fitted = False

    def fit(self, *, timeout: float = None, iterations: int = None) -> CallResult[None]:
        """Fit the wrapped SVC on the stored training data (no-op if
        already fitted). Raises ValueError when training data is missing."""
        if self._fitted:
            return CallResult(None)

        if self._training_inputs is None or self._training_outputs is None:
            raise ValueError("Missing training data.")

        self._clf.fit(self._training_inputs, self._training_outputs)
        self._fitted = True

        return CallResult(None)

    def produce(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """Predict class labels for the given inputs."""
        return CallResult(self._clf.predict(inputs))

    def produce_log_proba(self, *, inputs: Inputs, timeout: float = None, iterations: int = None) -> CallResult[Outputs]:
        """Predict per-class log-probabilities.

        NOTE(review): predict_log_proba requires the 'probability'
        hyperparam to be True — otherwise the SVC raises; confirm callers.
        """
        return CallResult(self._clf.predict_log_proba(inputs))

    def get_params(self) -> Params:
        """Export the fitted SVC's learned state."""
        return Params(
            support=self._clf.support_,
            support_vectors=self._clf.support_vectors_,
            n_support=self._clf.n_support_,
            dual_coef=self._clf.dual_coef_,
            coef=self._clf.coef_,
            intercept=self._clf.intercept_,
        )

    def set_params(self, *, params: Params) -> None:
        """Restore learned state exported by get_params.

        NOTE(review): params.coef is exported but never restored here
        (coef_ is a derived property on linear-kernel SVCs), and _fitted
        is not set to True — confirm whether a refit is expected after
        restoring params.
        """
        self._clf.support_ = params.support
        self._clf.support_vectors_ = params.support_vectors
        self._clf.n_support_ = params.n_support
        self._clf.dual_coef_ = params.dual_coef
        self._clf.intercept_ = params.intercept
Beispiel #19
0
# create the feature space with all available features
# (feature_manager, tweets, feature_types and labels are defined earlier,
# outside this excerpt)
X, feature_names, feature_type_indexes = feature_manager.create_feature_space(
    tweets, feature_types)

print("features:", feature_types)
print("feature space dimension:", X.shape)

golden = []
predict = []
# BUG FIX: the original passed random_state=True, which is invalid —
# random_state must be an int/RandomState and has no effect unless
# shuffle=True (newer scikit-learn raises ValueError for it). Sequential,
# unshuffled splits — what the original actually produced — are kept.
kf = KFold(n_splits=5)
for index_train, index_test in kf.split(X):

    # Fresh linear SVM per fold.
    clf = SVC(kernel="linear")

    clf.fit(X[index_train], labels[index_train])
    test_predict = clf.predict(X[index_test])

    # Accumulate out-of-fold ground truth and predictions for global scoring.
    golden = numpy.concatenate((golden, labels[index_test]), axis=0)
    predict = numpy.concatenate((predict, test_predict), axis=0)

# Score the pooled out-of-fold predictions.
prec, recall, f, support = precision_recall_fscore_support(golden,
                                                           predict,
                                                           beta=1)

accuracy = accuracy_score(golden, predict)

print(prec)
print(recall)
print(f)
print(support)
print(accuracy)
Beispiel #20
0
            if Y_label != 'NULL' or random.random() > 0:
                if Y_label == event_name:
                    Y = 1
                else:
                    Y = 0

                if i == 0:
                    X_all = X
                    Y_all = Y
                    i = 1
                else:
                    X_all = np.vstack((X_all, X))
                    Y_all = np.append(Y_all, Y)
                    i += 1
        # print (i)
    # print (np.sum(X_all, axis = 1))
    # print(X_all, Y_all)

    clf = SVC(kernel=chi2_kernel)
    # clf = SVC()
    clf.fit(X_all, Y_all)

    print(clf.score(X_all, Y_all))
    print(clf.predict(X_all))

    fread.close()

    cPickle.dump(clf, open(output_file, "wb"))

    print 'SVM trained successfully for event %s!' % (event_name)
Beispiel #21
0
class EEG_model:
    '''
        Standalone EEG classification model (analogous to the facial
        expression model rather than two separate models): two
        independent SVMs classify binarized valence and arousal from
        EEG features.
        Attributes:
            valence_model: model for classifying valence
            arousal_model: model for classifying arousal
            X: the list that saves all EEG features
            y_valence: the valence label list, ground truth
            y_arousal: the arousal label list, ground truth
    '''

    # NOTE(review): these class-level attributes are immediately shadowed
    # by the instance attributes assigned in __init__; they act only as
    # declarations.
    valence_model = None
    arousal_model = None
    X = None
    y_valence = None
    y_arousal = None

    def __init__(self):
        # Two independent SVMs with C=15, one per affect dimension.
        self.valence_model = SVC(C=15)
        self.arousal_model = SVC(C=15)
        self.X = []
        self.y_valence = []
        self.y_arousal = []

    def train(self):
        '''
            Train valence_model and arousal_model on the EEG features and
            labels accumulated via add_one_trial_data().
        '''
        self.valence_model.fit(self.X, self.y_valence)
        self.arousal_model.fit(self.X, self.y_arousal)

    def add_one_trial_data(self, trial_path, preprocessed=False):
        '''
        Read one trial's data from trial_path and append its EEG features
        and binarized labels to X, y_valence and y_arousal.
        Parameter:
            trial_path: the file path of the trial
            preprocessed: whether the EEG data is preprocessed
        '''

        # Load EEG data: a raw .fif recording is feature-extracted;
        # preprocessed data is loaded directly from EEG.npy.
        if preprocessed is False:
            raw_EEG_obj = mne.io.read_raw_fif(trial_path + 'EEG.raw.fif',
                                              preload=True,
                                              verbose='ERROR')
            EEGs = extract_EEG_feature(raw_EEG_obj)
        else:
            EEGs = np.load(trial_path + 'EEG.npy')
        label = pd.read_csv(trial_path + 'label.csv')

        # Binarize ratings at 5: > 5 -> 1, otherwise 0.
        # NOTE(review): label['valence'] is a pandas column; int(...) on it
        # assumes label.csv holds one row per trial — confirm.
        for EEG in EEGs:
            self.X.append(EEG)
            self.y_valence.append(int(label['valence'] > 5))
            self.y_arousal.append(int(label['arousal'] > 5))

    def predict_one_trial(self, trial_path, preprocessed=False):
        '''
             Use the models to predict one trial and compare against the
             trial's ground-truth labels.
             Parameter:
                 trial_path: the trial's path
                 preprocessed: whether the EEG data is preprocessed
             Return:
                 A: whether the valence was correctly predicted
                 (1 stands for correct, 0 otherwise)
                 B: whether the arousal was correctly predicted
                 (1 stands for correct, 0 otherwise)
        '''

        # Load trial data (same loading path as add_one_trial_data).
        if preprocessed is False:
            raw_EEG_obj = mne.io.read_raw_fif(trial_path + 'EEG.raw.fif',
                                              preload=True,
                                              verbose='ERROR')
            EEGs = extract_EEG_feature(raw_EEG_obj)
        else:
            EEGs = np.load(trial_path + 'EEG.npy')

        label = pd.read_csv(trial_path + 'label.csv')
        # Per-sample predictions; the trial-level verdict is a majority
        # vote (mean predicted label > 0.5).
        predict_valences, predict_arousals = self.valence_model.predict(
            EEGs), self.arousal_model.predict(EEGs)
        predict_valence = np.sum(predict_valences) / float(
            len(predict_valences)) > 0.5
        predict_arousal = np.sum(predict_arousals) / float(
            len(predict_arousals)) > 0.5
        ground_true_valence = int(label['valence']) > 5
        ground_true_arousal = int(label['arousal']) > 5

        return (predict_valence == ground_true_valence), (
            predict_arousal == ground_true_arousal)

    def predict_one_trial_scores(self, trial_path, preprocessed=False):
        '''
             Use the models to score one trial.
             Parameter:
                 trial_path: the trial's path
                 preprocessed: whether the EEG data is preprocessed
             Return:
                 score_valence: fraction of samples predicted positive for
                 valence by the EEG model
                 score_arousal: fraction of samples predicted positive for
                 arousal by the EEG model
        '''
        # Load trial data (ground-truth labels are not needed for scores).
        if preprocessed is False:
            raw_EEG_obj = mne.io.read_raw_fif(trial_path + 'EEG.raw.fif',
                                              preload=True,
                                              verbose='ERROR')
            EEGs = extract_EEG_feature(raw_EEG_obj)
        else:
            EEGs = np.load(trial_path + 'EEG.npy')

        predict_valences, predict_arousals = self.valence_model.predict(
            EEGs), self.arousal_model.predict(EEGs)

        # Mean predicted label = fraction of positive per-sample votes.
        score_valence = np.sum(predict_valences) / float(len(predict_valences))
        score_arousal = np.sum(predict_arousals) / float(len(predict_arousals))

        return score_valence, score_arousal

    def predict_one_trial_results(self, trial_path, preprocessed=False):
        '''
             Use the models to predict one trial.
             Parameter:
                 trial_path: the trial's path
                 preprocessed: whether the EEG data is preprocessed
             Return:
                 result_valence: majority-vote valence verdict (score > 0.5)
                 result_arousal: majority-vote arousal verdict (score > 0.5)
        '''
        # Threshold the per-trial scores at 0.5 (majority vote).
        score_valence, score_arousal = self.predict_one_trial_scores(
            trial_path, preprocessed)
        result_valence = score_valence > 0.5
        result_arousal = score_arousal > 0.5

        return result_valence, result_arousal