# Banner announcing the start of the model-comparison run.
print("************* Learning Models Comparison ***************")

    # Load temporary placeholder data (load_iris is presumably
    # sklearn.datasets.load_iris -- confirm against the file's imports).
    iris = load_iris()
    # X holds the samples and y their targets; both are split into
    # train/test subsets further down.
    X = iris.data
    y = iris.target

    print("-----------------------------------------------")
    print("Grid search of best C parameter for Linear SVM:\n")
    # Candidate C values: 13 logarithmically spaced points from 1e-2 to 1e10.
    C_range = np.logspace(-2, 10, 13)

    # The hold-out split does not depend on C (fixed test_size and
    # random_state), so it is computed once instead of on every iteration.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.33, random_state=42)

    # Each row collected below is [C, hold-out score, training time (s)].
    # With cross-validation we could also choose the model with the
    # highest accuracy and the lowest training time.
    scores_timed = []
    for C in C_range:
        model = LinearSVMClassifier(C=C)
        # The data wrappers are still rebuilt per candidate so that no
        # TrainData instance is shared between models.
        train_data = TrainData(X_train, y_train.tolist())
        # perf_counter() is monotonic, unlike the wall clock returned by
        # time.time(), so the measured duration cannot be skewed by clock
        # adjustments. Only the training call itself is timed.
        t = time.perf_counter()
        model.train(train_data)
        t = time.perf_counter() - t
        test_data = TrainData(X_test, y_test.tolist())
        scores_timed.append([C, model.score(test_data), t])

    scores_timed = np.array(scores_timed)
    # Look the best row up once; np.argmax returns the first maximum, so
    # ties on the score go to the smallest C.
    best_row = scores_timed[np.argmax(scores_timed[:, 1])]
    best_linear_C = best_row[0]

    print("The best parameter is C=%.2f with a score of %.2f"
          % (best_linear_C, best_row[1]))
    # Build the training set from `data`: every key with at least 8 entries
    # contributes the second element of its (at most) 50 last entries, in
    # ascending order of the first tuple element, labelled with the key.
    X = []
    Y = []
    for w in data:
        entries = data[w]
        if len(entries) >= 8:
            # Sorts the list stored in `data` in place.
            entries.sort(key=lambda item: item[0])
            kept = entries[-50:]
            X.extend(entry[1] for entry in kept)
            Y.extend([w] * len(kept))
    X = np.asarray(X)
    train_data = TrainData(X, Y)
    print(X.shape)
    # End of sanitize
    
    # Export model: fit a linear SVM with default parameters on the full
    # training set and pickle it to `filename_models`.
    model = LinearSVMClassifier()
    model.fit(train_data)
    
    with open(filename_models, 'wb') as f:
        pickle.dump(model, f)
    
    # Fit and evaluate every (name, factory) pair in `models`. The loop
    # variable is deliberately not called `model`, so the fitted classifier
    # exported above is not overwritten by the last factory.
    for name, model_factory in models:
        # perf_counter() is monotonic, so the elapsed time reported below
        # is not affected by wall-clock adjustments.
        t2 = time.perf_counter()
        print("------------ %s -------------" % name)
        m = model_factory()
        m.fit(train_data)
        # "In": score on the same data the model was fitted on.
        print("In", m.scores(train_data))
        # "Cv": cross-validation scores (the 4 is presumably the number of
        # folds -- confirm against the classifier implementation).
        print("Cv", m.cross_validation_scores(train_data, 4))
        print("ok (%f s)" % (time.perf_counter() - t2))
Exemple #3
0
              the wind direction for an observer facing toward the bar is
              typically from the left and slightly behind the observer."""
    # Tokenize the sample text (word_tokenize is presumably NLTK's tokenizer
    # -- confirm against the file's imports) and keep the tokens in a NumPy
    # array.
    words = np.array(word_tokenize(text))
    # Extract features for the word "bar" (second argument; presumably the
    # ambiguous target word). np.array(words) hands a copy of the token
    # array to the extractor.
    ambiguous_data = feature_extractor.extract_features(np.array(words), "bar")
    print("Shape of the matrix of features:", ambiguous_data.data.shape)
    # `t` is assigned before this excerpt; this prints the time elapsed
    # since that point.
    print("Done,", time.time() - t, "s")

    # Dump the extracted data, targets and words for inspection.
    print(ambiguous_data.data)
    print(ambiguous_data.targets)
    print(ambiguous_data.words)

    # Learning model
    print("************************* Learning model *************************")
    # Train each SVM variant with default parameters on `train_data`, predict
    # labels for the ambiguous data, and report the labels and elapsed time.
    svm_runs = (
        ("-- SVM with Linear Kernel (default parameters", LinearSVMClassifier),
        ("-- SVM with RBF Kernel (default parameters", RbfSVMClassifier),
    )
    for banner, classifier_cls in svm_runs:
        print(banner)
        t = time.time()
        model = classifier_cls()
        model.train(train_data)
        labels = model.predict(ambiguous_data)
        print(labels)
        print("Done,", time.time() - t, "s\n\n")

    # Start of the Naive Bayes run; the timer started here is consumed by
    # code that follows this excerpt.
    print("-- Naive Bayes Classifier")
    t = time.time()