Example #1
def best_svm(tracks, feature_names, n_iter=200, save=False):
    """Randomized hyperparameter search for an RBF-kernel SVM.

    Samples n_iter candidate settings from param_dist, prints every
    candidate's cross-validation score, and optionally dumps the refit best
    estimator to disk (save doubles as the output path when truthy).
    """
    clf = machine_learning.Classifier('rbfsvm')
    X, Y = machine_learning.shape_features(tracks, feature_names)
    param_dist = {
        'C': scipy.stats.expon(scale=1000),
        'class_weight': ['auto'],
        #'loss': ['squared_hinge'],
        #'penalty': ['l2'],
        #'dual': [False],
        'tol': scipy.stats.expon(scale=0.1),
    }
    logging.info('Optimizing parameters: {}'.format(param_dist))
    random_search = sklearn.grid_search.RandomizedSearchCV(
        clf.clf,
        param_distributions=param_dist,
        n_iter=n_iter,
        verbose=10,
    )
    random_search.fit(X, Y)
    for score in random_search.grid_scores_:
        print(score)
    print('Best Score: {}'.format(random_search.best_score_))
    print('Best Params: {}'.format(random_search.best_params_))
    if save:
        logging.info('Saving classifier to disk...')
        joblib.dump(random_search.best_estimator_, save, compress=True)
    return random_search.best_estimator_
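For reference, a minimal self-contained sketch of the same randomized-search technique against the current scikit-learn API (sklearn.model_selection replaced sklearn.grid_search, and cv_results_ replaced grid_scores_). The synthetic data, parameter ranges, and iteration count below are illustrative assumptions, not part of the original project.

import scipy.stats
from sklearn.datasets import make_classification
from sklearn.model_selection import RandomizedSearchCV
from sklearn.svm import SVC

# Illustrative stand-in for machine_learning.shape_features(tracks, feature_names)
X, Y = make_classification(n_samples=200, n_features=20, n_classes=3,
                           n_informative=5, random_state=0)

param_dist = {
    'C': scipy.stats.expon(scale=1000),
    'tol': scipy.stats.expon(scale=0.1),
    'class_weight': ['balanced'],  # 'auto' in older scikit-learn releases
}

search = RandomizedSearchCV(
    SVC(kernel='rbf'),
    param_distributions=param_dist,
    n_iter=20,  # 200 in the example above; kept small here
    cv=5,
    random_state=0,
)
search.fit(X, Y)

# Newer releases expose cv_results_ instead of grid_scores_.
print('Best Score: {}'.format(search.best_score_))
print('Best Params: {}'.format(search.best_params_))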
Example #2
def cross_val_score(tracks, feature_names, folds=5):
    """Return per-fold weighted-F1 scores for a linear SVM on the tracks."""
    X, Y = machine_learning.shape_features(tracks, feature_names)
    clf = sklearn.svm.LinearSVC(class_weight='auto')
    scores = cross_validation.cross_val_score(clf,
                                              X,
                                              Y,
                                              cv=folds,
                                              scoring='f1_weighted')
    return scores
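A stand-alone version of the same cross-validation call against the current API (sklearn.model_selection.cross_val_score replaces the old cross_validation module, and class_weight='balanced' replaces 'auto'). The synthetic data only illustrates the call signature; shape_features is assumed to return a comparable (X, Y) pair.

from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.svm import LinearSVC

# Illustrative stand-in for machine_learning.shape_features(tracks, feature_names)
X, Y = make_classification(n_samples=200, n_features=20, n_classes=3,
                           n_informative=5, random_state=0)

clf = LinearSVC(class_weight='balanced')
scores = cross_val_score(clf, X, Y, cv=5, scoring='f1_weighted')
print(scores.mean(), scores.std())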
Example #3
def kfold(tracks, feature_names, folds=5, shuffle=True, **kwargs):
    """Stratified k-fold evaluation.

    Trains a fresh classifier per fold and yields that fold's test tracks,
    each annotated in place with per-sample predictions and a majority-vote
    track-level prediction.
    """
    labels = [track['label'] for track in tracks]
    kf = cross_validation.StratifiedKFold(labels, n_folds=folds, shuffle=shuffle)
    for train, test in kf:
        train_tracks = [tracks[i] for i in train]
        test_tracks = [tracks[i] for i in test]
        clf = machine_learning.Classifier(**kwargs)
        clf = machine_learning.train_tracks(clf, train_tracks, feature_names)
        predicted_all = []
        Y_test_all = []
        for track in test_tracks:
            X_test, Y_test = machine_learning.shape_features([track], feature_names)
            predicted = machine_learning.predict(X_test, clf)
            track['sample_predictions'] = predicted
            track['prediction'], track['predictions'] = util.most_common(predicted)
            predicted_all.extend(predicted)
            Y_test_all.extend(Y_test)
        yield test_tracks
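The generator above depends on the project-local machine_learning and util helpers, so it is not runnable on its own. A minimal sketch of the underlying technique with the current API follows (StratifiedKFold now takes n_splits and is applied through split(X, y)); the majority-vote step only approximates what util.most_common is assumed to do.

import collections

from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedKFold
from sklearn.svm import SVC

X, Y = make_classification(n_samples=200, n_features=20, n_classes=3,
                           n_informative=5, random_state=0)

skf = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for train, test in skf.split(X, Y):
    clf = SVC(kernel='rbf', class_weight='balanced').fit(X[train], Y[train])
    predicted = clf.predict(X[test])
    # Majority vote over the fold's predictions, analogous to a per-track vote
    label, count = collections.Counter(predicted).most_common(1)[0]
    print('fold majority prediction: {} ({}/{})'.format(label, count, len(predicted)))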