def _learn_model(scenario_name):
    '''
    Learns a classifier model for the specified scenario if one does
    not already exist.

    The scenario entry is looked up in _scenarios by name and is read
    through the keys 'model' (location checked with path.exists and
    later passed to save_model), 'classifier' ('rf' or 'svm') and
    'training' (dataset handed to datasets.csv2numpy). If the model
    already exists nothing is done. Otherwise the classifier is fitted
    on the training dataset, its statistics on that same dataset are
    printed and the model is saved.

    Raises ValueError if scenario['classifier'] is neither 'rf' nor
    'svm'.
    '''
    scenario = _scenarios[scenario_name]
    if path.exists(scenario['model']):
        return

    sys.stdout.write(
        'Training the model for scenario {}...\n'.format(scenario_name))

    # Decide on classifier and on the cutoffs used for the report below
    kind = scenario['classifier']
    if kind == 'rf':
        classifier = RandomForest()
        sys.stdout.write('TRAINING RANDOM FOREST\n')
        cutoff = [c * 0.1 for c in range(1, 10)]
    elif kind == 'svm':
        classifier = sklearn_SVC(kernel='rbf', C=10, gamma=0.01)
        sys.stdout.write('TRAINING SVM\n')
        cutoff = [0.0]
    else:
        # Fail early with a clear message; without this branch neither
        # classifier nor cutoff would be usable further down.
        raise ValueError(
            'Unknown classifier {!r} for scenario {}'.format(
                kind, scenario_name))

    # Load the required dataset and train the model
    X, y, _ = datasets.csv2numpy(scenario['training'])
    classifier.fit(X, y)

    # Evaluate the model on the training dataset
    y_pred = classifier.decision_function(X)
    sys.stdout.write('Performance on training data:\n')
    utility.print_stats_cutoff(y, y_pred, cutoff)

    # Save the model in the corresponding file
    classifier.save_model(scenario['model'])
# Example #2
# 0
def _learn_model(scenario_name):
    '''
    Learns a classifier model for the specified scenario if one does
    not already exist.

    The scenario entry is looked up in _scenarios by name and is read
    through the keys 'model' (location checked with path.exists and
    later passed to save_model), 'classifier' ('rf' or 'svm') and
    'training' (dataset handed to datasets.csv2numpy). If the model
    already exists nothing is done. Otherwise the classifier is fitted
    on the training dataset, its statistics on that same dataset are
    printed and the model is saved.

    Raises ValueError if scenario['classifier'] is neither 'rf' nor
    'svm'.
    '''
    scenario = _scenarios[scenario_name]
    if path.exists(scenario['model']):
        return

    sys.stdout.write(
        'Training the model for scenario {}...\n'.format(scenario_name))

    # Decide on classifier and on the cutoffs used for the report below
    kind = scenario['classifier']
    if kind == 'rf':
        classifier = RandomForest()
        sys.stdout.write('TRAINING RANDOM FOREST\n')
        cutoff = [c * 0.1 for c in range(1, 10)]
    elif kind == 'svm':
        classifier = sklearn_SVC(kernel='rbf', C=10, gamma=0.01)
        sys.stdout.write('TRAINING SVM\n')
        cutoff = [0.0]
    else:
        # Fail early with a clear message; without this branch neither
        # classifier nor cutoff would be usable further down.
        raise ValueError(
            'Unknown classifier {!r} for scenario {}'.format(
                kind, scenario_name))

    # Load the required dataset and train the model
    X, y, _ = datasets.csv2numpy(scenario['training'])
    classifier.fit(X, y)

    # Evaluate the model on the training dataset
    y_pred = classifier.decision_function(X)
    sys.stdout.write('Performance on training data:\n')
    utility.print_stats_cutoff(y, y_pred, cutoff)

    # Save the model in the corresponding file
    classifier.save_model(scenario['model'])
def evaluate_classifier(data, labels, test_data, test_labels):
    '''
    Fits a RandomForest on (data, labels) and scores it on each test
    set.

    test_data and test_labels are walked in lockstep; the i-th entry
    of the returned list is the accuracy_score of the predictions for
    the i-th (test_data, test_labels) pair.
    '''
    forest = RandomForest()
    forest.fit(data, labels)
    return [
        accuracy_score(expected, forest.predict(samples))
        for samples, expected in zip(test_data, test_labels)
    ]
# Example #4
# 0
def evaluate_classifier(data, labels, test_data, test_labels):
    '''
    Fits a RandomForest on (data, labels) and scores it on each test
    set.

    test_data and test_labels are walked in lockstep; the i-th entry
    of the returned list is the accuracy_score of the predictions for
    the i-th (test_data, test_labels) pair.
    '''
    forest = RandomForest()
    forest.fit(data, labels)
    return [
        accuracy_score(expected, forest.predict(samples))
        for samples, expected in zip(test_data, test_labels)
    ]