Ejemplo n.º 1
0
def train_one_vs_all_models(person_id):
    """Train ten one-vs-all number classifiers and score them on one person.

    Every person numbered 1..47 except ``person_id`` supplies training data.
    For each number 0..9 a binary classifier ("is this number" vs. "is not")
    is trained with ``train_with_data``; its decision scores on the training
    set are split by label and handed to ``get_curve_params``, and the
    classifier plus the resulting curve data are wrapped in a
    ``ProbabilisticSVM``.

    Each sample of the held-out person is then assigned the number whose
    wrapped classifier reports the highest probability. The accuracy is
    printed as a percentage.

    Args:
        person_id: The person held out for testing. Must be in 1..47.

    Returns:
        A ``(hits, total)`` tuple: correctly classified test samples and the
        number of test labels.

    Raises:
        KeyError: If ``person_id`` is not in 1..47.
    """
    test_person = person_id

    # The population (1..47) is hard-coded; remove() deliberately raises
    # KeyError for an unknown person rather than silently training on all.
    training_people = set(range(1, 48))
    training_people.remove(test_person)

    # Load the training data once: it does not depend on the target number,
    # only the binarised labels do.
    # NOTE(review): assumes the loader returns the same data on every call
    # and that train_with_data does not modify its inputs - confirm.
    training_vectors, raw_labels = [], []
    for training_person in training_people:
        vectors, labels = get_labelled_number_data_for_person(training_person)
        training_vectors.extend(vectors)
        raw_labels.extend(labels)

    prob_classifiers = []
    for number in range(10):
        # One-vs-all labels: 1 for the current number, 0 for everything else.
        training_labels = [int(label == number) for label in raw_labels]

        classifier = train_with_data(training_vectors, training_labels)

        # The curve is fitted on the training scores themselves, not on a
        # separate scoring set.
        scores = classifier.decision_function(training_vectors)
        yes_scores = [score for score, flag in zip(scores, training_labels)
                      if flag == 1]
        no_scores = [score for score, flag in zip(scores, training_labels)
                     if flag == 0]

        curve_data = get_curve_params(yes_scores, no_scores)
        prob_classifiers.append(ProbabilisticSVM(classifier, curve_data))

    # Evaluate the ten classifiers on the held-out person.
    test_vectors, test_labels = get_labelled_number_data_for_person(test_person)

    hits = 0
    for vector, label in zip(test_vectors, test_labels):
        # Tuples compare by probability first and number second, so a tie
        # goes to the higher number - the same winner a descending sort picks.
        _, predicted = max(
            (model.get_probability(vector), number)
            for number, model in enumerate(prob_classifiers)
        )
        if predicted == label:
            hits += 1

    total = len(test_labels)
    # Guard against a person with no samples instead of dividing by zero.
    accuracy = 100 * (hits / float(total)) if total else 0.0
    # Single-argument parenthesised print behaves the same on Python 2 and 3.
    print('%s %%' % (accuracy,))
    return hits, total
Ejemplo n.º 2
0
from sound_recorder import get_raw_wav_data
from yes_no_test import get_labelled_yn_data_for_person
from predictor import train_with_data
from vectoriser import get_normalised_vector

# Step 1: gather the labelled yes/no samples of persons 1..37 and train a
# single classifier on all of them.
training_vectors, training_labels = [], []
for person_index in range(1, 38):
    vectors, labels = get_labelled_yn_data_for_person(person_index)
    training_vectors.extend(vectors)
    training_labels.extend(labels)

print('training...')
classifier = train_with_data(training_vectors, training_labels)

# Steps 2-4 repeat forever: capture audio, vectorise it, print the prediction.
while True:
    # Step 2: obtain raw audio from the recorder helper.
    sample_rate, data = get_raw_wav_data()

    # Step 3: turn the raw samples into a normalised feature vector.
    vector = get_normalised_vector(sample_rate, data)

    # Step 4: classify; label 1 is reported as 'yes', anything else as 'no'.
    prediction = classifier.predict(vector)[0]
    answer = 'yes' if prediction == 1 else 'no'

    # Blank lines around the answer make it stand out in the console.
    print('\n\n\n\n\n')
    print(answer)
    print('\n\n\n\n\n')