def train_one_vs_all_models(person_id):
    """Train ten one-vs-all digit classifiers and score them on a held-out person.

    Persons 1..47, minus ``person_id``, supply the training data.  For each
    number 0..9 a binary classifier ("is this number" vs. "is not") is trained
    with ``train_with_data``.  Its decision scores on the *training* data
    itself are split into positive and negative groups, passed to
    ``get_curve_params``, and the result is wrapped with the classifier in a
    ``ProbabilisticSVM``.

    Each sample of the held-out person is then assigned the number whose
    classifier reports the highest ``get_probability``; ties go to the higher
    number.  The accuracy is printed as a percentage.

    Args:
        person_id: id of the person whose data is held out for testing.

    Returns:
        A ``(hits, total)`` tuple: correctly classified test samples and the
        number of test labels.

    Raises:
        KeyError: if ``person_id`` is not in 1..47.
    """
    test_person = person_id
    training_people = set(range(1, 48))
    # set.remove (not discard) on purpose: an unknown id is a caller error.
    training_people.remove(test_person)

    # Load every training person's data exactly once.  Only the binary labels
    # differ between the ten classifiers, so the vectors are shared.
    # NOTE(review): assumes train_with_data does not modify its inputs - confirm.
    training_vectors, training_numbers = [], []
    for other_person in training_people:
        vectors, labels = get_labelled_number_data_for_person(other_person)
        training_vectors += vectors
        training_numbers += labels

    prob_classifiers = []
    for number in range(10):
        # One-vs-all labelling: 1 for the current number, 0 for everything else.
        training_labels = [int(label == number) for label in training_numbers]
        classifier = train_with_data(training_vectors, training_labels)

        # The curve is fitted on the training scores, not a separate scoring set.
        scores = classifier.decision_function(training_vectors)
        yes_scores = [score for score, label in zip(scores, training_labels)
                      if label == 1]
        no_scores = [score for score, label in zip(scores, training_labels)
                     if label == 0]
        curve_data = get_curve_params(yes_scores, no_scores)
        prob_classifiers.append(ProbabilisticSVM(classifier, curve_data))

    # Evaluate on the held-out person.
    test_vectors, test_labels = get_labelled_number_data_for_person(test_person)
    hits = 0
    for vector, label in zip(test_vectors, test_labels):
        # max over (probability, number) pairs picks the most confident
        # classifier; equal probabilities resolve to the larger number, which
        # matches a descending sort of the same pairs.
        _, predicted = max(
            (candidate.get_probability(vector), index)
            for index, candidate in enumerate(prob_classifiers)
        )
        if predicted == label:
            hits += 1

    total = len(test_labels)
    # Guard against an empty test set instead of dividing by zero.
    accuracy = 100 * (hits / float(total)) if total else 0.0
    print('%s %%' % accuracy)
    return hits, total
from sound_recorder import get_raw_wav_data
from yes_no_test import get_labelled_yn_data_for_person
from predictor import train_with_data
from vectoriser import get_normalised_vector

# Step 1: collect the labelled yes/no data of persons 1..37 and train on it.
training_vectors, training_labels = [], []
for person_index in range(1, 38):
    vectors, labels = get_labelled_yn_data_for_person(person_index)
    training_vectors.extend(vectors)
    training_labels.extend(labels)

print('training...')
classifier = train_with_data(training_vectors, training_labels)

# Classify recordings one after another until the process is interrupted.
while True:
    # Step 2: fetch the raw audio.
    sample_rate, data = get_raw_wav_data()

    # Step 3: turn the audio into a normalised feature vector.
    vector = get_normalised_vector(sample_rate, data)

    # Step 4: classify it; a prediction of 1 is reported as 'yes',
    # anything else as 'no'.
    prediction = classifier.predict(vector)[0]

    # Blank lines on either side make the answer stand out in the console.
    print('\n\n\n\n\n')
    print('yes' if prediction == 1 else 'no')
    print('\n\n\n\n\n')