def testAllButFile(file, files, scaler, return_queue, fake_decisions=False, C=None, gamma=None, leave_out_class=None):
    """
    Train an SVM on every entry of files except file and classify file
    block by block.

    Nothing is returned. Instead one tuple is put on return_queue, which is
    closed afterwards:

        (correct_histogram, guessed_histogram, performance,
         medium_shot_performance, mean point metric)

    Both histograms have seven slots and are indexed by class. The three
    ratios are divided by the number of blocks in the test file, so an empty
    block list raises ZeroDivisionError before anything is put on the queue.

    If fake_decisions is true, the decision history handed to the classifier
    is rebuilt for every block from the correct classes of all earlier
    blocks; otherwise it consists of the classifier's own previous answers.
    C, gamma and leave_out_class are forwarded unchanged to the helpers.
    """
    # Training phase: everything except the held-out file.
    remaining_files = [candidate for candidate in files if candidate != file]
    features, labels = getDataMatrix(remaining_files, leave_out_class=leave_out_class, shot=True)
    features = scaler.transform(features, labels)
    classifier = trainSVM(features, labels, C=C, gamma=gamma)

    # Test phase: the held-out file is fed to the classifier one block at a time.
    context, beatList = getContextAndBeatListFromFile(file)
    blockList = coalesceBeats(beatList)

    correct_histogram = [0] * 7
    guessed_histogram = [0] * 7
    hits = 0
    medium_shots = 0
    point_total = 0
    seen_blocks = []
    decisions = []

    for block in blockList:
        seen_blocks.append(block)
        if fake_decisions:
            # History = correct class of every block before the current one.
            decisions = [earlier[-1].shot for earlier in seen_blocks[:-1]]

        _, guessed_class = calculateDistributionAndClassification(
            classifier, deepcopy(context), seen_blocks, decisions, scaler,
            shot_or_cut=True, leave_out_class=leave_out_class)
        if not fake_decisions:
            decisions.append(guessed_class)

        guessed_histogram[guessed_class] += 1
        true_class = block[-1].shot
        correct_histogram[true_class] += 1

        if guessed_class == true_class:
            hits += 1
        # NOTE(review): the indentation of SOURCE was lost. This assumes the
        # class-2 ("medium shot") counter is independent of whether the guess
        # was right -- confirm against the original layout.
        if true_class == 2:
            medium_shots += 1

        # Look one block back where possible, otherwise fall back to the
        # current block; the guess is read at the same offset from the end
        # of the decision list, defaulting to the correct class.
        if len(seen_blocks) >= 2:
            lookback = 2
        else:
            lookback = 1
        previous_correct_class = seen_blocks[-lookback][-1].shot
        if len(decisions) >= lookback:
            previous_guessed_class = decisions[-lookback]
        else:
            previous_guessed_class = previous_correct_class

        point_total += pointMetric(guessed_class, true_class,
                                   previous_guessed_class, previous_correct_class)

    block_count = len(blockList)
    performance = float(hits) / block_count
    medium_shot_performance = float(medium_shots) / block_count
    return_queue.put((
        correct_histogram,
        guessed_histogram,
        performance,
        medium_shot_performance,
        float(point_total) / block_count))
    return_queue.close()
def _printPerformanceSummary(label, values, file_count):
    """
    Print the mean of values as a percentage plus their lowest and highest
    entry.

    The highest entry is at least 0 and the lowest at most 1, because those
    are the starting points of the search. file_count is the divisor of the
    mean; a value of 0 raises ZeroDivisionError.
    """
    total = sum(values)
    # Prepending the bound keeps it on ties, exactly like a strict > / < scan.
    best = max([0] + values)
    worst = min([1] + values)
    print(label + str(total / file_count * 100.0) + "%\t(" + str(worst) + " - " + str(best) + ")")


def XValidation(files, fake_decisions=False):
    """
    Leave-one-file-out evaluation of the SVM shot classifier.

    Because the classifier's earlier decisions are part of its input while a
    beatscript is classified, this is not a classical cross-validation. For
    every entry of files an SVM is trained on all other entries and the
    held-out file is then classified block by block.

    If fake_decisions is true, the decision history is rebuilt for every
    block from the correct classes of the earlier blocks of the test file;
    otherwise the classifier's own previous answers are used.

    The feature scaler is fitted once on the data of the module-level
    TRAIN_FILES, not on files. Progress and per-block results are printed.

    Returns the mean over all files of the per-file mean point metric.
    Raises ZeroDivisionError if files or the block list of a file is empty.

    NOTE(review): trainSVM and calculateDistributionAndClassification are
    started here as Process targets that receive a result queue as a
    positional argument, whereas testAllButFile calls both directly and uses
    their return values. Verify that the helper signatures still support
    this calling convention.
    """
    reference_data, _ = getDataMatrix(TRAIN_FILES)
    scaler = preprocessing.Scaler()
    scaler.fit(reference_data)

    # Accumulated over all files, but not reported by this function.
    correct_histogram = [0] * 7
    guessed_histogram = [0] * 7

    performances = []
    medium_shot_performances = []
    allover_point_sum = 0.0

    # enumerate() gives the true position even if files contains duplicates
    # and avoids a linear search per iteration.
    for file_index, test_file in enumerate(files):
        print("X-Validation: ca. " + str(int(round(float(file_index) / len(files) * 100))) + "% fertig.")
        training_set = [f for f in files if f != test_file]
        training_data, training_data_classes = getDataMatrix(training_set)
        training_data = scaler.transform(training_data, training_data_classes)
        print("Trainingsdaten erzeugt. Trainiere Classifier...")

        # Train in a child process; the test file is parsed in the meantime.
        print_lock = Lock()
        svm_queue = Queue(maxsize=1)
        svm_learning_process = Process(
            target=trainSVM,
            args=(training_data, training_data_classes, svm_queue, print_lock))
        svm_learning_process.start()

        context, beatList = getContextAndBeatListFromFile(test_file)
        blockList = coalesceBeats(beatList)
        part_blockList = []
        decisions = []
        correct_classification_count = 0
        medium_shot_count = 0
        metric_sum = 0

        # Fetch the result before joining so the child is not left blocked
        # on a queue that nobody has read yet.
        trained_svm = svm_queue.get()
        svm_learning_process.join()
        print("Training finished for: " + test_file)

        for block in blockList:
            part_blockList.append(block)
            if fake_decisions:
                # History = correct class of every block before the current one.
                decisions = [earlier[-1].shot for earlier in part_blockList[:-1]]

            svm_queue = Queue(maxsize=1)
            svm_classification_process = Process(
                target=calculateDistributionAndClassification,
                args=(trained_svm, deepcopy(context), part_blockList,
                      decisions, scaler, True, svm_queue))
            svm_classification_process.start()
            _, svm_classification = svm_queue.get()
            svm_classification_process.join()
            if not fake_decisions:
                decisions.append(svm_classification)

            print("SVM Classification:\t" + SHOT_NAMES[svm_classification])
            guessed_histogram[svm_classification] += 1
            correct_class = block[-1].shot
            print("Correct Class:\t\t" + SHOT_NAMES[correct_class])

            # Look one block back where possible, otherwise fall back to the
            # current block; the guess is read at the same offset from the
            # end of the decision list, defaulting to the correct class.
            if len(part_blockList) >= 2:
                lookback = 2
            else:
                lookback = 1
            previous_correct_class = part_blockList[-lookback][-1].shot
            if len(decisions) >= lookback:
                previous_guessed_class = decisions[-lookback]
            else:
                previous_guessed_class = previous_correct_class

            metric_value = pointMetric(svm_classification, correct_class,
                                       previous_guessed_class, previous_correct_class)
            print("Wrongness:\t\t\t" + str(metric_value))
            metric_sum += metric_value

            correct_histogram[correct_class] += 1
            if svm_classification == correct_class:
                correct_classification_count += 1
            # NOTE(review): the indentation of SOURCE was lost. This assumes
            # the class-2 ("medium shot") counter is independent of whether
            # the guess was right, and that the separator below is printed
            # once per block -- confirm against the original layout.
            if correct_class == 2:
                medium_shot_count += 1
            print("------------------------------------")

        file_performance = float(correct_classification_count) / len(blockList)
        file_wrongness = float(metric_sum) / len(blockList)
        print("File Performance: " + str(file_performance * 100) + "%")
        print("File Wrongness: " + str(file_wrongness) + " Points ( 0 - 5 )")
        performances.append(file_performance)
        medium_shot_performances.append(float(medium_shot_count) / len(blockList))
        allover_point_sum += file_wrongness

    print("__________________________________________")
    _printPerformanceSummary("MS-Performance:\t", medium_shot_performances, len(performances))
    _printPerformanceSummary("Performance:\t", performances, len(performances))

    mean_wrongness = allover_point_sum / len(performances)
    print("Wrongness:\t" + str(mean_wrongness))
    return mean_wrongness