def getSingleFeatureLineFromFile(file, decisions, shot, leave_out_class=None):
    """
    This is a less troublesome but slow method to get a single featureLine.
    """
    context, beatList = getContextAndBeatListFromFile(file)
    blockList = coalesceBeats(beatList)
    Features.initializeContextVars(context)
    # replay the previous decisions, then build the feature line for the next block
    lastShotId, context, blockList = applyDecisionsToBeatscript(context, blockList, decisions)
    featureLine = getFeatureLine(context, blockList[len(decisions)], shot, lastShotId, leave_out_class)
    return featureLine
def createFeatureLines(context, beatList, shot, leave_out_class=None):
    """
    Returns the list of featureLines converted from the Beats in beatList.
    """
    featureLines = []
    blockList = coalesceBeats(beatList)
    Features.initializeContextVars(context)
    lastShotId = -1
    for block in blockList:
        featureLines.append(getFeatureLine(context, block, shot, lastShotId, leave_out_class))
        context["BygoneBlocks"].append(block)
        lastShotId = block[-1].shotId
    return featureLines
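
# Usage sketch (an assumption, not part of the original module): build the feature lines for a
# single beatscript file. The filename argument is a hypothetical placeholder. Note that
# createFeatureLines mutates the passed context (it appends every block to "BygoneBlocks"),
# so a freshly loaded context should be used per file.
def _exampleFeatureLinesForFile(filename):
    context, beatList = getContextAndBeatListFromFile(filename)
    return createFeatureLines(context, beatList, shot=True)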
def main():
    # read context and beatscript from the input file
    beatscriptFile = open(sys.argv[1], "r")
    lines = beatscriptFile.readlines()
    context = readContext(lines)
    beatList = readBeatscript(lines, context)
    blockList = coalesceBeats(beatList)
    Features.initializeContextVars(context)
    # convert every block into one data line
    dataLines = []
    for block in blockList:
        dataLines.append(createDataLine(context, block))
        context["BygoneBlocks"].append(block)
    # write the data lines to the output file
    outputFile = open(sys.argv[2], "w")
    for dataLine in dataLines:
        outputFile.write(DELIMITER.join([str(x) for x in dataLine]) + "\n")
    outputFile.close()
def onlineFeatureLineCreator(filename, use_classified_shot=False, use_history=True):
    # load context and complete beat list from file
    context, beatList = getContextAndBeatListFromFile(filename)
    Features.initializeContextVars(context)
    blockList = coalesceBeats(beatList)
    context["BygoneBlocks"] = []
    for block in blockList:
        # get current feature line and true shot class
        shot_true = block[-1].shot
        featureLine = getFeatureLine(context, block, True, -1)
        features = np.array(featureLine[:-1], dtype=np.float64)
        # hand the features to the caller and receive the classified shot in return
        shot_classified = yield features, shot_true
        # update block and history with the classified shot
        if use_classified_shot:
            for beat in block:
                beat.shot = shot_classified
        if use_history:
            context["BygoneBlocks"].append(block)
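
# Usage sketch (an assumption, not part of the original module): drive the generator above with
# an already trained, scikit-learn style classifier. Both `classifier` and the filename are
# hypothetical placeholders; the generator is primed with next() and then receives each
# classified shot via send(), as the yield expression above expects.
def _exampleOnlineClassification(filename, classifier):
    gen = onlineFeatureLineCreator(filename, use_classified_shot=True)
    correct = 0
    total = 0
    features, shot_true = next(gen)  # prime the generator to obtain the first block's features
    while True:
        shot_classified = int(classifier.predict(features.reshape(1, -1))[0])
        correct += int(shot_classified == shot_true)
        total += 1
        try:
            features, shot_true = gen.send(shot_classified)
        except StopIteration:
            break
    return float(correct) / total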
def testAllButFile(file, files, scaler, return_queue, fake_decisions=False, C=None, gamma=None,
                   leave_out_class=None):
    """
    Trains with all files in files except file, which is used for testing.
    The test results are put on return_queue.
    """
    training_set = [f for f in files if f != file]
    training_data, training_data_classes = getDataMatrix(training_set,
                                                         leave_out_class=leave_out_class, shot=True)
    training_data = scaler.transform(training_data, training_data_classes)
    trained_svm = trainSVM(training_data, training_data_classes, C=C, gamma=gamma)
    context, beatList = getContextAndBeatListFromFile(file)
    blockList = coalesceBeats(beatList)
    part_blockList = []
    decisions = []
    correct_classification_count = 0
    medium_shot_count = 0
    metric_sum = 0
    correct_histogram = [0, 0, 0, 0, 0, 0, 0]
    guessed_histogram = [0, 0, 0, 0, 0, 0, 0]
    for block in blockList:
        # prepare block-list and decision-list
        part_blockList.append(block)
        if fake_decisions:
            decisions = []
            for i in range(len(part_blockList) - 1):
                decisions.append(part_blockList[i][-1].shot)
        svm_distribution, svm_classification = calculateDistributionAndClassification(
            trained_svm, deepcopy(context), part_blockList, decisions, scaler,
            shot_or_cut=True, leave_out_class=leave_out_class)
        if not fake_decisions:
            decisions.append(svm_classification)
        guessed_histogram[svm_classification] += 1
        correct_histogram[block[-1].shot] += 1
        if svm_classification == block[-1].shot:
            correct_classification_count += 1
        if block[-1].shot == 2:
            medium_shot_count += 1
        # determine the previous guessed and correct classes for the point metric
        if len(part_blockList) >= 2:
            previous_correct_class = part_blockList[-2][-1].shot
            if len(decisions) >= 2:
                previous_guessed_class = decisions[-2]
            else:
                previous_guessed_class = previous_correct_class
        else:
            previous_correct_class = part_blockList[-1][-1].shot
            if len(decisions) >= 1:
                previous_guessed_class = decisions[-1]
            else:
                previous_guessed_class = previous_correct_class
        metric_sum += pointMetric(svm_classification, block[-1].shot, previous_guessed_class,
                                  previous_correct_class)
    performance = float(correct_classification_count) / len(blockList)
    medium_shot_performance = float(medium_shot_count) / len(blockList)
    return_queue.put((correct_histogram, guessed_histogram, performance, medium_shot_performance,
                      float(metric_sum) / len(blockList)))
    return_queue.close()
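
# Usage sketch (an assumption, not part of the original module): testAllButFile is written to run
# in its own process and reports its results through a multiprocessing queue. TRAIN_FILES and a
# scaler fitted on the full training data follow the pattern used in XValidation below.
def _exampleTestSingleFile(file, scaler):
    return_queue = Queue(maxsize=1)
    test_process = Process(target=testAllButFile, args=(file, TRAIN_FILES, scaler, return_queue))
    test_process.start()
    correct_hist, guessed_hist, performance, ms_performance, wrongness = return_queue.get()
    test_process.join()
    return performance, wrongness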
def XValidation(files, fake_decisions=False):
    """
    This is not a classical cross-validation, because the classifier's own decisions made while
    classifying a beatscript are fed back into the feature generation. Training is done with all
    but one of the files, and the remaining beatscript is tested with the classifier trained on
    that data; this process is repeated for every file. If fake_decisions is set, the decision
    history is faked by using the original classes from the test file. This function tests the
    performance of an SVM for shot decisions.
    """
    reference_data, _ = getDataMatrix(TRAIN_FILES)
    scaler = preprocessing.Scaler()
    scaler.fit(reference_data)
    correct_histogram = [0, 0, 0, 0, 0, 0, 0]
    guessed_histogram = [0, 0, 0, 0, 0, 0, 0]
    performances = []
    allover_point_sum = 0.0
    medium_shot_performances = []
    for file in files:
        print("X-Validation: approx. " +
              str(int(round(float(files.index(file)) / len(files) * 100))) + "% done.")
        training_set = [f for f in files if f != file]
        training_data, training_data_classes = getDataMatrix(training_set)
        training_data = scaler.transform(training_data, training_data_classes)
        print("Training data created. Training classifier...")
        print_lock = Lock()
        svm_queue = Queue(maxsize=1)
        svm_learning_process = Process(target=trainSVM,
                                       args=(training_data, training_data_classes, svm_queue, print_lock))
        svm_learning_process.start()
        context, beatList = getContextAndBeatListFromFile(file)
        blockList = coalesceBeats(beatList)
        part_blockList = []
        decisions = []
        correct_classification_count = 0
        medium_shot_count = 0
        metric_sum = 0
        trained_svm = svm_queue.get()
        svm_learning_process.join()
        print("Training finished for: " + file)
        for block in blockList:
            # prepare block-list and decision-list
            part_blockList.append(block)
            if fake_decisions:
                decisions = []
                for i in range(len(part_blockList) - 1):
                    decisions.append(part_blockList[i][-1].shot)
            svm_queue = Queue(maxsize=1)
            svm_classification_process = Process(
                target=calculateDistributionAndClassification,
                args=(trained_svm, deepcopy(context), part_blockList, decisions, scaler, True, svm_queue))
            svm_classification_process.start()
            svm_distribution, svm_classification = svm_queue.get()
            svm_classification_process.join()
            if not fake_decisions:
                decisions.append(svm_classification)
            print("SVM Classification:\t" + SHOT_NAMES[svm_classification])
            guessed_histogram[svm_classification] += 1
            print("Correct Class:\t\t" + SHOT_NAMES[block[-1].shot])
            # determine the previous guessed and correct classes for the point metric
            if len(part_blockList) >= 2:
                previous_correct_class = part_blockList[-2][-1].shot
                if len(decisions) >= 2:
                    previous_guessed_class = decisions[-2]
                else:
                    previous_guessed_class = previous_correct_class
            else:
                previous_correct_class = part_blockList[-1][-1].shot
                if len(decisions) >= 1:
                    previous_guessed_class = decisions[-1]
                else:
                    previous_guessed_class = previous_correct_class
            metric_value = pointMetric(svm_classification, block[-1].shot, previous_guessed_class,
                                       previous_correct_class)
            print("Wrongness:\t\t\t" + str(metric_value))
            metric_sum += metric_value
            correct_histogram[block[-1].shot] += 1
            if svm_classification == block[-1].shot:
                correct_classification_count += 1
            if block[-1].shot == 2:
                medium_shot_count += 1
            print("------------------------------------")
        print("File Performance: " +
              str(float(correct_classification_count) / len(blockList) * 100) + "%")
        print("File Wrongness: " + str(float(metric_sum) / len(blockList)) + " Points ( 0 - 5 )")
        performances.append(float(correct_classification_count) / len(blockList))
        medium_shot_performances.append(float(medium_shot_count) / len(blockList))
        allover_point_sum += float(metric_sum) / len(blockList)
    print("__________________________________________")
    # summarize per-file medium-shot performance: mean plus min/max range
    performance_sum = 0
    performance_best = 0
    performance_worst = 1
    for p in medium_shot_performances:
        performance_sum += p
        if p > performance_best:
            performance_best = p
        if p < performance_worst:
            performance_worst = p
    print("MS-Performance:\t" + str(performance_sum / len(performances) * 100.0) + "%\t(" +
          str(performance_worst) + " - " + str(performance_best) + ")")
    # summarize overall per-file performance: mean plus min/max range
    performance_sum = 0
    performance_best = 0
    performance_worst = 1
    for p in performances:
        performance_sum += p
        if p > performance_best:
            performance_best = p
        if p < performance_worst:
            performance_worst = p
    print("Performance:\t" + str(performance_sum / len(performances) * 100.0) + "%\t(" +
          str(performance_worst) + " - " + str(performance_best) + ")")
    print("Wrongness:\t" + str(allover_point_sum / len(performances)))
    return allover_point_sum / len(performances)
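
# Usage sketch (an assumption, not part of the original module): run the leave-one-out evaluation
# over the training corpus. TRAIN_FILES is the module-level file list that XValidation already
# uses for fitting the scaler; the returned value is the average per-file wrongness (0 - 5).
def _exampleRunXValidation():
    return XValidation(TRAIN_FILES, fake_decisions=True)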