Example #1
0
def getSingleFeatureLineFromFile(file, decisions, shot, leave_out_class=None):
    """
    Build the feature line for the block that follows the given decisions.

    The beatscript in file is loaded from scratch, coalesced into blocks and
    the given decisions are applied to it before the feature line is computed.
    This is a less troublesome but slow method to get a featureLine, because
    the whole file is processed again on every call.

    :param file: beatscript file handed to getContextAndBeatListFromFile.
    :param decisions: decisions to apply to the beatscript (presumably one per
        already classified block); the feature line is created for the block
        at index len(decisions).
    :param shot: passed through unchanged to getFeatureLine.
    :param leave_out_class: passed through unchanged to getFeatureLine.
    :return: whatever getFeatureLine returns for that block.
    :raises IndexError: if decisions has at least as many entries as there are
        blocks, because there is no following block to describe.
    """
    # The loader yields (context, beatList) in that order -- every other call
    # site in this file unpacks it that way. The previous swapped unpacking
    # handed the context to coalesceBeats and the beats to
    # Features.initializeContextVars.
    context, beatList = getContextAndBeatListFromFile(file)
    blockList = coalesceBeats(beatList)
    Features.initializeContextVars(context)
    lastShotId, context, blockList = applyDecisionsToBeatscript(
        context, blockList, decisions)
    # The first undecided block sits directly behind the decided ones.
    return getFeatureLine(context, blockList[len(decisions)], shot, lastShotId,
                          leave_out_class)
Example #2
0
def getFeatureLinesFromFileAndModify(file, shot, leave_out=-1):
    """
    Create featureLines from an artificially enlarged beat list.

    The beats loaded from file are deep-copied once. That copy is then
    appended to the loaded list two times in full; every copied beat whose
    type is SAYS is appended one additional time whenever random.randint(0, 1)
    comes up 1. The same copied objects are reused on both passes. Blowing up
    the data like this rapidly increases training times.

    :param file: beatscript file handed to getContextAndBeatListFromFile.
    :param shot: passed through unchanged to createFeatureLines.
    :param leave_out: passed through unchanged to createFeatureLines.
    :return: the result of createFeatureLines for the enlarged beat list.
    """
    context, beatList = getContextAndBeatListFromFile(file)
    # Iterate over a snapshot so the list being extended is never the list
    # being walked.
    snapshot = deepcopy(beatList)
    for _ in range(2):
        for copied_beat in snapshot:
            beatList.append(copied_beat)
            # Coin flip only happens for SAYS beats (short-circuit keeps the
            # sequence of random draws unchanged).
            if copied_beat.type == SAYS and random.randint(0, 1):
                beatList.append(copied_beat)
    return createFeatureLines(context, beatList, shot, leave_out)
Example #3
0
def onlineFeatureLineCreator(filename, use_classified_shot = False, use_history = True):
    """
    Generator that walks the blocks of a beatscript one at a time.

    For every block it yields a tuple (features, shot_true): features is a
    float64 numpy array built from the block's feature line without its last
    entry, shot_true is the shot attribute of the block's last beat. The
    value passed back in through send() is treated as the classified shot.

    :param filename: beatscript file handed to getContextAndBeatListFromFile.
    :param use_classified_shot: if true, the value sent into the generator is
        written to the shot attribute of every beat of the current block.
        Note that advancing with next() sends None, which would then be
        written as well.
    :param use_history: if true, each finished block is appended to
        context["BygoneBlocks"].
    """
    # Load the context and the complete beat list, then group beats to blocks.
    context, beatList = getContextAndBeatListFromFile(filename)
    Features.initializeContextVars(context)
    blocks = coalesceBeats(beatList)
    context["BygoneBlocks"] = []
    for current_block in blocks:
        true_shot = current_block[-1].shot
        # Feature line of the current block; its last entry is dropped before
        # the values are handed out.
        line = getFeatureLine(context, current_block, True, -1)
        feature_vector = np.array(line[:-1], dtype=np.float64)
        sent_shot = yield (feature_vector, true_shot)
        # Optionally overwrite the block with the shot reported by the caller.
        if use_classified_shot:
            for beat in current_block:
                beat.shot = sent_shot
        # Optionally remember the block for later feature lines.
        if use_history:
            context["BygoneBlocks"].append(current_block)
Example #4
0
def testAllButFile(file, files, scaler, return_queue, fake_decisions=False, C=None,
                   gamma=None, leave_out_class=None):
    """
    Train an SVM on all files except file and test it on file.

    The held-out file is classified block by block. Nothing is returned;
    instead one tuple is put on return_queue, which is closed afterwards:
    (correct_histogram, guessed_histogram, performance,
    medium_shot_performance, mean point metric). Both histograms have seven
    bins indexed by class. performance is the fraction of correctly classified
    blocks, medium_shot_performance the fraction of blocks whose true class
    is 2, and the last entry is the pointMetric sum divided by the number of
    blocks.

    :param file: file used for testing.
    :param files: all files; every entry different from file is trained on.
    :param scaler: object whose transform method is applied to the training
        data; also passed to calculateDistributionAndClassification.
    :param return_queue: queue that receives the result tuple.
    :param fake_decisions: if true, the decision history given to the
        classifier consists of the true shots of all earlier blocks instead of
        the classifier's own earlier outputs.
    :param C: passed through to trainSVM.
    :param gamma: passed through to trainSVM.
    :param leave_out_class: passed through to getDataMatrix and
        calculateDistributionAndClassification.
    :raises ZeroDivisionError: if the test file yields no blocks.
    """
    # Training on everything but the held-out file.
    other_files = [candidate for candidate in files if candidate != file]
    train_x, train_y = getDataMatrix(other_files, leave_out_class=leave_out_class,
                                     shot=True)
    train_x = scaler.transform(train_x, train_y)
    classifier = trainSVM(train_x, train_y, C=C, gamma=gamma)

    # Block-wise test on the held-out file.
    context, beatList = getContextAndBeatListFromFile(file)
    blockList = coalesceBeats(beatList)
    correct_histogram = [0] * 7
    guessed_histogram = [0] * 7
    seen_blocks = []
    decisions = []
    hits = 0
    medium_shots = 0
    point_total = 0
    for block in blockList:
        seen_blocks.append(block)
        if fake_decisions:
            # History = true shots of every block before the current one.
            decisions = [earlier[-1].shot for earlier in seen_blocks[:-1]]
        _distribution, guess = calculateDistributionAndClassification(
            classifier, deepcopy(context), seen_blocks, decisions, scaler,
            shot_or_cut=True, leave_out_class=leave_out_class)
        if not fake_decisions:
            # From here on the list ends with the guess for the current block.
            decisions.append(guess)
        guessed_histogram[guess] += 1
        truth = block[-1].shot
        correct_histogram[truth] += 1
        if guess == truth:
            hits += 1
        if truth == 2:
            medium_shots += 1
        # Look one block back when there is one, otherwise fall back to the
        # current block itself (offset 1).
        lookback = min(len(seen_blocks), 2)
        previous_truth = seen_blocks[-lookback][-1].shot
        # NOTE(review): with fake_decisions the list does not contain an entry
        # for the current block, so decisions[-2] looks like it refers to the
        # block two steps back rather than the previous one -- confirm intent.
        if len(decisions) >= lookback:
            previous_guess = decisions[-lookback]
        else:
            previous_guess = previous_truth
        point_total += pointMetric(guess, truth, previous_guess, previous_truth)

    block_total = len(blockList)
    performance = float(hits) / block_total
    medium_shot_performance = float(medium_shots) / block_total
    return_queue.put((correct_histogram, guessed_histogram, performance,
                      medium_shot_performance, float(point_total) / block_total))
    return_queue.close()
Example #5
0
def XValidation(files, fake_decisions = False):
    """
    Leave-one-file-out evaluation of the SVM classifier with console output.

    Since the decisions of the classifier during classifying a beatscript are
    used as input for later decisions, this is not a classical
    cross-validation. Instead the training is done with all but one of the
    given files and the remaining beatscript is classified block by block.
    This process is repeated for every file.

    Training and every single classification run in their own process; the
    results are fetched through a one-element queue before the process is
    joined.

    :param files: list of beatscript files; each one is used as test file once.
    :param fake_decisions: if true, the decision history handed to the
        classifier is faked by using the original classes of the earlier
        blocks of the test file; otherwise the classifier's own earlier
        decisions are used.
    :return: the mean over all files of the per-file average pointMetric value.
    :raises ZeroDivisionError: if files is empty or a file yields no blocks.
    """
    # The scaler is fitted on the module-level TRAIN_FILES, not on files.
    reference_data, _ = getDataMatrix(TRAIN_FILES)
    scaler = preprocessing.Scaler()
    scaler.fit(reference_data)
    # Both histograms are accumulated over all files but are neither printed
    # nor returned here; seven bins, indexed by class.
    correct_histogram = [0, 0, 0, 0, 0, 0, 0]
    guessed_histogram = [0, 0, 0, 0, 0, 0, 0]
    performances = []
    allover_point_sum = 0.0
    medium_shot_performances = []
    # enumerate gives the loop position directly; files.index(file) was a
    # linear search per file and reported the wrong progress for a file that
    # is listed more than once.
    for file_index, file in enumerate(files):
        print("X-Validation: ca. " +
              str(int(round(float(file_index) / len(files) * 100))) +
              "% fertig.")
        training_set = [f for f in files if f != file]
        training_data, training_data_classes = getDataMatrix(training_set)
        training_data = scaler.transform(training_data, training_data_classes)
        print("Trainingsdaten erzeugt. Trainiere Classifier...")
        print_lock = Lock()
        svm_queue = Queue(maxsize=1)
        svm_learning_process = Process(target=trainSVM,
            args=(training_data, training_data_classes, svm_queue, print_lock))
        svm_learning_process.start()

        # Prepare the test file while the training process is running.
        context, beatList = getContextAndBeatListFromFile(file)
        blockList = coalesceBeats(beatList)
        part_blockList = []
        decisions = []
        correct_classification_count = 0
        medium_shot_count = 0
        metric_sum = 0

        # Fetch the result before joining so the child is never left blocked
        # on a full queue.
        trained_svm = svm_queue.get()
        svm_learning_process.join()
        print("Training finished for: " + file)
        for block in blockList:
            # Prepare block list and decision list.
            part_blockList.append(block)
            if fake_decisions:
                # History = true shots of every block before the current one.
                decisions = [earlier[-1].shot for earlier in part_blockList[:-1]]
            svm_queue = Queue(maxsize=1)
            svm_classification_process = Process(
                target=calculateDistributionAndClassification,
                args=(trained_svm, deepcopy(context), part_blockList, decisions,
                      scaler, True, svm_queue))
            svm_classification_process.start()
            _svm_distribution, svm_classification = svm_queue.get()
            svm_classification_process.join()
            if not fake_decisions:
                # From here on the list ends with the guess for this block.
                decisions.append(svm_classification)
            true_shot = block[-1].shot
            print("SVM Classification:\t" + SHOT_NAMES[svm_classification])
            guessed_histogram[svm_classification] += 1
            print("Correct Class:\t\t" + SHOT_NAMES[true_shot])
            # Look one block back when there is one, otherwise fall back to
            # the current block itself (offset 1).
            lookback = min(len(part_blockList), 2)
            previous_correct_class = part_blockList[-lookback][-1].shot
            # NOTE(review): with fake_decisions the list has no entry for the
            # current block, so decisions[-2] looks like it refers to the block
            # two steps back rather than the previous one -- confirm intent.
            if len(decisions) >= lookback:
                previous_guessed_class = decisions[-lookback]
            else:
                previous_guessed_class = previous_correct_class
            metric_value = pointMetric(svm_classification, true_shot,
                previous_guessed_class, previous_correct_class)
            print("Wrongness:\t\t\t" + str(metric_value))
            metric_sum += metric_value
            correct_histogram[true_shot] += 1
            if svm_classification == true_shot:
                correct_classification_count += 1
            if true_shot == 2:
                medium_shot_count += 1
            print("------------------------------------")

        block_count = len(blockList)
        file_performance = float(correct_classification_count) / block_count
        file_wrongness = float(metric_sum) / block_count
        print("File Performance: " + str(file_performance * 100) + "%")
        print("File Wrongness: " + str(file_wrongness) + " Points ( 0 - 5 )")
        performances.append(file_performance)
        medium_shot_performances.append(float(medium_shot_count) / block_count)
        allover_point_sum += file_wrongness
        print("__________________________________________")

    file_count = len(performances)
    # The seeds 1 and 0 reproduce the start values of the former manual
    # min/max search, so the printed bounds stay identical.
    print("MS-Performance:\t" +
          str(sum(medium_shot_performances) / file_count * 100.0) + "%\t(" +
          str(min([1] + medium_shot_performances)) + " - " +
          str(max([0] + medium_shot_performances)) + ")")
    print("Performance:\t" +
          str(sum(performances) / file_count * 100.0) + "%\t(" +
          str(min([1] + performances)) + " - " +
          str(max([0] + performances)) + ")")
    mean_wrongness = allover_point_sum / file_count
    print("Wrongness:\t" + str(mean_wrongness))
    return mean_wrongness
Example #6
0
def getFeatureLinesFromFile(file, shot, leave_out_class=None):
    """
    Load the beatscript stored in file and convert it into featureLines.

    :param file: beatscript file handed to getContextAndBeatListFromFile.
    :param shot: passed through unchanged to createFeatureLines.
    :param leave_out_class: passed through unchanged to createFeatureLines.
    :return: the result of createFeatureLines for the loaded beatscript.
    """
    loaded_context, loaded_beats = getContextAndBeatListFromFile(file)
    featureLines = createFeatureLines(loaded_context, loaded_beats, shot,
                                      leave_out_class)
    return featureLines