Beispiel #1
0
def specific_line(number):
    """Evaluate one row of the (C, gamma) grid and report every result via wget.

    ``number`` indexes into the 50 candidate C values (200, 400, ..., 10000).
    For that C, 250 gamma values ``10 ** -(x / 10)`` with ``x`` running from
    -20 to 229 are cross-validated one after another using
    ``ParallelXValidation``.  After each run the parameters and the result
    are encoded into a URL that is fetched with ``wget``; the file that
    ``wget`` downloads is removed again right afterwards.

    Raises ``IndexError`` if ``number`` is not a valid index into the C list.
    """
    files = TRAIN_FILES
    reference_data, _ = getDataMatrix(files)
    scaler = preprocessing.Scaler()
    scaler.fit(reference_data)
    # 50 processes; 250 runs per process; linear distribution
    c_candidates = range(200, 10001, 200)
    gamma_candidates = [1.0 * 10.0 ** -(exponent / 10.0) for exponent in range(-20, 230)]
    for gamma in gamma_candidates:
        c_value = c_candidates[number]
        command_head = ("wget http://www.pinae.net/automoculus/getText.php?text=C_is_"
                        + str(c_value) + "_gamma_is_" + str(gamma) + "_Result_is_")
        score = ParallelXValidation(files, scaler, True, C=c_value, gamma=gamma)
        os.system(command_head + str(score))
        # wget stores the response as a local file; clean it up.
        os.system("rm getText*")
def trainWithAllExamples(shot):
    """Train one SVM on all files in TRAIN_FILES using a worker process.

    The data matrix for ``TRAIN_FILES`` is built with the given ``shot``
    argument, scaled with a freshly fitted ``preprocessing.Scaler`` and then
    passed to ``trainSVM`` running in a separate process.

    Returns a pair ``(classifiers, scaler)``: ``classifiers`` is a
    one-element tuple holding whatever the worker put on the queue, and
    ``scaler`` is the scaler that was fitted on the training data.
    """
    features, labels = getDataMatrix(TRAIN_FILES, shot)
    scaler = preprocessing.Scaler()
    scaled_features = scaler.fit_transform(features, labels)
    print_lock = Lock()
    result_queue = Queue()
    worker = Process(
        target=trainSVM,
        args=(scaled_features, labels, result_queue, print_lock))
    worker.start()
    # Read the result before joining the worker, as the original code does.
    classifier = result_queue.get()
    worker.join()
    return (classifier,), scaler
Beispiel #3
0
def main():
    """Run a single parallel cross-validation with a fixed (C, gamma) pair.

    The scaler is fitted on the data matrix of all TRAIN_FILES built with
    ``shot=True``.  The result of ``ParallelXValidation`` is discarded.
    """
    # Previously tried runs, kept for reference (all disabled):
    #   XValidation(TRAIN_FILES, True)
    # ParallelXValidation(TRAIN_FILES, scaler, True, C=..., gamma=...) with
    #   C=2582.61517656, gamma=0.00036375303213
    #   C=2583.31718583, gamma=0.00191943088336
    #   C=2585.53147506, gamma=2.60057621686e-05
    #   C=2585.61614258, gamma=2.15704131861e-05
    #   C=2585.81448898, gamma=1.73105463456e-05
    #   C=1999.62466242, gamma=1.62885637292e-06
    #   C=1999.32984556, gamma=3.03787358388e-07
    svm_parameters = {"C": 1999.85770959, "gamma": 6.30930490772e-07}
    shot_features, _ = getDataMatrix(TRAIN_FILES, shot=True)
    scaler = preprocessing.Scaler()
    scaler.fit(shot_features)
    # Unlike the disabled runs above, the third positional argument is False.
    ParallelXValidation(TRAIN_FILES, scaler, False, **svm_parameters)
def TestFeatureClassRelevance(number = None):
    """Measure how the cross-validation result changes when a feature class is left out.

    number -- index into ``getAllFeatureClasses()``.  When given (including
              ``0``), only that feature class is left out: SVM parameters are
              tuned with ``tuneParametersForSVM`` and the cross-validation is
              run with the tuned values.  When ``None``, every feature class
              from index 3 onwards is left out in turn and cross-validated
              without explicit C/gamma arguments.

    Each ``(feature class, result)`` pair is printed and additionally
    reported by fetching a URL with ``wget``; the file that ``wget``
    downloads is removed afterwards.  Nothing is returned.
    """
    files = TRAIN_FILES
    feature_Classes = getAllFeatureClasses()
    results = []
    # Compare against None explicitly: a plain truth test would treat index 0
    # like "no index given" and silently run the full sweep instead.
    if number is not None:
        left_out = feature_Classes[number]
        reference_data, _ = getDataMatrix(files, leave_out_class=left_out)
        scaler = preprocessing.Scaler()
        scaler.fit(reference_data)
        optimized_parameters = tuneParametersForSVM(files, scaler, reference_data, True,
            leave_out_class=left_out)
        #optimized_parameters = (1910.41398886, 9.88131291682e-324)
        # Clamp the tuned values: C must not be negative and gamma must stay
        # strictly positive (1e-323 is a tiny denormal float).
        results.append((left_out, ParallelXValidation(files, scaler, True,
            C=max(0.0, optimized_parameters[0]),
            gamma=max(1e-323, optimized_parameters[1]), leave_out_class=left_out)))
    else:
        # NOTE(review): the first three feature classes are skipped here; the
        # reason is not visible in this function.
        for index in range(3, len(feature_Classes)):
            left_out = feature_Classes[index]
            reference_data, _ = getDataMatrix(files, leave_out_class=left_out)
            scaler = preprocessing.Scaler()
            scaler.fit(reference_data)
            results.append((left_out, ParallelXValidation(files, scaler, True,
                leave_out_class=left_out)))
    for result_class, result in results:
        # The part after the first dot of the class' string form is the label.
        label = str(result_class).split(".")[1]
        # Pad with tabs so results line up; joining k empty strings with
        # "\t" yields k - 1 tabs (none for k <= 1).
        tab_slots = int(round((50 - len(label)) / 8.0))
        padding = "\t" * max(tab_slots - 1, 0)
        print(label + ":" + padding + str(result))
        os.system("wget http://www.pinae.net/automoculus/getText.php?text=FeatureClass_is_" +
                  label + "_Result_is_" + str(result))
        os.system("rm getText*")
Beispiel #5
0
def calculate_missing(filename,partno,parts):
    """Cross-validate one slice of the (C, gamma) jobs listed in a file.

    filename -- text file with one job per line in the form ``C;gamma``
                (further ``;``-separated fields are ignored, blank lines
                are skipped).
    partno   -- zero-based index of the slice this call should process.
    parts    -- total number of slices the job list is divided into.

    The job list is cut into ``parts`` slices of ``len(jobs) // parts``
    entries.  The last slice (``partno == parts - 1``) additionally takes
    the remainder, so that no job is left unprocessed when the job count is
    not a multiple of ``parts``.

    For every job the result of ``ParallelXValidation`` is reported by
    fetching a URL with ``wget``; the file that ``wget`` downloads is
    removed afterwards.  Nothing is returned.

    Raises ``ZeroDivisionError`` if ``parts`` is 0 and ``ValueError`` /
    ``IndexError`` for malformed job lines.
    """
    files = TRAIN_FILES
    reference_data, _ = getDataMatrix(files)
    scaler = preprocessing.Scaler()
    scaler.fit(reference_data)
    joblist = []
    # The with-block closes the file even if a line fails to parse.
    with open(filename, 'r') as m_file:
        for line in m_file:
            if not line.strip():
                continue
            fields = line.split(";")
            joblist.append((float(fields[0]), float(fields[1])))
    # Floor division keeps the slice bounds integral under true division too.
    chunk_size = len(joblist) // parts
    remaining = joblist[partno * chunk_size:]
    if partno == parts - 1:
        # Last slice: include the len(joblist) % parts leftover jobs.
        my_jobs = remaining
    else:
        my_jobs = remaining[:chunk_size]
    for C, gamma in my_jobs:
        os.system("wget http://www.pinae.net/automoculus/getText.php?text=C_is_" + str(C) + "_gamma_is_" + str(
            gamma) + "_Result_is_" + str(ParallelXValidation(files, scaler, True, C=C, gamma=gamma)))
        os.system("rm getText*")
Beispiel #6
0
def main():
    """Grid-search C and gamma on the first 30 training files.

    Every (C, gamma) combination is cross-validated with
    ``ParallelXValidation``; each result is printed as soon as it is
    available.  At the end the results are written to
    ``GridSearch_results.csv`` as tab-separated values, one line per C value
    and one column per gamma value.
    """
    files = TRAIN_FILES[:30]
    reference_data, _ = getDataMatrix(files)
    scaler = preprocessing.Scaler()
    scaler.fit(reference_data)
    c_values = [1,10,100,1000,2000,3000,4000,5000,6000,7000,8000,10000]
    gamma_values = [10,1,1e-1,1e-2,1e-3,1e-4,1e-5,1e-6,1e-7,1e-8]
    lines = []

    for C in c_values:
        row = []
        for gamma in gamma_values:
            result = str(ParallelXValidation(files, scaler, True, C=C, gamma=gamma))
            row.append(result)
            print("C: "+str(C)+"\tgamma: "+str(gamma)+"\t:: "+result)
        lines.append("\t".join(row)+"\n")
    # The with-block closes the file even if writing fails, and the handle
    # no longer shadows the builtin name `file`.
    with open("GridSearch_results.csv","w") as results_file:
        results_file.writelines(lines)
Beispiel #7
0
def doAFullRun(C = 1.0, gamma=0.0):
    """Run a leave-one-file-out evaluation over TRAIN_FILES and print the results.

    For every index into TRAIN_FILES a classifier is trained on the
    cross-validation set built by ``createCrossValidationSet`` and evaluated
    with ``getPerformance`` on both the held-out file and the training
    files.  Per-round figures and the averages over all rounds (plain and
    weighted) are printed; nothing is returned.

    C, gamma -- parameters forwarded to ``getSVM``.
    """
    # Configure the classifier.
    svm_learner = getSVM(C=C, gamma=gamma)
    domain = getDomain(orange.EnumVariable(name="Shot", values=SHOT_NAMES))
    reference_data, _ = getDataMatrix(TRAIN_FILES, True)
    # Normalisation terms are computed once on the full data set and reused
    # in every round.
    means, norm_vars = getNpNormalizationTerms(reference_data)

    # Running sums of the per-round results.
    test_sum = 0.0
    train_sum = 0.0
    weighted_test_sum = 0.0
    weighted_train_sum = 0.0
    for round_no in range(len(TRAIN_FILES)):
        print("============= Round %d ================" %round_no)
        print("training classifier...")
        # Assemble training and test data; the test vectors and classes are
        # not needed here because getPerformance works on the files.
        (train_vectors, train_classes, _test_vectors, _test_classes,
         train_files, test_file) = createCrossValidationSet(round_no)
        # Normalise the training vectors and train the classifier.
        normalized_vectors = normalizeNpData(train_vectors, means, norm_vars)
        example_table = convertToExampleTable(domain, normalized_vectors, train_classes)
        classifier = svm_learner(example_table)

        print("evaluating test-set performance")
        test_score, test_weighted = getPerformance(
            classifier, domain, [test_file], means, norm_vars)
        print("evaluating train-set performance")
        train_score, train_weighted = getPerformance(
            classifier, domain, train_files, means, norm_vars)

        print("Training Performance \t: %04f (weighted: %f)"%(train_score, train_weighted))
        print("Test Performance     \t: %04f (weighted: %f)"%(test_score, test_weighted))
        test_sum += test_score
        train_sum += train_score
        weighted_test_sum += test_weighted
        weighted_train_sum += train_weighted

    # Average the performance over all rounds.
    avg_test = test_sum / len(TRAIN_FILES)
    avg_train = train_sum / len(TRAIN_FILES)
    avg_test_weighted = weighted_test_sum / len(TRAIN_FILES)
    avg_train_weighted = weighted_train_sum / len(TRAIN_FILES)

    print ("Average Training Performance: %04f (weighted: %f)"%(avg_train, avg_train_weighted))
    print ("Average Test Performance    : %04f (weighted: %f)"%(avg_test, avg_test_weighted))
Beispiel #8
0
def testAllButFile(file, files, scaler, return_queue, fake_decisions=False, C=None,
                   gamma=None, leave_out_class=None):
    """
    Train an SVM on all files in ``files`` except ``file`` and test it on ``file``.

    The held-out file is classified block by block.  Nothing is returned;
    instead a 5-tuple is put on ``return_queue`` (which is then closed):

        (correct_histogram, guessed_histogram, performance,
         medium_shot_performance, average point metric)

    * ``correct_histogram`` / ``guessed_histogram`` -- 7-bin counters indexed
      by the reference shot class and by the classified shot class.
    * ``performance`` -- fraction of blocks classified correctly.
    * ``medium_shot_performance`` -- fraction of blocks whose reference shot
      class is 2 (it does not depend on the classification).
    * average point metric -- sum of ``pointMetric`` over all blocks divided
      by the number of blocks.

    file            -- the file used for testing.
    files           -- all files; ``file`` is filtered out for training.
    scaler          -- already fitted scaler, applied to the training data
                       and passed on to the classification call.
    return_queue    -- queue that receives the result tuple.
    fake_decisions  -- if true, the decision history handed to the classifier
                       consists of the reference shots of the preceding
                       blocks; otherwise it consists of the classifier's own
                       earlier decisions.
    C, gamma        -- forwarded to ``trainSVM``.
    leave_out_class -- forwarded to ``getDataMatrix`` and to the
                       classification call.

    NOTE(review): if the file yields no blocks, the divisions at the end
    raise ZeroDivisionError and nothing is put on ``return_queue``.
    """
    training_set = [f for f in files if f != file]
    training_data, training_data_classes = getDataMatrix(training_set,
        leave_out_class=leave_out_class, shot=True)
    training_data = scaler.transform(training_data, training_data_classes)
    trained_svm = trainSVM(training_data, training_data_classes, C=C, gamma=gamma)
    context, beatList = getContextAndBeatListFromFile(file)
    blockList = coalesceBeats(beatList)
    # Blocks seen so far (grows by one per iteration) and the matching
    # decision history.
    part_blockList = []
    decisions = []
    correct_classification_count = 0
    medium_shot_count = 0
    metric_sum = 0
    # One bin per shot class.
    correct_histogram = [0, 0, 0, 0, 0, 0, 0]
    guessed_histogram = [0, 0, 0, 0, 0, 0, 0]
    # Disabled two-bin variant (presumably for a shot-change/no-change test):
    #correct_histogram = [0, 0]
    #guessed_histogram = [0, 0]
    #last_block = None
    for block in blockList:
        # prepare block-list and decision-list
        part_blockList.append(block)
        if fake_decisions:
            # Rebuild the history from the reference shots of all blocks
            # before the current one.
            decisions = []
            for i in range(len(part_blockList)-1):
                decisions.append(part_blockList[i][-1].shot)
        # The context is deep-copied for every call, so the callee always
        # receives a fresh copy.
        svm_distribution, svm_classification = calculateDistributionAndClassification(
            trained_svm, deepcopy(context),  part_blockList, decisions, scaler,
            shot_or_cut=True, leave_out_class=leave_out_class)
        if not fake_decisions:
            # Real history: the classifier's own decision for this block is
            # appended, so from here on decisions[-1] is the current block.
            decisions.append(svm_classification)
        guessed_histogram[svm_classification] += 1
        correct_histogram[block[-1].shot] += 1
        #is_shot = True
        #if last_block:
        #    is_shot = block[-1].shotId != last_block[-1].shotId
        #correct_histogram[int(is_shot)] += 1
        if svm_classification == block[-1].shot:
        #if boost_classification == int(is_shot):
            correct_classification_count += 1
        # Counts blocks whose reference class is 2, regardless of the guess.
        if block[-1].shot == 2: medium_shot_count += 1
        # Determine the previous reference class and the previous guessed
        # class for the point metric.
        if len(part_blockList) >= 2:
            previous_correct_class = part_blockList[-2][-1].shot
            if len(decisions) >= 2:
                previous_guessed_class = decisions[-2]
            else: previous_guessed_class = previous_correct_class
        else:
            # First block: there is no predecessor, so the block's own
            # reference class is used.
            previous_correct_class = part_blockList[-1][-1].shot
            if len(decisions) >= 1:
                previous_guessed_class = decisions[-1]
            else: previous_guessed_class = previous_correct_class
        metric_sum += pointMetric(svm_classification, block[-1].shot,
            previous_guessed_class, previous_correct_class)
        #last_block = block
    performance = float(correct_classification_count)/len(blockList)
    medium_shot_performance = float(medium_shot_count)/len(blockList)
    return_queue.put((
    correct_histogram, guessed_histogram, performance, medium_shot_performance,
    float(metric_sum) / len(blockList)))
    return_queue.close()
Beispiel #9
0
def XValidation(files, fake_decisions = False):
    """
    Leave-one-file-out validation of the SVM classifier with verbose output.

    Since the decisions of the classifier during classifying a beatscript are
    used, this is not a classical cross-validation.  Instead the training is
    done with all but one of the files and the remaining beatscript is tested
    block by block.  This is repeated for every file in ``files``.

    files          -- the files to validate on.
    fake_decisions -- if true, the decision history is faked by using the
                      original classes of the preceding blocks of the test
                      file; otherwise the classifier's own earlier decisions
                      are used as history.

    Training and every single classification run in separate processes whose
    results are fetched through queues.  Progress, per-block results,
    per-file results and overall statistics are printed.

    Returns the average per-file "wrongness" (mean ``pointMetric`` value per
    block, averaged over all files).

    NOTE(review): the scaler is fitted on TRAIN_FILES, not on ``files``.
    NOTE(review): a file without blocks or an empty ``files`` list leads to
    a ZeroDivisionError.
    """
    reference_data, _ = getDataMatrix(TRAIN_FILES)
    scaler = preprocessing.Scaler()
    scaler.fit(reference_data)
    # One bin per shot class; filled across all files but not reported by
    # this function.
    correct_histogram = [0, 0, 0, 0, 0, 0, 0]
    guessed_histogram = [0, 0, 0, 0, 0, 0, 0]
    performances = []
    allover_point_sum = 0.0
    medium_shot_performances = []
    for file in files:
        # Progress message (the runtime text is German: "approx. N% done").
        print("X-Validation: ca. " +
              str(int(round(float(files.index(file)) / len(files) * 100))) +
              "% fertig.")
        training_set = [f for f in files if f != file]
        training_data, training_data_classes = getDataMatrix(training_set)
        training_data = scaler.transform(training_data, training_data_classes)
        print("Trainingsdaten erzeugt. Trainiere Classifier...")
        # Train in a separate process; the trained SVM is received through
        # the queue further below.
        print_lock = Lock()
        svm_queue = Queue(maxsize=1)
        svm_learning_process = Process(target=trainSVM,
            args=(training_data, training_data_classes, svm_queue, print_lock))
        svm_learning_process.start()

        # Load the test file while the training process is running.
        context, beatList = getContextAndBeatListFromFile(file)
        blockList = coalesceBeats(beatList)
        part_blockList = []
        decisions = []
        correct_classification_count = 0
        medium_shot_count = 0
        metric_sum = 0

        # Blocks until the training process delivers the classifier.
        trained_svm = svm_queue.get()
        svm_learning_process.join()
        print("Training finished for: " + file)
        for block in blockList:
            # prepare blocklist and decision-list
            part_blockList.append(block)
            if fake_decisions:
                # Rebuild the history from the reference shots of all blocks
                # before the current one.
                decisions = []
                for i in range(len(part_blockList)-1):
                    decisions.append(part_blockList[i][-1].shot)
            # Classify the current block in its own process and fetch the
            # result through a fresh queue.
            svm_queue = Queue(maxsize=1)
            svm_classification_process = Process(
                target=calculateDistributionAndClassification,
                args=(
                trained_svm, deepcopy(context), part_blockList, decisions, scaler, True,
                svm_queue))
            svm_classification_process.start()
            svm_distribution, svm_classification = svm_queue.get()
            svm_classification_process.join()
            if not fake_decisions:
                # Real history: from here on decisions[-1] is the decision
                # for the current block.
                decisions.append(svm_classification)
            print("SVM Classification:\t" + SHOT_NAMES[svm_classification])
            guessed_histogram[svm_classification] += 1
            print("Correct Class:\t\t" + SHOT_NAMES[block[-1].shot])
            # Determine the previous reference class and the previous
            # guessed class for the point metric.
            if len(part_blockList)>= 2:
                previous_correct_class = part_blockList[-2][-1].shot
                if len(decisions) >= 2:
                    previous_guessed_class = decisions[-2]
                else: previous_guessed_class = previous_correct_class
            else:
                # First block: there is no predecessor, so the block's own
                # reference class is used.
                previous_correct_class = part_blockList[-1][-1].shot
                if len(decisions) >= 1:
                    previous_guessed_class = decisions[-1]
                else: previous_guessed_class = previous_correct_class
            metric_value = pointMetric(svm_classification, block[-1].shot,
                previous_guessed_class, previous_correct_class)
            print("Wrongness:\t\t\t" + str(metric_value))
            metric_sum += metric_value
            correct_histogram[block[-1].shot] += 1
            if svm_classification == block[-1].shot:
                correct_classification_count += 1
            # Counts blocks whose reference class is 2, regardless of the
            # guess.
            if block[-1].shot == 2: medium_shot_count += 1
            print("------------------------------------")

        # Per-file summary.
        print("File Performance: " + str(
            float(correct_classification_count) / len(blockList) * 100) + "%")
        print(
        "File Wrongness: " + str(float(metric_sum) / len(blockList)) + " Points ( 0 - 5 )")
        performances.append(float(correct_classification_count) / len(blockList))
        medium_shot_performances.append(float(medium_shot_count) / len(blockList))
        allover_point_sum += float(metric_sum) / len(blockList)
        print("__________________________________________")

    # Mean, minimum and maximum of the per-file share of blocks with
    # reference class 2.  Both lists have one entry per file, so dividing by
    # len(performances) is the same as dividing by the number of files.
    performance_sum = 0
    performance_best = 0
    performance_last = 1
    for p in medium_shot_performances:
        performance_sum += p
        if p > performance_best: performance_best = p
        if p < performance_last: performance_last = p
    print("MS-Performance:\t" + str(performance_sum / len(performances) * 100.0) + "%\t(" +
          str(performance_last) + " - " + str(performance_best) + ")")

    # Mean, minimum and maximum of the per-file classification accuracy.
    performance_sum = 0
    performance_best = 0
    performance_last = 1
    for p in performances:
        performance_sum += p
        if p > performance_best: performance_best = p
        if p < performance_last: performance_last = p
    print("Performance:\t" + str(performance_sum / len(performances) * 100.0) + "%\t(" +
          str(performance_last) + " - " + str(performance_best) + ")")
    print("Wrongness:\t" + str(allover_point_sum / len(performances)))
    return allover_point_sum / len(performances)
def main():
    """Fit a scaler on all of TRAIN_FILES and start the SVM parameter tuning.

    The value returned by ``tuneParametersForSVM`` is discarded.
    """
    all_features, _ = getDataMatrix(TRAIN_FILES)
    feature_scaler = preprocessing.Scaler()
    feature_scaler.fit(all_features)
    tuneParametersForSVM(TRAIN_FILES, feature_scaler, all_features, True)
Beispiel #11
0
def createCrossValidationSet(leaveOutIndex):
    """Split TRAIN_FILES into one held-out test file and the remaining files.

    leaveOutIndex -- position in TRAIN_FILES of the file to hold out.

    TRAIN_FILES itself is not modified; the split is done on a deep copy.

    Returns a 6-tuple ``(train_vectors, train_classes, test_vectors,
    test_classes, train_files, test_file)``, where the vectors and classes
    come from ``getDataMatrix`` for the respective file set.
    """
    remaining_files = deepcopy(TRAIN_FILES)
    held_out_file = remaining_files.pop(leaveOutIndex)
    train_vectors, train_classes = getDataMatrix(remaining_files, True)
    test_vectors, test_classes = getDataMatrix([held_out_file], True)
    return (train_vectors, train_classes,
            test_vectors, test_classes,
            remaining_files, held_out_file)