def createLearningCurve(C = 1.0, gamma=0.0):
    """Train on growing prefixes of the shuffled training set and score each model.

    The training examples are loaded once, normalized with the terms returned
    by ``getNormalizationTerms``, joined with their class column and shuffled
    row-wise. A classifier is then trained on the first 100, 200, ... rows
    (every multiple of 100 strictly below the number of rows) and evaluated
    with ``getPerformance`` on ``TRAIN_FILES``.

    Args:
        C: forwarded to ``getSVM`` as ``C``.
        gamma: forwarded to ``getSVM`` as ``gamma``.

    Returns:
        A tuple ``(performance, wPerformance)`` of two lists holding, per
        prefix size, the first and second value returned by
        ``getPerformance`` (the second one is printed as "weighted").
    """
    file_list = deepcopy(TRAIN_FILES)

    # Configure the classifier.
    svm_learner = getSVM(C=C, gamma=gamma)
    shot_class = orange.EnumVariable(name="Shot", values=SHOT_NAMES)
    domain = getDomain(shot_class)
    offsets, scales = getNormalizationTerms(domain)

    examples = getTrainingExamples(domain, file_list, True)
    features, labels, _ = examples.to_numpy()

    # Normalize the training features, then append the class as last column.
    features = (features - offsets) / scales
    label_column = labels.reshape(-1, 1)
    rows = np.hstack((features, label_column))
    # In-place shuffle along the first axis, so each prefix is a random subset.
    np.random.shuffle(rows)

    step = 100
    scores = []
    weighted_scores = []
    for subset_size in range(step, rows.shape[0], step):
        print("============= TrainingFiles %d ================"%subset_size)
        print("training classifier...")
        subset_table = orange.ExampleTable(domain, rows[:subset_size, :])

        # Train the classifier on the current prefix.
        model = svm_learner(subset_table)

        # Measure performance on the training files.
        print("testing classifier...")
        score, weighted_score = getPerformance(
            model, domain, TRAIN_FILES, offsets, scales
        )
        print("Performance \t: %04f (weighted: %f)"%(score, weighted_score))
        scores.append(score)
        weighted_scores.append(weighted_score)

    return scores, weighted_scores
def runExampleForDifferentParams(gammas = (0.0, ), Cs = (1.0, ), i = 5):
    """Train and evaluate one cross-validation split for every (gamma, C) pair.

    The split is built once with ``createCrossValidationSet(i, domain)``; its
    training part is normalized and converted to an example table before the
    parameter sweep. For each combination a classifier is trained and its
    performance on the held-out file and on the training files is printed.

    Args:
        gammas: iterable of values passed to ``getSVM`` as ``gamma``
            (outer loop).
        Cs: iterable of values passed to ``getSVM`` as ``C`` (inner loop).
        i: first argument passed to ``createCrossValidationSet``.

    Returns:
        None; results are only printed.
    """
    shot_class = orange.EnumVariable(name="Shot", values=SHOT_NAMES)
    domain = getDomain(shot_class)
    offsets, scales = getNormalizationTerms(domain)

    # NOTE(review): doAFullRun calls createCrossValidationSet(i) and unpacks
    # six values; confirm that this (i, domain) / four-value form is still
    # supported by the helper.
    split = createCrossValidationSet(i, domain)
    raw_train, _unused_test_data, train_files, held_out_file = split

    # Normalize the training features and rebuild the example table.
    features, labels, _ = raw_train.to_numpy()
    features = (features - offsets) / scales
    label_column = labels.reshape(-1, 1)
    train_table = orange.ExampleTable(domain, np.hstack((features, label_column)))

    for gamma in gammas:
        for C in Cs:
            print("========== Params (C = %f, gamma = %f) ============"%(C, gamma))

            # Configure the classifier for this parameter pair.
            svm_learner = getSVM(gamma=gamma, C=C)

            # Train the classifier.
            print("training classifier...")
            model = svm_learner(train_table)

            # Performance on the held-out file.
            print("evaluating test-set performance")
            test_score, test_weighted = getPerformance(
                model, domain, [held_out_file], offsets, scales
            )

            # Performance on the training files.
            print("evaluating train-set performance")
            train_score, train_weighted = getPerformance(
                model, domain, train_files, offsets, scales
            )

            print("Training Performance \t: %04f (weighted: %f)"%(train_score, train_weighted))
            print("Test Performance \t: %04f (weighted: %f)"%(test_score, test_weighted))
def doAFullRun(C = 1.0, gamma=0.0):
    """Run one cross-validation round per entry of ``TRAIN_FILES`` and print averages.

    Normalization terms are computed once from the data matrix of all
    ``TRAIN_FILES``. In round ``i`` the split from
    ``createCrossValidationSet(i)`` is used: the training vectors are
    normalized, converted to an example table and fed to the learner, and the
    resulting classifier is scored on the held-out file and on the training
    files. Per-round and averaged scores are printed.

    Args:
        C: forwarded to ``getSVM`` as ``C``.
        gamma: forwarded to ``getSVM`` as ``gamma``.

    Returns:
        None; results are only printed.
    """
    # Configure the classifier.
    svm_learner = getSVM(C=C, gamma=gamma)
    shot_class = orange.EnumVariable(name="Shot", values=SHOT_NAMES)
    domain = getDomain(shot_class)
    all_vectors, _ = getDataMatrix(TRAIN_FILES, True)
    offsets, scales = getNpNormalizationTerms(all_vectors)

    # Running sums of the per-round scores.
    test_sum = 0.0
    train_sum = 0.0
    test_weighted_sum = 0.0
    train_weighted_sum = 0.0

    for round_index in range(len(TRAIN_FILES)):
        print("============= Round %d ================" %round_index)
        print("training classifier...")

        # Assemble training and test data; the test vectors/classes are not
        # used here because evaluation goes through the file names.
        (fold_vectors, fold_classes,
         _held_out_vectors, _held_out_classes,
         fold_files, held_out_file) = createCrossValidationSet(round_index)

        # Normalize the training vectors and build the example table.
        fold_vectors = normalizeNpData(fold_vectors, offsets, scales)
        fold_table = convertToExampleTable(domain, fold_vectors, fold_classes)

        # Train the classifier.
        model = svm_learner(fold_table)

        # Performance on the held-out file.
        print("evaluating test-set performance")
        test_score, test_weighted = getPerformance(
            model, domain, [held_out_file], offsets, scales
        )

        # Performance on the training files.
        print("evaluating train-set performance")
        train_score, train_weighted = getPerformance(
            model, domain, fold_files, offsets, scales
        )

        print("Training Performance \t: %04f (weighted: %f)"%(train_score, train_weighted))
        print("Test Performance \t: %04f (weighted: %f)"%(test_score, test_weighted))

        test_sum = test_sum + test_score
        train_sum = train_sum + train_score
        test_weighted_sum = test_weighted_sum + test_weighted
        train_weighted_sum = train_weighted_sum + train_weighted

    # Average the accumulated train and test performance over all rounds.
    round_count = len(TRAIN_FILES)
    avg_test = test_sum / round_count
    avg_train = train_sum / round_count
    avg_test_weighted = test_weighted_sum / round_count
    avg_train_weighted = train_weighted_sum / round_count

    print("Average Training Performance: %04f (weighted: %f)"%(avg_train, avg_train_weighted))
    print("Average Test Performance : %04f (weighted: %f)"%(avg_test, avg_test_weighted))