def specific_line(number):
    """Cross-validate one C value against the whole gamma range.

    number -- index into the 50 linearly spaced C candidates
        (200, 400, ..., 10000) selecting the C this call works on.

    For the selected C, each of the 250 gamma candidates
    (10**2 down to 10**-22.9 in steps of 0.1 in the exponent) is evaluated
    with ``ParallelXValidation``. Every result is reported by calling
    ``wget`` on the getText.php URL with the values encoded in the query
    string; the ``getText*`` files are removed afterwards.
    """
    train_files = TRAIN_FILES
    reference_matrix, _ = getDataMatrix(train_files)
    scaler = preprocessing.Scaler()
    scaler.fit(reference_matrix)

    # 50 processes; 250 runs per process; C is linearly distributed.
    c_candidates = range(200, 10001, 200)
    gamma_candidates = [1.0 * 10.0 ** -(x / 10.0) for x in range(-20, 230)]

    report_prefix = "wget http://www.pinae.net/automoculus/getText.php?text=C_is_"
    for gamma in gamma_candidates:
        c_text = str(c_candidates[number])
        gamma_text = str(gamma)
        result_text = str(ParallelXValidation(
            train_files, scaler, True, C=c_candidates[number], gamma=gamma))
        os.system(report_prefix + c_text + "_gamma_is_" + gamma_text
                  + "_Result_is_" + result_text)
        # Presumably removes the response file wget wrote to the working
        # directory -- the original formatting was lost, so the placement
        # inside the loop is an assumption.
        os.system("rm getText*")
def trainWithAllExamples(shot):
    """Train an SVM on all files in TRAIN_FILES using a child process.

    shot -- passed unchanged as second argument to ``getDataMatrix``.

    A fresh scaler is fitted on the training matrix and used to transform
    it before the training process is started.

    Returns ``((classifier,), scaler)``: a one-element tuple with the
    classifier taken from the worker's queue, and the fitted scaler.
    """
    features, labels = getDataMatrix(TRAIN_FILES, shot)
    scaler = preprocessing.Scaler()
    scaled_features = scaler.fit_transform(features, labels)

    worker_lock = Lock()
    result_queue = Queue()
    worker = Process(
        target=trainSVM,
        args=(scaled_features, labels, result_queue, worker_lock),
    )
    worker.start()
    # Take the result off the queue before joining the worker; the
    # multiprocessing guidelines warn that joining a process which still
    # has queued items can block.
    classifier = result_queue.get()
    worker.join()

    classifiers = (classifier,)
    return classifiers, scaler
def main():
    """Run ParallelXValidation on TRAIN_FILES with one fixed C/gamma pair.

    The scaler is fitted on the data matrix of all training files (built
    with ``shot=True``) and handed to ``ParallelXValidation`` together with
    ``False`` as the third positional argument.
    """
    # Disabled alternative from the original: XValidation(TRAIN_FILES, True)
    shot_matrix, _ = getDataMatrix(TRAIN_FILES, shot=True)
    scaler = preprocessing.Scaler()
    scaler.fit(shot_matrix)

    # Parameter pairs used by earlier, now disabled, calls (each of them
    # passed True as the third argument):
    #   C=2582.61517656, gamma=0.00036375303213
    #   C=2583.31718583, gamma=0.00191943088336
    #   C=2585.53147506, gamma=2.60057621686e-05
    #   C=2585.61614258, gamma=2.15704131861e-05
    #   C=2585.81448898, gamma=1.73105463456e-05
    #   C=1999.62466242, gamma=1.62885637292e-06
    #   C=1999.32984556, gamma=3.03787358388e-07
    active_parameters = {"C": 1999.85770959, "gamma": 6.30930490772e-07}
    ParallelXValidation(TRAIN_FILES, scaler, False, **active_parameters)
def TestFeatureClassRelevance(number = None):
    """Cross-validate with one feature class left out and report the result.

    number -- index into ``getAllFeatureClasses()`` of the single feature
        class to leave out. For that class the SVM parameters are first
        tuned with ``tuneParametersForSVM``. With ``None`` (the default)
        every feature class from index 3 onwards is left out in turn and
        ``ParallelXValidation`` runs with its own default C and gamma.

    Each (feature class, result) pair is printed as a tab-aligned line and
    reported by calling ``wget`` on the getText.php URL; the ``getText*``
    files are removed afterwards. Nothing is returned.
    """
    files = TRAIN_FILES
    feature_classes = getAllFeatureClasses()
    results = []

    # Compare against None explicitly: 0 is a valid index and must select
    # the first feature class instead of falling through to the full sweep.
    if number is not None:
        left_out = feature_classes[number]
        reference_data, _ = getDataMatrix(files, leave_out_class=left_out)
        scaler = preprocessing.Scaler()
        scaler.fit(reference_data)
        optimized_parameters = tuneParametersForSVM(
            files, scaler, reference_data, True, leave_out_class=left_out)
        # A fixed pair that was once used instead of tuning (disabled in
        # the original): (1910.41398886, 9.88131291682e-324)
        # Clamp the tuned values from below so C is never negative and
        # gamma never drops below 1e-323.
        results.append((left_out, ParallelXValidation(
            files, scaler, True,
            C=max(0.0, optimized_parameters[0]),
            gamma=max(1e-323, optimized_parameters[1]),
            leave_out_class=left_out)))
    else:
        # The first three feature classes are skipped by the sweep.
        for left_out in feature_classes[3:]:
            reference_data, _ = getDataMatrix(files, leave_out_class=left_out)
            scaler = preprocessing.Scaler()
            scaler.fit(reference_data)
            results.append((left_out, ParallelXValidation(
                files, scaler, True, leave_out_class=left_out)))

    for result_class, result in results:
        # The printable name is the part after the first dot of str(class).
        class_name = str(result_class).split(".")[1]
        # Same padding as joining N empty strings with tabs: N - 1 tabs,
        # where N shrinks as the name gets longer.
        slot_count = int(round((50 - len(class_name)) / 8.0))
        padding = "\t" * max(slot_count - 1, 0)
        print(class_name + ":" + padding + str(result))
        os.system("wget http://www.pinae.net/automoculus/getText.php?text=FeatureClass_is_"
                  + class_name + "_Result_is_" + str(result))
        os.system("rm getText*")
def calculate_missing(filename,partno,parts):
    """Re-run cross-validation for one share of the C/gamma pairs in a file.

    filename -- text file with one job per line in the form ``C;gamma``
        (further ``;``-separated fields are ignored, blank lines skipped).
    partno -- zero-based number of the share this call should process.
    parts -- number of shares the job list is divided into.

    The job list is cut into ``parts`` chunks of ``len(jobs) // parts``
    entries. The last share (``partno == parts - 1``) additionally takes
    the entries left over when the list does not divide evenly, so that no
    job is silently dropped. Each result is reported by calling ``wget`` on
    the getText.php URL; the ``getText*`` files are removed afterwards.

    Raises ZeroDivisionError if ``parts`` is 0 and ValueError if a
    non-blank line does not start with two numbers.
    """
    files = TRAIN_FILES
    reference_data, _ = getDataMatrix(files)
    scaler = preprocessing.Scaler()
    scaler.fit(reference_data)

    joblist = []
    # The context manager closes the file even when a line fails to parse.
    with open(filename, 'r') as m_file:
        for line in m_file:
            if not line.strip():
                continue
            fields = line.split(";")
            joblist.append((float(fields[0]), float(fields[1])))

    # Floor division keeps the chunk size an integer on Python 2 and 3,
    # which slicing requires.
    chunk_size = len(joblist) // parts
    own_jobs = joblist[partno * chunk_size:]
    if partno != parts - 1:
        own_jobs = own_jobs[:chunk_size]

    for C, gamma in own_jobs:
        os.system("wget http://www.pinae.net/automoculus/getText.php?text=C_is_"
                  + str(C) + "_gamma_is_" + str(gamma) + "_Result_is_"
                  + str(ParallelXValidation(files, scaler, True, C=C, gamma=gamma)))
        os.system("rm getText*")
def main():
    """Grid-search C and gamma on the first 30 training files.

    For every combination of the C and gamma candidates below,
    ``ParallelXValidation`` is run and its result printed. The results are
    written to ``GridSearch_results.csv`` once the whole grid is done: one
    line per C value, one tab-separated column per gamma value.
    """
    files = TRAIN_FILES[:30]
    reference_data, _ = getDataMatrix(files)
    scaler = preprocessing.Scaler()
    scaler.fit(reference_data)

    c_values = [1, 10, 100, 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, 10000]
    gamma_values = [10, 1, 1e-1, 1e-2, 1e-3, 1e-4, 1e-5, 1e-6, 1e-7, 1e-8]

    lines = []
    for C in c_values:
        cells = []
        for gamma in gamma_values:
            cell = str(ParallelXValidation(files, scaler, True, C=C, gamma=gamma))
            cells.append(cell)
            print("C: " + str(C) + "\tgamma: " + str(gamma) + "\t:: " + cell)
        lines.append("\t".join(cells) + "\n")

    # The context manager closes the file even if writing fails; the local
    # name no longer shadows the ``file`` builtin.
    with open("GridSearch_results.csv", "w") as results_file:
        results_file.writelines(lines)
def doAFullRun(C = 1.0, gamma=0.0):
    """Run a leave-one-file-out evaluation and print the performances.

    C, gamma -- passed to ``getSVM`` to configure the learner.

    For each file in TRAIN_FILES a classifier is trained on the normalized
    vectors of all other files, then scored with ``getPerformance`` on the
    held-out file and on the training files. Per-round and averaged values
    (plain and weighted) are printed; nothing is returned.
    """
    # Configure the classifier.
    svm_learner = getSVM(C=C, gamma=gamma)
    shot_domain = getDomain(orange.EnumVariable(name="Shot", values=SHOT_NAMES))
    all_vectors, _ = getDataMatrix(TRAIN_FILES, True)
    # Normalization terms come from the data of all training files.
    norm_means, norm_vars = getNpNormalizationTerms(all_vectors)

    # Running sums over all rounds.
    sum_test = 0.0
    sum_train = 0.0
    sum_test_weighted = 0.0
    sum_train_weighted = 0.0

    for fold in range(len(TRAIN_FILES)):
        print("============= Round %d ================" % fold)
        print("training classifier...")
        # Assemble training and test data for this round.
        (fold_vectors, fold_classes, _test_vectors, _test_classes,
         fold_train_files, fold_test_file) = createCrossValidationSet(fold)

        # Normalize the training vectors and train the classifier.
        fold_vectors = normalizeNpData(fold_vectors, norm_means, norm_vars)
        fold_table = convertToExampleTable(shot_domain, fold_vectors, fold_classes)
        fold_classifier = svm_learner(fold_table)

        print("evaluating test-set performance")
        test_score, test_weighted = getPerformance(
            fold_classifier, shot_domain, [fold_test_file], norm_means, norm_vars)

        print("evaluating train-set performance")
        train_score, train_weighted = getPerformance(
            fold_classifier, shot_domain, fold_train_files, norm_means, norm_vars)

        print("Training Performance \t: %04f (weighted: %f)" % (train_score, train_weighted))
        print("Test Performance \t: %04f (weighted: %f)" % (test_score, test_weighted))

        sum_test += test_score
        sum_train += train_score
        sum_test_weighted += test_weighted
        sum_train_weighted += train_weighted

    # Average the performances on training and test data over all rounds.
    sum_test /= len(TRAIN_FILES)
    sum_train /= len(TRAIN_FILES)
    sum_test_weighted /= len(TRAIN_FILES)
    sum_train_weighted /= len(TRAIN_FILES)
    print("Average Training Performance: %04f (weighted: %f)" % (sum_train, sum_train_weighted))
    print("Average Test Performance : %04f (weighted: %f)" % (sum_test, sum_test_weighted))
def testAllButFile(file, files, scaler, return_queue, fake_decisions=False, C=None, gamma=None, leave_out_class=None):
    """
    Train an SVM on every entry of ``files`` except ``file`` and test it on
    ``file``.

    Nothing is returned; the result is put on ``return_queue`` as a 5-tuple
    ``(correct_histogram, guessed_histogram, performance,
    medium_shot_performance, mean_metric)`` and the queue is closed.

    file -- the held-out file the classifier is tested on.
    files -- all candidate files; ``file`` is filtered out for training.
    scaler -- only ``transform`` is called on it here, so it is presumably
        already fitted by the caller.
    return_queue -- receives the result tuple via ``put`` and is then closed.
    fake_decisions -- if true, the decision history handed to the classifier
        is rebuilt for every block from the ``shot`` values stored in the
        preceding blocks; otherwise the classifier's own previous
        classifications are used.
    C, gamma -- passed to ``trainSVM``.
    leave_out_class -- passed to ``getDataMatrix`` and to
        ``calculateDistributionAndClassification``.

    Raises ZeroDivisionError if the file yields no blocks.
    """
    training_set = [f for f in files if f != file]
    training_data, training_data_classes = getDataMatrix(training_set, leave_out_class=leave_out_class, shot=True)
    training_data = scaler.transform(training_data, training_data_classes)
    trained_svm = trainSVM(training_data, training_data_classes, C=C, gamma=gamma)
    context, beatList = getContextAndBeatListFromFile(file)
    blockList = coalesceBeats(beatList)
    part_blockList = []
    decisions = []
    correct_classification_count = 0
    medium_shot_count = 0
    metric_sum = 0
    # One counter per class index; seven classes are assumed by the
    # fixed list length.
    correct_histogram = [0, 0, 0, 0, 0, 0, 0]
    guessed_histogram = [0, 0, 0, 0, 0, 0, 0]
    # Disabled two-bin variant kept from the original:
    #correct_histogram = [0, 0]
    #guessed_histogram = [0, 0]
    #last_block = None
    for block in blockList:
        # Prepare the block list and the decision list: the classifier sees
        # all blocks up to and including the current one.
        part_blockList.append(block)
        if fake_decisions:
            # Rebuild the history from the stored classes of all blocks
            # before the current one.
            decisions = []
            for i in range(len(part_blockList)-1):
                decisions.append(part_blockList[i][-1].shot)
        # The context is deep-copied for every call, so the call cannot
        # alter the context used for the next block.
        svm_distribution, svm_classification = calculateDistributionAndClassification(
            trained_svm, deepcopy(context), part_blockList, decisions, scaler, shot_or_cut=True,
            leave_out_class=leave_out_class)
        if not fake_decisions:
            decisions.append(svm_classification)
        guessed_histogram[svm_classification] += 1
        # The last element of a block carries the class used as ground truth.
        correct_histogram[block[-1].shot] += 1
        # Disabled two-bin variant kept from the original:
        #is_shot = True
        #if last_block:
        #    is_shot = block[-1].shotId != last_block[-1].shotId
        #correct_histogram[int(is_shot)] += 1
        if svm_classification == block[-1].shot:
            #if boost_classification == int(is_shot):
            correct_classification_count += 1
        # NOTE(review): the original indentation was lost; this check is
        # assumed to be independent of the comparison above, i.e. it counts
        # every block whose stored class is 2 (presumably "medium shot",
        # going by the variable name) -- confirm against the upstream file.
        if block[-1].shot == 2:
            medium_shot_count += 1
        # Determine the previous correct and previous guessed class that
        # pointMetric needs in addition to the current pair.
        if len(part_blockList) >= 2:
            previous_correct_class = part_blockList[-2][-1].shot
            if len(decisions) >= 2:
                previous_guessed_class = decisions[-2]
            else:
                previous_guessed_class = previous_correct_class
        else:
            # First block: there is no predecessor, so the current block's
            # class stands in for the previous correct class.
            previous_correct_class = part_blockList[-1][-1].shot
            if len(decisions) >= 1:
                previous_guessed_class = decisions[-1]
            else:
                previous_guessed_class = previous_correct_class
        metric_sum += pointMetric(svm_classification, block[-1].shot, previous_guessed_class,
            previous_correct_class)
        #last_block = block
    # All three rates are averaged over the number of blocks in the file.
    performance = float(correct_classification_count)/len(blockList)
    medium_shot_performance = float(medium_shot_count)/len(blockList)
    return_queue.put((
        correct_histogram, guessed_histogram, performance, medium_shot_performance,
        float(metric_sum) / len(blockList)))
    return_queue.close()
def XValidation(files, fake_decisions = False):
    """
    Leave-one-file-out evaluation of the SVM classifier.

    Since the decisions of the classifier made while classifying a
    beatscript are used as input, this is not a classical cross-validation.
    Instead the training is done with all but one of the files and the
    remaining beatscript is classified block by block. This is repeated for
    every file.

    files -- the files to iterate over; each one is held out once.
    fake_decisions -- if true, the decision history is faked by using the
        original classes stored in the test file; otherwise the classifier's
        own previous classifications form the history.

    Progress, per-block results, per-file results and overall statistics
    are printed. Returns the mean of the per-file average ``pointMetric``
    values. The two histograms are filled but neither printed nor returned.

    Raises ZeroDivisionError if a file yields no blocks or ``files`` is empty.
    """
    # NOTE(review): the scaler is fitted on the global TRAIN_FILES, not on
    # the ``files`` argument -- confirm this is intended.
    reference_data, _ = getDataMatrix(TRAIN_FILES)
    scaler = preprocessing.Scaler()
    scaler.fit(reference_data)
    correct_histogram = [0, 0, 0, 0, 0, 0, 0]
    guessed_histogram = [0, 0, 0, 0, 0, 0, 0]
    performances = []
    allover_point_sum = 0.0
    medium_shot_performances = []
    for file in files:
        # Progress in percent, derived from the position of the file in the list.
        print("X-Validation: ca. " + str(int(round(float(files.index(file)) / len(files) * 100))) + "% fertig.")
        training_set = [f for f in files if f != file]
        training_data, training_data_classes = getDataMatrix(training_set)
        training_data = scaler.transform(training_data, training_data_classes)
        print("Trainingsdaten erzeugt. Trainiere Classifier...")
        # Training runs in a child process; the test file is loaded below
        # before the classifier is taken from the queue.
        print_lock = Lock()
        svm_queue = Queue(maxsize=1)
        svm_learning_process = Process(target=trainSVM,
            args=(training_data, training_data_classes, svm_queue, print_lock))
        svm_learning_process.start()
        context, beatList = getContextAndBeatListFromFile(file)
        blockList = coalesceBeats(beatList)
        part_blockList = []
        decisions = []
        correct_classification_count = 0
        medium_shot_count = 0
        metric_sum = 0
        # The result is read from the queue before the process is joined.
        trained_svm = svm_queue.get()
        svm_learning_process.join()
        print("Training finished for: " + file)
        for block in blockList:
            # Prepare the block list and the decision list: the classifier
            # sees all blocks up to and including the current one.
            part_blockList.append(block)
            if fake_decisions:
                # Rebuild the history from the stored classes of all blocks
                # before the current one.
                decisions = []
                for i in range(len(part_blockList)-1):
                    decisions.append(part_blockList[i][-1].shot)
            # Each classification runs in its own child process with a
            # fresh queue and a deep copy of the context.
            svm_queue = Queue(maxsize=1)
            svm_classification_process = Process(
                target=calculateDistributionAndClassification, args=(
                    trained_svm, deepcopy(context), part_blockList, decisions, scaler, True, svm_queue))
            svm_classification_process.start()
            svm_distribution, svm_classification = svm_queue.get()
            svm_classification_process.join()
            if not fake_decisions:
                decisions.append(svm_classification)
            print("SVM Classification:\t" + SHOT_NAMES[svm_classification])
            guessed_histogram[svm_classification] += 1
            print("Correct Class:\t\t" + SHOT_NAMES[block[-1].shot])
            # Determine the previous correct and previous guessed class that
            # pointMetric needs in addition to the current pair.
            if len(part_blockList)>= 2:
                previous_correct_class = part_blockList[-2][-1].shot
                if len(decisions) >= 2:
                    previous_guessed_class = decisions[-2]
                else:
                    previous_guessed_class = previous_correct_class
            else:
                # First block: the current block's class stands in for the
                # missing previous correct class.
                previous_correct_class = part_blockList[-1][-1].shot
                if len(decisions) >= 1:
                    previous_guessed_class = decisions[-1]
                else:
                    previous_guessed_class = previous_correct_class
            metric_value = pointMetric(svm_classification, block[-1].shot, previous_guessed_class,
                previous_correct_class)
            print("Wrongness:\t\t\t" + str(metric_value))
            metric_sum += metric_value
            correct_histogram[block[-1].shot] += 1
            if svm_classification == block[-1].shot:
                correct_classification_count += 1
            # NOTE(review): the original indentation was lost; this check is
            # assumed to be independent of the comparison above, i.e. it
            # counts every block whose stored class is 2 (presumably "medium
            # shot", going by the variable name) -- confirm upstream.
            if block[-1].shot == 2:
                medium_shot_count += 1
            # NOTE(review): the separator is assumed to be printed once per
            # block; it may belong after the block loop instead -- confirm.
            print("------------------------------------")
        # Per-file summary, averaged over the number of blocks in the file.
        print("File Performance: " + str(
            float(correct_classification_count) / len(blockList) * 100) + "%")
        print(
            "File Wrongness: " + str(float(metric_sum) / len(blockList)) + " Points ( 0 - 5 )")
        performances.append(float(correct_classification_count) / len(blockList))
        medium_shot_performances.append(float(medium_shot_count) / len(blockList))
        allover_point_sum += float(metric_sum) / len(blockList)
    print("__________________________________________")
    # Mean, minimum and maximum of the per-file medium-shot rates. The
    # minimum starts at 1 and the maximum at 0, the bounds of a rate.
    performance_sum = 0
    performance_best = 0
    performance_last = 1
    for p in medium_shot_performances:
        performance_sum += p
        if p > performance_best:
            performance_best = p
        if p < performance_last:
            performance_last = p
    # Both lists get one entry per file, so len(performances) is also the
    # number of medium-shot rates.
    print("MS-Performance:\t" + str(performance_sum / len(performances) * 100.0) + "%\t(" + str(performance_last) + " - " + str(performance_best) + ")")
    # The same statistics for the per-file classification rates.
    performance_sum = 0
    performance_best = 0
    performance_last = 1
    for p in performances:
        performance_sum += p
        if p > performance_best:
            performance_best = p
        if p < performance_last:
            performance_last = p
    print("Performance:\t" + str(performance_sum / len(performances) * 100.0) + "%\t(" + str(performance_last) + " - " + str(performance_best) + ")")
    print("Wrongness:\t" + str(allover_point_sum / len(performances)))
    return allover_point_sum / len(performances)
def main():
    """Tune the SVM parameters on all files in TRAIN_FILES.

    A scaler is fitted on the data matrix of the training files; scaler and
    matrix are then handed to ``tuneParametersForSVM`` with ``True`` as the
    fourth positional argument. The tuning result is discarded here.
    """
    training_matrix, _classes = getDataMatrix(TRAIN_FILES)

    fitted_scaler = preprocessing.Scaler()
    fitted_scaler.fit(training_matrix)

    tuneParametersForSVM(TRAIN_FILES, fitted_scaler, training_matrix, True)
def createCrossValidationSet(leaveOutIndex):
    """Split TRAIN_FILES into one held-out file and the remaining files.

    leaveOutIndex -- position in TRAIN_FILES of the file to hold out.

    Returns a 6-tuple ``(train_vectors, train_classes, test_vectors,
    test_classes, train_files, test_file)`` where the vectors and classes
    come from ``getDataMatrix`` called with ``True`` as second argument.

    Raises IndexError if ``leaveOutIndex`` is out of range.
    """
    # Pop from a copy so TRAIN_FILES itself keeps all of its entries.
    remaining_files = deepcopy(TRAIN_FILES)
    held_out_file = remaining_files.pop(leaveOutIndex)

    fit_vectors, fit_classes = getDataMatrix(remaining_files, True)
    eval_vectors, eval_classes = getDataMatrix([held_out_file], True)

    return (
        fit_vectors,
        fit_classes,
        eval_vectors,
        eval_classes,
        remaining_files,
        held_out_file,
    )