        )
    except:
        print ("ERROR, file already exists: " + validation_files[index]
               + " -> " + TRAININGSET_DIR + "/" + os.path.basename(validation_files[index]))

training_files = algo.get_imgfiles(TRAININGSET_DIR)
validation_files = algo.get_imgfiles(VALIDATIONSET_DIR)
dataset_split = 1

# extract features
print ""
print "---------------------"
print "## extracting SIFT features"
algo.__clear_dir(TMP_DIR_TRAINING)
featureCount = algo.extract_features(training_files, TMP_DIR_TRAINING)
performance["sift_feature_count"] = featureCount

# generate codebook
print "---------------------"
print "## generating bag-of-words codebook"
# write a separate codebook file per run so earlier runs are not overwritten
currentCodebook = SIFT_CODEBOOK_FILE + str(len(performances))
# codebook_size starts out as a heuristic function of the feature count;
# replace it with the concrete value it produced so the value gets logged
clusterCount = hyperparameters["codebook_size"](featureCount)
hyperparameters["codebook_size"] = clusterCount
performance["codebook_size"] = clusterCount
algo.gen_codebook(
    TMP_DIR_TRAINING,
    clusterCount,
    currentCodebook,
    # a mini-batch smaller than the number of clusters cannot update every
    # centroid, so bump batch_size up to clusterCount when needed
    batch_size=algo.BATCH_SIZE if algo.BATCH_SIZE >= clusterCount else clusterCount,
)
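# ---------------------------------------------------------------------------
# A minimal sketch of what a gen_codebook helper could look like; this is an
# illustrative assumption, not the actual algo module. It presumes the
# feature directory holds per-image .npy files of (n, 128) SIFT descriptors
# and uses scikit-learn's MiniBatchKMeans, whose batch_size parameter matches
# the clamp applied above.
import os
import numpy as np
from sklearn.cluster import MiniBatchKMeans

def gen_codebook_sketch(feature_dir, cluster_count, codebook_file, batch_size):
    # stack all SIFT descriptors found in feature_dir into one big array
    files = [f for f in os.listdir(feature_dir) if f.endswith(".npy")]
    data = np.vstack([np.load(os.path.join(feature_dir, f)) for f in files])
    # cluster the descriptors; the centroids are the "visual words"
    kmeans = MiniBatchKMeans(n_clusters=cluster_count, batch_size=batch_size)
    kmeans.fit(data)
    np.save(codebook_file, kmeans.cluster_centers_)
    return kmeans.cluster_centers_
# ---------------------------------------------------------------------------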
print "---------------------" print "## loading the images and extracting the sift features" # list files all_files = get_imgfiles(DATASETPATH) all_labels = {} all_weights = {} for i in all_files: certainty = float(i.replace("\\","/").rpartition("/")[2].partition("_")[0]) label = 1 if certainty > 0 else 0 all_labels[i] = label all_weights[i] = certainty if label == 1 else 1-certainty # extract features featureCount = algo.extract_features(all_files, TMP_DIR) # generate codebook clusterCount = int(sqrt(featureCount)) algo.gen_codebook(TMP_DIR, clusterCount, SIFT_CODEBOOK, batch_size = algo.BATCH_SIZE if algo.BATCH_SIZE >= clusterCount else clusterCount) # generate histograms algo.compute_histograms(TMP_DIR, SIFT_CODEBOOK, TMP_DIR) # train svm algo.train_svm(TMP_DIR, all_labels, SVM_MODEL_FILE, all_weights = all_weights) print "calculating predictions" predictions = algo.predict(SVM_MODEL_FILE, SIFT_CODEBOOK, DATASETPATH2, TMP_DIR)