def grid_search(model, X, y, C_range=(-5, 15, 2), gamma_range=(3, -15, -2), k=5, num_cores=1):
    """Tune the SVM parameters C and gamma by exhaustive grid search.

    Every point of the grid built from ``C_range`` and ``gamma_range`` is
    scored by a k-fold cross validation of ``model`` on ``(X, y)``. The
    combination with the highest accuracy wins; on ties the first one
    encountered is kept.

    Args:
        model: A PredictableModel whose ``classifier`` is an SVM.
        X: Samples, handed to the cross validation unchanged.
        y: Labels, handed to the cross validation unchanged.
        C_range: Triple passed to ``grid()`` describing the base-2 exponents
            tried for C (presumably (begin, end, step) -- confirm against
            ``grid``).
        gamma_range: Same as ``C_range`` but for gamma. If it is None or the
            classifier uses a LINEAR kernel, only the exponent 0 is tried,
            so effectively only C is optimized.
        k: Number of folds used for each cross validation.
        num_cores: Accepted for interface compatibility; currently unused.

    Returns:
        A tuple ``(best_parameter, results)``: ``best_parameter`` is a new
        ``svm_parameter`` carrying kernel_type, nu and coef0 of the model's
        classifier plus the best C and gamma found; ``results`` is a list of
        ``[C, gamma, accuracy]`` entries in grid order.

    Raises:
        TypeError: If ``model`` is not a PredictableModel or its classifier
            is not an SVM.

    Side effects:
        ``model.classifier.param.C`` and ``.gamma`` are overwritten while
        searching. When at least one combination was accepted they are left
        at the best values found, not at the last grid point tried.
    """
    if not isinstance(model, PredictableModel):
        raise TypeError(
            "GridSearch expects a PredictableModel. If you want to perform optimization on raw data use facerec.feature.Identity to pass unpreprocessed data!")
    if not isinstance(model.classifier, SVM):
        raise TypeError("GridSearch expects a SVM as classifier. Please use a facerec.classifier.SVM!")

    logger = logging.getLogger("facerec.svm.gridsearch")
    logger.info("Performing a Grid Search.")

    # Parameter set to return; everything except C and gamma is copied from
    # the classifier that is being tuned.
    best_parameter = svm_parameter("-q")
    best_parameter.kernel_type = model.classifier.param.kernel_type
    best_parameter.nu = model.classifier.param.nu
    best_parameter.coef0 = model.classifier.param.coef0

    # Either no gamma given or kernel is linear (only C to optimize).
    if gamma_range is None or model.classifier.param.kernel_type == LINEAR:
        gamma_range = (0, 0, 1)

    # Best cross-validation accuracy seen so far.
    best_accuracy = float("-inf")
    best_found = False
    best_C = best_gamma = None

    results = []
    # grid() yields the cartesian product of both exponent ranges.
    for C_exponent, gamma_exponent in grid([C_range, gamma_range]):
        C, gamma = 2 ** C_exponent, 2 ** gamma_exponent
        model.classifier.param.C, model.classifier.param.gamma = C, gamma

        # Score this combination with a k-fold cross validation.
        cv = KFoldCrossValidation(model=model, k=k)
        cv.validate(X, y)
        results.append([C, gamma, cv.accuracy])

        # Strict comparison: the first of several equally good points wins.
        if cv.accuracy > best_accuracy:
            logger.info("best_accuracy=%s", cv.accuracy)
            best_accuracy = cv.accuracy
            best_C, best_gamma = C, gamma
            best_parameter.C, best_parameter.gamma = C, gamma
            best_found = True
        logger.info("%d-CV Result = %.2f.", k, cv.accuracy)

    # Set the model to the best combination found, so it is not left
    # configured with whatever grid point happened to be evaluated last.
    if best_found:
        model.classifier.param.C = best_C
        model.classifier.param.gamma = best_gamma
    return best_parameter, results
def grid_search(model, X, y, C_range=(-5, 15, 2), gamma_range=(3, -15, -2), k=5, num_cores=1):
    """Tune the SVM parameters C and gamma by exhaustive grid search.

    Every point of the grid built from ``C_range`` and ``gamma_range`` is
    scored by a k-fold cross validation of ``model`` on ``(X, y)``. The
    combination with the highest accuracy wins; on ties the first one
    encountered is kept.

    Args:
        model: A PredictableModel whose ``classifier`` is an SVM.
        X: Samples, handed to the cross validation unchanged.
        y: Labels, handed to the cross validation unchanged.
        C_range: Triple passed to ``grid()`` describing the base-2 exponents
            tried for C (presumably (begin, end, step) -- confirm against
            ``grid``).
        gamma_range: Same as ``C_range`` but for gamma. If it is None or the
            classifier uses a LINEAR kernel, only the exponent 0 is tried,
            so effectively only C is optimized.
        k: Number of folds used for each cross validation.
        num_cores: Accepted for interface compatibility; currently unused.

    Returns:
        A tuple ``(best_parameter, results)``: ``best_parameter`` is a new
        ``svm_parameter`` carrying kernel_type, nu and coef0 of the model's
        classifier plus the best C and gamma found; ``results`` is a list of
        ``[C, gamma, accuracy]`` entries in grid order.

    Raises:
        TypeError: If ``model`` is not a PredictableModel or its classifier
            is not an SVM.

    Side effects:
        ``model.classifier.param.C`` and ``.gamma`` are overwritten while
        searching. When at least one combination was accepted they are left
        at the best values found, not at the last grid point tried.
    """
    if not isinstance(model, PredictableModel):
        raise TypeError(
            "GridSearch expects a PredictableModel. If you want to perform optimization on raw data use facerec.feature.Identity to pass unpreprocessed data!")
    if not isinstance(model.classifier, SVM):
        raise TypeError("GridSearch expects a SVM as classifier. Please use a facerec.classifier.SVM!")

    logger = logging.getLogger("facerec.svm.gridsearch")
    logger.info("Performing a Grid Search.")

    # Parameter set to return; everything except C and gamma is copied from
    # the classifier that is being tuned.
    best_parameter = svm_parameter("-q")
    best_parameter.kernel_type = model.classifier.param.kernel_type
    best_parameter.nu = model.classifier.param.nu
    best_parameter.coef0 = model.classifier.param.coef0

    # Either no gamma given or kernel is linear (only C to optimize).
    if gamma_range is None or model.classifier.param.kernel_type == LINEAR:
        gamma_range = (0, 0, 1)

    # Best cross-validation accuracy seen so far.
    best_accuracy = float("-inf")
    best_found = False
    best_C = best_gamma = None

    results = []
    # grid() yields the cartesian product of both exponent ranges.
    for C_exponent, gamma_exponent in grid([C_range, gamma_range]):
        C, gamma = 2 ** C_exponent, 2 ** gamma_exponent
        model.classifier.param.C, model.classifier.param.gamma = C, gamma

        # Score this combination with a k-fold cross validation.
        cv = KFoldCrossValidation(model=model, k=k)
        cv.validate(X, y)
        results.append([C, gamma, cv.accuracy])

        # Strict comparison: the first of several equally good points wins.
        if cv.accuracy > best_accuracy:
            logger.info("best_accuracy=%s", cv.accuracy)
            best_accuracy = cv.accuracy
            best_C, best_gamma = C, gamma
            best_parameter.C, best_parameter.gamma = C, gamma
            best_found = True
        logger.info("%d-CV Result = %.2f.", k, cv.accuracy)

    # Set the model to the best combination found, so it is not left
    # configured with whatever grid point happened to be evaluated last.
    if best_found:
        model.classifier.param.C = best_C
        model.classifier.param.gamma = best_gamma
    return best_parameter, results
def train(self):
    """Load the dataset, optionally cross-validate, then compute and save the model.

    Reads ``self.dataset`` through ``self.read_images``, builds a
    {label: subject name} dictionary, obtains a model from
    ``self.get_model``, optionally runs a k-fold cross validation
    (when ``self.numfolds`` is not None), computes the model on all images
    and stores it at ``self.model_filename`` via ``save_model``.

    Raises:
        SystemExit: With exit code 1 if ``self.dataset`` does not exist.

    Side effects:
        Prints progress messages to stdout. When validating, the "facerec"
        logger is set to DEBUG and gets a stdout handler; the handler is
        attached only once, so repeated calls do not duplicate log lines.
    """
    # Check if the given dataset exists:
    if not os.path.exists(self.dataset):
        print(">> [Error] No Dataset Found at '%s'." % self.dataset)
        sys.exit(1)

    # Read the images, labels and folder names from the given dataset;
    # self.image_size is passed along so the reader can size the images.
    print(">> Loading Dataset <-- " + self.dataset)
    images, labels, subject_names = self.read_images(self.dataset, self.image_size)

    # Zip us a {label: name} dict; labels run from 0 to max(labels).
    subject_dictionary = dict(zip(range(max(labels) + 1), subject_names))

    # Get the model we want to compute:
    model = self.get_model(image_size=self.image_size, subject_names=subject_dictionary)

    # Sometimes you want to know how well the model may perform on the given
    # data, so a k-fold cross validation can be run before the final fit:
    if self.numfolds is not None:
        print(">> Validating Model With %s Folds.." % self.numfolds)

        # We want to see what is going on inside the facerec modules, so point
        # their logger at stdout. Reuse an existing stdout handler instead of
        # stacking a new one on every call.
        logger = logging.getLogger("facerec")
        has_stdout_handler = any(
            isinstance(existing, logging.StreamHandler)
            and getattr(existing, "stream", None) is sys.stdout
            for existing in logger.handlers
        )
        if not has_stdout_handler:
            handler = logging.StreamHandler(sys.stdout)
            handler.setFormatter(
                logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
            logger.addHandler(handler)
        logger.setLevel(logging.DEBUG)

        # Perform the validation & print results:
        crossval = KFoldCrossValidation(model, k=self.numfolds)
        crossval.validate(images, labels)
        crossval.print_results()

    # Compute the model on the complete dataset:
    print(">> Computing Model..")
    model.compute(images, labels)

    # And save the model (per the original note, save_model relies on pickle):
    print(">> Saving Model..")
    save_model(self.model_filename, model)
def train(self):
    """Load the dataset, optionally cross-validate, then compute and save the model.

    Reads ``self.dataset`` through ``self.read_images``, builds a
    {label: subject name} dictionary, obtains a model from
    ``self.get_model``, optionally runs a k-fold cross validation
    (when ``self.numfolds`` is not None), computes the model on all images
    and stores it at ``self.model_filename`` via ``save_model``.

    Raises:
        SystemExit: With exit code 1 if ``self.dataset`` does not exist.

    Side effects:
        Prints progress messages to stdout. When validating, the "facerec"
        logger is set to DEBUG and gets a stdout handler; the handler is
        attached only once, so repeated calls do not duplicate log lines.
    """
    # Check if the given dataset exists:
    if not os.path.exists(self.dataset):
        print(">> [Error] No Dataset Found at '%s'." % self.dataset)
        sys.exit(1)

    # Read the images, labels and folder names from the given dataset;
    # self.image_size is passed along so the reader can size the images.
    print(">> Loading Dataset <-- " + self.dataset)
    images, labels, subject_names = self.read_images(self.dataset, self.image_size)

    # Zip us a {label: name} dict; labels run from 0 to max(labels).
    subject_dictionary = dict(zip(range(max(labels) + 1), subject_names))

    # Get the model we want to compute:
    model = self.get_model(image_size=self.image_size, subject_names=subject_dictionary)

    # Sometimes you want to know how well the model may perform on the given
    # data, so a k-fold cross validation can be run before the final fit:
    if self.numfolds is not None:
        print(">> Validating Model With %s Folds.." % self.numfolds)

        # We want to see what is going on inside the facerec modules, so point
        # their logger at stdout. Reuse an existing stdout handler instead of
        # stacking a new one on every call.
        logger = logging.getLogger("facerec")
        has_stdout_handler = any(
            isinstance(existing, logging.StreamHandler)
            and getattr(existing, "stream", None) is sys.stdout
            for existing in logger.handlers
        )
        if not has_stdout_handler:
            handler = logging.StreamHandler(sys.stdout)
            handler.setFormatter(
                logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s'))
            logger.addHandler(handler)
        logger.setLevel(logging.DEBUG)

        # Perform the validation & print results:
        crossval = KFoldCrossValidation(model, k=self.numfolds)
        crossval.validate(images, labels)
        crossval.print_results()

    # Compute the model on the complete dataset:
    print(">> Computing Model..")
    model.compute(images, labels)

    # And save the model (per the original note, save_model relies on pickle):
    print(">> Saving Model..")
    save_model(self.model_filename, model)