def holdout_split(I, Xtr, ytr): Xtr_t = utils.allbut(Xtr, I) ytr_t = mulzip(Xtr_t, utils.allbut(ytr, I)) Xtr_t = np.vstack(Xtr_t) clf = NBNN() clf.fit(Xtr_t, ytr_t) return [clf.predict(Xtr[i]).argmax() for i in I]
def train(self, patch_size, stride, n_jobs): '''Train the model on all images in the train/ directory of the dataset''' self._patch_size = patch_size self._stride = stride # Scan for all image files in dataset training dir. self._dataset = Dataset(self._datadir + '/train') # Extract masked features from all training images. (Xtr, ytr) = extract_all_class_features(self._dataset, n_jobs=n_jobs, patch_size=patch_size, stride=stride) # Stack image features and labels into arrays (they were lists). Xtr = np.vstack(Xtr) ytr = np.hstack(ytr) # Make a Naive Bayes NN classifier and train it. self._clf = NBNN() self._clf.fit(Xtr, ytr) self._trained = True
class CNRecognizer: ''' Simple class implementing a recognizer based on local colorname histograms. This recognizer uses a Naive Bayes Nearest Neighbor classifier over local colorname patches. It trains over all image/mask pairs in the train/ directory of the dataset given as input. Instantiate it like this: clf = CNRecognizer(datadir, modelname) where datasetdir is the directory of the dataset, and modelname is the name of the serlialized model saved to the datasetdir/models directory when the serialize() method is called. NOTES: ------ The serialization code is hacky and untested. Treat it like running with scissors. ''' def __init__(self, datadir, modelname='CNRecognizer.pkl'): self._datadir = datadir self._modelname = modelname self._trained = False if os.path.exists(datadir + '/models/' + modelname): inst = unpickle(datadir + '/models/' + modelname) for att in dir(inst): setattr(self, att, getattr(inst, att)) def train(self, patch_size, stride, n_jobs): '''Train the model on all images in the train/ directory of the dataset''' self._patch_size = patch_size self._stride = stride # Scan for all image files in dataset training dir. self._dataset = Dataset(self._datadir + '/train') # Extract masked features from all training images. (Xtr, ytr) = extract_all_class_features(self._dataset, n_jobs=n_jobs, patch_size=patch_size, stride=stride) # Stack image features and labels into arrays (they were lists). Xtr = np.vstack(Xtr) ytr = np.hstack(ytr) # Make a Naive Bayes NN classifier and train it. self._clf = NBNN() self._clf.fit(Xtr, ytr) self._trained = True def predict(self, image, mask): ''' Method to classify an image based on masked colorname patches. Handles feature extraction and masking. IMPORTANT: image channels MUST range between 0 and 255 (as opposed to 0.0 and 1.0) ''' cns = extract_masked_cns(image, mask, self._stride, self._patch_size) return self._clf.predict(cns) def serialize(self): '''Pickle the trained recognizer to the models/ directory of the dataset.''' if not os.path.exists(self._datadir + '/models/'): os.mkdir(self._datadir + '/models/') pickle(self, self._datadir + '/models/' + self._modelname)