def vigra_forest_to_h5_bytes(forest: VigraRandomForest) -> VigraForestH5Bytes:
    """Serialize *forest* to HDF5 and return the raw file contents as bytes.

    vigra can only write a forest to a real filesystem path, so the forest is
    written to a temporary ``.h5`` file which is read back and then removed.

    :param forest: trained vigra random forest to serialize
    :return: the bytes of the HDF5 file wrapping the forest
    """
    tmp_file_handle, tmp_file_path = tempfile.mkstemp(suffix=".h5")
    # vigra opens the path itself; the mkstemp fd is not needed
    os.close(tmp_file_handle)
    try:
        forest.writeHDF5(tmp_file_path, "/")
        with open(tmp_file_path, "rb") as f:
            return VigraForestH5Bytes(f.read())
    finally:
        # remove the temp file even if writeHDF5 or the read raises
        # (previously it leaked on any exception)
        os.remove(tmp_file_path)
def _train_forest(random_seed: int, num_trees: int, training_data: TrainingData) -> VigraForestH5Bytes:
    """Train a vigra random forest and return it serialized as HDF5 bytes.

    :param random_seed: seed forwarded to vigra's ``learnRF`` for reproducibility
    :param num_trees: number of trees in the forest
    :param training_data: feature matrix ``X`` and label vector ``y``
    :return: the trained forest serialized via :func:`vigra_forest_to_h5_bytes`
    """
    forest = VigraRandomForest(num_trees)
    # BUGFIX: the seed was hard-coded to 0, silently ignoring `random_seed`
    _ = forest.learnRF(training_data.X, training_data.y, random_seed)
    serialized = vigra_forest_to_h5_bytes(forest)
    return serialized
def load_from_disk(self, fn, rfgroupname='rf'):
    """Load the vigra forest from *fn* plus any extra datasets stored beside it.

    Every HDF5 path outside the forest group is copied onto ``self`` as a
    numpy array attribute.

    :param fn: HDF5 file path
    :param rfgroupname: group the forest itself was written under
    """
    self.rf = VigraRandomForest(fn, rfgroupname)
    # BUGFIX: the file handle was never closed; use a context manager
    with h5py.File(fn, 'r') as f:
        groups = []
        f.visit(groups.append)
        # everything that is not part of the forest group is a side-car dataset
        attrs = [g for g in groups if not g.startswith(rfgroupname)]
        for attr in attrs:
            setattr(self, attr, array(f[attr]))
def __init__(self, ntrees=255, use_feature_importance=False, sample_classes_individually=False):
    """Build the wrapper around a vigra forest with *ntrees* trees.

    :param ntrees: number of trees in the underlying forest
    :param use_feature_importance: if true, ``fit`` also records feature importances
    :param sample_classes_individually: forwarded to the vigra forest constructor
    """
    self.use_feature_importance = use_feature_importance
    self.sample_classes_individually = sample_classes_individually
    self.rf = BaseVigraRandomForest(
        treeCount=ntrees,
        sample_classes_individually=sample_classes_individually,
    )
class VigraRandomForest(object):
    """sklearn-style wrapper around vigra's random forest.

    Coerces inputs to the dtypes vigra expects (float32 features, uint32
    labels) and persists the forest plus training metadata to HDF5.
    """

    def __init__(self, ntrees=255, use_feature_importance=False, sample_classes_individually=False):
        self.rf = BaseVigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels):
        """Train the forest; records OOB error (and feature importances if enabled)."""
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return per-class probabilities for *features*."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return predicted class labels for *features*."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Coerce *features* to a 2-D float32 array as vigra requires."""
        if features.dtype != np.float32:
            features = features.astype(np.float32)
        if features.ndim == 1:
            features = features[np.newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Coerce *labels* to a non-negative uint32 column vector."""
        if labels.dtype != np.uint32:
            if len(np.unique(labels[labels < 0])) == 1 \
                    and not (labels == 0).any():
                # a single negative sentinel value and no zeros: map it to 0
                labels[labels < 0] = 0
            else:
                # BUGFIX: was `labels + labels.min()`, which pushed negative
                # labels further negative; subtracting shifts the minimum to 0
                labels = labels - labels.min()
            labels = labels.astype(np.uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf'):
        """Write the forest plus training metadata attributes to HDF5 file *fn*."""
        self.rf.writeHDF5(fn, rfgroupname)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance',
                     'feature_description']
        # 'a' was h5py's historical default mode; the handle is now closed
        # deterministically instead of being leaked
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    f[rfgroupname].attrs[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Load the forest and every attribute stored on its HDF5 group."""
        self.rf = BaseVigraRandomForest(str(fn), rfgroupname)
        # BUGFIX: the file handle was never closed; use a context manager
        with h5py.File(fn, 'r') as f:
            for attr in f[rfgroupname].attrs:
                print("f[%s] = %s" % (attr, f[rfgroupname].attrs[attr]))
                setattr(self, attr, f[rfgroupname].attrs[attr])
class RandomForest(object):
    """sklearn-style wrapper around vigra's random forest.

    NOTE(review): uses bare numpy names (float32, unique, ...) — presumably the
    file does ``from numpy import *``; verify against the file header.
    """

    def __init__(self, ntrees=255, use_feature_importance=False, sample_classes_individually=False):
        self.rf = VigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels, **kwargs):
        """Train the forest; records OOB error (and feature importances if enabled)."""
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return per-class probabilities for *features*."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return predicted class labels for *features*."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Coerce *features* to a 2-D float32 array as vigra requires."""
        if features.dtype != float32:
            features = features.astype(float32)
        if features.ndim == 1:
            features = features[newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Coerce *labels* to a non-negative uint32 column vector."""
        if labels.dtype != uint32:
            if len(unique(labels[labels < 0])) == 1 and not (labels == 0).any():
                # a single negative sentinel value and no zeros: map it to 0
                labels[labels < 0] = 0
            else:
                # BUGFIX: was `labels + labels.min()`, which pushed negative
                # labels further negative; subtracting shifts the minimum to 0
                labels = labels - labels.min()
            labels = labels.astype(uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf', overwrite=True):
        """Write the forest plus training metadata datasets to HDF5 file *fn*."""
        self.rf.writeHDF5(fn, rfgroupname, overwrite)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance']
        # 'a' was h5py's historical default mode; the handle is now closed
        # deterministically instead of being leaked
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    f[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Load the forest from *fn* plus any extra datasets stored beside it."""
        self.rf = VigraRandomForest(fn, rfgroupname)
        # BUGFIX: the file handle was never closed; use a context manager
        with h5py.File(fn, 'r') as f:
            groups = []
            f.visit(groups.append)
            # everything outside the forest group is a side-car dataset
            attrs = [g for g in groups if not g.startswith(rfgroupname)]
            for attr in attrs:
                setattr(self, attr, array(f[attr]))
def __init__(self, ntrees=255, use_feature_importance=False, sample_classes_individually=False):
    """Construct the wrapper and its underlying vigra forest.

    :param ntrees: number of trees in the underlying forest
    :param use_feature_importance: if true, training also records feature importances
    :param sample_classes_individually: forwarded to the vigra forest constructor
    """
    self.sample_classes_individually = sample_classes_individually
    self.use_feature_importance = use_feature_importance
    self.rf = BaseVigraRandomForest(
        treeCount=ntrees,
        sample_classes_individually=sample_classes_individually,
    )
def _compute_partial_predictions( feature_data: "np.ndarray[Any, np.dtype[np.float32]]", forest: VigraRandomForest) -> "np.ndarray[Any, np.dtype[np.float32]]": return forest.predictProbabilities(feature_data) * forest.treeCount()
from vigra.learning import RandomForest
import numpy as np
import sys
# Select which stored classifier to load: plain features vs eigentexture features.
if len(sys.argv) < 3 or sys.argv[2] != "eigentexture":
    clf = RandomForest('../challenge.h5', '/classifier')
else:
    clf = RandomForest('../challenge.h5', '/etclassifier')
import tiffcvt
from extract_features import extract_features, blur_image, extract_eigenfeatures
# Pick the train or test volume depending on argv[1] (default: train).
if len(sys.argv) < 2 or sys.argv[1] == "train":
    labels_name = "%s_train_labels"
    labels_shape = tiffcvt.train_volume.shape
    img = tiffcvt.h5_file["ordinal_train_volume"][:,:,:]
else:
    labels_name = "%s_test_labels"
    labels_shape = tiffcvt.test_volume.shape
    img = tiffcvt.h5_file["ordinal_test_volume"][:,:,:]
# Choose the feature extractor matching the classifier selected above.
if len(sys.argv) < 3 or sys.argv[2] != "eigentexture":
    extract_fn = extract_features
    labels_name = labels_name % "predicted"
else:
    components = tiffcvt.h5_file["components"][:,:]
    # bind the eigentexture components into the extractor's signature
    extract_fn = lambda img, bimg, indices:\
        extract_eigenfeatures(img, bimg, components, indices)
    labels_name = labels_name % "eigenpredicted"
# NOTE(review): this call is truncated in this chunk of the file — the
# remaining arguments to require_dataset are outside the visible source.
predicted = tiffcvt.h5_file.require_dataset(labels_name,
def do_predict(forest: VigraRandomForest):
    """Return this forest's probability contribution, weighted by its tree count.

    NOTE: `feature_data` is captured from the enclosing scope, not passed in.
    """
    raw_features = feature_data.linear_raw()
    probabilities = forest.predictProbabilities(raw_features)
    return probabilities * forest.treeCount()
def load_from_disk(self, fn, rfgroupname='rf'):
    """Load the vigra forest and every attribute stored on its HDF5 group.

    :param fn: HDF5 file path (stringified for vigra's constructor)
    :param rfgroupname: group the forest was written under
    """
    self.rf = BaseVigraRandomForest(str(fn), rfgroupname)
    # BUGFIX: the file handle was never closed; use a context manager
    with h5py.File(fn, 'r') as f:
        for attr in f[rfgroupname].attrs:
            print("f[%s] = %s" % (attr, f[rfgroupname].attrs[attr]))
            setattr(self, attr, f[rfgroupname].attrs[attr])
class VigraRandomForest(object):
    """sklearn-style wrapper around vigra's random forest.

    Coerces inputs to the dtypes vigra expects (float32 features, uint32
    labels) and persists the forest plus training metadata to HDF5.
    """

    def __init__(self, ntrees=255, use_feature_importance=False, sample_classes_individually=False):
        self.rf = BaseVigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels):
        """Train the forest; records OOB error (and feature importances if enabled)."""
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return per-class probabilities for *features*."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return predicted class labels for *features*."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Coerce *features* to a 2-D float32 array as vigra requires."""
        if features.dtype != np.float32:
            features = features.astype(np.float32)
        if features.ndim == 1:
            features = features[np.newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Coerce *labels* to a non-negative uint32 column vector."""
        if labels.dtype != np.uint32:
            if len(np.unique(labels[labels < 0])) == 1 \
                    and not (labels == 0).any():
                # a single negative sentinel value and no zeros: map it to 0
                labels[labels < 0] = 0
            else:
                # BUGFIX: was `labels + labels.min()`, which pushed negative
                # labels further negative; subtracting shifts the minimum to 0
                labels = labels - labels.min()
            labels = labels.astype(np.uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf'):
        """Write the forest plus training metadata attributes to HDF5 file *fn*."""
        self.rf.writeHDF5(fn, rfgroupname)
        attr_list = [
            'oob', 'feature_importance', 'use_feature_importance',
            'feature_description'
        ]
        # 'a' was h5py's historical default mode; the handle is now closed
        # deterministically instead of being leaked
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    f[rfgroupname].attrs[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Load the forest and every attribute stored on its HDF5 group."""
        self.rf = BaseVigraRandomForest(str(fn), rfgroupname)
        # BUGFIX: the file handle was never closed; use a context manager
        with h5py.File(fn, 'r') as f:
            for attr in f[rfgroupname].attrs:
                print("f[%s] = %s" % (attr, f[rfgroupname].attrs[attr]))
                setattr(self, attr, f[rfgroupname].attrs[attr])
class RandomForest(object):
    """sklearn-style wrapper around vigra's random forest.

    NOTE(review): uses bare numpy names (float32, unique, ...) — presumably the
    file does ``from numpy import *``; verify against the file header.
    """

    def __init__(self, ntrees=255, use_feature_importance=False, sample_classes_individually=False):
        self.rf = VigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels, **kwargs):
        """Train the forest; records OOB error (and feature importances if enabled)."""
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return per-class probabilities for *features*."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return predicted class labels for *features*."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Coerce *features* to a 2-D float32 array as vigra requires."""
        if features.dtype != float32:
            features = features.astype(float32)
        if features.ndim == 1:
            features = features[newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Coerce *labels* to a non-negative uint32 column vector."""
        if labels.dtype != uint32:
            if len(unique(
                    labels[labels < 0])) == 1 and not (labels == 0).any():
                # a single negative sentinel value and no zeros: map it to 0
                labels[labels < 0] = 0
            else:
                # BUGFIX: was `labels + labels.min()`, which pushed negative
                # labels further negative; subtracting shifts the minimum to 0
                labels = labels - labels.min()
            labels = labels.astype(uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf', overwrite=True):
        """Write the forest plus training metadata datasets to HDF5 file *fn*."""
        self.rf.writeHDF5(fn, rfgroupname, overwrite)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance']
        # 'a' was h5py's historical default mode; the handle is now closed
        # deterministically instead of being leaked
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    f[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Load the forest from *fn* plus any extra datasets stored beside it."""
        self.rf = VigraRandomForest(fn, rfgroupname)
        # BUGFIX: the file handle was never closed; use a context manager
        with h5py.File(fn, 'r') as f:
            groups = []
            f.visit(groups.append)
            # everything outside the forest group is a side-car dataset
            attrs = [g for g in groups if not g.startswith(rfgroupname)]
            for attr in attrs:
                setattr(self, attr, array(f[attr]))
def train_forest(forest_index: int) -> VigraRandomForest:
    """Train forest number *forest_index* of `num_forests`.

    Distributes `num_trees` as evenly as possible: the first
    ``num_trees % num_forests`` forests each receive one extra tree.
    (`num_trees`, `num_forests`, `X`, `y` and `random_seed` are captured
    from the enclosing scope.)
    """
    extra = 1 if forest_index < num_trees % num_forests else 0
    ntrees = num_trees // num_forests + extra
    forest = VigraRandomForest(ntrees)
    forest.learnRF(X, y, random_seed)
    return forest
from vigra.learning import RandomForest
import numpy as np
import h5py
import sys
from tiffcvt import h5_file
if __name__=="__main__":
    # Train a 40-tree forest on the stored training features/labels.
    clf = RandomForest(treeCount=40)
    training_set = h5_file["training_features"][:,:].astype(np.float32)
    training_class = h5_file["training_classification"][:].astype(np.uint32)
    if len(sys.argv) > 1 and sys.argv[1] == "eigentexture":
        # Project normalized features onto the stored eigentexture components.
        from eigentexture import normalize
        training_set = normalize(training_set)
        components = h5_file["components"][:,:].transpose()
        training_set = np.dot(training_set, components).astype(np.float32)
        classifier_name = "etclassifier"
    else:
        classifier_name = "classifier"
    clf.learnRF(training_set, training_class)
    # Drop any previously stored classifier, then close our handle before
    # vigra reopens the same file to write the new one.
    if classifier_name in h5_file.keys():
        del h5_file[classifier_name]
    h5_file.close()
    clf.writeHDF5('../challenge.h5', "/"+classifier_name, True)
else:
    # When imported as a module, expose the two previously trained classifiers.
    classifier = RandomForest("../challenge.h5", "/classifier")
    et_classifier = RandomForest("../challenge.h5", "/etclassifier")