Beispiel #1
0
def vigra_forest_to_h5_bytes(forest: VigraRandomForest) -> VigraForestH5Bytes:
    """Serialize ``forest`` into the raw bytes of an HDF5 file.

    The forest is written with ``writeHDF5`` to a temporary ``.h5`` file
    (under the path ``"/"`` inside that file); the file contents are then
    read back and wrapped in ``VigraForestH5Bytes``.

    The temporary file is removed even if writing or reading raises.
    """
    tmp_file_handle, tmp_file_path = tempfile.mkstemp(suffix=".h5")  # FIXME
    # Only the path is used below, so release the descriptor mkstemp opened.
    os.close(tmp_file_handle)
    try:
        forest.writeHDF5(tmp_file_path, "/")
        with open(tmp_file_path, "rb") as f:
            return VigraForestH5Bytes(f.read())
    finally:
        # Runs after the ``with`` block has closed the file, on success and
        # on failure alike, so no temp file is left behind.
        os.remove(tmp_file_path)
Beispiel #2
0
def _train_forest(random_seed: int, num_trees: int,
                  training_data: TrainingData) -> VigraForestH5Bytes:
    """Train a forest with ``num_trees`` trees and return it as HDF5 bytes.

    The forest is fitted on ``training_data.X`` / ``training_data.y`` and
    then serialized with ``vigra_forest_to_h5_bytes``.

    NOTE(review): ``random_seed`` is accepted but never used; ``learnRF`` is
    always called with ``0`` as its third argument -- confirm this is
    intended.
    """
    trained = VigraRandomForest(num_trees)
    # The value returned by learnRF is not needed here.
    trained.learnRF(training_data.X, training_data.y, 0)
    return vigra_forest_to_h5_bytes(trained)
Beispiel #3
0
 def load_from_disk(self, fn, rfgroupname='rf'):
     """Load the forest and its stored extra values from the file ``fn``.

     The forest is read from the group ``rfgroupname``. Every other name
     visited in the file that does not start with ``rfgroupname`` is
     converted with ``array`` and set on ``self`` under that name.
     """
     self.rf = VigraRandomForest(fn, rfgroupname)
     # Read inside a context manager so the file handle is always closed;
     # every value is converted while the file is still open.
     with h5py.File(fn, 'r') as f:
         names = []
         f.visit(names.append)
         for name in names:
             if not name.startswith(rfgroupname):
                 setattr(self, name, array(f[name]))
Beispiel #4
0
 def load_from_disk(self, fn, rfgroupname='rf'):
     """Load the forest and its stored extra values from the file ``fn``.

     The forest is read from the group ``rfgroupname``. Every other name
     visited in the file that does not start with ``rfgroupname`` is
     converted with ``array`` and set on ``self`` under that name.
     """
     self.rf = VigraRandomForest(fn, rfgroupname)
     # Read inside a context manager so the file handle is always closed;
     # every value is converted while the file is still open.
     with h5py.File(fn, 'r') as f:
         names = []
         f.visit(names.append)
         for name in names:
             if not name.startswith(rfgroupname):
                 setattr(self, name, array(f[name]))
Beispiel #5
0
 def __init__(self,
              ntrees=255,
              use_feature_importance=False,
              sample_classes_individually=False):
     """Create the wrapper and its underlying ``BaseVigraRandomForest``.

     ``ntrees`` is passed on as ``treeCount`` and
     ``sample_classes_individually`` under its own name; both flags are
     also stored on the instance.
     """
     forest_options = dict(
         treeCount=ntrees,
         sample_classes_individually=sample_classes_individually)
     self.rf = BaseVigraRandomForest(**forest_options)
     self.sample_classes_individually = sample_classes_individually
     self.use_feature_importance = use_feature_importance
Beispiel #6
0
class VigraRandomForest(object):
    """Wrapper around ``BaseVigraRandomForest`` with fit/predict methods.

    Besides training and prediction it can store the forest, together with
    a few extra values, in an HDF5 file and load it back.
    """

    def __init__(self, ntrees=255, use_feature_importance=False,
            sample_classes_individually=False):
        """Create the underlying forest and remember the two flags.

        ``ntrees`` is passed on as ``treeCount``.
        """
        self.rf = BaseVigraRandomForest(treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels):
        """Train the forest on ``features`` / ``labels`` and return ``self``.

        Stores the training call's result in ``self.oob`` and, when
        ``use_feature_importance`` is set, ``self.feature_importance``.
        """
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                        self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return ``predictProbabilities`` of the forest for ``features``."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return ``predictLabels`` of the forest for ``features``."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Return ``features`` as ``float32`` with at least two dimensions.

        A one-dimensional input becomes a single row.
        """
        if features.dtype != np.float32:
            features = features.astype(np.float32)
        if features.ndim == 1:
            features = features[np.newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Return ``labels`` as a ``uint32`` column of shape ``(n, 1)``.

        Labels that are already ``uint32`` are only reshaped. Otherwise:

        * if there is exactly one distinct negative label and no label
          equals 0, the negative label is mapped to 0;
        * else every label is shifted up by the magnitude of the smallest
          label.

        The caller's array is never modified.
        """
        if labels.dtype != np.uint32:
            negative = labels < 0
            if len(np.unique(labels[negative])) == 1 \
                    and not (labels == 0).any():
                # Work on a copy: assigning in place would silently rewrite
                # the caller's label array.
                labels = labels.copy()
                labels[negative] = 0
            else:
                # Adding the raw minimum pushes negative labels further
                # below zero, so the uint32 cast wraps around. Adding its
                # magnitude lifts them to >= 0 and gives the same result as
                # before for non-negative input.
                labels = labels + abs(labels.min())
            labels = labels.astype(np.uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf'):
        """Write the forest and its extra values to the HDF5 file ``fn``.

        The forest goes into the group ``rfgroupname``; whichever of
        ``oob``, ``feature_importance``, ``use_feature_importance`` and
        ``feature_description`` exist on ``self`` are stored as HDF5
        attributes of that group.
        """
        # str() matches what load_from_disk passes to the forest.
        self.rf.writeHDF5(str(fn), rfgroupname)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance',
            'feature_description']
        # Open explicitly in append mode (h5py >= 3 defaults to read-only)
        # and close the handle when done.
        with h5py.File(fn, 'a') as f:
            group_attrs = f[rfgroupname].attrs
            for attr in attr_list:
                if hasattr(self, attr):
                    group_attrs[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Load the forest and its stored attributes from ``fn``.

        Every HDF5 attribute of the group ``rfgroupname`` is printed and
        set on ``self`` under the same name.
        """
        self.rf = BaseVigraRandomForest(str(fn), rfgroupname)
        # The context manager guarantees the file handle is closed.
        with h5py.File(fn, 'r') as f:
            stored = f[rfgroupname].attrs
            for attr in stored:
                value = stored[attr]
                print("f[%s] = %s" % (attr, value))
                setattr(self, attr, value)
Beispiel #7
0
class RandomForest(object):
    """Wrapper around ``VigraRandomForest`` with fit/predict methods.

    Besides training and prediction it can store the forest, together with
    a few extra values, in an HDF5 file and load it back.
    """

    def __init__(self, ntrees=255, use_feature_importance=False,
            sample_classes_individually=False):
        """Create the underlying forest and remember the two flags.

        ``ntrees`` is passed on as ``treeCount``.
        """
        self.rf = VigraRandomForest(treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels, **kwargs):
        """Train the forest on ``features`` / ``labels`` and return ``self``.

        Stores the training call's result in ``self.oob`` and, when
        ``use_feature_importance`` is set, ``self.feature_importance``.
        Extra keyword arguments are accepted and ignored.
        """
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                        self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return ``predictProbabilities`` of the forest for ``features``."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return ``predictLabels`` of the forest for ``features``."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Return ``features`` as ``float32`` with at least two dimensions.

        A one-dimensional input becomes a single row.
        """
        if features.dtype != float32:
            features = features.astype(float32)
        if features.ndim == 1:
            features = features[newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Return ``labels`` as a ``uint32`` column of shape ``(n, 1)``.

        Labels that are already ``uint32`` are only reshaped. Otherwise:

        * if there is exactly one distinct negative label and no label
          equals 0, the negative label is mapped to 0;
        * else every label is shifted up by the magnitude of the smallest
          label.

        The caller's array is never modified.
        """
        if labels.dtype != uint32:
            negative = labels < 0
            if len(unique(labels[negative])) == 1 \
                    and not (labels == 0).any():
                # Work on a copy: assigning in place would silently rewrite
                # the caller's label array.
                labels = labels.copy()
                labels[negative] = 0
            else:
                # Adding the raw minimum pushes negative labels further
                # below zero, so the uint32 cast wraps around. Adding its
                # magnitude lifts them to >= 0 and gives the same result as
                # before for non-negative input.
                labels = labels + abs(labels.min())
            labels = labels.astype(uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf', overwrite=True):
        """Write the forest and its extra values to the HDF5 file ``fn``.

        The forest goes into the group ``rfgroupname``; whichever of
        ``oob``, ``feature_importance`` and ``use_feature_importance``
        exist on ``self`` are stored as top-level entries of the file.
        With ``overwrite`` set, entries left by an earlier save are
        replaced; otherwise h5py raises if one already exists.
        """
        self.rf.writeHDF5(fn, rfgroupname, overwrite)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance']
        # Open explicitly in append mode (h5py >= 3 defaults to read-only)
        # and close the handle when done.
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    # Assigning to an existing name raises in h5py, so
                    # remove the old entry first when overwriting.
                    if overwrite and attr in f:
                        del f[attr]
                    f[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Load the forest and its stored extra values from the file ``fn``.

        The forest is read from the group ``rfgroupname``. Every other name
        visited in the file that does not start with ``rfgroupname`` is
        converted with ``array`` and set on ``self`` under that name.
        """
        self.rf = VigraRandomForest(fn, rfgroupname)
        # Read inside a context manager so the file handle is always
        # closed; every value is converted while the file is still open.
        with h5py.File(fn, 'r') as f:
            names = []
            f.visit(names.append)
            for name in names:
                if not name.startswith(rfgroupname):
                    setattr(self, name, array(f[name]))
Beispiel #8
0
 def __init__(self, ntrees=255, use_feature_importance=False,
         sample_classes_individually=False):
     """Create the wrapper and its underlying ``BaseVigraRandomForest``.

     ``ntrees`` is passed on as ``treeCount`` and
     ``sample_classes_individually`` under its own name; both flags are
     also stored on the instance.
     """
     forest_options = dict(
         treeCount=ntrees,
         sample_classes_individually=sample_classes_individually)
     self.rf = BaseVigraRandomForest(**forest_options)
     self.sample_classes_individually = sample_classes_individually
     self.use_feature_importance = use_feature_importance
Beispiel #9
0
def _compute_partial_predictions(
        feature_data: "np.ndarray[Any, np.dtype[np.float32]]",
        forest: VigraRandomForest) -> "np.ndarray[Any, np.dtype[np.float32]]":
    """Return ``forest``'s probabilities for ``feature_data`` times its tree count."""
    probabilities = forest.predictProbabilities(feature_data)
    tree_count = forest.treeCount()
    return probabilities * tree_count
Beispiel #10
0
from vigra.learning import RandomForest
import numpy as np
import sys

# Load the stored classifier from ../challenge.h5: "/classifier" unless the
# second command-line argument is "eigentexture", in which case
# "/etclassifier" is loaded instead.
if len(sys.argv) < 3 or sys.argv[2] != "eigentexture":
    clf = RandomForest('../challenge.h5', '/classifier')
else:
    clf = RandomForest('../challenge.h5', '/etclassifier')
    
# NOTE(review): these imports run after the classifier has been loaded; the
# order may matter if tiffcvt touches the same file on import -- confirm
# before moving them to the top of the file.
import tiffcvt
from extract_features import extract_features, blur_image, extract_eigenfeatures

# The first command-line argument selects the volume: "train" (or no
# argument at all) picks the training volume, anything else the test volume.
# labels_name keeps a "%s" placeholder that is filled in further down.
if len(sys.argv) < 2 or sys.argv[1] == "train":
    labels_name = "%s_train_labels"
    labels_shape = tiffcvt.train_volume.shape
    img = tiffcvt.h5_file["ordinal_train_volume"][:,:,:]
else:
    labels_name = "%s_test_labels"
    labels_shape = tiffcvt.test_volume.shape
    img = tiffcvt.h5_file["ordinal_test_volume"][:,:,:]

# Pick the feature extractor and the dataset-name prefix using the same
# "eigentexture" test as for the classifier above.
if len(sys.argv) < 3 or sys.argv[2] != "eigentexture":
    extract_fn = extract_features
    labels_name = labels_name % "predicted"
else:
    components = tiffcvt.h5_file["components"][:,:]
    # Wrap extract_eigenfeatures so it takes the same three arguments as the
    # lambda declares, with `components` bound from the line above.
    extract_fn = lambda img, bimg, indices:\
       extract_eigenfeatures(img, bimg, components, indices)
    labels_name = labels_name % "eigenpredicted"
    
predicted = tiffcvt.h5_file.require_dataset(labels_name,
Beispiel #11
0
 def do_predict(forest: VigraRandomForest):
     """Return ``forest``'s probabilities scaled by its tree count.

     Probabilities are computed for ``feature_data.linear_raw()``, where
     ``feature_data`` is taken from the enclosing scope.
     """
     probabilities = forest.predictProbabilities(feature_data.linear_raw())
     tree_count = forest.treeCount()
     return probabilities * tree_count
Beispiel #12
0
 def load_from_disk(self, fn, rfgroupname='rf'):
     """Load the forest and its stored attributes from the file ``fn``.

     The forest is read from the group ``rfgroupname``. Every HDF5
     attribute of that group is printed and set on ``self`` under the
     same name.
     """
     self.rf = BaseVigraRandomForest(str(fn), rfgroupname)
     # The context manager guarantees the file handle is closed.
     with h5py.File(fn, 'r') as f:
         stored = f[rfgroupname].attrs
         for attr in stored:
             value = stored[attr]
             print("f[%s] = %s" % (attr, value))
             setattr(self, attr, value)
Beispiel #13
0
class VigraRandomForest(object):
    """Wrapper around ``BaseVigraRandomForest`` with fit/predict methods.

    Besides training and prediction it can store the forest, together with
    a few extra values, in an HDF5 file and load it back.
    """

    def __init__(self,
                 ntrees=255,
                 use_feature_importance=False,
                 sample_classes_individually=False):
        """Create the underlying forest and remember the two flags.

        ``ntrees`` is passed on as ``treeCount``.
        """
        self.rf = BaseVigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels):
        """Train the forest on ``features`` / ``labels`` and return ``self``.

        Stores the training call's result in ``self.oob`` and, when
        ``use_feature_importance`` is set, ``self.feature_importance``.
        """
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                        self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return ``predictProbabilities`` of the forest for ``features``."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return ``predictLabels`` of the forest for ``features``."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Return ``features`` as ``float32`` with at least two dimensions.

        A one-dimensional input becomes a single row.
        """
        if features.dtype != np.float32:
            features = features.astype(np.float32)
        if features.ndim == 1:
            features = features[np.newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Return ``labels`` as a ``uint32`` column of shape ``(n, 1)``.

        Labels that are already ``uint32`` are only reshaped. Otherwise:

        * if there is exactly one distinct negative label and no label
          equals 0, the negative label is mapped to 0;
        * else every label is shifted up by the magnitude of the smallest
          label.

        The caller's array is never modified.
        """
        if labels.dtype != np.uint32:
            negative = labels < 0
            if len(np.unique(labels[negative])) == 1 \
                    and not (labels == 0).any():
                # Work on a copy: assigning in place would silently rewrite
                # the caller's label array.
                labels = labels.copy()
                labels[negative] = 0
            else:
                # Adding the raw minimum pushes negative labels further
                # below zero, so the uint32 cast wraps around. Adding its
                # magnitude lifts them to >= 0 and gives the same result as
                # before for non-negative input.
                labels = labels + abs(labels.min())
            labels = labels.astype(np.uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf'):
        """Write the forest and its extra values to the HDF5 file ``fn``.

        The forest goes into the group ``rfgroupname``; whichever of
        ``oob``, ``feature_importance``, ``use_feature_importance`` and
        ``feature_description`` exist on ``self`` are stored as HDF5
        attributes of that group.
        """
        # str() matches what load_from_disk passes to the forest.
        self.rf.writeHDF5(str(fn), rfgroupname)
        attr_list = [
            'oob', 'feature_importance', 'use_feature_importance',
            'feature_description'
        ]
        # Open explicitly in append mode (h5py >= 3 defaults to read-only)
        # and close the handle when done.
        with h5py.File(fn, 'a') as f:
            group_attrs = f[rfgroupname].attrs
            for attr in attr_list:
                if hasattr(self, attr):
                    group_attrs[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Load the forest and its stored attributes from ``fn``.

        Every HDF5 attribute of the group ``rfgroupname`` is printed and
        set on ``self`` under the same name.
        """
        self.rf = BaseVigraRandomForest(str(fn), rfgroupname)
        # The context manager guarantees the file handle is closed.
        with h5py.File(fn, 'r') as f:
            stored = f[rfgroupname].attrs
            for attr in stored:
                value = stored[attr]
                print("f[%s] = %s" % (attr, value))
                setattr(self, attr, value)
Beispiel #14
0
class RandomForest(object):
    """Wrapper around ``VigraRandomForest`` with fit/predict methods.

    Besides training and prediction it can store the forest, together with
    a few extra values, in an HDF5 file and load it back.
    """

    def __init__(self,
                 ntrees=255,
                 use_feature_importance=False,
                 sample_classes_individually=False):
        """Create the underlying forest and remember the two flags.

        ``ntrees`` is passed on as ``treeCount``.
        """
        self.rf = VigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels, **kwargs):
        """Train the forest on ``features`` / ``labels`` and return ``self``.

        Stores the training call's result in ``self.oob`` and, when
        ``use_feature_importance`` is set, ``self.feature_importance``.
        Extra keyword arguments are accepted and ignored.
        """
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                        self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return ``predictProbabilities`` of the forest for ``features``."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return ``predictLabels`` of the forest for ``features``."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Return ``features`` as ``float32`` with at least two dimensions.

        A one-dimensional input becomes a single row.
        """
        if features.dtype != float32:
            features = features.astype(float32)
        if features.ndim == 1:
            features = features[newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Return ``labels`` as a ``uint32`` column of shape ``(n, 1)``.

        Labels that are already ``uint32`` are only reshaped. Otherwise:

        * if there is exactly one distinct negative label and no label
          equals 0, the negative label is mapped to 0;
        * else every label is shifted up by the magnitude of the smallest
          label.

        The caller's array is never modified.
        """
        if labels.dtype != uint32:
            negative = labels < 0
            if len(unique(labels[negative])) == 1 \
                    and not (labels == 0).any():
                # Work on a copy: assigning in place would silently rewrite
                # the caller's label array.
                labels = labels.copy()
                labels[negative] = 0
            else:
                # Adding the raw minimum pushes negative labels further
                # below zero, so the uint32 cast wraps around. Adding its
                # magnitude lifts them to >= 0 and gives the same result as
                # before for non-negative input.
                labels = labels + abs(labels.min())
            labels = labels.astype(uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf', overwrite=True):
        """Write the forest and its extra values to the HDF5 file ``fn``.

        The forest goes into the group ``rfgroupname``; whichever of
        ``oob``, ``feature_importance`` and ``use_feature_importance``
        exist on ``self`` are stored as top-level entries of the file.
        With ``overwrite`` set, entries left by an earlier save are
        replaced; otherwise h5py raises if one already exists.
        """
        self.rf.writeHDF5(fn, rfgroupname, overwrite)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance']
        # Open explicitly in append mode (h5py >= 3 defaults to read-only)
        # and close the handle when done.
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    # Assigning to an existing name raises in h5py, so
                    # remove the old entry first when overwriting.
                    if overwrite and attr in f:
                        del f[attr]
                    f[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Load the forest and its stored extra values from the file ``fn``.

        The forest is read from the group ``rfgroupname``. Every other name
        visited in the file that does not start with ``rfgroupname`` is
        converted with ``array`` and set on ``self`` under that name.
        """
        self.rf = VigraRandomForest(fn, rfgroupname)
        # Read inside a context manager so the file handle is always
        # closed; every value is converted while the file is still open.
        with h5py.File(fn, 'r') as f:
            names = []
            f.visit(names.append)
            for name in names:
                if not name.startswith(rfgroupname):
                    setattr(self, name, array(f[name]))
Beispiel #15
0
 def load_from_disk(self, fn, rfgroupname='rf'):
     """Load the forest and its stored attributes from the file ``fn``.

     The forest is read from the group ``rfgroupname``. Every HDF5
     attribute of that group is printed and set on ``self`` under the
     same name.
     """
     self.rf = BaseVigraRandomForest(str(fn), rfgroupname)
     # The context manager guarantees the file handle is closed.
     with h5py.File(fn, 'r') as f:
         stored = f[rfgroupname].attrs
         for attr in stored:
             value = stored[attr]
             print("f[%s] = %s" % (attr, value))
             setattr(self, attr, value)
Beispiel #16
0
 def train_forest(forest_index: int) -> VigraRandomForest:
     """Train and return the forest with index ``forest_index``.

     ``num_trees`` is split over ``num_forests`` forests; forests whose
     index is below ``num_trees % num_forests`` get one extra tree.
     ``num_trees``, ``num_forests``, ``X``, ``y`` and ``random_seed`` are
     taken from the enclosing scope.
     """
     trees_per_forest, leftover = divmod(num_trees, num_forests)
     ntrees = trees_per_forest + (1 if forest_index < leftover else 0)
     new_forest = VigraRandomForest(ntrees)
     new_forest.learnRF(X, y, random_seed)
     return new_forest
Beispiel #17
0
from vigra.learning import RandomForest
import numpy as np
import h5py
import sys
from tiffcvt import h5_file

if __name__ == "__main__":
    # Run as a script: train a 40-tree forest on the features stored in
    # h5_file and write it to ../challenge.h5.
    use_eigentexture = len(sys.argv) > 1 and sys.argv[1] == "eigentexture"
    clf = RandomForest(treeCount=40)
    training_set = h5_file["training_features"][:, :].astype(np.float32)
    training_class = h5_file["training_classification"][:].astype(np.uint32)
    if use_eigentexture:
        # Normalize the features and multiply them by the transposed
        # stored components before training.
        from eigentexture import normalize
        training_set = normalize(training_set)
        components = h5_file["components"][:, :].transpose()
        training_set = np.dot(training_set, components).astype(np.float32)
    classifier_name = "etclassifier" if use_eigentexture else "classifier"
    clf.learnRF(training_set, training_class)
    # Drop a previously stored classifier of the same name, then close the
    # handle before writeHDF5 is given the file path.
    if classifier_name in h5_file.keys():
        del h5_file[classifier_name]
    h5_file.close()
    clf.writeHDF5('../challenge.h5', "/" + classifier_name, True)
else:
    # Imported as a module: load both stored classifiers.
    classifier = RandomForest("../challenge.h5", "/classifier")
    et_classifier = RandomForest("../challenge.h5", "/etclassifier")