Beispiel #1
0
class VigraRandomForest(object):
    """Thin wrapper around ``BaseVigraRandomForest``.

    Exposes ``fit`` / ``predict`` / ``predict_proba`` methods, converts the
    inputs to the dtypes and shapes handed to the wrapped forest
    (``float32`` features, ``uint32`` column-vector labels) and stores a few
    extra attributes next to the forest in an HDF5 file.
    """

    def __init__(self, ntrees=255, use_feature_importance=False,
            sample_classes_individually=False):
        """Create the wrapped forest.

        Parameters
        ----------
        ntrees : int
            Forwarded to the wrapped forest as ``treeCount``.
        use_feature_importance : bool
            When true, ``fit`` trains with ``learnRFWithFeatureSelection``
            and also stores ``self.feature_importance``.
        sample_classes_individually : bool
            Forwarded unchanged to the wrapped forest.
        """
        self.rf = BaseVigraRandomForest(treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels):
        """Train the forest and return ``self``.

        Stores the value returned by the training call as ``self.oob``
        (presumably the out-of-bag error -- confirm against the vigra
        documentation) and, when ``use_feature_importance`` is set,
        ``self.feature_importance``.
        """
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                        self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return ``predictProbabilities`` of the wrapped forest."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return ``predictLabels`` of the wrapped forest."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Return ``features`` as a ``float32`` array with at least 2 dims.

        A 1-D input is treated as a single sample and gets a leading axis.
        """
        if features.dtype != np.float32:
            features = features.astype(np.float32)
        if features.ndim == 1:
            features = features[np.newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Return ``labels`` as a ``uint32`` array of shape ``(n, 1)``.

        Negative labels cannot be represented as ``uint32``, so they are
        remapped first:

        * a single distinct negative value is mapped to 0, provided 0 is
          not already in use as a label;
        * otherwise all labels are shifted so that the minimum becomes 0.

        Non-negative labels keep their values.  The caller's array is never
        modified.
        """
        if labels.dtype != np.uint32:
            negative = labels < 0
            if negative.any():
                if len(np.unique(labels[negative])) == 1 \
                                                and not (labels == 0).any():
                    # Work on a copy: the caller still owns the input array.
                    labels = labels.copy()
                    labels[negative] = 0
                else:
                    # Subtracting the (negative) minimum moves it to 0;
                    # adding it would push the labels further below zero
                    # and make them wrap around in the cast below.
                    labels = labels - labels.min()
            labels = labels.astype(np.uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf'):
        """Write the forest to HDF5 file ``fn`` under group ``rfgroupname``.

        Whichever of ``oob``, ``feature_importance``,
        ``use_feature_importance`` and ``feature_description`` exist on this
        object are stored as HDF5 attributes of that group.
        """
        self.rf.writeHDF5(fn, rfgroupname)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance',
            'feature_description']
        # Open explicitly in append mode: h5py >= 3 defaults to read-only,
        # which would make the attribute writes fail.  The context manager
        # guarantees the file is flushed and closed.
        with h5py.File(fn, 'a') as f:
            group_attrs = f[rfgroupname].attrs
            for attr in attr_list:
                if hasattr(self, attr):
                    group_attrs[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Replace the forest with the one stored in ``fn``.

        Every HDF5 attribute of group ``rfgroupname`` is printed and then
        set as an attribute of this object.
        """
        self.rf = BaseVigraRandomForest(str(fn), rfgroupname)
        with h5py.File(fn, 'r') as f:
            for attr, value in f[rfgroupname].attrs.items():
                print("f[%s] = %s" % (attr, value))
                setattr(self, attr, value)
Beispiel #2
0
class RandomForest(object):
    """Thin wrapper around ``VigraRandomForest``.

    Exposes ``fit`` / ``predict`` / ``predict_proba`` methods, converts the
    inputs to the dtypes and shapes handed to the wrapped forest
    (``float32`` features, ``uint32`` column-vector labels) and stores a few
    extra values next to the forest in an HDF5 file.
    """

    def __init__(self, ntrees=255, use_feature_importance=False,
            sample_classes_individually=False):
        """Create the wrapped forest.

        Parameters
        ----------
        ntrees : int
            Forwarded to the wrapped forest as ``treeCount``.
        use_feature_importance : bool
            When true, ``fit`` trains with ``learnRFWithFeatureSelection``
            and also stores ``self.feature_importance``.
        sample_classes_individually : bool
            Forwarded unchanged to the wrapped forest.
        """
        self.rf = VigraRandomForest(treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels, **kwargs):
        """Train the forest and return ``self``.

        Extra keyword arguments are accepted and ignored.  Stores the value
        returned by the training call as ``self.oob`` (presumably the
        out-of-bag error -- confirm against the vigra documentation) and,
        when ``use_feature_importance`` is set, ``self.feature_importance``.
        """
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                        self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return ``predictProbabilities`` of the wrapped forest."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return ``predictLabels`` of the wrapped forest."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Return ``features`` as a ``float32`` array with at least 2 dims.

        A 1-D input is treated as a single sample and gets a leading axis.
        """
        if features.dtype != float32:
            features = features.astype(float32)
        if features.ndim == 1:
            features = features[newaxis,:]
        return features

    def check_labels_vector(self, labels):
        """Return ``labels`` as a ``uint32`` array of shape ``(n, 1)``.

        Negative labels cannot be represented as ``uint32``, so they are
        remapped first:

        * a single distinct negative value is mapped to 0, provided 0 is
          not already in use as a label;
        * otherwise all labels are shifted so that the minimum becomes 0.

        Non-negative labels keep their values.  The caller's array is never
        modified.
        """
        if labels.dtype != uint32:
            negative = labels < 0
            if negative.any():
                if len(unique(labels[negative])) == 1 \
                                                and not (labels == 0).any():
                    # Work on a copy: the caller still owns the input array.
                    labels = labels.copy()
                    labels[negative] = 0
                else:
                    # Subtracting the (negative) minimum moves it to 0;
                    # adding it would push the labels further below zero
                    # and make them wrap around in the cast below.
                    labels = labels - labels.min()
            labels = labels.astype(uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf', overwrite=True):
        """Write the forest to HDF5 file ``fn`` under group ``rfgroupname``.

        Whichever of ``oob``, ``feature_importance`` and
        ``use_feature_importance`` exist on this object are stored as
        top-level datasets of the file.  ``overwrite`` is forwarded to
        ``writeHDF5``; when true, existing datasets with those names are
        replaced as well.
        """
        self.rf.writeHDF5(fn, rfgroupname, overwrite)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance']
        # Open explicitly in append mode: h5py >= 3 defaults to read-only,
        # which would make the writes fail.  The context manager guarantees
        # the file is flushed and closed.
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    # h5py refuses to assign over an existing name, so a
                    # stale dataset must be removed before re-saving.
                    if overwrite and attr in f:
                        del f[attr]
                    f[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Replace the forest with the one stored in ``fn``.

        Every dataset in the file whose name does not start with
        ``rfgroupname`` is read into memory and set as an attribute of this
        object under the dataset's name.
        """
        self.rf = VigraRandomForest(fn, rfgroupname)
        with h5py.File(fn, 'r') as f:
            names = []
            f.visit(names.append)
            for name in names:
                if name.startswith(rfgroupname):
                    continue
                item = f[name]
                # Groups hold no values of their own; only datasets can be
                # converted to arrays.
                if isinstance(item, h5py.Dataset):
                    setattr(self, name, array(item))
Beispiel #3
0
class VigraRandomForest(object):
    """Thin wrapper around ``BaseVigraRandomForest``.

    Exposes ``fit`` / ``predict`` / ``predict_proba`` methods, converts the
    inputs to the dtypes and shapes handed to the wrapped forest
    (``float32`` features, ``uint32`` column-vector labels) and stores a few
    extra attributes next to the forest in an HDF5 file.
    """

    def __init__(self,
                 ntrees=255,
                 use_feature_importance=False,
                 sample_classes_individually=False):
        """Create the wrapped forest.

        Parameters
        ----------
        ntrees : int
            Forwarded to the wrapped forest as ``treeCount``.
        use_feature_importance : bool
            When true, ``fit`` trains with ``learnRFWithFeatureSelection``
            and also stores ``self.feature_importance``.
        sample_classes_individually : bool
            Forwarded unchanged to the wrapped forest.
        """
        self.rf = BaseVigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels):
        """Train the forest and return ``self``.

        Stores the value returned by the training call as ``self.oob``
        (presumably the out-of-bag error -- confirm against the vigra
        documentation) and, when ``use_feature_importance`` is set,
        ``self.feature_importance``.
        """
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                        self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return ``predictProbabilities`` of the wrapped forest."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return ``predictLabels`` of the wrapped forest."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Return ``features`` as a ``float32`` array with at least 2 dims.

        A 1-D input is treated as a single sample and gets a leading axis.
        """
        if features.dtype != np.float32:
            features = features.astype(np.float32)
        if features.ndim == 1:
            features = features[np.newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Return ``labels`` as a ``uint32`` array of shape ``(n, 1)``.

        Negative labels cannot be represented as ``uint32``, so they are
        remapped first:

        * a single distinct negative value is mapped to 0, provided 0 is
          not already in use as a label;
        * otherwise all labels are shifted so that the minimum becomes 0.

        Non-negative labels keep their values.  The caller's array is never
        modified.
        """
        if labels.dtype != np.uint32:
            negative = labels < 0
            if negative.any():
                if len(np.unique(labels[negative])) == 1 \
                                                and not (labels == 0).any():
                    # Work on a copy: the caller still owns the input array.
                    labels = labels.copy()
                    labels[negative] = 0
                else:
                    # Subtracting the (negative) minimum moves it to 0;
                    # adding it would push the labels further below zero
                    # and make them wrap around in the cast below.
                    labels = labels - labels.min()
            labels = labels.astype(np.uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf'):
        """Write the forest to HDF5 file ``fn`` under group ``rfgroupname``.

        Whichever of ``oob``, ``feature_importance``,
        ``use_feature_importance`` and ``feature_description`` exist on this
        object are stored as HDF5 attributes of that group.
        """
        self.rf.writeHDF5(fn, rfgroupname)
        attr_list = [
            'oob', 'feature_importance', 'use_feature_importance',
            'feature_description'
        ]
        # Open explicitly in append mode: h5py >= 3 defaults to read-only,
        # which would make the attribute writes fail.  The context manager
        # guarantees the file is flushed and closed.
        with h5py.File(fn, 'a') as f:
            group_attrs = f[rfgroupname].attrs
            for attr in attr_list:
                if hasattr(self, attr):
                    group_attrs[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Replace the forest with the one stored in ``fn``.

        Every HDF5 attribute of group ``rfgroupname`` is printed and then
        set as an attribute of this object.
        """
        self.rf = BaseVigraRandomForest(str(fn), rfgroupname)
        with h5py.File(fn, 'r') as f:
            for attr, value in f[rfgroupname].attrs.items():
                print("f[%s] = %s" % (attr, value))
                setattr(self, attr, value)
Beispiel #4
0
class RandomForest(object):
    """Thin wrapper around ``VigraRandomForest``.

    Exposes ``fit`` / ``predict`` / ``predict_proba`` methods, converts the
    inputs to the dtypes and shapes handed to the wrapped forest
    (``float32`` features, ``uint32`` column-vector labels) and stores a few
    extra values next to the forest in an HDF5 file.
    """

    def __init__(self,
                 ntrees=255,
                 use_feature_importance=False,
                 sample_classes_individually=False):
        """Create the wrapped forest.

        Parameters
        ----------
        ntrees : int
            Forwarded to the wrapped forest as ``treeCount``.
        use_feature_importance : bool
            When true, ``fit`` trains with ``learnRFWithFeatureSelection``
            and also stores ``self.feature_importance``.
        sample_classes_individually : bool
            Forwarded unchanged to the wrapped forest.
        """
        self.rf = VigraRandomForest(
            treeCount=ntrees,
            sample_classes_individually=sample_classes_individually)
        self.use_feature_importance = use_feature_importance
        self.sample_classes_individually = sample_classes_individually

    def fit(self, features, labels, **kwargs):
        """Train the forest and return ``self``.

        Extra keyword arguments are accepted and ignored.  Stores the value
        returned by the training call as ``self.oob`` (presumably the
        out-of-bag error -- confirm against the vigra documentation) and,
        when ``use_feature_importance`` is set, ``self.feature_importance``.
        """
        features = self.check_features_vector(features)
        labels = self.check_labels_vector(labels)
        if self.use_feature_importance:
            self.oob, self.feature_importance = \
                        self.rf.learnRFWithFeatureSelection(features, labels)
        else:
            self.oob = self.rf.learnRF(features, labels)
        return self

    def predict_proba(self, features):
        """Return ``predictProbabilities`` of the wrapped forest."""
        features = self.check_features_vector(features)
        return self.rf.predictProbabilities(features)

    def predict(self, features):
        """Return ``predictLabels`` of the wrapped forest."""
        features = self.check_features_vector(features)
        return self.rf.predictLabels(features)

    def check_features_vector(self, features):
        """Return ``features`` as a ``float32`` array with at least 2 dims.

        A 1-D input is treated as a single sample and gets a leading axis.
        """
        if features.dtype != float32:
            features = features.astype(float32)
        if features.ndim == 1:
            features = features[newaxis, :]
        return features

    def check_labels_vector(self, labels):
        """Return ``labels`` as a ``uint32`` array of shape ``(n, 1)``.

        Negative labels cannot be represented as ``uint32``, so they are
        remapped first:

        * a single distinct negative value is mapped to 0, provided 0 is
          not already in use as a label;
        * otherwise all labels are shifted so that the minimum becomes 0.

        Non-negative labels keep their values.  The caller's array is never
        modified.
        """
        if labels.dtype != uint32:
            negative = labels < 0
            if negative.any():
                if len(unique(labels[negative])) == 1 \
                                                and not (labels == 0).any():
                    # Work on a copy: the caller still owns the input array.
                    labels = labels.copy()
                    labels[negative] = 0
                else:
                    # Subtracting the (negative) minimum moves it to 0;
                    # adding it would push the labels further below zero
                    # and make them wrap around in the cast below.
                    labels = labels - labels.min()
            labels = labels.astype(uint32)
        labels = labels.reshape((labels.size, 1))
        return labels

    def save_to_disk(self, fn, rfgroupname='rf', overwrite=True):
        """Write the forest to HDF5 file ``fn`` under group ``rfgroupname``.

        Whichever of ``oob``, ``feature_importance`` and
        ``use_feature_importance`` exist on this object are stored as
        top-level datasets of the file.  ``overwrite`` is forwarded to
        ``writeHDF5``; when true, existing datasets with those names are
        replaced as well.
        """
        self.rf.writeHDF5(fn, rfgroupname, overwrite)
        attr_list = ['oob', 'feature_importance', 'use_feature_importance']
        # Open explicitly in append mode: h5py >= 3 defaults to read-only,
        # which would make the writes fail.  The context manager guarantees
        # the file is flushed and closed.
        with h5py.File(fn, 'a') as f:
            for attr in attr_list:
                if hasattr(self, attr):
                    # h5py refuses to assign over an existing name, so a
                    # stale dataset must be removed before re-saving.
                    if overwrite and attr in f:
                        del f[attr]
                    f[attr] = getattr(self, attr)

    def load_from_disk(self, fn, rfgroupname='rf'):
        """Replace the forest with the one stored in ``fn``.

        Every dataset in the file whose name does not start with
        ``rfgroupname`` is read into memory and set as an attribute of this
        object under the dataset's name.
        """
        self.rf = VigraRandomForest(fn, rfgroupname)
        with h5py.File(fn, 'r') as f:
            names = []
            f.visit(names.append)
            for name in names:
                if name.startswith(rfgroupname):
                    continue
                item = f[name]
                # Groups hold no values of their own; only datasets can be
                # converted to arrays.
                if isinstance(item, h5py.Dataset):
                    setattr(self, name, array(item))