class ConvNetFeatureExtractor(FeatureExtractor):
    """Feature extractor that reads one layer of a pretrained ConvNet.

    A `DecafNet` is built from the given parameter and metadata files
    when the extractor is constructed; `_extract` then returns the
    output of `feature_layer` for an image.
    """

    def __init__(
            self,
            feature_layer='fc7_cudanet_out',
            pretrained_params='imagenet.decafnet.epoch90',
            pretrained_meta='imagenet.decafnet.meta',
            center_only=True
            ):
        """
        :param feature_layer: The ConvNet layer that's used for
                              feature extraction.  Defaults to
                              `fc7_cudanet_out`.  A description of all
                              available layers for the
                              ImageNet-1k-pretrained ConvNet is found
                              in the DeCAF wiki.  They are:

                                - `pool5_cudanet_out`
                                - `fc6_cudanet_out`
                                - `fc6_neuron_cudanet_out`
                                - `fc7_cudanet_out`
                                - `fc7_neuron_cudanet_out`

        :param pretrained_params: This must point to the file with the
                                  pretrained parameters.  Defaults to
                                  `imagenet.decafnet.epoch90`.  For
                                  the ImageNet-1k-pretrained ConvNet
                                  this file can be obtained from here:
                                  http://www.eecs.berkeley.edu/~jiayq/decaf_pretrained/

        :param pretrained_meta: Similar to `pretrained_params`, this
                                must point to the file with the
                                pretrained parameters' metadata.
                                Defaults to `imagenet.decafnet.meta`.

        :param center_only: Use the center patch of the image only
                            when extracting features.  If `False`, use
                            four corners, the image center and flipped
                            variants and average a total of 10 feature
                            vectors, which will usually yield better
                            results.  Defaults to `True`.
        """
        super(ConvNetFeatureExtractor, self).__init__()
        self.feature_layer = feature_layer
        self.pretrained_params = pretrained_params
        self.pretrained_meta = pretrained_meta
        self.center_only = center_only
        # The network is loaded eagerly, so both files must exist at
        # construction time.
        self.convnet = DecafNet(
            self.pretrained_params,
            self.pretrained_meta
        )

    def _extract(self, img):
        """
        Run `img` through the ConvNet and return the output of
        `self.feature_layer` as a single feature vector.

        :param img: cv2 image
        :return: np.array with one entry per unit of the layer (shape
                 (4096,) for the default layer, per the original
                 documentation).  With `center_only=False` this is the
                 mean over all evaluated patches.
        """
        patches = self.convnet.oversample(img, center_only=self.center_only)
        self.convnet.classify_direct(patches)
        feat = self.convnet.feature(self.feature_layer)
        if self.center_only:
            # Only one patch was evaluated: drop the leading patch axis.
            return feat[0]
        # Several patches were evaluated: average over the patch axis.
        # The averaged vector must not be indexed with [0] again, as
        # that would return a single scalar instead of the features.
        return feat.mean(0)
class NetworkDecaf(Network):
    """
    Implementation for the Decaf library.
    """

    # Short layer names accepted by `evaluate` --> Decaf blob names.
    _LAYER_ALIASES = {
        'fc7_relu': 'fc7_neuron_cudanet_out',
        'fc7': 'fc7_cudanet_out',
        'fc6_relu': 'fc6_neuron_cudanet_out',
        'fc6': 'fc6_cudanet_out',
        'pool5': 'pool5_cudanet_out',
    }

    def __init__(self, model_spec_filename, model_filename=None,
                 wnid_words_filename=None, center_only=False, wnid_subset=[]):
        """
        *** PRIVATE CONSTRUCTOR ***

        Load the Decaf model and build the label lookup tables.

        :param model_spec_filename: path of the model specification
            file or, for retro-compatibility, a `NetworkDecafParams`
            instance from which all the other arguments are read.
        :param model_filename: path of the model file; required when
            `model_spec_filename` is a string.
        :param wnid_words_filename: path of a tab-separated text file;
            on every line the second column is used as the label
            (wnid) and the third column as its description.  Required
            when `model_spec_filename` is a string.
        :param center_only: stored as given; `evaluate` only works
            when it is true.
        :param wnid_subset: not supported yet.  The default list is
            never mutated by this class.
        """
        # the following is just a hack to allow retro-compatibility
        # with existing code
        if isinstance(model_spec_filename, NetworkDecafParams):
            params = model_spec_filename
            model_spec_filename = params.model_spec_filename
            model_filename = params.model_filename
            wnid_words_filename = params.wnid_words_filename
            center_only = params.center_only
            wnid_subset = params.wnid_subset
            if wnid_subset != []:
                # Parenthesised single-argument form prints the same
                # text on Python 2 and is also valid Python 3.
                print('Warning: subset of labels not supported yet')
        else:
            assert isinstance(model_spec_filename, str)
            assert model_filename is not None
            assert wnid_words_filename is not None
        # load Decaf model
        self.net_ = DecafNet(model_filename, model_spec_filename)
        self.center_only_ = center_only
        # build a dictionary label --> description
        self.dict_label_desc_ = {}
        dict_desc_label = {}
        # `with` closes the file even if a malformed line raises.
        with open(wnid_words_filename) as fd:
            for line in fd:
                line = line.strip()
                if not line:
                    # tolerate blank lines (e.g. a trailing newline)
                    continue
                temp = line.split('\t')
                wnid = temp[1].strip()
                self.dict_label_desc_[wnid] = temp[2].strip()
                # the network's label names are matched against the
                # first comma-separated term of the description
                dict_desc_label[temp[2].split(',')[0]] = wnid
        # build a dictionary label --> label_id
        self.dict_label_id_ = {}
        self.labels_ = []
        for i, desc in enumerate(self.net_.label_names):
            wnid = dict_desc_label[desc]
            self.dict_label_id_[wnid] = i
            self.labels_.append(wnid)
        # Load the mean vector from file
        # mean of 3 channels (average over axis 1, then over axis 0)
        self.net_.mean_img = np.mean(np.mean(self.net_._data_mean, axis=1),
                                     axis=0)
        # it is in BGR; the conversion to RGB is currently disabled:
        #self.net_.mean_img = self.net_.mean_img[::-1]

    def get_mean_img(self):
        """Return the per-channel mean computed in the constructor."""
        return self.net_.mean_img

    def get_input_dim(self):
        """Return `decaf.scripts.imagenet.INPUT_DIM`."""
        return decaf.scripts.imagenet.INPUT_DIM

    def get_label_id(self, label):
        """Return the index of `label` (a wnid) in the network output.

        :raise KeyError: if `label` is unknown.
        """
        return self.dict_label_id_[label]

    def get_label_desc(self, label):
        """Return the textual description of `label` (a wnid).

        :raise KeyError: if `label` is unknown.
        """
        return self.dict_label_desc_[label]

    def get_labels(self):
        """Return the list of labels (wnids), ordered by label id."""
        return self.labels_

    def evaluate(self, img, layer_name='softmax'):
        """
        Run the network on the center crop of `img`.

        :param img: input image; it is converted with
            `decaf.util.transform.as_rgb`, center-cropped and resized
            to the network input size.
        :param layer_name: 'softmax' to get the class scores, or one
            of 'fc7_relu', 'fc7', 'fc6_relu', 'fc6', 'pool5' to get
            the output of that layer.
        :return: for 'softmax' the predictions averaged over axis 0,
            otherwise the result of `DecafNet.feature` for the
            requested layer.
        :raise ValueError: if `layer_name` is not supported.
        """
        # for now only center_only is supported
        assert self.center_only_, 'only center_only=True is supported'
        # Reject unknown layers before doing the forward pass.
        if layer_name != 'softmax' and layer_name not in self._LAYER_ALIASES:
            raise ValueError('layer_name not supported')
        # first, extract the center and resize it to dim x dim
        dim = decaf.scripts.imagenet.INPUT_DIM
        image = util.crop_image_center(decaf.util.transform.as_rgb(img))
        image = skimage.transform.resize(image, (dim, dim))
        # convert to [0,255] float32
        image = image.astype(np.float32) * 255.
        assert np.max(image) <= 255
        # Flip the image if necessary, maintaining the c_contiguous order
        if decaf.scripts.imagenet._JEFFNET_FLIP:
            image = image[::-1, :].copy()
        # subtract the mean, cropping the mean image to dim x dim around
        # its center (`//` keeps the offsets integral on Python 2 and 3)
        xoff = (self.net_._data_mean.shape[1] - dim) // 2
        yoff = (self.net_._data_mean.shape[0] - dim) // 2
        # The row index must be the slice yoff:yoff + dim.  The former
        # `yoff + yoff + dim` picked one single row of the mean image,
        # which was then broadcast over every row of the crop.
        image -= self.net_._data_mean[yoff:yoff + dim, xoff:xoff + dim]
        # make sure the data is contiguous in memory
        images = np.ascontiguousarray(image[np.newaxis], dtype=np.float32)
        # classify
        predictions = self.net_.classify_direct(images)
        # look at the particular layer
        if layer_name == 'softmax':
            return predictions.mean(0)
        return self.net_.feature(self._LAYER_ALIASES[layer_name])
# Example #3
# 0
class ConvNetFeatures(BaseEstimator):
    """Extract features from images using a pretrained ConvNet.

    Based on Yangqing Jia and Jeff Donahue's `DeCAF
    <https://github.com/UCB-ICSI-Vision-Group/decaf-release/wiki>`_.
    Please make sure you read and accept DeCAF's license before you
    use this class.

    If ``classify_direct=False``, expects its input X to be a list of
    image filenames or arrays as produced by
    `np.array(Image.open(filename))`.
    """
    verbose = 0

    def __init__(
        self,
        feature_layer='fc7_cudanet_out',
        pretrained_params='imagenet.decafnet.epoch90',
        pretrained_meta='imagenet.decafnet.meta',
        center_only=True,
        classify_direct=False,
        verbose=0,
        ):
        """
        :param feature_layer: The ConvNet layer that's used for
                              feature extraction.  Defaults to
                              `fc7_cudanet_out`.  Several layers may
                              be given as a comma-separated string, in
                              which case their outputs are stacked
                              horizontally.  A description of all
                              available layers for the
                              ImageNet-1k-pretrained ConvNet is found
                              in the DeCAF wiki.  They are:

                                - `pool5_cudanet_out`
                                - `fc6_cudanet_out`
                                - `fc6_neuron_cudanet_out`
                                - `fc7_cudanet_out`
                                - `fc7_neuron_cudanet_out`
                                - `probs_cudanet_out`

        :param pretrained_params: This must point to the file with the
                                  pretrained parameters.  Defaults to
                                  `imagenet.decafnet.epoch90`.  For
                                  the ImageNet-1k-pretrained ConvNet
                                  this file can be obtained from here:
                                  http://www.eecs.berkeley.edu/~jiayq/decaf_pretrained/

        :param pretrained_meta: Similar to `pretrained_params`, this
                                must point to the file with the
                                pretrained parameters' metadata.
                                Defaults to `imagenet.decafnet.meta`.

        :param center_only: Use the center patch of the image only
                            when extracting features.  If `False`,
                            several patches per image are evaluated
                            (four corners, the image center and
                            flipped variants, 10 in total) and
                            `transform` flattens their feature
                            vectors into one row per image; they are
                            not averaged.  Defaults to `True`.

        :param classify_direct: When `True`, assume that input X is an
                                array of shape (num x 256 x 256 x 3)
                                as returned by `prepare_image`.

        :param verbose: When true, `transform` writes a progress
                        percentage to stdout.  Defaults to 0.

        :raise ValueError: if either pretrained file does not exist.
        """
        self.feature_layer = feature_layer
        self.pretrained_params = pretrained_params
        self.pretrained_meta = pretrained_meta
        self.center_only = center_only
        self.classify_direct = classify_direct
        # Previously the argument was dropped and the class-level
        # default (0) was always used.
        self.verbose = verbose
        self.net_ = None

        if (not os.path.exists(pretrained_params) or
            not os.path.exists(pretrained_meta)):
            raise ValueError(
                "Pre-trained ConvNet parameters not found.  You may "
                "need to download the files from "
                "http://www.eecs.berkeley.edu/~jiayq/decaf_pretrained/ and "
                "pass the path to the two files as `pretrained_params` and "
                "`pretrained_meta` to the `{}` estimator.".format(
                    self.__class__.__name__))

    def fit(self, X=None, y=None):
        """Load the pretrained network if it is not loaded yet.

        `X` and `y` are ignored.  Returns `self`.
        """
        from decaf.scripts.imagenet import DecafNet  # soft dep

        if self.net_ is None:
            self.net_ = DecafNet(
                self.pretrained_params,
                self.pretrained_meta,
                )
        return self

    @cache.cached(_transform_cache_key)
    def transform(self, X):
        """Return the ConvNet features of every image in `X`.

        :param X: sized iterable of images; see `classify_direct` and
                  the class documentation for the accepted formats.
        :return: array with one row per image, built with `np.vstack`.
        """
        if self.net_ is None:
            # Load the network on demand instead of failing with an
            # AttributeError on `None` when `fit` was not called.
            self.fit()
        layers = self.feature_layer.split(',')
        features = []
        for img in X:
            if self.classify_direct:
                images = self.net_.oversample(
                    img, center_only=self.center_only)
                self.net_.classify_direct(images)
            else:
                if isinstance(img, str):
                    try:
                        from PIL import Image  # soft dep
                    except ImportError:
                        import Image  # legacy PIL layout
                    img = np.array(Image.open(img))
                self.net_.classify(img, center_only=self.center_only)
            # Stack the requested layers side by side.
            feat = np.hstack([self.net_.feature(layer) for layer in layers])
            if not self.center_only:
                # one row per patch --> one flat vector per image
                feat = feat.flatten()
            features.append(feat)
            if self.verbose:
                sys.stdout.write(
                    "\r[ConvNet] %d%%" % (100. * len(features) / len(X)))
                sys.stdout.flush()
        if self.verbose:
            sys.stdout.write('\n')
        return np.vstack(features)

    def prepare_image(self, image):
        """Returns image of shape `(256, 256, 3)`, as expected by
        `transform` when `classify_direct = True`.
        """
        from decaf.util import transform  # soft dep
        _JEFFNET_FLIP = True

        if self.net_ is None:
            # The mean image lives on the network; load it on demand.
            self.fit()
        # first, extract the 256x256 center.
        image = transform.scale_and_extract(transform.as_rgb(image), 256)
        # convert to [0,255] float32
        image = image.astype(np.float32) * 255.
        if _JEFFNET_FLIP:
            # Flip the image if necessary, maintaining the c_contiguous order
            image = image[::-1, :].copy()
        # subtract the mean
        image -= self.net_._data_mean
        return image
class NetworkDecaf(Network):
    """
    Implementation for the Decaf library.
    """

    # Short layer names accepted by `evaluate` --> Decaf blob names.
    _LAYER_ALIASES = {
        'fc7_relu': 'fc7_neuron_cudanet_out',
        'fc7': 'fc7_cudanet_out',
        'fc6_relu': 'fc6_neuron_cudanet_out',
        'fc6': 'fc6_cudanet_out',
        'pool5': 'pool5_cudanet_out',
    }

    def __init__(self, model_spec_filename, model_filename=None,
                 wnid_words_filename=None, center_only=False, wnid_subset=[]):
        """
        *** PRIVATE CONSTRUCTOR ***

        Load the Decaf model and build the label lookup tables.

        :param model_spec_filename: path of the model specification
            file or, for retro-compatibility, a `NetworkDecafParams`
            instance from which all the other arguments are read.
        :param model_filename: path of the model file; required when
            `model_spec_filename` is a string.
        :param wnid_words_filename: path of a tab-separated text file;
            on every line the second column is used as the label
            (wnid) and the third column as its description.  Required
            when `model_spec_filename` is a string.
        :param center_only: stored as given; `evaluate` only works
            when it is true.
        :param wnid_subset: not supported yet.  The default list is
            never mutated by this class.
        """
        # the following is just a hack to allow retro-compatibility
        # with existing code
        if isinstance(model_spec_filename, NetworkDecafParams):
            params = model_spec_filename
            model_spec_filename = params.model_spec_filename
            model_filename = params.model_filename
            wnid_words_filename = params.wnid_words_filename
            center_only = params.center_only
            wnid_subset = params.wnid_subset
            if wnid_subset != []:
                # Parenthesised single-argument form prints the same
                # text on Python 2 and is also valid Python 3.
                print('Warning: subset of labels not supported yet')
        else:
            assert isinstance(model_spec_filename, str)
            assert model_filename is not None
            assert wnid_words_filename is not None
        # load Decaf model
        self.net_ = DecafNet(model_filename, model_spec_filename)
        self.center_only_ = center_only
        # build a dictionary label --> description
        self.dict_label_desc_ = {}
        dict_desc_label = {}
        # `with` closes the file even if a malformed line raises.
        with open(wnid_words_filename) as fd:
            for line in fd:
                line = line.strip()
                if not line:
                    # tolerate blank lines (e.g. a trailing newline)
                    continue
                temp = line.split('\t')
                wnid = temp[1].strip()
                self.dict_label_desc_[wnid] = temp[2].strip()
                # the network's label names are matched against the
                # first comma-separated term of the description
                dict_desc_label[temp[2].split(',')[0]] = wnid
        # build a dictionary label --> label_id
        self.dict_label_id_ = {}
        self.labels_ = []
        for i, desc in enumerate(self.net_.label_names):
            wnid = dict_desc_label[desc]
            self.dict_label_id_[wnid] = i
            self.labels_.append(wnid)
        # Load the mean vector from file
        # mean of 3 channels (average over axis 1, then over axis 0)
        self.net_.mean_img = np.mean(np.mean(self.net_._data_mean, axis=1),
                                     axis=0)
        # it is in BGR; the conversion to RGB is currently disabled:
        #self.net_.mean_img = self.net_.mean_img[::-1]

    def get_mean_img(self):
        """Return the per-channel mean computed in the constructor."""
        return self.net_.mean_img

    def get_input_dim(self):
        """Return `decaf.scripts.imagenet.INPUT_DIM`."""
        return decaf.scripts.imagenet.INPUT_DIM

    def get_label_id(self, label):
        """Return the index of `label` (a wnid) in the network output.

        :raise KeyError: if `label` is unknown.
        """
        return self.dict_label_id_[label]

    def get_label_desc(self, label):
        """Return the textual description of `label` (a wnid).

        :raise KeyError: if `label` is unknown.
        """
        return self.dict_label_desc_[label]

    def get_labels(self):
        """Return the list of labels (wnids), ordered by label id."""
        return self.labels_

    def evaluate(self, img, layer_name='softmax'):
        """
        Run the network on the center crop of `img`.

        :param img: input image; it is converted with
            `decaf.util.transform.as_rgb`, center-cropped and resized
            to the network input size.
        :param layer_name: 'softmax' to get the class scores, or one
            of 'fc7_relu', 'fc7', 'fc6_relu', 'fc6', 'pool5' to get
            the output of that layer.
        :return: for 'softmax' the predictions averaged over axis 0,
            otherwise the result of `DecafNet.feature` for the
            requested layer.
        :raise ValueError: if `layer_name` is not supported.
        """
        # for now only center_only is supported
        assert self.center_only_, 'only center_only=True is supported'
        # Reject unknown layers before doing the forward pass.
        if layer_name != 'softmax' and layer_name not in self._LAYER_ALIASES:
            raise ValueError('layer_name not supported')
        # first, extract the center and resize it to dim x dim
        dim = decaf.scripts.imagenet.INPUT_DIM
        image = util.crop_image_center(decaf.util.transform.as_rgb(img))
        image = skimage.transform.resize(image, (dim, dim))
        # convert to [0,255] float32
        image = image.astype(np.float32) * 255.
        assert np.max(image) <= 255
        # Flip the image if necessary, maintaining the c_contiguous order
        if decaf.scripts.imagenet._JEFFNET_FLIP:
            image = image[::-1, :].copy()
        # subtract the mean, cropping the mean image to dim x dim around
        # its center (`//` keeps the offsets integral on Python 2 and 3)
        xoff = (self.net_._data_mean.shape[1] - dim) // 2
        yoff = (self.net_._data_mean.shape[0] - dim) // 2
        # The row index must be the slice yoff:yoff + dim.  The former
        # `yoff+yoff+dim` picked one single row of the mean image,
        # which was then broadcast over every row of the crop.
        image -= self.net_._data_mean[yoff:yoff + dim, xoff:xoff + dim]
        # make sure the data is contiguous in memory
        images = np.ascontiguousarray(image[np.newaxis], dtype=np.float32)
        # classify
        predictions = self.net_.classify_direct(images)
        # look at the particular layer
        if layer_name == 'softmax':
            return predictions.mean(0)
        return self.net_.feature(self._LAYER_ALIASES[layer_name])