def test_check_tensor():
    """Test that check_tensor works for a variety of inputs."""
    X = np.zeros((3, 4, 5))
    # A flat Python list is promoted to a 1-D ndarray.
    assert_equal(check_tensor([1, 2]).shape, (2, ))
    # Requesting fewer dimensions than the input has must raise.
    # NOTE: np.float was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin float is the documented replacement.
    assert_raises(ValueError, check_tensor, X, dtype=float, n_dim=1)
    # Extra leading singleton axes pad the array up to n_dim.
    assert_equal(check_tensor(X, dtype=float, n_dim=6).shape,
                 (1, 1, 1, 3, 4, 5))
    # Matching n_dim leaves the shape untouched.
    assert_equal(check_tensor(X, dtype=float, n_dim=3).shape, (3, 4, 5))
def transform(self, X):
    """Transform a set of images.

    Returns the features from each layer.

    Parameters
    ----------
    X : array-like, shape = [n_images, height, width, color]
        or shape = [height, width, color]

    Returns
    -------
    T : array-like, shape = [n_images, n_features]
        If force_reshape = False,
        list of array-like, length output_layers,
        each shape = [n_images, n_windows, n_window_features]
        Returns the features extracted for each of the n_images in X.
    """
    X = check_tensor(X, dtype=np.float32, n_dim=4)
    if self.batch_size is None:
        if self.force_reshape:
            return self.transform_function(
                X.transpose(*self.transpose_order))[0].reshape((len(X), -1))
        else:
            return self.transform_function(
                X.transpose(*self.transpose_order))
    else:
        XT = X.transpose(*self.transpose_order)
        n_samples = XT.shape[0]
        for i in range(0, n_samples, self.batch_size):
            transformed_batch = self.transform_function(
                XT[i:i + self.batch_size])
            # at first iteration, initialize output arrays to correct size
            if i == 0:
                shapes = [(n_samples,) + t.shape[1:]
                          for t in transformed_batch]
                ravelled_shapes = [np.prod(shp[1:]) for shp in shapes]
                if self.force_reshape:
                    output_width = np.sum(ravelled_shapes)
                    output = np.empty((n_samples, output_width),
                                      dtype=transformed_batch[0].dtype)
                    # np.r_ is an index expression, not a callable:
                    # np.r_[...] concatenates, np.r_(...) raises TypeError.
                    break_points = np.r_[0, np.cumsum(ravelled_shapes)]
                    raw_output = [
                        output[:, start:stop]
                        for start, stop in zip(break_points[:-1],
                                               break_points[1:])]
                else:
                    # transformed_batch is a list of arrays (one per output
                    # layer), so take the dtype of each array individually.
                    output = [np.empty(shape, dtype=t.dtype)
                              for shape, t in zip(shapes,
                                                  transformed_batch)]
                    raw_output = [arr.reshape(n_samples, -1)
                                  for arr in output]
            for transformed, out in zip(transformed_batch, raw_output):
                out[i:i + self.batch_size] = transformed
        return output
def __layerwiseTransform__(self, input_data, float_dtype='float32',
                           verbose=0):
    """Run the parsed Caffe model layer by layer, compiling and
    evaluating a theano function after each layer, and collect the
    concrete activations of every layer in a dict keyed by its top blob.

    NOTE(review): the loop breaks as soon as a layer named 'fc6' is
    reached, so the fc6-specific reshape in the INNER_PRODUCT branch
    below appears unreachable — confirm intent.
    """
    transformations = {}
    input_data = self.preprocess_image(input_data)
    X = check_tensor(input_data, dtype=np.float32, n_dim=4)
    last_expression = None
    current_expression = None
    # bc01 ordering
    trans_order = (0, 3, 1, 2)
    X = X.transpose(trans_order)
    layers = OrderedDict()
    inputs = OrderedDict()  # NOTE(review): populated but never read here
    blobs = OrderedDict()
    for i, layer in enumerate(self.parsedmodel):
        layer_type = layer['type']
        layer_name = layer['name']
        top_blobs = layer['top_blobs']
        bottom_blobs = layer['bottom_blobs']
        layer_blobs = layer.get('blobs', None)
        # Stop before the first fully-connected layer.
        if layer_name == 'fc6':
            break
        if verbose > 0:
            print("%d\t%s\t%s" % (i, layer_type, layer_name))
        if layer_type == 'DATA':
            # DATA layers contain input data in top_blobs, create input
            # variables, float for 'data' and int for 'label'
            for data_blob_name in top_blobs:
                if data_blob_name == 'label':
                    blobs['label'] = T.ivector()
                    inputs['label'] = blobs['label']
                else:
                    blobs[data_blob_name] = T.tensor4(dtype=float_dtype)
                    last_expression = blobs[data_blob_name]
                    inputs[data_blob_name] = blobs[data_blob_name]
        elif layer_type == 'CONVOLUTION':
            # CONVOLUTION layers take input from bottom_blob, convolve with
            # layer_blobs[0], and add bias layer_blobs[1]
            stride = layer['convolution_param__stride']
            # Per-axis strides override the scalar stride when larger.
            stride_h = max(layer['convolution_param__stride_h'], stride)
            stride_w = max(layer['convolution_param__stride_w'], stride)
            if stride_h > 1 or stride_w > 1:
                subsample = (stride_h, stride_w)
            else:
                subsample = None
            pad = layer['convolution_param__pad']
            pad_h = max(layer['convolution_param__pad_h'], pad)
            pad_w = max(layer['convolution_param__pad_w'], pad)
            # Flip the last two axes: Caffe correlates, this convolves.
            conv_filter = layer_blobs[0].astype(float_dtype)[
                ..., ::-1, ::-1]
            conv_bias = layer_blobs[1].astype(float_dtype).ravel()
            convolution_input = blobs[bottom_blobs[0]]
            convolution = Convolution(conv_filter, biases=conv_bias,
                                      activation=None,
                                      subsample=subsample,
                                      input_dtype=float_dtype)
            # If padding is specified, need to pad. In practice, I think
            # caffe prevents padding that would make the filter see only
            # zeros, so technically this can also be obtained by sensibly
            # cropping a border_mode=full convolution. However, subsampling
            # may then be off by 1 and would have to be done separately :/
            if pad_h > 0 or pad_w > 0:
                zp = ZeroPad((pad_h, pad_w))
                zp._build_expression(convolution_input)
                expression = zp.expression_
                layers[layer_name] = (zp, convolution)
            else:
                layers[layer_name] = convolution
                expression = convolution_input
            convolution._build_expression(expression)
            expression = convolution.expression_
            # if subsample is not None:
            #     expression = expression[:, :, ::subsample[0],
            #                             ::subsample[1]]
            blobs[top_blobs[0]] = expression
            current_expression = expression
        elif layer_type == "RELU":
            # RELU layers take input from bottom_blobs, set everything
            # negative to zero and write the result to top_blobs
            relu_input = blobs[bottom_blobs[0]]
            relu = Relu()
            relu._build_expression(relu_input)
            layers[layer_name] = relu
            blobs[top_blobs[0]] = relu.expression_
            current_expression = relu.expression_
        elif layer_type == "POOLING":
            # POOLING layers take input from bottom_blobs, perform max
            # pooling according to stride and kernel size information
            # and write the result to top_blobs
            pooling_input = blobs[bottom_blobs[0]]
            kernel_size = layer['pooling_param__kernel_size']
            kernel_h = max(layer['pooling_param__kernel_h'], kernel_size)
            kernel_w = max(layer['pooling_param__kernel_w'], kernel_size)
            stride = layer['pooling_param__stride']
            stride_h = max(layer['pooling_param__stride_h'], stride)
            stride_w = max(layer['pooling_param__stride_w'], stride)
            pad = layer['pooling_param__pad']
            pad_h = max(layer['pooling_param__pad_h'], pad)
            pad_w = max(layer['pooling_param__pad_w'], pad)
            # Caffe encodes the pooling operator as an int.
            pool_types = {0: 'max', 1: 'avg'}
            pool_type = pool_types[layer['pooling_param__pool']]
            # print "POOL TYPE is %s" % pool_type
            # pooling = FancyMaxPool((kernel_h, kernel_w),
            #                        (stride_h, stride_w),
            #                        ignore_border=False)
            pooling = CaffePool((kernel_h, kernel_w),
                                (stride_h, stride_w),
                                (pad_h, pad_w),
                                pool_type=pool_type)
            pooling._build_expression(pooling_input)
            layers[layer_name] = pooling
            blobs[top_blobs[0]] = pooling.expression_
            current_expression = pooling.expression_
        elif layer_type == "DROPOUT":
            # DROPOUT may figure in some networks, but it is only relevant
            # at the learning stage, not at the prediction stage.
            pass
        elif layer_type == "SOFTMAX_LOSS" or layer_type == 'SOFTMAX':
            softmax_input = blobs[bottom_blobs[0]]
            # have to write our own softmax expression, because of shape
            # issues
            si = softmax_input.reshape(
                (softmax_input.shape[0], softmax_input.shape[1], -1))
            shp = (si.shape[0], 1, si.shape[2])
            # Subtract the per-channel max before exponentiating for
            # numerical stability.
            exp = T.exp(si - si.max(axis=1).reshape(shp))
            softmax_expression = (exp / exp.sum(axis=1).reshape(shp)).reshape(
                softmax_input.shape)
            layers[layer_name] = "SOFTMAX"
            blobs[top_blobs[0]] = softmax_expression
            current_expression = softmax_expression
        elif layer_type == "SPLIT":
            # SPLIT fans one bottom blob out to several top blobs.
            split_input = blobs[bottom_blobs[0]]
            for top_blob in top_blobs:
                blobs[top_blob] = split_input
            # Should probably make a class to be able to add to layers
            layers[layer_name] = "SPLIT"
        elif layer_type == "LRN":
            # Local normalization layer
            lrn_input = blobs[bottom_blobs[0]]
            lrn_factor = layer['lrn_param__alpha']
            lrn_exponent = layer['lrn_param__beta']
            # Only across-channel normalization (region 0) is supported.
            axis = {0: 'channels'}[layer['lrn_param__norm_region']]
            nsize = layer['lrn_param__local_size']
            lrn = LRN(nsize, lrn_factor, lrn_exponent, axis=axis)
            lrn._build_expression(lrn_input)
            layers[layer_name] = lrn
            blobs[top_blobs[0]] = lrn.expression_
            current_expression = lrn.expression_
        elif layer_type == "CONCAT":
            input_expressions = [
                blobs[bottom_blob] for bottom_blob in bottom_blobs
            ]
            axis = layer['concat_param__concat_dim']
            output_expression = T.concatenate(input_expressions, axis=axis)
            blobs[top_blobs[0]] = output_expression
            layers[layer_name] = "CONCAT"
            current_expression = output_expression
        elif layer_type == "INNER_PRODUCT":
            # Fully-connected layer implemented as a 1x1 convolution.
            weights = layer_blobs[0].astype(float_dtype)
            biases = layer_blobs[1].astype(float_dtype).squeeze()
            fully_connected_input = blobs[bottom_blobs[0]]
            # NOTE(review): unreachable — the loop breaks on 'fc6' above.
            if layer_name == 'fc6':
                fully_connected_input = fully_connected_input.reshape(
                    (fully_connected_input.shape[0], 18432, 1, 1))
            fc_layer = Convolution(weights.transpose((2, 3, 0, 1)), biases,
                                   activation=None)
            fc_layer._build_expression(fully_connected_input)
            layers[layer_name] = fc_layer
            blobs[top_blobs[0]] = fc_layer.expression_
            current_expression = fc_layer.expression_
        else:
            raise ValueError(
                'layer type %s is not known to sklearn-theano' % layer_type)
        if layer_type == 'DATA':
            # Nothing to evaluate for an input layer.
            continue
        else:
            # Compile just this layer (previous symbolic output -> current
            # expression) and feed it the concrete activations so far.
            if isinstance(X, list):
                X = X[0]
            to_compile = [current_expression]
            transform_function = theano.function([last_expression],
                                                 to_compile)
            X = transform_function(X)
            transformations[top_blobs[0]] = X[0]
            last_expression = current_expression
    return transformations
def __layerwiseTransform__(self, input_data, float_dtype='float32',
                           verbose=0):
    """Run the parsed Caffe model layer by layer, compiling and
    evaluating a theano function after each layer, and collect the
    concrete activations of every layer in a dict keyed by its top blob.

    NOTE(review): this is a duplicate of an identical definition earlier
    in this file — the later one shadows the earlier; deduplicate.
    NOTE(review): the loop breaks as soon as a layer named 'fc6' is
    reached, so the fc6-specific reshape in the INNER_PRODUCT branch
    below appears unreachable — confirm intent.
    """
    transformations = {}
    input_data = self.preprocess_image(input_data)
    X = check_tensor(input_data, dtype=np.float32, n_dim=4)
    last_expression = None
    current_expression = None
    # bc01 ordering
    trans_order = (0, 3, 1, 2)
    X = X.transpose(trans_order)
    layers = OrderedDict()
    inputs = OrderedDict()  # NOTE(review): populated but never read here
    blobs = OrderedDict()
    for i, layer in enumerate(self.parsedmodel):
        layer_type = layer['type']
        layer_name = layer['name']
        top_blobs = layer['top_blobs']
        bottom_blobs = layer['bottom_blobs']
        layer_blobs = layer.get('blobs', None)
        # Stop before the first fully-connected layer.
        if layer_name == 'fc6':
            break
        if verbose > 0:
            print("%d\t%s\t%s" % (i, layer_type, layer_name))
        if layer_type == 'DATA':
            # DATA layers contain input data in top_blobs, create input
            # variables, float for 'data' and int for 'label'
            for data_blob_name in top_blobs:
                if data_blob_name == 'label':
                    blobs['label'] = T.ivector()
                    inputs['label'] = blobs['label']
                else:
                    blobs[data_blob_name] = T.tensor4(dtype=float_dtype)
                    last_expression = blobs[data_blob_name]
                    inputs[data_blob_name] = blobs[data_blob_name]
        elif layer_type == 'CONVOLUTION':
            # CONVOLUTION layers take input from bottom_blob, convolve with
            # layer_blobs[0], and add bias layer_blobs[1]
            stride = layer['convolution_param__stride']
            # Per-axis strides override the scalar stride when larger.
            stride_h = max(layer['convolution_param__stride_h'], stride)
            stride_w = max(layer['convolution_param__stride_w'], stride)
            if stride_h > 1 or stride_w > 1:
                subsample = (stride_h, stride_w)
            else:
                subsample = None
            pad = layer['convolution_param__pad']
            pad_h = max(layer['convolution_param__pad_h'], pad)
            pad_w = max(layer['convolution_param__pad_w'], pad)
            # Flip the last two axes: Caffe correlates, this convolves.
            conv_filter = layer_blobs[0].astype(float_dtype)[..., ::-1, ::-1]
            conv_bias = layer_blobs[1].astype(float_dtype).ravel()
            convolution_input = blobs[bottom_blobs[0]]
            convolution = Convolution(conv_filter, biases=conv_bias,
                                      activation=None,
                                      subsample=subsample,
                                      input_dtype=float_dtype)
            # If padding is specified, need to pad. In practice, I think
            # caffe prevents padding that would make the filter see only
            # zeros, so technically this can also be obtained by sensibly
            # cropping a border_mode=full convolution. However, subsampling
            # may then be off by 1 and would have to be done separately :/
            if pad_h > 0 or pad_w > 0:
                zp = ZeroPad((pad_h, pad_w))
                zp._build_expression(convolution_input)
                expression = zp.expression_
                layers[layer_name] = (zp, convolution)
            else:
                layers[layer_name] = convolution
                expression = convolution_input
            convolution._build_expression(expression)
            expression = convolution.expression_
            # if subsample is not None:
            #     expression = expression[:, :, ::subsample[0],
            #                             ::subsample[1]]
            blobs[top_blobs[0]] = expression
            current_expression = expression
        elif layer_type == "RELU":
            # RELU layers take input from bottom_blobs, set everything
            # negative to zero and write the result to top_blobs
            relu_input = blobs[bottom_blobs[0]]
            relu = Relu()
            relu._build_expression(relu_input)
            layers[layer_name] = relu
            blobs[top_blobs[0]] = relu.expression_
            current_expression = relu.expression_
        elif layer_type == "POOLING":
            # POOLING layers take input from bottom_blobs, perform max
            # pooling according to stride and kernel size information
            # and write the result to top_blobs
            pooling_input = blobs[bottom_blobs[0]]
            kernel_size = layer['pooling_param__kernel_size']
            kernel_h = max(layer['pooling_param__kernel_h'], kernel_size)
            kernel_w = max(layer['pooling_param__kernel_w'], kernel_size)
            stride = layer['pooling_param__stride']
            stride_h = max(layer['pooling_param__stride_h'], stride)
            stride_w = max(layer['pooling_param__stride_w'], stride)
            pad = layer['pooling_param__pad']
            pad_h = max(layer['pooling_param__pad_h'], pad)
            pad_w = max(layer['pooling_param__pad_w'], pad)
            # Caffe encodes the pooling operator as an int.
            pool_types = {0: 'max', 1: 'avg'}
            pool_type = pool_types[layer['pooling_param__pool']]
            # print "POOL TYPE is %s" % pool_type
            # pooling = FancyMaxPool((kernel_h, kernel_w),
            #                        (stride_h, stride_w),
            #                        ignore_border=False)
            pooling = CaffePool((kernel_h, kernel_w),
                                (stride_h, stride_w),
                                (pad_h, pad_w),
                                pool_type=pool_type)
            pooling._build_expression(pooling_input)
            layers[layer_name] = pooling
            blobs[top_blobs[0]] = pooling.expression_
            current_expression = pooling.expression_
        elif layer_type == "DROPOUT":
            # DROPOUT may figure in some networks, but it is only relevant
            # at the learning stage, not at the prediction stage.
            pass
        elif layer_type == "SOFTMAX_LOSS" or layer_type == 'SOFTMAX':
            softmax_input = blobs[bottom_blobs[0]]
            # have to write our own softmax expression, because of shape
            # issues
            si = softmax_input.reshape((softmax_input.shape[0],
                                        softmax_input.shape[1], -1))
            shp = (si.shape[0], 1, si.shape[2])
            # Subtract the per-channel max before exponentiating for
            # numerical stability.
            exp = T.exp(si - si.max(axis=1).reshape(shp))
            softmax_expression = (exp / exp.sum(axis=1).reshape(shp)
                                  ).reshape(softmax_input.shape)
            layers[layer_name] = "SOFTMAX"
            blobs[top_blobs[0]] = softmax_expression
            current_expression = softmax_expression
        elif layer_type == "SPLIT":
            # SPLIT fans one bottom blob out to several top blobs.
            split_input = blobs[bottom_blobs[0]]
            for top_blob in top_blobs:
                blobs[top_blob] = split_input
            # Should probably make a class to be able to add to layers
            layers[layer_name] = "SPLIT"
        elif layer_type == "LRN":
            # Local normalization layer
            lrn_input = blobs[bottom_blobs[0]]
            lrn_factor = layer['lrn_param__alpha']
            lrn_exponent = layer['lrn_param__beta']
            # Only across-channel normalization (region 0) is supported.
            axis = {0: 'channels'}[layer['lrn_param__norm_region']]
            nsize = layer['lrn_param__local_size']
            lrn = LRN(nsize, lrn_factor, lrn_exponent, axis=axis)
            lrn._build_expression(lrn_input)
            layers[layer_name] = lrn
            blobs[top_blobs[0]] = lrn.expression_
            current_expression = lrn.expression_
        elif layer_type == "CONCAT":
            input_expressions = [blobs[bottom_blob]
                                 for bottom_blob in bottom_blobs]
            axis = layer['concat_param__concat_dim']
            output_expression = T.concatenate(input_expressions, axis=axis)
            blobs[top_blobs[0]] = output_expression
            layers[layer_name] = "CONCAT"
            current_expression = output_expression
        elif layer_type == "INNER_PRODUCT":
            # Fully-connected layer implemented as a 1x1 convolution.
            weights = layer_blobs[0].astype(float_dtype)
            biases = layer_blobs[1].astype(float_dtype).squeeze()
            fully_connected_input = blobs[bottom_blobs[0]]
            # NOTE(review): unreachable — the loop breaks on 'fc6' above.
            if layer_name == 'fc6':
                fully_connected_input = fully_connected_input.reshape(
                    (fully_connected_input.shape[0], 18432, 1, 1))
            fc_layer = Convolution(weights.transpose((2, 3, 0, 1)), biases,
                                   activation=None)
            fc_layer._build_expression(fully_connected_input)
            layers[layer_name] = fc_layer
            blobs[top_blobs[0]] = fc_layer.expression_
            current_expression = fc_layer.expression_
        else:
            raise ValueError('layer type %s is not known to sklearn-theano'
                             % layer_type)
        if layer_type == 'DATA':
            # Nothing to evaluate for an input layer.
            continue
        else:
            # Compile just this layer (previous symbolic output -> current
            # expression) and feed it the concrete activations so far.
            if isinstance(X, list):
                X = X[0]
            to_compile = [current_expression]
            transform_function = theano.function([last_expression],
                                                 to_compile)
            X = transform_function(X)
            transformations[top_blobs[0]] = X[0]
            last_expression = current_expression
    return transformations
def transform(self, X):
    """Transform a set of images.

    Returns the features from each layer.

    Parameters
    ----------
    X : array-like, shape = [n_images, height, width, color]
        or shape = [height, width, color]

    Returns
    -------
    T : array-like, shape = [n_images, n_features]
        If force_reshape = False,
        list of array-like, length output_layers,
        each shape = [n_images, n_windows, n_window_features]
        Returns the features extracted for each of the n_images in X.
    """
    X = check_tensor(X, dtype=np.float32, n_dim=4)
    XT = X.transpose(*self.transpose_order)
    if self.batch_size is None:
        if self.force_reshape:
            return self.transform_function(XT)[0].reshape((len(X), -1))
        return self.transform_function(XT)
    n_samples = XT.shape[0]
    for i in range(0, n_samples, self.batch_size):
        transformed_batch = self.transform_function(
            XT[i:i + self.batch_size])
        # at first iteration, initialize output arrays to correct size
        if i == 0:
            shapes = [(n_samples,) + t.shape[1:]
                      for t in transformed_batch]
            ravelled_shapes = [np.prod(shp[1:]) for shp in shapes]
            if self.force_reshape:
                output_width = np.sum(ravelled_shapes)
                output = np.empty((n_samples, output_width),
                                  dtype=transformed_batch[0].dtype)
                # np.r_ is an index expression, not a callable:
                # np.r_[...] concatenates, np.r_(...) raises TypeError.
                break_points = np.r_[0, np.cumsum(ravelled_shapes)]
                raw_output = [
                    output[:, start:stop]
                    for start, stop in zip(break_points[:-1],
                                           break_points[1:])]
            else:
                # transformed_batch is a list of arrays (one per output
                # layer), so take the dtype of each array individually.
                output = [np.empty(shape, dtype=t.dtype)
                          for shape, t in zip(shapes, transformed_batch)]
                raw_output = [arr.reshape(n_samples, -1)
                              for arr in output]
        for transformed, out in zip(transformed_batch, raw_output):
            out[i:i + self.batch_size] = transformed
    return output